1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75 #include "aarch-common.h"
76 #include "aarch-common-protos.h"
77
78 /* This file should be included last. */
79 #include "target-def.h"
80
81 /* Forward definitions of types. */
82 typedef struct minipool_node Mnode;
83 typedef struct minipool_fixup Mfix;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_insn_cost (rtx_insn *, bool);
185 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
186 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
187 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
188 static void emit_constant_insn (rtx cond, rtx pattern);
189 static rtx_insn *emit_set_insn (rtx, rtx);
190 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
191 static rtx emit_multi_reg_push (unsigned long, unsigned long);
192 static void arm_emit_multi_reg_pop (unsigned long);
193 static int vfp_emit_fstmd (int, int);
194 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
195 static int arm_arg_partial_bytes (cumulative_args_t,
196 const function_arg_info &);
197 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
198 static void arm_function_arg_advance (cumulative_args_t,
199 const function_arg_info &);
200 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
201 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
202 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
203 const_tree);
204 static rtx aapcs_libcall_value (machine_mode);
205 static int aapcs_select_return_coproc (const_tree, const_tree);
206
207 #ifdef OBJECT_FORMAT_ELF
208 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
209 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
210 #endif
211 #ifndef ARM_PE
212 static void arm_encode_section_info (tree, rtx, int);
213 #endif
214
215 static void arm_file_end (void);
216 static void arm_file_start (void);
217 static void arm_insert_attributes (tree, tree *);
218
219 static void arm_setup_incoming_varargs (cumulative_args_t,
220 const function_arg_info &, int *, int);
221 static bool arm_pass_by_reference (cumulative_args_t,
222 const function_arg_info &);
223 static bool arm_promote_prototypes (const_tree);
224 static bool arm_default_short_enums (void);
225 static bool arm_align_anon_bitfield (void);
226 static bool arm_return_in_msb (const_tree);
227 static bool arm_must_pass_in_stack (const function_arg_info &);
228 static bool arm_return_in_memory (const_tree, const_tree);
229 #if ARM_UNWIND_INFO
230 static void arm_unwind_emit (FILE *, rtx_insn *);
231 static bool arm_output_ttype (rtx);
232 static void arm_asm_emit_except_personality (rtx);
233 #endif
234 static void arm_asm_init_sections (void);
235 static rtx arm_dwarf_register_span (rtx);
236
237 static tree arm_cxx_guard_type (void);
238 static bool arm_cxx_guard_mask_bit (void);
239 static tree arm_get_cookie_size (tree);
240 static bool arm_cookie_has_size (void);
241 static bool arm_cxx_cdtor_returns_this (void);
242 static bool arm_cxx_key_method_may_be_inline (void);
243 static void arm_cxx_determine_class_data_visibility (tree);
244 static bool arm_cxx_class_data_always_comdat (void);
245 static bool arm_cxx_use_aeabi_atexit (void);
246 static void arm_init_libfuncs (void);
247 static tree arm_build_builtin_va_list (void);
248 static void arm_expand_builtin_va_start (tree, rtx);
249 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
250 static void arm_option_override (void);
251 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
252 struct cl_target_option *);
253 static void arm_override_options_after_change (void);
254 static void arm_option_print (FILE *, int, struct cl_target_option *);
255 static void arm_set_current_function (tree);
256 static bool arm_can_inline_p (tree, tree);
257 static void arm_relayout_function (tree);
258 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_sched_can_speculate_insn (rtx_insn *);
261 static bool arm_macro_fusion_p (void);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
265 static int arm_first_cycle_multipass_dfa_lookahead (void);
266 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
267 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
268 static bool arm_output_addr_const_extra (FILE *, rtx);
269 static bool arm_allocate_stack_slots_for_args (void);
270 static bool arm_warn_func_return (tree);
271 static tree arm_promoted_type (const_tree t);
272 static bool arm_scalar_mode_supported_p (scalar_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (scalar_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
300
301 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
302 rtx, const vec_perm_indices &);
303
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
305
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
309
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
328 vec<machine_mode> &,
329 vec<const char *> &, vec<rtx> &,
330 HARD_REG_SET &, location_t);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
332 \f
333 /* Table of machine attributes. */
334 static const struct attribute_spec arm_attribute_table[] =
335 {
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 /* Function calls made to this symbol must be done indirectly, because
339 it may lie outside of the 26 bit addressing range of a normal function
340 call. */
341 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Whereas these functions are always known to reside within the 26 bit
343 addressing range. */
344 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Specify the procedure call conventions for a function. */
346 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
347 NULL },
348 /* Interrupt Service Routines have special prologue and epilogue requirements. */
349 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "naked", 0, 0, true, false, false, false,
354 arm_handle_fndecl_attribute, NULL },
355 #ifdef ARM_PE
356 /* ARM/PE has three new attributes:
357 interfacearm - ?
358 dllexport - for exporting a function/variable that will live in a dll
359 dllimport - for importing a function/variable from a dll
360
361 Microsoft allows multiple declspecs in one __declspec, separating
362 them with spaces. We do NOT support this. Instead, use __declspec
363 multiple times.
364 */
365 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
366 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
367 { "interfacearm", 0, 0, true, false, false, false,
368 arm_handle_fndecl_attribute, NULL },
369 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
370 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
373 NULL },
374 { "notshared", 0, 0, false, true, false, false,
375 arm_handle_notshared_attribute, NULL },
376 #endif
377 /* ARMv8-M Security Extensions support. */
378 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
379 arm_handle_cmse_nonsecure_entry, NULL },
380 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
381 arm_handle_cmse_nonsecure_call, NULL },
382 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
383 { NULL, 0, 0, false, false, false, false, NULL, NULL }
384 };
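/* For example, a user would request the indirect calling convention described
   above for a particular function with:
       extern void far_func (void) __attribute__ ((long_call));  */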
385 \f
386 /* Initialize the GCC target structure. */
387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
388 #undef TARGET_MERGE_DECL_ATTRIBUTES
389 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #endif
391
392 #undef TARGET_CHECK_BUILTIN_CALL
393 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
394
395 #undef TARGET_LEGITIMIZE_ADDRESS
396 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
397
398 #undef TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
400
401 #undef TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
403
404 #undef TARGET_ASM_FILE_START
405 #define TARGET_ASM_FILE_START arm_file_start
406 #undef TARGET_ASM_FILE_END
407 #define TARGET_ASM_FILE_END arm_file_end
408
409 #undef TARGET_ASM_ALIGNED_SI_OP
410 #define TARGET_ASM_ALIGNED_SI_OP NULL
411 #undef TARGET_ASM_INTEGER
412 #define TARGET_ASM_INTEGER arm_assemble_integer
413
414 #undef TARGET_PRINT_OPERAND
415 #define TARGET_PRINT_OPERAND arm_print_operand
416 #undef TARGET_PRINT_OPERAND_ADDRESS
417 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
418 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
419 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
420
421 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
422 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
423
424 #undef TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
426
427 #undef TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
429
430 #undef TARGET_CAN_INLINE_P
431 #define TARGET_CAN_INLINE_P arm_can_inline_p
432
433 #undef TARGET_RELAYOUT_FUNCTION
434 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
435
436 #undef TARGET_OPTION_OVERRIDE
437 #define TARGET_OPTION_OVERRIDE arm_option_override
438
439 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
440 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
441
442 #undef TARGET_OPTION_RESTORE
443 #define TARGET_OPTION_RESTORE arm_option_restore
444
445 #undef TARGET_OPTION_PRINT
446 #define TARGET_OPTION_PRINT arm_option_print
447
448 #undef TARGET_COMP_TYPE_ATTRIBUTES
449 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
450
451 #undef TARGET_SCHED_CAN_SPECULATE_INSN
452 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
453
454 #undef TARGET_SCHED_MACRO_FUSION_P
455 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
456
457 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
458 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
459
460 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
461 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
462
463 #undef TARGET_SCHED_ADJUST_COST
464 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
465
466 #undef TARGET_SET_CURRENT_FUNCTION
467 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
468
469 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
470 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
471
472 #undef TARGET_SCHED_REORDER
473 #define TARGET_SCHED_REORDER arm_sched_reorder
474
475 #undef TARGET_REGISTER_MOVE_COST
476 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
477
478 #undef TARGET_MEMORY_MOVE_COST
479 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
480
481 #undef TARGET_ENCODE_SECTION_INFO
482 #ifdef ARM_PE
483 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
484 #else
485 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
486 #endif
487
488 #undef TARGET_STRIP_NAME_ENCODING
489 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
490
491 #undef TARGET_ASM_INTERNAL_LABEL
492 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
493
494 #undef TARGET_FLOATN_MODE
495 #define TARGET_FLOATN_MODE arm_floatn_mode
496
497 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
498 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
499
500 #undef TARGET_FUNCTION_VALUE
501 #define TARGET_FUNCTION_VALUE arm_function_value
502
503 #undef TARGET_LIBCALL_VALUE
504 #define TARGET_LIBCALL_VALUE arm_libcall_value
505
506 #undef TARGET_FUNCTION_VALUE_REGNO_P
507 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
508
509 #undef TARGET_ASM_OUTPUT_MI_THUNK
510 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
511 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
512 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
513
514 #undef TARGET_RTX_COSTS
515 #define TARGET_RTX_COSTS arm_rtx_costs
516 #undef TARGET_ADDRESS_COST
517 #define TARGET_ADDRESS_COST arm_address_cost
518 #undef TARGET_INSN_COST
519 #define TARGET_INSN_COST arm_insn_cost
520
521 #undef TARGET_SHIFT_TRUNCATION_MASK
522 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
523 #undef TARGET_VECTOR_MODE_SUPPORTED_P
524 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
525 #undef TARGET_ARRAY_MODE_SUPPORTED_P
526 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
527 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
528 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
529 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
530 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
531 arm_autovectorize_vector_modes
532
533 #undef TARGET_MACHINE_DEPENDENT_REORG
534 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
535
536 #undef TARGET_INIT_BUILTINS
537 #define TARGET_INIT_BUILTINS arm_init_builtins
538 #undef TARGET_EXPAND_BUILTIN
539 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
540 #undef TARGET_BUILTIN_DECL
541 #define TARGET_BUILTIN_DECL arm_builtin_decl
542
543 #undef TARGET_INIT_LIBFUNCS
544 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
545
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
548 #undef TARGET_PROMOTE_PROTOTYPES
549 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
550 #undef TARGET_PASS_BY_REFERENCE
551 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
552 #undef TARGET_ARG_PARTIAL_BYTES
553 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
554 #undef TARGET_FUNCTION_ARG
555 #define TARGET_FUNCTION_ARG arm_function_arg
556 #undef TARGET_FUNCTION_ARG_ADVANCE
557 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
558 #undef TARGET_FUNCTION_ARG_PADDING
559 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
560 #undef TARGET_FUNCTION_ARG_BOUNDARY
561 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
562
563 #undef TARGET_SETUP_INCOMING_VARARGS
564 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
565
566 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
567 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
568
569 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
570 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
571 #undef TARGET_TRAMPOLINE_INIT
572 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
573 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
574 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
575
576 #undef TARGET_WARN_FUNC_RETURN
577 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
578
579 #undef TARGET_DEFAULT_SHORT_ENUMS
580 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
581
582 #undef TARGET_ALIGN_ANON_BITFIELD
583 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
584
585 #undef TARGET_NARROW_VOLATILE_BITFIELD
586 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
587
588 #undef TARGET_CXX_GUARD_TYPE
589 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
590
591 #undef TARGET_CXX_GUARD_MASK_BIT
592 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
593
594 #undef TARGET_CXX_GET_COOKIE_SIZE
595 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
596
597 #undef TARGET_CXX_COOKIE_HAS_SIZE
598 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
599
600 #undef TARGET_CXX_CDTOR_RETURNS_THIS
601 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
602
603 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
604 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
605
606 #undef TARGET_CXX_USE_AEABI_ATEXIT
607 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
608
609 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
610 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
611 arm_cxx_determine_class_data_visibility
612
613 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
614 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
615
616 #undef TARGET_RETURN_IN_MSB
617 #define TARGET_RETURN_IN_MSB arm_return_in_msb
618
619 #undef TARGET_RETURN_IN_MEMORY
620 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
621
622 #undef TARGET_MUST_PASS_IN_STACK
623 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
624
625 #if ARM_UNWIND_INFO
626 #undef TARGET_ASM_UNWIND_EMIT
627 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
628
629 /* EABI unwinding tables use a different format for the typeinfo tables. */
630 #undef TARGET_ASM_TTYPE
631 #define TARGET_ASM_TTYPE arm_output_ttype
632
633 #undef TARGET_ARM_EABI_UNWINDER
634 #define TARGET_ARM_EABI_UNWINDER true
635
636 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
637 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
638
639 #endif /* ARM_UNWIND_INFO */
640
641 #undef TARGET_ASM_INIT_SECTIONS
642 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
643
644 #undef TARGET_DWARF_REGISTER_SPAN
645 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
646
647 #undef TARGET_CANNOT_COPY_INSN_P
648 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
649
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_HAVE_TLS
652 #define TARGET_HAVE_TLS true
653 #endif
654
655 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
656 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
657
658 #undef TARGET_LEGITIMATE_CONSTANT_P
659 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
660
661 #undef TARGET_CANNOT_FORCE_CONST_MEM
662 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
663
664 #undef TARGET_MAX_ANCHOR_OFFSET
665 #define TARGET_MAX_ANCHOR_OFFSET 4095
666
667 /* The minimum is set such that the total size of the block
668 for a particular anchor is -4088 + 1 + 4095 bytes, which is
669 divisible by eight, ensuring natural spacing of anchors. */
670 #undef TARGET_MIN_ANCHOR_OFFSET
671 #define TARGET_MIN_ANCHOR_OFFSET -4088
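/* That is, each anchor covers offsets in [-4088, 4095], a span of
   4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of eight.  */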
672
673 #undef TARGET_SCHED_ISSUE_RATE
674 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
675
676 #undef TARGET_SCHED_VARIABLE_ISSUE
677 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
678
679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
681 arm_first_cycle_multipass_dfa_lookahead
682
683 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
684 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
685 arm_first_cycle_multipass_dfa_lookahead_guard
686
687 #undef TARGET_MANGLE_TYPE
688 #define TARGET_MANGLE_TYPE arm_mangle_type
689
690 #undef TARGET_INVALID_CONVERSION
691 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
692
693 #undef TARGET_INVALID_UNARY_OP
694 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
695
696 #undef TARGET_INVALID_BINARY_OP
697 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
698
699 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
700 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
701
702 #undef TARGET_BUILD_BUILTIN_VA_LIST
703 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
704 #undef TARGET_EXPAND_BUILTIN_VA_START
705 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
707 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
708
709 #ifdef HAVE_AS_TLS
710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
712 #endif
713
714 #undef TARGET_LEGITIMATE_ADDRESS_P
715 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
716
717 #undef TARGET_PREFERRED_RELOAD_CLASS
718 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
719
720 #undef TARGET_PROMOTED_TYPE
721 #define TARGET_PROMOTED_TYPE arm_promoted_type
722
723 #undef TARGET_SCALAR_MODE_SUPPORTED_P
724 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
725
726 #undef TARGET_COMPUTE_FRAME_LAYOUT
727 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
728
729 #undef TARGET_FRAME_POINTER_REQUIRED
730 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
731
732 #undef TARGET_CAN_ELIMINATE
733 #define TARGET_CAN_ELIMINATE arm_can_eliminate
734
735 #undef TARGET_CONDITIONAL_REGISTER_USAGE
736 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
737
738 #undef TARGET_CLASS_LIKELY_SPILLED_P
739 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
740
741 #undef TARGET_VECTORIZE_BUILTINS
742 #define TARGET_VECTORIZE_BUILTINS
743
744 #undef TARGET_VECTOR_ALIGNMENT
745 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
746
747 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
748 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
749 arm_vector_alignment_reachable
750
751 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
752 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
753 arm_builtin_support_vector_misalignment
754
755 #undef TARGET_PREFERRED_RENAME_CLASS
756 #define TARGET_PREFERRED_RENAME_CLASS \
757 arm_preferred_rename_class
758
759 #undef TARGET_VECTORIZE_VEC_PERM_CONST
760 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
761
762 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
763 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
764 arm_builtin_vectorization_cost
765
766 #undef TARGET_CANONICALIZE_COMPARISON
767 #define TARGET_CANONICALIZE_COMPARISON \
768 arm_canonicalize_comparison
769
770 #undef TARGET_ASAN_SHADOW_OFFSET
771 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
772
773 #undef MAX_INSN_PER_IT_BLOCK
774 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
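/* Under the ARMv8 "restricted IT" rules only a single conditional instruction
   may sit inside an IT block; otherwise up to four instructions may be
   predicated by one IT.  */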
775
776 #undef TARGET_CAN_USE_DOLOOP_P
777 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
778
779 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
780 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
781
782 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
783 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
784
785 #undef TARGET_SCHED_FUSION_PRIORITY
786 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
787
788 #undef TARGET_ASM_FUNCTION_SECTION
789 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
790
791 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
792 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
793
794 #undef TARGET_SECTION_TYPE_FLAGS
795 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
796
797 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
798 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
799
800 #undef TARGET_C_EXCESS_PRECISION
801 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
802
803 /* Although the architecture reserves bits 0 and 1, only the former is
804 used for ARM/Thumb ISA selection in v7 and earlier versions. */
805 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
806 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
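/* The value 2 above presumably means that bit 1 is the bit used to mark a
   custom function descriptor, since bit 0 is already taken by ARM/Thumb
   selection as noted in the comment.  */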
807
808 #undef TARGET_FIXED_CONDITION_CODE_REGS
809 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
810
811 #undef TARGET_HARD_REGNO_NREGS
812 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
813 #undef TARGET_HARD_REGNO_MODE_OK
814 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
815
816 #undef TARGET_MODES_TIEABLE_P
817 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
818
819 #undef TARGET_CAN_CHANGE_MODE_CLASS
820 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
821
822 #undef TARGET_CONSTANT_ALIGNMENT
823 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
824
825 #undef TARGET_INVALID_WITHIN_DOLOOP
826 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
827
828 #undef TARGET_MD_ASM_ADJUST
829 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
830
831 #undef TARGET_STACK_PROTECT_GUARD
832 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
833
834 #undef TARGET_VECTORIZE_GET_MASK_MODE
835 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
836 \f
837 /* Obstack for minipool constant handling. */
838 static struct obstack minipool_obstack;
839 static char * minipool_startobj;
840
841 /* The maximum number of insns skipped which
842 will be conditionalised if possible. */
843 static int max_insns_skipped = 5;
844
845 /* True if we are currently building a constant table. */
846 int making_const_table;
847
848 /* The processor for which instructions should be scheduled. */
849 enum processor_type arm_tune = TARGET_CPU_arm_none;
850
851 /* The current tuning set. */
852 const struct tune_params *current_tune;
853
854 /* Which floating point hardware to schedule for. */
855 int arm_fpu_attr;
856
857 /* Used for Thumb call_via trampolines. */
858 rtx thumb_call_via_label[14];
859 static int thumb_call_reg_needed;
860
861 /* The bits in this mask specify which instruction scheduling options should
862 be used. */
863 unsigned int tune_flags = 0;
864
865 /* The highest ARM architecture version supported by the
866 target. */
867 enum base_architecture arm_base_arch = BASE_ARCH_0;
868
869 /* Active target architecture and tuning. */
870
871 struct arm_build_target arm_active_target;
872
873 /* The following are used in the arm.md file as equivalents to bits
874 in the above two flag variables. */
875
876 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
877 int arm_arch4 = 0;
878
879 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
880 int arm_arch4t = 0;
881
882 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
883 int arm_arch5t = 0;
884
885 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
886 int arm_arch5te = 0;
887
888 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
889 int arm_arch6 = 0;
890
891 /* Nonzero if this chip supports the ARM 6K extensions. */
892 int arm_arch6k = 0;
893
894 /* Nonzero if this chip supports the ARM 6KZ extensions. */
895 int arm_arch6kz = 0;
896
897 /* Nonzero if instructions present in ARMv6-M can be used. */
898 int arm_arch6m = 0;
899
900 /* Nonzero if this chip supports the ARM 7 extensions. */
901 int arm_arch7 = 0;
902
903 /* Nonzero if this chip supports the Large Physical Address Extension. */
904 int arm_arch_lpae = 0;
905
906 /* Nonzero if instructions not present in the 'M' profile can be used. */
907 int arm_arch_notm = 0;
908
909 /* Nonzero if instructions present in ARMv7E-M can be used. */
910 int arm_arch7em = 0;
911
912 /* Nonzero if instructions present in ARMv8 can be used. */
913 int arm_arch8 = 0;
914
915 /* Nonzero if this chip supports the ARMv8.1 extensions. */
916 int arm_arch8_1 = 0;
917
918 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
919 int arm_arch8_2 = 0;
920
921 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
922 int arm_arch8_3 = 0;
923
924 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
925 int arm_arch8_4 = 0;
926
927 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
928 extensions. */
929 int arm_arch8m_main = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
932 extensions. */
933 int arm_arch8_1m_main = 0;
934
935 /* Nonzero if this chip supports the FP16 instructions extension of ARM
936 Architecture 8.2. */
937 int arm_fp16_inst = 0;
938
939 /* Nonzero if this chip can benefit from load scheduling. */
940 int arm_ld_sched = 0;
941
942 /* Nonzero if this chip is a StrongARM. */
943 int arm_tune_strongarm = 0;
944
945 /* Nonzero if this chip supports Intel Wireless MMX technology. */
946 int arm_arch_iwmmxt = 0;
947
948 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
949 int arm_arch_iwmmxt2 = 0;
950
951 /* Nonzero if this chip is an XScale. */
952 int arm_arch_xscale = 0;
953
 954 /* Nonzero if tuning for XScale. */
955 int arm_tune_xscale = 0;
956
957 /* Nonzero if we want to tune for stores that access the write-buffer.
958 This typically means an ARM6 or ARM7 with MMU or MPU. */
959 int arm_tune_wbuf = 0;
960
961 /* Nonzero if tuning for Cortex-A9. */
962 int arm_tune_cortex_a9 = 0;
963
964 /* Nonzero if we should define __THUMB_INTERWORK__ in the
965 preprocessor.
966 XXX This is a bit of a hack, it's intended to help work around
967 problems in GLD which doesn't understand that armv5t code is
968 interworking clean. */
969 int arm_cpp_interwork = 0;
970
971 /* Nonzero if chip supports Thumb 1. */
972 int arm_arch_thumb1;
973
974 /* Nonzero if chip supports Thumb 2. */
975 int arm_arch_thumb2;
976
977 /* Nonzero if chip supports integer division instruction. */
978 int arm_arch_arm_hwdiv;
979 int arm_arch_thumb_hwdiv;
980
981 /* Nonzero if chip disallows volatile memory access in IT block. */
982 int arm_arch_no_volatile_ce;
983
984 /* Nonzero if we shouldn't use literal pools. */
985 bool arm_disable_literal_pool = false;
986
987 /* The register number to be used for the PIC offset register. */
988 unsigned arm_pic_register = INVALID_REGNUM;
989
990 enum arm_pcs arm_pcs_default;
991
992 /* For an explanation of these variables, see final_prescan_insn below. */
993 int arm_ccfsm_state;
994 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
995 enum arm_cond_code arm_current_cc;
996
997 rtx arm_target_insn;
998 int arm_target_label;
999 /* The number of conditionally executed insns, including the current insn. */
1000 int arm_condexec_count = 0;
1001 /* A bitmask specifying the patterns for the IT block.
1002 Zero means do not output an IT block before this insn. */
1003 int arm_condexec_mask = 0;
1004 /* The number of bits used in arm_condexec_mask. */
1005 int arm_condexec_masklen = 0;
1006
1007 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1008 int arm_arch_crc = 0;
1009
1010 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1011 int arm_arch_dotprod = 0;
1012
1013 /* Nonzero if chip supports the ARMv8-M security extensions. */
1014 int arm_arch_cmse = 0;
1015
1016 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1017 int arm_m_profile_small_mul = 0;
1018
1019 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1020 int arm_arch_i8mm = 0;
1021
1022 /* Nonzero if chip supports the BFloat16 instructions. */
1023 int arm_arch_bf16 = 0;
1024
1025 /* Nonzero if chip supports the Custom Datapath Extension. */
1026 int arm_arch_cde = 0;
1027 int arm_arch_cde_coproc = 0;
1028 const int arm_arch_cde_coproc_bits[] = {
1029 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1030 };
1031
1032 /* The condition codes of the ARM, and the inverse function. */
1033 static const char * const arm_condition_codes[] =
1034 {
1035 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1036 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1037 };
1038
1039 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1040 int arm_regs_in_sequence[] =
1041 {
1042 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1043 };
1044
1045 #define DEF_FP_SYSREG(reg) #reg,
1046 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1047 FP_SYSREGS
1048 };
1049 #undef DEF_FP_SYSREG
1050
1051 #define ARM_LSL_NAME "lsl"
1052 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1053
1054 #define THUMB2_WORK_REGS \
1055 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1056 | (1 << SP_REGNUM) \
1057 | (1 << PC_REGNUM) \
1058 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1059 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1060 : 0)))
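/* A worked example (assuming THUMB_HARD_FRAME_POINTER_REGNUM is r7 and no PIC
   register is in use): SP and PC lie outside the low-register mask anyway, so
   this evaluates to 0xff & ~0x80 = 0x7f, i.e. r0-r6.  */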
1061 \f
1062 /* Initialization code. */
1063
1064 struct cpu_tune
1065 {
1066 enum processor_type scheduler;
1067 unsigned int tune_flags;
1068 const struct tune_params *tune;
1069 };
1070
1071 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1072 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1073 { \
1074 num_slots, \
1075 l1_size, \
1076 l1_line_size \
1077 }
1078
1079 /* arm generic vectorizer costs. */
1080 static const
1081 struct cpu_vec_costs arm_default_vec_cost = {
1082 1, /* scalar_stmt_cost. */
1083 1, /* scalar load_cost. */
1084 1, /* scalar_store_cost. */
1085 1, /* vec_stmt_cost. */
1086 1, /* vec_to_scalar_cost. */
1087 1, /* scalar_to_vec_cost. */
1088 1, /* vec_align_load_cost. */
1089 1, /* vec_unalign_load_cost. */
1090 1, /* vec_unalign_store_cost. */
1091 1, /* vec_store_cost. */
1092 3, /* cond_taken_branch_cost. */
1093 1, /* cond_not_taken_branch_cost. */
1094 };
1095
1096 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1097 #include "aarch-cost-tables.h"
1098
1099
1100
1101 const struct cpu_cost_table cortexa9_extra_costs =
1102 {
1103 /* ALU */
1104 {
1105 0, /* arith. */
1106 0, /* logical. */
1107 0, /* shift. */
1108 COSTS_N_INSNS (1), /* shift_reg. */
1109 COSTS_N_INSNS (1), /* arith_shift. */
1110 COSTS_N_INSNS (2), /* arith_shift_reg. */
1111 0, /* log_shift. */
1112 COSTS_N_INSNS (1), /* log_shift_reg. */
1113 COSTS_N_INSNS (1), /* extend. */
1114 COSTS_N_INSNS (2), /* extend_arith. */
1115 COSTS_N_INSNS (1), /* bfi. */
1116 COSTS_N_INSNS (1), /* bfx. */
1117 0, /* clz. */
1118 0, /* rev. */
1119 0, /* non_exec. */
1120 true /* non_exec_costs_exec. */
1121 },
1122 {
1123 /* MULT SImode */
1124 {
1125 COSTS_N_INSNS (3), /* simple. */
1126 COSTS_N_INSNS (3), /* flag_setting. */
1127 COSTS_N_INSNS (2), /* extend. */
1128 COSTS_N_INSNS (3), /* add. */
1129 COSTS_N_INSNS (2), /* extend_add. */
1130 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1131 },
1132 /* MULT DImode */
1133 {
1134 0, /* simple (N/A). */
1135 0, /* flag_setting (N/A). */
1136 COSTS_N_INSNS (4), /* extend. */
1137 0, /* add (N/A). */
1138 COSTS_N_INSNS (4), /* extend_add. */
1139 0 /* idiv (N/A). */
1140 }
1141 },
1142 /* LD/ST */
1143 {
1144 COSTS_N_INSNS (2), /* load. */
1145 COSTS_N_INSNS (2), /* load_sign_extend. */
1146 COSTS_N_INSNS (2), /* ldrd. */
1147 COSTS_N_INSNS (2), /* ldm_1st. */
1148 1, /* ldm_regs_per_insn_1st. */
1149 2, /* ldm_regs_per_insn_subsequent. */
1150 COSTS_N_INSNS (5), /* loadf. */
1151 COSTS_N_INSNS (5), /* loadd. */
1152 COSTS_N_INSNS (1), /* load_unaligned. */
1153 COSTS_N_INSNS (2), /* store. */
1154 COSTS_N_INSNS (2), /* strd. */
1155 COSTS_N_INSNS (2), /* stm_1st. */
1156 1, /* stm_regs_per_insn_1st. */
1157 2, /* stm_regs_per_insn_subsequent. */
1158 COSTS_N_INSNS (1), /* storef. */
1159 COSTS_N_INSNS (1), /* stored. */
1160 COSTS_N_INSNS (1), /* store_unaligned. */
1161 COSTS_N_INSNS (1), /* loadv. */
1162 COSTS_N_INSNS (1) /* storev. */
1163 },
1164 {
1165 /* FP SFmode */
1166 {
1167 COSTS_N_INSNS (14), /* div. */
1168 COSTS_N_INSNS (4), /* mult. */
1169 COSTS_N_INSNS (7), /* mult_addsub. */
1170 COSTS_N_INSNS (30), /* fma. */
1171 COSTS_N_INSNS (3), /* addsub. */
1172 COSTS_N_INSNS (1), /* fpconst. */
1173 COSTS_N_INSNS (1), /* neg. */
1174 COSTS_N_INSNS (3), /* compare. */
1175 COSTS_N_INSNS (3), /* widen. */
1176 COSTS_N_INSNS (3), /* narrow. */
1177 COSTS_N_INSNS (3), /* toint. */
1178 COSTS_N_INSNS (3), /* fromint. */
1179 COSTS_N_INSNS (3) /* roundint. */
1180 },
1181 /* FP DFmode */
1182 {
1183 COSTS_N_INSNS (24), /* div. */
1184 COSTS_N_INSNS (5), /* mult. */
1185 COSTS_N_INSNS (8), /* mult_addsub. */
1186 COSTS_N_INSNS (30), /* fma. */
1187 COSTS_N_INSNS (3), /* addsub. */
1188 COSTS_N_INSNS (1), /* fpconst. */
1189 COSTS_N_INSNS (1), /* neg. */
1190 COSTS_N_INSNS (3), /* compare. */
1191 COSTS_N_INSNS (3), /* widen. */
1192 COSTS_N_INSNS (3), /* narrow. */
1193 COSTS_N_INSNS (3), /* toint. */
1194 COSTS_N_INSNS (3), /* fromint. */
1195 COSTS_N_INSNS (3) /* roundint. */
1196 }
1197 },
1198 /* Vector */
1199 {
1200 COSTS_N_INSNS (1), /* alu. */
1201 COSTS_N_INSNS (4), /* mult. */
1202 COSTS_N_INSNS (1), /* movi. */
1203 COSTS_N_INSNS (2), /* dup. */
1204 COSTS_N_INSNS (2) /* extract. */
1205 }
1206 };
1207
1208 const struct cpu_cost_table cortexa8_extra_costs =
1209 {
1210 /* ALU */
1211 {
1212 0, /* arith. */
1213 0, /* logical. */
1214 COSTS_N_INSNS (1), /* shift. */
1215 0, /* shift_reg. */
1216 COSTS_N_INSNS (1), /* arith_shift. */
1217 0, /* arith_shift_reg. */
1218 COSTS_N_INSNS (1), /* log_shift. */
1219 0, /* log_shift_reg. */
1220 0, /* extend. */
1221 0, /* extend_arith. */
1222 0, /* bfi. */
1223 0, /* bfx. */
1224 0, /* clz. */
1225 0, /* rev. */
1226 0, /* non_exec. */
1227 true /* non_exec_costs_exec. */
1228 },
1229 {
1230 /* MULT SImode */
1231 {
1232 COSTS_N_INSNS (1), /* simple. */
1233 COSTS_N_INSNS (1), /* flag_setting. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* add. */
1236 COSTS_N_INSNS (1), /* extend_add. */
1237 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1238 },
1239 /* MULT DImode */
1240 {
1241 0, /* simple (N/A). */
1242 0, /* flag_setting (N/A). */
1243 COSTS_N_INSNS (2), /* extend. */
1244 0, /* add (N/A). */
1245 COSTS_N_INSNS (2), /* extend_add. */
1246 0 /* idiv (N/A). */
1247 }
1248 },
1249 /* LD/ST */
1250 {
1251 COSTS_N_INSNS (1), /* load. */
1252 COSTS_N_INSNS (1), /* load_sign_extend. */
1253 COSTS_N_INSNS (1), /* ldrd. */
1254 COSTS_N_INSNS (1), /* ldm_1st. */
1255 1, /* ldm_regs_per_insn_1st. */
1256 2, /* ldm_regs_per_insn_subsequent. */
1257 COSTS_N_INSNS (1), /* loadf. */
1258 COSTS_N_INSNS (1), /* loadd. */
1259 COSTS_N_INSNS (1), /* load_unaligned. */
1260 COSTS_N_INSNS (1), /* store. */
1261 COSTS_N_INSNS (1), /* strd. */
1262 COSTS_N_INSNS (1), /* stm_1st. */
1263 1, /* stm_regs_per_insn_1st. */
1264 2, /* stm_regs_per_insn_subsequent. */
1265 COSTS_N_INSNS (1), /* storef. */
1266 COSTS_N_INSNS (1), /* stored. */
1267 COSTS_N_INSNS (1), /* store_unaligned. */
1268 COSTS_N_INSNS (1), /* loadv. */
1269 COSTS_N_INSNS (1) /* storev. */
1270 },
1271 {
1272 /* FP SFmode */
1273 {
1274 COSTS_N_INSNS (36), /* div. */
1275 COSTS_N_INSNS (11), /* mult. */
1276 COSTS_N_INSNS (20), /* mult_addsub. */
1277 COSTS_N_INSNS (30), /* fma. */
1278 COSTS_N_INSNS (9), /* addsub. */
1279 COSTS_N_INSNS (3), /* fpconst. */
1280 COSTS_N_INSNS (3), /* neg. */
1281 COSTS_N_INSNS (6), /* compare. */
1282 COSTS_N_INSNS (4), /* widen. */
1283 COSTS_N_INSNS (4), /* narrow. */
1284 COSTS_N_INSNS (8), /* toint. */
1285 COSTS_N_INSNS (8), /* fromint. */
1286 COSTS_N_INSNS (8) /* roundint. */
1287 },
1288 /* FP DFmode */
1289 {
1290 COSTS_N_INSNS (64), /* div. */
1291 COSTS_N_INSNS (16), /* mult. */
1292 COSTS_N_INSNS (25), /* mult_addsub. */
1293 COSTS_N_INSNS (30), /* fma. */
1294 COSTS_N_INSNS (9), /* addsub. */
1295 COSTS_N_INSNS (3), /* fpconst. */
1296 COSTS_N_INSNS (3), /* neg. */
1297 COSTS_N_INSNS (6), /* compare. */
1298 COSTS_N_INSNS (6), /* widen. */
1299 COSTS_N_INSNS (6), /* narrow. */
1300 COSTS_N_INSNS (8), /* toint. */
1301 COSTS_N_INSNS (8), /* fromint. */
1302 COSTS_N_INSNS (8) /* roundint. */
1303 }
1304 },
1305 /* Vector */
1306 {
1307 COSTS_N_INSNS (1), /* alu. */
1308 COSTS_N_INSNS (4), /* mult. */
1309 COSTS_N_INSNS (1), /* movi. */
1310 COSTS_N_INSNS (2), /* dup. */
1311 COSTS_N_INSNS (2) /* extract. */
1312 }
1313 };
1314
1315 const struct cpu_cost_table cortexa5_extra_costs =
1316 {
1317 /* ALU */
1318 {
1319 0, /* arith. */
1320 0, /* logical. */
1321 COSTS_N_INSNS (1), /* shift. */
1322 COSTS_N_INSNS (1), /* shift_reg. */
1323 COSTS_N_INSNS (1), /* arith_shift. */
1324 COSTS_N_INSNS (1), /* arith_shift_reg. */
1325 COSTS_N_INSNS (1), /* log_shift. */
1326 COSTS_N_INSNS (1), /* log_shift_reg. */
1327 COSTS_N_INSNS (1), /* extend. */
1328 COSTS_N_INSNS (1), /* extend_arith. */
1329 COSTS_N_INSNS (1), /* bfi. */
1330 COSTS_N_INSNS (1), /* bfx. */
1331 COSTS_N_INSNS (1), /* clz. */
1332 COSTS_N_INSNS (1), /* rev. */
1333 0, /* non_exec. */
1334 true /* non_exec_costs_exec. */
1335 },
1336
1337 {
1338 /* MULT SImode */
1339 {
1340 0, /* simple. */
1341 COSTS_N_INSNS (1), /* flag_setting. */
1342 COSTS_N_INSNS (1), /* extend. */
1343 COSTS_N_INSNS (1), /* add. */
1344 COSTS_N_INSNS (1), /* extend_add. */
1345 COSTS_N_INSNS (7) /* idiv. */
1346 },
1347 /* MULT DImode */
1348 {
1349 0, /* simple (N/A). */
1350 0, /* flag_setting (N/A). */
1351 COSTS_N_INSNS (1), /* extend. */
1352 0, /* add. */
1353 COSTS_N_INSNS (2), /* extend_add. */
1354 0 /* idiv (N/A). */
1355 }
1356 },
1357 /* LD/ST */
1358 {
1359 COSTS_N_INSNS (1), /* load. */
1360 COSTS_N_INSNS (1), /* load_sign_extend. */
1361 COSTS_N_INSNS (6), /* ldrd. */
1362 COSTS_N_INSNS (1), /* ldm_1st. */
1363 1, /* ldm_regs_per_insn_1st. */
1364 2, /* ldm_regs_per_insn_subsequent. */
1365 COSTS_N_INSNS (2), /* loadf. */
1366 COSTS_N_INSNS (4), /* loadd. */
1367 COSTS_N_INSNS (1), /* load_unaligned. */
1368 COSTS_N_INSNS (1), /* store. */
1369 COSTS_N_INSNS (3), /* strd. */
1370 COSTS_N_INSNS (1), /* stm_1st. */
1371 1, /* stm_regs_per_insn_1st. */
1372 2, /* stm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* storef. */
1374 COSTS_N_INSNS (2), /* stored. */
1375 COSTS_N_INSNS (1), /* store_unaligned. */
1376 COSTS_N_INSNS (1), /* loadv. */
1377 COSTS_N_INSNS (1) /* storev. */
1378 },
1379 {
1380 /* FP SFmode */
1381 {
1382 COSTS_N_INSNS (15), /* div. */
1383 COSTS_N_INSNS (3), /* mult. */
1384 COSTS_N_INSNS (7), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1395 },
1396 /* FP DFmode */
1397 {
1398 COSTS_N_INSNS (30), /* div. */
1399 COSTS_N_INSNS (6), /* mult. */
1400 COSTS_N_INSNS (10), /* mult_addsub. */
1401 COSTS_N_INSNS (7), /* fma. */
1402 COSTS_N_INSNS (3), /* addsub. */
1403 COSTS_N_INSNS (3), /* fpconst. */
1404 COSTS_N_INSNS (3), /* neg. */
1405 COSTS_N_INSNS (3), /* compare. */
1406 COSTS_N_INSNS (3), /* widen. */
1407 COSTS_N_INSNS (3), /* narrow. */
1408 COSTS_N_INSNS (3), /* toint. */
1409 COSTS_N_INSNS (3), /* fromint. */
1410 COSTS_N_INSNS (3) /* roundint. */
1411 }
1412 },
1413 /* Vector */
1414 {
1415 COSTS_N_INSNS (1), /* alu. */
1416 COSTS_N_INSNS (4), /* mult. */
1417 COSTS_N_INSNS (1), /* movi. */
1418 COSTS_N_INSNS (2), /* dup. */
1419 COSTS_N_INSNS (2) /* extract. */
1420 }
1421 };
1422
1423
1424 const struct cpu_cost_table cortexa7_extra_costs =
1425 {
1426 /* ALU */
1427 {
1428 0, /* arith. */
1429 0, /* logical. */
1430 COSTS_N_INSNS (1), /* shift. */
1431 COSTS_N_INSNS (1), /* shift_reg. */
1432 COSTS_N_INSNS (1), /* arith_shift. */
1433 COSTS_N_INSNS (1), /* arith_shift_reg. */
1434 COSTS_N_INSNS (1), /* log_shift. */
1435 COSTS_N_INSNS (1), /* log_shift_reg. */
1436 COSTS_N_INSNS (1), /* extend. */
1437 COSTS_N_INSNS (1), /* extend_arith. */
1438 COSTS_N_INSNS (1), /* bfi. */
1439 COSTS_N_INSNS (1), /* bfx. */
1440 COSTS_N_INSNS (1), /* clz. */
1441 COSTS_N_INSNS (1), /* rev. */
1442 0, /* non_exec. */
1443 true /* non_exec_costs_exec. */
1444 },
1445
1446 {
1447 /* MULT SImode */
1448 {
1449 0, /* simple. */
1450 COSTS_N_INSNS (1), /* flag_setting. */
1451 COSTS_N_INSNS (1), /* extend. */
1452 COSTS_N_INSNS (1), /* add. */
1453 COSTS_N_INSNS (1), /* extend_add. */
1454 COSTS_N_INSNS (7) /* idiv. */
1455 },
1456 /* MULT DImode */
1457 {
1458 0, /* simple (N/A). */
1459 0, /* flag_setting (N/A). */
1460 COSTS_N_INSNS (1), /* extend. */
1461 0, /* add. */
1462 COSTS_N_INSNS (2), /* extend_add. */
1463 0 /* idiv (N/A). */
1464 }
1465 },
1466 /* LD/ST */
1467 {
1468 COSTS_N_INSNS (1), /* load. */
1469 COSTS_N_INSNS (1), /* load_sign_extend. */
1470 COSTS_N_INSNS (3), /* ldrd. */
1471 COSTS_N_INSNS (1), /* ldm_1st. */
1472 1, /* ldm_regs_per_insn_1st. */
1473 2, /* ldm_regs_per_insn_subsequent. */
1474 COSTS_N_INSNS (2), /* loadf. */
1475 COSTS_N_INSNS (2), /* loadd. */
1476 COSTS_N_INSNS (1), /* load_unaligned. */
1477 COSTS_N_INSNS (1), /* store. */
1478 COSTS_N_INSNS (3), /* strd. */
1479 COSTS_N_INSNS (1), /* stm_1st. */
1480 1, /* stm_regs_per_insn_1st. */
1481 2, /* stm_regs_per_insn_subsequent. */
1482 COSTS_N_INSNS (2), /* storef. */
1483 COSTS_N_INSNS (2), /* stored. */
1484 COSTS_N_INSNS (1), /* store_unaligned. */
1485 COSTS_N_INSNS (1), /* loadv. */
1486 COSTS_N_INSNS (1) /* storev. */
1487 },
1488 {
1489 /* FP SFmode */
1490 {
1491 COSTS_N_INSNS (15), /* div. */
1492 COSTS_N_INSNS (3), /* mult. */
1493 COSTS_N_INSNS (7), /* mult_addsub. */
1494 COSTS_N_INSNS (7), /* fma. */
1495 COSTS_N_INSNS (3), /* addsub. */
1496 COSTS_N_INSNS (3), /* fpconst. */
1497 COSTS_N_INSNS (3), /* neg. */
1498 COSTS_N_INSNS (3), /* compare. */
1499 COSTS_N_INSNS (3), /* widen. */
1500 COSTS_N_INSNS (3), /* narrow. */
1501 COSTS_N_INSNS (3), /* toint. */
1502 COSTS_N_INSNS (3), /* fromint. */
1503 COSTS_N_INSNS (3) /* roundint. */
1504 },
1505 /* FP DFmode */
1506 {
1507 COSTS_N_INSNS (30), /* div. */
1508 COSTS_N_INSNS (6), /* mult. */
1509 COSTS_N_INSNS (10), /* mult_addsub. */
1510 COSTS_N_INSNS (7), /* fma. */
1511 COSTS_N_INSNS (3), /* addsub. */
1512 COSTS_N_INSNS (3), /* fpconst. */
1513 COSTS_N_INSNS (3), /* neg. */
1514 COSTS_N_INSNS (3), /* compare. */
1515 COSTS_N_INSNS (3), /* widen. */
1516 COSTS_N_INSNS (3), /* narrow. */
1517 COSTS_N_INSNS (3), /* toint. */
1518 COSTS_N_INSNS (3), /* fromint. */
1519 COSTS_N_INSNS (3) /* roundint. */
1520 }
1521 },
1522 /* Vector */
1523 {
1524 COSTS_N_INSNS (1), /* alu. */
1525 COSTS_N_INSNS (4), /* mult. */
1526 COSTS_N_INSNS (1), /* movi. */
1527 COSTS_N_INSNS (2), /* dup. */
1528 COSTS_N_INSNS (2) /* extract. */
1529 }
1530 };
1531
1532 const struct cpu_cost_table cortexa12_extra_costs =
1533 {
1534 /* ALU */
1535 {
1536 0, /* arith. */
1537 0, /* logical. */
1538 0, /* shift. */
1539 COSTS_N_INSNS (1), /* shift_reg. */
1540 COSTS_N_INSNS (1), /* arith_shift. */
1541 COSTS_N_INSNS (1), /* arith_shift_reg. */
1542 COSTS_N_INSNS (1), /* log_shift. */
1543 COSTS_N_INSNS (1), /* log_shift_reg. */
1544 0, /* extend. */
1545 COSTS_N_INSNS (1), /* extend_arith. */
1546 0, /* bfi. */
1547 COSTS_N_INSNS (1), /* bfx. */
1548 COSTS_N_INSNS (1), /* clz. */
1549 COSTS_N_INSNS (1), /* rev. */
1550 0, /* non_exec. */
1551 true /* non_exec_costs_exec. */
1552 },
1553 /* MULT SImode */
1554 {
1555 {
1556 COSTS_N_INSNS (2), /* simple. */
1557 COSTS_N_INSNS (3), /* flag_setting. */
1558 COSTS_N_INSNS (2), /* extend. */
1559 COSTS_N_INSNS (3), /* add. */
1560 COSTS_N_INSNS (2), /* extend_add. */
1561 COSTS_N_INSNS (18) /* idiv. */
1562 },
1563 /* MULT DImode */
1564 {
1565 0, /* simple (N/A). */
1566 0, /* flag_setting (N/A). */
1567 COSTS_N_INSNS (3), /* extend. */
1568 0, /* add (N/A). */
1569 COSTS_N_INSNS (3), /* extend_add. */
1570 0 /* idiv (N/A). */
1571 }
1572 },
1573 /* LD/ST */
1574 {
1575 COSTS_N_INSNS (3), /* load. */
1576 COSTS_N_INSNS (3), /* load_sign_extend. */
1577 COSTS_N_INSNS (3), /* ldrd. */
1578 COSTS_N_INSNS (3), /* ldm_1st. */
1579 1, /* ldm_regs_per_insn_1st. */
1580 2, /* ldm_regs_per_insn_subsequent. */
1581 COSTS_N_INSNS (3), /* loadf. */
1582 COSTS_N_INSNS (3), /* loadd. */
1583 0, /* load_unaligned. */
1584 0, /* store. */
1585 0, /* strd. */
1586 0, /* stm_1st. */
1587 1, /* stm_regs_per_insn_1st. */
1588 2, /* stm_regs_per_insn_subsequent. */
1589 COSTS_N_INSNS (2), /* storef. */
1590 COSTS_N_INSNS (2), /* stored. */
1591 0, /* store_unaligned. */
1592 COSTS_N_INSNS (1), /* loadv. */
1593 COSTS_N_INSNS (1) /* storev. */
1594 },
1595 {
1596 /* FP SFmode */
1597 {
1598 COSTS_N_INSNS (17), /* div. */
1599 COSTS_N_INSNS (4), /* mult. */
1600 COSTS_N_INSNS (8), /* mult_addsub. */
1601 COSTS_N_INSNS (8), /* fma. */
1602 COSTS_N_INSNS (4), /* addsub. */
1603 COSTS_N_INSNS (2), /* fpconst. */
1604 COSTS_N_INSNS (2), /* neg. */
1605 COSTS_N_INSNS (2), /* compare. */
1606 COSTS_N_INSNS (4), /* widen. */
1607 COSTS_N_INSNS (4), /* narrow. */
1608 COSTS_N_INSNS (4), /* toint. */
1609 COSTS_N_INSNS (4), /* fromint. */
1610 COSTS_N_INSNS (4) /* roundint. */
1611 },
1612 /* FP DFmode */
1613 {
1614 COSTS_N_INSNS (31), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (2), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1627 }
1628 },
1629 /* Vector */
1630 {
1631 COSTS_N_INSNS (1), /* alu. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (1), /* movi. */
1634 COSTS_N_INSNS (2), /* dup. */
1635 COSTS_N_INSNS (2) /* extract. */
1636 }
1637 };
1638
1639 const struct cpu_cost_table cortexa15_extra_costs =
1640 {
1641 /* ALU */
1642 {
1643 0, /* arith. */
1644 0, /* logical. */
1645 0, /* shift. */
1646 0, /* shift_reg. */
1647 COSTS_N_INSNS (1), /* arith_shift. */
1648 COSTS_N_INSNS (1), /* arith_shift_reg. */
1649 COSTS_N_INSNS (1), /* log_shift. */
1650 COSTS_N_INSNS (1), /* log_shift_reg. */
1651 0, /* extend. */
1652 COSTS_N_INSNS (1), /* extend_arith. */
1653 COSTS_N_INSNS (1), /* bfi. */
1654 0, /* bfx. */
1655 0, /* clz. */
1656 0, /* rev. */
1657 0, /* non_exec. */
1658 true /* non_exec_costs_exec. */
1659 },
1660 /* MULT SImode */
1661 {
1662 {
1663 COSTS_N_INSNS (2), /* simple. */
1664 COSTS_N_INSNS (3), /* flag_setting. */
1665 COSTS_N_INSNS (2), /* extend. */
1666 COSTS_N_INSNS (2), /* add. */
1667 COSTS_N_INSNS (2), /* extend_add. */
1668 COSTS_N_INSNS (18) /* idiv. */
1669 },
1670 /* MULT DImode */
1671 {
1672 0, /* simple (N/A). */
1673 0, /* flag_setting (N/A). */
1674 COSTS_N_INSNS (3), /* extend. */
1675 0, /* add (N/A). */
1676 COSTS_N_INSNS (3), /* extend_add. */
1677 0 /* idiv (N/A). */
1678 }
1679 },
1680 /* LD/ST */
1681 {
1682 COSTS_N_INSNS (3), /* load. */
1683 COSTS_N_INSNS (3), /* load_sign_extend. */
1684 COSTS_N_INSNS (3), /* ldrd. */
1685 COSTS_N_INSNS (4), /* ldm_1st. */
1686 1, /* ldm_regs_per_insn_1st. */
1687 2, /* ldm_regs_per_insn_subsequent. */
1688 COSTS_N_INSNS (4), /* loadf. */
1689 COSTS_N_INSNS (4), /* loadd. */
1690 0, /* load_unaligned. */
1691 0, /* store. */
1692 0, /* strd. */
1693 COSTS_N_INSNS (1), /* stm_1st. */
1694 1, /* stm_regs_per_insn_1st. */
1695 2, /* stm_regs_per_insn_subsequent. */
1696 0, /* storef. */
1697 0, /* stored. */
1698 0, /* store_unaligned. */
1699 COSTS_N_INSNS (1), /* loadv. */
1700 COSTS_N_INSNS (1) /* storev. */
1701 },
1702 {
1703 /* FP SFmode */
1704 {
1705 COSTS_N_INSNS (17), /* div. */
1706 COSTS_N_INSNS (4), /* mult. */
1707 COSTS_N_INSNS (8), /* mult_addsub. */
1708 COSTS_N_INSNS (8), /* fma. */
1709 COSTS_N_INSNS (4), /* addsub. */
1710 COSTS_N_INSNS (2), /* fpconst. */
1711 COSTS_N_INSNS (2), /* neg. */
1712 COSTS_N_INSNS (5), /* compare. */
1713 COSTS_N_INSNS (4), /* widen. */
1714 COSTS_N_INSNS (4), /* narrow. */
1715 COSTS_N_INSNS (4), /* toint. */
1716 COSTS_N_INSNS (4), /* fromint. */
1717 COSTS_N_INSNS (4) /* roundint. */
1718 },
1719 /* FP DFmode */
1720 {
1721 COSTS_N_INSNS (31), /* div. */
1722 COSTS_N_INSNS (4), /* mult. */
1723 COSTS_N_INSNS (8), /* mult_addsub. */
1724 COSTS_N_INSNS (8), /* fma. */
1725 COSTS_N_INSNS (4), /* addsub. */
1726 COSTS_N_INSNS (2), /* fpconst. */
1727 COSTS_N_INSNS (2), /* neg. */
1728 COSTS_N_INSNS (2), /* compare. */
1729 COSTS_N_INSNS (4), /* widen. */
1730 COSTS_N_INSNS (4), /* narrow. */
1731 COSTS_N_INSNS (4), /* toint. */
1732 COSTS_N_INSNS (4), /* fromint. */
1733 COSTS_N_INSNS (4) /* roundint. */
1734 }
1735 },
1736 /* Vector */
1737 {
1738 COSTS_N_INSNS (1), /* alu. */
1739 COSTS_N_INSNS (4), /* mult. */
1740 COSTS_N_INSNS (1), /* movi. */
1741 COSTS_N_INSNS (2), /* dup. */
1742 COSTS_N_INSNS (2) /* extract. */
1743 }
1744 };
1745
1746 const struct cpu_cost_table v7m_extra_costs =
1747 {
1748 /* ALU */
1749 {
1750 0, /* arith. */
1751 0, /* logical. */
1752 0, /* shift. */
1753 0, /* shift_reg. */
1754 0, /* arith_shift. */
1755 COSTS_N_INSNS (1), /* arith_shift_reg. */
1756 0, /* log_shift. */
1757 COSTS_N_INSNS (1), /* log_shift_reg. */
1758 0, /* extend. */
1759 COSTS_N_INSNS (1), /* extend_arith. */
1760 0, /* bfi. */
1761 0, /* bfx. */
1762 0, /* clz. */
1763 0, /* rev. */
1764 COSTS_N_INSNS (1), /* non_exec. */
1765 false /* non_exec_costs_exec. */
1766 },
1767 {
1768 /* MULT SImode */
1769 {
1770 COSTS_N_INSNS (1), /* simple. */
1771 COSTS_N_INSNS (1), /* flag_setting. */
1772 COSTS_N_INSNS (2), /* extend. */
1773 COSTS_N_INSNS (1), /* add. */
1774 COSTS_N_INSNS (3), /* extend_add. */
1775 COSTS_N_INSNS (8) /* idiv. */
1776 },
1777 /* MULT DImode */
1778 {
1779 0, /* simple (N/A). */
1780 0, /* flag_setting (N/A). */
1781 COSTS_N_INSNS (2), /* extend. */
1782 0, /* add (N/A). */
1783 COSTS_N_INSNS (3), /* extend_add. */
1784 0 /* idiv (N/A). */
1785 }
1786 },
1787 /* LD/ST */
1788 {
1789 COSTS_N_INSNS (2), /* load. */
1790 0, /* load_sign_extend. */
1791 COSTS_N_INSNS (3), /* ldrd. */
1792 COSTS_N_INSNS (2), /* ldm_1st. */
1793 1, /* ldm_regs_per_insn_1st. */
1794 1, /* ldm_regs_per_insn_subsequent. */
1795 COSTS_N_INSNS (2), /* loadf. */
1796 COSTS_N_INSNS (3), /* loadd. */
1797 COSTS_N_INSNS (1), /* load_unaligned. */
1798 COSTS_N_INSNS (2), /* store. */
1799 COSTS_N_INSNS (3), /* strd. */
1800 COSTS_N_INSNS (2), /* stm_1st. */
1801 1, /* stm_regs_per_insn_1st. */
1802 1, /* stm_regs_per_insn_subsequent. */
1803 COSTS_N_INSNS (2), /* storef. */
1804 COSTS_N_INSNS (3), /* stored. */
1805 COSTS_N_INSNS (1), /* store_unaligned. */
1806 COSTS_N_INSNS (1), /* loadv. */
1807 COSTS_N_INSNS (1) /* storev. */
1808 },
1809 {
1810 /* FP SFmode */
1811 {
1812 COSTS_N_INSNS (7), /* div. */
1813 COSTS_N_INSNS (2), /* mult. */
1814 COSTS_N_INSNS (5), /* mult_addsub. */
1815 COSTS_N_INSNS (3), /* fma. */
1816 COSTS_N_INSNS (1), /* addsub. */
1817 0, /* fpconst. */
1818 0, /* neg. */
1819 0, /* compare. */
1820 0, /* widen. */
1821 0, /* narrow. */
1822 0, /* toint. */
1823 0, /* fromint. */
1824 0 /* roundint. */
1825 },
1826 /* FP DFmode */
1827 {
1828 COSTS_N_INSNS (15), /* div. */
1829 COSTS_N_INSNS (5), /* mult. */
1830 COSTS_N_INSNS (7), /* mult_addsub. */
1831 COSTS_N_INSNS (7), /* fma. */
1832 COSTS_N_INSNS (3), /* addsub. */
1833 0, /* fpconst. */
1834 0, /* neg. */
1835 0, /* compare. */
1836 0, /* widen. */
1837 0, /* narrow. */
1838 0, /* toint. */
1839 0, /* fromint. */
1840 0 /* roundint. */
1841 }
1842 },
1843 /* Vector */
1844 {
1845 COSTS_N_INSNS (1), /* alu. */
1846 COSTS_N_INSNS (4), /* mult. */
1847 COSTS_N_INSNS (1), /* movi. */
1848 COSTS_N_INSNS (2), /* dup. */
1849 COSTS_N_INSNS (2) /* extract. */
1850 }
1851 };
1852
1853 const struct addr_mode_cost_table generic_addr_mode_costs =
1854 {
1855 /* int. */
1856 {
1857 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1858 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1859 COSTS_N_INSNS (0) /* AMO_WB. */
1860 },
1861 /* float. */
1862 {
1863 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1864 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1865 COSTS_N_INSNS (0) /* AMO_WB. */
1866 },
1867 /* vector. */
1868 {
1869 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1870 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1871 COSTS_N_INSNS (0) /* AMO_WB. */
1872 }
1873 };
1874
1875 const struct tune_params arm_slowmul_tune =
1876 {
1877 &generic_extra_costs, /* Insn extra costs. */
1878 &generic_addr_mode_costs, /* Addressing mode costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 3, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 1, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1895 };
1896
1897 const struct tune_params arm_fastmul_tune =
1898 {
1899 &generic_extra_costs, /* Insn extra costs. */
1900 &generic_addr_mode_costs, /* Addressing mode costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_TRUE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_STRINGOPS_FALSE,
1915 tune_params::FUSE_NOTHING,
1916 tune_params::SCHED_AUTOPREF_OFF
1917 };
1918
1919 /* StrongARM has early execution of branches, so a sequence that is worth
1920 skipping is shorter. Set max_insns_skipped to a lower value. */
1921
1922 const struct tune_params arm_strongarm_tune =
1923 {
1924 &generic_extra_costs, /* Insn extra costs. */
1925 &generic_addr_mode_costs, /* Addressing mode costs. */
1926 NULL, /* Sched adj cost. */
1927 arm_default_branch_cost,
1928 &arm_default_vec_cost,
1929 1, /* Constant limit. */
1930 3, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 1, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 tune_params::PREF_CONST_POOL_TRUE,
1935 tune_params::PREF_LDRD_FALSE,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_NEITHER,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_xscale_tune =
1945 {
1946 &generic_extra_costs, /* Insn extra costs. */
1947 &generic_addr_mode_costs, /* Addressing mode costs. */
1948 xscale_sched_adjust_cost,
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 2, /* Constant limit. */
1952 3, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_TRUE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_STRINGOPS_FALSE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_9e_tune =
1967 {
1968 &generic_extra_costs, /* Insn extra costs. */
1969 &generic_addr_mode_costs, /* Addressing mode costs. */
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 1, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_TRUE,
1979 tune_params::PREF_LDRD_FALSE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER,
1983 tune_params::PREF_NEON_STRINGOPS_FALSE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_marvell_pj4_tune =
1989 {
1990 &generic_extra_costs, /* Insn extra costs. */
1991 &generic_addr_mode_costs, /* Addressing mode costs. */
1992 NULL, /* Sched adj cost. */
1993 arm_default_branch_cost,
1994 &arm_default_vec_cost,
1995 1, /* Constant limit. */
1996 5, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 2, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 tune_params::PREF_CONST_POOL_TRUE,
2001 tune_params::PREF_LDRD_FALSE,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_NEITHER,
2005 tune_params::PREF_NEON_STRINGOPS_FALSE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_OFF
2008 };
2009
2010 const struct tune_params arm_v6t2_tune =
2011 {
2012 &generic_extra_costs, /* Insn extra costs. */
2013 &generic_addr_mode_costs, /* Addressing mode costs. */
2014 NULL, /* Sched adj cost. */
2015 arm_default_branch_cost,
2016 &arm_default_vec_cost,
2017 1, /* Constant limit. */
2018 5, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 1, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL,
2022 tune_params::PREF_CONST_POOL_FALSE,
2023 tune_params::PREF_LDRD_FALSE,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_NEITHER,
2027 tune_params::PREF_NEON_STRINGOPS_FALSE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032
2033 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2034 const struct tune_params arm_cortex_tune =
2035 {
2036 &generic_extra_costs,
2037 &generic_addr_mode_costs, /* Addressing mode costs. */
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_STRINGOPS_FALSE,
2052 tune_params::FUSE_NOTHING,
2053 tune_params::SCHED_AUTOPREF_OFF
2054 };
2055
2056 const struct tune_params arm_cortex_a8_tune =
2057 {
2058 &cortexa8_extra_costs,
2059 &generic_addr_mode_costs, /* Addressing mode costs. */
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 5, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 2, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_FALSE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_NEITHER,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 tune_params::FUSE_NOTHING,
2075 tune_params::SCHED_AUTOPREF_OFF
2076 };
2077
2078 const struct tune_params arm_cortex_a7_tune =
2079 {
2080 &cortexa7_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_STRINGOPS_TRUE,
2096 tune_params::FUSE_NOTHING,
2097 tune_params::SCHED_AUTOPREF_OFF
2098 };
2099
2100 const struct tune_params arm_cortex_a15_tune =
2101 {
2102 &cortexa15_extra_costs,
2103 &generic_addr_mode_costs, /* Addressing mode costs. */
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 8, /* Memset max inline. */
2110 3, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE,
2118 tune_params::FUSE_NOTHING,
2119 tune_params::SCHED_AUTOPREF_FULL
2120 };
2121
2122 const struct tune_params arm_cortex_a35_tune =
2123 {
2124 &cortexa53_extra_costs,
2125 &generic_addr_mode_costs, /* Addressing mode costs. */
2126 NULL, /* Sched adj cost. */
2127 arm_default_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 5, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 1, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_STRINGOPS_TRUE,
2140 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2141 tune_params::SCHED_AUTOPREF_OFF
2142 };
2143
2144 const struct tune_params arm_cortex_a53_tune =
2145 {
2146 &cortexa53_extra_costs,
2147 &generic_addr_mode_costs, /* Addressing mode costs. */
2148 NULL, /* Sched adj cost. */
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_NOT_BENEFICIAL,
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 const struct tune_params arm_cortex_a57_tune =
2167 {
2168 &cortexa57_extra_costs,
2169 &generic_addr_mode_costs, /* Addressing mode costs. */
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost,
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 3, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE,
2184 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2185 tune_params::SCHED_AUTOPREF_FULL
2186 };
2187
2188 const struct tune_params arm_exynosm1_tune =
2189 {
2190 &exynosm1_extra_costs,
2191 &generic_addr_mode_costs, /* Addressing mode costs. */
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost,
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 3, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_STRINGOPS_TRUE,
2206 tune_params::FUSE_NOTHING,
2207 tune_params::SCHED_AUTOPREF_OFF
2208 };
2209
2210 const struct tune_params arm_xgene1_tune =
2211 {
2212 &xgene1_extra_costs,
2213 &generic_addr_mode_costs, /* Addressing mode costs. */
2214 NULL, /* Sched adj cost. */
2215 arm_default_branch_cost,
2216 &arm_default_vec_cost,
2217 1, /* Constant limit. */
2218 2, /* Max cond insns. */
2219 32, /* Memset max inline. */
2220 4, /* Issue rate. */
2221 ARM_PREFETCH_NOT_BENEFICIAL,
2222 tune_params::PREF_CONST_POOL_FALSE,
2223 tune_params::PREF_LDRD_TRUE,
2224 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2226 tune_params::DISPARAGE_FLAGS_ALL,
2227 tune_params::PREF_NEON_STRINGOPS_FALSE,
2228 tune_params::FUSE_NOTHING,
2229 tune_params::SCHED_AUTOPREF_OFF
2230 };
2231
2232 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2233 less appealing. Set max_insns_skipped to a low value. */
2234
2235 const struct tune_params arm_cortex_a5_tune =
2236 {
2237 &cortexa5_extra_costs,
2238 &generic_addr_mode_costs, /* Addressing mode costs. */
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_a5_branch_cost,
2241 &arm_default_vec_cost,
2242 1, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_FALSE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_STRINGOPS_TRUE,
2253 tune_params::FUSE_NOTHING,
2254 tune_params::SCHED_AUTOPREF_OFF
2255 };
2256
2257 const struct tune_params arm_cortex_a9_tune =
2258 {
2259 &cortexa9_extra_costs,
2260 &generic_addr_mode_costs, /* Addressing mode costs. */
2261 cortex_a9_sched_adjust_cost,
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost,
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 2, /* Issue rate. */
2268 ARM_PREFETCH_BENEFICIAL(4,32,32),
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_STRINGOPS_FALSE,
2275 tune_params::FUSE_NOTHING,
2276 tune_params::SCHED_AUTOPREF_OFF
2277 };
2278
2279 const struct tune_params arm_cortex_a12_tune =
2280 {
2281 &cortexa12_extra_costs,
2282 &generic_addr_mode_costs, /* Addressing mode costs. */
2283 NULL, /* Sched adj cost. */
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost, /* Vectorizer costs. */
2286 1, /* Constant limit. */
2287 2, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_FALSE,
2292 tune_params::PREF_LDRD_TRUE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_ALL,
2296 tune_params::PREF_NEON_STRINGOPS_TRUE,
2297 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2298 tune_params::SCHED_AUTOPREF_OFF
2299 };
2300
2301 const struct tune_params arm_cortex_a73_tune =
2302 {
2303 &cortexa57_extra_costs,
2304 &generic_addr_mode_costs, /* Addressing mode costs. */
2305 NULL, /* Sched adj cost. */
2306 arm_default_branch_cost,
2307 &arm_default_vec_cost, /* Vectorizer costs. */
2308 1, /* Constant limit. */
2309 2, /* Max cond insns. */
2310 8, /* Memset max inline. */
2311 2, /* Issue rate. */
2312 ARM_PREFETCH_NOT_BENEFICIAL,
2313 tune_params::PREF_CONST_POOL_FALSE,
2314 tune_params::PREF_LDRD_TRUE,
2315 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2316 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2317 tune_params::DISPARAGE_FLAGS_ALL,
2318 tune_params::PREF_NEON_STRINGOPS_TRUE,
2319 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2320 tune_params::SCHED_AUTOPREF_FULL
2321 };
2322
2323 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2324 cycle to execute each. An LDR from the constant pool also takes two cycles
2325 to execute, but mildly increases pipelining opportunity (consecutive
2326 loads/stores can be pipelined together, saving one cycle), and may also
2327 improve icache utilisation. Hence we prefer the constant pool for such
2328 processors. */
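/* Put concretely: materialising one 32-bit constant costs two cycles either
   way (a MOVW/MOVT pair, or a single two-cycle LDR), but two back-to-back
   literal-pool loads can pipeline and finish in three cycles, whereas two
   MOVW/MOVT pairs always take four.  */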
2329
2330 const struct tune_params arm_v7m_tune =
2331 {
2332 &v7m_extra_costs,
2333 &generic_addr_mode_costs, /* Addressing mode costs. */
2334 NULL, /* Sched adj cost. */
2335 arm_cortex_m_branch_cost,
2336 &arm_default_vec_cost,
2337 1, /* Constant limit. */
2338 2, /* Max cond insns. */
2339 8, /* Memset max inline. */
2340 1, /* Issue rate. */
2341 ARM_PREFETCH_NOT_BENEFICIAL,
2342 tune_params::PREF_CONST_POOL_TRUE,
2343 tune_params::PREF_LDRD_FALSE,
2344 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2345 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2346 tune_params::DISPARAGE_FLAGS_NEITHER,
2347 tune_params::PREF_NEON_STRINGOPS_FALSE,
2348 tune_params::FUSE_NOTHING,
2349 tune_params::SCHED_AUTOPREF_OFF
2350 };
2351
2352 /* Cortex-M7 tuning. */
2353
2354 const struct tune_params arm_cortex_m7_tune =
2355 {
2356 &v7m_extra_costs,
2357 &generic_addr_mode_costs, /* Addressing mode costs. */
2358 NULL, /* Sched adj cost. */
2359 arm_cortex_m7_branch_cost,
2360 &arm_default_vec_cost,
2361 0, /* Constant limit. */
2362 1, /* Max cond insns. */
2363 8, /* Memset max inline. */
2364 2, /* Issue rate. */
2365 ARM_PREFETCH_NOT_BENEFICIAL,
2366 tune_params::PREF_CONST_POOL_TRUE,
2367 tune_params::PREF_LDRD_FALSE,
2368 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2369 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2370 tune_params::DISPARAGE_FLAGS_NEITHER,
2371 tune_params::PREF_NEON_STRINGOPS_FALSE,
2372 tune_params::FUSE_NOTHING,
2373 tune_params::SCHED_AUTOPREF_OFF
2374 };
2375
2376 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2377 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2378 cortex-m23. */
2379 const struct tune_params arm_v6m_tune =
2380 {
2381 &generic_extra_costs, /* Insn extra costs. */
2382 &generic_addr_mode_costs, /* Addressing mode costs. */
2383 NULL, /* Sched adj cost. */
2384 arm_default_branch_cost,
2385 &arm_default_vec_cost, /* Vectorizer costs. */
2386 1, /* Constant limit. */
2387 5, /* Max cond insns. */
2388 8, /* Memset max inline. */
2389 1, /* Issue rate. */
2390 ARM_PREFETCH_NOT_BENEFICIAL,
2391 tune_params::PREF_CONST_POOL_FALSE,
2392 tune_params::PREF_LDRD_FALSE,
2393 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2394 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2395 tune_params::DISPARAGE_FLAGS_NEITHER,
2396 tune_params::PREF_NEON_STRINGOPS_FALSE,
2397 tune_params::FUSE_NOTHING,
2398 tune_params::SCHED_AUTOPREF_OFF
2399 };
2400
2401 const struct tune_params arm_fa726te_tune =
2402 {
2403 &generic_extra_costs, /* Insn extra costs. */
2404 &generic_addr_mode_costs, /* Addressing mode costs. */
2405 fa726te_sched_adjust_cost,
2406 arm_default_branch_cost,
2407 &arm_default_vec_cost,
2408 1, /* Constant limit. */
2409 5, /* Max cond insns. */
2410 8, /* Memset max inline. */
2411 2, /* Issue rate. */
2412 ARM_PREFETCH_NOT_BENEFICIAL,
2413 tune_params::PREF_CONST_POOL_TRUE,
2414 tune_params::PREF_LDRD_FALSE,
2415 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2416 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2417 tune_params::DISPARAGE_FLAGS_NEITHER,
2418 tune_params::PREF_NEON_STRINGOPS_FALSE,
2419 tune_params::FUSE_NOTHING,
2420 tune_params::SCHED_AUTOPREF_OFF
2421 };
2422
2423 /* Key type for Pointer Authentication extension. */
2424 enum aarch_key_type aarch_ra_sign_key = AARCH_KEY_A;
2425
2426 char *accepted_branch_protection_string = NULL;
2427
2428 /* Auto-generated CPU, FPU and architecture tables. */
2429 #include "arm-cpu-data.h"
2430
2431 /* The name of the preprocessor macro to define for this architecture. PROFILE
2432 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2433 is thus chosen to be big enough to hold the longest architecture name. */
2434
2435 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2436
2437 /* Supported TLS relocations. */
2438
2439 enum tls_reloc {
2440 TLS_GD32,
2441 TLS_GD32_FDPIC,
2442 TLS_LDM32,
2443 TLS_LDM32_FDPIC,
2444 TLS_LDO32,
2445 TLS_IE32,
2446 TLS_IE32_FDPIC,
2447 TLS_LE32,
2448 TLS_DESCSEQ /* GNU scheme */
2449 };
2450
2451 /* The maximum number of insns to be used when loading a constant. */
2452 inline static int
2453 arm_constant_limit (bool size_p)
2454 {
2455 return size_p ? 1 : current_tune->constant_limit;
2456 }
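/* That is, when optimising for size at most one instruction is spent per
   constant; otherwise the per-CPU tuning decides (for instance
   arm_slowmul_tune above allows up to 3 instructions).  */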
2457
2458 /* Emit an insn that's a simple single-set. Both the operands must be known
2459 to be valid. */
2460 inline static rtx_insn *
2461 emit_set_insn (rtx x, rtx y)
2462 {
2463 return emit_insn (gen_rtx_SET (x, y));
2464 }
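/* Example (dest_reg and src_reg are hypothetical pseudo registers):

     rtx_insn *insn = emit_set_insn (dest_reg, src_reg);

   emits the single pattern (set dest_reg src_reg); both operands are
   assumed to be already valid, no legitimisation is attempted.  */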
2465
2466 /* Return the number of bits set in VALUE. */
2467 static unsigned
2468 bit_count (unsigned long value)
2469 {
2470 unsigned long count = 0;
2471
2472 while (value)
2473 {
2474 count++;
2475 value &= value - 1; /* Clear the least-significant set bit. */
2476 }
2477
2478 return count;
2479 }
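/* Worked example: for VALUE == 0x58 (binary 1011000) the loop clears one
   set bit per iteration, 0x58 -> 0x50 -> 0x40 -> 0, and returns 3.  */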
2480
2481 /* Return the number of bits set in BMAP. */
2482 static unsigned
2483 bitmap_popcount (const sbitmap bmap)
2484 {
2485 unsigned int count = 0;
2486 unsigned int n = 0;
2487 sbitmap_iterator sbi;
2488
2489 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2490 count++;
2491 return count;
2492 }
2493
2494 typedef struct
2495 {
2496 machine_mode mode;
2497 const char *name;
2498 } arm_fixed_mode_set;
2499
2500 /* A small helper for setting fixed-point libfuncs. */
2501
2502 static void
2503 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2504 const char *funcname, const char *modename,
2505 int num_suffix)
2506 {
2507 char buffer[50];
2508
2509 if (num_suffix == 0)
2510 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2511 else
2512 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2513
2514 set_optab_libfunc (optable, mode, buffer);
2515 }
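/* For example, following the sprintf above,

     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);

   registers the libcall name "__gnu_addqq3" for QQmode addition, while a
   NUM_SUFFIX of 0 drops the trailing digit altogether.  */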
2516
2517 static void
2518 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2519 machine_mode from, const char *funcname,
2520 const char *toname, const char *fromname)
2521 {
2522 char buffer[50];
2523 const char *maybe_suffix_2 = "";
2524
2525 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2526 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2527 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2528 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2529 maybe_suffix_2 = "2";
2530
2531 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2532 maybe_suffix_2);
2533
2534 set_conv_libfunc (optable, to, from, buffer);
2535 }
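/* For example, a conversion from QQmode to SImode with funcname "fract"
   produces "__gnu_fractqqsi" (no "2" suffix, since SImode is not a
   fixed-point mode), whereas a conversion between two signed fract modes
   such as QQmode and HQmode produces "__gnu_fractqqhq2".  */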
2536
2537 static GTY(()) rtx speculation_barrier_libfunc;
2538
2539 /* Record that we have no arithmetic or comparison libfuncs for
2540 machine mode MODE. */
2541
2542 static void
2543 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2544 {
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, mode, NULL);
2547 set_optab_libfunc (sdiv_optab, mode, NULL);
2548 set_optab_libfunc (smul_optab, mode, NULL);
2549 set_optab_libfunc (neg_optab, mode, NULL);
2550 set_optab_libfunc (sub_optab, mode, NULL);
2551
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, mode, NULL);
2554 set_optab_libfunc (ne_optab, mode, NULL);
2555 set_optab_libfunc (lt_optab, mode, NULL);
2556 set_optab_libfunc (le_optab, mode, NULL);
2557 set_optab_libfunc (ge_optab, mode, NULL);
2558 set_optab_libfunc (gt_optab, mode, NULL);
2559 set_optab_libfunc (unord_optab, mode, NULL);
2560 }
2561
2562 /* Set up library functions unique to ARM. */
2563 static void
2564 arm_init_libfuncs (void)
2565 {
2566 machine_mode mode_iter;
2567
2568 /* For Linux, we have access to kernel support for atomic operations. */
2569 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2570 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2571
2572 /* There are no special library functions unless we are using the
2573 ARM BPABI. */
2574 if (!TARGET_BPABI)
2575 return;
2576
2577 /* The functions below are described in Section 4 of the "Run-Time
2578 ABI for the ARM architecture", Version 1.0. */
2579
2580 /* Double-precision floating-point arithmetic. Table 2. */
2581 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2582 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2583 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2584 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2585 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2586
2587 /* Double-precision comparisons. Table 3. */
2588 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2589 set_optab_libfunc (ne_optab, DFmode, NULL);
2590 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2591 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2592 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2593 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2594 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2595
2596 /* Single-precision floating-point arithmetic. Table 4. */
2597 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2598 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2599 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2600 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2601 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2602
2603 /* Single-precision comparisons. Table 5. */
2604 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2605 set_optab_libfunc (ne_optab, SFmode, NULL);
2606 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2607 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2608 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2609 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2610 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2611
2612 /* Floating-point to integer conversions. Table 6. */
2613 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2614 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2615 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2616 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2617 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2618 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2619 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2620 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2621
2622 /* Conversions between floating types. Table 7. */
2623 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2624 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2625
2626 /* Integer to floating-point conversions. Table 8. */
2627 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2628 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2629 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2630 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2631 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2632 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2633 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2634 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2635
2636 /* Long long. Table 9. */
2637 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2638 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2639 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2640 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2641 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2642 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2643 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2644 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2645
2646 /* Integer (32/32->32) division. \S 4.3.1. */
2647 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2648 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2649
2650 /* The divmod functions are designed so that they can be used for
2651 plain division, even though they return both the quotient and the
2652 remainder. The quotient is returned in the usual location (i.e.,
2653 r0 for SImode, {r0, r1} for DImode), just as would be expected
2654 for an ordinary division routine. Because the AAPCS calling
2655 conventions specify that all of { r0, r1, r2, r3 } are
2656 call-clobbered registers, there is no need to tell the compiler
2657 explicitly that those registers are clobbered by these
2658 routines. */
2659 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2660 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2661
2662 /* For SImode division the ABI provides div-without-mod routines,
2663 which are faster. */
2664 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2665 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2666
2667 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2668 divmod libcalls instead. */
2669 set_optab_libfunc (smod_optab, DImode, NULL);
2670 set_optab_libfunc (umod_optab, DImode, NULL);
2671 set_optab_libfunc (smod_optab, SImode, NULL);
2672 set_optab_libfunc (umod_optab, SImode, NULL);
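/* With the entries above, a SImode expression such as "a % b" goes through
   the divmod libcall: the compiler calls __aeabi_idivmod, which (per the
   run-time ABI) returns the quotient in r0 and the remainder in r1, and
   then simply uses the r1 half of the result.  */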
2673
2674 /* Half-precision float operations. The compiler handles all operations
2675 with NULL libfuncs by converting to SFmode. */
2676 switch (arm_fp16_format)
2677 {
2678 case ARM_FP16_FORMAT_IEEE:
2679 case ARM_FP16_FORMAT_ALTERNATIVE:
2680
2681 /* Conversions. */
2682 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2683 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2684 ? "__gnu_f2h_ieee"
2685 : "__gnu_f2h_alternative"));
2686 set_conv_libfunc (sext_optab, SFmode, HFmode,
2687 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2688 ? "__gnu_h2f_ieee"
2689 : "__gnu_h2f_alternative"));
2690
2691 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2692 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2693 ? "__gnu_d2h_ieee"
2694 : "__gnu_d2h_alternative"));
2695
2696 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2697 break;
2698
2699 default:
2700 break;
2701 }
2702
2703 /* For all possible libcalls in BFmode, record NULL. */
2704 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2705 {
2706 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2707 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2708 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2709 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2710 }
2711 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2712
2713 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2714 {
2715 const arm_fixed_mode_set fixed_arith_modes[] =
2716 {
2717 { E_QQmode, "qq" },
2718 { E_UQQmode, "uqq" },
2719 { E_HQmode, "hq" },
2720 { E_UHQmode, "uhq" },
2721 { E_SQmode, "sq" },
2722 { E_USQmode, "usq" },
2723 { E_DQmode, "dq" },
2724 { E_UDQmode, "udq" },
2725 { E_TQmode, "tq" },
2726 { E_UTQmode, "utq" },
2727 { E_HAmode, "ha" },
2728 { E_UHAmode, "uha" },
2729 { E_SAmode, "sa" },
2730 { E_USAmode, "usa" },
2731 { E_DAmode, "da" },
2732 { E_UDAmode, "uda" },
2733 { E_TAmode, "ta" },
2734 { E_UTAmode, "uta" }
2735 };
2736 const arm_fixed_mode_set fixed_conv_modes[] =
2737 {
2738 { E_QQmode, "qq" },
2739 { E_UQQmode, "uqq" },
2740 { E_HQmode, "hq" },
2741 { E_UHQmode, "uhq" },
2742 { E_SQmode, "sq" },
2743 { E_USQmode, "usq" },
2744 { E_DQmode, "dq" },
2745 { E_UDQmode, "udq" },
2746 { E_TQmode, "tq" },
2747 { E_UTQmode, "utq" },
2748 { E_HAmode, "ha" },
2749 { E_UHAmode, "uha" },
2750 { E_SAmode, "sa" },
2751 { E_USAmode, "usa" },
2752 { E_DAmode, "da" },
2753 { E_UDAmode, "uda" },
2754 { E_TAmode, "ta" },
2755 { E_UTAmode, "uta" },
2756 { E_QImode, "qi" },
2757 { E_HImode, "hi" },
2758 { E_SImode, "si" },
2759 { E_DImode, "di" },
2760 { E_TImode, "ti" },
2761 { E_SFmode, "sf" },
2762 { E_DFmode, "df" }
2763 };
2764 unsigned int i, j;
2765
2766 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2767 {
2768 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2769 "add", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2771 "ssadd", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2773 "usadd", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2775 "sub", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2777 "sssub", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2779 "ussub", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2781 "mul", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2783 "ssmul", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2785 "usmul", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2787 "div", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2789 "udiv", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2791 "ssdiv", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2793 "usdiv", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2795 "neg", fixed_arith_modes[i].name, 2);
2796 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2797 "ssneg", fixed_arith_modes[i].name, 2);
2798 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2799 "usneg", fixed_arith_modes[i].name, 2);
2800 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2801 "ashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2803 "ashr", fixed_arith_modes[i].name, 3);
2804 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2805 "lshr", fixed_arith_modes[i].name, 3);
2806 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2807 "ssashl", fixed_arith_modes[i].name, 3);
2808 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2809 "usashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2811 "cmp", fixed_arith_modes[i].name, 2);
2812 }
2813
2814 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2815 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2816 {
2817 if (i == j
2818 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2819 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2820 continue;
2821
2822 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2823 fixed_conv_modes[j].mode, "fract",
2824 fixed_conv_modes[i].name,
2825 fixed_conv_modes[j].name);
2826 arm_set_fixed_conv_libfunc (satfract_optab,
2827 fixed_conv_modes[i].mode,
2828 fixed_conv_modes[j].mode, "satfract",
2829 fixed_conv_modes[i].name,
2830 fixed_conv_modes[j].name);
2831 arm_set_fixed_conv_libfunc (fractuns_optab,
2832 fixed_conv_modes[i].mode,
2833 fixed_conv_modes[j].mode, "fractuns",
2834 fixed_conv_modes[i].name,
2835 fixed_conv_modes[j].name);
2836 arm_set_fixed_conv_libfunc (satfractuns_optab,
2837 fixed_conv_modes[i].mode,
2838 fixed_conv_modes[j].mode, "satfractuns",
2839 fixed_conv_modes[i].name,
2840 fixed_conv_modes[j].name);
2841 }
2842 }
2843
2844 if (TARGET_AAPCS_BASED)
2845 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2846
2847 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2848 }
2849
2850 /* On AAPCS systems, this is the "struct __va_list". */
2851 static GTY(()) tree va_list_type;
2852
2853 /* Return the type to use as __builtin_va_list. */
2854 static tree
2855 arm_build_builtin_va_list (void)
2856 {
2857 tree va_list_name;
2858 tree ap_field;
2859
2860 if (!TARGET_AAPCS_BASED)
2861 return std_build_builtin_va_list ();
2862
2863 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2864 defined as:
2865
2866 struct __va_list
2867 {
2868 void *__ap;
2869 };
2870
2871 The C Library ABI further reinforces this definition in \S
2872 4.1.
2873
2874 We must follow this definition exactly. The structure tag
2875 name is visible in C++ mangled names, and thus forms a part
2876 of the ABI. The field name may be used by people who
2877 #include <stdarg.h>. */
2878 /* Create the type. */
2879 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2880 /* Give it the required name. */
2881 va_list_name = build_decl (BUILTINS_LOCATION,
2882 TYPE_DECL,
2883 get_identifier ("__va_list"),
2884 va_list_type);
2885 DECL_ARTIFICIAL (va_list_name) = 1;
2886 TYPE_NAME (va_list_type) = va_list_name;
2887 TYPE_STUB_DECL (va_list_type) = va_list_name;
2888 /* Create the __ap field. */
2889 ap_field = build_decl (BUILTINS_LOCATION,
2890 FIELD_DECL,
2891 get_identifier ("__ap"),
2892 ptr_type_node);
2893 DECL_ARTIFICIAL (ap_field) = 1;
2894 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2895 TYPE_FIELDS (va_list_type) = ap_field;
2896 /* Compute its layout. */
2897 layout_type (va_list_type);
2898
2899 return va_list_type;
2900 }
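/* At the source level the type built above corresponds to

     typedef struct __va_list { void *__ap; } va_list;

   i.e. on AAPCS targets a va_list is a one-word struct wrapping the plain
   argument pointer that the standard (std_) va_arg machinery expects.  */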
2901
2902 /* Return an expression of type "void *" pointing to the next
2903 available argument in a variable-argument list. VALIST is the
2904 user-level va_list object, of type __builtin_va_list. */
2905 static tree
2906 arm_extract_valist_ptr (tree valist)
2907 {
2908 if (TREE_TYPE (valist) == error_mark_node)
2909 return error_mark_node;
2910
2911 /* On an AAPCS target, the pointer is stored within "struct
2912 va_list". */
2913 if (TARGET_AAPCS_BASED)
2914 {
2915 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2916 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2917 valist, ap_field, NULL_TREE);
2918 }
2919
2920 return valist;
2921 }
2922
2923 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2924 static void
2925 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2926 {
2927 valist = arm_extract_valist_ptr (valist);
2928 std_expand_builtin_va_start (valist, nextarg);
2929 }
2930
2931 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2932 static tree
2933 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2934 gimple_seq *post_p)
2935 {
2936 valist = arm_extract_valist_ptr (valist);
2937 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2938 }
2939
2940 /* Check any incompatible options that the user has specified. */
2941 static void
2942 arm_option_check_internal (struct gcc_options *opts)
2943 {
2944 int flags = opts->x_target_flags;
2945
2946 /* iWMMXt and NEON are incompatible. */
2947 if (TARGET_IWMMXT
2948 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2949 error ("iWMMXt and NEON are incompatible");
2950
2951 /* Make sure that the processor choice does not conflict with any of the
2952 other command line choices. */
2953 if (TARGET_ARM_P (flags)
2954 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2955 error ("target CPU does not support ARM mode");
2956
2957 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2958 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2959 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2960
2961 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2962 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2963
2964 /* If this target is normally configured to use APCS frames, warn if they
2965 are turned off and debugging is turned on. */
2966 if (TARGET_ARM_P (flags)
2967 && write_symbols != NO_DEBUG
2968 && !TARGET_APCS_FRAME
2969 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2970 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2971 "debugging");
2972
2973 /* iWMMXt unsupported under Thumb mode. */
2974 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2975 error ("iWMMXt unsupported under Thumb mode");
2976
2977 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2978 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2979
2980 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2981 {
2982 error ("RTP PIC is incompatible with Thumb");
2983 flag_pic = 0;
2984 }
2985
2986 if (target_pure_code || target_slow_flash_data)
2987 {
2988 const char *flag = (target_pure_code ? "-mpure-code" :
2989 "-mslow-flash-data");
2990 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2991
2992 /* We only support -mslow-flash-data on M-profile targets with
2993 MOVT. */
2994 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2995 error ("%s only supports non-pic code on M-profile targets with the "
2996 "MOVT instruction", flag);
2997
2998 /* We only support -mpure-code on M-profile targets. */
2999 if (target_pure_code && common_unsupported_modes)
3000 error ("%s only supports non-pic code on M-profile targets", flag);
3001
3002 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3003 -mword-relocations forbids relocation of MOVT/MOVW. */
3004 if (target_word_relocations)
3005 error ("%s incompatible with %<-mword-relocations%>", flag);
3006 }
3007 }
3008
3009 /* Recompute the global settings depending on target attribute options. */
3010
3011 static void
3012 arm_option_params_internal (void)
3013 {
3014 /* If we are not using the default (ARM mode) section anchor offset
3015 ranges, then set the correct ranges now. */
3016 if (TARGET_THUMB1)
3017 {
3018 /* Thumb-1 LDR instructions cannot have negative offsets.
3019 Permissible positive offset ranges are 5-bit (for byte loads),
3020 6-bit (for halfword loads), or 7-bit (for word loads).
3021 Empirical results suggest a 7-bit anchor range gives the best
3022 overall code size. */
3023 targetm.min_anchor_offset = 0;
3024 targetm.max_anchor_offset = 127;
3025 }
3026 else if (TARGET_THUMB2)
3027 {
3028 /* The minimum is set such that the total size of the block
3029 for a particular anchor is 248 + 1 + 4095 bytes, which is
3030 divisible by eight, ensuring natural spacing of anchors. */
3031 targetm.min_anchor_offset = -248;
3032 targetm.max_anchor_offset = 4095;
3033 }
3034 else
3035 {
3036 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3037 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3038 }
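/* For the Thumb-2 case above, the covered block really is a multiple of
   eight bytes: 248 + 1 + 4095 = 4344 = 8 * 543.  */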
3039
3040 /* Increase the number of conditional instructions with -Os. */
3041 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3042
3043 /* For THUMB2, we limit the conditional sequence to one IT block. */
3044 if (TARGET_THUMB2)
3045 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3046
3047 if (TARGET_THUMB1)
3048 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3049 else
3050 targetm.md_asm_adjust = arm_md_asm_adjust;
3051 }
3052
3053 /* True if -mflip-thumb should next add an attribute for the default
3054 mode, false if it should next add an attribute for the opposite mode. */
3055 static GTY(()) bool thumb_flipper;
3056
3057 /* Options after initial target override. */
3058 static GTY(()) tree init_optimize;
3059
3060 static void
3061 arm_override_options_after_change_1 (struct gcc_options *opts,
3062 struct gcc_options *opts_set)
3063 {
3064 /* -falign-functions without argument: supply one. */
3065 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3066 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3067 && opts->x_optimize_size ? "2" : "4";
3068 }
3069
3070 /* Implement targetm.override_options_after_change. */
3071
3072 static void
3073 arm_override_options_after_change (void)
3074 {
3075 arm_override_options_after_change_1 (&global_options, &global_options_set);
3076 }
3077
3078 /* Implement TARGET_OPTION_RESTORE. */
3079 static void
3080 arm_option_restore (struct gcc_options */* opts */,
3081 struct gcc_options */* opts_set */,
3082 struct cl_target_option *ptr)
3083 {
3084 arm_configure_build_target (&arm_active_target, ptr, false);
3085 arm_option_reconfigure_globals ();
3086 }
3087
3088 /* Reset options between modes that the user has specified. */
3089 static void
3090 arm_option_override_internal (struct gcc_options *opts,
3091 struct gcc_options *opts_set)
3092 {
3093 arm_override_options_after_change_1 (opts, opts_set);
3094
3095 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3096 {
3097 /* The default is to enable interworking, so this warning message would
3098 be confusing to users who have just compiled with
3099 eg, -march=armv4. */
3100 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3101 opts->x_target_flags &= ~MASK_INTERWORK;
3102 }
3103
3104 if (TARGET_THUMB_P (opts->x_target_flags)
3105 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3106 {
3107 warning (0, "target CPU does not support THUMB instructions");
3108 opts->x_target_flags &= ~MASK_THUMB;
3109 }
3110
3111 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3112 {
3113 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3114 opts->x_target_flags &= ~MASK_APCS_FRAME;
3115 }
3116
3117 /* Callee super interworking implies thumb interworking. Adding
3118 this to the flags here simplifies the logic elsewhere. */
3119 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3120 opts->x_target_flags |= MASK_INTERWORK;
3121
3122 /* Need to remember initial values so combinations of options like
3123 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3124 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3125
3126 if (! opts_set->x_arm_restrict_it)
3127 opts->x_arm_restrict_it = arm_arch8;
3128
3129 /* ARM execution state and M profile don't have [restrict] IT. */
3130 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3131 opts->x_arm_restrict_it = 0;
3132
3133 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3134 if (!opts_set->x_arm_restrict_it
3135 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3136 opts->x_arm_restrict_it = 0;
3137
3138 /* Enable -munaligned-access by default for
3139 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3140 i.e. Thumb2 and ARM state only.
3141 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3142 - ARMv8 architecture-based processors.
3143
3144 Disable -munaligned-access by default for
3145 - all pre-ARMv6 architecture-based processors
3146 - ARMv6-M architecture-based processors
3147 - ARMv8-M Baseline processors. */
3148
3149 if (! opts_set->x_unaligned_access)
3150 {
3151 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3152 && arm_arch6 && (arm_arch_notm || arm_arch7));
3153 }
3154 else if (opts->x_unaligned_access == 1
3155 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3156 {
3157 warning (0, "target CPU does not support unaligned accesses");
3158 opts->x_unaligned_access = 0;
3159 }
3160
3161 /* Don't warn since it's on by default in -O2. */
3162 if (TARGET_THUMB1_P (opts->x_target_flags))
3163 opts->x_flag_schedule_insns = 0;
3164 else
3165 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3166
3167 /* Disable shrink-wrap when optimizing function for size, since it tends to
3168 generate additional returns. */
3169 if (optimize_function_for_size_p (cfun)
3170 && TARGET_THUMB2_P (opts->x_target_flags))
3171 opts->x_flag_shrink_wrap = false;
3172 else
3173 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3174
3175 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3176 - epilogue_insns - does not accurately model the corresponding insns
3177 emitted in the asm file. In particular, see the comment in thumb_exit
3178 'Find out how many of the (return) argument registers we can corrupt'.
3179 As a consequence, the epilogue may clobber registers without fipa-ra
3180 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3181 TODO: Accurately model clobbers for epilogue_insns and reenable
3182 fipa-ra. */
3183 if (TARGET_THUMB1_P (opts->x_target_flags))
3184 opts->x_flag_ipa_ra = 0;
3185 else
3186 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3187
3188 /* Thumb2 inline assembly code should always use unified syntax.
3189 This will apply to ARM and Thumb1 eventually. */
3190 if (TARGET_THUMB2_P (opts->x_target_flags))
3191 opts->x_inline_asm_unified = true;
3192
3193 if (arm_stack_protector_guard == SSP_GLOBAL
3194 && opts->x_arm_stack_protector_guard_offset_str)
3195 {
3196 error ("incompatible options %<-mstack-protector-guard=global%> and "
3197 "%<-mstack-protector-guard-offset=%s%>",
3198 arm_stack_protector_guard_offset_str);
3199 }
3200
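/* The offset is parsed by hand with strtol so that any base prefix is
   accepted, e.g. -mstack-protector-guard-offset=0x20 or =32 (illustrative
   values).  */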
3201 if (opts->x_arm_stack_protector_guard_offset_str)
3202 {
3203 char *end;
3204 const char *str = arm_stack_protector_guard_offset_str;
3205 errno = 0;
3206 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3207 if (!*str || *end || errno)
3208 error ("%qs is not a valid offset in %qs", str,
3209 "-mstack-protector-guard-offset=");
3210 arm_stack_protector_guard_offset = offs;
3211 }
3212
3213 if (arm_current_function_pac_enabled_p ())
3214 {
3215 if (!arm_arch8m_main)
3216 error ("This architecture does not support branch protection "
3217 "instructions");
3218 if (TARGET_TPCS_FRAME)
3219 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3220 }
3221
3222 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3223 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3224 #endif
3225 }
3226
3227 static sbitmap isa_all_fpubits_internal;
3228 static sbitmap isa_all_fpbits;
3229 static sbitmap isa_quirkbits;
3230
3231 /* Configure a build target TARGET from the user-specified options OPTS and
3232 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3233 architecture have been specified, but the two are not identical. */
3234 void
3235 arm_configure_build_target (struct arm_build_target *target,
3236 struct cl_target_option *opts,
3237 bool warn_compatible)
3238 {
3239 const cpu_option *arm_selected_tune = NULL;
3240 const arch_option *arm_selected_arch = NULL;
3241 const cpu_option *arm_selected_cpu = NULL;
3242 const arm_fpu_desc *arm_selected_fpu = NULL;
3243 const char *tune_opts = NULL;
3244 const char *arch_opts = NULL;
3245 const char *cpu_opts = NULL;
3246
3247 bitmap_clear (target->isa);
3248 target->core_name = NULL;
3249 target->arch_name = NULL;
3250
3251 if (opts->x_arm_arch_string)
3252 {
3253 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3254 "-march",
3255 opts->x_arm_arch_string);
3256 arch_opts = strchr (opts->x_arm_arch_string, '+');
3257 }
3258
3259 if (opts->x_arm_cpu_string)
3260 {
3261 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3262 opts->x_arm_cpu_string);
3263 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3264 arm_selected_tune = arm_selected_cpu;
3265 /* If taking the tuning from -mcpu, we don't need to rescan the
3266 options for tuning. */
3267 }
3268
3269 if (opts->x_arm_tune_string)
3270 {
3271 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3272 opts->x_arm_tune_string);
3273 tune_opts = strchr (opts->x_arm_tune_string, '+');
3274 }
3275
3276 if (opts->x_arm_branch_protection_string)
3277 {
3278 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3279
3280 if (aarch_ra_sign_key != AARCH_KEY_A)
3281 {
3282 warning (0, "invalid key type for %<-mbranch-protection=%>");
3283 aarch_ra_sign_key = AARCH_KEY_A;
3284 }
3285 }
3286
3287 if (arm_selected_arch)
3288 {
3289 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3290 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3291 arch_opts);
3292
3293 if (arm_selected_cpu)
3294 {
3295 auto_sbitmap cpu_isa (isa_num_bits);
3296 auto_sbitmap isa_delta (isa_num_bits);
3297
3298 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3299 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3300 cpu_opts);
3301 bitmap_xor (isa_delta, cpu_isa, target->isa);
3302 /* Ignore any bits that are quirk bits. */
3303 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3304 /* If the user (or the default configuration) has specified a
3305 specific FPU, then ignore any bits that depend on the FPU
3306 configuration. Do similarly if using the soft-float
3307 ABI. */
3308 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3309 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3310 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3311
3312 if (!bitmap_empty_p (isa_delta))
3313 {
3314 if (warn_compatible)
3315 warning (0, "switch %<-mcpu=%s%> conflicts "
3316 "with switch %<-march=%s%>",
3317 opts->x_arm_cpu_string,
3318 opts->x_arm_arch_string);
3319
3320 /* -march wins for code generation.
3321 -mcpu wins for default tuning. */
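/* An illustrative combination: with -mcpu=cortex-a8 -march=armv7-m, code
   is generated for armv7-m while the cortex-a8 tuning tables are kept.  */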
3322 if (!arm_selected_tune)
3323 arm_selected_tune = arm_selected_cpu;
3324
3325 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3326 target->arch_name = arm_selected_arch->common.name;
3327 }
3328 else
3329 {
3330 /* Architecture and CPU are essentially the same.
3331 Prefer the CPU setting. */
3332 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3333 target->core_name = arm_selected_cpu->common.name;
3334 /* Copy the CPU's capabilities, so that we inherit the
3335 appropriate extensions and quirks. */
3336 bitmap_copy (target->isa, cpu_isa);
3337 }
3338 }
3339 else
3340 {
3341 /* Pick a CPU based on the architecture. */
3342 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3343 target->arch_name = arm_selected_arch->common.name;
3344 /* Note: target->core_name is left unset in this path. */
3345 }
3346 }
3347 else if (arm_selected_cpu)
3348 {
3349 target->core_name = arm_selected_cpu->common.name;
3350 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3351 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3352 cpu_opts);
3353 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3354 }
3355 /* If the user did not specify a processor or architecture, choose
3356 one for them. */
3357 else
3358 {
3359 const cpu_option *sel;
3360 auto_sbitmap sought_isa (isa_num_bits);
3361 bitmap_clear (sought_isa);
3362 auto_sbitmap default_isa (isa_num_bits);
3363
3364 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3365 TARGET_CPU_DEFAULT);
3366 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3367 gcc_assert (arm_selected_cpu->common.name);
3368
3369 /* RWE: All of the selection logic below (to the end of this
3370 'if' clause) looks somewhat suspect. It appears to be mostly
3371 there to support forcing thumb support when the default CPU
3372 does not have thumb (somewhat dubious in terms of what the
3373 user might be expecting). I think it should be removed once
3374 support for the pre-thumb era cores is removed. */
3375 sel = arm_selected_cpu;
3376 arm_initialize_isa (default_isa, sel->common.isa_bits);
3377 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3378 cpu_opts);
3379
3380 /* Now check to see if the user has specified any command line
3381 switches that require certain abilities from the cpu. */
3382
3383 if (TARGET_INTERWORK || TARGET_THUMB)
3384 bitmap_set_bit (sought_isa, isa_bit_thumb);
3385
3386 /* If there are such requirements and the default CPU does not
3387 satisfy them, we need to run over the complete list of
3388 cores looking for one that is satisfactory. */
3389 if (!bitmap_empty_p (sought_isa)
3390 && !bitmap_subset_p (sought_isa, default_isa))
3391 {
3392 auto_sbitmap candidate_isa (isa_num_bits);
3393 /* We're only interested in a CPU with at least the
3394 capabilities of the default CPU and the required
3395 additional features. */
3396 bitmap_ior (default_isa, default_isa, sought_isa);
3397
3398 /* Try to locate a CPU type that supports all of the abilities
3399 of the default CPU, plus the extra abilities requested by
3400 the user. */
3401 for (sel = all_cores; sel->common.name != NULL; sel++)
3402 {
3403 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3404 /* An exact match? */
3405 if (bitmap_equal_p (default_isa, candidate_isa))
3406 break;
3407 }
3408
3409 if (sel->common.name == NULL)
3410 {
3411 unsigned current_bit_count = isa_num_bits;
3412 const cpu_option *best_fit = NULL;
3413
3414 /* Ideally we would like to issue an error message here
3415 saying that it was not possible to find a CPU compatible
3416 with the default CPU, but which also supports the command
3417 line options specified by the programmer, and so they
3418 ought to use the -mcpu=<name> command line option to
3419 override the default CPU type.
3420
3421 If we cannot find a CPU that has exactly the
3422 characteristics of the default CPU and the given
3423 command line options we scan the array again looking
3424 for a best match. The best match must have at least
3425 the capabilities of the perfect match. */
3426 for (sel = all_cores; sel->common.name != NULL; sel++)
3427 {
3428 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3429
3430 if (bitmap_subset_p (default_isa, candidate_isa))
3431 {
3432 unsigned count;
3433
3434 bitmap_and_compl (candidate_isa, candidate_isa,
3435 default_isa);
3436 count = bitmap_popcount (candidate_isa);
3437
3438 if (count < current_bit_count)
3439 {
3440 best_fit = sel;
3441 current_bit_count = count;
3442 }
3443 }
3444
3445 gcc_assert (best_fit);
3446 sel = best_fit;
3447 }
3448 }
3449 arm_selected_cpu = sel;
3450 }
3451
3452 /* Now we know the CPU, we can finally initialize the target
3453 structure. */
3454 target->core_name = arm_selected_cpu->common.name;
3455 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3456 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3457 cpu_opts);
3458 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3459 }
3460
3461 gcc_assert (arm_selected_cpu);
3462 gcc_assert (arm_selected_arch);
3463
3464 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3465 {
3466 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3467 auto_sbitmap fpu_bits (isa_num_bits);
3468
3469 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3470 /* This should clear out ALL bits relating to the FPU/simd
3471 extensions, to avoid potentially invalid combinations later on
3472 that we can't match. At present we only clear out those bits
3473 that can be set by -mfpu. This should be fixed in GCC-12. */
3474 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3475 bitmap_ior (target->isa, target->isa, fpu_bits);
3476 }
3477
3478 /* If we have the soft-float ABI, clear any feature bits relating to use of
3479 floating-point operations. They'll just confuse things later on. */
3480 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3481 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3482
3483 /* There may be implied bits which we still need to enable. These are
3484 non-named features which are needed to complete other sets of features,
3485 but cannot be enabled from arm-cpus.in due to being shared between
3486 multiple fgroups. Each entry in all_implied_fbits is of the form
3487 ante -> cons, meaning that if the feature "ante" is enabled, we should
3488 implicitly enable "cons". */
3489 const struct fbit_implication *impl = all_implied_fbits;
3490 while (impl->ante)
3491 {
3492 if (bitmap_bit_p (target->isa, impl->ante))
3493 bitmap_set_bit (target->isa, impl->cons);
3494 impl++;
3495 }
3496
3497 if (!arm_selected_tune)
3498 arm_selected_tune = arm_selected_cpu;
3499 else /* Validate the features passed to -mtune. */
3500 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3501
3502 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3503
3504 /* Finish initializing the target structure. */
3505 if (!target->arch_name)
3506 target->arch_name = arm_selected_arch->common.name;
3507 target->arch_pp_name = arm_selected_arch->arch;
3508 target->base_arch = arm_selected_arch->base_arch;
3509 target->profile = arm_selected_arch->profile;
3510
3511 target->tune_flags = tune_data->tune_flags;
3512 target->tune = tune_data->tune;
3513 target->tune_core = tune_data->scheduler;
3514 }
3515
3516 /* Fix up any incompatible options that the user has specified. */
3517 static void
3518 arm_option_override (void)
3519 {
3520 static const enum isa_feature fpu_bitlist_internal[]
3521 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3522 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3523 static const enum isa_feature fp_bitlist[]
3524 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3525 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3526 cl_target_option opts;
3527
3528 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3529 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3530
3531 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3532 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3533 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3534 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3535
3536 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3537
3538 if (!OPTION_SET_P (arm_fpu_index))
3539 {
3540 bool ok;
3541 int fpu_index;
3542
3543 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3544 CL_TARGET);
3545 gcc_assert (ok);
3546 arm_fpu_index = (enum fpu_type) fpu_index;
3547 }
3548
3549 cl_target_option_save (&opts, &global_options, &global_options_set);
3550 arm_configure_build_target (&arm_active_target, &opts, true);
3551
3552 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3553 SUBTARGET_OVERRIDE_OPTIONS;
3554 #endif
3555
3556 /* Initialize boolean versions of the architectural flags, for use
3557 in the arm.md file and for enabling feature flags. */
3558 arm_option_reconfigure_globals ();
3559
3560 arm_tune = arm_active_target.tune_core;
3561 tune_flags = arm_active_target.tune_flags;
3562 current_tune = arm_active_target.tune;
3563
3564 /* TBD: Dwarf info for apcs frame is not handled yet. */
3565 if (TARGET_APCS_FRAME)
3566 flag_shrink_wrap = false;
3567
3568 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3569 {
3570 warning (0, "%<-mapcs-stack-check%> incompatible with "
3571 "%<-mno-apcs-frame%>");
3572 target_flags |= MASK_APCS_FRAME;
3573 }
3574
3575 if (TARGET_POKE_FUNCTION_NAME)
3576 target_flags |= MASK_APCS_FRAME;
3577
3578 if (TARGET_APCS_REENT && flag_pic)
3579 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3580
3581 if (TARGET_APCS_REENT)
3582 warning (0, "APCS reentrant code not supported. Ignored");
3583
3584 /* Set up some tuning parameters. */
3585 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3586 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3587 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3588 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3589 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3590 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3591
3592 /* For arm2/3 there is no need to do any scheduling if we are doing
3593 software floating-point. */
3594 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3595 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3596
3597 /* Override the default structure alignment for AAPCS ABI. */
3598 if (!OPTION_SET_P (arm_structure_size_boundary))
3599 {
3600 if (TARGET_AAPCS_BASED)
3601 arm_structure_size_boundary = 8;
3602 }
3603 else
3604 {
3605 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3606
3607 if (arm_structure_size_boundary != 8
3608 && arm_structure_size_boundary != 32
3609 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3610 {
3611 if (ARM_DOUBLEWORD_ALIGN)
3612 warning (0,
3613 "structure size boundary can only be set to 8, 32 or 64");
3614 else
3615 warning (0, "structure size boundary can only be set to 8 or 32");
3616 arm_structure_size_boundary
3617 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3618 }
3619 }
3620
3621 if (TARGET_VXWORKS_RTP)
3622 {
3623 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3624 arm_pic_data_is_text_relative = 0;
3625 }
3626 else if (flag_pic
3627 && !arm_pic_data_is_text_relative
3628 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3629 /* When text & data segments don't have a fixed displacement, the
3630 intended use is with a single, read only, pic base register.
3631 Unless the user explicitly requested not to do that, set
3632 it. */
3633 target_flags |= MASK_SINGLE_PIC_BASE;
3634
3635 /* If stack checking is disabled, we can use r10 as the PIC register,
3636 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3637 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3638 {
3639 if (TARGET_VXWORKS_RTP)
3640 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3641 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3642 }
3643
3644 if (flag_pic && TARGET_VXWORKS_RTP)
3645 arm_pic_register = 9;
3646
3647 /* If in FDPIC mode then force arm_pic_register to be r9. */
3648 if (TARGET_FDPIC)
3649 {
3650 arm_pic_register = FDPIC_REGNUM;
3651 if (TARGET_THUMB1)
3652 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3653 }
3654
3655 if (arm_pic_register_string != NULL)
3656 {
3657 int pic_register = decode_reg_name (arm_pic_register_string);
3658
3659 if (!flag_pic)
3660 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3661
3662 /* Prevent the user from choosing an obviously stupid PIC register. */
3663 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3664 || pic_register == HARD_FRAME_POINTER_REGNUM
3665 || pic_register == STACK_POINTER_REGNUM
3666 || pic_register >= PC_REGNUM
3667 || (TARGET_VXWORKS_RTP
3668 && (unsigned int) pic_register != arm_pic_register))
3669 error ("unable to use %qs for PIC register", arm_pic_register_string);
3670 else
3671 arm_pic_register = pic_register;
3672 }
3673
3674 if (flag_pic)
3675 target_word_relocations = 1;
3676
3677 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3678 if (fix_cm3_ldrd == 2)
3679 {
3680 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3681 fix_cm3_ldrd = 1;
3682 else
3683 fix_cm3_ldrd = 0;
3684 }
3685
3686 /* Enable fix_vlldm by default if required. */
3687 if (fix_vlldm == 2)
3688 {
3689 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3690 fix_vlldm = 1;
3691 else
3692 fix_vlldm = 0;
3693 }
3694
3695 /* Enable fix_aes by default if required. */
3696 if (fix_aes_erratum_1742098 == 2)
3697 {
3698 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3699 fix_aes_erratum_1742098 = 1;
3700 else
3701 fix_aes_erratum_1742098 = 0;
3702 }
3703
3704 /* Hot/Cold partitioning is not currently supported, since we can't
3705 handle literal pool placement in that case. */
3706 if (flag_reorder_blocks_and_partition)
3707 {
3708 inform (input_location,
3709 "%<-freorder-blocks-and-partition%> not supported "
3710 "on this architecture");
3711 flag_reorder_blocks_and_partition = 0;
3712 flag_reorder_blocks = 1;
3713 }
3714
3715 if (flag_pic)
3716 /* Hoisting PIC address calculations more aggressively provides a small,
3717 but measurable, size reduction for PIC code. Therefore, we decrease
3718 the bar for unrestricted expression hoisting to the cost of PIC address
3719 calculation, which is 2 instructions. */
3720 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3721 param_gcse_unrestricted_cost, 2);
3722
3723 /* ARM EABI defaults to strict volatile bitfields. */
3724 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3725 && abi_version_at_least(2))
3726 flag_strict_volatile_bitfields = 1;
3727
3728 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3729 have deemed it beneficial (signified by setting
3730 prefetch.num_slots to 1 or more). */
3731 if (flag_prefetch_loop_arrays < 0
3732 && HAVE_prefetch
3733 && optimize >= 3
3734 && current_tune->prefetch.num_slots > 0)
3735 flag_prefetch_loop_arrays = 1;
3736
3737 /* Set up parameters to be used in prefetching algorithm. Do not
3738 override the defaults unless we are tuning for a core we have
3739 researched values for. */
3740 if (current_tune->prefetch.num_slots > 0)
3741 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3742 param_simultaneous_prefetches,
3743 current_tune->prefetch.num_slots);
3744 if (current_tune->prefetch.l1_cache_line_size >= 0)
3745 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3746 param_l1_cache_line_size,
3747 current_tune->prefetch.l1_cache_line_size);
3748 if (current_tune->prefetch.l1_cache_line_size >= 0)
3749 {
3750 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3751 param_destruct_interfere_size,
3752 current_tune->prefetch.l1_cache_line_size);
3753 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3754 param_construct_interfere_size,
3755 current_tune->prefetch.l1_cache_line_size);
3756 }
3757 else
3758 {
3759 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3760 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3761 constructive? */
3762 /* More recent Cortex chips have a 64-byte cache line, but are marked
3763 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3764 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3765 param_destruct_interfere_size, 64);
3766 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3767 param_construct_interfere_size, 64);
3768 }
3769
3770 if (current_tune->prefetch.l1_cache_size >= 0)
3771 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3772 param_l1_cache_size,
3773 current_tune->prefetch.l1_cache_size);
3774
3775 /* Look through ready list and all of queue for instructions
3776 relevant for L2 auto-prefetcher. */
3777 int sched_autopref_queue_depth;
3778
3779 switch (current_tune->sched_autopref)
3780 {
3781 case tune_params::SCHED_AUTOPREF_OFF:
3782 sched_autopref_queue_depth = -1;
3783 break;
3784
3785 case tune_params::SCHED_AUTOPREF_RANK:
3786 sched_autopref_queue_depth = 0;
3787 break;
3788
3789 case tune_params::SCHED_AUTOPREF_FULL:
3790 sched_autopref_queue_depth = max_insn_queue_index + 1;
3791 break;
3792
3793 default:
3794 gcc_unreachable ();
3795 }
3796
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_sched_autopref_queue_depth,
3799 sched_autopref_queue_depth);
3800
3801 /* Currently, for slow flash data, we just disable literal pools. We also
3802 disable it for pure-code. */
3803 if (target_slow_flash_data || target_pure_code)
3804 arm_disable_literal_pool = true;
3805
3806 /* Disable scheduling fusion by default if it's not an armv7 processor
3807 or doesn't prefer ldrd/strd. */
3808 if (flag_schedule_fusion == 2
3809 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3810 flag_schedule_fusion = 0;
3811
3812 /* Need to remember initial options before they are overridden. */
3813 init_optimize = build_optimization_node (&global_options,
3814 &global_options_set);
3815
3816 arm_options_perform_arch_sanity_checks ();
3817 arm_option_override_internal (&global_options, &global_options_set);
3818 arm_option_check_internal (&global_options);
3819 arm_option_params_internal ();
3820
3821 /* Create the default target_options structure. */
3822 target_option_default_node = target_option_current_node
3823 = build_target_option_node (&global_options, &global_options_set);
3824
3825 /* Register global variables with the garbage collector. */
3826 arm_add_gc_roots ();
3827
3828 /* Init initial mode for testing. */
3829 thumb_flipper = TARGET_THUMB;
3830 }
3831
3832
3833 /* Reconfigure global status flags from the active_target.isa. */
3834 void
3835 arm_option_reconfigure_globals (void)
3836 {
3837 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3838 arm_base_arch = arm_active_target.base_arch;
3839
3840 /* Initialize boolean versions of the architectural flags, for use
3841 in the arm.md file. */
3842 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3843 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3844 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3845 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3846 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3847 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3848 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3849 arm_arch6m = arm_arch6 && !arm_arch_notm;
3850 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3851 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3852 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3853 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3854 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3855 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3856 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3857 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3858 isa_bit_armv8_1m_main);
3859 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3860 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3861 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3862 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3863 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3864 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3865 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3866 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3867 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3868 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3869 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3870 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3871 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3872
3873 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3874 if (arm_fp16_inst)
3875 {
3876 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3877 error ("selected fp16 options are incompatible");
3878 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3879 }
3880
3881 arm_arch_cde = 0;
3882 arm_arch_cde_coproc = 0;
3883 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3884 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3885 isa_bit_cdecp6, isa_bit_cdecp7};
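/* e.g. -march=armv8.1-m.main+cdecp0 (an illustrative command line) sets
   the bit for coprocessor 0 in arm_arch_cde_coproc below.  */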
3886 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3887 {
3888 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3889 if (cde_bit)
3890 {
3891 arm_arch_cde |= cde_bit;
3892 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3893 }
3894 }
3895
3896 /* And finally, set up some quirks. */
3897 arm_arch_no_volatile_ce
3898 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3899 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3900 isa_bit_quirk_armv6kz);
3901
3902 /* Use the cp15 method if it is available. */
3903 if (target_thread_pointer == TP_AUTO)
3904 {
3905 if (arm_arch6k && !TARGET_THUMB1)
3906 target_thread_pointer = TP_CP15;
3907 else
3908 target_thread_pointer = TP_SOFT;
3909 }
3910
3911 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3912 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3913 }
3914
3915 /* Perform some validation between the desired architecture and the rest of the
3916 options. */
3917 void
3918 arm_options_perform_arch_sanity_checks (void)
3919 {
3920 /* V5T code we generate is completely interworking capable, so we turn off
3921 TARGET_INTERWORK here to avoid many tests later on. */
3922
3923 /* XXX However, we must pass the right pre-processor defines to CPP
3924 or GLD can get confused. This is a hack. */
3925 if (TARGET_INTERWORK)
3926 arm_cpp_interwork = 1;
3927
3928 if (arm_arch5t)
3929 target_flags &= ~MASK_INTERWORK;
3930
3931 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3932 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3933
3934 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3935 error ("iwmmxt abi requires an iwmmxt capable cpu");
3936
3937 /* BPABI targets use linker tricks to allow interworking on cores
3938 without thumb support. */
3939 if (TARGET_INTERWORK
3940 && !TARGET_BPABI
3941 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3942 {
3943 warning (0, "target CPU does not support interworking" );
3944 target_flags &= ~MASK_INTERWORK;
3945 }
3946
3947 /* If soft-float is specified then don't use FPU. */
3948 if (TARGET_SOFT_FLOAT)
3949 arm_fpu_attr = FPU_NONE;
3950 else
3951 arm_fpu_attr = FPU_VFP;
3952
3953 if (TARGET_AAPCS_BASED)
3954 {
3955 if (TARGET_CALLER_INTERWORKING)
3956 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3957 else
3958 if (TARGET_CALLEE_INTERWORKING)
3959 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3960 }
3961
3962 /* __fp16 support currently assumes the core has ldrh. */
3963 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3964 sorry ("%<__fp16%> and no ldrh");
3965
3966 if (use_cmse && !arm_arch_cmse)
3967 error ("target CPU does not support ARMv8-M Security Extensions");
3968
3969 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3970 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3971 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3972 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3973
3974
3975 if (TARGET_AAPCS_BASED)
3976 {
3977 if (arm_abi == ARM_ABI_IWMMXT)
3978 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3979 else if (TARGET_HARD_FLOAT_ABI)
3980 {
3981 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3982 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3983 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3984 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3985 }
3986 else
3987 arm_pcs_default = ARM_PCS_AAPCS;
3988 }
3989 else
3990 {
3991 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3992 sorry ("%<-mfloat-abi=hard%> and VFP");
3993
3994 if (arm_abi == ARM_ABI_APCS)
3995 arm_pcs_default = ARM_PCS_APCS;
3996 else
3997 arm_pcs_default = ARM_PCS_ATPCS;
3998 }
3999 }
4000
4001 /* Test whether a local function descriptor is canonical, i.e.,
4002 whether we can use GOTOFFFUNCDESC to compute the address of the
4003 function. */
4004 static bool
4005 arm_fdpic_local_funcdesc_p (rtx fnx)
4006 {
4007 tree fn;
4008 enum symbol_visibility vis;
4009 bool ret;
4010
4011 if (!TARGET_FDPIC)
4012 return true;
4013
4014 if (! SYMBOL_REF_LOCAL_P (fnx))
4015 return false;
4016
4017 fn = SYMBOL_REF_DECL (fnx);
4018
4019 if (! fn)
4020 return false;
4021
4022 vis = DECL_VISIBILITY (fn);
4023
4024 if (vis == VISIBILITY_PROTECTED)
4025 /* Private function descriptors for protected functions are not
4026 canonical. Temporarily change the visibility to global so that
4027 we can ensure uniqueness of funcdesc pointers. */
4028 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4029
4030 ret = default_binds_local_p_1 (fn, flag_pic);
4031
4032 DECL_VISIBILITY (fn) = vis;
4033
4034 return ret;
4035 }
4036
4037 static void
4038 arm_add_gc_roots (void)
4039 {
4040 gcc_obstack_init(&minipool_obstack);
4041 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4042 }
4043 \f
4044 /* A table of known ARM exception types.
4045 For use with the interrupt function attribute. */
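/* Typical usage of the attribute (an illustrative declaration, not part
   of this file):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   The string argument is matched against the table below.  */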
4046
4047 typedef struct
4048 {
4049 const char *const arg;
4050 const unsigned long return_value;
4051 }
4052 isr_attribute_arg;
4053
4054 static const isr_attribute_arg isr_attribute_args [] =
4055 {
4056 { "IRQ", ARM_FT_ISR },
4057 { "irq", ARM_FT_ISR },
4058 { "FIQ", ARM_FT_FIQ },
4059 { "fiq", ARM_FT_FIQ },
4060 { "ABORT", ARM_FT_ISR },
4061 { "abort", ARM_FT_ISR },
4062 { "UNDEF", ARM_FT_EXCEPTION },
4063 { "undef", ARM_FT_EXCEPTION },
4064 { "SWI", ARM_FT_EXCEPTION },
4065 { "swi", ARM_FT_EXCEPTION },
4066 { NULL, ARM_FT_NORMAL }
4067 };
4068
4069 /* Returns the (interrupt) function type of the current
4070 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4071
4072 static unsigned long
4073 arm_isr_value (tree argument)
4074 {
4075 const isr_attribute_arg * ptr;
4076 const char * arg;
4077
4078 if (!arm_arch_notm)
4079 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4080
4081 /* No argument - default to IRQ. */
4082 if (argument == NULL_TREE)
4083 return ARM_FT_ISR;
4084
4085 /* Get the value of the argument. */
4086 if (TREE_VALUE (argument) == NULL_TREE
4087 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4088 return ARM_FT_UNKNOWN;
4089
4090 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4091
4092 /* Check it against the list of known arguments. */
4093 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4094 if (streq (arg, ptr->arg))
4095 return ptr->return_value;
4096
4097 /* An unrecognized interrupt type. */
4098 return ARM_FT_UNKNOWN;
4099 }
4100
4101 /* Computes the type of the current function. */
4102
4103 static unsigned long
4104 arm_compute_func_type (void)
4105 {
4106 unsigned long type = ARM_FT_UNKNOWN;
4107 tree a;
4108 tree attr;
4109
4110 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4111
4112 /* Decide if the current function is volatile. Such functions
4113 never return, and many memory cycles can be saved by not storing
4114 register values that will never be needed again. This optimization
4115 was added to speed up context switching in a kernel application. */
4116 if (optimize > 0
4117 && (TREE_NOTHROW (current_function_decl)
4118 || !(flag_unwind_tables
4119 || (flag_exceptions
4120 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4121 && TREE_THIS_VOLATILE (current_function_decl))
4122 type |= ARM_FT_VOLATILE;
4123
4124 if (cfun->static_chain_decl != NULL)
4125 type |= ARM_FT_NESTED;
4126
4127 attr = DECL_ATTRIBUTES (current_function_decl);
4128
4129 a = lookup_attribute ("naked", attr);
4130 if (a != NULL_TREE)
4131 type |= ARM_FT_NAKED;
4132
4133 a = lookup_attribute ("isr", attr);
4134 if (a == NULL_TREE)
4135 a = lookup_attribute ("interrupt", attr);
4136
4137 if (a == NULL_TREE)
4138 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4139 else
4140 type |= arm_isr_value (TREE_VALUE (a));
4141
4142 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4143 type |= ARM_FT_CMSE_ENTRY;
4144
4145 return type;
4146 }
4147
4148 /* Returns the type of the current function. */
4149
4150 unsigned long
4151 arm_current_func_type (void)
4152 {
4153 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4154 cfun->machine->func_type = arm_compute_func_type ();
4155
4156 return cfun->machine->func_type;
4157 }
4158
4159 bool
4160 arm_allocate_stack_slots_for_args (void)
4161 {
4162 /* Naked functions should not allocate stack slots for arguments. */
4163 return !IS_NAKED (arm_current_func_type ());
4164 }
4165
4166 static bool
4167 arm_warn_func_return (tree decl)
4168 {
4169 /* Naked functions are implemented entirely in assembly, including the
4170 return sequence, so suppress warnings about this. */
4171 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4172 }
4173
4174 \f
4175 /* Output assembler code for a block containing the constant parts
4176 of a trampoline, leaving space for the variable parts.
4177
4178 On the ARM, (if r8 is the static chain regnum, and remembering that
4179 referencing pc adds an offset of 8) the trampoline looks like:
4180 ldr r8, [pc, #0]
4181 ldr pc, [pc]
4182 .word static chain value
4183 .word function's address
4184 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4185
4186 In FDPIC mode, the trampoline looks like:
4187 .word trampoline address
4188 .word trampoline GOT address
4189 ldr r12, [pc, #8] ; #4 for Arm mode
4190 ldr r9, [pc, #8] ; #4 for Arm mode
4191 ldr pc, [pc, #8] ; #4 for Arm mode
4192 .word static chain value
4193 .word GOT address
4194 .word function's address
4195 */
4196
4197 static void
4198 arm_asm_trampoline_template (FILE *f)
4199 {
4200 fprintf (f, "\t.syntax unified\n");
4201
4202 if (TARGET_FDPIC)
4203 {
4204 /* The first two words are a function descriptor pointing to the
4205 trampoline code just below. */
4206 if (TARGET_ARM)
4207 fprintf (f, "\t.arm\n");
4208 else if (TARGET_THUMB2)
4209 fprintf (f, "\t.thumb\n");
4210 else
4211 /* Only ARM and Thumb-2 are supported. */
4212 gcc_unreachable ();
4213
4214 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4215 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4216 /* Trampoline code which sets the static chain register but also
4217 PIC register before jumping into real code. */
4218 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4219 STATIC_CHAIN_REGNUM, PC_REGNUM,
4220 TARGET_THUMB2 ? 8 : 4);
4221 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4222 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4223 TARGET_THUMB2 ? 8 : 4);
4224 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4225 PC_REGNUM, PC_REGNUM,
4226 TARGET_THUMB2 ? 8 : 4);
4227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4228 }
4229 else if (TARGET_ARM)
4230 {
4231 fprintf (f, "\t.arm\n");
4232 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4233 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4234 }
4235 else if (TARGET_THUMB2)
4236 {
4237 fprintf (f, "\t.thumb\n");
4238 /* The Thumb-2 trampoline is similar to the arm implementation.
4239 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4240 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4241 STATIC_CHAIN_REGNUM, PC_REGNUM);
4242 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4243 }
4244 else
4245 {
4246 ASM_OUTPUT_ALIGN (f, 2);
4247 fprintf (f, "\t.code\t16\n");
4248 fprintf (f, ".Ltrampoline_start:\n");
4249 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4250 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4251 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4252 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4253 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4254 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4255 }
4256 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4257 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4258 }
4259
4260 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4261
4262 static void
4263 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4264 {
4265 rtx fnaddr, mem, a_tramp;
4266
4267 emit_block_move (m_tramp, assemble_trampoline_template (),
4268 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4269
4270 if (TARGET_FDPIC)
4271 {
4272 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4273 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4274 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4275 /* The function start address is at offset 8, but in Thumb mode
4276 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4277 below. */
4278 rtx trampoline_code_start
4279 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4280
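/* The offsets below follow the FDPIC template emitted above: bytes 0-7
   hold the new function descriptor, 8-19 the three load instructions,
   20 the static chain value, 24 the GOT address and 28 the entry
   point.  */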
4281 /* Write initial funcdesc which points to the trampoline. */
4282 mem = adjust_address (m_tramp, SImode, 0);
4283 emit_move_insn (mem, trampoline_code_start);
4284 mem = adjust_address (m_tramp, SImode, 4);
4285 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4286 /* Setup static chain. */
4287 mem = adjust_address (m_tramp, SImode, 20);
4288 emit_move_insn (mem, chain_value);
4289 /* GOT + real function entry point. */
4290 mem = adjust_address (m_tramp, SImode, 24);
4291 emit_move_insn (mem, gotaddr);
4292 mem = adjust_address (m_tramp, SImode, 28);
4293 emit_move_insn (mem, fnaddr);
4294 }
4295 else
4296 {
4297 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4298 emit_move_insn (mem, chain_value);
4299
4300 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4301 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4302 emit_move_insn (mem, fnaddr);
4303 }
4304
4305 a_tramp = XEXP (m_tramp, 0);
4306 maybe_emit_call_builtin___clear_cache (a_tramp,
4307 plus_constant (ptr_mode,
4308 a_tramp,
4309 TRAMPOLINE_SIZE));
4310 }
4311
4312 /* Thumb trampolines should be entered in thumb mode, so set
4313 the bottom bit of the address. */
4314
4315 static rtx
4316 arm_trampoline_adjust_address (rtx addr)
4317 {
4318 /* For FDPIC don't fix trampoline address since it's a function
4319 descriptor and not a function address. */
4320 if (TARGET_THUMB && !TARGET_FDPIC)
4321 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4322 NULL, 0, OPTAB_LIB_WIDEN);
4323 return addr;
4324 }
4325 \f
4326 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4327 includes call-clobbered registers too. If this is a leaf function
4328 we can just examine the registers used by the RTL, but otherwise we
4329 have to assume that whatever function is called might clobber
4330 anything, and so we have to save all the call-clobbered registers
4331 as well. */
4332 static inline bool reg_needs_saving_p (unsigned reg)
4333 {
4334 unsigned long func_type = arm_current_func_type ();
4335
4336 if (IS_INTERRUPT (func_type))
4337 if (df_regs_ever_live_p (reg)
4338 /* Save call-clobbered core registers. */
4339 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4340 return true;
4341 else
4342 return false;
4343 else
4344 if (!df_regs_ever_live_p (reg)
4345 || call_used_or_fixed_reg_p (reg))
4346 return false;
4347 else
4348 return true;
4349 }
4350
4351 /* Return 1 if it is possible to return using a single instruction.
4352 If SIBLING is non-null, this is a test for a return before a sibling
4353 call. SIBLING is the call insn, so we can examine its register usage. */
4354
4355 int
4356 use_return_insn (int iscond, rtx sibling)
4357 {
4358 int regno;
4359 unsigned int func_type;
4360 unsigned long saved_int_regs;
4361 unsigned HOST_WIDE_INT stack_adjust;
4362 arm_stack_offsets *offsets;
4363
4364 /* Never use a return instruction before reload has run. */
4365 if (!reload_completed)
4366 return 0;
4367
4368 /* Never use a return instruction when return address signing
4369 mechanism is enabled as it requires more than one
4370 instruction. */
4371 if (arm_current_function_pac_enabled_p ())
4372 return 0;
4373
4374 func_type = arm_current_func_type ();
4375
4376 /* Naked, volatile and stack alignment functions need special
4377 consideration. */
4378 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4379 return 0;
4380
4381 /* So do interrupt functions that use the frame pointer and Thumb
4382 interrupt functions. */
4383 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4384 return 0;
4385
4386 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4387 && !optimize_function_for_size_p (cfun))
4388 return 0;
4389
4390 offsets = arm_get_frame_offsets ();
4391 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4392
4393 /* As do variadic functions. */
4394 if (crtl->args.pretend_args_size
4395 || cfun->machine->uses_anonymous_args
4396 /* Or if the function calls __builtin_eh_return () */
4397 || crtl->calls_eh_return
4398 /* Or if the function calls alloca */
4399 || cfun->calls_alloca
4400 /* Or if there is a stack adjustment. However, if the stack pointer
4401 is saved on the stack, we can use a pre-incrementing stack load. */
4402 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4403 && stack_adjust == 4))
4404 /* Or if the static chain register was saved above the frame, under the
4405 assumption that the stack pointer isn't saved on the stack. */
4406 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4407 && arm_compute_static_chain_stack_bytes() != 0))
4408 return 0;
4409
4410 saved_int_regs = offsets->saved_regs_mask;
4411
4412 /* Unfortunately, the insn
4413
4414 ldmib sp, {..., sp, ...}
4415
4416 triggers a bug on most SA-110 based devices, such that the stack
4417 pointer won't be correctly restored if the instruction takes a
4418 page fault. We work around this problem by popping r3 along with
4419 the other registers, since that is never slower than executing
4420 another instruction.
4421
4422 We test for !arm_arch5t here, because code for any architecture
4423 less than this could potentially be run on one of the buggy
4424 chips. */
4425 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4426 {
4427 /* Validate that r3 is a call-clobbered register (always true in
4428 the default abi) ... */
4429 if (!call_used_or_fixed_reg_p (3))
4430 return 0;
4431
4432 /* ... that it isn't being used for a return value ... */
4433 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4434 return 0;
4435
4436 /* ... or for a tail-call argument ... */
4437 if (sibling)
4438 {
4439 gcc_assert (CALL_P (sibling));
4440
4441 if (find_regno_fusage (sibling, USE, 3))
4442 return 0;
4443 }
4444
4445 /* ... and that there are no call-saved registers in r0-r2
4446 (always true in the default ABI). */
4447 if (saved_int_regs & 0x7)
4448 return 0;
4449 }
4450
4451 /* Can't be done if interworking with Thumb, and any registers have been
4452 stacked. */
4453 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4454 return 0;
4455
4456 /* On StrongARM, conditional returns are expensive if they aren't
4457 taken and multiple registers have been stacked. */
4458 if (iscond && arm_tune_strongarm)
4459 {
4460 /* Conditional return when just the LR is stored is a simple
4461 conditional-load instruction, that's not expensive. */
4462 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4463 return 0;
4464
4465 if (flag_pic
4466 && arm_pic_register != INVALID_REGNUM
4467 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4468 return 0;
4469 }
4470
4471 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4472 several instructions if anything needs to be popped. Armv8.1-M Mainline
4473 also needs several instructions to save and restore FP context. */
4474 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4475 return 0;
4476
4477 /* If there are saved registers but the LR isn't saved, then we need
4478 two instructions for the return. */
4479 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4480 return 0;
4481
4482 /* Can't be done if any of the VFP regs are pushed,
4483 since this also requires an insn. */
4484 if (TARGET_VFP_BASE)
4485 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4486 if (reg_needs_saving_p (regno))
4487 return 0;
4488
4489 if (TARGET_REALLY_IWMMXT)
4490 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4491 if (reg_needs_saving_p (regno))
4492 return 0;
4493
4494 return 1;
4495 }
4496
4497 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4498 shrink-wrapping if possible. This is the case if we need to emit a
4499 prologue, which we can test by looking at the offsets. */
4500 bool
4501 use_simple_return_p (void)
4502 {
4503 arm_stack_offsets *offsets;
4504
4505 /* Note this function can be called before or after reload. */
4506 if (!reload_completed)
4507 arm_compute_frame_layout ();
4508
4509 offsets = arm_get_frame_offsets ();
4510 return offsets->outgoing_args != 0;
4511 }
4512
4513 /* Return TRUE if int I is a valid immediate ARM constant. */
4514
4515 int
4516 const_ok_for_arm (HOST_WIDE_INT i)
4517 {
4518 int lowbit;
4519
4520 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4521 be all zero, or all one. */
4522 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4523 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4524 != ((~(unsigned HOST_WIDE_INT) 0)
4525 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4526 return FALSE;
4527
4528 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4529
4530 /* Fast return for 0 and small values. We must do this for zero, since
4531 the code below can't handle that one case. */
4532 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4533 return TRUE;
4534
4535 /* Get the number of trailing zeros. */
4536 lowbit = ffs((int) i) - 1;
4537
4538 /* Only even shifts are allowed in ARM mode so round down to the
4539 nearest even number. */
4540 if (TARGET_ARM)
4541 lowbit &= ~1;
4542
4543 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4544 return TRUE;
4545
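/* Some illustrative examples for the checks below: 0xf000000f is valid in
   ARM mode (0xff rotated right by 4, wrapping around the top of the word);
   0x00ab00ab and 0xab00ab00 are valid only in Thumb-2, as replicated
   16-bit patterns.  */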
4546 if (TARGET_ARM)
4547 {
4548 /* Allow rotated constants in ARM mode. */
4549 if (lowbit <= 4
4550 && ((i & ~0xc000003f) == 0
4551 || (i & ~0xf000000f) == 0
4552 || (i & ~0xfc000003) == 0))
4553 return TRUE;
4554 }
4555 else if (TARGET_THUMB2)
4556 {
4557 HOST_WIDE_INT v;
4558
4559 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4560 v = i & 0xff;
4561 v |= v << 16;
4562 if (i == v || i == (v | (v << 8)))
4563 return TRUE;
4564
4565 /* Allow repeated pattern 0xXY00XY00. */
4566 v = i & 0xff00;
4567 v |= v << 16;
4568 if (i == v)
4569 return TRUE;
4570 }
4571 else if (TARGET_HAVE_MOVT)
4572 {
4573 /* Thumb-1 Targets with MOVT. */
4574 if (i > 0xffff)
4575 return FALSE;
4576 else
4577 return TRUE;
4578 }
4579
4580 return FALSE;
4581 }
4582
4583 /* Return true if I is a valid constant for the operation CODE. */
4584 int
4585 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4586 {
4587 if (const_ok_for_arm (i))
4588 return 1;
4589
4590 switch (code)
4591 {
4592 case SET:
4593 /* See if we can use movw. */
4594 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4595 return 1;
4596 else
4597 /* Otherwise, try mvn. */
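/* e.g. a SET of 0xffffff00 is not itself a valid immediate, but its
   complement 0xff is, so a single MVN suffices.  */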
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4599
4600 case PLUS:
4601 /* See if we can use addw or subw. */
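/* (addw/subw accept a full 12-bit immediate, so any value in the range
   [-4095, 4095] needs no further synthesis.)  */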
4602 if (TARGET_THUMB2
4603 && ((i & 0xfffff000) == 0
4604 || ((-i) & 0xfffff000) == 0))
4605 return 1;
4606 /* Fall through. */
4607 case COMPARE:
4608 case EQ:
4609 case NE:
4610 case GT:
4611 case LE:
4612 case LT:
4613 case GE:
4614 case GEU:
4615 case LTU:
4616 case GTU:
4617 case LEU:
4618 case UNORDERED:
4619 case ORDERED:
4620 case UNEQ:
4621 case UNGE:
4622 case UNLT:
4623 case UNGT:
4624 case UNLE:
4625 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4626
4627 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4628 case XOR:
4629 return 0;
4630
4631 case IOR:
4632 if (TARGET_THUMB2)
4633 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4634 return 0;
4635
4636 case AND:
4637 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4638
4639 default:
4640 gcc_unreachable ();
4641 }
4642 }
4643
4644 /* Return true if I is a valid di mode constant for the operation CODE. */
4645 int
4646 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4647 {
4648 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4649 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4650 rtx hi = GEN_INT (hi_val);
4651 rtx lo = GEN_INT (lo_val);
4652
4653 if (TARGET_THUMB1)
4654 return 0;
4655
4656 switch (code)
4657 {
4658 case AND:
4659 case IOR:
4660 case XOR:
4661 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4662 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4663 case PLUS:
4664 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4665
4666 default:
4667 return 0;
4668 }
4669 }
4670
4671 /* Emit a sequence of insns to handle a large constant.
4672 CODE is the code of the operation required, it can be any of SET, PLUS,
4673 IOR, AND, XOR, MINUS;
4674 MODE is the mode in which the operation is being performed;
4675 VAL is the integer to operate on;
4676 SOURCE is the other operand (a register, or a null-pointer for SET);
4677 SUBTARGETS means it is safe to create scratch registers if that will
4678 either produce a simpler sequence, or we will want to cse the values.
4679 Return value is the number of insns emitted. */
4680
4681 /* ??? Tweak this for thumb2. */
4682 int
4683 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4684 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4685 {
4686 rtx cond;
4687
4688 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4689 cond = COND_EXEC_TEST (PATTERN (insn));
4690 else
4691 cond = NULL_RTX;
4692
4693 if (subtargets || code == SET
4694 || (REG_P (target) && REG_P (source)
4695 && REGNO (target) != REGNO (source)))
4696 {
4697 /* After arm_reorg has been called, we can't fix up expensive
4698 constants by pushing them into memory so we must synthesize
4699 them in-line, regardless of the cost. This is only likely to
4700 be more costly on chips that have load delay slots and we are
4701 compiling without running the scheduler (so no splitting
4702 occurred before the final instruction emission).
4703
4704 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4705 */
4706 if (!cfun->machine->after_arm_reorg
4707 && !cond
4708 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4709 1, 0)
4710 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4711 + (code != SET))))
4712 {
4713 if (code == SET)
4714 {
4715 /* Currently SET is the only monadic value for CODE; all
4716 the rest are dyadic. */
4717 if (TARGET_USE_MOVT)
4718 arm_emit_movpair (target, GEN_INT (val));
4719 else
4720 emit_set_insn (target, GEN_INT (val));
4721
4722 return 1;
4723 }
4724 else
4725 {
4726 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4727
4728 if (TARGET_USE_MOVT)
4729 arm_emit_movpair (temp, GEN_INT (val));
4730 else
4731 emit_set_insn (temp, GEN_INT (val));
4732
4733 /* For MINUS, the constant is the value being subtracted from (the
4734 minuend), since we never have subtraction of a constant. */
4735 if (code == MINUS)
4736 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4737 else
4738 emit_set_insn (target,
4739 gen_rtx_fmt_ee (code, mode, source, temp));
4740 return 2;
4741 }
4742 }
4743 }
4744
4745 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4746 1);
4747 }
4748
4749 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4750 ARM/Thumb-2 immediates and add up to VAL.
4751 The function's return value gives the number of insns required. */
4752 static int
4753 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4754 struct four_ints *return_sequence)
4755 {
4756 int best_consecutive_zeros = 0;
4757 int i;
4758 int best_start = 0;
4759 int insns1, insns2;
4760 struct four_ints tmp_sequence;
4761
4762 /* If we aren't targeting ARM, the best place to start is always at
4763 the bottom, otherwise look more closely. */
4764 if (TARGET_ARM)
4765 {
4766 for (i = 0; i < 32; i += 2)
4767 {
4768 int consecutive_zeros = 0;
4769
4770 if (!(val & (3 << i)))
4771 {
4772 while ((i < 32) && !(val & (3 << i)))
4773 {
4774 consecutive_zeros += 2;
4775 i += 2;
4776 }
4777 if (consecutive_zeros > best_consecutive_zeros)
4778 {
4779 best_consecutive_zeros = consecutive_zeros;
4780 best_start = i - consecutive_zeros;
4781 }
4782 i -= 2;
4783 }
4784 }
4785 }
4786
4787 /* So long as it won't require any more insns to do so, it's
4788 desirable to emit a small constant (in bits 0...9) in the last
4789 insn. This way there is more chance that it can be combined with
4790 a later addressing insn to form a pre-indexed load or store
4791 operation. Consider:
4792
4793 *((volatile int *)0xe0000100) = 1;
4794 *((volatile int *)0xe0000110) = 2;
4795
4796 We want this to wind up as:
4797
4798 mov rA, #0xe0000000
4799 mov rB, #1
4800 str rB, [rA, #0x100]
4801 mov rB, #2
4802 str rB, [rA, #0x110]
4803
4804 rather than having to synthesize both large constants from scratch.
4805
4806 Therefore, we calculate how many insns would be required to emit
4807 the constant starting from `best_start', and also starting from
4808 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4809 yield a shorter sequence, we may as well use zero. */
4810 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4811 if (best_start != 0
4812 && ((HOST_WIDE_INT_1U << best_start) < val))
4813 {
4814 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4815 if (insns2 <= insns1)
4816 {
4817 *return_sequence = tmp_sequence;
4818 insns1 = insns2;
4819 }
4820 }
4821
4822 return insns1;
4823 }
4824
4825 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4826 static int
4827 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4828 struct four_ints *return_sequence, int i)
4829 {
4830 int remainder = val & 0xffffffff;
4831 int insns = 0;
4832
4833 /* Try and find a way of doing the job in either two or three
4834 instructions.
4835
4836 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4837 location. We start at position I. This may be the MSB, or
4838 optimal_immediate_sequence may have positioned it at the largest block
4839 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4840 wrapping around to the top of the word when we drop off the bottom.
4841 In the worst case this code should produce no more than four insns.
4842
4843 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4844 constants, shifted to any arbitrary location. We should always start
4845 at the MSB. */
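/* As a worked illustration (the value here is chosen purely as an
   example and is not taken from the surrounding code): in ARM mode,
   with CODE == PLUS and VAL == 0x00ff00ff, the loop below peels off
   the two rotated 8-bit immediates 0x00ff0000 and 0x000000ff, so the
   whole addition can be emitted roughly as

     add rd, rn, #0x00ff0000
     add rd, rd, #0x000000ff

   with the register names being placeholders.  In the worst case all
   four slots of RETURN_SEQUENCE are used, as the assertion below
   enforces.  */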
4846 do
4847 {
4848 int end;
4849 unsigned int b1, b2, b3, b4;
4850 unsigned HOST_WIDE_INT result;
4851 int loc;
4852
4853 gcc_assert (insns < 4);
4854
4855 if (i <= 0)
4856 i += 32;
4857
4858 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4859 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4860 {
4861 loc = i;
4862 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4863 /* We can use addw/subw for the last 12 bits. */
4864 result = remainder;
4865 else
4866 {
4867 /* Use an 8-bit shifted/rotated immediate. */
4868 end = i - 8;
4869 if (end < 0)
4870 end += 32;
4871 result = remainder & ((0x0ff << end)
4872 | ((i < end) ? (0xff >> (32 - end))
4873 : 0));
4874 i -= 8;
4875 }
4876 }
4877 else
4878 {
4879 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4880 arbitrary shifts. */
4881 i -= TARGET_ARM ? 2 : 1;
4882 continue;
4883 }
4884
4885 /* Next, see if we can do a better job with a thumb2 replicated
4886 constant.
4887
4888 We do it this way around to catch the cases like 0x01F001E0 where
4889 two 8-bit immediates would work, but a replicated constant would
4890 make it worse.
4891
4892 TODO: 16-bit constants that don't clear all the bits, but still win.
4893 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4894 if (TARGET_THUMB2)
4895 {
4896 b1 = (remainder & 0xff000000) >> 24;
4897 b2 = (remainder & 0x00ff0000) >> 16;
4898 b3 = (remainder & 0x0000ff00) >> 8;
4899 b4 = remainder & 0xff;
4900
4901 if (loc > 24)
4902 {
4903 /* The 8-bit immediate already found clears b1 (and maybe b2),
4904 but must leave b3 and b4 alone. */
4905
4906 /* First try to find a 32-bit replicated constant that clears
4907 almost everything. We can assume that we can't do it in one,
4908 or else we wouldn't be here. */
4909 unsigned int tmp = b1 & b2 & b3 & b4;
4910 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4911 + (tmp << 24);
4912 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4913 + (tmp == b3) + (tmp == b4);
4914 if (tmp
4915 && (matching_bytes >= 3
4916 || (matching_bytes == 2
4917 && const_ok_for_op (remainder & ~tmp2, code))))
4918 {
4919 /* At least 3 of the bytes match, and the fourth has at
4920 least as many bits set, or two of the bytes match
4921 and it will only require one more insn to finish. */
4922 result = tmp2;
4923 i = tmp != b1 ? 32
4924 : tmp != b2 ? 24
4925 : tmp != b3 ? 16
4926 : 8;
4927 }
4928
4929 /* Second, try to find a 16-bit replicated constant that can
4930 leave three of the bytes clear. If b2 or b4 is already
4931 zero, then we can. If the 8-bit from above would not
4932 clear b2 anyway, then we still win. */
4933 else if (b1 == b3 && (!b2 || !b4
4934 || (remainder & 0x00ff0000 & ~result)))
4935 {
4936 result = remainder & 0xff00ff00;
4937 i = 24;
4938 }
4939 }
4940 else if (loc > 16)
4941 {
4942 /* The 8-bit immediate already found clears b2 (and maybe b3)
4943 and we don't get here unless b1 is already clear, but it will
4944 leave b4 unchanged. */
4945
4946 /* If we can clear b2 and b4 at once, then we win, since the
4947 8-bits couldn't possibly reach that far. */
4948 if (b2 == b4)
4949 {
4950 result = remainder & 0x00ff00ff;
4951 i = 16;
4952 }
4953 }
4954 }
4955
4956 return_sequence->i[insns++] = result;
4957 remainder &= ~result;
4958
4959 if (code == SET || code == MINUS)
4960 code = PLUS;
4961 }
4962 while (remainder);
4963
4964 return insns;
4965 }
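
/* For reference, the ARM-mode immediate rule relied on above is "an
   8-bit value rotated right by an even amount".  A minimal stand-alone
   check in that spirit (an illustrative sketch only, with a hypothetical
   name; the test actually used throughout this file is const_ok_for_arm)
   could look like:

     static int
     is_rotated_8bit_immediate (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
	 {
	   // Rotate X left by ROT; if the result fits in eight bits,
	   // then X is that 8-bit value rotated right by ROT.
	   unsigned int v = (x << rot) | (rot ? x >> (32 - rot) : 0);
	   if ((v & ~0xffu) == 0)
	     return 1;
	 }
       return 0;
     }

   Under this rule 0xff000000 and 0x000003fc are single immediates,
   while 0x000001ff (nine contiguous bits) is not.  */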
4966
4967 /* Emit an instruction with the indicated PATTERN. If COND is
4968 non-NULL, conditionalize the execution of the instruction on COND
4969 being true. */
4970
4971 static void
4972 emit_constant_insn (rtx cond, rtx pattern)
4973 {
4974 if (cond)
4975 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4976 emit_insn (pattern);
4977 }
4978
4979 /* As above, but extra parameter GENERATE which, if clear, suppresses
4980 RTL generation. */
4981
4982 static int
4983 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4984 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4985 int subtargets, int generate)
4986 {
4987 int can_invert = 0;
4988 int can_negate = 0;
4989 int final_invert = 0;
4990 int i;
4991 int set_sign_bit_copies = 0;
4992 int clear_sign_bit_copies = 0;
4993 int clear_zero_bit_copies = 0;
4994 int set_zero_bit_copies = 0;
4995 int insns = 0, neg_insns, inv_insns;
4996 unsigned HOST_WIDE_INT temp1, temp2;
4997 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4998 struct four_ints *immediates;
4999 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5000
5001 /* Find out which operations are safe for a given CODE. Also do a quick
5002 check for degenerate cases; these can occur when DImode operations
5003 are split. */
5004 switch (code)
5005 {
5006 case SET:
5007 can_invert = 1;
5008 break;
5009
5010 case PLUS:
5011 can_negate = 1;
5012 break;
5013
5014 case IOR:
5015 if (remainder == 0xffffffff)
5016 {
5017 if (generate)
5018 emit_constant_insn (cond,
5019 gen_rtx_SET (target,
5020 GEN_INT (ARM_SIGN_EXTEND (val))));
5021 return 1;
5022 }
5023
5024 if (remainder == 0)
5025 {
5026 if (reload_completed && rtx_equal_p (target, source))
5027 return 0;
5028
5029 if (generate)
5030 emit_constant_insn (cond, gen_rtx_SET (target, source));
5031 return 1;
5032 }
5033 break;
5034
5035 case AND:
5036 if (remainder == 0)
5037 {
5038 if (generate)
5039 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5040 return 1;
5041 }
5042 if (remainder == 0xffffffff)
5043 {
5044 if (reload_completed && rtx_equal_p (target, source))
5045 return 0;
5046 if (generate)
5047 emit_constant_insn (cond, gen_rtx_SET (target, source));
5048 return 1;
5049 }
5050 can_invert = 1;
5051 break;
5052
5053 case XOR:
5054 if (remainder == 0)
5055 {
5056 if (reload_completed && rtx_equal_p (target, source))
5057 return 0;
5058 if (generate)
5059 emit_constant_insn (cond, gen_rtx_SET (target, source));
5060 return 1;
5061 }
5062
5063 if (remainder == 0xffffffff)
5064 {
5065 if (generate)
5066 emit_constant_insn (cond,
5067 gen_rtx_SET (target,
5068 gen_rtx_NOT (mode, source)));
5069 return 1;
5070 }
5071 final_invert = 1;
5072 break;
5073
5074 case MINUS:
5075 /* We treat MINUS as (val - source), since (source - val) is always
5076 passed as (source + (-val)). */
5077 if (remainder == 0)
5078 {
5079 if (generate)
5080 emit_constant_insn (cond,
5081 gen_rtx_SET (target,
5082 gen_rtx_NEG (mode, source)));
5083 return 1;
5084 }
5085 if (const_ok_for_arm (val))
5086 {
5087 if (generate)
5088 emit_constant_insn (cond,
5089 gen_rtx_SET (target,
5090 gen_rtx_MINUS (mode, GEN_INT (val),
5091 source)));
5092 return 1;
5093 }
5094
5095 break;
5096
5097 default:
5098 gcc_unreachable ();
5099 }
5100
5101 /* If we can do it in one insn get out quickly. */
5102 if (const_ok_for_op (val, code))
5103 {
5104 if (generate)
5105 emit_constant_insn (cond,
5106 gen_rtx_SET (target,
5107 (source
5108 ? gen_rtx_fmt_ee (code, mode, source,
5109 GEN_INT (val))
5110 : GEN_INT (val))));
5111 return 1;
5112 }
5113
5114 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5115 insn. */
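/* For example (illustrative values): AND with 0xffff can become a
   single uxth, and on Thumb-2 AND with 0x1ffff a single
   ubfx rd, rn, #0, #17, the register names being placeholders.  */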
5116 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5117 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5118 {
5119 if (generate)
5120 {
5121 if (mode == SImode && i == 16)
5122 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5123 smaller insn. */
5124 emit_constant_insn (cond,
5125 gen_zero_extendhisi2
5126 (target, gen_lowpart (HImode, source)));
5127 else
5128 /* Extz only supports SImode, but we can coerce the operands
5129 into that mode. */
5130 emit_constant_insn (cond,
5131 gen_extzv_t2 (gen_lowpart (SImode, target),
5132 gen_lowpart (SImode, source),
5133 GEN_INT (i), const0_rtx));
5134 }
5135
5136 return 1;
5137 }
5138
5139 /* Calculate a few attributes that may be useful for specific
5140 optimizations. */
5141 /* Count number of leading zeros. */
5142 for (i = 31; i >= 0; i--)
5143 {
5144 if ((remainder & (1 << i)) == 0)
5145 clear_sign_bit_copies++;
5146 else
5147 break;
5148 }
5149
5150 /* Count number of leading 1's. */
5151 for (i = 31; i >= 0; i--)
5152 {
5153 if ((remainder & (1 << i)) != 0)
5154 set_sign_bit_copies++;
5155 else
5156 break;
5157 }
5158
5159 /* Count number of trailing zeros. */
5160 for (i = 0; i <= 31; i++)
5161 {
5162 if ((remainder & (1 << i)) == 0)
5163 clear_zero_bit_copies++;
5164 else
5165 break;
5166 }
5167
5168 /* Count number of trailing 1's. */
5169 for (i = 0; i <= 31; i++)
5170 {
5171 if ((remainder & (1 << i)) != 0)
5172 set_zero_bit_copies++;
5173 else
5174 break;
5175 }
5176
5177 switch (code)
5178 {
5179 case SET:
5180 /* See if we can do this by sign_extending a constant that is known
5181 to be negative. This is a good way of doing it, since the shift
5182 may well merge into a subsequent insn. */
5183 if (set_sign_bit_copies > 1)
5184 {
5185 if (const_ok_for_arm
5186 (temp1 = ARM_SIGN_EXTEND (remainder
5187 << (set_sign_bit_copies - 1))))
5188 {
5189 if (generate)
5190 {
5191 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5192 emit_constant_insn (cond,
5193 gen_rtx_SET (new_src, GEN_INT (temp1)));
5194 emit_constant_insn (cond,
5195 gen_ashrsi3 (target, new_src,
5196 GEN_INT (set_sign_bit_copies - 1)));
5197 }
5198 return 2;
5199 }
5200 /* For an inverted constant, we will need to set the low bits;
5201 these will be shifted out of harm's way. */
5202 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5203 if (const_ok_for_arm (~temp1))
5204 {
5205 if (generate)
5206 {
5207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5208 emit_constant_insn (cond,
5209 gen_rtx_SET (new_src, GEN_INT (temp1)));
5210 emit_constant_insn (cond,
5211 gen_ashrsi3 (target, new_src,
5212 GEN_INT (set_sign_bit_copies - 1)));
5213 }
5214 return 2;
5215 }
5216 }
5217
5218 /* See if we can calculate the value as the difference between two
5219 valid immediates. */
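/* For instance (an illustrative value, ignoring the movw/movt
   alternatives handled elsewhere): 0x0001fffe is not a valid immediate,
   but it can be reached as 0x00020000 - 0x2, both of which are,
   giving roughly

     mov rt, #0x20000
     sub rd, rt, #2

   with the register names being placeholders.  */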
5220 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5221 {
5222 int topshift = clear_sign_bit_copies & ~1;
5223
5224 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5225 & (0xff000000 >> topshift));
5226
5227 /* If temp1 is zero, then that means the 9 most significant
5228 bits of remainder were 1 and we've caused it to overflow.
5229 When topshift is 0 we don't need to do anything since we
5230 can borrow from 'bit 32'. */
5231 if (temp1 == 0 && topshift != 0)
5232 temp1 = 0x80000000 >> (topshift - 1);
5233
5234 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5235
5236 if (const_ok_for_arm (temp2))
5237 {
5238 if (generate)
5239 {
5240 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5241 emit_constant_insn (cond,
5242 gen_rtx_SET (new_src, GEN_INT (temp1)));
5243 emit_constant_insn (cond,
5244 gen_addsi3 (target, new_src,
5245 GEN_INT (-temp2)));
5246 }
5247
5248 return 2;
5249 }
5250 }
5251
5252 /* See if we can generate this by setting the bottom (or the top)
5253 16 bits, and then shifting these into the other half of the
5254 word. We only look for the simplest cases, to do more would cost
5255 too much. Be careful, however, not to generate this when the
5256 alternative would take fewer insns. */
5257 if (val & 0xffff0000)
5258 {
5259 temp1 = remainder & 0xffff0000;
5260 temp2 = remainder & 0x0000ffff;
5261
5262 /* Overlaps outside this range are best done using other methods. */
5263 for (i = 9; i < 24; i++)
5264 {
5265 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5266 && !const_ok_for_arm (temp2))
5267 {
5268 rtx new_src = (subtargets
5269 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5270 : target);
5271 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5272 source, subtargets, generate);
5273 source = new_src;
5274 if (generate)
5275 emit_constant_insn
5276 (cond,
5277 gen_rtx_SET
5278 (target,
5279 gen_rtx_IOR (mode,
5280 gen_rtx_ASHIFT (mode, source,
5281 GEN_INT (i)),
5282 source)));
5283 return insns + 1;
5284 }
5285 }
5286
5287 /* Don't duplicate cases already considered. */
5288 for (i = 17; i < 24; i++)
5289 {
5290 if (((temp1 | (temp1 >> i)) == remainder)
5291 && !const_ok_for_arm (temp1))
5292 {
5293 rtx new_src = (subtargets
5294 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5295 : target);
5296 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5297 source, subtargets, generate);
5298 source = new_src;
5299 if (generate)
5300 emit_constant_insn
5301 (cond,
5302 gen_rtx_SET (target,
5303 gen_rtx_IOR
5304 (mode,
5305 gen_rtx_LSHIFTRT (mode, source,
5306 GEN_INT (i)),
5307 source)));
5308 return insns + 1;
5309 }
5310 }
5311 }
5312 break;
5313
5314 case IOR:
5315 case XOR:
5316 /* If we have IOR or XOR, and the constant can be loaded in a
5317 single instruction, and we can find a temporary to put it in,
5318 then this can be done in two instructions instead of 3-4. */
5319 if (subtargets
5320 /* TARGET can't be NULL if SUBTARGETS is 0. */
5321 || (reload_completed && !reg_mentioned_p (target, source)))
5322 {
5323 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5324 {
5325 if (generate)
5326 {
5327 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5328
5329 emit_constant_insn (cond,
5330 gen_rtx_SET (sub, GEN_INT (val)));
5331 emit_constant_insn (cond,
5332 gen_rtx_SET (target,
5333 gen_rtx_fmt_ee (code, mode,
5334 source, sub)));
5335 }
5336 return 2;
5337 }
5338 }
5339
5340 if (code == XOR)
5341 break;
5342
5343 /* Convert
5344 x = y | constant (which is composed of set_sign_bit_copies leading 1s
5345 followed by 0s, e.g. 0xfff00000) into
5346 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5347
5348 This can be done in 2 instructions by using shifts with mov or mvn.
5349 e.g. for
5350 x = x | 0xfff00000;
5351 we generate:
5352 mvn r0, r0, asl #12
5353 mvn r0, r0, lsr #12 */
5354 if (set_sign_bit_copies > 8
5355 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360 rtx shift = GEN_INT (set_sign_bit_copies);
5361
5362 emit_constant_insn
5363 (cond,
5364 gen_rtx_SET (sub,
5365 gen_rtx_NOT (mode,
5366 gen_rtx_ASHIFT (mode,
5367 source,
5368 shift))));
5369 emit_constant_insn
5370 (cond,
5371 gen_rtx_SET (target,
5372 gen_rtx_NOT (mode,
5373 gen_rtx_LSHIFTRT (mode, sub,
5374 shift))));
5375 }
5376 return 2;
5377 }
5378
5379 /* Convert
5380 x = y | constant (which has set_zero_bit_copies trailing ones)
5381 to
5382 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5383
5384 E.g. for r0 = r0 | 0xfff
5385 we generate:
5386 mvn r0, r0, lsr #12
5387 mvn r0, r0, asl #12 */
5388
5389 if (set_zero_bit_copies > 8
5390 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5391 {
5392 if (generate)
5393 {
5394 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5395 rtx shift = GEN_INT (set_zero_bit_copies);
5396
5397 emit_constant_insn
5398 (cond,
5399 gen_rtx_SET (sub,
5400 gen_rtx_NOT (mode,
5401 gen_rtx_LSHIFTRT (mode,
5402 source,
5403 shift))));
5404 emit_constant_insn
5405 (cond,
5406 gen_rtx_SET (target,
5407 gen_rtx_NOT (mode,
5408 gen_rtx_ASHIFT (mode, sub,
5409 shift))));
5410 }
5411 return 2;
5412 }
5413
5414 /* This will never be reached for Thumb2 because orn is a valid
5415 instruction. This is for Thumb1 and the ARM 32 bit cases.
5416
5417 x = y | constant (such that ~constant is a valid constant)
5418 Transform this to
5419 x = ~(~y & ~constant).
5420 */
5421 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5422 {
5423 if (generate)
5424 {
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 emit_constant_insn (cond,
5427 gen_rtx_SET (sub,
5428 gen_rtx_NOT (mode, source)));
5429 source = sub;
5430 if (subtargets)
5431 sub = gen_reg_rtx (mode);
5432 emit_constant_insn (cond,
5433 gen_rtx_SET (sub,
5434 gen_rtx_AND (mode, source,
5435 GEN_INT (temp1))));
5436 emit_constant_insn (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode, sub)));
5439 }
5440 return 3;
5441 }
5442 break;
5443
5444 case AND:
5445 /* See if two shifts will do 2 or more insns' worth of work. */
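/* As an illustration (example mask only): x & 0x0000fffe is not a
   single AND or BIC, but clearing bit 0 first and then shifting the
   top half away gives roughly

     bic rt, rs, #1
     lsl rt, rt, #16
     lsr rd, rt, #16

   i.e. one recursive arm_gen_constant call plus the two shifts counted
   below, register names being placeholders.  */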
5446 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5447 {
5448 HOST_WIDE_INT shift_mask = ((0xffffffff
5449 << (32 - clear_sign_bit_copies))
5450 & 0xffffffff);
5451
5452 if ((remainder | shift_mask) != 0xffffffff)
5453 {
5454 HOST_WIDE_INT new_val
5455 = ARM_SIGN_EXTEND (remainder | shift_mask);
5456
5457 if (generate)
5458 {
5459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5460 insns = arm_gen_constant (AND, SImode, cond, new_val,
5461 new_src, source, subtargets, 1);
5462 source = new_src;
5463 }
5464 else
5465 {
5466 rtx targ = subtargets ? NULL_RTX : target;
5467 insns = arm_gen_constant (AND, mode, cond, new_val,
5468 targ, source, subtargets, 0);
5469 }
5470 }
5471
5472 if (generate)
5473 {
5474 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5475 rtx shift = GEN_INT (clear_sign_bit_copies);
5476
5477 emit_insn (gen_ashlsi3 (new_src, source, shift));
5478 emit_insn (gen_lshrsi3 (target, new_src, shift));
5479 }
5480
5481 return insns + 2;
5482 }
5483
5484 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5485 {
5486 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5487
5488 if ((remainder | shift_mask) != 0xffffffff)
5489 {
5490 HOST_WIDE_INT new_val
5491 = ARM_SIGN_EXTEND (remainder | shift_mask);
5492 if (generate)
5493 {
5494 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5495
5496 insns = arm_gen_constant (AND, mode, cond, new_val,
5497 new_src, source, subtargets, 1);
5498 source = new_src;
5499 }
5500 else
5501 {
5502 rtx targ = subtargets ? NULL_RTX : target;
5503
5504 insns = arm_gen_constant (AND, mode, cond, new_val,
5505 targ, source, subtargets, 0);
5506 }
5507 }
5508
5509 if (generate)
5510 {
5511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5512 rtx shift = GEN_INT (clear_zero_bit_copies);
5513
5514 emit_insn (gen_lshrsi3 (new_src, source, shift));
5515 emit_insn (gen_ashlsi3 (target, new_src, shift));
5516 }
5517
5518 return insns + 2;
5519 }
5520
5521 break;
5522
5523 default:
5524 break;
5525 }
5526
5527 /* Calculate what the instruction sequences would be if we generated it
5528 normally, negated, or inverted. */
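/* For example (illustrative value, ARM mode): PLUS with 0xfffffeff
   needs four immediates directly, but its negation 0x00000101 needs
   only two, so the negated sequence is preferred below and the
   addition is emitted roughly as two subtractions (#0x100 and #1).  */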
5529 if (code == AND)
5530 /* AND cannot be split into multiple insns, so invert and use BIC. */
5531 insns = 99;
5532 else
5533 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5534
5535 if (can_negate)
5536 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5537 &neg_immediates);
5538 else
5539 neg_insns = 99;
5540
5541 if (can_invert || final_invert)
5542 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5543 &inv_immediates);
5544 else
5545 inv_insns = 99;
5546
5547 immediates = &pos_immediates;
5548
5549 /* Is the negated immediate sequence more efficient? */
5550 if (neg_insns < insns && neg_insns <= inv_insns)
5551 {
5552 insns = neg_insns;
5553 immediates = &neg_immediates;
5554 }
5555 else
5556 can_negate = 0;
5557
5558 /* Is the inverted immediate sequence more efficient?
5559 We must allow for an extra NOT instruction for XOR operations, although
5560 there is some chance that the final 'mvn' will get optimized later. */
5561 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5562 {
5563 insns = inv_insns;
5564 immediates = &inv_immediates;
5565 }
5566 else
5567 {
5568 can_invert = 0;
5569 final_invert = 0;
5570 }
5571
5572 /* Now output the chosen sequence as instructions. */
5573 if (generate)
5574 {
5575 for (i = 0; i < insns; i++)
5576 {
5577 rtx new_src, temp1_rtx;
5578
5579 temp1 = immediates->i[i];
5580
5581 if (code == SET || code == MINUS)
5582 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5583 else if ((final_invert || i < (insns - 1)) && subtargets)
5584 new_src = gen_reg_rtx (mode);
5585 else
5586 new_src = target;
5587
5588 if (can_invert)
5589 temp1 = ~temp1;
5590 else if (can_negate)
5591 temp1 = -temp1;
5592
5593 temp1 = trunc_int_for_mode (temp1, mode);
5594 temp1_rtx = GEN_INT (temp1);
5595
5596 if (code == SET)
5597 ;
5598 else if (code == MINUS)
5599 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5600 else
5601 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5602
5603 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5604 source = new_src;
5605
5606 if (code == SET)
5607 {
5608 can_negate = can_invert;
5609 can_invert = 0;
5610 code = PLUS;
5611 }
5612 else if (code == MINUS)
5613 code = PLUS;
5614 }
5615 }
5616
5617 if (final_invert)
5618 {
5619 if (generate)
5620 emit_constant_insn (cond, gen_rtx_SET (target,
5621 gen_rtx_NOT (mode, source)));
5622 insns++;
5623 }
5624
5625 return insns;
5626 }
5627
5628 /* Return TRUE if op is a constant where both the low and top words are
5629 suitable for RSB/RSC instructions. This is never true for Thumb, since
5630 we do not have RSC in that case. */
5631 static bool
5632 arm_const_double_prefer_rsbs_rsc (rtx op)
5633 {
5634 /* Thumb lacks RSC, so we never prefer that sequence. */
5635 if (TARGET_THUMB || !CONST_INT_P (op))
5636 return false;
5637 HOST_WIDE_INT hi, lo;
5638 lo = UINTVAL (op) & 0xffffffffULL;
5639 hi = UINTVAL (op) >> 32;
5640 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5641 }
5642
5643 /* Canonicalize a comparison so that we are more likely to recognize it.
5644 This can be done for a few constant compares, where we can make the
5645 immediate value easier to load. */
5646
5647 static void
5648 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5649 bool op0_preserve_value)
5650 {
5651 machine_mode mode;
5652 unsigned HOST_WIDE_INT i, maxval;
5653
5654 mode = GET_MODE (*op0);
5655 if (mode == VOIDmode)
5656 mode = GET_MODE (*op1);
5657
5658 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5659
5660 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5661 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5662 either reversed or (for constant OP1) adjusted to GE/LT.
5663 Similarly for GTU/LEU in Thumb mode. */
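/* For instance (illustrative), a DImode x > 5 is typically rewritten
   below as x >= 6, and x <= 5 as x < 6, so that the available
   cmp/sbc sequence for GE/LT can be used.  */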
5664 if (mode == DImode)
5665 {
5666
5667 if (*code == GT || *code == LE
5668 || *code == GTU || *code == LEU)
5669 {
5670 /* Missing comparison. First try to use an available
5671 comparison. */
5672 if (CONST_INT_P (*op1))
5673 {
5674 i = INTVAL (*op1);
5675 switch (*code)
5676 {
5677 case GT:
5678 case LE:
5679 if (i != maxval)
5680 {
5681 /* Try to convert to GE/LT, unless that would be more
5682 expensive. */
5683 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5684 && arm_const_double_prefer_rsbs_rsc (*op1))
5685 return;
5686 *op1 = GEN_INT (i + 1);
5687 *code = *code == GT ? GE : LT;
5688 }
5689 else
5690 {
5691 /* GT maxval is always false, LE maxval is always true.
5692 We can't fold that away here as we must make a
5693 comparison, but we can fold them to comparisons
5694 with the same result that can be handled:
5695 op0 GT maxval -> op0 LT minval
5696 op0 LE maxval -> op0 GE minval
5697 where minval = (-maxval - 1). */
5698 *op1 = GEN_INT (-maxval - 1);
5699 *code = *code == GT ? LT : GE;
5700 }
5701 return;
5702
5703 case GTU:
5704 case LEU:
5705 if (i != ~((unsigned HOST_WIDE_INT) 0))
5706 {
5707 /* Try to convert to GEU/LTU, unless that would
5708 be more expensive. */
5709 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5710 && arm_const_double_prefer_rsbs_rsc (*op1))
5711 return;
5712 *op1 = GEN_INT (i + 1);
5713 *code = *code == GTU ? GEU : LTU;
5714 }
5715 else
5716 {
5717 /* GTU ~0 is always false, LEU ~0 is always true.
5718 We can't fold that away here as we must make a
5719 comparison, but we can fold them to comparisons
5720 with the same result that can be handled:
5721 op0 GTU ~0 -> op0 LTU 0
5722 op0 LEU ~0 -> op0 GEU 0. */
5723 *op1 = const0_rtx;
5724 *code = *code == GTU ? LTU : GEU;
5725 }
5726 return;
5727
5728 default:
5729 gcc_unreachable ();
5730 }
5731 }
5732
5733 if (!op0_preserve_value)
5734 {
5735 std::swap (*op0, *op1);
5736 *code = (int)swap_condition ((enum rtx_code)*code);
5737 }
5738 }
5739 return;
5740 }
5741
5742 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5743 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5744 to facilitate possible combining with a cmp into 'ands'. */
5745 if (mode == SImode
5746 && GET_CODE (*op0) == ZERO_EXTEND
5747 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5748 && GET_MODE (XEXP (*op0, 0)) == QImode
5749 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5750 && subreg_lowpart_p (XEXP (*op0, 0))
5751 && *op1 == const0_rtx)
5752 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5753 GEN_INT (255));
5754
5755 /* Comparisons smaller than DImode. Only adjust comparisons against
5756 an out-of-range constant. */
5757 if (!CONST_INT_P (*op1)
5758 || const_ok_for_arm (INTVAL (*op1))
5759 || const_ok_for_arm (- INTVAL (*op1)))
5760 return;
5761
5762 i = INTVAL (*op1);
5763
5764 switch (*code)
5765 {
5766 case EQ:
5767 case NE:
5768 return;
5769
5770 case GT:
5771 case LE:
5772 if (i != maxval
5773 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5774 {
5775 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5776 *code = *code == GT ? GE : LT;
5777 return;
5778 }
5779 break;
5780
5781 case GE:
5782 case LT:
5783 if (i != ~maxval
5784 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5785 {
5786 *op1 = GEN_INT (i - 1);
5787 *code = *code == GE ? GT : LE;
5788 return;
5789 }
5790 break;
5791
5792 case GTU:
5793 case LEU:
5794 if (i != ~((unsigned HOST_WIDE_INT) 0)
5795 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5796 {
5797 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5798 *code = *code == GTU ? GEU : LTU;
5799 return;
5800 }
5801 break;
5802
5803 case GEU:
5804 case LTU:
5805 if (i != 0
5806 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5807 {
5808 *op1 = GEN_INT (i - 1);
5809 *code = *code == GEU ? GTU : LEU;
5810 return;
5811 }
5812 break;
5813
5814 default:
5815 gcc_unreachable ();
5816 }
5817 }
5818
5819
5820 /* Define how to find the value returned by a function. */
5821
5822 static rtx
5823 arm_function_value (const_tree type, const_tree func,
5824 bool outgoing ATTRIBUTE_UNUSED)
5825 {
5826 machine_mode mode;
5827 int unsignedp ATTRIBUTE_UNUSED;
5828 rtx r ATTRIBUTE_UNUSED;
5829
5830 mode = TYPE_MODE (type);
5831
5832 if (TARGET_AAPCS_BASED)
5833 return aapcs_allocate_return_reg (mode, type, func);
5834
5835 /* Promote integer types. */
5836 if (INTEGRAL_TYPE_P (type))
5837 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5838
5839 /* Promote small structs returned in a register to full-word size
5840 for big-endian AAPCS. */
5841 if (arm_return_in_msb (type))
5842 {
5843 HOST_WIDE_INT size = int_size_in_bytes (type);
5844 if (size % UNITS_PER_WORD != 0)
5845 {
5846 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5847 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5848 }
5849 }
5850
5851 return arm_libcall_value_1 (mode);
5852 }
5853
5854 /* libcall hashtable helpers. */
5855
5856 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5857 {
5858 static inline hashval_t hash (const rtx_def *);
5859 static inline bool equal (const rtx_def *, const rtx_def *);
5860 static inline void remove (rtx_def *);
5861 };
5862
5863 inline bool
5864 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5865 {
5866 return rtx_equal_p (p1, p2);
5867 }
5868
5869 inline hashval_t
5870 libcall_hasher::hash (const rtx_def *p1)
5871 {
5872 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5873 }
5874
5875 typedef hash_table<libcall_hasher> libcall_table_type;
5876
5877 static void
5878 add_libcall (libcall_table_type *htab, rtx libcall)
5879 {
5880 *htab->find_slot (libcall, INSERT) = libcall;
5881 }
5882
5883 static bool
5884 arm_libcall_uses_aapcs_base (const_rtx libcall)
5885 {
5886 static bool init_done = false;
5887 static libcall_table_type *libcall_htab = NULL;
5888
5889 if (!init_done)
5890 {
5891 init_done = true;
5892
5893 libcall_htab = new libcall_table_type (31);
5894 add_libcall (libcall_htab,
5895 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5896 add_libcall (libcall_htab,
5897 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5898 add_libcall (libcall_htab,
5899 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5900 add_libcall (libcall_htab,
5901 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5902
5903 add_libcall (libcall_htab,
5904 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5905 add_libcall (libcall_htab,
5906 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5907 add_libcall (libcall_htab,
5908 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5909 add_libcall (libcall_htab,
5910 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5911
5912 add_libcall (libcall_htab,
5913 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5914 add_libcall (libcall_htab,
5915 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5916 add_libcall (libcall_htab,
5917 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5918 add_libcall (libcall_htab,
5919 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5920 add_libcall (libcall_htab,
5921 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5922 add_libcall (libcall_htab,
5923 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5924 add_libcall (libcall_htab,
5925 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5926 add_libcall (libcall_htab,
5927 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5928 add_libcall (libcall_htab,
5929 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5930 add_libcall (libcall_htab,
5931 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5932
5933 /* Values from double-precision helper functions are returned in core
5934 registers if the selected core only supports single-precision
5935 arithmetic, even if we are using the hard-float ABI. The same is
5936 true for single-precision helpers except in the case of MVE, because in
5937 MVE we will be using the hard-float ABI on a CPU which doesn't support
5938 single-precision operations in hardware. In MVE the following check
5939 enables use of emulation for the single-precision arithmetic
5940 operations. */
5941 if (TARGET_HAVE_MVE)
5942 {
5943 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5944 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5945 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5946 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5947 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5948 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5949 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5950 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5951 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5952 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5953 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5954 }
5955 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5956 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5957 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5958 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5959 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5960 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5961 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5962 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5963 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5964 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5965 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5966 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5967 SFmode));
5968 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5969 DFmode));
5970 add_libcall (libcall_htab,
5971 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5972 }
5973
5974 return libcall && libcall_htab->find (libcall) != NULL;
5975 }
5976
5977 static rtx
5978 arm_libcall_value_1 (machine_mode mode)
5979 {
5980 if (TARGET_AAPCS_BASED)
5981 return aapcs_libcall_value (mode);
5982 else if (TARGET_IWMMXT_ABI
5983 && arm_vector_mode_supported_p (mode))
5984 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5985 else
5986 return gen_rtx_REG (mode, ARG_REGISTER (1));
5987 }
5988
5989 /* Define how to find the value returned by a library function
5990 assuming the value has mode MODE. */
5991
5992 static rtx
5993 arm_libcall_value (machine_mode mode, const_rtx libcall)
5994 {
5995 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5996 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5997 {
5998 /* The following libcalls return their result in integer registers,
5999 even though they return a floating point value. */
6000 if (arm_libcall_uses_aapcs_base (libcall))
6001 return gen_rtx_REG (mode, ARG_REGISTER (1));
6002
6003 }
6004
6005 return arm_libcall_value_1 (mode);
6006 }
6007
6008 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6009
6010 static bool
6011 arm_function_value_regno_p (const unsigned int regno)
6012 {
6013 if (regno == ARG_REGISTER (1)
6014 || (TARGET_32BIT
6015 && TARGET_AAPCS_BASED
6016 && TARGET_HARD_FLOAT
6017 && regno == FIRST_VFP_REGNUM)
6018 || (TARGET_IWMMXT_ABI
6019 && regno == FIRST_IWMMXT_REGNUM))
6020 return true;
6021
6022 return false;
6023 }
6024
6025 /* Determine the amount of memory needed to store the possible return
6026 registers of an untyped call. */
6027 int
6028 arm_apply_result_size (void)
6029 {
6030 int size = 16;
6031
6032 if (TARGET_32BIT)
6033 {
6034 if (TARGET_HARD_FLOAT_ABI)
6035 size += 32;
6036 if (TARGET_IWMMXT_ABI)
6037 size += 8;
6038 }
6039
6040 return size;
6041 }
6042
6043 /* Decide whether TYPE should be returned in memory (true)
6044 or in a register (false). FNTYPE is the type of the function making
6045 the call. */
6046 static bool
6047 arm_return_in_memory (const_tree type, const_tree fntype)
6048 {
6049 HOST_WIDE_INT size;
6050
6051 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6052
6053 if (TARGET_AAPCS_BASED)
6054 {
6055 /* Simple, non-aggregate types (i.e. not including vectors and
6056 complex) are always returned in a register (or registers).
6057 We don't care about which register here, so we can short-cut
6058 some of the detail. */
6059 if (!AGGREGATE_TYPE_P (type)
6060 && TREE_CODE (type) != VECTOR_TYPE
6061 && TREE_CODE (type) != COMPLEX_TYPE)
6062 return false;
6063
6064 /* Any return value that is no larger than one word can be
6065 returned in r0. */
6066 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6067 return false;
6068
6069 /* Check any available co-processors to see if they accept the
6070 type as a register candidate (VFP, for example, can return
6071 some aggregates in consecutive registers). These aren't
6072 available if the call is variadic. */
6073 if (aapcs_select_return_coproc (type, fntype) >= 0)
6074 return false;
6075
6076 /* Vector values should be returned using ARM registers, not
6077 memory (unless they're over 16 bytes, which will break since
6078 we only have four call-clobbered registers to play with). */
6079 if (TREE_CODE (type) == VECTOR_TYPE)
6080 return (size < 0 || size > (4 * UNITS_PER_WORD));
6081
6082 /* The rest go in memory. */
6083 return true;
6084 }
6085
6086 if (TREE_CODE (type) == VECTOR_TYPE)
6087 return (size < 0 || size > (4 * UNITS_PER_WORD));
6088
6089 if (!AGGREGATE_TYPE_P (type)
6090 && TREE_CODE (type) != VECTOR_TYPE)
6091 /* All simple types are returned in registers. */
6092 return false;
6093
6094 if (arm_abi != ARM_ABI_APCS)
6095 {
6096 /* ATPCS and later return aggregate types in memory only if they are
6097 larger than a word (or are variable size). */
6098 return (size < 0 || size > UNITS_PER_WORD);
6099 }
6100
6101 /* For the arm-wince targets we choose to be compatible with Microsoft's
6102 ARM and Thumb compilers, which always return aggregates in memory. */
6103 #ifndef ARM_WINCE
6104 /* All structures/unions bigger than one word are returned in memory.
6105 Also catch the case where int_size_in_bytes returns -1. In this case
6106 the aggregate is either huge or of variable size, and in either case
6107 we will want to return it via memory and not in a register. */
6108 if (size < 0 || size > UNITS_PER_WORD)
6109 return true;
6110
6111 if (TREE_CODE (type) == RECORD_TYPE)
6112 {
6113 tree field;
6114
6115 /* For a struct the APCS says that we only return in a register
6116 if the type is 'integer like' and every addressable element
6117 has an offset of zero. For practical purposes this means
6118 that the structure can have at most one non bit-field element
6119 and that this element must be the first one in the structure. */
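/* As an illustration of this rule (example types only, APCS variant):
   struct { unsigned a : 8, b : 8, c : 8, d : 8; } is returned in r0,
   since every member after the first is a bit-field, whereas the
   two-byte struct { char a; char b; } is returned in memory because
   its second member is addressable.  */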
6120
6121 /* Find the first field, ignoring non FIELD_DECL things which will
6122 have been created by C++. */
6123 /* NOTE: This code is deprecated and has not been updated to handle
6124 DECL_FIELD_ABI_IGNORED. */
6125 for (field = TYPE_FIELDS (type);
6126 field && TREE_CODE (field) != FIELD_DECL;
6127 field = DECL_CHAIN (field))
6128 continue;
6129
6130 if (field == NULL)
6131 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6132
6133 /* Check that the first field is valid for returning in a register. */
6134
6135 /* ... Floats are not allowed */
6136 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6137 return true;
6138
6139 /* ... Aggregates that are not themselves valid for returning in
6140 a register are not allowed. */
6141 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6142 return true;
6143
6144 /* Now check the remaining fields, if any. Only bitfields are allowed,
6145 since they are not addressable. */
6146 for (field = DECL_CHAIN (field);
6147 field;
6148 field = DECL_CHAIN (field))
6149 {
6150 if (TREE_CODE (field) != FIELD_DECL)
6151 continue;
6152
6153 if (!DECL_BIT_FIELD_TYPE (field))
6154 return true;
6155 }
6156
6157 return false;
6158 }
6159
6160 if (TREE_CODE (type) == UNION_TYPE)
6161 {
6162 tree field;
6163
6164 /* Unions can be returned in registers if every element is
6165 integral, or can be returned in an integer register. */
6166 for (field = TYPE_FIELDS (type);
6167 field;
6168 field = DECL_CHAIN (field))
6169 {
6170 if (TREE_CODE (field) != FIELD_DECL)
6171 continue;
6172
6173 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6174 return true;
6175
6176 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6177 return true;
6178 }
6179
6180 return false;
6181 }
6182 #endif /* not ARM_WINCE */
6183
6184 /* Return all other types in memory. */
6185 return true;
6186 }
6187
6188 const struct pcs_attribute_arg
6189 {
6190 const char *arg;
6191 enum arm_pcs value;
6192 } pcs_attribute_args[] =
6193 {
6194 {"aapcs", ARM_PCS_AAPCS},
6195 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6196 #if 0
6197 /* We could recognize these, but changes would be needed elsewhere
6198 * to implement them. */
6199 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6200 {"atpcs", ARM_PCS_ATPCS},
6201 {"apcs", ARM_PCS_APCS},
6202 #endif
6203 {NULL, ARM_PCS_UNKNOWN}
6204 };
6205
6206 static enum arm_pcs
6207 arm_pcs_from_attribute (tree attr)
6208 {
6209 const struct pcs_attribute_arg *ptr;
6210 const char *arg;
6211
6212 /* Get the value of the argument. */
6213 if (TREE_VALUE (attr) == NULL_TREE
6214 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6215 return ARM_PCS_UNKNOWN;
6216
6217 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6218
6219 /* Check it against the list of known arguments. */
6220 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6221 if (streq (arg, ptr->arg))
6222 return ptr->value;
6223
6224 /* An unrecognized PCS name. */
6225 return ARM_PCS_UNKNOWN;
6226 }
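
/* For reference, the attribute parsed above appears in declarations
   such as (illustrative):

     double f (double) __attribute__ ((pcs ("aapcs")));

   which requests the base (core-register) calling convention for f
   even when the default is "aapcs-vfp".  */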
6227
6228 /* Get the PCS variant to use for this call. TYPE is the function's type
6229 specification, DECL is the specific declaration. DECL may be null if
6230 the call could be indirect or if this is a library call. */
6231 static enum arm_pcs
6232 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6233 {
6234 bool user_convention = false;
6235 enum arm_pcs user_pcs = arm_pcs_default;
6236 tree attr;
6237
6238 gcc_assert (type);
6239
6240 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6241 if (attr)
6242 {
6243 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6244 user_convention = true;
6245 }
6246
6247 if (TARGET_AAPCS_BASED)
6248 {
6249 /* Detect varargs functions. These always use the base rules
6250 (no argument is ever a candidate for a co-processor
6251 register). */
6252 bool base_rules = stdarg_p (type);
6253
6254 if (user_convention)
6255 {
6256 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6257 sorry ("non-AAPCS derived PCS variant");
6258 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6259 error ("variadic functions must use the base AAPCS variant");
6260 }
6261
6262 if (base_rules)
6263 return ARM_PCS_AAPCS;
6264 else if (user_convention)
6265 return user_pcs;
6266 #if 0
6267 /* Unfortunately, this is not safe and can lead to wrong code
6268 being generated (PR96882). Not all calls into the back-end
6269 pass the DECL, so it is unsafe to make any PCS-changing
6270 decisions based on it. In particular the RETURN_IN_MEMORY
6271 hook is only ever passed a TYPE. This needs revisiting to
6272 see if there are any partial improvements that can be
6273 re-enabled. */
6274 else if (decl && flag_unit_at_a_time)
6275 {
6276 /* Local functions never leak outside this compilation unit,
6277 so we are free to use whatever conventions are
6278 appropriate. */
6279 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6280 cgraph_node *local_info_node
6281 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6282 if (local_info_node && local_info_node->local)
6283 return ARM_PCS_AAPCS_LOCAL;
6284 }
6285 #endif
6286 }
6287 else if (user_convention && user_pcs != arm_pcs_default)
6288 sorry ("PCS variant");
6289
6290 /* For everything else we use the target's default. */
6291 return arm_pcs_default;
6292 }
6293
6294
6295 static void
6296 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6297 const_tree fntype ATTRIBUTE_UNUSED,
6298 rtx libcall ATTRIBUTE_UNUSED,
6299 const_tree fndecl ATTRIBUTE_UNUSED)
6300 {
6301 /* Record the unallocated VFP registers. */
6302 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6303 pcum->aapcs_vfp_reg_alloc = 0;
6304 }
6305
6306 /* Bitmasks that indicate whether earlier versions of GCC would have
6307 taken a different path through the ABI logic. This should result in
6308 a -Wpsabi warning if the earlier path led to a different ABI decision.
6309
6310 WARN_PSABI_EMPTY_CXX17_BASE
6311 Indicates that the type includes an artificial empty C++17 base field
6312 that, prior to GCC 10.1, would prevent the type from being treated as
6313 a HFA or HVA. See PR94711 for details.
6314
6315 WARN_PSABI_NO_UNIQUE_ADDRESS
6316 Indicates that the type includes an empty [[no_unique_address]] field
6317 that, prior to GCC 10.1, would prevent the type from being treated as
6318 a HFA or HVA. */
6319 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6320 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6321 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6322
6323 /* Walk down the type tree of TYPE counting consecutive base elements.
6324 If *MODEP is VOIDmode, then set it to the first valid floating point
6325 type. If a non-floating point type is found, or if a floating point
6326 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6327 otherwise return the count in the sub-tree.
6328
6329 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6330 function has changed its behavior relative to earlier versions of GCC.
6331 Normally the argument should be nonnull and point to a zero-initialized
6332 variable. The function then records whether the ABI decision might
6333 be affected by a known fix to the ABI logic, setting the associated
6334 WARN_PSABI_* bits if so.
6335
6336 When the argument is instead a null pointer, the function tries to
6337 simulate the behavior of GCC before all such ABI fixes were made.
6338 This is useful to check whether the function returns something
6339 different after the ABI fixes. */
6340 static int
6341 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6342 unsigned int *warn_psabi_flags)
6343 {
6344 machine_mode mode;
6345 HOST_WIDE_INT size;
6346
6347 switch (TREE_CODE (type))
6348 {
6349 case REAL_TYPE:
6350 mode = TYPE_MODE (type);
6351 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6352 return -1;
6353
6354 if (*modep == VOIDmode)
6355 *modep = mode;
6356
6357 if (*modep == mode)
6358 return 1;
6359
6360 break;
6361
6362 case COMPLEX_TYPE:
6363 mode = TYPE_MODE (TREE_TYPE (type));
6364 if (mode != DFmode && mode != SFmode)
6365 return -1;
6366
6367 if (*modep == VOIDmode)
6368 *modep = mode;
6369
6370 if (*modep == mode)
6371 return 2;
6372
6373 break;
6374
6375 case VECTOR_TYPE:
6376 /* Use V2SImode and V4SImode as representatives of all 64-bit
6377 and 128-bit vector types, whether or not those modes are
6378 supported with the present options. */
6379 size = int_size_in_bytes (type);
6380 switch (size)
6381 {
6382 case 8:
6383 mode = V2SImode;
6384 break;
6385 case 16:
6386 mode = V4SImode;
6387 break;
6388 default:
6389 return -1;
6390 }
6391
6392 if (*modep == VOIDmode)
6393 *modep = mode;
6394
6395 /* Vector modes are considered to be opaque: two vectors are
6396 equivalent for the purposes of being homogeneous aggregates
6397 if they are the same size. */
6398 if (*modep == mode)
6399 return 1;
6400
6401 break;
6402
6403 case ARRAY_TYPE:
6404 {
6405 int count;
6406 tree index = TYPE_DOMAIN (type);
6407
6408 /* Can't handle incomplete types nor sizes that are not
6409 fixed. */
6410 if (!COMPLETE_TYPE_P (type)
6411 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6412 return -1;
6413
6414 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6415 warn_psabi_flags);
6416 if (count == -1
6417 || !index
6418 || !TYPE_MAX_VALUE (index)
6419 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6420 || !TYPE_MIN_VALUE (index)
6421 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6422 || count < 0)
6423 return -1;
6424
6425 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6426 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6427
6428 /* There must be no padding. */
6429 if (wi::to_wide (TYPE_SIZE (type))
6430 != count * GET_MODE_BITSIZE (*modep))
6431 return -1;
6432
6433 return count;
6434 }
6435
6436 case RECORD_TYPE:
6437 {
6438 int count = 0;
6439 int sub_count;
6440 tree field;
6441
6442 /* Can't handle incomplete types nor sizes that are not
6443 fixed. */
6444 if (!COMPLETE_TYPE_P (type)
6445 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6446 return -1;
6447
6448 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6449 {
6450 if (TREE_CODE (field) != FIELD_DECL)
6451 continue;
6452
6453 if (DECL_FIELD_ABI_IGNORED (field))
6454 {
6455 /* See whether this is something that earlier versions of
6456 GCC failed to ignore. */
6457 unsigned int flag;
6458 if (lookup_attribute ("no_unique_address",
6459 DECL_ATTRIBUTES (field)))
6460 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6461 else if (cxx17_empty_base_field_p (field))
6462 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6463 else
6464 /* No compatibility problem. */
6465 continue;
6466
6467 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6468 if (warn_psabi_flags)
6469 {
6470 *warn_psabi_flags |= flag;
6471 continue;
6472 }
6473 }
6474 /* A zero-width bitfield may affect layout in some
6475 circumstances, but adds no members. The determination
6476 of whether or not a type is an HFA is performed after
6477 layout is complete, so if the type still looks like an
6478 HFA afterwards, it is still classed as one. This is
6479 potentially an ABI break for the hard-float ABI. */
6480 else if (DECL_BIT_FIELD (field)
6481 && integer_zerop (DECL_SIZE (field)))
6482 {
6483 /* Prior to GCC-12 these fields were stripped early,
6484 hiding them from the back-end entirely and
6485 resulting in the correct behaviour for argument
6486 passing. Simulate that old behaviour without
6487 generating a warning. */
6488 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6489 continue;
6490 if (warn_psabi_flags)
6491 {
6492 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6493 continue;
6494 }
6495 }
6496
6497 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6498 warn_psabi_flags);
6499 if (sub_count < 0)
6500 return -1;
6501 count += sub_count;
6502 }
6503
6504 /* There must be no padding. */
6505 if (wi::to_wide (TYPE_SIZE (type))
6506 != count * GET_MODE_BITSIZE (*modep))
6507 return -1;
6508
6509 return count;
6510 }
6511
6512 case UNION_TYPE:
6513 case QUAL_UNION_TYPE:
6514 {
6515 /* These aren't very interesting except in a degenerate case. */
6516 int count = 0;
6517 int sub_count;
6518 tree field;
6519
6520 /* Can't handle incomplete types nor sizes that are not
6521 fixed. */
6522 if (!COMPLETE_TYPE_P (type)
6523 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6524 return -1;
6525
6526 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6527 {
6528 if (TREE_CODE (field) != FIELD_DECL)
6529 continue;
6530
6531 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6532 warn_psabi_flags);
6533 if (sub_count < 0)
6534 return -1;
6535 count = count > sub_count ? count : sub_count;
6536 }
6537
6538 /* There must be no padding. */
6539 if (wi::to_wide (TYPE_SIZE (type))
6540 != count * GET_MODE_BITSIZE (*modep))
6541 return -1;
6542
6543 return count;
6544 }
6545
6546 default:
6547 break;
6548 }
6549
6550 return -1;
6551 }
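
/* For example (illustrative types), the walk above returns 3 with
   *MODEP == SFmode for struct { float x, y, z; }, making it a
   homogeneous-aggregate candidate, while struct { float x; double y; }
   mixes element modes and yields -1.  */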
6552
6553 /* Return true if PCS_VARIANT should use VFP registers. */
6554 static bool
6555 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6556 {
6557 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6558 {
6559 static bool seen_thumb1_vfp = false;
6560
6561 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6562 {
6563 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6564 /* sorry() is not immediately fatal, so only display this once. */
6565 seen_thumb1_vfp = true;
6566 }
6567
6568 return true;
6569 }
6570
6571 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6572 return false;
6573
6574 return (TARGET_32BIT && TARGET_HARD_FLOAT
6575 && (TARGET_VFP_DOUBLE || !is_double));
6576 }
6577
6578 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6579 suitable for passing or returning in VFP registers for the PCS
6580 variant selected. If it is, then *BASE_MODE is updated to contain
6581 a machine mode describing each element of the argument's type and
6582 *COUNT to hold the number of such elements. */
6583 static bool
6584 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6585 machine_mode mode, const_tree type,
6586 machine_mode *base_mode, int *count)
6587 {
6588 machine_mode new_mode = VOIDmode;
6589
6590 /* If we have the type information, prefer that to working things
6591 out from the mode. */
6592 if (type)
6593 {
6594 unsigned int warn_psabi_flags = 0;
6595 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6596 &warn_psabi_flags);
6597 if (ag_count > 0 && ag_count <= 4)
6598 {
6599 static unsigned last_reported_type_uid;
6600 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6601 int alt;
6602 if (warn_psabi
6603 && warn_psabi_flags
6604 && uid != last_reported_type_uid
6605 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6606 != ag_count))
6607 {
6608 const char *url10
6609 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6610 const char *url12
6611 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6612 gcc_assert (alt == -1);
6613 last_reported_type_uid = uid;
6614 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6615 qualification. */
6616 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6617 inform (input_location, "parameter passing for argument of "
6618 "type %qT with %<[[no_unique_address]]%> members "
6619 "changed %{in GCC 10.1%}",
6620 TYPE_MAIN_VARIANT (type), url10);
6621 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6622 inform (input_location, "parameter passing for argument of "
6623 "type %qT when C++17 is enabled changed to match "
6624 "C++14 %{in GCC 10.1%}",
6625 TYPE_MAIN_VARIANT (type), url10);
6626 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6627 inform (input_location, "parameter passing for argument of "
6628 "type %qT changed %{in GCC 12.1%}",
6629 TYPE_MAIN_VARIANT (type), url12);
6630 }
6631 *count = ag_count;
6632 }
6633 else
6634 return false;
6635 }
6636 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6637 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6638 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6639 {
6640 *count = 1;
6641 new_mode = mode;
6642 }
6643 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6644 {
6645 *count = 2;
6646 new_mode = (mode == DCmode ? DFmode : SFmode);
6647 }
6648 else
6649 return false;
6650
6651
6652 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6653 return false;
6654
6655 *base_mode = new_mode;
6656
6657 if (TARGET_GENERAL_REGS_ONLY)
6658 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6659 type);
6660
6661 return true;
6662 }
6663
6664 static bool
6665 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6666 machine_mode mode, const_tree type)
6667 {
6668 int count ATTRIBUTE_UNUSED;
6669 machine_mode ag_mode ATTRIBUTE_UNUSED;
6670
6671 if (!use_vfp_abi (pcs_variant, false))
6672 return false;
6673 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6674 &ag_mode, &count);
6675 }
6676
6677 static bool
6678 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6679 const_tree type)
6680 {
6681 if (!use_vfp_abi (pcum->pcs_variant, false))
6682 return false;
6683
6684 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6685 &pcum->aapcs_vfp_rmode,
6686 &pcum->aapcs_vfp_rcount);
6687 }
6688
6689 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6690 for the behaviour of this function. */
6691
6692 static bool
6693 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6694 const_tree type ATTRIBUTE_UNUSED)
6695 {
6696 int rmode_size
6697 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6698 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6699 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6700 int regno;
6701
6702 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6703 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6704 {
6705 pcum->aapcs_vfp_reg_alloc = mask << regno;
6706 if (mode == BLKmode
6707 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6708 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6709 {
6710 int i;
6711 int rcount = pcum->aapcs_vfp_rcount;
6712 int rshift = shift;
6713 machine_mode rmode = pcum->aapcs_vfp_rmode;
6714 rtx par;
6715 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6716 {
6717 /* Avoid using unsupported vector modes. */
6718 if (rmode == V2SImode)
6719 rmode = DImode;
6720 else if (rmode == V4SImode)
6721 {
6722 rmode = DImode;
6723 rcount *= 2;
6724 rshift /= 2;
6725 }
6726 }
6727 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6728 for (i = 0; i < rcount; i++)
6729 {
6730 rtx tmp = gen_rtx_REG (rmode,
6731 FIRST_VFP_REGNUM + regno + i * rshift);
6732 tmp = gen_rtx_EXPR_LIST
6733 (VOIDmode, tmp,
6734 GEN_INT (i * GET_MODE_SIZE (rmode)));
6735 XVECEXP (par, 0, i) = tmp;
6736 }
6737
6738 pcum->aapcs_reg = par;
6739 }
6740 else
6741 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6742 return true;
6743 }
6744 return false;
6745 }
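
/* As a worked example of the allocation above (illustrative values):
   for a homogeneous aggregate of two doubles, aapcs_vfp_rmode is
   DFmode and aapcs_vfp_rcount is 2, so shift is 2 and mask is 0xf;
   the loop then claims the first four free single-precision slots,
   e.g. s0-s3 (i.e. d0 and d1) when nothing has been allocated yet.  */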
6746
6747 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6748 comment there for the behaviour of this function. */
6749
6750 static rtx
6751 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6752 machine_mode mode,
6753 const_tree type ATTRIBUTE_UNUSED)
6754 {
6755 if (!use_vfp_abi (pcs_variant, false))
6756 return NULL;
6757
6758 if (mode == BLKmode
6759 || (GET_MODE_CLASS (mode) == MODE_INT
6760 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6761 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6762 {
6763 int count;
6764 machine_mode ag_mode;
6765 int i;
6766 rtx par;
6767 int shift;
6768
6769 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6770 &ag_mode, &count);
6771
6772 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6773 {
6774 if (ag_mode == V2SImode)
6775 ag_mode = DImode;
6776 else if (ag_mode == V4SImode)
6777 {
6778 ag_mode = DImode;
6779 count *= 2;
6780 }
6781 }
6782 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6783 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6784 for (i = 0; i < count; i++)
6785 {
6786 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6787 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6788 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6789 XVECEXP (par, 0, i) = tmp;
6790 }
6791
6792 return par;
6793 }
6794
6795 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6796 }
6797
6798 static void
6799 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6800 machine_mode mode ATTRIBUTE_UNUSED,
6801 const_tree type ATTRIBUTE_UNUSED)
6802 {
6803 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6804 pcum->aapcs_vfp_reg_alloc = 0;
6805 return;
6806 }
6807
6808 #define AAPCS_CP(X) \
6809 { \
6810 aapcs_ ## X ## _cum_init, \
6811 aapcs_ ## X ## _is_call_candidate, \
6812 aapcs_ ## X ## _allocate, \
6813 aapcs_ ## X ## _is_return_candidate, \
6814 aapcs_ ## X ## _allocate_return_reg, \
6815 aapcs_ ## X ## _advance \
6816 }
6817
6818 /* Table of co-processors that can be used to pass arguments in
6819 registers.  Ideally no argument should be a candidate for more than
6820 one co-processor table entry, but the table is processed in order
6821 and stops after the first match. If that entry then fails to put
6822 the argument into a co-processor register, the argument will go on
6823 the stack. */
6824 static struct
6825 {
6826 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6827 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6828
6829 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6830 BLKmode) is a candidate for this co-processor's registers; this
6831 function should ignore any position-dependent state in
6832 CUMULATIVE_ARGS and only use call-type dependent information. */
6833 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6834
6835 /* Return true if the argument does get a co-processor register; it
6836 should set aapcs_reg to an RTX of the allocated register, as is
6837 required for a return from FUNCTION_ARG.  */
6838 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6839
6840 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6841 be returned in this co-processor's registers. */
6842 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6843
6844 /* Allocate and return an RTX element to hold the return type of a call. This
6845 routine must not fail and will only be called if is_return_candidate
6846 returned true with the same parameters. */
6847 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6848
6849 /* Finish processing this argument and prepare to start processing
6850 the next one. */
6851 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6852 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6853 {
6854 AAPCS_CP(vfp)
6855 };
6856
6857 #undef AAPCS_CP
6858
6859 static int
6860 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6861 const_tree type)
6862 {
6863 int i;
6864
6865 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6866 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6867 return i;
6868
6869 return -1;
6870 }
6871
6872 static int
6873 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6874 {
6875 /* We aren't passed a decl, so we can't check that a call is local.
6876 However, it isn't clear that that would be a win anyway, since it
6877 might limit some tail-calling opportunities. */
6878 enum arm_pcs pcs_variant;
6879
6880 if (fntype)
6881 {
6882 const_tree fndecl = NULL_TREE;
6883
6884 if (TREE_CODE (fntype) == FUNCTION_DECL)
6885 {
6886 fndecl = fntype;
6887 fntype = TREE_TYPE (fntype);
6888 }
6889
6890 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6891 }
6892 else
6893 pcs_variant = arm_pcs_default;
6894
6895 if (pcs_variant != ARM_PCS_AAPCS)
6896 {
6897 int i;
6898
6899 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6900 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6901 TYPE_MODE (type),
6902 type))
6903 return i;
6904 }
6905 return -1;
6906 }
6907
6908 static rtx
6909 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6910 const_tree fntype)
6911 {
6912 /* We aren't passed a decl, so we can't check that a call is local.
6913 However, it isn't clear that that would be a win anyway, since it
6914 might limit some tail-calling opportunities. */
6915 enum arm_pcs pcs_variant;
6916 int unsignedp ATTRIBUTE_UNUSED;
6917
6918 if (fntype)
6919 {
6920 const_tree fndecl = NULL_TREE;
6921
6922 if (TREE_CODE (fntype) == FUNCTION_DECL)
6923 {
6924 fndecl = fntype;
6925 fntype = TREE_TYPE (fntype);
6926 }
6927
6928 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6929 }
6930 else
6931 pcs_variant = arm_pcs_default;
6932
6933 /* Promote integer types. */
6934 if (type && INTEGRAL_TYPE_P (type))
6935 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6936
6937 if (pcs_variant != ARM_PCS_AAPCS)
6938 {
6939 int i;
6940
6941 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6942 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6943 type))
6944 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6945 mode, type);
6946 }
6947
6948 /* Promotes small structs returned in a register to full-word size
6949 for big-endian AAPCS. */
6950 if (type && arm_return_in_msb (type))
6951 {
6952 HOST_WIDE_INT size = int_size_in_bytes (type);
6953 if (size % UNITS_PER_WORD != 0)
6954 {
6955 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6956 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6957 }
6958 }
6959
6960 return gen_rtx_REG (mode, R0_REGNUM);
6961 }
6962
6963 static rtx
6964 aapcs_libcall_value (machine_mode mode)
6965 {
6966 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6967 && GET_MODE_SIZE (mode) <= 4)
6968 mode = SImode;
6969
6970 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6971 }
6972
6973 /* Lay out a function argument using the AAPCS rules. The rule
6974 numbers referred to here are those in the AAPCS. */
6975 static void
6976 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6977 const_tree type, bool named)
6978 {
6979 int nregs, nregs2;
6980 int ncrn;
6981
6982 /* We only need to do this once per argument. */
6983 if (pcum->aapcs_arg_processed)
6984 return;
6985
6986 pcum->aapcs_arg_processed = true;
6987
6988 /* Special case: if named is false then we are handling an incoming
6989 anonymous argument which is on the stack. */
6990 if (!named)
6991 return;
6992
6993 /* Is this a potential co-processor register candidate? */
6994 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6995 {
6996 int slot = aapcs_select_call_coproc (pcum, mode, type);
6997 pcum->aapcs_cprc_slot = slot;
6998
6999 /* We don't have to apply any of the rules from part B of the
7000 preparation phase, these are handled elsewhere in the
7001 compiler. */
7002
7003 if (slot >= 0)
7004 {
7005 /* A Co-processor register candidate goes either in its own
7006 class of registers or on the stack. */
7007 if (!pcum->aapcs_cprc_failed[slot])
7008 {
7009 /* C1.cp - Try to allocate the argument to co-processor
7010 registers. */
7011 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7012 return;
7013
7014 /* C2.cp - Put the argument on the stack and note that we
7015 can't assign any more candidates in this slot. We also
7016 need to note that we have allocated stack space, so that
7017 we won't later try to split a non-cprc candidate between
7018 core registers and the stack. */
7019 pcum->aapcs_cprc_failed[slot] = true;
7020 pcum->can_split = false;
7021 }
7022
7023 /* We didn't get a register, so this argument goes on the
7024 stack. */
7025 gcc_assert (pcum->can_split == false);
7026 return;
7027 }
7028 }
7029
7030 /* C3 - For double-word aligned arguments, round the NCRN up to the
7031 next even number. */
7032 ncrn = pcum->aapcs_ncrn;
7033 if (ncrn & 1)
7034 {
7035 int res = arm_needs_doubleword_align (mode, type);
7036 /* Only warn during RTL expansion of call stmts, otherwise we would
7037 warn e.g. during gimplification even on functions that will be
7038 always inlined, and we'd warn multiple times. Don't warn when
7039 called in expand_function_start either, as we warn instead in
7040 arm_function_arg_boundary in that case. */
7041 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7042 inform (input_location, "parameter passing for argument of type "
7043 "%qT changed in GCC 7.1", type);
7044 else if (res > 0)
7045 ncrn++;
7046 }
7047
7048 nregs = ARM_NUM_REGS2 (mode, type);
7049
7050 /* Sigh, this test should really assert that nregs > 0, but a GCC
7051 extension allows empty structs and then gives them empty size; it
7052 then allows such a structure to be passed by value. For some of
7053 the code below we have to pretend that such an argument has
7054 non-zero size so that we 'locate' it correctly either in
7055 registers or on the stack. */
7056 gcc_assert (nregs >= 0);
7057
7058 nregs2 = nregs ? nregs : 1;
7059
7060 /* C4 - Argument fits entirely in core registers. */
7061 if (ncrn + nregs2 <= NUM_ARG_REGS)
7062 {
7063 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7064 pcum->aapcs_next_ncrn = ncrn + nregs;
7065 return;
7066 }
7067
7068 /* C5 - Some core registers left and there are no arguments already
7069 on the stack: split this argument between the remaining core
7070 registers and the stack. */
7071 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7072 {
7073 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7074 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7075 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7076 return;
7077 }
7078
7079 /* C6 - NCRN is set to 4. */
7080 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7081
7082 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
7083 return;
7084 }
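/* A rough worked example of rules C3-C5 above, assuming the base AAPCS
   with integer arguments in core registers:

     void f (int a, long long b, int c);

   'a' lands in r0 (C4, NCRN 0 -> 1); 'b' needs doubleword alignment, so
   C3 rounds NCRN up to 2 and C4 assigns r2/r3; 'c' then finds no core
   registers left and is placed on the stack by C6-C8.  */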
7085
7086 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7087 for a call to a function whose data type is FNTYPE.
7088 For a library call, FNTYPE is NULL. */
7089 void
7090 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7091 rtx libname,
7092 tree fndecl ATTRIBUTE_UNUSED)
7093 {
7094 /* Long call handling. */
7095 if (fntype)
7096 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7097 else
7098 pcum->pcs_variant = arm_pcs_default;
7099
7100 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7101 {
7102 if (arm_libcall_uses_aapcs_base (libname))
7103 pcum->pcs_variant = ARM_PCS_AAPCS;
7104
7105 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7106 pcum->aapcs_reg = NULL_RTX;
7107 pcum->aapcs_partial = 0;
7108 pcum->aapcs_arg_processed = false;
7109 pcum->aapcs_cprc_slot = -1;
7110 pcum->can_split = true;
7111
7112 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7113 {
7114 int i;
7115
7116 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7117 {
7118 pcum->aapcs_cprc_failed[i] = false;
7119 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7120 }
7121 }
7122 return;
7123 }
7124
7125 /* Legacy ABIs */
7126
7127 /* On the ARM, the offset starts at 0. */
7128 pcum->nregs = 0;
7129 pcum->iwmmxt_nregs = 0;
7130 pcum->can_split = true;
7131
7132 /* Varargs vectors are treated the same as long long.
7133 named_count avoids having to change the way arm handles 'named'.  */
7134 pcum->named_count = 0;
7135 pcum->nargs = 0;
7136
7137 if (TARGET_REALLY_IWMMXT && fntype)
7138 {
7139 tree fn_arg;
7140
7141 for (fn_arg = TYPE_ARG_TYPES (fntype);
7142 fn_arg;
7143 fn_arg = TREE_CHAIN (fn_arg))
7144 pcum->named_count += 1;
7145
7146 if (! pcum->named_count)
7147 pcum->named_count = INT_MAX;
7148 }
7149 }
7150
7151 /* Return 2 if double word alignment is required for argument passing,
7152 but wasn't required before the fix for PR88469.
7153 Return 1 if double word alignment is required for argument passing.
7154 Return -1 if double word alignment used to be required for argument
7155 passing before PR77728 ABI fix, but is not required anymore.
7156 Return 0 if double word alignment is not required and wasn't required
7157 before either. */
7158 static int
7159 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7160 {
7161 if (!type)
7162 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7163
7164 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7165 if (!AGGREGATE_TYPE_P (type))
7166 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7167
7168 /* Array types: Use member alignment of element type. */
7169 if (TREE_CODE (type) == ARRAY_TYPE)
7170 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7171
7172 int ret = 0;
7173 int ret2 = 0;
7174 /* Record/aggregate types: Use greatest member alignment of any member.
7175
7176 Note that we explicitly consider zero-sized fields here, even though
7177 they don't map to AAPCS machine types. For example, in:
7178
7179 struct __attribute__((aligned(8))) empty {};
7180
7181 struct s {
7182 [[no_unique_address]] empty e;
7183 int x;
7184 };
7185
7186 "s" contains only one Fundamental Data Type (the int field)
7187 but gains 8-byte alignment and size thanks to "e". */
7188 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7189 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7190 {
7191 if (TREE_CODE (field) == FIELD_DECL)
7192 return 1;
7193 else
7194 /* Before PR77728 fix, we were incorrectly considering also
7195 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7196 Make sure we can warn about that with -Wpsabi. */
7197 ret = -1;
7198 }
7199 else if (TREE_CODE (field) == FIELD_DECL
7200 && DECL_BIT_FIELD_TYPE (field)
7201 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7202 ret2 = 1;
7203
7204 if (ret2)
7205 return 2;
7206
7207 return ret;
7208 }
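/* Some illustrative cases for the return values above (a sketch, not an
   exhaustive list):

     struct s1 { long long x; };      returns 1 (a member aligned above
                                      PARM_BOUNDARY)
     struct s2 { int x, y; };         returns 0
     struct s3 { long long x : 8; };  returns 2 (over-aligned bit-field
                                      type; the GCC 9.1 -Wpsabi case)  */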
7209
7210
7211 /* Determine where to put an argument to a function.
7212 Value is zero to push the argument on the stack,
7213 or a hard register in which to store the argument.
7214
7215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7216 the preceding args and about the function being called.
7217 ARG is a description of the argument.
7218
7219 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7220 other arguments are passed on the stack. If (NAMED == 0) (which happens
7221 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7222 defined), say it is passed in the stack (function_prologue will
7223 indeed make it pass in the stack if necessary). */
7224
7225 static rtx
7226 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7227 {
7228 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7229 int nregs;
7230
7231 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7232 a call insn (op3 of a call_value insn). */
7233 if (arg.end_marker_p ())
7234 return const0_rtx;
7235
7236 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7237 {
7238 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7239 return pcum->aapcs_reg;
7240 }
7241
7242 /* Varargs vectors are treated the same as long long.
7243 named_count avoids having to change the way arm handles 'named'.  */
7244 if (TARGET_IWMMXT_ABI
7245 && arm_vector_mode_supported_p (arg.mode)
7246 && pcum->named_count > pcum->nargs + 1)
7247 {
7248 if (pcum->iwmmxt_nregs <= 9)
7249 return gen_rtx_REG (arg.mode,
7250 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7251 else
7252 {
7253 pcum->can_split = false;
7254 return NULL_RTX;
7255 }
7256 }
7257
7258 /* Put doubleword aligned quantities in even register pairs. */
7259 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7260 {
7261 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7262 if (res < 0 && warn_psabi)
7263 inform (input_location, "parameter passing for argument of type "
7264 "%qT changed in GCC 7.1", arg.type);
7265 else if (res > 0)
7266 {
7267 pcum->nregs++;
7268 if (res > 1 && warn_psabi)
7269 inform (input_location, "parameter passing for argument of type "
7270 "%qT changed in GCC 9.1", arg.type);
7271 }
7272 }
7273
7274 /* Only allow splitting an arg between regs and memory if all preceding
7275 args were allocated to regs. For args passed by reference we only count
7276 the reference pointer. */
7277 if (pcum->can_split)
7278 nregs = 1;
7279 else
7280 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7281
7282 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7283 return NULL_RTX;
7284
7285 return gen_rtx_REG (arg.mode, pcum->nregs);
7286 }
7287
7288 static unsigned int
7289 arm_function_arg_boundary (machine_mode mode, const_tree type)
7290 {
7291 if (!ARM_DOUBLEWORD_ALIGN)
7292 return PARM_BOUNDARY;
7293
7294 int res = arm_needs_doubleword_align (mode, type);
7295 if (res < 0 && warn_psabi)
7296 inform (input_location, "parameter passing for argument of type %qT "
7297 "changed in GCC 7.1", type);
7298 if (res > 1 && warn_psabi)
7299 inform (input_location, "parameter passing for argument of type "
7300 "%qT changed in GCC 9.1", type);
7301
7302 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7303 }
7304
7305 static int
7306 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7307 {
7308 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7309 int nregs = pcum->nregs;
7310
7311 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7312 {
7313 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7314 return pcum->aapcs_partial;
7315 }
7316
7317 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7318 return 0;
7319
7320 if (NUM_ARG_REGS > nregs
7321 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7322 && pcum->can_split)
7323 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7324
7325 return 0;
7326 }
7327
7328 /* Update the data in PCUM to advance over argument ARG. */
7329
7330 static void
7331 arm_function_arg_advance (cumulative_args_t pcum_v,
7332 const function_arg_info &arg)
7333 {
7334 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7335
7336 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7337 {
7338 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7339
7340 if (pcum->aapcs_cprc_slot >= 0)
7341 {
7342 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7343 arg.type);
7344 pcum->aapcs_cprc_slot = -1;
7345 }
7346
7347 /* Generic stuff. */
7348 pcum->aapcs_arg_processed = false;
7349 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7350 pcum->aapcs_reg = NULL_RTX;
7351 pcum->aapcs_partial = 0;
7352 }
7353 else
7354 {
7355 pcum->nargs += 1;
7356 if (arm_vector_mode_supported_p (arg.mode)
7357 && pcum->named_count > pcum->nargs
7358 && TARGET_IWMMXT_ABI)
7359 pcum->iwmmxt_nregs += 1;
7360 else
7361 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7362 }
7363 }
7364
7365 /* Variable sized types are passed by reference. This is a GCC
7366 extension to the ARM ABI. */
7367
7368 static bool
7369 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7370 {
7371 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7372 }
7373 \f
7374 /* Encode the current state of the #pragma [no_]long_calls. */
7375 typedef enum
7376 {
7377 OFF, /* No #pragma [no_]long_calls is in effect. */
7378 LONG, /* #pragma long_calls is in effect. */
7379 SHORT /* #pragma no_long_calls is in effect. */
7380 } arm_pragma_enum;
7381
7382 static arm_pragma_enum arm_pragma_long_calls = OFF;
7383
7384 void
7385 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7386 {
7387 arm_pragma_long_calls = LONG;
7388 }
7389
7390 void
7391 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7392 {
7393 arm_pragma_long_calls = SHORT;
7394 }
7395
7396 void
7397 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7398 {
7399 arm_pragma_long_calls = OFF;
7400 }
7401 \f
7402 /* Handle an attribute requiring a FUNCTION_DECL;
7403 arguments as in struct attribute_spec.handler. */
7404 static tree
7405 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7406 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7407 {
7408 if (TREE_CODE (*node) != FUNCTION_DECL)
7409 {
7410 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7411 name);
7412 *no_add_attrs = true;
7413 }
7414
7415 return NULL_TREE;
7416 }
7417
7418 /* Handle an "interrupt" or "isr" attribute;
7419 arguments as in struct attribute_spec.handler. */
7420 static tree
7421 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7422 bool *no_add_attrs)
7423 {
7424 if (DECL_P (*node))
7425 {
7426 if (TREE_CODE (*node) != FUNCTION_DECL)
7427 {
7428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7429 name);
7430 *no_add_attrs = true;
7431 }
7432 else if (TARGET_VFP_BASE)
7433 {
7434 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7435 name);
7436 }
7437 /* FIXME: the argument if any is checked for type attributes;
7438 should it be checked for decl ones? */
7439 }
7440 else
7441 {
7442 if (TREE_CODE (*node) == FUNCTION_TYPE
7443 || TREE_CODE (*node) == METHOD_TYPE)
7444 {
7445 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7446 {
7447 warning (OPT_Wattributes, "%qE attribute ignored",
7448 name);
7449 *no_add_attrs = true;
7450 }
7451 }
7452 else if (TREE_CODE (*node) == POINTER_TYPE
7453 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7454 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7455 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7456 {
7457 *node = build_variant_type_copy (*node);
7458 TREE_TYPE (*node) = build_type_attribute_variant
7459 (TREE_TYPE (*node),
7460 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7461 *no_add_attrs = true;
7462 }
7463 else
7464 {
7465 /* Possibly pass this attribute on from the type to a decl. */
7466 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7467 | (int) ATTR_FLAG_FUNCTION_NEXT
7468 | (int) ATTR_FLAG_ARRAY_NEXT))
7469 {
7470 *no_add_attrs = true;
7471 return tree_cons (name, args, NULL_TREE);
7472 }
7473 else
7474 {
7475 warning (OPT_Wattributes, "%qE attribute ignored",
7476 name);
7477 }
7478 }
7479 }
7480
7481 return NULL_TREE;
7482 }
7483
7484 /* Handle a "pcs" attribute; arguments as in struct
7485 attribute_spec.handler. */
7486 static tree
7487 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7488 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7489 {
7490 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7491 {
7492 warning (OPT_Wattributes, "%qE attribute ignored", name);
7493 *no_add_attrs = true;
7494 }
7495 return NULL_TREE;
7496 }
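/* An illustrative use of the "pcs" attribute validated above; only the
   variant strings known to arm_pcs_from_attribute are accepted:

     double dot (double *, double *, int)
       __attribute__ ((pcs ("aapcs-vfp")));
     int sum (int, int) __attribute__ ((pcs ("aapcs")));

   Any other string, e.g. pcs ("apcs"), is ignored with the warning
   above.  */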
7497
7498 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7499 /* Handle the "notshared" attribute. This attribute is another way of
7500 requesting hidden visibility. ARM's compiler supports
7501 "__declspec(notshared)"; we support the same thing via an
7502 attribute. */
7503
7504 static tree
7505 arm_handle_notshared_attribute (tree *node,
7506 tree name ATTRIBUTE_UNUSED,
7507 tree args ATTRIBUTE_UNUSED,
7508 int flags ATTRIBUTE_UNUSED,
7509 bool *no_add_attrs)
7510 {
7511 tree decl = TYPE_NAME (*node);
7512
7513 if (decl)
7514 {
7515 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7516 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7517 *no_add_attrs = false;
7518 }
7519 return NULL_TREE;
7520 }
7521 #endif
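/* An illustrative use of "notshared" on a class type (a sketch; the
   attribute is only meaningful on targets with
   TARGET_DLLIMPORT_DECL_ATTRIBUTES):

     class __attribute__ ((notshared)) local_iface
     {
       virtual void run ();
     };

   The handler above gives the type's name decl hidden visibility, which
   is intended to hide the type's vtable and RTTI.  */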
7522
7523 /* This function returns true if a function with declaration FNDECL and type
7524 FNTYPE uses the stack to pass arguments or return variables and false
7525 otherwise. This is used for functions with the attributes
7526 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7527 diagnostic messages if the stack is used. NAME is the name of the attribute
7528 used. */
7529
7530 static bool
7531 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7532 {
7533 function_args_iterator args_iter;
7534 CUMULATIVE_ARGS args_so_far_v;
7535 cumulative_args_t args_so_far;
7536 bool first_param = true;
7537 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7538
7539 /* Error out if any argument is passed on the stack. */
7540 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7541 args_so_far = pack_cumulative_args (&args_so_far_v);
7542 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7543 {
7544 rtx arg_rtx;
7545
7546 prev_arg_type = arg_type;
7547 if (VOID_TYPE_P (arg_type))
7548 continue;
7549
7550 function_arg_info arg (arg_type, /*named=*/true);
7551 if (!first_param)
7552 /* ??? We should advance after processing the argument and pass
7553 the argument we're advancing past. */
7554 arm_function_arg_advance (args_so_far, arg);
7555 arg_rtx = arm_function_arg (args_so_far, arg);
7556 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7557 {
7558 error ("%qE attribute not available to functions with arguments "
7559 "passed on the stack", name);
7560 return true;
7561 }
7562 first_param = false;
7563 }
7564
7565 /* Error out for variadic functions since we cannot control how many
7566 arguments will be passed and thus the stack could be used.  stdarg_p () is not
7567 used for the checking to avoid browsing arguments twice. */
7568 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7569 {
7570 error ("%qE attribute not available to functions with variable number "
7571 "of arguments", name);
7572 return true;
7573 }
7574
7575 /* Error out if return value is passed on the stack. */
7576 ret_type = TREE_TYPE (fntype);
7577 if (arm_return_in_memory (ret_type, fntype))
7578 {
7579 error ("%qE attribute not available to functions that return value on "
7580 "the stack", name);
7581 return true;
7582 }
7583 return false;
7584 }
7585
7586 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7587 function will check whether the attribute is allowed here and will add the
7588 attribute to the function declaration tree or otherwise issue a warning. */
7589
7590 static tree
7591 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7592 tree /* args */,
7593 int /* flags */,
7594 bool *no_add_attrs)
7595 {
7596 tree fndecl;
7597
7598 if (!use_cmse)
7599 {
7600 *no_add_attrs = true;
7601 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7602 "option", name);
7603 return NULL_TREE;
7604 }
7605
7606 /* Ignore attribute for function types. */
7607 if (TREE_CODE (*node) != FUNCTION_DECL)
7608 {
7609 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7610 name);
7611 *no_add_attrs = true;
7612 return NULL_TREE;
7613 }
7614
7615 fndecl = *node;
7616
7617 /* Warn for static linkage functions. */
7618 if (!TREE_PUBLIC (fndecl))
7619 {
7620 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7621 "with static linkage", name);
7622 *no_add_attrs = true;
7623 return NULL_TREE;
7624 }
7625
7626 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7627 TREE_TYPE (fndecl));
7628 return NULL_TREE;
7629 }
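/* A sketch of an accepted use of the attribute checked above (requires
   -mcmse, external linkage, and arguments/return value that fit in
   registers):

     int __attribute__ ((cmse_nonsecure_entry)) get_counter (void);

   Static functions, or functions whose arguments or return value would
   need the stack, are diagnosed by the checks above instead.  */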
7630
7631
7632 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7633 function will check whether the attribute is allowed here and will add the
7634 attribute to the function type tree or otherwise issue a diagnostic. The
7635 reason we check this at declaration time is to only allow the use of the
7636 attribute with declarations of function pointers and not function
7637 declarations. This function checks NODE is of the expected type and issues
7638 diagnostics otherwise using NAME. If it is not of the expected type
7639 *NO_ADD_ATTRS will be set to true. */
7640
7641 static tree
7642 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7643 tree /* args */,
7644 int /* flags */,
7645 bool *no_add_attrs)
7646 {
7647 tree decl = NULL_TREE;
7648 tree fntype, type;
7649
7650 if (!use_cmse)
7651 {
7652 *no_add_attrs = true;
7653 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7654 "option", name);
7655 return NULL_TREE;
7656 }
7657
7658 if (DECL_P (*node))
7659 {
7660 fntype = TREE_TYPE (*node);
7661
7662 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7663 decl = *node;
7664 }
7665 else
7666 fntype = *node;
7667
7668 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7669 fntype = TREE_TYPE (fntype);
7670
7671 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7672 {
7673 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7674 "function pointer", name);
7675 *no_add_attrs = true;
7676 return NULL_TREE;
7677 }
7678
7679 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7680
7681 if (*no_add_attrs)
7682 return NULL_TREE;
7683
7684 /* Prevent trees being shared among function types with and without
7685 cmse_nonsecure_call attribute. */
7686 if (decl)
7687 {
7688 type = build_distinct_type_copy (TREE_TYPE (decl));
7689 TREE_TYPE (decl) = type;
7690 }
7691 else
7692 {
7693 type = build_distinct_type_copy (*node);
7694 *node = type;
7695 }
7696
7697 fntype = type;
7698
7699 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7700 {
7701 type = fntype;
7702 fntype = TREE_TYPE (fntype);
7703 fntype = build_distinct_type_copy (fntype);
7704 TREE_TYPE (type) = fntype;
7705 }
7706
7707 /* Construct a type attribute and add it to the function type. */
7708 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7709 TYPE_ATTRIBUTES (fntype));
7710 TYPE_ATTRIBUTES (fntype) = attrs;
7711 return NULL_TREE;
7712 }
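/* A sketch of the intended use of "cmse_nonsecure_call": it is attached
   to the base type of a function pointer rather than to a function
   declaration:

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (int);
     ns_cb_t *callback;

   Applying it directly to a function declaration trips the warning
   above.  */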
7713
7714 /* Return 0 if the attributes for two types are incompatible, 1 if they
7715 are compatible, and 2 if they are nearly compatible (which causes a
7716 warning to be generated). */
7717 static int
7718 arm_comp_type_attributes (const_tree type1, const_tree type2)
7719 {
7720 int l1, l2, s1, s2;
7721
7722 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7723 TYPE_ATTRIBUTES (type1));
7724 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7725 TYPE_ATTRIBUTES (type2));
7726 if (bool (attrs1) != bool (attrs2))
7727 return 0;
7728 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7729 return 0;
7730
7731 /* Check for mismatch of non-default calling convention. */
7732 if (TREE_CODE (type1) != FUNCTION_TYPE)
7733 return 1;
7734
7735 /* Check for mismatched call attributes. */
7736 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7737 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7738 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7739 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7740
7741 /* Only bother to check if an attribute is defined. */
7742 if (l1 | l2 | s1 | s2)
7743 {
7744 /* If one type has an attribute, the other must have the same attribute. */
7745 if ((l1 != l2) || (s1 != s2))
7746 return 0;
7747
7748 /* Disallow mixed attributes. */
7749 if ((l1 & s2) || (l2 & s1))
7750 return 0;
7751 }
7752
7753 /* Check for mismatched ISR attribute. */
7754 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7755 if (! l1)
7756 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7757 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7758 if (! l2)
7759 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7760 if (l1 != l2)
7761 return 0;
7762
7763 l1 = lookup_attribute ("cmse_nonsecure_call",
7764 TYPE_ATTRIBUTES (type1)) != NULL;
7765 l2 = lookup_attribute ("cmse_nonsecure_call",
7766 TYPE_ATTRIBUTES (type2)) != NULL;
7767
7768 if (l1 != l2)
7769 return 0;
7770
7771 return 1;
7772 }
7773
7774 /* Assigns default attributes to newly defined type. This is used to
7775 set short_call/long_call attributes for function types of
7776 functions defined inside corresponding #pragma scopes. */
7777 static void
7778 arm_set_default_type_attributes (tree type)
7779 {
7780 /* Add __attribute__ ((long_call)) to all functions, when
7781 inside #pragma long_calls or __attribute__ ((short_call)),
7782 when inside #pragma no_long_calls. */
7783 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7784 {
7785 tree type_attr_list, attr_name;
7786 type_attr_list = TYPE_ATTRIBUTES (type);
7787
7788 if (arm_pragma_long_calls == LONG)
7789 attr_name = get_identifier ("long_call");
7790 else if (arm_pragma_long_calls == SHORT)
7791 attr_name = get_identifier ("short_call");
7792 else
7793 return;
7794
7795 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7796 TYPE_ATTRIBUTES (type) = type_attr_list;
7797 }
7798 }
7799 \f
7800 /* Return true if DECL is known to be linked into section SECTION. */
7801
7802 static bool
7803 arm_function_in_section_p (tree decl, section *section)
7804 {
7805 /* We can only be certain about the prevailing symbol definition. */
7806 if (!decl_binds_to_current_def_p (decl))
7807 return false;
7808
7809 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7810 if (!DECL_SECTION_NAME (decl))
7811 {
7812 /* Make sure that we will not create a unique section for DECL. */
7813 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7814 return false;
7815 }
7816
7817 return function_section (decl) == section;
7818 }
7819
7820 /* Return nonzero if a 32-bit "long_call" should be generated for
7821 a call from the current function to DECL. We generate a long_call
7822 if the function:
7823
7824 a.  has an __attribute__ ((long_call))
7825 or b. is within the scope of a #pragma long_calls
7826 or c. the -mlong-calls command line switch has been specified
7827
7828 However we do not generate a long call if the function:
7829
7830 d. has an __attribute__ ((short_call))
7831 or e. is inside the scope of a #pragma no_long_calls
7832 or f. is defined in the same section as the current function. */
7833
7834 bool
7835 arm_is_long_call_p (tree decl)
7836 {
7837 tree attrs;
7838
7839 if (!decl)
7840 return TARGET_LONG_CALLS;
7841
7842 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7843 if (lookup_attribute ("short_call", attrs))
7844 return false;
7845
7846 /* For "f", be conservative, and only cater for cases in which the
7847 whole of the current function is placed in the same section. */
7848 if (!flag_reorder_blocks_and_partition
7849 && TREE_CODE (decl) == FUNCTION_DECL
7850 && arm_function_in_section_p (decl, current_function_section ()))
7851 return false;
7852
7853 if (lookup_attribute ("long_call", attrs))
7854 return true;
7855
7856 return TARGET_LONG_CALLS;
7857 }
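/* The rules above, in source form (an illustrative sketch):

     void remote (void) __attribute__ ((long_call));    (always long)
     void local (void) __attribute__ ((short_call));    (never long)
     void plain (void);   (follows -mlong-calls and the pragmas, unless
                           the callee is known to live in the same
                           section as the caller)  */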
7858
7859 /* Return nonzero if it is ok to make a tail-call to DECL. */
7860 static bool
7861 arm_function_ok_for_sibcall (tree decl, tree exp)
7862 {
7863 unsigned long func_type;
7864
7865 if (cfun->machine->sibcall_blocked)
7866 return false;
7867
7868 if (TARGET_FDPIC)
7869 {
7870 /* In FDPIC, never tailcall something for which we have no decl:
7871 the target function could be in a different module, requiring
7872 a different FDPIC register value. */
7873 if (decl == NULL)
7874 return false;
7875 }
7876
7877 /* Never tailcall something if we are generating code for Thumb-1. */
7878 if (TARGET_THUMB1)
7879 return false;
7880
7881 /* The PIC register is live on entry to VxWorks PLT entries, so we
7882 must make the call before restoring the PIC register. */
7883 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7884 return false;
7885
7886 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7887 may be used both as the target of the call and as the base register for
7888 restoring the VFP registers.  */
7889 if (TARGET_APCS_FRAME && TARGET_ARM
7890 && TARGET_HARD_FLOAT
7891 && decl && arm_is_long_call_p (decl))
7892 return false;
7893
7894 /* If we are interworking and the function is not declared static
7895 then we can't tail-call it unless we know that it exists in this
7896 compilation unit (since it might be a Thumb routine). */
7897 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7898 && !TREE_ASM_WRITTEN (decl))
7899 return false;
7900
7901 func_type = arm_current_func_type ();
7902 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7903 if (IS_INTERRUPT (func_type))
7904 return false;
7905
7906 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7907 generated for entry functions themselves. */
7908 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7909 return false;
7910
7911 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7912 this would complicate matters for later code generation. */
7913 if (TREE_CODE (exp) == CALL_EXPR)
7914 {
7915 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7916 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7917 return false;
7918 }
7919
7920 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7921 {
7922 /* Check that the return value locations are the same. For
7923 example that we aren't returning a value from the sibling in
7924 a VFP register but then need to transfer it to a core
7925 register. */
7926 rtx a, b;
7927 tree decl_or_type = decl;
7928
7929 /* If it is an indirect function pointer, get the function type. */
7930 if (!decl)
7931 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7932
7933 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7934 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7935 cfun->decl, false);
7936 if (!rtx_equal_p (a, b))
7937 return false;
7938 }
7939
7940 /* Never tailcall if function may be called with a misaligned SP. */
7941 if (IS_STACKALIGN (func_type))
7942 return false;
7943
7944 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7945 references should become a NOP. Don't convert such calls into
7946 sibling calls. */
7947 if (TARGET_AAPCS_BASED
7948 && arm_abi == ARM_ABI_AAPCS
7949 && decl
7950 && DECL_WEAK (decl))
7951 return false;
7952
7953 /* We cannot do a tailcall for an indirect call by descriptor if all the
7954 argument registers are used because the only register left to load the
7955 address is IP and it will already contain the static chain. */
7956 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7957 {
7958 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7959 CUMULATIVE_ARGS cum;
7960 cumulative_args_t cum_v;
7961
7962 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7963 cum_v = pack_cumulative_args (&cum);
7964
7965 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7966 {
7967 tree type = TREE_VALUE (t);
7968 if (!VOID_TYPE_P (type))
7969 {
7970 function_arg_info arg (type, /*named=*/true);
7971 arm_function_arg_advance (cum_v, arg);
7972 }
7973 }
7974
7975 function_arg_info arg (integer_type_node, /*named=*/true);
7976 if (!arm_function_arg (cum_v, arg))
7977 return false;
7978 }
7979
7980 /* Everything else is ok. */
7981 return true;
7982 }
7983
7984 \f
7985 /* Addressing mode support functions. */
7986
7987 /* Return nonzero if X is a legitimate immediate operand when compiling
7988 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7989 int
7990 legitimate_pic_operand_p (rtx x)
7991 {
7992 if (SYMBOL_REF_P (x)
7993 || (GET_CODE (x) == CONST
7994 && GET_CODE (XEXP (x, 0)) == PLUS
7995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7996 return 0;
7997
7998 return 1;
7999 }
8000
8001 /* Record that the current function needs a PIC register. If PIC_REG is null,
8002 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8003 both cases cfun->machine->pic_reg is initialized if we have not already done
8004 so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
8005 the PIC register is reloaded at the current position in the instruction stream
8006 regardless of whether it was loaded before.  Otherwise, it is only loaded
8007 if that has not already been done (crtl->uses_pic_offset_table is null).  Note that
8008 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8009 is only supported iff COMPUTE_NOW is false. */
8010
8011 static void
8012 require_pic_register (rtx pic_reg, bool compute_now)
8013 {
8014 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8015
8016 /* A lot of the logic here is made obscure by the fact that this
8017 routine gets called as part of the rtx cost estimation process.
8018 We don't want those calls to affect any assumptions about the real
8019 function; and further, we can't call entry_of_function() until we
8020 start the real expansion process. */
8021 if (!crtl->uses_pic_offset_table || compute_now)
8022 {
8023 gcc_assert (can_create_pseudo_p ()
8024 || (pic_reg != NULL_RTX
8025 && REG_P (pic_reg)
8026 && GET_MODE (pic_reg) == Pmode));
8027 if (arm_pic_register != INVALID_REGNUM
8028 && !compute_now
8029 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8030 {
8031 if (!cfun->machine->pic_reg)
8032 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8033
8034 /* Play games to avoid marking the function as needing pic
8035 if we are being called as part of the cost-estimation
8036 process. */
8037 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8038 crtl->uses_pic_offset_table = 1;
8039 }
8040 else
8041 {
8042 rtx_insn *seq, *insn;
8043
8044 if (pic_reg == NULL_RTX)
8045 pic_reg = gen_reg_rtx (Pmode);
8046 if (!cfun->machine->pic_reg)
8047 cfun->machine->pic_reg = pic_reg;
8048
8049 /* Play games to avoid marking the function as needing pic
8050 if we are being called as part of the cost-estimation
8051 process. */
8052 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8053 {
8054 crtl->uses_pic_offset_table = 1;
8055 start_sequence ();
8056
8057 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8058 && arm_pic_register > LAST_LO_REGNUM
8059 && !compute_now)
8060 emit_move_insn (cfun->machine->pic_reg,
8061 gen_rtx_REG (Pmode, arm_pic_register));
8062 else
8063 arm_load_pic_register (0UL, pic_reg);
8064
8065 seq = get_insns ();
8066 end_sequence ();
8067
8068 for (insn = seq; insn; insn = NEXT_INSN (insn))
8069 if (INSN_P (insn))
8070 INSN_LOCATION (insn) = prologue_location;
8071
8072 /* We can be called during expansion of PHI nodes, where
8073 we can't yet emit instructions directly in the final
8074 insn stream. Queue the insns on the entry edge, they will
8075 be committed after everything else is expanded. */
8076 if (currently_expanding_to_rtl)
8077 insert_insn_on_edge (seq,
8078 single_succ_edge
8079 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8080 else
8081 emit_insn (seq);
8082 }
8083 }
8084 }
8085 }
8086
8087 /* Generate insns to calculate the address of ORIG in pic mode. */
8088 static rtx_insn *
8089 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8090 {
8091 rtx pat;
8092 rtx mem;
8093
8094 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8095
8096 /* Make the MEM as close to a constant as possible. */
8097 mem = SET_SRC (pat);
8098 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8099 MEM_READONLY_P (mem) = 1;
8100 MEM_NOTRAP_P (mem) = 1;
8101
8102 return emit_insn (pat);
8103 }
8104
8105 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8106 created to hold the result of the load. If not NULL, PIC_REG indicates
8107 which register to use as PIC register, otherwise it is decided by register
8108 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8109 location in the instruction stream, regardless of whether it was loaded
8110 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8111 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8112
8113 Returns the register REG into which the PIC load is performed. */
8114
8115 rtx
8116 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8117 bool compute_now)
8118 {
8119 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8120
8121 if (SYMBOL_REF_P (orig)
8122 || LABEL_REF_P (orig))
8123 {
8124 if (reg == 0)
8125 {
8126 gcc_assert (can_create_pseudo_p ());
8127 reg = gen_reg_rtx (Pmode);
8128 }
8129
8130 /* VxWorks does not impose a fixed gap between segments; the run-time
8131 gap can be different from the object-file gap. We therefore can't
8132 use GOTOFF unless we are absolutely sure that the symbol is in the
8133 same segment as the GOT. Unfortunately, the flexibility of linker
8134 scripts means that we can't be sure of that in general, so assume
8135 that GOTOFF is never valid on VxWorks. */
8136 /* References to weak symbols cannot be resolved locally: they
8137 may be overridden by a non-weak definition at link time. */
8138 rtx_insn *insn;
8139 if ((LABEL_REF_P (orig)
8140 || (SYMBOL_REF_P (orig)
8141 && SYMBOL_REF_LOCAL_P (orig)
8142 && (SYMBOL_REF_DECL (orig)
8143 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8144 && (!SYMBOL_REF_FUNCTION_P (orig)
8145 || arm_fdpic_local_funcdesc_p (orig))))
8146 && NEED_GOT_RELOC
8147 && arm_pic_data_is_text_relative)
8148 insn = arm_pic_static_addr (orig, reg);
8149 else
8150 {
8151 /* If this function doesn't have a pic register, create one now. */
8152 require_pic_register (pic_reg, compute_now);
8153
8154 if (pic_reg == NULL_RTX)
8155 pic_reg = cfun->machine->pic_reg;
8156
8157 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8158 }
8159
8160 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8161 by loop. */
8162 set_unique_reg_note (insn, REG_EQUAL, orig);
8163
8164 return reg;
8165 }
8166 else if (GET_CODE (orig) == CONST)
8167 {
8168 rtx base, offset;
8169
8170 if (GET_CODE (XEXP (orig, 0)) == PLUS
8171 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8172 return orig;
8173
8174 /* Handle the case where we have: const (UNSPEC_TLS). */
8175 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8176 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8177 return orig;
8178
8179 /* Handle the case where we have:
8180 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8181 CONST_INT. */
8182 if (GET_CODE (XEXP (orig, 0)) == PLUS
8183 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8184 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8185 {
8186 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8187 return orig;
8188 }
8189
8190 if (reg == 0)
8191 {
8192 gcc_assert (can_create_pseudo_p ());
8193 reg = gen_reg_rtx (Pmode);
8194 }
8195
8196 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8197
8198 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8199 pic_reg, compute_now);
8200 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8201 base == reg ? 0 : reg, pic_reg,
8202 compute_now);
8203
8204 if (CONST_INT_P (offset))
8205 {
8206 /* The base register doesn't really matter, we only want to
8207 test the index for the appropriate mode. */
8208 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8209 {
8210 gcc_assert (can_create_pseudo_p ());
8211 offset = force_reg (Pmode, offset);
8212 }
8213
8214 if (CONST_INT_P (offset))
8215 return plus_constant (Pmode, base, INTVAL (offset));
8216 }
8217
8218 if (GET_MODE_SIZE (mode) > 4
8219 && (GET_MODE_CLASS (mode) == MODE_INT
8220 || TARGET_SOFT_FLOAT))
8221 {
8222 emit_insn (gen_addsi3 (reg, base, offset));
8223 return reg;
8224 }
8225
8226 return gen_rtx_PLUS (Pmode, base, offset);
8227 }
8228
8229 return orig;
8230 }
8231
8232
8233 /* Generate insns that produce the address of the stack canary.  */
8234 rtx
8235 arm_stack_protect_tls_canary_mem (bool reload)
8236 {
8237 rtx tp = gen_reg_rtx (SImode);
8238 if (reload)
8239 emit_insn (gen_reload_tp_hard (tp));
8240 else
8241 emit_insn (gen_load_tp_hard (tp));
8242
8243 rtx reg = gen_reg_rtx (SImode);
8244 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8245 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8246 return gen_rtx_MEM (SImode, reg);
8247 }
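/* A sketch of the command-line options that select this TLS-based
   canary load (the offset value is target/libc specific and given here
   only as a placeholder):

     -mstack-protector-guard=tls -mstack-protector-guard-offset=<offset>

   With the default -mstack-protector-guard=global, the canary is read
   from __stack_chk_guard and this function is not used.  */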
8248
8249
8250 /* Whether a register is callee saved or not. This is necessary because high
8251 registers are marked as caller saved when optimizing for size on Thumb-1
8252 targets, despite actually being callee saved, in order to avoid using them.  */
8253 #define callee_saved_reg_p(reg) \
8254 (!call_used_or_fixed_reg_p (reg) \
8255 || (TARGET_THUMB1 && optimize_size \
8256 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8257
8258 /* Return a mask for the call-clobbered low registers that are unused
8259 at the end of the prologue. */
8260 static unsigned long
8261 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8262 {
8263 unsigned long mask = 0;
8264 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8265
8266 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8267 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8268 mask |= 1 << (reg - FIRST_LO_REGNUM);
8269 return mask;
8270 }
8271
8272 /* Similarly for the start of the epilogue. */
8273 static unsigned long
8274 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8275 {
8276 unsigned long mask = 0;
8277 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8278
8279 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8280 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8281 mask |= 1 << (reg - FIRST_LO_REGNUM);
8282 return mask;
8283 }
8284
8285 /* Find a spare register to use during the prolog of a function. */
8286
8287 static int
8288 thumb_find_work_register (unsigned long pushed_regs_mask)
8289 {
8290 int reg;
8291
8292 unsigned long unused_regs
8293 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8294
8295 /* Check the argument registers first as these are call-used. The
8296 register allocation order means that sometimes r3 might be used
8297 but earlier argument registers might not, so check them all. */
8298 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8299 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8300 return reg;
8301
8302 /* Otherwise look for a call-saved register that is going to be pushed. */
8303 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8304 if (pushed_regs_mask & (1 << reg))
8305 return reg;
8306
8307 if (TARGET_THUMB2)
8308 {
8309 /* Thumb-2 can use high regs. */
8310 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8311 if (pushed_regs_mask & (1 << reg))
8312 return reg;
8313 }
8314 /* Something went wrong - thumb_compute_save_reg_mask()
8315 should have arranged for a suitable register to be pushed. */
8316 gcc_unreachable ();
8317 }
8318
8319 static GTY(()) int pic_labelno;
8320
8321 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8322 low register. */
8323
8324 void
8325 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8326 {
8327 rtx l1, labelno, pic_tmp, pic_rtx;
8328
8329 if (crtl->uses_pic_offset_table == 0
8330 || TARGET_SINGLE_PIC_BASE
8331 || TARGET_FDPIC)
8332 return;
8333
8334 gcc_assert (flag_pic);
8335
8336 if (pic_reg == NULL_RTX)
8337 pic_reg = cfun->machine->pic_reg;
8338 if (TARGET_VXWORKS_RTP)
8339 {
8340 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8341 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8342 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8343
8344 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8345
8346 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8347 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8348 }
8349 else
8350 {
8351 /* We use an UNSPEC rather than a LABEL_REF because this label
8352 never appears in the code stream. */
8353
8354 labelno = GEN_INT (pic_labelno++);
8355 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8356 l1 = gen_rtx_CONST (VOIDmode, l1);
8357
8358 /* On the ARM the PC register contains 'dot + 8' at the time of the
8359 addition, on the Thumb it is 'dot + 4'. */
8360 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8361 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8362 UNSPEC_GOTSYM_OFF);
8363 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8364
8365 if (TARGET_32BIT)
8366 {
8367 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8368 }
8369 else /* TARGET_THUMB1 */
8370 {
8371 if (arm_pic_register != INVALID_REGNUM
8372 && REGNO (pic_reg) > LAST_LO_REGNUM)
8373 {
8374 /* We will have pushed the pic register, so we should always be
8375 able to find a work register. */
8376 pic_tmp = gen_rtx_REG (SImode,
8377 thumb_find_work_register (saved_regs));
8378 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8379 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8380 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8381 }
8382 else if (arm_pic_register != INVALID_REGNUM
8383 && arm_pic_register > LAST_LO_REGNUM
8384 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8385 {
8386 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8387 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8388 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8389 }
8390 else
8391 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8392 }
8393 }
8394
8395 /* Need to emit this whether or not we obey regdecls,
8396 since setjmp/longjmp can cause life info to screw up. */
8397 emit_use (pic_reg);
8398 }
8399
8400 /* Try to determine whether an object, referenced via ORIG, will be
8401 placed in the text or data segment. This is used in FDPIC mode, to
8402 decide which relocations to use when accessing ORIG. *IS_READONLY
8403 is set to true if ORIG is a read-only location, false otherwise.
8404 Return true if we could determine the location of ORIG, false
8405 otherwise. *IS_READONLY is valid only when we return true. */
8406 static bool
8407 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8408 {
8409 *is_readonly = false;
8410
8411 if (LABEL_REF_P (orig))
8412 {
8413 *is_readonly = true;
8414 return true;
8415 }
8416
8417 if (SYMBOL_REF_P (orig))
8418 {
8419 if (CONSTANT_POOL_ADDRESS_P (orig))
8420 {
8421 *is_readonly = true;
8422 return true;
8423 }
8424 if (SYMBOL_REF_LOCAL_P (orig)
8425 && !SYMBOL_REF_EXTERNAL_P (orig)
8426 && SYMBOL_REF_DECL (orig)
8427 && (!DECL_P (SYMBOL_REF_DECL (orig))
8428 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8429 {
8430 tree decl = SYMBOL_REF_DECL (orig);
8431 tree init = (TREE_CODE (decl) == VAR_DECL)
8432 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8433 ? decl : 0;
8434 int reloc = 0;
8435 bool named_section, readonly;
8436
8437 if (init && init != error_mark_node)
8438 reloc = compute_reloc_for_constant (init);
8439
8440 named_section = TREE_CODE (decl) == VAR_DECL
8441 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8442 readonly = decl_readonly_section (decl, reloc);
8443
8444 /* We don't know where the link script will put a named
8445 section, so return false in such a case. */
8446 if (named_section)
8447 return false;
8448
8449 *is_readonly = readonly;
8450 return true;
8451 }
8452
8453 /* We don't know. */
8454 return false;
8455 }
8456
8457 gcc_unreachable ();
8458 }
8459
8460 /* Generate code to load the address of a static var when flag_pic is set. */
8461 static rtx_insn *
8462 arm_pic_static_addr (rtx orig, rtx reg)
8463 {
8464 rtx l1, labelno, offset_rtx;
8465 rtx_insn *insn;
8466
8467 gcc_assert (flag_pic);
8468
8469 bool is_readonly = false;
8470 bool info_known = false;
8471
8472 if (TARGET_FDPIC
8473 && SYMBOL_REF_P (orig)
8474 && !SYMBOL_REF_FUNCTION_P (orig))
8475 info_known = arm_is_segment_info_known (orig, &is_readonly);
8476
8477 if (TARGET_FDPIC
8478 && SYMBOL_REF_P (orig)
8479 && !SYMBOL_REF_FUNCTION_P (orig)
8480 && !info_known)
8481 {
8482 /* We don't know where orig is stored, so we have to be
8483 pessimistic and use a GOT relocation. */
8484 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8485
8486 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8487 }
8488 else if (TARGET_FDPIC
8489 && SYMBOL_REF_P (orig)
8490 && (SYMBOL_REF_FUNCTION_P (orig)
8491 || !is_readonly))
8492 {
8493 /* We use the GOTOFF relocation. */
8494 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8495
8496 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8497 emit_insn (gen_movsi (reg, l1));
8498 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8499 }
8500 else
8501 {
8502 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8503 PC-relative access. */
8504 /* We use an UNSPEC rather than a LABEL_REF because this label
8505 never appears in the code stream. */
8506 labelno = GEN_INT (pic_labelno++);
8507 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8508 l1 = gen_rtx_CONST (VOIDmode, l1);
8509
8510 /* On the ARM the PC register contains 'dot + 8' at the time of the
8511 addition, on the Thumb it is 'dot + 4'. */
8512 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8513 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8514 UNSPEC_SYMBOL_OFFSET);
8515 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8516
8517 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8518 labelno));
8519 }
8520
8521 return insn;
8522 }
8523
8524 /* Return nonzero if X is valid as an ARM state addressing register. */
8525 static int
8526 arm_address_register_rtx_p (rtx x, int strict_p)
8527 {
8528 int regno;
8529
8530 if (!REG_P (x))
8531 return 0;
8532
8533 regno = REGNO (x);
8534
8535 if (strict_p)
8536 return ARM_REGNO_OK_FOR_BASE_P (regno);
8537
8538 return (regno <= LAST_ARM_REGNUM
8539 || regno >= FIRST_PSEUDO_REGISTER
8540 || regno == FRAME_POINTER_REGNUM
8541 || regno == ARG_POINTER_REGNUM);
8542 }
8543
8544 /* Return TRUE if this rtx is the difference of a symbol and a label,
8545 and will reduce to a PC-relative relocation in the object file.
8546 Expressions like this can be left alone when generating PIC, rather
8547 than forced through the GOT. */
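/* For instance (an illustrative RTX), (minus (symbol_ref "sym") (label_ref L))
   reduces to the assembly-time difference "sym - .L", which the assembler can
   resolve without a GOT entry. */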
8548 static int
8549 pcrel_constant_p (rtx x)
8550 {
8551 if (GET_CODE (x) == MINUS)
8552 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8553
8554 return FALSE;
8555 }
8556
8557 /* Return true if X will surely end up in an index register after next
8558 splitting pass. */
8559 static bool
8560 will_be_in_index_register (const_rtx x)
8561 {
8562 /* arm.md: calculate_pic_address will split this into a register. */
8563 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8564 }
8565
8566 /* Return nonzero if X is a valid ARM state address operand. */
8567 int
8568 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8569 int strict_p)
8570 {
8571 bool use_ldrd;
8572 enum rtx_code code = GET_CODE (x);
8573
8574 if (arm_address_register_rtx_p (x, strict_p))
8575 return 1;
8576
8577 use_ldrd = (TARGET_LDRD
8578 && (mode == DImode || mode == DFmode));
8579
8580 if (code == POST_INC || code == PRE_DEC
8581 || ((code == PRE_INC || code == POST_DEC)
8582 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8583 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8584
8585 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8586 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8587 && GET_CODE (XEXP (x, 1)) == PLUS
8588 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8589 {
8590 rtx addend = XEXP (XEXP (x, 1), 1);
8591
8592 /* Don't allow ldrd post-increment by register because it's hard
8593 to fix up invalid register choices. */
8594 if (use_ldrd
8595 && GET_CODE (x) == POST_MODIFY
8596 && REG_P (addend))
8597 return 0;
8598
8599 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8600 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8601 }
8602
8603 /* After reload constants split into minipools will have addresses
8604 from a LABEL_REF. */
8605 else if (reload_completed
8606 && (code == LABEL_REF
8607 || (code == CONST
8608 && GET_CODE (XEXP (x, 0)) == PLUS
8609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8610 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8611 return 1;
8612
8613 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8614 return 0;
8615
8616 else if (code == PLUS)
8617 {
8618 rtx xop0 = XEXP (x, 0);
8619 rtx xop1 = XEXP (x, 1);
8620
8621 return ((arm_address_register_rtx_p (xop0, strict_p)
8622 && ((CONST_INT_P (xop1)
8623 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8624 || (!strict_p && will_be_in_index_register (xop1))))
8625 || (arm_address_register_rtx_p (xop1, strict_p)
8626 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8627 }
8628
8629 #if 0
8630 /* Reload currently can't handle MINUS, so disable this for now */
8631 else if (GET_CODE (x) == MINUS)
8632 {
8633 rtx xop0 = XEXP (x, 0);
8634 rtx xop1 = XEXP (x, 1);
8635
8636 return (arm_address_register_rtx_p (xop0, strict_p)
8637 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8638 }
8639 #endif
8640
8641 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8642 && code == SYMBOL_REF
8643 && CONSTANT_POOL_ADDRESS_P (x)
8644 && ! (flag_pic
8645 && symbol_mentioned_p (get_pool_constant (x))
8646 && ! pcrel_constant_p (get_pool_constant (x))))
8647 return 1;
8648
8649 return 0;
8650 }
8651
8652 /* Return true if we can avoid creating a constant pool entry for x. */
8653 static bool
8654 can_avoid_literal_pool_for_label_p (rtx x)
8655 {
8656 /* Normally we can assign constant values to target registers without
8657 the help of the constant pool. But there are cases where we have to use
8658 the constant pool, for example:
8659 1) assigning a label to a register.
8660 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8661
8662 A constant pool access of the form:
8663 (set (reg r0) (mem (symbol_ref (".LC0"))))
8664 will cause the use of the literal pool (later, in function arm_reorg).
8665 So here we mark such a form as invalid; the compiler will then
8666 adjust it into:
8667 (set (reg r0) (symbol_ref (".LC0")))
8668 (set (reg r0) (mem (reg r0))).
8669 No extra register is required, and (mem (reg r0)) won't cause the use
8670 of literal pools. */
8671 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8672 && CONSTANT_POOL_ADDRESS_P (x))
8673 return 1;
8674 return 0;
8675 }
8676
8677
8678 /* Return nonzero if X is a valid Thumb-2 address operand. */
8679 static int
8680 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8681 {
8682 bool use_ldrd;
8683 enum rtx_code code = GET_CODE (x);
8684
8685 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8686 we can store and load it like any other 16-bit value. */
8687 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8688 mode = HImode;
8689
8690 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8691 return mve_vector_mem_operand (mode, x, strict_p);
8692
8693 if (arm_address_register_rtx_p (x, strict_p))
8694 return 1;
8695
8696 use_ldrd = (TARGET_LDRD
8697 && (mode == DImode || mode == DFmode));
8698
8699 if (code == POST_INC || code == PRE_DEC
8700 || ((code == PRE_INC || code == POST_DEC)
8701 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8702 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8703
8704 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8705 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8706 && GET_CODE (XEXP (x, 1)) == PLUS
8707 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8708 {
8709 /* Thumb-2 only has autoincrement by constant. */
8710 rtx addend = XEXP (XEXP (x, 1), 1);
8711 HOST_WIDE_INT offset;
8712
8713 if (!CONST_INT_P (addend))
8714 return 0;
8715
8716 offset = INTVAL(addend);
8717 if (GET_MODE_SIZE (mode) <= 4)
8718 return (offset > -256 && offset < 256);
8719
8720 return (use_ldrd && offset > -1024 && offset < 1024
8721 && (offset & 3) == 0);
8722 }
8723
8724 /* After reload constants split into minipools will have addresses
8725 from a LABEL_REF. */
8726 else if (reload_completed
8727 && (code == LABEL_REF
8728 || (code == CONST
8729 && GET_CODE (XEXP (x, 0)) == PLUS
8730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8731 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8732 return 1;
8733
8734 else if (mode == TImode
8735 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8736 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8737 return 0;
8738
8739 else if (code == PLUS)
8740 {
8741 rtx xop0 = XEXP (x, 0);
8742 rtx xop1 = XEXP (x, 1);
8743
8744 return ((arm_address_register_rtx_p (xop0, strict_p)
8745 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8746 || (!strict_p && will_be_in_index_register (xop1))))
8747 || (arm_address_register_rtx_p (xop1, strict_p)
8748 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8749 }
8750
8751 else if (can_avoid_literal_pool_for_label_p (x))
8752 return 0;
8753
8754 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8755 && code == SYMBOL_REF
8756 && CONSTANT_POOL_ADDRESS_P (x)
8757 && ! (flag_pic
8758 && symbol_mentioned_p (get_pool_constant (x))
8759 && ! pcrel_constant_p (get_pool_constant (x))))
8760 return 1;
8761
8762 return 0;
8763 }
8764
8765 /* Return nonzero if INDEX is valid for an address index operand in
8766 ARM state. */
8767 static int
8768 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8769 int strict_p)
8770 {
8771 HOST_WIDE_INT range;
8772 enum rtx_code code = GET_CODE (index);
8773
8774 /* Standard coprocessor addressing modes. */
8775 if (TARGET_HARD_FLOAT
8776 && (mode == SFmode || mode == DFmode))
8777 return (code == CONST_INT && INTVAL (index) < 1024
8778 && INTVAL (index) > -1024
8779 && (INTVAL (index) & 3) == 0);
8780
8781 /* For quad modes, we restrict the constant offset to be slightly less
8782 than what the instruction format permits. We do this because for
8783 quad mode moves, we will actually decompose them into two separate
8784 double-mode reads or writes. INDEX must therefore be a valid
8785 (double-mode) offset and so should INDEX+8. */
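/* E.g. with the 1016 limit the largest accepted offset is 1012, and the
   second half of a split quad-word access then uses 1012 + 8 = 1020,
   which is still a valid double-mode offset. */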
8786 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8787 return (code == CONST_INT
8788 && INTVAL (index) < 1016
8789 && INTVAL (index) > -1024
8790 && (INTVAL (index) & 3) == 0);
8791
8792 /* We have no such constraint on double mode offsets, so we permit the
8793 full range of the instruction format. */
8794 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8795 return (code == CONST_INT
8796 && INTVAL (index) < 1024
8797 && INTVAL (index) > -1024
8798 && (INTVAL (index) & 3) == 0);
8799
8800 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8801 return (code == CONST_INT
8802 && INTVAL (index) < 1024
8803 && INTVAL (index) > -1024
8804 && (INTVAL (index) & 3) == 0);
8805
8806 if (arm_address_register_rtx_p (index, strict_p)
8807 && (GET_MODE_SIZE (mode) <= 4))
8808 return 1;
8809
8810 if (mode == DImode || mode == DFmode)
8811 {
8812 if (code == CONST_INT)
8813 {
8814 HOST_WIDE_INT val = INTVAL (index);
8815
8816 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8817 If vldr is selected it uses arm_coproc_mem_operand. */
8818 if (TARGET_LDRD)
8819 return val > -256 && val < 256;
8820 else
8821 return val > -4096 && val < 4092;
8822 }
8823
8824 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8825 }
8826
8827 if (GET_MODE_SIZE (mode) <= 4
8828 && ! (arm_arch4
8829 && (mode == HImode
8830 || mode == HFmode
8831 || (mode == QImode && outer == SIGN_EXTEND))))
8832 {
8833 if (code == MULT)
8834 {
8835 rtx xiop0 = XEXP (index, 0);
8836 rtx xiop1 = XEXP (index, 1);
8837
8838 return ((arm_address_register_rtx_p (xiop0, strict_p)
8839 && power_of_two_operand (xiop1, SImode))
8840 || (arm_address_register_rtx_p (xiop1, strict_p)
8841 && power_of_two_operand (xiop0, SImode)));
8842 }
8843 else if (code == LSHIFTRT || code == ASHIFTRT
8844 || code == ASHIFT || code == ROTATERT)
8845 {
8846 rtx op = XEXP (index, 1);
8847
8848 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8849 && CONST_INT_P (op)
8850 && INTVAL (op) > 0
8851 && INTVAL (op) <= 31);
8852 }
8853 }
8854
8855 /* For ARM v4 we may be doing a sign-extend operation during the
8856 load. */
8857 if (arm_arch4)
8858 {
8859 if (mode == HImode
8860 || mode == HFmode
8861 || (outer == SIGN_EXTEND && mode == QImode))
8862 range = 256;
8863 else
8864 range = 4096;
8865 }
8866 else
8867 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8868
8869 return (code == CONST_INT
8870 && INTVAL (index) < range
8871 && INTVAL (index) > -range);
8872 }
8873
8874 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8875 index operand, i.e. 1, 2, 4 or 8. */
8876 static bool
8877 thumb2_index_mul_operand (rtx op)
8878 {
8879 HOST_WIDE_INT val;
8880
8881 if (!CONST_INT_P (op))
8882 return false;
8883
8884 val = INTVAL(op);
8885 return (val == 1 || val == 2 || val == 4 || val == 8);
8886 }
8887
8888 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8889 static int
8890 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8891 {
8892 enum rtx_code code = GET_CODE (index);
8893
8894 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8895 /* Standard coprocessor addressing modes. */
8896 if (TARGET_VFP_BASE
8897 && (mode == SFmode || mode == DFmode))
8898 return (code == CONST_INT && INTVAL (index) < 1024
8899 /* Thumb-2 allows only a > -256 index range for its core register
8900 load/stores. Since we allow SF/DF in core registers, we have
8901 to use the intersection between -256~4096 (core) and -1024~1024
8902 (coprocessor). */
8903 && INTVAL (index) > -256
8904 && (INTVAL (index) & 3) == 0);
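      /* Restating the checks above: word-aligned offsets in [-252, 1020]. */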
8905
8906 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8907 {
8908 /* For DImode assume values will usually live in core regs
8909 and only allow LDRD addressing modes. */
8910 if (!TARGET_LDRD || mode != DImode)
8911 return (code == CONST_INT
8912 && INTVAL (index) < 1024
8913 && INTVAL (index) > -1024
8914 && (INTVAL (index) & 3) == 0);
8915 }
8916
8917 /* For quad modes, we restrict the constant offset to be slightly less
8918 than what the instruction format permits. We do this because for
8919 quad mode moves, we will actually decompose them into two separate
8920 double-mode reads or writes. INDEX must therefore be a valid
8921 (double-mode) offset and so should INDEX+8. */
8922 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8923 return (code == CONST_INT
8924 && INTVAL (index) < 1016
8925 && INTVAL (index) > -1024
8926 && (INTVAL (index) & 3) == 0);
8927
8928 /* We have no such constraint on double mode offsets, so we permit the
8929 full range of the instruction format. */
8930 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8931 return (code == CONST_INT
8932 && INTVAL (index) < 1024
8933 && INTVAL (index) > -1024
8934 && (INTVAL (index) & 3) == 0);
8935
8936 if (arm_address_register_rtx_p (index, strict_p)
8937 && (GET_MODE_SIZE (mode) <= 4))
8938 return 1;
8939
8940 if (mode == DImode || mode == DFmode)
8941 {
8942 if (code == CONST_INT)
8943 {
8944 HOST_WIDE_INT val = INTVAL (index);
8945 /* Thumb-2 ldrd only has reg+const addressing modes.
8946 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8947 If vldr is selected it uses arm_coproc_mem_operand. */
8948 if (TARGET_LDRD)
8949 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8950 else
8951 return IN_RANGE (val, -255, 4095 - 4);
8952 }
8953 else
8954 return 0;
8955 }
8956
8957 if (code == MULT)
8958 {
8959 rtx xiop0 = XEXP (index, 0);
8960 rtx xiop1 = XEXP (index, 1);
8961
8962 return ((arm_address_register_rtx_p (xiop0, strict_p)
8963 && thumb2_index_mul_operand (xiop1))
8964 || (arm_address_register_rtx_p (xiop1, strict_p)
8965 && thumb2_index_mul_operand (xiop0)));
8966 }
8967 else if (code == ASHIFT)
8968 {
8969 rtx op = XEXP (index, 1);
8970
8971 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8972 && CONST_INT_P (op)
8973 && INTVAL (op) > 0
8974 && INTVAL (op) <= 3);
8975 }
8976
8977 return (code == CONST_INT
8978 && INTVAL (index) < 4096
8979 && INTVAL (index) > -256);
8980 }
8981
8982 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8983 static int
8984 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8985 {
8986 int regno;
8987
8988 if (!REG_P (x))
8989 return 0;
8990
8991 regno = REGNO (x);
8992
8993 if (strict_p)
8994 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8995
8996 return (regno <= LAST_LO_REGNUM
8997 || regno > LAST_VIRTUAL_REGISTER
8998 || regno == FRAME_POINTER_REGNUM
8999 || (GET_MODE_SIZE (mode) >= 4
9000 && (regno == STACK_POINTER_REGNUM
9001 || regno >= FIRST_PSEUDO_REGISTER
9002 || x == hard_frame_pointer_rtx
9003 || x == arg_pointer_rtx)));
9004 }
9005
9006 /* Return nonzero if x is a legitimate index register. This is the case
9007 for any base register that can access a QImode object. */
9008 inline static int
9009 thumb1_index_register_rtx_p (rtx x, int strict_p)
9010 {
9011 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9012 }
9013
9014 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9015
9016 The AP may be eliminated to either the SP or the FP, so we use the
9017 least common denominator, e.g. SImode, and offsets from 0 to 64.
9018
9019 ??? Verify whether the above is the right approach.
9020
9021 ??? Also, the FP may be eliminated to the SP, so perhaps that
9022 needs special handling also.
9023
9024 ??? Look at how the mips16 port solves this problem. It probably uses
9025 better ways to solve some of these problems.
9026
9027 Although it is not incorrect, we don't accept QImode and HImode
9028 addresses based on the frame pointer or arg pointer until the
9029 reload pass starts. This is so that eliminating such addresses
9030 into stack based ones won't produce impossible code. */
9031 int
9032 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9033 {
9034 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9035 return 0;
9036
9037 /* ??? Not clear if this is right. Experiment. */
9038 if (GET_MODE_SIZE (mode) < 4
9039 && !(reload_in_progress || reload_completed)
9040 && (reg_mentioned_p (frame_pointer_rtx, x)
9041 || reg_mentioned_p (arg_pointer_rtx, x)
9042 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9043 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9044 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9045 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9046 return 0;
9047
9048 /* Accept any base register. SP only in SImode or larger. */
9049 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9050 return 1;
9051
9052 /* This is PC relative data before arm_reorg runs. */
9053 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9054 && SYMBOL_REF_P (x)
9055 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9056 && !arm_disable_literal_pool)
9057 return 1;
9058
9059 /* This is PC relative data after arm_reorg runs. */
9060 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9061 && reload_completed
9062 && (LABEL_REF_P (x)
9063 || (GET_CODE (x) == CONST
9064 && GET_CODE (XEXP (x, 0)) == PLUS
9065 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9066 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9067 return 1;
9068
9069 /* Post-inc indexing is only supported for SImode and larger. */
9070 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9071 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9072 return 1;
9073
9074 else if (GET_CODE (x) == PLUS)
9075 {
9076 /* REG+REG address can be any two index registers. */
9077 /* We disallow FRAME+REG addressing since we know that FRAME
9078 will be replaced with STACK, and SP relative addressing only
9079 permits SP+OFFSET. */
9080 if (GET_MODE_SIZE (mode) <= 4
9081 && XEXP (x, 0) != frame_pointer_rtx
9082 && XEXP (x, 1) != frame_pointer_rtx
9083 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9084 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9085 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9086 return 1;
9087
9088 /* REG+const has 5-7 bit offset for non-SP registers. */
9089 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9090 || XEXP (x, 0) == arg_pointer_rtx)
9091 && CONST_INT_P (XEXP (x, 1))
9092 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9093 return 1;
9094
9095 /* REG+const has 10-bit offset for SP, but only SImode and
9096 larger are supported. */
9097 /* ??? Should probably check for DI/DFmode overflow here
9098 just like GO_IF_LEGITIMATE_OFFSET does. */
9099 else if (REG_P (XEXP (x, 0))
9100 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9101 && GET_MODE_SIZE (mode) >= 4
9102 && CONST_INT_P (XEXP (x, 1))
9103 && INTVAL (XEXP (x, 1)) >= 0
9104 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9105 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9106 return 1;
9107
9108 else if (REG_P (XEXP (x, 0))
9109 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9110 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9111 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9112 && REGNO (XEXP (x, 0))
9113 <= LAST_VIRTUAL_POINTER_REGISTER))
9114 && GET_MODE_SIZE (mode) >= 4
9115 && CONST_INT_P (XEXP (x, 1))
9116 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9117 return 1;
9118 }
9119
9120 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9121 && GET_MODE_SIZE (mode) == 4
9122 && SYMBOL_REF_P (x)
9123 && CONSTANT_POOL_ADDRESS_P (x)
9124 && !arm_disable_literal_pool
9125 && ! (flag_pic
9126 && symbol_mentioned_p (get_pool_constant (x))
9127 && ! pcrel_constant_p (get_pool_constant (x))))
9128 return 1;
9129
9130 return 0;
9131 }
9132
9133 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9134 instruction of mode MODE. */
9135 int
9136 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9137 {
9138 switch (GET_MODE_SIZE (mode))
9139 {
9140 case 1:
9141 return val >= 0 && val < 32;
9142
9143 case 2:
9144 return val >= 0 && val < 64 && (val & 1) == 0;
9145
9146 default:
9147 return (val >= 0
9148 && (val + GET_MODE_SIZE (mode)) <= 128
9149 && (val & 3) == 0);
9150 }
9151 }
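/* For example, by the ranges above a QImode access accepts offsets 0..31,
   HImode 0..62 (even), SImode 0..124 (word-aligned) and DImode 0..120
   (word-aligned). */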
9152
9153 bool
9154 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9155 {
9156 if (TARGET_ARM)
9157 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9158 else if (TARGET_THUMB2)
9159 return thumb2_legitimate_address_p (mode, x, strict_p);
9160 else /* if (TARGET_THUMB1) */
9161 return thumb1_legitimate_address_p (mode, x, strict_p);
9162 }
9163
9164 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9165
9166 Given an rtx X being reloaded into a reg required to be
9167 in class CLASS, return the class of reg to actually use.
9168 In general this is just CLASS, but for the Thumb core registers and
9169 immediate constants we prefer a LO_REGS class or a subset. */
9170
9171 static reg_class_t
9172 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9173 {
9174 if (TARGET_32BIT)
9175 return rclass;
9176 else
9177 {
9178 if (rclass == GENERAL_REGS)
9179 return LO_REGS;
9180 else
9181 return rclass;
9182 }
9183 }
9184
9185 /* Build the SYMBOL_REF for __tls_get_addr. */
9186
9187 static GTY(()) rtx tls_get_addr_libfunc;
9188
9189 static rtx
9190 get_tls_get_addr (void)
9191 {
9192 if (!tls_get_addr_libfunc)
9193 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9194 return tls_get_addr_libfunc;
9195 }
9196
9197 rtx
9198 arm_load_tp (rtx target)
9199 {
9200 if (!target)
9201 target = gen_reg_rtx (SImode);
9202
9203 if (TARGET_HARD_TP)
9204 {
9205 /* Can return in any reg. */
9206 emit_insn (gen_load_tp_hard (target));
9207 }
9208 else
9209 {
9210 /* Always returned in r0. Immediately copy the result into a pseudo,
9211 otherwise other uses of r0 (e.g. setting up function arguments) may
9212 clobber the value. */
9213
9214 rtx tmp;
9215
9216 if (TARGET_FDPIC)
9217 {
9218 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9219 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9220
9221 emit_insn (gen_load_tp_soft_fdpic ());
9222
9223 /* Restore r9. */
9224 emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9225 }
9226 else
9227 emit_insn (gen_load_tp_soft ());
9228
9229 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9230 emit_move_insn (target, tmp);
9231 }
9232 return target;
9233 }
9234
9235 static rtx
9236 load_tls_operand (rtx x, rtx reg)
9237 {
9238 rtx tmp;
9239
9240 if (reg == NULL_RTX)
9241 reg = gen_reg_rtx (SImode);
9242
9243 tmp = gen_rtx_CONST (SImode, x);
9244
9245 emit_move_insn (reg, tmp);
9246
9247 return reg;
9248 }
9249
9250 static rtx_insn *
9251 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9252 {
9253 rtx label, labelno = NULL_RTX, sum;
9254
9255 gcc_assert (reloc != TLS_DESCSEQ);
9256 start_sequence ();
9257
9258 if (TARGET_FDPIC)
9259 {
9260 sum = gen_rtx_UNSPEC (Pmode,
9261 gen_rtvec (2, x, GEN_INT (reloc)),
9262 UNSPEC_TLS);
9263 }
9264 else
9265 {
9266 labelno = GEN_INT (pic_labelno++);
9267 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9268 label = gen_rtx_CONST (VOIDmode, label);
9269
9270 sum = gen_rtx_UNSPEC (Pmode,
9271 gen_rtvec (4, x, GEN_INT (reloc), label,
9272 GEN_INT (TARGET_ARM ? 8 : 4)),
9273 UNSPEC_TLS);
9274 }
9275 reg = load_tls_operand (sum, reg);
9276
9277 if (TARGET_FDPIC)
9278 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9279 else if (TARGET_ARM)
9280 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9281 else
9282 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9283
9284 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9285 LCT_PURE, /* LCT_CONST? */
9286 Pmode, reg, Pmode);
9287
9288 rtx_insn *insns = get_insns ();
9289 end_sequence ();
9290
9291 return insns;
9292 }
9293
9294 static rtx
9295 arm_tls_descseq_addr (rtx x, rtx reg)
9296 {
9297 rtx labelno = GEN_INT (pic_labelno++);
9298 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9299 rtx sum = gen_rtx_UNSPEC (Pmode,
9300 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9301 gen_rtx_CONST (VOIDmode, label),
9302 GEN_INT (!TARGET_ARM)),
9303 UNSPEC_TLS);
9304 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9305
9306 emit_insn (gen_tlscall (x, labelno));
9307 if (!reg)
9308 reg = gen_reg_rtx (SImode);
9309 else
9310 gcc_assert (REGNO (reg) != R0_REGNUM);
9311
9312 emit_move_insn (reg, reg0);
9313
9314 return reg;
9315 }
9316
9317
9318 rtx
9319 legitimize_tls_address (rtx x, rtx reg)
9320 {
9321 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9322 rtx_insn *insns;
9323 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9324
9325 switch (model)
9326 {
9327 case TLS_MODEL_GLOBAL_DYNAMIC:
9328 if (TARGET_GNU2_TLS)
9329 {
9330 gcc_assert (!TARGET_FDPIC);
9331
9332 reg = arm_tls_descseq_addr (x, reg);
9333
9334 tp = arm_load_tp (NULL_RTX);
9335
9336 dest = gen_rtx_PLUS (Pmode, tp, reg);
9337 }
9338 else
9339 {
9340 /* Original scheme */
9341 if (TARGET_FDPIC)
9342 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9343 else
9344 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9345 dest = gen_reg_rtx (Pmode);
9346 emit_libcall_block (insns, dest, ret, x);
9347 }
9348 return dest;
9349
9350 case TLS_MODEL_LOCAL_DYNAMIC:
9351 if (TARGET_GNU2_TLS)
9352 {
9353 gcc_assert (!TARGET_FDPIC);
9354
9355 reg = arm_tls_descseq_addr (x, reg);
9356
9357 tp = arm_load_tp (NULL_RTX);
9358
9359 dest = gen_rtx_PLUS (Pmode, tp, reg);
9360 }
9361 else
9362 {
9363 if (TARGET_FDPIC)
9364 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9365 else
9366 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9367
9368 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9369 share the LDM result with other LD model accesses. */
9370 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9371 UNSPEC_TLS);
9372 dest = gen_reg_rtx (Pmode);
9373 emit_libcall_block (insns, dest, ret, eqv);
9374
9375 /* Load the addend. */
9376 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9377 GEN_INT (TLS_LDO32)),
9378 UNSPEC_TLS);
9379 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9380 dest = gen_rtx_PLUS (Pmode, dest, addend);
9381 }
9382 return dest;
9383
9384 case TLS_MODEL_INITIAL_EXEC:
9385 if (TARGET_FDPIC)
9386 {
9387 sum = gen_rtx_UNSPEC (Pmode,
9388 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9389 UNSPEC_TLS);
9390 reg = load_tls_operand (sum, reg);
9391 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9392 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9393 }
9394 else
9395 {
9396 labelno = GEN_INT (pic_labelno++);
9397 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9398 label = gen_rtx_CONST (VOIDmode, label);
9399 sum = gen_rtx_UNSPEC (Pmode,
9400 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9401 GEN_INT (TARGET_ARM ? 8 : 4)),
9402 UNSPEC_TLS);
9403 reg = load_tls_operand (sum, reg);
9404
9405 if (TARGET_ARM)
9406 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9407 else if (TARGET_THUMB2)
9408 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9409 else
9410 {
9411 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9412 emit_move_insn (reg, gen_const_mem (SImode, reg));
9413 }
9414 }
9415
9416 tp = arm_load_tp (NULL_RTX);
9417
9418 return gen_rtx_PLUS (Pmode, tp, reg);
9419
9420 case TLS_MODEL_LOCAL_EXEC:
9421 tp = arm_load_tp (NULL_RTX);
9422
9423 reg = gen_rtx_UNSPEC (Pmode,
9424 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9425 UNSPEC_TLS);
9426 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9427
9428 return gen_rtx_PLUS (Pmode, tp, reg);
9429
9430 default:
9431 abort ();
9432 }
9433 }
9434
9435 /* Try machine-dependent ways of modifying an illegitimate address
9436 to be legitimate. If we find one, return the new, valid address. */
9437 rtx
9438 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9439 {
9440 if (arm_tls_referenced_p (x))
9441 {
9442 rtx addend = NULL;
9443
9444 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9445 {
9446 addend = XEXP (XEXP (x, 0), 1);
9447 x = XEXP (XEXP (x, 0), 0);
9448 }
9449
9450 if (!SYMBOL_REF_P (x))
9451 return x;
9452
9453 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9454
9455 x = legitimize_tls_address (x, NULL_RTX);
9456
9457 if (addend)
9458 {
9459 x = gen_rtx_PLUS (SImode, x, addend);
9460 orig_x = x;
9461 }
9462 else
9463 return x;
9464 }
9465
9466 if (TARGET_THUMB1)
9467 return thumb_legitimize_address (x, orig_x, mode);
9468
9469 if (GET_CODE (x) == PLUS)
9470 {
9471 rtx xop0 = XEXP (x, 0);
9472 rtx xop1 = XEXP (x, 1);
9473
9474 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9475 xop0 = force_reg (SImode, xop0);
9476
9477 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9478 && !symbol_mentioned_p (xop1))
9479 xop1 = force_reg (SImode, xop1);
9480
9481 if (ARM_BASE_REGISTER_RTX_P (xop0)
9482 && CONST_INT_P (xop1))
9483 {
9484 HOST_WIDE_INT n, low_n;
9485 rtx base_reg, val;
9486 n = INTVAL (xop1);
9487
9488 /* VFP addressing modes actually allow greater offsets, but for
9489 now we just stick with the lowest common denominator. */
9490 if (mode == DImode || mode == DFmode)
9491 {
9492 low_n = n & 0x0f;
9493 n &= ~0x0f;
9494 if (low_n > 4)
9495 {
9496 n += 16;
9497 low_n -= 16;
9498 }
9499 }
9500 else
9501 {
9502 low_n = ((mode) == TImode ? 0
9503 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9504 n -= low_n;
9505 }
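      /* Illustrative: for DFmode and n = 0x104 the split gives low_n = 4 and
	 n = 0x100, so base_reg below holds xop0 + 0x100 and the access becomes
	 [base_reg, #4]. */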
9506
9507 base_reg = gen_reg_rtx (SImode);
9508 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9509 emit_move_insn (base_reg, val);
9510 x = plus_constant (Pmode, base_reg, low_n);
9511 }
9512 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9513 x = gen_rtx_PLUS (SImode, xop0, xop1);
9514 }
9515
9516 /* XXX We don't allow MINUS any more -- see comment in
9517 arm_legitimate_address_outer_p (). */
9518 else if (GET_CODE (x) == MINUS)
9519 {
9520 rtx xop0 = XEXP (x, 0);
9521 rtx xop1 = XEXP (x, 1);
9522
9523 if (CONSTANT_P (xop0))
9524 xop0 = force_reg (SImode, xop0);
9525
9526 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9527 xop1 = force_reg (SImode, xop1);
9528
9529 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9530 x = gen_rtx_MINUS (SImode, xop0, xop1);
9531 }
9532
9533 /* Make sure to take full advantage of the pre-indexed addressing mode
9534 with absolute addresses, which often allows the base register to be
9535 factored out across multiple adjacent memory references, and might
9536 even allow the minipool to be avoided entirely. */
9537 else if (CONST_INT_P (x) && optimize > 0)
9538 {
9539 unsigned int bits;
9540 HOST_WIDE_INT mask, base, index;
9541 rtx base_reg;
9542
9543 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9544 only use an 8-bit index. So let's use a 12-bit index for
9545 SImode only and hope that arm_gen_constant will enable LDRB
9546 to use more bits. */
9547 bits = (mode == SImode) ? 12 : 8;
9548 mask = (1 << bits) - 1;
9549 base = INTVAL (x) & ~mask;
9550 index = INTVAL (x) & mask;
9551 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9552 {
9553 /* It'll most probably be more efficient to generate the
9554 base with more bits set and use a negative index instead.
9555 Don't do this for Thumb as negative offsets are much more
9556 limited. */
9557 base |= mask;
9558 index -= mask;
9559 }
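      /* Worked example (illustrative): for SImode and x = 0x12345678 we get
	 mask = 0xfff, base = 0x12345000 and index = 0x678, so the base is
	 materialised once and the access becomes [base_reg, #0x678]. */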
9560 base_reg = force_reg (SImode, GEN_INT (base));
9561 x = plus_constant (Pmode, base_reg, index);
9562 }
9563
9564 if (flag_pic)
9565 {
9566 /* We need to find and carefully transform any SYMBOL and LABEL
9567 references; so go back to the original address expression. */
9568 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9569 false /*compute_now*/);
9570
9571 if (new_x != orig_x)
9572 x = new_x;
9573 }
9574
9575 return x;
9576 }
9577
9578
9579 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9580 to be legitimate. If we find one, return the new, valid address. */
9581 rtx
9582 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9583 {
9584 if (GET_CODE (x) == PLUS
9585 && CONST_INT_P (XEXP (x, 1))
9586 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9587 || INTVAL (XEXP (x, 1)) < 0))
9588 {
9589 rtx xop0 = XEXP (x, 0);
9590 rtx xop1 = XEXP (x, 1);
9591 HOST_WIDE_INT offset = INTVAL (xop1);
9592
9593 /* Try and fold the offset into a biasing of the base register and
9594 then offsetting that. Don't do this when optimizing for space
9595 since it can cause too many CSEs. */
9596 if (optimize_size && offset >= 0
9597 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9598 {
9599 HOST_WIDE_INT delta;
9600
9601 if (offset >= 256)
9602 delta = offset - (256 - GET_MODE_SIZE (mode));
9603 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9604 delta = 31 * GET_MODE_SIZE (mode);
9605 else
9606 delta = offset & (~31 * GET_MODE_SIZE (mode));
9607
9608 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9609 NULL_RTX);
9610 x = plus_constant (Pmode, xop0, delta);
9611 }
9612 else if (offset < 0 && offset > -256)
9613 /* Small negative offsets are best done with a subtract before the
9614 dereference; forcing these into a register normally takes two
9615 instructions. */
9616 x = force_operand (x, NULL_RTX);
9617 else
9618 {
9619 /* For the remaining cases, force the constant into a register. */
9620 xop1 = force_reg (SImode, xop1);
9621 x = gen_rtx_PLUS (SImode, xop0, xop1);
9622 }
9623 }
9624 else if (GET_CODE (x) == PLUS
9625 && s_register_operand (XEXP (x, 1), SImode)
9626 && !s_register_operand (XEXP (x, 0), SImode))
9627 {
9628 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9629
9630 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9631 }
9632
9633 if (flag_pic)
9634 {
9635 /* We need to find and carefully transform any SYMBOL and LABEL
9636 references; so go back to the original address expression. */
9637 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9638 false /*compute_now*/);
9639
9640 if (new_x != orig_x)
9641 x = new_x;
9642 }
9643
9644 return x;
9645 }
9646
9647 /* Return TRUE if X contains any TLS symbol references. */
9648
9649 bool
9650 arm_tls_referenced_p (rtx x)
9651 {
9652 if (! TARGET_HAVE_TLS)
9653 return false;
9654
9655 subrtx_iterator::array_type array;
9656 FOR_EACH_SUBRTX (iter, array, x, ALL)
9657 {
9658 const_rtx x = *iter;
9659 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9660 {
9661 /* ARM currently does not provide relocations to encode TLS variables
9662 into AArch32 instructions, only into data, so there is currently no
9663 way to implement these if the literal pool is disabled. */
9664 if (arm_disable_literal_pool)
9665 sorry ("accessing thread-local storage is not currently supported "
9666 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9667
9668 return true;
9669 }
9670
9671 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9672 TLS offsets, not real symbol references. */
9673 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9674 iter.skip_subrtxes ();
9675 }
9676 return false;
9677 }
9678
9679 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9680
9681 On the ARM, allow any integer (invalid ones are removed later by insn
9682 patterns), nice doubles and symbol_refs which refer to the function's
9683 constant pool XXX.
9684
9685 When generating pic allow anything. */
9686
9687 static bool
9688 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9689 {
9690 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9691 return false;
9692
9693 return flag_pic || !label_mentioned_p (x);
9694 }
9695
9696 static bool
9697 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9698 {
9699 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates a HIGH
9700 RTX. Such RTXs must therefore be allowed for Thumb-1 so that, when run
9701 for ARMv8-M Baseline or later, the result is valid. */
9702 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9703 x = XEXP (x, 0);
9704
9705 return (CONST_INT_P (x)
9706 || CONST_DOUBLE_P (x)
9707 || CONSTANT_ADDRESS_P (x)
9708 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9709 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9710 we build the symbol address with upper/lower
9711 relocations. */
9712 || (TARGET_THUMB1
9713 && !label_mentioned_p (x)
9714 && arm_valid_symbolic_address_p (x)
9715 && arm_disable_literal_pool)
9716 || flag_pic);
9717 }
9718
9719 static bool
9720 arm_legitimate_constant_p (machine_mode mode, rtx x)
9721 {
9722 return (!arm_cannot_force_const_mem (mode, x)
9723 && (TARGET_32BIT
9724 ? arm_legitimate_constant_p_1 (mode, x)
9725 : thumb_legitimate_constant_p (mode, x)));
9726 }
9727
9728 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9729
9730 static bool
9731 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9732 {
9733 rtx base, offset;
9734 split_const (x, &base, &offset);
9735
9736 if (SYMBOL_REF_P (base))
9737 {
9738 /* Function symbols cannot have an offset due to the Thumb bit. */
9739 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9740 && INTVAL (offset) != 0)
9741 return true;
9742
9743 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9744 && !offset_within_block_p (base, INTVAL (offset)))
9745 return true;
9746 }
9747 return arm_tls_referenced_p (x);
9748 }
9749 \f
9750 #define REG_OR_SUBREG_REG(X) \
9751 (REG_P (X) \
9752 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9753
9754 #define REG_OR_SUBREG_RTX(X) \
9755 (REG_P (X) ? (X) : SUBREG_REG (X))
9756
9757 static inline int
9758 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9759 {
9760 machine_mode mode = GET_MODE (x);
9761 int total, words;
9762
9763 switch (code)
9764 {
9765 case ASHIFT:
9766 case ASHIFTRT:
9767 case LSHIFTRT:
9768 case ROTATERT:
9769 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9770
9771 case PLUS:
9772 case MINUS:
9773 case COMPARE:
9774 case NEG:
9775 case NOT:
9776 return COSTS_N_INSNS (1);
9777
9778 case MULT:
9779 if (arm_arch6m && arm_m_profile_small_mul)
9780 return COSTS_N_INSNS (32);
9781
9782 if (CONST_INT_P (XEXP (x, 1)))
9783 {
9784 int cycles = 0;
9785 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9786
9787 while (i)
9788 {
9789 i >>= 2;
9790 cycles++;
9791 }
9792 return COSTS_N_INSNS (2) + cycles;
9793 }
9794 return COSTS_N_INSNS (1) + 16;
9795
9796 case SET:
9797 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9798 the mode. */
9799 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9800 return (COSTS_N_INSNS (words)
9801 + 4 * ((MEM_P (SET_SRC (x)))
9802 + MEM_P (SET_DEST (x))));
9803
9804 case CONST_INT:
9805 if (outer == SET)
9806 {
9807 if (UINTVAL (x) < 256
9808 /* 16-bit constant. */
9809 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9810 return 0;
9811 if (thumb_shiftable_const (INTVAL (x)))
9812 return COSTS_N_INSNS (2);
9813 return arm_disable_literal_pool
9814 ? COSTS_N_INSNS (8)
9815 : COSTS_N_INSNS (3);
9816 }
9817 else if ((outer == PLUS || outer == COMPARE)
9818 && INTVAL (x) < 256 && INTVAL (x) > -256)
9819 return 0;
9820 else if ((outer == IOR || outer == XOR || outer == AND)
9821 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9822 return COSTS_N_INSNS (1);
9823 else if (outer == AND)
9824 {
9825 int i;
9826 /* This duplicates the tests in the andsi3 expander. */
9827 for (i = 9; i <= 31; i++)
9828 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9829 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9830 return COSTS_N_INSNS (2);
9831 }
9832 else if (outer == ASHIFT || outer == ASHIFTRT
9833 || outer == LSHIFTRT)
9834 return 0;
9835 return COSTS_N_INSNS (2);
9836
9837 case CONST:
9838 case CONST_DOUBLE:
9839 case LABEL_REF:
9840 case SYMBOL_REF:
9841 return COSTS_N_INSNS (3);
9842
9843 case UDIV:
9844 case UMOD:
9845 case DIV:
9846 case MOD:
9847 return 100;
9848
9849 case TRUNCATE:
9850 return 99;
9851
9852 case AND:
9853 case XOR:
9854 case IOR:
9855 /* XXX guess. */
9856 return 8;
9857
9858 case MEM:
9859 /* XXX another guess. */
9860 /* Memory costs quite a lot for the first word, but subsequent words
9861 load at the equivalent of a single insn each. */
9862 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9863 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9864 ? 4 : 0));
9865
9866 case IF_THEN_ELSE:
9867 /* XXX a guess. */
9868 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9869 return 14;
9870 return 2;
9871
9872 case SIGN_EXTEND:
9873 case ZERO_EXTEND:
9874 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9875 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9876
9877 if (mode == SImode)
9878 return total;
9879
9880 if (arm_arch6)
9881 return total + COSTS_N_INSNS (1);
9882
9883 /* Assume a two-shift sequence. Increase the cost slightly so
9884 we prefer actual shifts over an extend operation. */
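      /* E.g. sign-extending a byte without a native sxtb is typically
	     lsls rd, rs, #24
	     asrs rd, rd, #24
	 (illustrative). */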
9885 return total + 1 + COSTS_N_INSNS (2);
9886
9887 default:
9888 return 99;
9889 }
9890 }
9891
9892 /* Estimate the size cost of Thumb-1 instructions.
9893 For now most of the code is copied from thumb1_rtx_costs; we need more
9894 fine-grained tuning when we have more related test cases. */
9895 static inline int
9896 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9897 {
9898 machine_mode mode = GET_MODE (x);
9899 int words, cost;
9900
9901 switch (code)
9902 {
9903 case ASHIFT:
9904 case ASHIFTRT:
9905 case LSHIFTRT:
9906 case ROTATERT:
9907 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9908
9909 case PLUS:
9910 case MINUS:
9911 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9912 operations generated by RTL expansion, especially for the expansion of
9913 multiplication. */
9914 if ((GET_CODE (XEXP (x, 0)) == MULT
9915 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9916 || (GET_CODE (XEXP (x, 1)) == MULT
9917 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9918 return COSTS_N_INSNS (2);
9919 /* Fall through. */
9920 case COMPARE:
9921 case NEG:
9922 case NOT:
9923 return COSTS_N_INSNS (1);
9924
9925 case MULT:
9926 if (CONST_INT_P (XEXP (x, 1)))
9927 {
9928 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9929 into a register first. */
9930 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9931 /* For targets which have a very small and high-latency multiply
9932 unit, we prefer to synthesize the multiplication with up to 5 instructions,
9933 giving a good balance between size and performance. */
9934 if (arm_arch6m && arm_m_profile_small_mul)
9935 return COSTS_N_INSNS (5);
9936 else
9937 return COSTS_N_INSNS (1) + const_size;
9938 }
9939 return COSTS_N_INSNS (1);
9940
9941 case SET:
9942 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9943 the mode. */
9944 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9945 cost = COSTS_N_INSNS (words);
9946 if (satisfies_constraint_J (SET_SRC (x))
9947 || satisfies_constraint_K (SET_SRC (x))
9948 /* Too big an immediate for a 2-byte mov, using MOVT. */
9949 || (CONST_INT_P (SET_SRC (x))
9950 && UINTVAL (SET_SRC (x)) >= 256
9951 && TARGET_HAVE_MOVT
9952 && satisfies_constraint_j (SET_SRC (x)))
9953 /* thumb1_movdi_insn. */
9954 || ((words > 1) && MEM_P (SET_SRC (x))))
9955 cost += COSTS_N_INSNS (1);
9956 return cost;
9957
9958 case CONST_INT:
9959 if (outer == SET)
9960 {
9961 if (UINTVAL (x) < 256)
9962 return COSTS_N_INSNS (1);
9963 /* movw is 4byte long. */
9964 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9965 return COSTS_N_INSNS (2);
9966 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9967 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9968 return COSTS_N_INSNS (2);
9969 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9970 if (thumb_shiftable_const (INTVAL (x)))
9971 return COSTS_N_INSNS (2);
9972 return arm_disable_literal_pool
9973 ? COSTS_N_INSNS (8)
9974 : COSTS_N_INSNS (3);
9975 }
9976 else if ((outer == PLUS || outer == COMPARE)
9977 && INTVAL (x) < 256 && INTVAL (x) > -256)
9978 return 0;
9979 else if ((outer == IOR || outer == XOR || outer == AND)
9980 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9981 return COSTS_N_INSNS (1);
9982 else if (outer == AND)
9983 {
9984 int i;
9985 /* This duplicates the tests in the andsi3 expander. */
9986 for (i = 9; i <= 31; i++)
9987 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9988 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9989 return COSTS_N_INSNS (2);
9990 }
9991 else if (outer == ASHIFT || outer == ASHIFTRT
9992 || outer == LSHIFTRT)
9993 return 0;
9994 return COSTS_N_INSNS (2);
9995
9996 case CONST:
9997 case CONST_DOUBLE:
9998 case LABEL_REF:
9999 case SYMBOL_REF:
10000 return COSTS_N_INSNS (3);
10001
10002 case UDIV:
10003 case UMOD:
10004 case DIV:
10005 case MOD:
10006 return 100;
10007
10008 case TRUNCATE:
10009 return 99;
10010
10011 case AND:
10012 case XOR:
10013 case IOR:
10014 return COSTS_N_INSNS (1);
10015
10016 case MEM:
10017 return (COSTS_N_INSNS (1)
10018 + COSTS_N_INSNS (1)
10019 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10020 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10021 ? COSTS_N_INSNS (1) : 0));
10022
10023 case IF_THEN_ELSE:
10024 /* XXX a guess. */
10025 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10026 return 14;
10027 return 2;
10028
10029 case ZERO_EXTEND:
10030 /* XXX still guessing. */
10031 switch (GET_MODE (XEXP (x, 0)))
10032 {
10033 case E_QImode:
10034 return (1 + (mode == DImode ? 4 : 0)
10035 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10036
10037 case E_HImode:
10038 return (4 + (mode == DImode ? 4 : 0)
10039 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10040
10041 case E_SImode:
10042 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10043
10044 default:
10045 return 99;
10046 }
10047
10048 default:
10049 return 99;
10050 }
10051 }
10052
10053 /* Helper function for arm_rtx_costs. If one operand of OP, a
10054 PLUS, is the carry flag, then return the other operand. If
10055 neither operand is a carry, return OP unchanged. */
10056 static rtx
10057 strip_carry_operation (rtx op)
10058 {
10059 gcc_assert (GET_CODE (op) == PLUS);
10060 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10061 return XEXP (op, 1);
10062 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10063 return XEXP (op, 0);
10064 return op;
10065 }
10066
10067 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10068 operand, then return the operand that is being shifted. If the shift
10069 is not by a constant, then set SHIFT_REG to point to the operand.
10070 Return NULL if OP is not a shifter operand. */
10071 static rtx
10072 shifter_op_p (rtx op, rtx *shift_reg)
10073 {
10074 enum rtx_code code = GET_CODE (op);
10075
10076 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10077 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10078 return XEXP (op, 0);
10079 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10080 return XEXP (op, 0);
10081 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10082 || code == ASHIFTRT)
10083 {
10084 if (!CONST_INT_P (XEXP (op, 1)))
10085 *shift_reg = XEXP (op, 1);
10086 return XEXP (op, 0);
10087 }
10088
10089 return NULL;
10090 }
10091
10092 static bool
10093 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10094 {
10095 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10096 rtx_code code = GET_CODE (x);
10097 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10098
10099 switch (XINT (x, 1))
10100 {
10101 case UNSPEC_UNALIGNED_LOAD:
10102 /* We can only do unaligned loads into the integer unit, and we can't
10103 use LDM or LDRD. */
10104 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10105 if (speed_p)
10106 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10107 + extra_cost->ldst.load_unaligned);
10108
10109 #ifdef NOT_YET
10110 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10111 ADDR_SPACE_GENERIC, speed_p);
10112 #endif
10113 return true;
10114
10115 case UNSPEC_UNALIGNED_STORE:
10116 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10117 if (speed_p)
10118 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10119 + extra_cost->ldst.store_unaligned);
10120
10121 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10122 #ifdef NOT_YET
10123 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10124 ADDR_SPACE_GENERIC, speed_p);
10125 #endif
10126 return true;
10127
10128 case UNSPEC_VRINTZ:
10129 case UNSPEC_VRINTP:
10130 case UNSPEC_VRINTM:
10131 case UNSPEC_VRINTR:
10132 case UNSPEC_VRINTX:
10133 case UNSPEC_VRINTA:
10134 if (speed_p)
10135 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10136
10137 return true;
10138 default:
10139 *cost = COSTS_N_INSNS (2);
10140 break;
10141 }
10142 return true;
10143 }
10144
10145 /* Cost of a libcall. We assume one insn per argument, an amount for the
10146 call (one insn for -Os) and then one for processing the result. */
10147 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
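/* E.g. (illustrative) LIBCALL_COST (2) is COSTS_N_INSNS (20) when speed_p
   and COSTS_N_INSNS (4) otherwise. */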
10148
10149 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10150 do \
10151 { \
10152 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10153 if (shift_op != NULL \
10154 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10155 { \
10156 if (shift_reg) \
10157 { \
10158 if (speed_p) \
10159 *cost += extra_cost->alu.arith_shift_reg; \
10160 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10161 ASHIFT, 1, speed_p); \
10162 } \
10163 else if (speed_p) \
10164 *cost += extra_cost->alu.arith_shift; \
10165 \
10166 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10167 ASHIFT, 0, speed_p) \
10168 + rtx_cost (XEXP (x, 1 - IDX), \
10169 GET_MODE (shift_op), \
10170 OP, 1, speed_p)); \
10171 return true; \
10172 } \
10173 } \
10174 while (0)
10175
10176 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10177 considering the costs of the addressing mode and memory access
10178 separately. */
10179 static bool
10180 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10181 int *cost, bool speed_p)
10182 {
10183 machine_mode mode = GET_MODE (x);
10184
10185 *cost = COSTS_N_INSNS (1);
10186
10187 if (flag_pic
10188 && GET_CODE (XEXP (x, 0)) == PLUS
10189 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10190 /* This will be split into two instructions. Add the cost of the
10191 additional instruction here. The cost of the memory access is computed
10192 below. See arm.md:calculate_pic_address. */
10193 *cost += COSTS_N_INSNS (1);
10194
10195 /* Calculate cost of the addressing mode. */
10196 if (speed_p)
10197 {
10198 arm_addr_mode_op op_type;
10199 switch (GET_CODE (XEXP (x, 0)))
10200 {
10201 default:
10202 case REG:
10203 op_type = AMO_DEFAULT;
10204 break;
10205 case MINUS:
10206 /* MINUS addresses do not normally appear in the RTL, but the
10207 architecture supports them, so handle this case defensively. */
10208 /* fall through */
10209 case PLUS:
10210 op_type = AMO_NO_WB;
10211 break;
10212 case PRE_INC:
10213 case PRE_DEC:
10214 case POST_INC:
10215 case POST_DEC:
10216 case PRE_MODIFY:
10217 case POST_MODIFY:
10218 op_type = AMO_WB;
10219 break;
10220 }
10221
10222 if (VECTOR_MODE_P (mode))
10223 *cost += current_tune->addr_mode_costs->vector[op_type];
10224 else if (FLOAT_MODE_P (mode))
10225 *cost += current_tune->addr_mode_costs->fp[op_type];
10226 else
10227 *cost += current_tune->addr_mode_costs->integer[op_type];
10228 }
10229
10230 /* Calculate cost of memory access. */
10231 if (speed_p)
10232 {
10233 if (FLOAT_MODE_P (mode))
10234 {
10235 if (GET_MODE_SIZE (mode) == 8)
10236 *cost += extra_cost->ldst.loadd;
10237 else
10238 *cost += extra_cost->ldst.loadf;
10239 }
10240 else if (VECTOR_MODE_P (mode))
10241 *cost += extra_cost->ldst.loadv;
10242 else
10243 {
10244 /* Integer modes */
10245 if (GET_MODE_SIZE (mode) == 8)
10246 *cost += extra_cost->ldst.ldrd;
10247 else
10248 *cost += extra_cost->ldst.load;
10249 }
10250 }
10251
10252 return true;
10253 }
10254
10255 /* Helper for arm_bfi_p. */
10256 static bool
10257 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10258 {
10259 unsigned HOST_WIDE_INT const1;
10260 unsigned HOST_WIDE_INT const2 = 0;
10261
10262 if (!CONST_INT_P (XEXP (op0, 1)))
10263 return false;
10264
10265 const1 = UINTVAL (XEXP (op0, 1));
10266 if (!CONST_INT_P (XEXP (op1, 1))
10267 || ~UINTVAL (XEXP (op1, 1)) != const1)
10268 return false;
10269
10270 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10271 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10272 {
10273 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10274 *sub0 = XEXP (XEXP (op0, 0), 0);
10275 }
10276 else
10277 *sub0 = XEXP (op0, 0);
10278
10279 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10280 return false;
10281
10282 *sub1 = XEXP (op1, 0);
10283 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10284 }
10285
10286 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10287 format looks something like:
10288
10289 (IOR (AND (reg1) (~const1))
10290 (AND (ASHIFT (reg2) (const2))
10291 (const1)))
10292
10293 where const1 is a consecutive sequence of 1-bits with the
10294 least-significant non-zero bit starting at bit position const2. If
10295 const2 is zero, then the shift will not appear at all, due to
10296 canonicalization. The two arms of the IOR expression may be
10297 flipped. */
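/* A concrete (illustrative) instance: with const1 = 0xff00 and const2 = 8
   the pattern computes (reg1 & ~0xff00) | ((reg2 << 8) & 0xff00), i.e. it
   inserts the low byte of reg2 into bits 8..15 of reg1, which is exactly
   BFI reg1, reg2, #8, #8. */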
10298 static bool
10299 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10300 {
10301 if (GET_CODE (x) != IOR)
10302 return false;
10303 if (GET_CODE (XEXP (x, 0)) != AND
10304 || GET_CODE (XEXP (x, 1)) != AND)
10305 return false;
10306 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10307 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10308 }
10309
10310 /* RTX costs. Make an estimate of the cost of executing the operation
10311 X, which is contained within an operation with code OUTER_CODE.
10312 SPEED_P indicates whether the cost desired is the performance cost,
10313 or the size cost. The estimate is stored in COST and the return
10314 value is TRUE if the cost calculation is final, or FALSE if the
10315 caller should recurse through the operands of X to add additional
10316 costs.
10317
10318 We currently make no attempt to model the size savings of Thumb-2
10319 16-bit instructions. At the normal points in compilation where
10320 this code is called we have no measure of whether the condition
10321 flags are live or not, and thus no realistic way to determine what
10322 the size will eventually be. */
10323 static bool
10324 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10325 const struct cpu_cost_table *extra_cost,
10326 int *cost, bool speed_p)
10327 {
10328 machine_mode mode = GET_MODE (x);
10329
10330 *cost = COSTS_N_INSNS (1);
10331
10332 if (TARGET_THUMB1)
10333 {
10334 if (speed_p)
10335 *cost = thumb1_rtx_costs (x, code, outer_code);
10336 else
10337 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10338 return true;
10339 }
10340
10341 switch (code)
10342 {
10343 case SET:
10344 *cost = 0;
10345 /* SET RTXs don't have a mode so we get it from the destination. */
10346 mode = GET_MODE (SET_DEST (x));
10347
10348 if (REG_P (SET_SRC (x))
10349 && REG_P (SET_DEST (x)))
10350 {
10351 /* Assume that most copies can be done with a single insn,
10352 unless we don't have HW FP, in which case everything
10353 larger than word mode will require two insns. */
10354 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10355 && GET_MODE_SIZE (mode) > 4)
10356 || mode == DImode)
10357 ? 2 : 1);
10358 /* Conditional register moves can be encoded
10359 in 16 bits in Thumb mode. */
10360 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10361 *cost >>= 1;
10362
10363 return true;
10364 }
10365
10366 if (CONST_INT_P (SET_SRC (x)))
10367 {
10368 /* Handle CONST_INT here, since the value doesn't have a mode
10369 and we would otherwise be unable to work out the true cost. */
10370 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10371 0, speed_p);
10372 outer_code = SET;
10373 /* Slightly lower the cost of setting a core reg to a constant.
10374 This helps break up chains and allows for better scheduling. */
10375 if (REG_P (SET_DEST (x))
10376 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10377 *cost -= 1;
10378 x = SET_SRC (x);
10379 /* Immediate moves with an immediate in the range [0, 255] can be
10380 encoded in 16 bits in Thumb mode. */
10381 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10382 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10383 *cost >>= 1;
10384 goto const_int_cost;
10385 }
10386
10387 return false;
10388
10389 case MEM:
10390 return arm_mem_costs (x, extra_cost, cost, speed_p);
10391
10392 case PARALLEL:
10393 {
10394 /* Calculations of LDM costs are complex. We assume an initial cost
10395 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
10396 registers; each additional group of ldm_regs_per_insn_subsequent
10397 registers then costs one more insn. The
10398 formula for N regs is thus:
10399
10400 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10401 + ldm_regs_per_insn_subsequent - 1)
10402 / ldm_regs_per_insn_subsequent).
10403
10404 Additional costs may also be added for addressing. A similar
10405 formula is used for STM. */
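/* As a worked example (hypothetical tuning values): with
   ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent == 2,
   an 8-register LDM is costed at
   ldm_1st + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (3). */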
10406
10407 bool is_ldm = load_multiple_operation (x, SImode);
10408 bool is_stm = store_multiple_operation (x, SImode);
10409
10410 if (is_ldm || is_stm)
10411 {
10412 if (speed_p)
10413 {
10414 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10415 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10416 ? extra_cost->ldst.ldm_regs_per_insn_1st
10417 : extra_cost->ldst.stm_regs_per_insn_1st;
10418 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10419 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10420 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10421
10422 *cost += regs_per_insn_1st
10423 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10424 + regs_per_insn_sub - 1)
10425 / regs_per_insn_sub);
10426 return true;
10427 }
10428
10429 }
10430 return false;
10431 }
10432 case DIV:
10433 case UDIV:
10434 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10435 && (mode == SFmode || !TARGET_VFP_SINGLE))
10436 *cost += COSTS_N_INSNS (speed_p
10437 ? extra_cost->fp[mode != SFmode].div : 0);
10438 else if (mode == SImode && TARGET_IDIV)
10439 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10440 else
10441 *cost = LIBCALL_COST (2);
10442
10443 /* Make sdiv more expensive so that when both sdiv and udiv are
10444 possible, udiv is preferred. */
10445 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10446 return false; /* All arguments must be in registers. */
10447
10448 case MOD:
10449 /* MOD by a power of 2 can be expanded as:
10450 rsbs r1, r0, #0
10451 and r0, r0, #(n - 1)
10452 and r1, r1, #(n - 1)
10453 rsbpl r0, r1, #0. */
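/* For instance (illustrative only), x % 16 uses #15 as the mask in both
   AND instructions of the sequence above. */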
10454 if (CONST_INT_P (XEXP (x, 1))
10455 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10456 && mode == SImode)
10457 {
10458 *cost += COSTS_N_INSNS (3);
10459
10460 if (speed_p)
10461 *cost += 2 * extra_cost->alu.logical
10462 + extra_cost->alu.arith;
10463 return true;
10464 }
10465
10466 /* Fall-through. */
10467 case UMOD:
10468 /* Make sdiv more expensive so that when both sdiv and udiv are
10469 possible, udiv is preferred. */
10470 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10471 return false; /* All arguments must be in registers. */
10472
10473 case ROTATE:
10474 if (mode == SImode && REG_P (XEXP (x, 1)))
10475 {
10476 *cost += (COSTS_N_INSNS (1)
10477 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10478 if (speed_p)
10479 *cost += extra_cost->alu.shift_reg;
10480 return true;
10481 }
10482 /* Fall through */
10483 case ROTATERT:
10484 case ASHIFT:
10485 case LSHIFTRT:
10486 case ASHIFTRT:
10487 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10488 {
10489 *cost += (COSTS_N_INSNS (2)
10490 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10491 if (speed_p)
10492 *cost += 2 * extra_cost->alu.shift;
10493 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10494 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10495 *cost += 1;
10496 return true;
10497 }
10498 else if (mode == SImode)
10499 {
10500 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10501 /* Slightly disparage register shifts at -Os, but not by much. */
10502 if (!CONST_INT_P (XEXP (x, 1)))
10503 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10504 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10505 return true;
10506 }
10507 else if (GET_MODE_CLASS (mode) == MODE_INT
10508 && GET_MODE_SIZE (mode) < 4)
10509 {
10510 if (code == ASHIFT)
10511 {
10512 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10513 /* Slightly disparage register shifts at -Os, but not by
10514 much. */
10515 if (!CONST_INT_P (XEXP (x, 1)))
10516 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10517 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10518 }
10519 else if (code == LSHIFTRT || code == ASHIFTRT)
10520 {
10521 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10522 {
10523 /* Can use SBFX/UBFX. */
10524 if (speed_p)
10525 *cost += extra_cost->alu.bfx;
10526 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10527 }
10528 else
10529 {
10530 *cost += COSTS_N_INSNS (1);
10531 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10532 if (speed_p)
10533 {
10534 if (CONST_INT_P (XEXP (x, 1)))
10535 *cost += 2 * extra_cost->alu.shift;
10536 else
10537 *cost += (extra_cost->alu.shift
10538 + extra_cost->alu.shift_reg);
10539 }
10540 else
10541 /* Slightly disparage register shifts. */
10542 *cost += !CONST_INT_P (XEXP (x, 1));
10543 }
10544 }
10545 else /* Rotates. */
10546 {
10547 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10548 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10549 if (speed_p)
10550 {
10551 if (CONST_INT_P (XEXP (x, 1)))
10552 *cost += (2 * extra_cost->alu.shift
10553 + extra_cost->alu.log_shift);
10554 else
10555 *cost += (extra_cost->alu.shift
10556 + extra_cost->alu.shift_reg
10557 + extra_cost->alu.log_shift_reg);
10558 }
10559 }
10560 return true;
10561 }
10562
10563 *cost = LIBCALL_COST (2);
10564 return false;
10565
10566 case BSWAP:
10567 if (arm_arch6)
10568 {
10569 if (mode == SImode)
10570 {
10571 if (speed_p)
10572 *cost += extra_cost->alu.rev;
10573
10574 return false;
10575 }
10576 }
10577 else
10578 {
10579 /* No rev instruction available. Look at arm_legacy_rev
10580 and thumb_legacy_rev for the form of RTL used then. */
10581 if (TARGET_THUMB)
10582 {
10583 *cost += COSTS_N_INSNS (9);
10584
10585 if (speed_p)
10586 {
10587 *cost += 6 * extra_cost->alu.shift;
10588 *cost += 3 * extra_cost->alu.logical;
10589 }
10590 }
10591 else
10592 {
10593 *cost += COSTS_N_INSNS (4);
10594
10595 if (speed_p)
10596 {
10597 *cost += 2 * extra_cost->alu.shift;
10598 *cost += extra_cost->alu.arith_shift;
10599 *cost += 2 * extra_cost->alu.logical;
10600 }
10601 }
10602 return true;
10603 }
10604 return false;
10605
10606 case MINUS:
10607 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10608 && (mode == SFmode || !TARGET_VFP_SINGLE))
10609 {
10610 if (GET_CODE (XEXP (x, 0)) == MULT
10611 || GET_CODE (XEXP (x, 1)) == MULT)
10612 {
10613 rtx mul_op0, mul_op1, sub_op;
10614
10615 if (speed_p)
10616 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10617
10618 if (GET_CODE (XEXP (x, 0)) == MULT)
10619 {
10620 mul_op0 = XEXP (XEXP (x, 0), 0);
10621 mul_op1 = XEXP (XEXP (x, 0), 1);
10622 sub_op = XEXP (x, 1);
10623 }
10624 else
10625 {
10626 mul_op0 = XEXP (XEXP (x, 1), 0);
10627 mul_op1 = XEXP (XEXP (x, 1), 1);
10628 sub_op = XEXP (x, 0);
10629 }
10630
10631 /* The first operand of the multiply may be optionally
10632 negated. */
10633 if (GET_CODE (mul_op0) == NEG)
10634 mul_op0 = XEXP (mul_op0, 0);
10635
10636 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10637 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10638 + rtx_cost (sub_op, mode, code, 0, speed_p));
10639
10640 return true;
10641 }
10642
10643 if (speed_p)
10644 *cost += extra_cost->fp[mode != SFmode].addsub;
10645 return false;
10646 }
10647
10648 if (mode == SImode)
10649 {
10650 rtx shift_by_reg = NULL;
10651 rtx shift_op;
10652 rtx non_shift_op;
10653 rtx op0 = XEXP (x, 0);
10654 rtx op1 = XEXP (x, 1);
10655
10656 /* Factor out any borrow operation. There's more than one way
10657 of expressing this; try to recognize them all. */
10658 if (GET_CODE (op0) == MINUS)
10659 {
10660 if (arm_borrow_operation (op1, SImode))
10661 {
10662 op1 = XEXP (op0, 1);
10663 op0 = XEXP (op0, 0);
10664 }
10665 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10666 op0 = XEXP (op0, 0);
10667 }
10668 else if (GET_CODE (op1) == PLUS
10669 && arm_borrow_operation (XEXP (op1, 0), SImode))
10670 op1 = XEXP (op1, 0);
10671 else if (GET_CODE (op0) == NEG
10672 && arm_borrow_operation (op1, SImode))
10673 {
10674 /* Negate with carry-in. For Thumb2 this is done with
10675 SBC R, X, X lsl #1 (i.e. X - 2X - C), as Thumb lacks the
10676 RSC instruction that exists in Arm mode. */
10677 if (speed_p)
10678 *cost += (TARGET_THUMB2
10679 ? extra_cost->alu.arith_shift
10680 : extra_cost->alu.arith);
10681 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10682 return true;
10683 }
10684 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10685 Note we do mean ~borrow here. */
10686 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10687 {
10688 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10689 return true;
10690 }
10691
10692 shift_op = shifter_op_p (op0, &shift_by_reg);
10693 if (shift_op == NULL)
10694 {
10695 shift_op = shifter_op_p (op1, &shift_by_reg);
10696 non_shift_op = op0;
10697 }
10698 else
10699 non_shift_op = op1;
10700
10701 if (shift_op != NULL)
10702 {
10703 if (shift_by_reg != NULL)
10704 {
10705 if (speed_p)
10706 *cost += extra_cost->alu.arith_shift_reg;
10707 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10708 }
10709 else if (speed_p)
10710 *cost += extra_cost->alu.arith_shift;
10711
10712 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10713 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10714 return true;
10715 }
10716
10717 if (arm_arch_thumb2
10718 && GET_CODE (XEXP (x, 1)) == MULT)
10719 {
10720 /* MLS. */
10721 if (speed_p)
10722 *cost += extra_cost->mult[0].add;
10723 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10724 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10725 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10726 return true;
10727 }
10728
10729 if (CONST_INT_P (op0))
10730 {
10731 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10732 INTVAL (op0), NULL_RTX,
10733 NULL_RTX, 1, 0);
10734 *cost = COSTS_N_INSNS (insns);
10735 if (speed_p)
10736 *cost += insns * extra_cost->alu.arith;
10737 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10738 return true;
10739 }
10740 else if (speed_p)
10741 *cost += extra_cost->alu.arith;
10742
10743 /* Don't recurse as we don't want to cost any borrow that
10744 we've stripped. */
10745 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10746 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10747 return true;
10748 }
10749
10750 if (GET_MODE_CLASS (mode) == MODE_INT
10751 && GET_MODE_SIZE (mode) < 4)
10752 {
10753 rtx shift_op, shift_reg;
10754 shift_reg = NULL;
10755
10756 /* We check both sides of the MINUS for shifter operands since,
10757 unlike PLUS, it's not commutative. */
10758
10759 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10760 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10761
10762 /* Slightly disparage, as we might need to widen the result. */
10763 *cost += 1;
10764 if (speed_p)
10765 *cost += extra_cost->alu.arith;
10766
10767 if (CONST_INT_P (XEXP (x, 0)))
10768 {
10769 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10770 return true;
10771 }
10772
10773 return false;
10774 }
10775
10776 if (mode == DImode)
10777 {
10778 *cost += COSTS_N_INSNS (1);
10779
10780 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10781 {
10782 rtx op1 = XEXP (x, 1);
10783
10784 if (speed_p)
10785 *cost += 2 * extra_cost->alu.arith;
10786
10787 if (GET_CODE (op1) == ZERO_EXTEND)
10788 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10789 0, speed_p);
10790 else
10791 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10792 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10793 0, speed_p);
10794 return true;
10795 }
10796 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10797 {
10798 if (speed_p)
10799 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10800 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10801 0, speed_p)
10802 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10803 return true;
10804 }
10805 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10806 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10807 {
10808 if (speed_p)
10809 *cost += (extra_cost->alu.arith
10810 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10811 ? extra_cost->alu.arith
10812 : extra_cost->alu.arith_shift));
10813 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10814 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10815 GET_CODE (XEXP (x, 1)), 0, speed_p));
10816 return true;
10817 }
10818
10819 if (speed_p)
10820 *cost += 2 * extra_cost->alu.arith;
10821 return false;
10822 }
10823
10824 /* Vector mode? */
10825
10826 *cost = LIBCALL_COST (2);
10827 return false;
10828
10829 case PLUS:
10830 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10831 && (mode == SFmode || !TARGET_VFP_SINGLE))
10832 {
10833 if (GET_CODE (XEXP (x, 0)) == MULT)
10834 {
10835 rtx mul_op0, mul_op1, add_op;
10836
10837 if (speed_p)
10838 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10839
10840 mul_op0 = XEXP (XEXP (x, 0), 0);
10841 mul_op1 = XEXP (XEXP (x, 0), 1);
10842 add_op = XEXP (x, 1);
10843
10844 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10845 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10846 + rtx_cost (add_op, mode, code, 0, speed_p));
10847
10848 return true;
10849 }
10850
10851 if (speed_p)
10852 *cost += extra_cost->fp[mode != SFmode].addsub;
10853 return false;
10854 }
10855 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10856 {
10857 *cost = LIBCALL_COST (2);
10858 return false;
10859 }
10860
10861 /* Narrow modes can be synthesized in SImode, but the range
10862 of useful sub-operations is limited. Check for shift operations
10863 on one of the operands. Only left shifts can be used in the
10864 narrow modes. */
10865 if (GET_MODE_CLASS (mode) == MODE_INT
10866 && GET_MODE_SIZE (mode) < 4)
10867 {
10868 rtx shift_op, shift_reg;
10869 shift_reg = NULL;
10870
10871 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10872
10873 if (CONST_INT_P (XEXP (x, 1)))
10874 {
10875 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10876 INTVAL (XEXP (x, 1)), NULL_RTX,
10877 NULL_RTX, 1, 0);
10878 *cost = COSTS_N_INSNS (insns);
10879 if (speed_p)
10880 *cost += insns * extra_cost->alu.arith;
10881 /* Slightly penalize a narrow operation as the result may
10882 need widening. */
10883 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10884 return true;
10885 }
10886
10887 /* Slightly penalize a narrow operation as the result may
10888 need widening. */
10889 *cost += 1;
10890 if (speed_p)
10891 *cost += extra_cost->alu.arith;
10892
10893 return false;
10894 }
10895
10896 if (mode == SImode)
10897 {
10898 rtx shift_op, shift_reg;
10899
10900 if (TARGET_INT_SIMD
10901 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10902 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10903 {
10904 /* UXTA[BH] or SXTA[BH]. */
10905 if (speed_p)
10906 *cost += extra_cost->alu.extend_arith;
10907 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10908 0, speed_p)
10909 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10910 return true;
10911 }
10912
10913 rtx op0 = XEXP (x, 0);
10914 rtx op1 = XEXP (x, 1);
10915
10916 /* Handle a side effect of adding in the carry to an addition. */
10917 if (GET_CODE (op0) == PLUS
10918 && arm_carry_operation (op1, mode))
10919 {
10920 op1 = XEXP (op0, 1);
10921 op0 = XEXP (op0, 0);
10922 }
10923 else if (GET_CODE (op1) == PLUS
10924 && arm_carry_operation (op0, mode))
10925 {
10926 op0 = XEXP (op1, 0);
10927 op1 = XEXP (op1, 1);
10928 }
10929 else if (GET_CODE (op0) == PLUS)
10930 {
10931 op0 = strip_carry_operation (op0);
10932 if (swap_commutative_operands_p (op0, op1))
10933 std::swap (op0, op1);
10934 }
10935
10936 if (arm_carry_operation (op0, mode))
10937 {
10938 /* Adding the carry to a register is a canonicalization of
10939 adding 0 to the register plus the carry. */
10940 if (speed_p)
10941 *cost += extra_cost->alu.arith;
10942 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10943 return true;
10944 }
10945
10946 shift_reg = NULL;
10947 shift_op = shifter_op_p (op0, &shift_reg);
10948 if (shift_op != NULL)
10949 {
10950 if (shift_reg)
10951 {
10952 if (speed_p)
10953 *cost += extra_cost->alu.arith_shift_reg;
10954 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10955 }
10956 else if (speed_p)
10957 *cost += extra_cost->alu.arith_shift;
10958
10959 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10960 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10961 return true;
10962 }
10963
10964 if (GET_CODE (op0) == MULT)
10965 {
10966 rtx mul_op = op0;
10967
10968 if (TARGET_DSP_MULTIPLY
10969 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10970 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10971 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10972 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10973 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10974 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10975 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10976 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10977 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10978 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10979 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10980 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10981 == 16))))))
10982 {
10983 /* SMLA[BT][BT]. */
10984 if (speed_p)
10985 *cost += extra_cost->mult[0].extend_add;
10986 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10987 SIGN_EXTEND, 0, speed_p)
10988 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10989 SIGN_EXTEND, 0, speed_p)
10990 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10991 return true;
10992 }
10993
10994 if (speed_p)
10995 *cost += extra_cost->mult[0].add;
10996 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10997 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10998 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10999 return true;
11000 }
11001
11002 if (CONST_INT_P (op1))
11003 {
11004 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11005 INTVAL (op1), NULL_RTX,
11006 NULL_RTX, 1, 0);
11007 *cost = COSTS_N_INSNS (insns);
11008 if (speed_p)
11009 *cost += insns * extra_cost->alu.arith;
11010 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11011 return true;
11012 }
11013
11014 if (speed_p)
11015 *cost += extra_cost->alu.arith;
11016
11017 /* Don't recurse here because we want to test the operands
11018 without any carry operation. */
11019 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11020 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11021 return true;
11022 }
11023
11024 if (mode == DImode)
11025 {
11026 if (GET_CODE (XEXP (x, 0)) == MULT
11027 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11028 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11029 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11030 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11031 {
11032 if (speed_p)
11033 *cost += extra_cost->mult[1].extend_add;
11034 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11035 ZERO_EXTEND, 0, speed_p)
11036 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11037 ZERO_EXTEND, 0, speed_p)
11038 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11039 return true;
11040 }
11041
11042 *cost += COSTS_N_INSNS (1);
11043
11044 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11045 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11046 {
11047 if (speed_p)
11048 *cost += (extra_cost->alu.arith
11049 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11050 ? extra_cost->alu.arith
11051 : extra_cost->alu.arith_shift));
11052
11053 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11054 0, speed_p)
11055 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11056 return true;
11057 }
11058
11059 if (speed_p)
11060 *cost += 2 * extra_cost->alu.arith;
11061 return false;
11062 }
11063
11064 /* Vector mode? */
11065 *cost = LIBCALL_COST (2);
11066 return false;
11067 case IOR:
11068 {
11069 rtx sub0, sub1;
11070 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11071 {
11072 if (speed_p)
11073 *cost += extra_cost->alu.rev;
11074
11075 return true;
11076 }
11077 else if (mode == SImode && arm_arch_thumb2
11078 && arm_bfi_p (x, &sub0, &sub1))
11079 {
11080 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11081 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11082 if (speed_p)
11083 *cost += extra_cost->alu.bfi;
11084
11085 return true;
11086 }
11087 }
11088
11089 /* Fall through. */
11090 case AND: case XOR:
11091 if (mode == SImode)
11092 {
11093 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11094 rtx op0 = XEXP (x, 0);
11095 rtx shift_op, shift_reg;
11096
11097 if (subcode == NOT
11098 && (code == AND
11099 || (code == IOR && TARGET_THUMB2)))
11100 op0 = XEXP (op0, 0);
11101
11102 shift_reg = NULL;
11103 shift_op = shifter_op_p (op0, &shift_reg);
11104 if (shift_op != NULL)
11105 {
11106 if (shift_reg)
11107 {
11108 if (speed_p)
11109 *cost += extra_cost->alu.log_shift_reg;
11110 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11111 }
11112 else if (speed_p)
11113 *cost += extra_cost->alu.log_shift;
11114
11115 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11116 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11117 return true;
11118 }
11119
11120 if (CONST_INT_P (XEXP (x, 1)))
11121 {
11122 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11123 INTVAL (XEXP (x, 1)), NULL_RTX,
11124 NULL_RTX, 1, 0);
11125
11126 *cost = COSTS_N_INSNS (insns);
11127 if (speed_p)
11128 *cost += insns * extra_cost->alu.logical;
11129 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11130 return true;
11131 }
11132
11133 if (speed_p)
11134 *cost += extra_cost->alu.logical;
11135 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11136 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11137 return true;
11138 }
11139
11140 if (mode == DImode)
11141 {
11142 rtx op0 = XEXP (x, 0);
11143 enum rtx_code subcode = GET_CODE (op0);
11144
11145 *cost += COSTS_N_INSNS (1);
11146
11147 if (subcode == NOT
11148 && (code == AND
11149 || (code == IOR && TARGET_THUMB2)))
11150 op0 = XEXP (op0, 0);
11151
11152 if (GET_CODE (op0) == ZERO_EXTEND)
11153 {
11154 if (speed_p)
11155 *cost += 2 * extra_cost->alu.logical;
11156
11157 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11158 0, speed_p)
11159 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11160 return true;
11161 }
11162 else if (GET_CODE (op0) == SIGN_EXTEND)
11163 {
11164 if (speed_p)
11165 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11166
11167 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11168 0, speed_p)
11169 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11170 return true;
11171 }
11172
11173 if (speed_p)
11174 *cost += 2 * extra_cost->alu.logical;
11175
11176 return true;
11177 }
11178 /* Vector mode? */
11179
11180 *cost = LIBCALL_COST (2);
11181 return false;
11182
11183 case MULT:
11184 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11185 && (mode == SFmode || !TARGET_VFP_SINGLE))
11186 {
11187 rtx op0 = XEXP (x, 0);
11188
11189 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11190 op0 = XEXP (op0, 0);
11191
11192 if (speed_p)
11193 *cost += extra_cost->fp[mode != SFmode].mult;
11194
11195 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11196 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11197 return true;
11198 }
11199 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11200 {
11201 *cost = LIBCALL_COST (2);
11202 return false;
11203 }
11204
11205 if (mode == SImode)
11206 {
11207 if (TARGET_DSP_MULTIPLY
11208 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11209 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11210 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11211 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11212 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11213 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11214 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11215 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11216 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11217 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11218 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11219 && (INTVAL (XEXP (XEXP (x, 1), 1))
11220 == 16))))))
11221 {
11222 /* SMUL[TB][TB]. */
11223 if (speed_p)
11224 *cost += extra_cost->mult[0].extend;
11225 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11226 SIGN_EXTEND, 0, speed_p);
11227 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11228 SIGN_EXTEND, 1, speed_p);
11229 return true;
11230 }
11231 if (speed_p)
11232 *cost += extra_cost->mult[0].simple;
11233 return false;
11234 }
11235
11236 if (mode == DImode)
11237 {
11238 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11239 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11240 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11241 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11242 {
11243 if (speed_p)
11244 *cost += extra_cost->mult[1].extend;
11245 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11246 ZERO_EXTEND, 0, speed_p)
11247 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11248 ZERO_EXTEND, 0, speed_p));
11249 return true;
11250 }
11251
11252 *cost = LIBCALL_COST (2);
11253 return false;
11254 }
11255
11256 /* Vector mode? */
11257 *cost = LIBCALL_COST (2);
11258 return false;
11259
11260 case NEG:
11261 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11262 && (mode == SFmode || !TARGET_VFP_SINGLE))
11263 {
11264 if (GET_CODE (XEXP (x, 0)) == MULT)
11265 {
11266 /* VNMUL. */
11267 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11268 return true;
11269 }
11270
11271 if (speed_p)
11272 *cost += extra_cost->fp[mode != SFmode].neg;
11273
11274 return false;
11275 }
11276 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11277 {
11278 *cost = LIBCALL_COST (1);
11279 return false;
11280 }
11281
11282 if (mode == SImode)
11283 {
11284 if (GET_CODE (XEXP (x, 0)) == ABS)
11285 {
11286 *cost += COSTS_N_INSNS (1);
11287 /* Assume the non-flag-changing variant. */
11288 if (speed_p)
11289 *cost += (extra_cost->alu.log_shift
11290 + extra_cost->alu.arith_shift);
11291 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11292 return true;
11293 }
11294
11295 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11296 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11297 {
11298 *cost += COSTS_N_INSNS (1);
11299 /* No extra cost for MOV imm and MVN imm. */
11300 /* If the comparison op is using the flags, there's no further
11301 cost, otherwise we need to add the cost of the comparison. */
11302 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11303 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11304 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11305 {
11306 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11307 *cost += (COSTS_N_INSNS (1)
11308 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11309 0, speed_p)
11310 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11311 1, speed_p));
11312 if (speed_p)
11313 *cost += extra_cost->alu.arith;
11314 }
11315 return true;
11316 }
11317
11318 if (speed_p)
11319 *cost += extra_cost->alu.arith;
11320 return false;
11321 }
11322
11323 if (GET_MODE_CLASS (mode) == MODE_INT
11324 && GET_MODE_SIZE (mode) < 4)
11325 {
11326 /* Slightly disparage, as we might need an extend operation. */
11327 *cost += 1;
11328 if (speed_p)
11329 *cost += extra_cost->alu.arith;
11330 return false;
11331 }
11332
11333 if (mode == DImode)
11334 {
11335 *cost += COSTS_N_INSNS (1);
11336 if (speed_p)
11337 *cost += 2 * extra_cost->alu.arith;
11338 return false;
11339 }
11340
11341 /* Vector mode? */
11342 *cost = LIBCALL_COST (1);
11343 return false;
11344
11345 case NOT:
11346 if (mode == SImode)
11347 {
11348 rtx shift_op;
11349 rtx shift_reg = NULL;
11350
11351 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11352
11353 if (shift_op)
11354 {
11355 if (shift_reg != NULL)
11356 {
11357 if (speed_p)
11358 *cost += extra_cost->alu.log_shift_reg;
11359 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11360 }
11361 else if (speed_p)
11362 *cost += extra_cost->alu.log_shift;
11363 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11364 return true;
11365 }
11366
11367 if (speed_p)
11368 *cost += extra_cost->alu.logical;
11369 return false;
11370 }
11371 if (mode == DImode)
11372 {
11373 *cost += COSTS_N_INSNS (1);
11374 return false;
11375 }
11376
11377 /* Vector mode? */
11378
11379 *cost += LIBCALL_COST (1);
11380 return false;
11381
11382 case IF_THEN_ELSE:
11383 {
11384 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11385 {
11386 *cost += COSTS_N_INSNS (3);
11387 return true;
11388 }
11389 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11390 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11391
11392 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11393 /* Assume that if one arm of the if_then_else is a register,
11394 it will be tied with the result, eliminating the
11395 conditional insn. */
11396 if (REG_P (XEXP (x, 1)))
11397 *cost += op2cost;
11398 else if (REG_P (XEXP (x, 2)))
11399 *cost += op1cost;
11400 else
11401 {
11402 if (speed_p)
11403 {
11404 if (extra_cost->alu.non_exec_costs_exec)
11405 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11406 else
11407 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11408 }
11409 else
11410 *cost += op1cost + op2cost;
11411 }
11412 }
11413 return true;
11414
11415 case COMPARE:
11416 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11417 *cost = 0;
11418 else
11419 {
11420 machine_mode op0mode;
11421 /* We'll mostly assume that the cost of a compare is the cost of the
11422 LHS. However, there are some notable exceptions. */
11423
11424 /* Floating point compares are never done as side-effects. */
11425 op0mode = GET_MODE (XEXP (x, 0));
11426 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11427 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11428 {
11429 if (speed_p)
11430 *cost += extra_cost->fp[op0mode != SFmode].compare;
11431
11432 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11433 {
11434 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11435 return true;
11436 }
11437
11438 return false;
11439 }
11440 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11441 {
11442 *cost = LIBCALL_COST (2);
11443 return false;
11444 }
11445
11446 /* DImode compares normally take two insns. */
11447 if (op0mode == DImode)
11448 {
11449 *cost += COSTS_N_INSNS (1);
11450 if (speed_p)
11451 *cost += 2 * extra_cost->alu.arith;
11452 return false;
11453 }
11454
11455 if (op0mode == SImode)
11456 {
11457 rtx shift_op;
11458 rtx shift_reg;
11459
11460 if (XEXP (x, 1) == const0_rtx
11461 && !(REG_P (XEXP (x, 0))
11462 || (GET_CODE (XEXP (x, 0)) == SUBREG
11463 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11464 {
11465 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11466
11467 /* Multiply operations that set the flags are often
11468 significantly more expensive. */
11469 if (speed_p
11470 && GET_CODE (XEXP (x, 0)) == MULT
11471 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11472 *cost += extra_cost->mult[0].flag_setting;
11473
11474 if (speed_p
11475 && GET_CODE (XEXP (x, 0)) == PLUS
11476 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11477 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11478 0), 1), mode))
11479 *cost += extra_cost->mult[0].flag_setting;
11480 return true;
11481 }
11482
11483 shift_reg = NULL;
11484 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11485 if (shift_op != NULL)
11486 {
11487 if (shift_reg != NULL)
11488 {
11489 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11490 1, speed_p);
11491 if (speed_p)
11492 *cost += extra_cost->alu.arith_shift_reg;
11493 }
11494 else if (speed_p)
11495 *cost += extra_cost->alu.arith_shift;
11496 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11497 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11498 return true;
11499 }
11500
11501 if (speed_p)
11502 *cost += extra_cost->alu.arith;
11503 if (CONST_INT_P (XEXP (x, 1))
11504 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11505 {
11506 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11507 return true;
11508 }
11509 return false;
11510 }
11511
11512 /* Vector mode? */
11513
11514 *cost = LIBCALL_COST (2);
11515 return false;
11516 }
11517 return true;
11518
11519 case EQ:
11520 case GE:
11521 case GT:
11522 case LE:
11523 case LT:
11524 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11525 vcle and vclt). */
11526 if (TARGET_NEON
11527 && TARGET_HARD_FLOAT
11528 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11529 && (XEXP (x, 1) == CONST0_RTX (mode)))
11530 {
11531 *cost = 0;
11532 return true;
11533 }
11534
11535 /* Fall through. */
11536 case NE:
11537 case LTU:
11538 case LEU:
11539 case GEU:
11540 case GTU:
11541 case ORDERED:
11542 case UNORDERED:
11543 case UNEQ:
11544 case UNLE:
11545 case UNLT:
11546 case UNGE:
11547 case UNGT:
11548 case LTGT:
11549 if (outer_code == SET)
11550 {
11551 /* Is it a store-flag operation? */
11552 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11553 && XEXP (x, 1) == const0_rtx)
11554 {
11555 /* Thumb also needs an IT insn. */
11556 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11557 return true;
11558 }
11559 if (XEXP (x, 1) == const0_rtx)
11560 {
11561 switch (code)
11562 {
11563 case LT:
11564 /* LSR Rd, Rn, #31. */
11565 if (speed_p)
11566 *cost += extra_cost->alu.shift;
11567 break;
11568
11569 case EQ:
11570 /* RSBS T1, Rn, #0
11571 ADC Rd, Rn, T1. */
11572
11573 case NE:
11574 /* SUBS T1, Rn, #1
11575 SBC Rd, Rn, T1. */
11576 *cost += COSTS_N_INSNS (1);
11577 break;
11578
11579 case LE:
11580 /* RSBS T1, Rn, Rn, LSR #31
11581 ADC Rd, Rn, T1. */
11582 *cost += COSTS_N_INSNS (1);
11583 if (speed_p)
11584 *cost += extra_cost->alu.arith_shift;
11585 break;
11586
11587 case GT:
11588 /* RSB Rd, Rn, Rn, ASR #1
11589 LSR Rd, Rd, #31. */
11590 *cost += COSTS_N_INSNS (1);
11591 if (speed_p)
11592 *cost += (extra_cost->alu.arith_shift
11593 + extra_cost->alu.shift);
11594 break;
11595
11596 case GE:
11597 /* ASR Rd, Rn, #31
11598 ADD Rd, Rn, #1. */
11599 *cost += COSTS_N_INSNS (1);
11600 if (speed_p)
11601 *cost += extra_cost->alu.shift;
11602 break;
11603
11604 default:
11605 /* Remaining cases are either meaningless or would take
11606 three insns anyway. */
11607 *cost = COSTS_N_INSNS (3);
11608 break;
11609 }
11610 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11611 return true;
11612 }
11613 else
11614 {
11615 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11616 if (CONST_INT_P (XEXP (x, 1))
11617 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11618 {
11619 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11620 return true;
11621 }
11622
11623 return false;
11624 }
11625 }
11626 /* Not directly inside a set. If it involves the condition code
11627 register, it must be the condition for a branch, cond_exec or
11628 I_T_E operation. Since the comparison is performed elsewhere,
11629 this is just the control part, which has no additional
11630 cost. */
11631 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11632 && XEXP (x, 1) == const0_rtx)
11633 {
11634 *cost = 0;
11635 return true;
11636 }
11637 return false;
11638
11639 case ABS:
11640 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11641 && (mode == SFmode || !TARGET_VFP_SINGLE))
11642 {
11643 if (speed_p)
11644 *cost += extra_cost->fp[mode != SFmode].neg;
11645
11646 return false;
11647 }
11648 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11649 {
11650 *cost = LIBCALL_COST (1);
11651 return false;
11652 }
11653
11654 if (mode == SImode)
11655 {
11656 if (speed_p)
11657 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11658 return false;
11659 }
11660 /* Vector mode? */
11661 *cost = LIBCALL_COST (1);
11662 return false;
11663
11664 case SIGN_EXTEND:
11665 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11666 && MEM_P (XEXP (x, 0)))
11667 {
11668 if (mode == DImode)
11669 *cost += COSTS_N_INSNS (1);
11670
11671 if (!speed_p)
11672 return true;
11673
11674 if (GET_MODE (XEXP (x, 0)) == SImode)
11675 *cost += extra_cost->ldst.load;
11676 else
11677 *cost += extra_cost->ldst.load_sign_extend;
11678
11679 if (mode == DImode)
11680 *cost += extra_cost->alu.shift;
11681
11682 return true;
11683 }
11684
11685 /* Widening from less than 32 bits requires an extend operation. */
11686 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11687 {
11688 /* We have SXTB/SXTH. */
11689 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11690 if (speed_p)
11691 *cost += extra_cost->alu.extend;
11692 }
11693 else if (GET_MODE (XEXP (x, 0)) != SImode)
11694 {
11695 /* Needs two shifts. */
11696 *cost += COSTS_N_INSNS (1);
11697 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11698 if (speed_p)
11699 *cost += 2 * extra_cost->alu.shift;
11700 }
11701
11702 /* Widening beyond 32 bits requires one more insn. */
11703 if (mode == DImode)
11704 {
11705 *cost += COSTS_N_INSNS (1);
11706 if (speed_p)
11707 *cost += extra_cost->alu.shift;
11708 }
11709
11710 return true;
11711
11712 case ZERO_EXTEND:
11713 if ((arm_arch4
11714 || GET_MODE (XEXP (x, 0)) == SImode
11715 || GET_MODE (XEXP (x, 0)) == QImode)
11716 && MEM_P (XEXP (x, 0)))
11717 {
11718 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11719
11720 if (mode == DImode)
11721 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11722
11723 return true;
11724 }
11725
11726 /* Widening from less than 32 bits requires an extend operation. */
11727 if (GET_MODE (XEXP (x, 0)) == QImode)
11728 {
11729 /* UXTB can be a shorter instruction in Thumb2, but it might
11730 be slower than the AND Rd, Rn, #255 alternative. When
11731 optimizing for speed it should never be slower to use
11732 AND, and we don't really model 16-bit vs 32-bit insns
11733 here. */
11734 if (speed_p)
11735 *cost += extra_cost->alu.logical;
11736 }
11737 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11738 {
11739 /* We have UXTB/UXTH. */
11740 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11741 if (speed_p)
11742 *cost += extra_cost->alu.extend;
11743 }
11744 else if (GET_MODE (XEXP (x, 0)) != SImode)
11745 {
11746 /* Needs two shifts. It's marginally preferable to use
11747 shifts rather than two BIC instructions as the second
11748 shift may merge with a subsequent insn as a shifter
11749 op. */
11750 *cost = COSTS_N_INSNS (2);
11751 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11752 if (speed_p)
11753 *cost += 2 * extra_cost->alu.shift;
11754 }
11755
11756 /* Widening beyond 32 bits requires one more insn. */
11757 if (mode == DImode)
11758 {
11759 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11760 }
11761
11762 return true;
11763
11764 case CONST_INT:
11765 *cost = 0;
11766 /* CONST_INT has no mode, so we cannot tell for sure how many
11767 insns are really going to be needed. The best we can do is
11768 look at the value passed. If it fits in SImode, then assume
11769 that's the mode it will be used for. Otherwise assume it
11770 will be used in DImode. */
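/* Illustration: a value such as 0x1ffffffff does not fit in SImode, so it
   is treated as DImode and costed below as two SImode constants, one for
   each 32-bit half. */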
11771 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11772 mode = SImode;
11773 else
11774 mode = DImode;
11775
11776 /* Avoid blowing up in arm_gen_constant (). */
11777 if (!(outer_code == PLUS
11778 || outer_code == AND
11779 || outer_code == IOR
11780 || outer_code == XOR
11781 || outer_code == MINUS))
11782 outer_code = SET;
11783
11784 const_int_cost:
11785 if (mode == SImode)
11786 {
11787 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11788 INTVAL (x), NULL, NULL,
11789 0, 0));
11790 /* Extra costs? */
11791 }
11792 else
11793 {
11794 *cost += COSTS_N_INSNS (arm_gen_constant
11795 (outer_code, SImode, NULL,
11796 trunc_int_for_mode (INTVAL (x), SImode),
11797 NULL, NULL, 0, 0)
11798 + arm_gen_constant (outer_code, SImode, NULL,
11799 INTVAL (x) >> 32, NULL,
11800 NULL, 0, 0));
11801 /* Extra costs? */
11802 }
11803
11804 return true;
11805
11806 case CONST:
11807 case LABEL_REF:
11808 case SYMBOL_REF:
11809 if (speed_p)
11810 {
11811 if (arm_arch_thumb2 && !flag_pic)
11812 *cost += COSTS_N_INSNS (1);
11813 else
11814 *cost += extra_cost->ldst.load;
11815 }
11816 else
11817 *cost += COSTS_N_INSNS (1);
11818
11819 if (flag_pic)
11820 {
11821 *cost += COSTS_N_INSNS (1);
11822 if (speed_p)
11823 *cost += extra_cost->alu.arith;
11824 }
11825
11826 return true;
11827
11828 case CONST_FIXED:
11829 *cost = COSTS_N_INSNS (4);
11830 /* Fixme. */
11831 return true;
11832
11833 case CONST_DOUBLE:
11834 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11835 && (mode == SFmode || !TARGET_VFP_SINGLE))
11836 {
11837 if (vfp3_const_double_rtx (x))
11838 {
11839 if (speed_p)
11840 *cost += extra_cost->fp[mode == DFmode].fpconst;
11841 return true;
11842 }
11843
11844 if (speed_p)
11845 {
11846 if (mode == DFmode)
11847 *cost += extra_cost->ldst.loadd;
11848 else
11849 *cost += extra_cost->ldst.loadf;
11850 }
11851 else
11852 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11853
11854 return true;
11855 }
11856 *cost = COSTS_N_INSNS (4);
11857 return true;
11858
11859 case CONST_VECTOR:
11860 /* Fixme. */
11861 if (((TARGET_NEON && TARGET_HARD_FLOAT
11862 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11863 || TARGET_HAVE_MVE)
11864 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11865 *cost = COSTS_N_INSNS (1);
11866 else
11867 *cost = COSTS_N_INSNS (4);
11868 return true;
11869
11870 case HIGH:
11871 case LO_SUM:
11872 /* When optimizing for size, we prefer constant pool entries to
11873 MOVW/MOVT pairs, so bump the cost of these slightly. */
11874 if (!speed_p)
11875 *cost += 1;
11876 return true;
11877
11878 case CLZ:
11879 if (speed_p)
11880 *cost += extra_cost->alu.clz;
11881 return false;
11882
11883 case SMIN:
11884 if (XEXP (x, 1) == const0_rtx)
11885 {
11886 if (speed_p)
11887 *cost += extra_cost->alu.log_shift;
11888 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11889 return true;
11890 }
11891 /* Fall through. */
11892 case SMAX:
11893 case UMIN:
11894 case UMAX:
11895 *cost += COSTS_N_INSNS (1);
11896 return false;
11897
11898 case TRUNCATE:
11899 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11900 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11901 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11902 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11903 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11904 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11905 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11906 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11907 == ZERO_EXTEND))))
11908 {
11909 if (speed_p)
11910 *cost += extra_cost->mult[1].extend;
11911 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11912 ZERO_EXTEND, 0, speed_p)
11913 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11914 ZERO_EXTEND, 0, speed_p));
11915 return true;
11916 }
11917 *cost = LIBCALL_COST (1);
11918 return false;
11919
11920 case UNSPEC_VOLATILE:
11921 case UNSPEC:
11922 return arm_unspec_cost (x, outer_code, speed_p, cost);
11923
11924 case PC:
11925 /* Reading the PC is like reading any other register. Writing it
11926 is more expensive, but we take that into account elsewhere. */
11927 *cost = 0;
11928 return true;
11929
11930 case ZERO_EXTRACT:
11931 /* TODO: Simple zero_extract of bottom bits using AND. */
11932 /* Fall through. */
11933 case SIGN_EXTRACT:
11934 if (arm_arch6
11935 && mode == SImode
11936 && CONST_INT_P (XEXP (x, 1))
11937 && CONST_INT_P (XEXP (x, 2)))
11938 {
11939 if (speed_p)
11940 *cost += extra_cost->alu.bfx;
11941 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11942 return true;
11943 }
11944 /* Without UBFX/SBFX, need to resort to shift operations. */
11945 *cost += COSTS_N_INSNS (1);
11946 if (speed_p)
11947 *cost += 2 * extra_cost->alu.shift;
11948 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11949 return true;
11950
11951 case FLOAT_EXTEND:
11952 if (TARGET_HARD_FLOAT)
11953 {
11954 if (speed_p)
11955 *cost += extra_cost->fp[mode == DFmode].widen;
11956 if (!TARGET_VFP5
11957 && GET_MODE (XEXP (x, 0)) == HFmode)
11958 {
11959 /* Pre v8, widening HF->DF is a two-step process, first
11960 widening to SFmode. */
11961 *cost += COSTS_N_INSNS (1);
11962 if (speed_p)
11963 *cost += extra_cost->fp[0].widen;
11964 }
11965 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11966 return true;
11967 }
11968
11969 *cost = LIBCALL_COST (1);
11970 return false;
11971
11972 case FLOAT_TRUNCATE:
11973 if (TARGET_HARD_FLOAT)
11974 {
11975 if (speed_p)
11976 *cost += extra_cost->fp[mode == DFmode].narrow;
11977 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11978 return true;
11979 /* Vector modes? */
11980 }
11981 *cost = LIBCALL_COST (1);
11982 return false;
11983
11984 case FMA:
11985 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11986 {
11987 rtx op0 = XEXP (x, 0);
11988 rtx op1 = XEXP (x, 1);
11989 rtx op2 = XEXP (x, 2);
11990
11991
11992 /* vfms or vfnma. */
11993 if (GET_CODE (op0) == NEG)
11994 op0 = XEXP (op0, 0);
11995
11996 /* vfnms or vfnma. */
11997 if (GET_CODE (op2) == NEG)
11998 op2 = XEXP (op2, 0);
11999
12000 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12001 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12002 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12003
12004 if (speed_p)
12005 *cost += extra_cost->fp[mode == DFmode].fma;
12006
12007 return true;
12008 }
12009
12010 *cost = LIBCALL_COST (3);
12011 return false;
12012
12013 case FIX:
12014 case UNSIGNED_FIX:
12015 if (TARGET_HARD_FLOAT)
12016 {
12017 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12018 a vcvt fixed-point conversion. */
12019 if (code == FIX && mode == SImode
12020 && GET_CODE (XEXP (x, 0)) == FIX
12021 && GET_MODE (XEXP (x, 0)) == SFmode
12022 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12023 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12024 > 0)
12025 {
12026 if (speed_p)
12027 *cost += extra_cost->fp[0].toint;
12028
12029 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12030 code, 0, speed_p);
12031 return true;
12032 }
12033
12034 if (GET_MODE_CLASS (mode) == MODE_INT)
12035 {
12036 mode = GET_MODE (XEXP (x, 0));
12037 if (speed_p)
12038 *cost += extra_cost->fp[mode == DFmode].toint;
12039 /* Strip off the 'cost' of rounding towards zero. */
12040 if (GET_CODE (XEXP (x, 0)) == FIX)
12041 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12042 0, speed_p);
12043 else
12044 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12045 /* ??? Increase the cost to deal with transferring from
12046 FP -> CORE registers? */
12047 return true;
12048 }
12049 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12050 && TARGET_VFP5)
12051 {
12052 if (speed_p)
12053 *cost += extra_cost->fp[mode == DFmode].roundint;
12054 return false;
12055 }
12056 /* Vector costs? */
12057 }
12058 *cost = LIBCALL_COST (1);
12059 return false;
12060
12061 case FLOAT:
12062 case UNSIGNED_FLOAT:
12063 if (TARGET_HARD_FLOAT)
12064 {
12065 /* ??? Increase the cost to deal with transferring from CORE
12066 -> FP registers? */
12067 if (speed_p)
12068 *cost += extra_cost->fp[mode == DFmode].fromint;
12069 return false;
12070 }
12071 *cost = LIBCALL_COST (1);
12072 return false;
12073
12074 case CALL:
12075 return true;
12076
12077 case ASM_OPERANDS:
12078 {
12079 /* Just a guess: count the number of instructions in the asm template
12080 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12081 though (see PR60663). */
12082 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12083 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12084
12085 *cost = COSTS_N_INSNS (asm_length + num_operands);
12086 return true;
12087 }
12088 default:
12089 if (mode != VOIDmode)
12090 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12091 else
12092 *cost = COSTS_N_INSNS (4); /* Who knows? */
12093 return false;
12094 }
12095 }
12096
12097 #undef HANDLE_NARROW_SHIFT_ARITH
12098
12099 /* RTX costs entry point. */
12100
12101 static bool
12102 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12103 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12104 {
12105 bool result;
12106 int code = GET_CODE (x);
12107 gcc_assert (current_tune->insn_extra_cost);
12108
12109 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12110 (enum rtx_code) outer_code,
12111 current_tune->insn_extra_cost,
12112 total, speed);
12113
12114 if (dump_file && arm_verbose_cost)
12115 {
12116 print_rtl_single (dump_file, x);
12117 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12118 *total, result ? "final" : "partial");
12119 }
12120 return result;
12121 }
12122
12123 static int
12124 arm_insn_cost (rtx_insn *insn, bool speed)
12125 {
12126 int cost;
12127
12128 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12129 will likely disappear during register allocation. */
12130 if (!reload_completed
12131 && GET_CODE (PATTERN (insn)) == SET
12132 && REG_P (SET_DEST (PATTERN (insn)))
12133 && REG_P (SET_SRC (PATTERN (insn))))
12134 return 2;
12135 cost = pattern_cost (PATTERN (insn), speed);
12136 /* If the cost is zero, then it's likely a complex insn. We don't want the
12137 cost of these to be less than something we know about. */
12138 return cost ? cost : COSTS_N_INSNS (2);
12139 }
12140
12141 /* All address computations that can be done are free, but rtx cost returns
12142 the same for practically all of them. So we weight the different types
12143 of address here in the order (most preferred first):
12144 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
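/* For example, (post_inc (reg)) is costed 0, reg plus a constant offset 2,
   and a bare SYMBOL_REF 10, per the weights below. */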
12145 static inline int
12146 arm_arm_address_cost (rtx x)
12147 {
12148 enum rtx_code c = GET_CODE (x);
12149
12150 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12151 return 0;
12152 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12153 return 10;
12154
12155 if (c == PLUS)
12156 {
12157 if (CONST_INT_P (XEXP (x, 1)))
12158 return 2;
12159
12160 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12161 return 3;
12162
12163 return 4;
12164 }
12165
12166 return 6;
12167 }
12168
12169 static inline int
12170 arm_thumb_address_cost (rtx x)
12171 {
12172 enum rtx_code c = GET_CODE (x);
12173
12174 if (c == REG)
12175 return 1;
12176 if (c == PLUS
12177 && REG_P (XEXP (x, 0))
12178 && CONST_INT_P (XEXP (x, 1)))
12179 return 1;
12180
12181 return 2;
12182 }
12183
12184 static int
12185 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12186 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12187 {
12188 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12189 }
12190
12191 /* Adjust cost hook for XScale. */
12192 static bool
12193 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12194 int * cost)
12195 {
12196 /* Some true dependencies can have a higher cost depending
12197 on precisely how certain input operands are used. */
12198 if (dep_type == 0
12199 && recog_memoized (insn) >= 0
12200 && recog_memoized (dep) >= 0)
12201 {
12202 int shift_opnum = get_attr_shift (insn);
12203 enum attr_type attr_type = get_attr_type (dep);
12204
12205 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12206 operand for INSN. If we have a shifted input operand and the
12207 instruction we depend on is another ALU instruction, then we may
12208 have to account for an additional stall. */
12209 if (shift_opnum != 0
12210 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12211 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12212 || attr_type == TYPE_ALUS_SHIFT_IMM
12213 || attr_type == TYPE_LOGIC_SHIFT_IMM
12214 || attr_type == TYPE_LOGICS_SHIFT_IMM
12215 || attr_type == TYPE_ALU_SHIFT_REG
12216 || attr_type == TYPE_ALUS_SHIFT_REG
12217 || attr_type == TYPE_LOGIC_SHIFT_REG
12218 || attr_type == TYPE_LOGICS_SHIFT_REG
12219 || attr_type == TYPE_MOV_SHIFT
12220 || attr_type == TYPE_MVN_SHIFT
12221 || attr_type == TYPE_MOV_SHIFT_REG
12222 || attr_type == TYPE_MVN_SHIFT_REG))
12223 {
12224 rtx shifted_operand;
12225 int opno;
12226
12227 /* Get the shifted operand. */
12228 extract_insn (insn);
12229 shifted_operand = recog_data.operand[shift_opnum];
12230
12231 /* Iterate over all the operands in DEP. If we write an operand
12232 that overlaps with SHIFTED_OPERAND, then we have to increase the
12233 cost of this dependency. */
12234 extract_insn (dep);
12235 preprocess_constraints (dep);
12236 for (opno = 0; opno < recog_data.n_operands; opno++)
12237 {
12238 /* We can ignore strict inputs. */
12239 if (recog_data.operand_type[opno] == OP_IN)
12240 continue;
12241
12242 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12243 shifted_operand))
12244 {
12245 *cost = 2;
12246 return false;
12247 }
12248 }
12249 }
12250 }
12251 return true;
12252 }
12253
12254 /* Adjust cost hook for Cortex A9. */
12255 static bool
12256 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12257 int * cost)
12258 {
12259 switch (dep_type)
12260 {
12261 case REG_DEP_ANTI:
12262 *cost = 0;
12263 return false;
12264
12265 case REG_DEP_TRUE:
12266 case REG_DEP_OUTPUT:
12267 if (recog_memoized (insn) >= 0
12268 && recog_memoized (dep) >= 0)
12269 {
12270 if (GET_CODE (PATTERN (insn)) == SET)
12271 {
12272 if (GET_MODE_CLASS
12273 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12274 || GET_MODE_CLASS
12275 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12276 {
12277 enum attr_type attr_type_insn = get_attr_type (insn);
12278 enum attr_type attr_type_dep = get_attr_type (dep);
12279
12280 /* By default all dependencies of the form
12281 s0 = s0 <op> s1
12282 s0 = s0 <op> s2
12283 have an extra latency of 1 cycle because
12284 of the input and output dependency in this
12285 case. However, this gets modeled as a true
12286 dependency, hence all these checks. */
12287 if (REG_P (SET_DEST (PATTERN (insn)))
12288 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12289 {
12290 /* FMACS is a special case where the dependent
12291 instruction can be issued 3 cycles before
12292 the normal latency in case of an output
12293 dependency. */
12294 if ((attr_type_insn == TYPE_FMACS
12295 || attr_type_insn == TYPE_FMACD)
12296 && (attr_type_dep == TYPE_FMACS
12297 || attr_type_dep == TYPE_FMACD))
12298 {
12299 if (dep_type == REG_DEP_OUTPUT)
12300 *cost = insn_default_latency (dep) - 3;
12301 else
12302 *cost = insn_default_latency (dep);
12303 return false;
12304 }
12305 else
12306 {
12307 if (dep_type == REG_DEP_OUTPUT)
12308 *cost = insn_default_latency (dep) + 1;
12309 else
12310 *cost = insn_default_latency (dep);
12311 }
12312 return false;
12313 }
12314 }
12315 }
12316 }
12317 break;
12318
12319 default:
12320 gcc_unreachable ();
12321 }
12322
12323 return true;
12324 }
12325
12326 /* Adjust cost hook for FA726TE. */
12327 static bool
12328 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12329 int * cost)
12330 {
12331 /* For FA726TE, a true dependency on CPSR (i.e. a set-cond insn followed by
12332 a predicated one) has a penalty of 3. */
12333 if (dep_type == REG_DEP_TRUE
12334 && recog_memoized (insn) >= 0
12335 && recog_memoized (dep) >= 0
12336 && get_attr_conds (dep) == CONDS_SET)
12337 {
12338 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12339 if (get_attr_conds (insn) == CONDS_USE
12340 && get_attr_type (insn) != TYPE_BRANCH)
12341 {
12342 *cost = 3;
12343 return false;
12344 }
12345
12346 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12347 || get_attr_conds (insn) == CONDS_USE)
12348 {
12349 *cost = 0;
12350 return false;
12351 }
12352 }
12353
12354 return true;
12355 }
12356
12357 /* Implement TARGET_REGISTER_MOVE_COST.
12358
12359 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12360 typically more expensive than a single memory access. We set
12361 the cost to less than two memory accesses so that floating
12362 point to integer conversion does not go through memory. */
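/* For example, the VFP<->core case below is costed at 15, which is below
   2 * the 32-bit memory move cost of 10 (see arm_memory_move_cost). */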
12363
12364 int
12365 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12366 reg_class_t from, reg_class_t to)
12367 {
12368 if (TARGET_32BIT)
12369 {
12370 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12371 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12372 return 15;
12373 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12374 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12375 return 4;
12376 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12377 return 20;
12378 else
12379 return 2;
12380 }
12381 else
12382 {
12383 if (from == HI_REGS || to == HI_REGS)
12384 return 4;
12385 else
12386 return 2;
12387 }
12388 }
12389
12390 /* Implement TARGET_MEMORY_MOVE_COST. */
12391
12392 int
12393 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12394 bool in ATTRIBUTE_UNUSED)
12395 {
12396 if (TARGET_32BIT)
12397 return 10;
12398 else
12399 {
12400 if (GET_MODE_SIZE (mode) < 4)
12401 return 8;
12402 else
12403 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12404 }
12405 }
12406
12407 /* Vectorizer cost model implementation. */
12408
12409 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12410 static int
12411 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12412 tree vectype,
12413 int misalign ATTRIBUTE_UNUSED)
12414 {
12415 unsigned elements;
12416
12417 switch (type_of_cost)
12418 {
12419 case scalar_stmt:
12420 return current_tune->vec_costs->scalar_stmt_cost;
12421
12422 case scalar_load:
12423 return current_tune->vec_costs->scalar_load_cost;
12424
12425 case scalar_store:
12426 return current_tune->vec_costs->scalar_store_cost;
12427
12428 case vector_stmt:
12429 return current_tune->vec_costs->vec_stmt_cost;
12430
12431 case vector_load:
12432 return current_tune->vec_costs->vec_align_load_cost;
12433
12434 case vector_store:
12435 return current_tune->vec_costs->vec_store_cost;
12436
12437 case vec_to_scalar:
12438 return current_tune->vec_costs->vec_to_scalar_cost;
12439
12440 case scalar_to_vec:
12441 return current_tune->vec_costs->scalar_to_vec_cost;
12442
12443 case unaligned_load:
12444 case vector_gather_load:
12445 return current_tune->vec_costs->vec_unalign_load_cost;
12446
12447 case unaligned_store:
12448 case vector_scatter_store:
12449 return current_tune->vec_costs->vec_unalign_store_cost;
12450
12451 case cond_branch_taken:
12452 return current_tune->vec_costs->cond_taken_branch_cost;
12453
12454 case cond_branch_not_taken:
12455 return current_tune->vec_costs->cond_not_taken_branch_cost;
12456
12457 case vec_perm:
12458 case vec_promote_demote:
12459 return current_tune->vec_costs->vec_stmt_cost;
12460
12461 case vec_construct:
12462 elements = TYPE_VECTOR_SUBPARTS (vectype);
12463 return elements / 2 + 1;
12464
12465 default:
12466 gcc_unreachable ();
12467 }
12468 }
12469
12470 /* Return true if and only if this insn can dual-issue only as older. */
12471 static bool
12472 cortexa7_older_only (rtx_insn *insn)
12473 {
12474 if (recog_memoized (insn) < 0)
12475 return false;
12476
12477 switch (get_attr_type (insn))
12478 {
12479 case TYPE_ALU_DSP_REG:
12480 case TYPE_ALU_SREG:
12481 case TYPE_ALUS_SREG:
12482 case TYPE_LOGIC_REG:
12483 case TYPE_LOGICS_REG:
12484 case TYPE_ADC_REG:
12485 case TYPE_ADCS_REG:
12486 case TYPE_ADR:
12487 case TYPE_BFM:
12488 case TYPE_REV:
12489 case TYPE_MVN_REG:
12490 case TYPE_SHIFT_IMM:
12491 case TYPE_SHIFT_REG:
12492 case TYPE_LOAD_BYTE:
12493 case TYPE_LOAD_4:
12494 case TYPE_STORE_4:
12495 case TYPE_FFARITHS:
12496 case TYPE_FADDS:
12497 case TYPE_FFARITHD:
12498 case TYPE_FADDD:
12499 case TYPE_FMOV:
12500 case TYPE_F_CVT:
12501 case TYPE_FCMPS:
12502 case TYPE_FCMPD:
12503 case TYPE_FCONSTS:
12504 case TYPE_FCONSTD:
12505 case TYPE_FMULS:
12506 case TYPE_FMACS:
12507 case TYPE_FMULD:
12508 case TYPE_FMACD:
12509 case TYPE_FDIVS:
12510 case TYPE_FDIVD:
12511 case TYPE_F_MRC:
12512 case TYPE_F_MRRC:
12513 case TYPE_F_FLAG:
12514 case TYPE_F_LOADS:
12515 case TYPE_F_STORES:
12516 return true;
12517 default:
12518 return false;
12519 }
12520 }
12521
12522 /* Return true if and only if this insn can dual-issue as younger. */
12523 static bool
12524 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12525 {
12526 if (recog_memoized (insn) < 0)
12527 {
12528 if (verbose > 5)
12529 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12530 return false;
12531 }
12532
12533 switch (get_attr_type (insn))
12534 {
12535 case TYPE_ALU_IMM:
12536 case TYPE_ALUS_IMM:
12537 case TYPE_LOGIC_IMM:
12538 case TYPE_LOGICS_IMM:
12539 case TYPE_EXTEND:
12540 case TYPE_MVN_IMM:
12541 case TYPE_MOV_IMM:
12542 case TYPE_MOV_REG:
12543 case TYPE_MOV_SHIFT:
12544 case TYPE_MOV_SHIFT_REG:
12545 case TYPE_BRANCH:
12546 case TYPE_CALL:
12547 return true;
12548 default:
12549 return false;
12550 }
12551 }
12552
12553
12554 /* Look for an instruction that can dual issue only as an older
12555 instruction, and move it in front of any instructions that can
12556 dual-issue as younger, while preserving the relative order of all
12557 other instructions in the ready list. This is a heuristic to help
12558 dual-issue in later cycles, by postponing issue of more flexible
12559 instructions. This heuristic may affect dual issue opportunities
12560 in the current cycle. */
12561 static void
12562 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12563 int *n_readyp, int clock)
12564 {
12565 int i;
12566 int first_older_only = -1, first_younger = -1;
12567
12568 if (verbose > 5)
12569 fprintf (file,
12570 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12571 clock,
12572 *n_readyp);
12573
12574 /* Traverse the ready list from the head (the instruction to issue
12575 first), looking for the first instruction that can issue as
12576 younger and the first instruction that can dual-issue only as
12577 older. */
12578 for (i = *n_readyp - 1; i >= 0; i--)
12579 {
12580 rtx_insn *insn = ready[i];
12581 if (cortexa7_older_only (insn))
12582 {
12583 first_older_only = i;
12584 if (verbose > 5)
12585 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12586 break;
12587 }
12588 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12589 first_younger = i;
12590 }
12591
12592 /* Nothing to reorder because either no younger insn was found, or an insn
12593 that can dual-issue only as older appears before any insn that
12594 can dual-issue as younger. */
12595 if (first_younger == -1)
12596 {
12597 if (verbose > 5)
12598 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12599 return;
12600 }
12601
12602 /* Nothing to reorder because there is no older-only insn in the ready list. */
12603 if (first_older_only == -1)
12604 {
12605 if (verbose > 5)
12606 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12607 return;
12608 }
12609
12610 /* Move first_older_only insn before first_younger. */
12611 if (verbose > 5)
12612 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12613 INSN_UID(ready [first_older_only]),
12614 INSN_UID(ready [first_younger]));
12615 rtx_insn *first_older_only_insn = ready [first_older_only];
12616 for (i = first_older_only; i < first_younger; i++)
12617 {
12618 ready[i] = ready[i+1];
12619 }
12620
12621 ready[i] = first_older_only_insn;
12622 return;
12623 }
12624
12625 /* Implement TARGET_SCHED_REORDER. */
12626 static int
12627 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12628 int clock)
12629 {
12630 switch (arm_tune)
12631 {
12632 case TARGET_CPU_cortexa7:
12633 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12634 break;
12635 default:
12636 /* Do nothing for other cores. */
12637 break;
12638 }
12639
12640 return arm_issue_rate ();
12641 }
12642
12643 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12644 It corrects the value of COST based on the relationship between
12645 INSN and DEP through the dependence type DEP_TYPE. It returns the new
12646 value. There is a per-core adjust_cost hook to adjust scheduler costs
12647 and the per-core hook can choose to completely override the generic
12648 adjust_cost function. Only put bits of code into arm_adjust_cost that
12649 are common across all cores. */
12650 static int
12651 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12652 unsigned int)
12653 {
12654 rtx i_pat, d_pat;
12655
12656 /* When generating Thumb-1 code, we want to place flag-setting operations
12657 close to a conditional branch which depends on them, so that we can
12658 omit the comparison. */
12659 if (TARGET_THUMB1
12660 && dep_type == 0
12661 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12662 && recog_memoized (dep) >= 0
12663 && get_attr_conds (dep) == CONDS_SET)
12664 return 0;
12665
12666 if (current_tune->sched_adjust_cost != NULL)
12667 {
12668 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12669 return cost;
12670 }
12671
12672 /* XXX Is this strictly true? */
12673 if (dep_type == REG_DEP_ANTI
12674 || dep_type == REG_DEP_OUTPUT)
12675 return 0;
12676
12677 /* Call insns don't incur a stall, even if they follow a load. */
12678 if (dep_type == 0
12679 && CALL_P (insn))
12680 return 1;
12681
12682 if ((i_pat = single_set (insn)) != NULL
12683 && MEM_P (SET_SRC (i_pat))
12684 && (d_pat = single_set (dep)) != NULL
12685 && MEM_P (SET_DEST (d_pat)))
12686 {
12687 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12688 /* This is a load after a store; there is no conflict if the load reads
12689 from a cached area. Assume that loads from the stack and from the
12690 constant pool are cached, and that others will miss. This is a
12691 hack. */
12692
12693 if ((SYMBOL_REF_P (src_mem)
12694 && CONSTANT_POOL_ADDRESS_P (src_mem))
12695 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12696 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12697 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12698 return 1;
12699 }
12700
12701 return cost;
12702 }
12703
12704 int
12705 arm_max_conditional_execute (void)
12706 {
12707 return max_insns_skipped;
12708 }
12709
12710 static int
12711 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12712 {
12713 if (TARGET_32BIT)
12714 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12715 else
12716 return (optimize > 0) ? 2 : 0;
12717 }
12718
12719 static int
12720 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12721 {
12722 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12723 }
12724
12725 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12726 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12727 sequences of non-executed instructions in IT blocks probably take the same
12728 amount of time as executed instructions (and the IT instruction itself takes
12729 space in icache). This function was experimentally determined to give good
12730 results on a popular embedded benchmark. */
12731
12732 static int
12733 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12734 {
12735 return (TARGET_32BIT && speed_p) ? 1
12736 : arm_default_branch_cost (speed_p, predictable_p);
12737 }
12738
12739 static int
12740 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12741 {
12742 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12743 }
12744
12745 static bool fp_consts_inited = false;
12746
12747 static REAL_VALUE_TYPE value_fp0;
12748
12749 static void
12750 init_fp_table (void)
12751 {
12752 REAL_VALUE_TYPE r;
12753
12754 r = REAL_VALUE_ATOF ("0", DFmode);
12755 value_fp0 = r;
12756 fp_consts_inited = true;
12757 }
12758
12759 /* Return TRUE if rtx X is a valid immediate FP constant. */
12760 int
12761 arm_const_double_rtx (rtx x)
12762 {
12763 const REAL_VALUE_TYPE *r;
12764
12765 if (!fp_consts_inited)
12766 init_fp_table ();
12767
12768 r = CONST_DOUBLE_REAL_VALUE (x);
12769 if (REAL_VALUE_MINUS_ZERO (*r))
12770 return 0;
12771
12772 if (real_equal (r, &value_fp0))
12773 return 1;
12774
12775 return 0;
12776 }
12777
12778 /* VFPv3 has a fairly wide range of representable immediates, formed from
12779 "quarter-precision" floating-point values. These can be evaluated using this
12780 formula (with ^ for exponentiation):
12781
12782 -1^s * n * 2^-r
12783
12784 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12785 16 <= n <= 31 and 0 <= r <= 7.
12786
12787 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12788
12789 - A (most-significant) is the sign bit.
12790 - BCD are the exponent (encoded as r XOR 3).
12791 - EFGH are the mantissa (encoded as n - 16).
12792 */
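/* For example, 1.0 can be written as -1^0 * 16 * 2^-4 (s = 0, n = 16, r = 4),
   which maps to A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000,
   i.e. the 8-bit encoding 0b01110000 (0x70). */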
12793
12794 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12795 fconst[sd] instruction, or -1 if X isn't suitable. */
12796 static int
12797 vfp3_const_double_index (rtx x)
12798 {
12799 REAL_VALUE_TYPE r, m;
12800 int sign, exponent;
12801 unsigned HOST_WIDE_INT mantissa, mant_hi;
12802 unsigned HOST_WIDE_INT mask;
12803 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12804 bool fail;
12805
12806 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12807 return -1;
12808
12809 r = *CONST_DOUBLE_REAL_VALUE (x);
12810
12811 /* We can't represent these things, so detect them first. */
12812 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12813 return -1;
12814
12815 /* Extract sign, exponent and mantissa. */
12816 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12817 r = real_value_abs (&r);
12818 exponent = REAL_EXP (&r);
12819 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12820 highest (sign) bit, with a fixed binary point at bit point_pos.
12821 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12822 bits for the mantissa, this may fail (low bits would be lost). */
12823 real_ldexp (&m, &r, point_pos - exponent);
12824 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12825 mantissa = w.elt (0);
12826 mant_hi = w.elt (1);
12827
12828 /* If there are bits set in the low part of the mantissa, we can't
12829 represent this value. */
12830 if (mantissa != 0)
12831 return -1;
12832
12833 /* Now make it so that mantissa contains the most-significant bits, and move
12834 the point_pos to indicate that the least-significant bits have been
12835 discarded. */
12836 point_pos -= HOST_BITS_PER_WIDE_INT;
12837 mantissa = mant_hi;
12838
12839 /* We can permit four significant bits of mantissa only, plus a high bit
12840 which is always 1. */
12841 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12842 if ((mantissa & mask) != 0)
12843 return -1;
12844
12845 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12846 mantissa >>= point_pos - 5;
12847
12848 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12849 floating-point immediate zero with Neon using an integer-zero load, but
12850 that case is handled elsewhere.) */
12851 if (mantissa == 0)
12852 return -1;
12853
12854 gcc_assert (mantissa >= 16 && mantissa <= 31);
12855
12856 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12857 normalized significands are in the range [1, 2). (Our mantissa is shifted
12858 left 4 places at this point relative to normalized IEEE754 values). GCC
12859 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12860 REAL_EXP must be altered. */
12861 exponent = 5 - exponent;
12862
12863 if (exponent < 0 || exponent > 7)
12864 return -1;
12865
12866 /* Sign, mantissa and exponent are now in the correct form to plug into the
12867 formula described in the comment above. */
12868 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12869 }
12870
12871 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12872 int
12873 vfp3_const_double_rtx (rtx x)
12874 {
12875 if (!TARGET_VFP3)
12876 return 0;
12877
12878 return vfp3_const_double_index (x) != -1;
12879 }
12880
12881 /* Recognize immediates which can be used in various Neon and MVE instructions.
12882 Legal immediates are described by the following table (for VMVN variants, the
12883 bitwise inverse of the constant shown is recognized. In either case, VMOV
12884 is output and the correct instruction to use for a given constant is chosen
12885 by the assembler). The constant shown is replicated across all elements of
12886 the destination vector.
12887
12888 insn elems variant constant (binary)
12889 ---- ----- ------- -----------------
12890 vmov i32 0 00000000 00000000 00000000 abcdefgh
12891 vmov i32 1 00000000 00000000 abcdefgh 00000000
12892 vmov i32 2 00000000 abcdefgh 00000000 00000000
12893 vmov i32 3 abcdefgh 00000000 00000000 00000000
12894 vmov i16 4 00000000 abcdefgh
12895 vmov i16 5 abcdefgh 00000000
12896 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12897 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12898 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12899 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12900 vmvn i16 10 00000000 abcdefgh
12901 vmvn i16 11 abcdefgh 00000000
12902 vmov i32 12 00000000 00000000 abcdefgh 11111111
12903 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12904 vmov i32 14 00000000 abcdefgh 11111111 11111111
12905 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12906 vmov i8 16 abcdefgh
12907 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12908 eeeeeeee ffffffff gggggggg hhhhhhhh
12909 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12910 vmov f32 19 00000000 00000000 00000000 00000000
12911
12912 For case 18, B = !b. Representable values are exactly those accepted by
12913 vfp3_const_double_index, but are output as floating-point numbers rather
12914 than indices.
12915
12916 For case 19, we will change it to vmov.i32 when assembling.
12917
12918 Variants 0-5 (inclusive) may also be used as immediates for the second
12919 operand of VORR/VBIC instructions.
12920
12921 The INVERSE argument causes the bitwise inverse of the given operand to be
12922 recognized instead (used for recognizing legal immediates for the VAND/VORN
12923 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12924 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12925 output, rather than the real insns vbic/vorr).
12926
12927 INVERSE makes no difference to the recognition of float vectors.
12928
12929 The return value is the variant of immediate as shown in the above table, or
12930 -1 if the given value doesn't match any of the listed patterns.
12931 */
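/* For example, a V4SImode vector with every element equal to 0x0000ab00
   matches variant 1 above: each 32-bit lane splats to the little-endian
   bytes { 00, ab, 00, 00 }, so *MODCONST is set to 0x0000ab00 and
   *ELEMENTWIDTH to 32. */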
12932 static int
12933 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12934 rtx *modconst, int *elementwidth)
12935 {
12936 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12937 matches = 1; \
12938 for (i = 0; i < idx; i += (STRIDE)) \
12939 if (!(TEST)) \
12940 matches = 0; \
12941 if (matches) \
12942 { \
12943 immtype = (CLASS); \
12944 elsize = (ELSIZE); \
12945 break; \
12946 }
12947
12948 unsigned int i, elsize = 0, idx = 0, n_elts;
12949 unsigned int innersize;
12950 unsigned char bytes[16] = {};
12951 int immtype = -1, matches;
12952 unsigned int invmask = inverse ? 0xff : 0;
12953 bool vector = GET_CODE (op) == CONST_VECTOR;
12954
12955 if (vector)
12956 n_elts = CONST_VECTOR_NUNITS (op);
12957 else
12958 {
12959 n_elts = 1;
12960 gcc_assert (mode != VOIDmode);
12961 }
12962
12963 innersize = GET_MODE_UNIT_SIZE (mode);
12964
12965 /* Only support 128-bit vectors for MVE. */
12966 if (TARGET_HAVE_MVE
12967 && (!vector
12968 || VALID_MVE_PRED_MODE (mode)
12969 || n_elts * innersize != 16))
12970 return -1;
12971
12972 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12973 return -1;
12974
12975 /* Vectors of float constants. */
12976 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12977 {
12978 rtx el0 = CONST_VECTOR_ELT (op, 0);
12979
12980 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12981 return -1;
12982
12983 /* FP16 vectors cannot be represented. */
12984 if (GET_MODE_INNER (mode) == HFmode)
12985 return -1;
12986
12987 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12988 are distinct in this context. */
12989 if (!const_vec_duplicate_p (op))
12990 return -1;
12991
12992 if (modconst)
12993 *modconst = CONST_VECTOR_ELT (op, 0);
12994
12995 if (elementwidth)
12996 *elementwidth = 0;
12997
12998 if (el0 == CONST0_RTX (GET_MODE (el0)))
12999 return 19;
13000 else
13001 return 18;
13002 }
13003
13004 /* The tricks done in the code below apply for little-endian vector layout.
13005 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13006 FIXME: Implement logic for big-endian vectors. */
13007 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13008 return -1;
13009
13010 /* Splat vector constant out into a byte vector. */
13011 for (i = 0; i < n_elts; i++)
13012 {
13013 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13014 unsigned HOST_WIDE_INT elpart;
13015
13016 gcc_assert (CONST_INT_P (el));
13017 elpart = INTVAL (el);
13018
13019 for (unsigned int byte = 0; byte < innersize; byte++)
13020 {
13021 bytes[idx++] = (elpart & 0xff) ^ invmask;
13022 elpart >>= BITS_PER_UNIT;
13023 }
13024 }
13025
13026 /* Sanity check. */
13027 gcc_assert (idx == GET_MODE_SIZE (mode));
13028
13029 do
13030 {
13031 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13032 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13033
13034 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13035 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13036
13037 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13038 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13039
13040 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13041 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13042
13043 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13044
13045 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13046
13047 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13048 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13049
13050 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13051 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13052
13053 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13054 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13055
13056 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13057 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13058
13059 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13060
13061 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13062
13063 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13064 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13065
13066 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13067 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13068
13069 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13070 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13071
13072 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13073 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13074
13075 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13076
13077 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13078 && bytes[i] == bytes[(i + 8) % idx]);
13079 }
13080 while (0);
13081
13082 if (immtype == -1)
13083 return -1;
13084
13085 if (elementwidth)
13086 *elementwidth = elsize;
13087
13088 if (modconst)
13089 {
13090 unsigned HOST_WIDE_INT imm = 0;
13091
13092 /* Un-invert bytes of recognized vector, if necessary. */
13093 if (invmask != 0)
13094 for (i = 0; i < idx; i++)
13095 bytes[i] ^= invmask;
13096
13097 if (immtype == 17)
13098 {
13099 /* FIXME: Broken on 32-bit H_W_I hosts. */
13100 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13101
13102 for (i = 0; i < 8; i++)
13103 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13104 << (i * BITS_PER_UNIT);
13105
13106 *modconst = GEN_INT (imm);
13107 }
13108 else
13109 {
13110 unsigned HOST_WIDE_INT imm = 0;
13111
13112 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13113 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13114
13115 *modconst = GEN_INT (imm);
13116 }
13117 }
13118
13119 return immtype;
13120 #undef CHECK
13121 }
13122
13123 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13124 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13125 (or zero for float elements), and a modified constant (whatever should be
13126 output for a VMOV) in *MODCONST. The function was renamed from
13127 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13128 it is used by both Neon and MVE. */
13129 int
13130 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13131 rtx *modconst, int *elementwidth)
13132 {
13133 rtx tmpconst;
13134 int tmpwidth;
13135 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13136
13137 if (retval == -1)
13138 return 0;
13139
13140 if (modconst)
13141 *modconst = tmpconst;
13142
13143 if (elementwidth)
13144 *elementwidth = tmpwidth;
13145
13146 return 1;
13147 }
13148
13149 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13150 the immediate is valid, write a constant suitable for using as an operand
13151 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13152 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13153
13154 int
13155 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13156 rtx *modconst, int *elementwidth)
13157 {
13158 rtx tmpconst;
13159 int tmpwidth;
13160 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13161
13162 if (retval < 0 || retval > 5)
13163 return 0;
13164
13165 if (modconst)
13166 *modconst = tmpconst;
13167
13168 if (elementwidth)
13169 *elementwidth = tmpwidth;
13170
13171 return 1;
13172 }
13173
13174 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13175 the immediate is valid, write a constant suitable for using as an operand
13176 to VSHR/VSHL to *MODCONST and the corresponding element width to
13177 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
13178 which have different immediate ranges. */
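/* For instance, a V8HImode vector of all 3s is a valid immediate in both
   directions: for 16-bit elements, left shifts accept immediates 0..15 and
   right shifts accept 1..16, and *ELEMENTWIDTH is set to 16 in either case. */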
13179
13180 int
13181 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13182 rtx *modconst, int *elementwidth,
13183 bool isleftshift)
13184 {
13185 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13186 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13187 unsigned HOST_WIDE_INT last_elt = 0;
13188 unsigned HOST_WIDE_INT maxshift;
13189
13190 /* Check that all elements of the vector constant are identical. */
13191 for (i = 0; i < n_elts; i++)
13192 {
13193 rtx el = CONST_VECTOR_ELT (op, i);
13194 unsigned HOST_WIDE_INT elpart;
13195
13196 if (CONST_INT_P (el))
13197 elpart = INTVAL (el);
13198 else if (CONST_DOUBLE_P (el))
13199 return 0;
13200 else
13201 gcc_unreachable ();
13202
13203 if (i != 0 && elpart != last_elt)
13204 return 0;
13205
13206 last_elt = elpart;
13207 }
13208
13209 /* Shift less than element size. */
13210 maxshift = innersize * 8;
13211
13212 if (isleftshift)
13213 {
13214 /* Left shift immediate value can be from 0 to <size>-1. */
13215 if (last_elt >= maxshift)
13216 return 0;
13217 }
13218 else
13219 {
13220 /* Right shift immediate value can be from 1 to <size>. */
13221 if (last_elt == 0 || last_elt > maxshift)
13222 return 0;
13223 }
13224
13225 if (elementwidth)
13226 *elementwidth = innersize * 8;
13227
13228 if (modconst)
13229 *modconst = CONST_VECTOR_ELT (op, 0);
13230
13231 return 1;
13232 }
13233
13234 /* Return a string suitable for output of Neon immediate logic operation
13235 MNEM. */
13236
13237 char *
13238 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13239 int inverse, int quad)
13240 {
13241 int width, is_valid;
13242 static char templ[40];
13243
13244 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13245
13246 gcc_assert (is_valid != 0);
13247
13248 if (quad)
13249 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13250 else
13251 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13252
13253 return templ;
13254 }
13255
13256 /* Return a string suitable for output of Neon immediate shift operation
13257 (VSHR or VSHL) MNEM. */
13258
13259 char *
13260 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13261 machine_mode mode, int quad,
13262 bool isleftshift)
13263 {
13264 int width, is_valid;
13265 static char templ[40];
13266
13267 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13268 gcc_assert (is_valid != 0);
13269
13270 if (quad)
13271 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13272 else
13273 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13274
13275 return templ;
13276 }
13277
13278 /* Output a sequence of pairwise operations to implement a reduction.
13279 NOTE: We do "too much work" here, because pairwise operations work on two
13280 registers-worth of operands in one go. Unfortunately, we don't think we can
13281 exploit those extra calculations to do the full operation in fewer steps.
13282 Although all vector elements of the result but the first are ignored, we
13283 actually calculate the same result in each of the elements. An alternative
13284 such as initially loading a vector with zero to use as each of the second
13285 operands would use up an additional register and take an extra instruction,
13286 for no particular gain. */
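/* For example, with a pairwise-add reduction (e.g. VPADD) over a 4-element
   vector { a, b, c, d }, the first step produces { a+b, c+d, a+b, c+d } in a
   temporary and the second writes a+b+c+d into every element of OP0, of which
   only element 0 is subsequently used. */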
13287
13288 void
13289 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13290 rtx (*reduc) (rtx, rtx, rtx))
13291 {
13292 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13293 rtx tmpsum = op1;
13294
13295 for (i = parts / 2; i >= 1; i /= 2)
13296 {
13297 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13298 emit_insn (reduc (dest, tmpsum, tmpsum));
13299 tmpsum = dest;
13300 }
13301 }
13302
13303 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13304 loaded into a register using VDUP.
13305
13306 If this is the case, and GENERATE is set, we also generate
13307 instructions to do this and return an RTX to assign to the register. */
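/* For instance, a V4SImode vector with all elements equal to 0x12345678
   (which is not a valid VMOV immediate) can be materialised by moving the
   constant into a core register (e.g. with movw/movt) and using vdup.32. */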
13308
13309 static rtx
13310 neon_vdup_constant (rtx vals, bool generate)
13311 {
13312 machine_mode mode = GET_MODE (vals);
13313 machine_mode inner_mode = GET_MODE_INNER (mode);
13314 rtx x;
13315
13316 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13317 return NULL_RTX;
13318
13319 if (!const_vec_duplicate_p (vals, &x))
13320 /* The elements are not all the same. We could handle repeating
13321 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13322 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13323 vdup.i16). */
13324 return NULL_RTX;
13325
13326 if (!generate)
13327 return x;
13328
13329 /* We can load this constant by using VDUP and a constant in a
13330 single ARM register. This will be cheaper than a vector
13331 load. */
13332
13333 x = copy_to_mode_reg (inner_mode, x);
13334 return gen_vec_duplicate (mode, x);
13335 }
13336
13337 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13338 rtx
13339 mve_bool_vec_to_const (rtx const_vec)
13340 {
13341 machine_mode mode = GET_MODE (const_vec);
13342
13343 if (!VECTOR_MODE_P (mode))
13344 return const_vec;
13345
13346 unsigned n_elts = GET_MODE_NUNITS (mode);
13347 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13348 unsigned shift_c = 16 / n_elts;
13349 unsigned i;
13350 int hi_val = 0;
13351
13352 for (i = 0; i < n_elts; i++)
13353 {
13354 rtx el = CONST_VECTOR_ELT (const_vec, i);
13355 unsigned HOST_WIDE_INT elpart;
13356
13357 gcc_assert (CONST_INT_P (el));
13358 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13359
13360 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13361
13362 hi_val |= elpart << (index * shift_c);
13363 }
13364 /* We use a mov immediate to encode this constant, which writes 32 bits,
13365 so we need to make sure the top 16 bits are all 0; otherwise we can't
13366 guarantee that we can actually write this immediate. */
13367 return gen_int_mode (hi_val, SImode);
13368 }
13369
13370 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13371 constants (for vec_init) or a CONST_VECTOR, can be efficiently loaded
13372 into a register.
13373
13374 If this is the case, and GENERATE is set, we also generate code to do
13375 this and return an RTX to copy into the register. */
13376
13377 rtx
13378 neon_make_constant (rtx vals, bool generate)
13379 {
13380 machine_mode mode = GET_MODE (vals);
13381 rtx target;
13382 rtx const_vec = NULL_RTX;
13383 int n_elts = GET_MODE_NUNITS (mode);
13384 int n_const = 0;
13385 int i;
13386
13387 if (GET_CODE (vals) == CONST_VECTOR)
13388 const_vec = vals;
13389 else if (GET_CODE (vals) == PARALLEL)
13390 {
13391 /* A CONST_VECTOR must contain only CONST_INTs and
13392 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13393 Only store valid constants in a CONST_VECTOR. */
13394 for (i = 0; i < n_elts; ++i)
13395 {
13396 rtx x = XVECEXP (vals, 0, i);
13397 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13398 n_const++;
13399 }
13400 if (n_const == n_elts)
13401 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13402 }
13403 else
13404 gcc_unreachable ();
13405
13406 if (const_vec != NULL
13407 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13408 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13409 return const_vec;
13410 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13411 return mve_bool_vec_to_const (const_vec);
13412 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13413 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13414 pipeline cycle; creating the constant takes one or two ARM
13415 pipeline cycles. */
13416 return target;
13417 else if (const_vec != NULL_RTX)
13418 /* Load from constant pool. On Cortex-A8 this takes two cycles
13419 (for either double or quad vectors). We cannot take advantage
13420 of single-cycle VLD1 because we need a PC-relative addressing
13421 mode. */
13422 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13423 else
13424 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13425 We cannot construct an initializer. */
13426 return NULL_RTX;
13427 }
13428
13429 /* Initialize vector TARGET to VALS. */
13430
13431 void
13432 neon_expand_vector_init (rtx target, rtx vals)
13433 {
13434 machine_mode mode = GET_MODE (target);
13435 machine_mode inner_mode = GET_MODE_INNER (mode);
13436 int n_elts = GET_MODE_NUNITS (mode);
13437 int n_var = 0, one_var = -1;
13438 bool all_same = true;
13439 rtx x, mem;
13440 int i;
13441
13442 for (i = 0; i < n_elts; ++i)
13443 {
13444 x = XVECEXP (vals, 0, i);
13445 if (!CONSTANT_P (x))
13446 ++n_var, one_var = i;
13447
13448 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13449 all_same = false;
13450 }
13451
13452 if (n_var == 0)
13453 {
13454 rtx constant = neon_make_constant (vals);
13455 if (constant != NULL_RTX)
13456 {
13457 emit_move_insn (target, constant);
13458 return;
13459 }
13460 }
13461
13462 /* Splat a single non-constant element if we can. */
13463 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13464 {
13465 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13466 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13467 return;
13468 }
13469
13470 /* One field is non-constant. Load constant then overwrite varying
13471 field. This is more efficient than using the stack. */
13472 if (n_var == 1)
13473 {
13474 rtx copy = copy_rtx (vals);
13475 rtx merge_mask = GEN_INT (1 << one_var);
13476
13477 /* Load constant part of vector, substitute neighboring value for
13478 varying element. */
13479 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13480 neon_expand_vector_init (target, copy);
13481
13482 /* Insert variable. */
13483 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13484 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13485 return;
13486 }
13487
13488 /* Construct the vector in memory one field at a time
13489 and load the whole vector. */
13490 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13491 for (i = 0; i < n_elts; i++)
13492 emit_move_insn (adjust_address_nv (mem, inner_mode,
13493 i * GET_MODE_SIZE (inner_mode)),
13494 XVECEXP (vals, 0, i));
13495 emit_move_insn (target, mem);
13496 }
13497
13498 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13499 an error mentioning DESC if it doesn't. EXP indicates the source location,
13500 which includes the inlining history for intrinsics. */
13501
13502 static void
13503 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13504 const_tree exp, const char *desc)
13505 {
13506 HOST_WIDE_INT lane;
13507
13508 gcc_assert (CONST_INT_P (operand));
13509
13510 lane = INTVAL (operand);
13511
13512 if (lane < low || lane >= high)
13513 {
13514 if (exp)
13515 error_at (EXPR_LOCATION (exp),
13516 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13517 else
13518 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13519 }
13520 }
13521
13522 /* Bounds-check lanes. */
13523
13524 void
13525 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13526 const_tree exp)
13527 {
13528 bounds_check (operand, low, high, exp, "lane");
13529 }
13530
13531 /* Bounds-check constants. */
13532
13533 void
13534 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13535 {
13536 bounds_check (operand, low, high, NULL_TREE, "constant");
13537 }
13538
13539 HOST_WIDE_INT
13540 neon_element_bits (machine_mode mode)
13541 {
13542 return GET_MODE_UNIT_BITSIZE (mode);
13543 }
13544
13545 \f
13546 /* Predicates for `match_operand' and `match_operator'. */
13547
13548 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13549 WB level is 2 if full writeback address modes are allowed, 1
13550 if limited writeback address modes (POST_INC and PRE_DEC) are
13551 allowed and 0 if no writeback at all is supported. */
13552
13553 int
13554 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13555 {
13556 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13557 rtx ind;
13558
13559 /* Reject eliminable registers. */
13560 if (! (reload_in_progress || reload_completed || lra_in_progress)
13561 && ( reg_mentioned_p (frame_pointer_rtx, op)
13562 || reg_mentioned_p (arg_pointer_rtx, op)
13563 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13564 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13565 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13566 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13567 return FALSE;
13568
13569 /* Constants are converted into offsets from labels. */
13570 if (!MEM_P (op))
13571 return FALSE;
13572
13573 ind = XEXP (op, 0);
13574
13575 if (reload_completed
13576 && (LABEL_REF_P (ind)
13577 || (GET_CODE (ind) == CONST
13578 && GET_CODE (XEXP (ind, 0)) == PLUS
13579 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13580 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13581 return TRUE;
13582
13583 /* Match: (mem (reg)). */
13584 if (REG_P (ind))
13585 return arm_address_register_rtx_p (ind, 0);
13586
13587 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13588 acceptable whenever any level of writeback is allowed (subject to
13589 verification by arm_address_register_rtx_p), while PRE_INC and
13590 POST_DEC additionally require full writeback to be
13591 accepted. */
13592 if (wb_level > 0
13593 && (GET_CODE (ind) == POST_INC
13594 || GET_CODE (ind) == PRE_DEC
13595 || (wb_level > 1
13596 && (GET_CODE (ind) == PRE_INC
13597 || GET_CODE (ind) == POST_DEC))))
13598 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13599
13600 if (wb_level > 1
13601 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13602 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13603 && GET_CODE (XEXP (ind, 1)) == PLUS
13604 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13605 ind = XEXP (ind, 1);
13606
13607 /* Match:
13608 (plus (reg)
13609 (const))
13610
13611 The encoded immediate for 16-bit modes is multiplied by 2,
13612 while the encoded immediate for 32-bit and 64-bit modes is
13613 multiplied by 4. */
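  /* For example, for SImode and DImode accesses FACTOR is 4, so legal offsets
     are multiples of 4 in [-1020, 1020]; for HImode FACTOR is 2, giving
     multiples of 2 in [-510, 510]. */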
13614 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13615 if (GET_CODE (ind) == PLUS
13616 && REG_P (XEXP (ind, 0))
13617 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13618 && CONST_INT_P (XEXP (ind, 1))
13619 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13620 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13621 return TRUE;
13622
13623 return FALSE;
13624 }
13625
13626 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13627 WB is true if full writeback address modes are allowed and is false
13628 if limited writeback address modes (POST_INC and PRE_DEC) are
13629 allowed. */
13630
13631 int arm_coproc_mem_operand (rtx op, bool wb)
13632 {
13633 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13634 }
13635
13636 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13637 context in which no writeback address modes are allowed. */
13638
13639 int
13640 arm_coproc_mem_operand_no_writeback (rtx op)
13641 {
13642 return arm_coproc_mem_operand_wb (op, 0);
13643 }
13644
13645 /* This function returns TRUE on matching mode and op.
13646 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13647 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13648 int
13649 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13650 {
13651 enum rtx_code code;
13652 int val, reg_no;
13653
13654 /* Match: (mem (reg)). */
13655 if (REG_P (op))
13656 {
13657 int reg_no = REGNO (op);
13658 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13659 ? reg_no <= LAST_LO_REGNUM
13660 : reg_no < LAST_ARM_REGNUM)
13661 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13662 }
13663 code = GET_CODE (op);
13664
13665 if (code == POST_INC || code == PRE_DEC
13666 || code == PRE_INC || code == POST_DEC)
13667 {
13668 reg_no = REGNO (XEXP (op, 0));
13669 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13670 ? reg_no <= LAST_LO_REGNUM
13671 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13672 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13673 }
13674 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13675 && GET_CODE (XEXP (op, 1)) == PLUS
13676 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13677 && REG_P (XEXP (op, 0))
13678 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13679 /* Make sure to only accept PLUS after reload_completed, otherwise
13680 this will interfere with auto_inc's pattern detection. */
13681 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13682 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13683 {
13684 reg_no = REGNO (XEXP (op, 0));
13685 if (code == PLUS)
13686 val = INTVAL (XEXP (op, 1));
13687 else
13688 val = INTVAL (XEXP(XEXP (op, 1), 1));
13689
13690 switch (mode)
13691 {
13692 case E_V16QImode:
13693 case E_V8QImode:
13694 case E_V4QImode:
13695 if (abs (val) > 127)
13696 return FALSE;
13697 break;
13698 case E_V8HImode:
13699 case E_V8HFmode:
13700 case E_V4HImode:
13701 case E_V4HFmode:
13702 if (val % 2 != 0 || abs (val) > 254)
13703 return FALSE;
13704 break;
13705 case E_V4SImode:
13706 case E_V4SFmode:
13707 if (val % 4 != 0 || abs (val) > 508)
13708 return FALSE;
13709 break;
13710 default:
13711 return FALSE;
13712 }
13713 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13714 || (MVE_STN_LDW_MODE (mode)
13715 ? reg_no <= LAST_LO_REGNUM
13716 : (reg_no < LAST_ARM_REGNUM
13717 && (code == PLUS || reg_no != SP_REGNUM))));
13718 }
13719 return FALSE;
13720 }
13721
13722 /* Return TRUE if OP is a memory operand from or to which we can load or
13723 store a vector. TYPE is one of the following values:
13724 0 - Vector load/store (vldr)
13725 1 - Core registers (ldm)
13726 2 - Element/structure loads (vld1)
13727 */
13728 int
13729 neon_vector_mem_operand (rtx op, int type, bool strict)
13730 {
13731 rtx ind;
13732
13733 /* Reject eliminable registers. */
13734 if (strict && ! (reload_in_progress || reload_completed)
13735 && (reg_mentioned_p (frame_pointer_rtx, op)
13736 || reg_mentioned_p (arg_pointer_rtx, op)
13737 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13738 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13739 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13740 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13741 return FALSE;
13742
13743 /* Constants are converted into offsets from labels. */
13744 if (!MEM_P (op))
13745 return FALSE;
13746
13747 ind = XEXP (op, 0);
13748
13749 if (reload_completed
13750 && (LABEL_REF_P (ind)
13751 || (GET_CODE (ind) == CONST
13752 && GET_CODE (XEXP (ind, 0)) == PLUS
13753 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13754 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13755 return TRUE;
13756
13757 /* Match: (mem (reg)). */
13758 if (REG_P (ind))
13759 return arm_address_register_rtx_p (ind, 0);
13760
13761 /* Allow post-increment with Neon registers. */
13762 if ((type != 1 && GET_CODE (ind) == POST_INC)
13763 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13764 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13765
13766 /* Allow post-increment by register for VLDn. */
13767 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13768 && GET_CODE (XEXP (ind, 1)) == PLUS
13769 && REG_P (XEXP (XEXP (ind, 1), 1))
13770 && REG_P (XEXP (ind, 0))
13771 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13772 return true;
13773
13774 /* Match:
13775 (plus (reg)
13776 (const)). */
13777 if (type == 0
13778 && GET_CODE (ind) == PLUS
13779 && REG_P (XEXP (ind, 0))
13780 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13781 && CONST_INT_P (XEXP (ind, 1))
13782 && INTVAL (XEXP (ind, 1)) > -1024
13783 /* For quad modes, we restrict the constant offset to be slightly less
13784 than what the instruction format permits. We have no such constraint
13785 on double mode offsets. (This must match arm_legitimate_index_p.) */
13786 && (INTVAL (XEXP (ind, 1))
13787 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13788 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13789 return TRUE;
13790
13791 return FALSE;
13792 }
13793
13794 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13795 type. */
13796 int
13797 mve_struct_mem_operand (rtx op)
13798 {
13799 rtx ind = XEXP (op, 0);
13800
13801 /* Match: (mem (reg)). */
13802 if (REG_P (ind))
13803 return arm_address_register_rtx_p (ind, 0);
13804
13805 /* Allow only post-increment by the mode size. */
13806 if (GET_CODE (ind) == POST_INC)
13807 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13808
13809 return FALSE;
13810 }
13811
13812 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13813 type. */
13814 int
13815 neon_struct_mem_operand (rtx op)
13816 {
13817 rtx ind;
13818
13819 /* Reject eliminable registers. */
13820 if (! (reload_in_progress || reload_completed)
13821 && ( reg_mentioned_p (frame_pointer_rtx, op)
13822 || reg_mentioned_p (arg_pointer_rtx, op)
13823 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13824 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13825 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13826 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13827 return FALSE;
13828
13829 /* Constants are converted into offsets from labels. */
13830 if (!MEM_P (op))
13831 return FALSE;
13832
13833 ind = XEXP (op, 0);
13834
13835 if (reload_completed
13836 && (LABEL_REF_P (ind)
13837 || (GET_CODE (ind) == CONST
13838 && GET_CODE (XEXP (ind, 0)) == PLUS
13839 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13840 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13841 return TRUE;
13842
13843 /* Match: (mem (reg)). */
13844 if (REG_P (ind))
13845 return arm_address_register_rtx_p (ind, 0);
13846
13847 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13848 if (GET_CODE (ind) == POST_INC
13849 || GET_CODE (ind) == PRE_DEC)
13850 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13851
13852 return FALSE;
13853 }
13854
13855 /* Prepares the operands for the VCMLA by lane instruction such that the right
13856 register number is selected. This instruction is special in that it always
13857 requires a D register; however, there is a choice to be made between Dn[0],
13858 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13859
13860 The VCMLA by lane function always selects two values. For instance given D0
13861 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13862 used by the instruction. However, given V4SF, indices 0 and 1 are both
13863 valid, selecting D0[0] or D1[0] respectively.
13864
13865 This function centralizes that information based on OPERANDS, OPERANDS[3]
13866 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13867 updated to contain the right index. */
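/* As an illustration: for a V4SFmode value held in q0 (d0/d1) with lane
   index 1, OPERANDS[3] is replaced by the constant 1 (selecting d1) and
   OPERANDS[4] by 0, i.e. D1[0] as described above. */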
13868
13869 rtx *
13870 neon_vcmla_lane_prepare_operands (rtx *operands)
13871 {
13872 int lane = INTVAL (operands[4]);
13873 machine_mode constmode = SImode;
13874 machine_mode mode = GET_MODE (operands[3]);
13875 int regno = REGNO (operands[3]);
13876 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13877 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13878 {
13879 operands[3] = gen_int_mode (regno + 1, constmode);
13880 operands[4]
13881 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13882 }
13883 else
13884 {
13885 operands[3] = gen_int_mode (regno, constmode);
13886 operands[4] = gen_int_mode (lane, constmode);
13887 }
13888 return operands;
13889 }
13890
13891
13892 /* Return true if X is a register that will be eliminated later on. */
13893 int
13894 arm_eliminable_register (rtx x)
13895 {
13896 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13897 || REGNO (x) == ARG_POINTER_REGNUM
13898 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13899 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13900 }
13901
13902 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13903 coprocessor registers. Otherwise return NO_REGS. */
13904
13905 enum reg_class
13906 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13907 {
13908 if (mode == HFmode)
13909 {
13910 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13911 return GENERAL_REGS;
13912 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13913 return NO_REGS;
13914 return GENERAL_REGS;
13915 }
13916
13917 /* The neon move patterns handle all legitimate vector and struct
13918 addresses. */
13919 if (TARGET_NEON
13920 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13921 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13922 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13923 || VALID_NEON_STRUCT_MODE (mode)))
13924 return NO_REGS;
13925
13926 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13927 return NO_REGS;
13928
13929 return GENERAL_REGS;
13930 }
13931
13932 /* Values which must be returned in the most-significant end of the return
13933 register. */
13934
13935 static bool
13936 arm_return_in_msb (const_tree valtype)
13937 {
13938 return (TARGET_AAPCS_BASED
13939 && BYTES_BIG_ENDIAN
13940 && (AGGREGATE_TYPE_P (valtype)
13941 || TREE_CODE (valtype) == COMPLEX_TYPE
13942 || FIXED_POINT_TYPE_P (valtype)));
13943 }
13944
13945 /* Return TRUE if X references a SYMBOL_REF. */
13946 int
13947 symbol_mentioned_p (rtx x)
13948 {
13949 const char * fmt;
13950 int i;
13951
13952 if (SYMBOL_REF_P (x))
13953 return 1;
13954
13955 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13956 are constant offsets, not symbols. */
13957 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13958 return 0;
13959
13960 fmt = GET_RTX_FORMAT (GET_CODE (x));
13961
13962 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13963 {
13964 if (fmt[i] == 'E')
13965 {
13966 int j;
13967
13968 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13969 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13970 return 1;
13971 }
13972 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13973 return 1;
13974 }
13975
13976 return 0;
13977 }
13978
13979 /* Return TRUE if X references a LABEL_REF. */
13980 int
13981 label_mentioned_p (rtx x)
13982 {
13983 const char * fmt;
13984 int i;
13985
13986 if (LABEL_REF_P (x))
13987 return 1;
13988
13989 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13990 instruction, but they are constant offsets, not symbols. */
13991 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13992 return 0;
13993
13994 fmt = GET_RTX_FORMAT (GET_CODE (x));
13995 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13996 {
13997 if (fmt[i] == 'E')
13998 {
13999 int j;
14000
14001 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14002 if (label_mentioned_p (XVECEXP (x, i, j)))
14003 return 1;
14004 }
14005 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14006 return 1;
14007 }
14008
14009 return 0;
14010 }
14011
14012 int
14013 tls_mentioned_p (rtx x)
14014 {
14015 switch (GET_CODE (x))
14016 {
14017 case CONST:
14018 return tls_mentioned_p (XEXP (x, 0));
14019
14020 case UNSPEC:
14021 if (XINT (x, 1) == UNSPEC_TLS)
14022 return 1;
14023
14024 /* Fall through. */
14025 default:
14026 return 0;
14027 }
14028 }
14029
14030 /* Must not copy any rtx that uses a pc-relative address.
14031 Also, disallow copying of load-exclusive instructions that
14032 may appear after splitting of compare-and-swap-style operations
14033 so as to prevent those loops from being transformed away from their
14034 canonical forms (see PR 69904). */
14035
14036 static bool
14037 arm_cannot_copy_insn_p (rtx_insn *insn)
14038 {
14039 /* The tls call insn cannot be copied, as it is paired with a data
14040 word. */
14041 if (recog_memoized (insn) == CODE_FOR_tlscall)
14042 return true;
14043
14044 subrtx_iterator::array_type array;
14045 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14046 {
14047 const_rtx x = *iter;
14048 if (GET_CODE (x) == UNSPEC
14049 && (XINT (x, 1) == UNSPEC_PIC_BASE
14050 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14051 return true;
14052 }
14053
14054 rtx set = single_set (insn);
14055 if (set)
14056 {
14057 rtx src = SET_SRC (set);
14058 if (GET_CODE (src) == ZERO_EXTEND)
14059 src = XEXP (src, 0);
14060
14061 /* Catch the load-exclusive and load-acquire operations. */
14062 if (GET_CODE (src) == UNSPEC_VOLATILE
14063 && (XINT (src, 1) == VUNSPEC_LL
14064 || XINT (src, 1) == VUNSPEC_LAX))
14065 return true;
14066 }
14067 return false;
14068 }
14069
14070 enum rtx_code
14071 minmax_code (rtx x)
14072 {
14073 enum rtx_code code = GET_CODE (x);
14074
14075 switch (code)
14076 {
14077 case SMAX:
14078 return GE;
14079 case SMIN:
14080 return LE;
14081 case UMIN:
14082 return LEU;
14083 case UMAX:
14084 return GEU;
14085 default:
14086 gcc_unreachable ();
14087 }
14088 }
14089
14090 /* Match pair of min/max operators that can be implemented via usat/ssat. */
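/* For example, clamping to [0, 255] gives *MASK = 8 with *SIGNED_SAT false
   (usat #8), while clamping to [-128, 127] gives *MASK = 8 with *SIGNED_SAT
   true (ssat #8), since 127 = 2^7 - 1 and -128 = -127 - 1. */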
14091
14092 bool
14093 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14094 int *mask, bool *signed_sat)
14095 {
14096 /* The high bound must be a power of two minus one. */
14097 int log = exact_log2 (INTVAL (hi_bound) + 1);
14098 if (log == -1)
14099 return false;
14100
14101 /* The low bound is either zero (for usat) or one less than the
14102 negation of the high bound (for ssat). */
14103 if (INTVAL (lo_bound) == 0)
14104 {
14105 if (mask)
14106 *mask = log;
14107 if (signed_sat)
14108 *signed_sat = false;
14109
14110 return true;
14111 }
14112
14113 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14114 {
14115 if (mask)
14116 *mask = log + 1;
14117 if (signed_sat)
14118 *signed_sat = true;
14119
14120 return true;
14121 }
14122
14123 return false;
14124 }
14125
14126 /* Return 1 if memory locations are adjacent. */
14127 int
14128 adjacent_mem_locations (rtx a, rtx b)
14129 {
14130 /* We don't guarantee to preserve the order of these memory refs. */
14131 if (volatile_refs_p (a) || volatile_refs_p (b))
14132 return 0;
14133
14134 if ((REG_P (XEXP (a, 0))
14135 || (GET_CODE (XEXP (a, 0)) == PLUS
14136 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14137 && (REG_P (XEXP (b, 0))
14138 || (GET_CODE (XEXP (b, 0)) == PLUS
14139 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14140 {
14141 HOST_WIDE_INT val0 = 0, val1 = 0;
14142 rtx reg0, reg1;
14143 int val_diff;
14144
14145 if (GET_CODE (XEXP (a, 0)) == PLUS)
14146 {
14147 reg0 = XEXP (XEXP (a, 0), 0);
14148 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14149 }
14150 else
14151 reg0 = XEXP (a, 0);
14152
14153 if (GET_CODE (XEXP (b, 0)) == PLUS)
14154 {
14155 reg1 = XEXP (XEXP (b, 0), 0);
14156 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14157 }
14158 else
14159 reg1 = XEXP (b, 0);
14160
14161 /* Don't accept any offset that will require multiple
14162 instructions to handle, since this would cause the
14163 arith_adjacentmem pattern to output an overlong sequence. */
14164 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14165 return 0;
14166
14167 /* Don't allow an eliminable register: register elimination can make
14168 the offset too large. */
14169 if (arm_eliminable_register (reg0))
14170 return 0;
14171
14172 val_diff = val1 - val0;
14173
14174 if (arm_ld_sched)
14175 {
14176 /* If the target has load delay slots, then there's no benefit
14177 to using an ldm instruction unless the offset is zero and
14178 we are optimizing for size. */
14179 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14180 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14181 && (val_diff == 4 || val_diff == -4));
14182 }
14183
14184 return ((REGNO (reg0) == REGNO (reg1))
14185 && (val_diff == 4 || val_diff == -4));
14186 }
14187
14188 return 0;
14189 }
14190
14191 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14192 for load operations, false for store operations. CONSECUTIVE is true
14193 if the register numbers in the operation must be consecutive in the register
14194 bank. RETURN_PC is true if the value is to be loaded into the PC.
14195 The pattern we are trying to match for load is:
14196 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14197 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14198 :
14199 :
14200 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14201 ]
14202 where
14203 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14204 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14205 3. If consecutive is TRUE, then for kth register being loaded,
14206 REGNO (R_dk) = REGNO (R_d0) + k.
14207 The pattern for store is similar. */
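/* For example, an ldmia of { r4, r5, r6 } from a base register rb (register
names chosen purely for illustration) would match as:
[(set (reg:SI r4) (mem:SI (reg:SI rb)))
(set (reg:SI r5) (mem:SI (plus:SI (reg:SI rb) (const_int 4))))
(set (reg:SI r6) (mem:SI (plus:SI (reg:SI rb) (const_int 8))))]  */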
14208 bool
14209 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14210 bool consecutive, bool return_pc)
14211 {
14212 HOST_WIDE_INT count = XVECLEN (op, 0);
14213 rtx reg, mem, addr;
14214 unsigned regno;
14215 unsigned first_regno;
14216 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14217 rtx elt;
14218 bool addr_reg_in_reglist = false;
14219 bool update = false;
14220 int reg_increment;
14221 int offset_adj;
14222 int regs_per_val;
14223
14224 /* If not in SImode, then registers must be consecutive
14225 (e.g., VLDM instructions for DFmode). */
14226 gcc_assert ((mode == SImode) || consecutive);
14227 /* Setting return_pc for stores is illegal. */
14228 gcc_assert (!return_pc || load);
14229
14230 /* Set up the increments and the regs per val based on the mode. */
14231 reg_increment = GET_MODE_SIZE (mode);
14232 regs_per_val = reg_increment / 4;
14233 offset_adj = return_pc ? 1 : 0;
14234
14235 if (count <= 1
14236 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14237 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14238 return false;
14239
14240 /* Check if this is a write-back. */
14241 elt = XVECEXP (op, 0, offset_adj);
14242 if (GET_CODE (SET_SRC (elt)) == PLUS)
14243 {
14244 i++;
14245 base = 1;
14246 update = true;
14247
14248 /* The offset adjustment must be the number of registers being
14249 transferred times the size of a single register. */
14250 if (!REG_P (SET_DEST (elt))
14251 || !REG_P (XEXP (SET_SRC (elt), 0))
14252 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14253 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14254 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14255 ((count - 1 - offset_adj) * reg_increment))
14256 return false;
14257 }
14258
14259 i = i + offset_adj;
14260 base = base + offset_adj;
14261 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14262 success depends on the type: VLDM can do just one reg,
14263 LDM must do at least two. */
14264 if ((count <= i) && (mode == SImode))
14265 return false;
14266
14267 elt = XVECEXP (op, 0, i - 1);
14268 if (GET_CODE (elt) != SET)
14269 return false;
14270
14271 if (load)
14272 {
14273 reg = SET_DEST (elt);
14274 mem = SET_SRC (elt);
14275 }
14276 else
14277 {
14278 reg = SET_SRC (elt);
14279 mem = SET_DEST (elt);
14280 }
14281
14282 if (!REG_P (reg) || !MEM_P (mem))
14283 return false;
14284
14285 regno = REGNO (reg);
14286 first_regno = regno;
14287 addr = XEXP (mem, 0);
14288 if (GET_CODE (addr) == PLUS)
14289 {
14290 if (!CONST_INT_P (XEXP (addr, 1)))
14291 return false;
14292
14293 offset = INTVAL (XEXP (addr, 1));
14294 addr = XEXP (addr, 0);
14295 }
14296
14297 if (!REG_P (addr))
14298 return false;
14299
14300 /* Don't allow SP to be loaded unless it is also the base register. It
14301 guarantees that SP is reset correctly when an LDM instruction
14302 is interrupted. Otherwise, we might end up with a corrupt stack. */
14303 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14304 return false;
14305
14306 if (regno == REGNO (addr))
14307 addr_reg_in_reglist = true;
14308
14309 for (; i < count; i++)
14310 {
14311 elt = XVECEXP (op, 0, i);
14312 if (GET_CODE (elt) != SET)
14313 return false;
14314
14315 if (load)
14316 {
14317 reg = SET_DEST (elt);
14318 mem = SET_SRC (elt);
14319 }
14320 else
14321 {
14322 reg = SET_SRC (elt);
14323 mem = SET_DEST (elt);
14324 }
14325
14326 if (!REG_P (reg)
14327 || GET_MODE (reg) != mode
14328 || REGNO (reg) <= regno
14329 || (consecutive
14330 && (REGNO (reg) !=
14331 (unsigned int) (first_regno + regs_per_val * (i - base))))
14332 /* Don't allow SP to be loaded unless it is also the base register. It
14333 guarantees that SP is reset correctly when an LDM instruction
14334 is interrupted. Otherwise, we might end up with a corrupt stack. */
14335 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14336 || !MEM_P (mem)
14337 || GET_MODE (mem) != mode
14338 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14339 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14340 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14341 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14342 offset + (i - base) * reg_increment))
14343 && (!REG_P (XEXP (mem, 0))
14344 || offset + (i - base) * reg_increment != 0)))
14345 return false;
14346
14347 regno = REGNO (reg);
14348 if (regno == REGNO (addr))
14349 addr_reg_in_reglist = true;
14350 }
14351
14352 if (load)
14353 {
14354 if (update && addr_reg_in_reglist)
14355 return false;
14356
14357 /* For Thumb-1, the address register is always modified, either by write-back
14358 or by an explicit load. If the pattern does not describe an update,
14359 then the address register must be in the list of loaded registers. */
14360 if (TARGET_THUMB1)
14361 return update || addr_reg_in_reglist;
14362 }
14363
14364 return true;
14365 }
14366
14367 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14368 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14369 following form:
14370
14371 [(set (reg:SI <N>) (const_int 0))
14372 (set (reg:SI <M>) (const_int 0))
14373 ...
14374 (unspec_volatile [(const_int 0)]
14375 VUNSPEC_CLRM_APSR)
14376 (clobber (reg:CC CC_REGNUM))
14377 ]
14378
14379 Any number (including 0) of set expressions is valid; the volatile unspec is
14380 optional. All registers except SP and PC are allowed, and the registers must
14381 be in strictly increasing order.
14382
14383 To be a valid VSCCLRM pattern, OP must have the following form:
14384
14385 [(unspec_volatile [(const_int 0)]
14386 VUNSPEC_VSCCLRM_VPR)
14387 (set (reg:SF <N>) (const_int 0))
14388 (set (reg:SF <M>) (const_int 0))
14389 ...
14390 ]
14391
14392 As with CLRM, any number (including 0) of set expressions is valid; however,
14393 the volatile unspec is mandatory here. Any VFP single-precision register is
14394 accepted, but all registers must be consecutive and in increasing order. */
14395
14396 bool
14397 clear_operation_p (rtx op, bool vfp)
14398 {
14399 unsigned regno;
14400 unsigned last_regno = INVALID_REGNUM;
14401 rtx elt, reg, zero;
14402 int count = XVECLEN (op, 0);
14403 int first_set = vfp ? 1 : 0;
14404 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14405
14406 for (int i = first_set; i < count; i++)
14407 {
14408 elt = XVECEXP (op, 0, i);
14409
14410 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14411 {
14412 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14413 || XVECLEN (elt, 0) != 1
14414 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14415 || i != count - 2)
14416 return false;
14417
14418 continue;
14419 }
14420
14421 if (GET_CODE (elt) == CLOBBER)
14422 continue;
14423
14424 if (GET_CODE (elt) != SET)
14425 return false;
14426
14427 reg = SET_DEST (elt);
14428 zero = SET_SRC (elt);
14429
14430 if (!REG_P (reg)
14431 || GET_MODE (reg) != expected_mode
14432 || zero != CONST0_RTX (SImode))
14433 return false;
14434
14435 regno = REGNO (reg);
14436
14437 if (vfp)
14438 {
14439 if (i != first_set && regno != last_regno + 1)
14440 return false;
14441 }
14442 else
14443 {
14444 if (regno == SP_REGNUM || regno == PC_REGNUM)
14445 return false;
14446 if (i != first_set && regno <= last_regno)
14447 return false;
14448 }
14449
14450 last_regno = regno;
14451 }
14452
14453 return true;
14454 }
14455
14456 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14457 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14458 instruction. ADD_OFFSET is nonzero if the base address register needs
14459 to be modified with an add instruction before we can use it. */
14460
14461 static bool
14462 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14463 int nops, HOST_WIDE_INT add_offset)
14464 {
14465 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14466 if the offset isn't small enough. The reason 2 ldrs are faster
14467 is because these ARMs are able to do more than one cache access
14468 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14469 whilst the ARM8 has a double bandwidth cache. This means that
14470 these cores can do both an instruction fetch and a data fetch in
14471 a single cycle, so the trick of calculating the address into a
14472 scratch register (one of the result regs) and then doing a load
14473 multiple actually becomes slower (and no smaller in code size).
14474 That is the transformation
14475
14476 ldr rd1, [rbase + offset]
14477 ldr rd2, [rbase + offset + 4]
14478
14479 to
14480
14481 add rd1, rbase, offset
14482 ldmia rd1, {rd1, rd2}
14483
14484 produces worse code -- '3 cycles + any stalls on rd2' instead of
14485 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14486 access per cycle, the first sequence could never complete in less
14487 than 6 cycles, whereas the ldm sequence would only take 5 and
14488 would make better use of sequential accesses if not hitting the
14489 cache.
14490
14491 We cheat here and test 'arm_ld_sched' which we currently know to
14492 only be true for the ARM8, ARM9 and StrongARM. If this ever
14493 changes, then the test below needs to be reworked. */
14494 if (nops == 2 && arm_ld_sched && add_offset != 0)
14495 return false;
14496
14497 /* XScale has load-store double instructions, but they have stricter
14498 alignment requirements than load-store multiple, so we cannot
14499 use them.
14500
14501 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14502 the pipeline until completion.
14503
14504 NREGS CYCLES
14505 1 3
14506 2 4
14507 3 5
14508 4 6
14509
14510 An ldr instruction takes 1-3 cycles, but does not block the
14511 pipeline.
14512
14513 NREGS CYCLES
14514 1 1-3
14515 2 2-6
14516 3 3-9
14517 4 4-12
14518
14519 Best case ldr will always win. However, the more ldr instructions
14520 we issue, the less likely we are to be able to schedule them well.
14521 Using ldr instructions also increases code size.
14522
14523 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14524 for counts of 3 or 4 regs. */
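/* For example, on XScale a two-register ldm takes 4 cycles and blocks the
pipeline, whereas two independent ldr instructions can complete in as few
as 2 cycles, so for one or two registers separate loads are preferred
unless optimizing for size.  */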
14525 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14526 return false;
14527 return true;
14528 }
14529
14530 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14531 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14532 an array ORDER which describes the sequence to use when accessing the
14533 offsets that produces an ascending order. In this sequence, each
14534 offset must be larger by exactly 4 than the previous one. ORDER[0]
14535 must have been filled in with the lowest offset by the caller.
14536 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14537 we use to verify that ORDER produces an ascending order of registers.
14538 Return true if it was possible to construct such an order, false if
14539 not. */
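/* For example, with NOPS == 4, UNSORTED_OFFSETS == { 8, 0, 12, 4 } and
ORDER[0] preset to 1 (the index of offset 0), ORDER is filled in as
{ 1, 3, 0, 2 }, i.e. offsets 0, 4, 8, 12.  */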
14540
14541 static bool
14542 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14543 int *unsorted_regs)
14544 {
14545 int i;
14546 for (i = 1; i < nops; i++)
14547 {
14548 int j;
14549
14550 order[i] = order[i - 1];
14551 for (j = 0; j < nops; j++)
14552 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14553 {
14554 /* We must find exactly one offset that is higher than the
14555 previous one by 4. */
14556 if (order[i] != order[i - 1])
14557 return false;
14558 order[i] = j;
14559 }
14560 if (order[i] == order[i - 1])
14561 return false;
14562 /* The register numbers must be ascending. */
14563 if (unsorted_regs != NULL
14564 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14565 return false;
14566 }
14567 return true;
14568 }
14569
14570 /* Used to determine in a peephole whether a sequence of load
14571 instructions can be changed into a load-multiple instruction.
14572 NOPS is the number of separate load instructions we are examining. The
14573 first NOPS entries in OPERANDS are the destination registers, the
14574 next NOPS entries are memory operands. If this function is
14575 successful, *BASE is set to the common base register of the memory
14576 accesses; *LOAD_OFFSET is set to the first memory location's offset
14577 from that base register.
14578 REGS is an array filled in with the destination register numbers.
14579 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14580 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14581 the sequence of registers in REGS matches the loads from ascending memory
14582 locations, and the function verifies that the register numbers are
14583 themselves ascending. If CHECK_REGS is false, the register numbers
14584 are stored in the order they are found in the operands. */
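/* The return value is 0 on failure; otherwise it is the ldm_case computed
below: 1 for LDMIA, 2 for LDMIB, 3 for LDMDA, 4 for LDMDB, or 5 when the
base address must first be adjusted with a separate add instruction.  */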
14585 static int
14586 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14587 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14588 {
14589 int unsorted_regs[MAX_LDM_STM_OPS];
14590 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14591 int order[MAX_LDM_STM_OPS];
14592 int base_reg = -1;
14593 int i, ldm_case;
14594
14595 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14596 easily extended if required. */
14597 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14598
14599 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14600
14601 /* Loop over the operands and check that the memory references are
14602 suitable (i.e. immediate offsets from the same base register). At
14603 the same time, extract the target register, and the memory
14604 offsets. */
14605 for (i = 0; i < nops; i++)
14606 {
14607 rtx reg;
14608 rtx offset;
14609
14610 /* Convert a subreg of a mem into the mem itself. */
14611 if (GET_CODE (operands[nops + i]) == SUBREG)
14612 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14613
14614 gcc_assert (MEM_P (operands[nops + i]));
14615
14616 /* Don't reorder volatile memory references; it doesn't seem worth
14617 looking for the case where the order is ok anyway. */
14618 if (MEM_VOLATILE_P (operands[nops + i]))
14619 return 0;
14620
14621 offset = const0_rtx;
14622
14623 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14624 || (SUBREG_P (reg)
14625 && REG_P (reg = SUBREG_REG (reg))))
14626 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14627 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14628 || (SUBREG_P (reg)
14629 && REG_P (reg = SUBREG_REG (reg))))
14630 && (CONST_INT_P (offset
14631 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14632 {
14633 if (i == 0)
14634 {
14635 base_reg = REGNO (reg);
14636 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14637 return 0;
14638 }
14639 else if (base_reg != (int) REGNO (reg))
14640 /* Not addressed from the same base register. */
14641 return 0;
14642
14643 unsorted_regs[i] = (REG_P (operands[i])
14644 ? REGNO (operands[i])
14645 : REGNO (SUBREG_REG (operands[i])));
14646
14647 /* If it isn't an integer register, or if it overwrites the
14648 base register but isn't the last insn in the list, then
14649 we can't do this. */
14650 if (unsorted_regs[i] < 0
14651 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14652 || unsorted_regs[i] > 14
14653 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14654 return 0;
14655
14656 /* Don't allow SP to be loaded unless it is also the base
14657 register. It guarantees that SP is reset correctly when
14658 an LDM instruction is interrupted. Otherwise, we might
14659 end up with a corrupt stack. */
14660 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14661 return 0;
14662
14663 unsorted_offsets[i] = INTVAL (offset);
14664 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14665 order[0] = i;
14666 }
14667 else
14668 /* Not a suitable memory address. */
14669 return 0;
14670 }
14671
14672 /* All the useful information has now been extracted from the
14673 operands into unsorted_regs and unsorted_offsets; additionally,
14674 order[0] has been set to the lowest offset in the list. Sort
14675 the offsets into order, verifying that they are adjacent, and
14676 check that the register numbers are ascending. */
14677 if (!compute_offset_order (nops, unsorted_offsets, order,
14678 check_regs ? unsorted_regs : NULL))
14679 return 0;
14680
14681 if (saved_order)
14682 memcpy (saved_order, order, sizeof order);
14683
14684 if (base)
14685 {
14686 *base = base_reg;
14687
14688 for (i = 0; i < nops; i++)
14689 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14690
14691 *load_offset = unsorted_offsets[order[0]];
14692 }
14693
14694 if (unsorted_offsets[order[0]] == 0)
14695 ldm_case = 1; /* ldmia */
14696 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14697 ldm_case = 2; /* ldmib */
14698 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14699 ldm_case = 3; /* ldmda */
14700 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14701 ldm_case = 4; /* ldmdb */
14702 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14703 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14704 ldm_case = 5;
14705 else
14706 return 0;
14707
14708 if (!multiple_operation_profitable_p (false, nops,
14709 ldm_case == 5
14710 ? unsorted_offsets[order[0]] : 0))
14711 return 0;
14712
14713 return ldm_case;
14714 }
14715
14716 /* Used to determine in a peephole whether a sequence of store instructions can
14717 be changed into a store-multiple instruction.
14718 NOPS is the number of separate store instructions we are examining.
14719 NOPS_TOTAL is the total number of instructions recognized by the peephole
14720 pattern.
14721 The first NOPS entries in OPERANDS are the source registers, the next
14722 NOPS entries are memory operands. If this function is successful, *BASE is
14723 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14724 to the first memory location's offset from that base register. REGS is an
14725 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14726 likewise filled with the corresponding rtx's.
14727 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14728 numbers to an ascending order of stores.
14729 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14730 from ascending memory locations, and the function verifies that the register
14731 numbers are themselves ascending. If CHECK_REGS is false, the register
14732 numbers are stored in the order they are found in the operands. */
14733 static int
14734 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14735 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14736 HOST_WIDE_INT *load_offset, bool check_regs)
14737 {
14738 int unsorted_regs[MAX_LDM_STM_OPS];
14739 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14740 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14741 int order[MAX_LDM_STM_OPS];
14742 int base_reg = -1;
14743 rtx base_reg_rtx = NULL;
14744 int i, stm_case;
14745
14746 /* Write-back of the base register is currently only supported for Thumb-1. */
14747 int base_writeback = TARGET_THUMB1;
14748
14749 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14750 easily extended if required. */
14751 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14752
14753 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14754
14755 /* Loop over the operands and check that the memory references are
14756 suitable (i.e. immediate offsets from the same base register). At
14757 the same time, extract the target register, and the memory
14758 offsets. */
14759 for (i = 0; i < nops; i++)
14760 {
14761 rtx reg;
14762 rtx offset;
14763
14764 /* Convert a subreg of a mem into the mem itself. */
14765 if (GET_CODE (operands[nops + i]) == SUBREG)
14766 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14767
14768 gcc_assert (MEM_P (operands[nops + i]));
14769
14770 /* Don't reorder volatile memory references; it doesn't seem worth
14771 looking for the case where the order is ok anyway. */
14772 if (MEM_VOLATILE_P (operands[nops + i]))
14773 return 0;
14774
14775 offset = const0_rtx;
14776
14777 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14778 || (SUBREG_P (reg)
14779 && REG_P (reg = SUBREG_REG (reg))))
14780 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14781 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14782 || (SUBREG_P (reg)
14783 && REG_P (reg = SUBREG_REG (reg))))
14784 && (CONST_INT_P (offset
14785 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14786 {
14787 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14788 ? operands[i] : SUBREG_REG (operands[i]));
14789 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14790
14791 if (i == 0)
14792 {
14793 base_reg = REGNO (reg);
14794 base_reg_rtx = reg;
14795 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14796 return 0;
14797 }
14798 else if (base_reg != (int) REGNO (reg))
14799 /* Not addressed from the same base register. */
14800 return 0;
14801
14802 /* If it isn't an integer register, then we can't do this. */
14803 if (unsorted_regs[i] < 0
14804 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14805 /* The effects are unpredictable if the base register is
14806 both updated and stored. */
14807 || (base_writeback && unsorted_regs[i] == base_reg)
14808 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14809 || unsorted_regs[i] > 14)
14810 return 0;
14811
14812 unsorted_offsets[i] = INTVAL (offset);
14813 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14814 order[0] = i;
14815 }
14816 else
14817 /* Not a suitable memory address. */
14818 return 0;
14819 }
14820
14821 /* All the useful information has now been extracted from the
14822 operands into unsorted_regs and unsorted_offsets; additionally,
14823 order[0] has been set to the lowest offset in the list. Sort
14824 the offsets into order, verifying that they are adjacent, and
14825 check that the register numbers are ascending. */
14826 if (!compute_offset_order (nops, unsorted_offsets, order,
14827 check_regs ? unsorted_regs : NULL))
14828 return 0;
14829
14830 if (saved_order)
14831 memcpy (saved_order, order, sizeof order);
14832
14833 if (base)
14834 {
14835 *base = base_reg;
14836
14837 for (i = 0; i < nops; i++)
14838 {
14839 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14840 if (reg_rtxs)
14841 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14842 }
14843
14844 *load_offset = unsorted_offsets[order[0]];
14845 }
14846
14847 if (TARGET_THUMB1
14848 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14849 return 0;
14850
14851 if (unsorted_offsets[order[0]] == 0)
14852 stm_case = 1; /* stmia */
14853 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14854 stm_case = 2; /* stmib */
14855 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14856 stm_case = 3; /* stmda */
14857 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14858 stm_case = 4; /* stmdb */
14859 else
14860 return 0;
14861
14862 if (!multiple_operation_profitable_p (false, nops, 0))
14863 return 0;
14864
14865 return stm_case;
14866 }
14867 \f
14868 /* Routines for use in generating RTL. */
14869
14870 /* Generate a load-multiple instruction. COUNT is the number of loads in
14871 the instruction; REGS and MEMS are arrays containing the operands.
14872 BASEREG is the base register to be used in addressing the memory operands.
14873 WBACK_OFFSET is nonzero if the instruction should update the base
14874 register. */
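/* For example, with COUNT == 2, REGS == { 4, 5 } and WBACK_OFFSET == 8 the
result is the parallel
[(set BASEREG (plus BASEREG (const_int 8)))
(set (reg:SI 4) MEMS[0])
(set (reg:SI 5) MEMS[1])]
i.e. an ldmia with write-back, assuming MEMS address BASEREG and
BASEREG + 4.  When the operation is not deemed profitable, a sequence of
single loads (plus the base update) is emitted instead.  */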
14875
14876 static rtx
14877 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14878 HOST_WIDE_INT wback_offset)
14879 {
14880 int i = 0, j;
14881 rtx result;
14882
14883 if (!multiple_operation_profitable_p (false, count, 0))
14884 {
14885 rtx seq;
14886
14887 start_sequence ();
14888
14889 for (i = 0; i < count; i++)
14890 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14891
14892 if (wback_offset != 0)
14893 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14894
14895 seq = get_insns ();
14896 end_sequence ();
14897
14898 return seq;
14899 }
14900
14901 result = gen_rtx_PARALLEL (VOIDmode,
14902 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14903 if (wback_offset != 0)
14904 {
14905 XVECEXP (result, 0, 0)
14906 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14907 i = 1;
14908 count++;
14909 }
14910
14911 for (j = 0; i < count; i++, j++)
14912 XVECEXP (result, 0, i)
14913 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14914
14915 return result;
14916 }
14917
14918 /* Generate a store-multiple instruction. COUNT is the number of stores in
14919 the instruction; REGS and MEMS are arrays containing the operands.
14920 BASEREG is the base register to be used in addressing the memory operands.
14921 WBACK_OFFSET is nonzero if the instruction should update the base
14922 register. */
14923
14924 static rtx
14925 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14926 HOST_WIDE_INT wback_offset)
14927 {
14928 int i = 0, j;
14929 rtx result;
14930
14931 if (GET_CODE (basereg) == PLUS)
14932 basereg = XEXP (basereg, 0);
14933
14934 if (!multiple_operation_profitable_p (false, count, 0))
14935 {
14936 rtx seq;
14937
14938 start_sequence ();
14939
14940 for (i = 0; i < count; i++)
14941 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14942
14943 if (wback_offset != 0)
14944 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14945
14946 seq = get_insns ();
14947 end_sequence ();
14948
14949 return seq;
14950 }
14951
14952 result = gen_rtx_PARALLEL (VOIDmode,
14953 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14954 if (wback_offset != 0)
14955 {
14956 XVECEXP (result, 0, 0)
14957 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14958 i = 1;
14959 count++;
14960 }
14961
14962 for (j = 0; i < count; i++, j++)
14963 XVECEXP (result, 0, i)
14964 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14965
14966 return result;
14967 }
14968
14969 /* Generate either a load-multiple or a store-multiple instruction. This
14970 function can be used in situations where we can start with a single MEM
14971 rtx and adjust its address upwards.
14972 COUNT is the number of operations in the instruction, not counting a
14973 possible update of the base register. REGS is an array containing the
14974 register operands.
14975 BASEREG is the base register to be used in addressing the memory operands,
14976 which are constructed from BASEMEM.
14977 WRITE_BACK specifies whether the generated instruction should include an
14978 update of the base register.
14979 OFFSETP is used to pass an offset to and from this function; this offset
14980 is not used when constructing the address (instead BASEMEM should have an
14981 appropriate offset in its address), it is used only for setting
14982 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14983
14984 static rtx
14985 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14986 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14987 {
14988 rtx mems[MAX_LDM_STM_OPS];
14989 HOST_WIDE_INT offset = *offsetp;
14990 int i;
14991
14992 gcc_assert (count <= MAX_LDM_STM_OPS);
14993
14994 if (GET_CODE (basereg) == PLUS)
14995 basereg = XEXP (basereg, 0);
14996
14997 for (i = 0; i < count; i++)
14998 {
14999 rtx addr = plus_constant (Pmode, basereg, i * 4);
15000 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15001 offset += 4;
15002 }
15003
15004 if (write_back)
15005 *offsetp = offset;
15006
15007 if (is_load)
15008 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15009 write_back ? 4 * count : 0);
15010 else
15011 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15012 write_back ? 4 * count : 0);
15013 }
15014
15015 rtx
15016 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15017 rtx basemem, HOST_WIDE_INT *offsetp)
15018 {
15019 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15020 offsetp);
15021 }
15022
15023 rtx
15024 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15025 rtx basemem, HOST_WIDE_INT *offsetp)
15026 {
15027 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15028 offsetp);
15029 }
15030
15031 /* Called from a peephole2 expander to turn a sequence of loads into an
15032 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15033 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15034 is true if we can reorder the registers because they are used commutatively
15035 subsequently.
15036 Returns true iff we could generate a new instruction. */
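/* For example, four loads of r3, r2, r1 and r0 from [rbase], [rbase, #4],
[rbase, #8] and [rbase, #12] cannot keep that register/address pairing in
a single ldm, but if the loaded values are only used commutatively
afterwards (say, all added together), SORT_REGS allows the register list
to be permuted so that one ldmia rbase, {r0-r3} can be emitted.  */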
15037
15038 bool
15039 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15040 {
15041 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15042 rtx mems[MAX_LDM_STM_OPS];
15043 int i, j, base_reg;
15044 rtx base_reg_rtx;
15045 HOST_WIDE_INT offset;
15046 int write_back = FALSE;
15047 int ldm_case;
15048 rtx addr;
15049
15050 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15051 &base_reg, &offset, !sort_regs);
15052
15053 if (ldm_case == 0)
15054 return false;
15055
15056 if (sort_regs)
15057 for (i = 0; i < nops - 1; i++)
15058 for (j = i + 1; j < nops; j++)
15059 if (regs[i] > regs[j])
15060 {
15061 int t = regs[i];
15062 regs[i] = regs[j];
15063 regs[j] = t;
15064 }
15065 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15066
15067 if (TARGET_THUMB1)
15068 {
15069 gcc_assert (ldm_case == 1 || ldm_case == 5);
15070
15071 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15072 write_back = true;
15073 for (i = 0; i < nops; i++)
15074 if (base_reg == regs[i])
15075 write_back = false;
15076
15077 /* Ensure the base is dead if it is updated. */
15078 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15079 return false;
15080 }
15081
15082 if (ldm_case == 5)
15083 {
15084 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15085 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15086 offset = 0;
15087 base_reg_rtx = newbase;
15088 }
15089
15090 for (i = 0; i < nops; i++)
15091 {
15092 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15093 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15094 SImode, addr, 0);
15095 }
15096 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15097 write_back ? offset + i * 4 : 0));
15098 return true;
15099 }
15100
15101 /* Called from a peephole2 expander to turn a sequence of stores into an
15102 STM instruction. OPERANDS are the operands found by the peephole matcher;
15103 NOPS indicates how many separate stores we are trying to combine.
15104 Returns true iff we could generate a new instruction. */
15105
15106 bool
15107 gen_stm_seq (rtx *operands, int nops)
15108 {
15109 int i;
15110 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15111 rtx mems[MAX_LDM_STM_OPS];
15112 int base_reg;
15113 rtx base_reg_rtx;
15114 HOST_WIDE_INT offset;
15115 int write_back = FALSE;
15116 int stm_case;
15117 rtx addr;
15118 bool base_reg_dies;
15119
15120 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15121 mem_order, &base_reg, &offset, true);
15122
15123 if (stm_case == 0)
15124 return false;
15125
15126 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15127
15128 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15129 if (TARGET_THUMB1)
15130 {
15131 gcc_assert (base_reg_dies);
15132 write_back = TRUE;
15133 }
15134
15135 if (stm_case == 5)
15136 {
15137 gcc_assert (base_reg_dies);
15138 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15139 offset = 0;
15140 }
15141
15142 addr = plus_constant (Pmode, base_reg_rtx, offset);
15143
15144 for (i = 0; i < nops; i++)
15145 {
15146 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15147 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15148 SImode, addr, 0);
15149 }
15150 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15151 write_back ? offset + i * 4 : 0));
15152 return true;
15153 }
15154
15155 /* Called from a peephole2 expander to turn a sequence of stores that are
15156 preceded by constant loads into an STM instruction. OPERANDS are the
15157 operands found by the peephole matcher; NOPS indicates how many
15158 separate stores we are trying to combine; there are 2 * NOPS
15159 instructions in the peephole.
15160 Returns true iff we could generate a new instruction. */
15161
15162 bool
15163 gen_const_stm_seq (rtx *operands, int nops)
15164 {
15165 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15166 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15167 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15168 rtx mems[MAX_LDM_STM_OPS];
15169 int base_reg;
15170 rtx base_reg_rtx;
15171 HOST_WIDE_INT offset;
15172 int write_back = FALSE;
15173 int stm_case;
15174 rtx addr;
15175 bool base_reg_dies;
15176 int i, j;
15177 HARD_REG_SET allocated;
15178
15179 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15180 mem_order, &base_reg, &offset, false);
15181
15182 if (stm_case == 0)
15183 return false;
15184
15185 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15186
15187 /* If the same register is used more than once, try to find a free
15188 register. */
15189 CLEAR_HARD_REG_SET (allocated);
15190 for (i = 0; i < nops; i++)
15191 {
15192 for (j = i + 1; j < nops; j++)
15193 if (regs[i] == regs[j])
15194 {
15195 rtx t = peep2_find_free_register (0, nops * 2,
15196 TARGET_THUMB1 ? "l" : "r",
15197 SImode, &allocated);
15198 if (t == NULL_RTX)
15199 return false;
15200 reg_rtxs[i] = t;
15201 regs[i] = REGNO (t);
15202 }
15203 }
15204
15205 /* Compute an ordering that maps the register numbers to an ascending
15206 sequence. */
15207 reg_order[0] = 0;
15208 for (i = 0; i < nops; i++)
15209 if (regs[i] < regs[reg_order[0]])
15210 reg_order[0] = i;
15211
15212 for (i = 1; i < nops; i++)
15213 {
15214 int this_order = reg_order[i - 1];
15215 for (j = 0; j < nops; j++)
15216 if (regs[j] > regs[reg_order[i - 1]]
15217 && (this_order == reg_order[i - 1]
15218 || regs[j] < regs[this_order]))
15219 this_order = j;
15220 reg_order[i] = this_order;
15221 }
15222
15223 /* Ensure that registers that must be live after the instruction end
15224 up with the correct value. */
15225 for (i = 0; i < nops; i++)
15226 {
15227 int this_order = reg_order[i];
15228 if ((this_order != mem_order[i]
15229 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15230 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15231 return false;
15232 }
15233
15234 /* Load the constants. */
15235 for (i = 0; i < nops; i++)
15236 {
15237 rtx op = operands[2 * nops + mem_order[i]];
15238 sorted_regs[i] = regs[reg_order[i]];
15239 emit_move_insn (reg_rtxs[reg_order[i]], op);
15240 }
15241
15242 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15243
15244 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15245 if (TARGET_THUMB1)
15246 {
15247 gcc_assert (base_reg_dies);
15248 write_back = TRUE;
15249 }
15250
15251 if (stm_case == 5)
15252 {
15253 gcc_assert (base_reg_dies);
15254 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15255 offset = 0;
15256 }
15257
15258 addr = plus_constant (Pmode, base_reg_rtx, offset);
15259
15260 for (i = 0; i < nops; i++)
15261 {
15262 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15263 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15264 SImode, addr, 0);
15265 }
15266 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15267 write_back ? offset + i * 4 : 0));
15268 return true;
15269 }
15270
15271 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15272 unaligned copies on processors which support unaligned semantics for those
15273 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15274 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15275 An interleave factor of 1 (the minimum) will perform no interleaving.
15276 Load/store multiple are used for aligned addresses where possible. */
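/* For example, with INTERLEAVE_FACTOR == 2 each iteration of the main loop
copies 8 bytes, using an ldm or stm on a word-aligned side (in which case
the hard registers r0 and r1 are used) and load/load/store/store pairs of
unaligned accesses otherwise; any trailing 3 bytes are then copied as one
halfword followed by one byte.  */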
15277
15278 static void
15279 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15280 HOST_WIDE_INT length,
15281 unsigned int interleave_factor)
15282 {
15283 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15284 int *regnos = XALLOCAVEC (int, interleave_factor);
15285 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15286 HOST_WIDE_INT i, j;
15287 HOST_WIDE_INT remaining = length, words;
15288 rtx halfword_tmp = NULL, byte_tmp = NULL;
15289 rtx dst, src;
15290 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15291 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15292 HOST_WIDE_INT srcoffset, dstoffset;
15293 HOST_WIDE_INT src_autoinc, dst_autoinc;
15294 rtx mem, addr;
15295
15296 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15297
15298 /* Use hard registers if we have aligned source or destination so we can use
15299 load/store multiple with contiguous registers. */
15300 if (dst_aligned || src_aligned)
15301 for (i = 0; i < interleave_factor; i++)
15302 regs[i] = gen_rtx_REG (SImode, i);
15303 else
15304 for (i = 0; i < interleave_factor; i++)
15305 regs[i] = gen_reg_rtx (SImode);
15306
15307 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15308 src = copy_addr_to_reg (XEXP (srcbase, 0));
15309
15310 srcoffset = dstoffset = 0;
15311
15312 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15313 For copying the last bytes we want to subtract this offset again. */
15314 src_autoinc = dst_autoinc = 0;
15315
15316 for (i = 0; i < interleave_factor; i++)
15317 regnos[i] = i;
15318
15319 /* Copy BLOCK_SIZE_BYTES chunks. */
15320
15321 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15322 {
15323 /* Load words. */
15324 if (src_aligned && interleave_factor > 1)
15325 {
15326 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15327 TRUE, srcbase, &srcoffset));
15328 src_autoinc += UNITS_PER_WORD * interleave_factor;
15329 }
15330 else
15331 {
15332 for (j = 0; j < interleave_factor; j++)
15333 {
15334 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15335 - src_autoinc));
15336 mem = adjust_automodify_address (srcbase, SImode, addr,
15337 srcoffset + j * UNITS_PER_WORD);
15338 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15339 }
15340 srcoffset += block_size_bytes;
15341 }
15342
15343 /* Store words. */
15344 if (dst_aligned && interleave_factor > 1)
15345 {
15346 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15347 TRUE, dstbase, &dstoffset));
15348 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15349 }
15350 else
15351 {
15352 for (j = 0; j < interleave_factor; j++)
15353 {
15354 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15355 - dst_autoinc));
15356 mem = adjust_automodify_address (dstbase, SImode, addr,
15357 dstoffset + j * UNITS_PER_WORD);
15358 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15359 }
15360 dstoffset += block_size_bytes;
15361 }
15362
15363 remaining -= block_size_bytes;
15364 }
15365
15366 /* Copy any whole words left (note these aren't interleaved with any
15367 subsequent halfword/byte load/stores in the interests of simplicity). */
15368
15369 words = remaining / UNITS_PER_WORD;
15370
15371 gcc_assert (words < interleave_factor);
15372
15373 if (src_aligned && words > 1)
15374 {
15375 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15376 &srcoffset));
15377 src_autoinc += UNITS_PER_WORD * words;
15378 }
15379 else
15380 {
15381 for (j = 0; j < words; j++)
15382 {
15383 addr = plus_constant (Pmode, src,
15384 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15385 mem = adjust_automodify_address (srcbase, SImode, addr,
15386 srcoffset + j * UNITS_PER_WORD);
15387 if (src_aligned)
15388 emit_move_insn (regs[j], mem);
15389 else
15390 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15391 }
15392 srcoffset += words * UNITS_PER_WORD;
15393 }
15394
15395 if (dst_aligned && words > 1)
15396 {
15397 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15398 &dstoffset));
15399 dst_autoinc += words * UNITS_PER_WORD;
15400 }
15401 else
15402 {
15403 for (j = 0; j < words; j++)
15404 {
15405 addr = plus_constant (Pmode, dst,
15406 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15407 mem = adjust_automodify_address (dstbase, SImode, addr,
15408 dstoffset + j * UNITS_PER_WORD);
15409 if (dst_aligned)
15410 emit_move_insn (mem, regs[j]);
15411 else
15412 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15413 }
15414 dstoffset += words * UNITS_PER_WORD;
15415 }
15416
15417 remaining -= words * UNITS_PER_WORD;
15418
15419 gcc_assert (remaining < 4);
15420
15421 /* Copy a halfword if necessary. */
15422
15423 if (remaining >= 2)
15424 {
15425 halfword_tmp = gen_reg_rtx (SImode);
15426
15427 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15428 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15429 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15430
15431 /* Either write out immediately, or delay until we've loaded the last
15432 byte, depending on interleave factor. */
15433 if (interleave_factor == 1)
15434 {
15435 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15436 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15437 emit_insn (gen_unaligned_storehi (mem,
15438 gen_lowpart (HImode, halfword_tmp)));
15439 halfword_tmp = NULL;
15440 dstoffset += 2;
15441 }
15442
15443 remaining -= 2;
15444 srcoffset += 2;
15445 }
15446
15447 gcc_assert (remaining < 2);
15448
15449 /* Copy last byte. */
15450
15451 if ((remaining & 1) != 0)
15452 {
15453 byte_tmp = gen_reg_rtx (SImode);
15454
15455 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15456 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15457 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15458
15459 if (interleave_factor == 1)
15460 {
15461 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15462 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15463 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15464 byte_tmp = NULL;
15465 dstoffset++;
15466 }
15467
15468 remaining--;
15469 srcoffset++;
15470 }
15471
15472 /* Store last halfword if we haven't done so already. */
15473
15474 if (halfword_tmp)
15475 {
15476 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15477 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15478 emit_insn (gen_unaligned_storehi (mem,
15479 gen_lowpart (HImode, halfword_tmp)));
15480 dstoffset += 2;
15481 }
15482
15483 /* Likewise for last byte. */
15484
15485 if (byte_tmp)
15486 {
15487 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15488 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15489 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15490 dstoffset++;
15491 }
15492
15493 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15494 }
15495
15496 /* From mips_adjust_block_mem:
15497
15498 Helper function for doing a loop-based block operation on memory
15499 reference MEM. Each iteration of the loop will operate on LENGTH
15500 bytes of MEM.
15501
15502 Create a new base register for use within the loop and point it to
15503 the start of MEM. Create a new memory reference that uses this
15504 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15505
15506 static void
15507 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15508 rtx *loop_mem)
15509 {
15510 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15511
15512 /* Although the new mem does not refer to a known location,
15513 it does keep up to LENGTH bytes of alignment. */
15514 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15515 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15516 }
15517
15518 /* From mips_block_move_loop:
15519
15520 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15521 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15522 the memory regions do not overlap. */
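/* For example, when not optimizing for size arm_cpymemqi_unaligned uses this
with INTERLEAVE_FACTOR == 4 and BYTES_PER_ITER == 16, so a 60-byte copy
becomes a three-iteration loop followed by a straight copy of the
remaining 12 bytes.  */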
15523
15524 static void
15525 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15526 unsigned int interleave_factor,
15527 HOST_WIDE_INT bytes_per_iter)
15528 {
15529 rtx src_reg, dest_reg, final_src, test;
15530 HOST_WIDE_INT leftover;
15531
15532 leftover = length % bytes_per_iter;
15533 length -= leftover;
15534
15535 /* Create registers and memory references for use within the loop. */
15536 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15537 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15538
15539 /* Calculate the value that SRC_REG should have after the last iteration of
15540 the loop. */
15541 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15542 0, 0, OPTAB_WIDEN);
15543
15544 /* Emit the start of the loop. */
15545 rtx_code_label *label = gen_label_rtx ();
15546 emit_label (label);
15547
15548 /* Emit the loop body. */
15549 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15550 interleave_factor);
15551
15552 /* Move on to the next block. */
15553 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15554 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15555
15556 /* Emit the loop condition. */
15557 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15558 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15559
15560 /* Mop up any left-over bytes. */
15561 if (leftover)
15562 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15563 }
15564
15565 /* Emit a block move when either the source or destination is unaligned (not
15566 aligned to a four-byte boundary). This may need further tuning depending on
15567 core type, optimize_size setting, etc. */
15568
15569 static int
15570 arm_cpymemqi_unaligned (rtx *operands)
15571 {
15572 HOST_WIDE_INT length = INTVAL (operands[2]);
15573
15574 if (optimize_size)
15575 {
15576 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15577 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15578 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big, so limit the
15579 amount of generated code when optimizing for size. We'll still use
15580 ldm/stm if src_aligned or dst_aligned, and allow more interleaving in
15581 those cases since the resulting code can be smaller. */
15582 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15583 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15584
15585 if (length > 12)
15586 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15587 interleave_factor, bytes_per_iter);
15588 else
15589 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15590 interleave_factor);
15591 }
15592 else
15593 {
15594 /* Note that the loop created by arm_block_move_unaligned_loop may be
15595 subject to loop unrolling, which makes tuning this condition a little
15596 redundant. */
15597 if (length > 32)
15598 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15599 else
15600 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15601 }
15602
15603 return 1;
15604 }
15605
15606 int
15607 arm_gen_cpymemqi (rtx *operands)
15608 {
15609 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15610 HOST_WIDE_INT srcoffset, dstoffset;
15611 rtx src, dst, srcbase, dstbase;
15612 rtx part_bytes_reg = NULL;
15613 rtx mem;
15614
15615 if (!CONST_INT_P (operands[2])
15616 || !CONST_INT_P (operands[3])
15617 || INTVAL (operands[2]) > 64)
15618 return 0;
15619
15620 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15621 return arm_cpymemqi_unaligned (operands);
15622
15623 if (INTVAL (operands[3]) & 3)
15624 return 0;
15625
15626 dstbase = operands[0];
15627 srcbase = operands[1];
15628
15629 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15630 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15631
15632 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15633 out_words_to_go = INTVAL (operands[2]) / 4;
15634 last_bytes = INTVAL (operands[2]) & 3;
15635 dstoffset = srcoffset = 0;
15636
15637 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15638 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15639
15640 while (in_words_to_go >= 2)
15641 {
15642 if (in_words_to_go > 4)
15643 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15644 TRUE, srcbase, &srcoffset));
15645 else
15646 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15647 src, FALSE, srcbase,
15648 &srcoffset));
15649
15650 if (out_words_to_go)
15651 {
15652 if (out_words_to_go > 4)
15653 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15654 TRUE, dstbase, &dstoffset));
15655 else if (out_words_to_go != 1)
15656 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15657 out_words_to_go, dst,
15658 (last_bytes == 0
15659 ? FALSE : TRUE),
15660 dstbase, &dstoffset));
15661 else
15662 {
15663 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15664 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15665 if (last_bytes != 0)
15666 {
15667 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15668 dstoffset += 4;
15669 }
15670 }
15671 }
15672
15673 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15674 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15675 }
15676
15677 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15678 if (out_words_to_go)
15679 {
15680 rtx sreg;
15681
15682 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15683 sreg = copy_to_reg (mem);
15684
15685 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15686 emit_move_insn (mem, sreg);
15687 in_words_to_go--;
15688
15689 gcc_assert (!in_words_to_go); /* Sanity check */
15690 }
15691
15692 if (in_words_to_go)
15693 {
15694 gcc_assert (in_words_to_go > 0);
15695
15696 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15697 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15698 }
15699
15700 gcc_assert (!last_bytes || part_bytes_reg);
15701
15702 if (BYTES_BIG_ENDIAN && last_bytes)
15703 {
15704 rtx tmp = gen_reg_rtx (SImode);
15705
15706 /* The bytes we want are in the top end of the word. */
15707 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15708 GEN_INT (8 * (4 - last_bytes))));
15709 part_bytes_reg = tmp;
15710
15711 while (last_bytes)
15712 {
15713 mem = adjust_automodify_address (dstbase, QImode,
15714 plus_constant (Pmode, dst,
15715 last_bytes - 1),
15716 dstoffset + last_bytes - 1);
15717 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15718
15719 if (--last_bytes)
15720 {
15721 tmp = gen_reg_rtx (SImode);
15722 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15723 part_bytes_reg = tmp;
15724 }
15725 }
15726
15727 }
15728 else
15729 {
15730 if (last_bytes > 1)
15731 {
15732 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15733 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15734 last_bytes -= 2;
15735 if (last_bytes)
15736 {
15737 rtx tmp = gen_reg_rtx (SImode);
15738 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15739 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15740 part_bytes_reg = tmp;
15741 dstoffset += 2;
15742 }
15743 }
15744
15745 if (last_bytes)
15746 {
15747 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15748 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15749 }
15750 }
15751
15752 return 1;
15753 }
15754
15755 /* Helper for gen_cpymem_ldrd_strd. Return a copy of the memory rtx MEM
15756 with its address increased by the size of its mode. */
15757 inline static rtx
15758 next_consecutive_mem (rtx mem)
15759 {
15760 machine_mode mode = GET_MODE (mem);
15761 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15762 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15763
15764 return adjust_automodify_address (mem, mode, addr, offset);
15765 }
15766
15767 /* Copy using LDRD/STRD instructions whenever possible.
15768 Returns true upon success. */
15769 bool
15770 gen_cpymem_ldrd_strd (rtx *operands)
15771 {
15772 unsigned HOST_WIDE_INT len;
15773 HOST_WIDE_INT align;
15774 rtx src, dst, base;
15775 rtx reg0;
15776 bool src_aligned, dst_aligned;
15777 bool src_volatile, dst_volatile;
15778
15779 gcc_assert (CONST_INT_P (operands[2]));
15780 gcc_assert (CONST_INT_P (operands[3]));
15781
15782 len = UINTVAL (operands[2]);
15783 if (len > 64)
15784 return false;
15785
15786 /* Maximum alignment we can assume for both src and dst buffers. */
15787 align = INTVAL (operands[3]);
15788
15789 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15790 return false;
15791
15792 /* Place src and dst addresses in registers
15793 and update the corresponding mem rtx. */
15794 dst = operands[0];
15795 dst_volatile = MEM_VOLATILE_P (dst);
15796 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15797 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15798 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15799
15800 src = operands[1];
15801 src_volatile = MEM_VOLATILE_P (src);
15802 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15803 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15804 src = adjust_automodify_address (src, VOIDmode, base, 0);
15805
15806 if (!unaligned_access && !(src_aligned && dst_aligned))
15807 return false;
15808
15809 if (src_volatile || dst_volatile)
15810 return false;
15811
15812 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15813 if (!(dst_aligned || src_aligned))
15814 return arm_gen_cpymemqi (operands);
15815
15816 /* If either the src or dst is unaligned, we'll be accessing it as pairs
15817 of unaligned SImode accesses. Otherwise we can generate DImode
15818 ldrd/strd instructions. */
15819 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15820 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15821
15822 while (len >= 8)
15823 {
15824 len -= 8;
15825 reg0 = gen_reg_rtx (DImode);
15826 rtx first_reg = NULL_RTX;
15827 rtx second_reg = NULL_RTX;
15828
15829 if (!src_aligned || !dst_aligned)
15830 {
15831 if (BYTES_BIG_ENDIAN)
15832 {
15833 second_reg = gen_lowpart (SImode, reg0);
15834 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15835 }
15836 else
15837 {
15838 first_reg = gen_lowpart (SImode, reg0);
15839 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15840 }
15841 }
15842 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15843 emit_move_insn (reg0, src);
15844 else if (src_aligned)
15845 emit_insn (gen_unaligned_loaddi (reg0, src));
15846 else
15847 {
15848 emit_insn (gen_unaligned_loadsi (first_reg, src));
15849 src = next_consecutive_mem (src);
15850 emit_insn (gen_unaligned_loadsi (second_reg, src));
15851 }
15852
15853 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15854 emit_move_insn (dst, reg0);
15855 else if (dst_aligned)
15856 emit_insn (gen_unaligned_storedi (dst, reg0));
15857 else
15858 {
15859 emit_insn (gen_unaligned_storesi (dst, first_reg));
15860 dst = next_consecutive_mem (dst);
15861 emit_insn (gen_unaligned_storesi (dst, second_reg));
15862 }
15863
15864 src = next_consecutive_mem (src);
15865 dst = next_consecutive_mem (dst);
15866 }
15867
15868 gcc_assert (len < 8);
15869 if (len >= 4)
15870 {
15871 /* More than a word but less than a double-word to copy. Copy a word. */
15872 reg0 = gen_reg_rtx (SImode);
15873 src = adjust_address (src, SImode, 0);
15874 dst = adjust_address (dst, SImode, 0);
15875 if (src_aligned)
15876 emit_move_insn (reg0, src);
15877 else
15878 emit_insn (gen_unaligned_loadsi (reg0, src));
15879
15880 if (dst_aligned)
15881 emit_move_insn (dst, reg0);
15882 else
15883 emit_insn (gen_unaligned_storesi (dst, reg0));
15884
15885 src = next_consecutive_mem (src);
15886 dst = next_consecutive_mem (dst);
15887 len -= 4;
15888 }
15889
15890 if (len == 0)
15891 return true;
15892
15893 /* Copy the remaining bytes. */
15894 if (len >= 2)
15895 {
15896 dst = adjust_address (dst, HImode, 0);
15897 src = adjust_address (src, HImode, 0);
15898 reg0 = gen_reg_rtx (SImode);
15899 if (src_aligned)
15900 emit_insn (gen_zero_extendhisi2 (reg0, src));
15901 else
15902 emit_insn (gen_unaligned_loadhiu (reg0, src));
15903
15904 if (dst_aligned)
15905 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15906 else
15907 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15908
15909 src = next_consecutive_mem (src);
15910 dst = next_consecutive_mem (dst);
15911 if (len == 2)
15912 return true;
15913 }
15914
15915 dst = adjust_address (dst, QImode, 0);
15916 src = adjust_address (src, QImode, 0);
15917 reg0 = gen_reg_rtx (QImode);
15918 emit_move_insn (reg0, src);
15919 emit_move_insn (dst, reg0);
15920 return true;
15921 }
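
/* For illustration, assuming both buffers are known to be doubleword
   aligned and 14 bytes are to be copied, the expansion above might emit
   something roughly equivalent to (register numbers are arbitrary):

	ldrd	r4, r5, [r1]		@ 8 bytes via the DImode path
	strd	r4, r5, [r0]
	ldr	r4, [r1, #8]		@ one more word
	str	r4, [r0, #8]
	ldrh	r4, [r1, #12]		@ trailing halfword
	strh	r4, [r0, #12]  */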
15922
15923 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15924 into its component 32-bit subregs. OP2 may be an immediate
15925 constant and we want to simplify it in that case. */
15926 void
15927 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15928 rtx *lo_op2, rtx *hi_op2)
15929 {
15930 *lo_op1 = gen_lowpart (SImode, op1);
15931 *hi_op1 = gen_highpart (SImode, op1);
15932 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15933 subreg_lowpart_offset (SImode, DImode));
15934 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15935 subreg_highpart_offset (SImode, DImode));
15936 }
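
/* For example, if OP2 is (const_int 0x100000002), then *LO_OP2 becomes
   (const_int 2) (the least significant word) and *HI_OP2 becomes
   (const_int 1); a register OP2 is simply split into its two SImode
   subregs, as OP1 always is.  */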
15937
15938 /* Select a dominance comparison mode if possible for a test of the general
15939 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15940 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15941 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15942 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15943 In all cases OP will be either EQ or NE, but we don't need to know which
15944 here. If we are unable to support a dominance comparison we return
15945 CC mode. This will then fail to match for the RTL expressions that
15946 generate this call. */
15947 machine_mode
15948 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15949 {
15950 enum rtx_code cond1, cond2;
15951 int swapped = 0;
15952
15953 /* Currently we will probably get the wrong result if the individual
15954 comparisons are not simple. This also ensures that it is safe to
15955 reverse a comparison if necessary. */
15956 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15957 != CCmode)
15958 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15959 != CCmode))
15960 return CCmode;
15961
15962 /* The if_then_else variant of this tests the second condition if the
15963 first passes, but is true if the first fails. Reverse the first
15964 condition to get a true "inclusive-or" expression. */
15965 if (cond_or == DOM_CC_NX_OR_Y)
15966 cond1 = reverse_condition (cond1);
15967
15968 /* If the comparisons are not equal, and one doesn't dominate the other,
15969 then we can't do this. */
15970 if (cond1 != cond2
15971 && !comparison_dominates_p (cond1, cond2)
15972 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15973 return CCmode;
15974
15975 if (swapped)
15976 std::swap (cond1, cond2);
15977
15978 switch (cond1)
15979 {
15980 case EQ:
15981 if (cond_or == DOM_CC_X_AND_Y)
15982 return CC_DEQmode;
15983
15984 switch (cond2)
15985 {
15986 case EQ: return CC_DEQmode;
15987 case LE: return CC_DLEmode;
15988 case LEU: return CC_DLEUmode;
15989 case GE: return CC_DGEmode;
15990 case GEU: return CC_DGEUmode;
15991 default: gcc_unreachable ();
15992 }
15993
15994 case LT:
15995 if (cond_or == DOM_CC_X_AND_Y)
15996 return CC_DLTmode;
15997
15998 switch (cond2)
15999 {
16000 case LT:
16001 return CC_DLTmode;
16002 case LE:
16003 return CC_DLEmode;
16004 case NE:
16005 return CC_DNEmode;
16006 default:
16007 gcc_unreachable ();
16008 }
16009
16010 case GT:
16011 if (cond_or == DOM_CC_X_AND_Y)
16012 return CC_DGTmode;
16013
16014 switch (cond2)
16015 {
16016 case GT:
16017 return CC_DGTmode;
16018 case GE:
16019 return CC_DGEmode;
16020 case NE:
16021 return CC_DNEmode;
16022 default:
16023 gcc_unreachable ();
16024 }
16025
16026 case LTU:
16027 if (cond_or == DOM_CC_X_AND_Y)
16028 return CC_DLTUmode;
16029
16030 switch (cond2)
16031 {
16032 case LTU:
16033 return CC_DLTUmode;
16034 case LEU:
16035 return CC_DLEUmode;
16036 case NE:
16037 return CC_DNEmode;
16038 default:
16039 gcc_unreachable ();
16040 }
16041
16042 case GTU:
16043 if (cond_or == DOM_CC_X_AND_Y)
16044 return CC_DGTUmode;
16045
16046 switch (cond2)
16047 {
16048 case GTU:
16049 return CC_DGTUmode;
16050 case GEU:
16051 return CC_DGEUmode;
16052 case NE:
16053 return CC_DNEmode;
16054 default:
16055 gcc_unreachable ();
16056 }
16057
16058 /* The remaining cases only occur when both comparisons are the
16059 same. */
16060 case NE:
16061 gcc_assert (cond1 == cond2);
16062 return CC_DNEmode;
16063
16064 case LE:
16065 gcc_assert (cond1 == cond2);
16066 return CC_DLEmode;
16067
16068 case GE:
16069 gcc_assert (cond1 == cond2);
16070 return CC_DGEmode;
16071
16072 case LEU:
16073 gcc_assert (cond1 == cond2);
16074 return CC_DLEUmode;
16075
16076 case GEU:
16077 gcc_assert (cond1 == cond2);
16078 return CC_DGEUmode;
16079
16080 default:
16081 gcc_unreachable ();
16082 }
16083 }
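
/* For illustration: a test of the form ((a < b) || (c <= d)) gives
   cond1 == LT and cond2 == LE; LT dominates LE, so the result is
   CC_DLEmode.  A test of the form ((a == b) && (c == d)) with
   COND_OR == DOM_CC_X_AND_Y yields CC_DEQmode.  */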
16084
16085 machine_mode
16086 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16087 {
16088 /* All floating point compares return CCFP if it is an equality
16089 comparison, and CCFPE otherwise. */
16090 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16091 {
16092 switch (op)
16093 {
16094 case EQ:
16095 case NE:
16096 case UNORDERED:
16097 case ORDERED:
16098 case UNLT:
16099 case UNLE:
16100 case UNGT:
16101 case UNGE:
16102 case UNEQ:
16103 case LTGT:
16104 return CCFPmode;
16105
16106 case LT:
16107 case LE:
16108 case GT:
16109 case GE:
16110 return CCFPEmode;
16111
16112 default:
16113 gcc_unreachable ();
16114 }
16115 }
16116
16117 /* A compare with a shifted operand. Because of canonicalization, the
16118 comparison will have to be swapped when we emit the assembler. */
16119 if (GET_MODE (y) == SImode
16120 && (REG_P (y) || (SUBREG_P (y)))
16121 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16122 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16123 || GET_CODE (x) == ROTATERT))
16124 return CC_SWPmode;
16125
16126 /* A widened compare of the sum of a value plus a carry against a
16127 constant. This is a representation of RSC. We want to swap the
16128 result of the comparison at output. Not valid if the Z bit is
16129 needed. */
16130 if (GET_MODE (x) == DImode
16131 && GET_CODE (x) == PLUS
16132 && arm_borrow_operation (XEXP (x, 1), DImode)
16133 && CONST_INT_P (y)
16134 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16135 && (op == LE || op == GT))
16136 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16137 && (op == LEU || op == GTU))))
16138 return CC_SWPmode;
16139
16140 /* If X is a constant we want to use CC_RSBmode. This is
16141 non-canonical, but arm_gen_compare_reg uses this to generate the
16142 correct canonical form. */
16143 if (GET_MODE (y) == SImode
16144 && (REG_P (y) || SUBREG_P (y))
16145 && CONST_INT_P (x))
16146 return CC_RSBmode;
16147
16148 /* This operation is performed swapped, but since we only rely on the Z
16149 flag we don't need an additional mode. */
16150 if (GET_MODE (y) == SImode
16151 && (REG_P (y) || (SUBREG_P (y)))
16152 && GET_CODE (x) == NEG
16153 && (op == EQ || op == NE))
16154 return CC_Zmode;
16155
16156 /* This is a special case that is used by combine to allow a
16157 comparison of a shifted byte load to be split into a zero-extend
16158 followed by a comparison of the shifted integer (only valid for
16159 equalities and unsigned inequalities). */
16160 if (GET_MODE (x) == SImode
16161 && GET_CODE (x) == ASHIFT
16162 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16163 && GET_CODE (XEXP (x, 0)) == SUBREG
16164 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16165 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16166 && (op == EQ || op == NE
16167 || op == GEU || op == GTU || op == LTU || op == LEU)
16168 && CONST_INT_P (y))
16169 return CC_Zmode;
16170
16171 /* A construct for a conditional compare, if the false arm contains
16172 0, then both conditions must be true, otherwise either condition
16173 must be true. Not all conditions are possible, so CCmode is
16174 returned if it can't be done. */
16175 if (GET_CODE (x) == IF_THEN_ELSE
16176 && (XEXP (x, 2) == const0_rtx
16177 || XEXP (x, 2) == const1_rtx)
16178 && COMPARISON_P (XEXP (x, 0))
16179 && COMPARISON_P (XEXP (x, 1)))
16180 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16181 INTVAL (XEXP (x, 2)));
16182
16183 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16184 if (GET_CODE (x) == AND
16185 && (op == EQ || op == NE)
16186 && COMPARISON_P (XEXP (x, 0))
16187 && COMPARISON_P (XEXP (x, 1)))
16188 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16189 DOM_CC_X_AND_Y);
16190
16191 if (GET_CODE (x) == IOR
16192 && (op == EQ || op == NE)
16193 && COMPARISON_P (XEXP (x, 0))
16194 && COMPARISON_P (XEXP (x, 1)))
16195 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16196 DOM_CC_X_OR_Y);
16197
16198 /* An operation (on Thumb) where we want to test for a single bit.
16199 This is done by shifting that bit up into the top bit of a
16200 scratch register; we can then branch on the sign bit. */
16201 if (TARGET_THUMB1
16202 && GET_MODE (x) == SImode
16203 && (op == EQ || op == NE)
16204 && GET_CODE (x) == ZERO_EXTRACT
16205 && XEXP (x, 1) == const1_rtx)
16206 return CC_Nmode;
16207
16208 /* An operation that sets the condition codes as a side-effect, the
16209 V flag is not set correctly, so we can only use comparisons where
16210 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16211 instead.) */
16212 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16213 if (GET_MODE (x) == SImode
16214 && y == const0_rtx
16215 && (op == EQ || op == NE || op == LT || op == GE)
16216 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16217 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16218 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16219 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16220 || GET_CODE (x) == LSHIFTRT
16221 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16222 || GET_CODE (x) == ROTATERT
16223 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16224 return CC_NZmode;
16225
16226 /* A comparison of ~reg with a const is really a special
16227 canonicalization of compare (~const, reg), which is a reverse
16228 subtract operation. We may not get here if CONST is 0, but that
16229 doesn't matter because ~0 isn't a valid immediate for RSB. */
16230 if (GET_MODE (x) == SImode
16231 && GET_CODE (x) == NOT
16232 && CONST_INT_P (y))
16233 return CC_RSBmode;
16234
16235 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16236 return CC_Zmode;
16237
16238 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16239 && GET_CODE (x) == PLUS
16240 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16241 return CC_Cmode;
16242
16243 if (GET_MODE (x) == DImode
16244 && GET_CODE (x) == PLUS
16245 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16246 && CONST_INT_P (y)
16247 && UINTVAL (y) == 0x800000000
16248 && (op == GEU || op == LTU))
16249 return CC_ADCmode;
16250
16251 if (GET_MODE (x) == DImode
16252 && (op == GE || op == LT)
16253 && GET_CODE (x) == SIGN_EXTEND
16254 && ((GET_CODE (y) == PLUS
16255 && arm_borrow_operation (XEXP (y, 0), DImode))
16256 || arm_borrow_operation (y, DImode)))
16257 return CC_NVmode;
16258
16259 if (GET_MODE (x) == DImode
16260 && (op == GEU || op == LTU)
16261 && GET_CODE (x) == ZERO_EXTEND
16262 && ((GET_CODE (y) == PLUS
16263 && arm_borrow_operation (XEXP (y, 0), DImode))
16264 || arm_borrow_operation (y, DImode)))
16265 return CC_Bmode;
16266
16267 if (GET_MODE (x) == DImode
16268 && (op == EQ || op == NE)
16269 && (GET_CODE (x) == PLUS
16270 || GET_CODE (x) == MINUS)
16271 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16272 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16273 && GET_CODE (y) == SIGN_EXTEND
16274 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16275 return CC_Vmode;
16276
16277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16278 return GET_MODE (x);
16279
16280 return CCmode;
16281 }
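
/* For illustration: a comparison such as
     (compare (ashift (reg:SI r1) (const_int 2)) (reg:SI r0))
   selects CC_SWPmode, since the operands must be swapped when the
   assembler output is generated, while
     (compare (neg:SI (reg:SI r1)) (reg:SI r0))
   under an EQ or NE test selects CC_Zmode.  */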
16282
16283 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16284 the sequence of instructions needed to generate a suitable condition
16285 code register. Return the CC register result. */
16286 static rtx
16287 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16288 {
16289 machine_mode mode;
16290 rtx cc_reg;
16291
16292 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16293 gcc_assert (TARGET_32BIT);
16294 gcc_assert (!CONST_INT_P (x));
16295
16296 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16297 subreg_lowpart_offset (SImode, DImode));
16298 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16299 subreg_highpart_offset (SImode, DImode));
16300 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16301 subreg_lowpart_offset (SImode, DImode));
16302 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16303 subreg_highpart_offset (SImode, DImode));
16304 switch (code)
16305 {
16306 case EQ:
16307 case NE:
16308 {
16309 if (y_lo == const0_rtx || y_hi == const0_rtx)
16310 {
16311 if (y_lo != const0_rtx)
16312 {
16313 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16314
16315 gcc_assert (y_hi == const0_rtx);
16316 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16317 if (!arm_add_operand (y_lo, SImode))
16318 y_lo = force_reg (SImode, y_lo);
16319 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16320 x_lo = scratch2;
16321 }
16322 else if (y_hi != const0_rtx)
16323 {
16324 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16325
16326 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16327 if (!arm_add_operand (y_hi, SImode))
16328 y_hi = force_reg (SImode, y_hi);
16329 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16330 x_hi = scratch2;
16331 }
16332
16333 if (!scratch)
16334 {
16335 gcc_assert (!reload_completed);
16336 scratch = gen_rtx_SCRATCH (SImode);
16337 }
16338
16339 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16340 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16341
16342 rtx set
16343 = gen_rtx_SET (cc_reg,
16344 gen_rtx_COMPARE (CC_NZmode,
16345 gen_rtx_IOR (SImode, x_lo, x_hi),
16346 const0_rtx));
16347 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16348 clobber)));
16349 return cc_reg;
16350 }
16351
16352 if (!arm_add_operand (y_lo, SImode))
16353 y_lo = force_reg (SImode, y_lo);
16354
16355 if (!arm_add_operand (y_hi, SImode))
16356 y_hi = force_reg (SImode, y_hi);
16357
16358 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16359 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16360 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16361 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16362 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16363
16364 emit_insn (gen_rtx_SET (cc_reg,
16365 gen_rtx_COMPARE (mode, conjunction,
16366 const0_rtx)));
16367 return cc_reg;
16368 }
16369
16370 case LT:
16371 case GE:
16372 {
16373 if (y_lo == const0_rtx)
16374 {
16375 /* If the low word of y is 0, then this is simply a normal
16376 compare of the upper words. */
16377 if (!arm_add_operand (y_hi, SImode))
16378 y_hi = force_reg (SImode, y_hi);
16379
16380 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16381 }
16382
16383 if (!arm_add_operand (y_lo, SImode))
16384 y_lo = force_reg (SImode, y_lo);
16385
16386 rtx cmp1
16387 = gen_rtx_LTU (DImode,
16388 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16389 const0_rtx);
16390
16391 if (!scratch)
16392 scratch = gen_rtx_SCRATCH (SImode);
16393
16394 if (!arm_not_operand (y_hi, SImode))
16395 y_hi = force_reg (SImode, y_hi);
16396
16397 rtx_insn *insn;
16398 if (y_hi == const0_rtx)
16399 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16400 cmp1));
16401 else if (CONST_INT_P (y_hi))
16402 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16403 y_hi, cmp1));
16404 else
16405 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16406 cmp1));
16407 return SET_DEST (single_set (insn));
16408 }
16409
16410 case LE:
16411 case GT:
16412 {
16413 /* During expansion, we only expect to get here if y is a
16414 constant that we want to handle; otherwise we should have
16415 swapped the operands already. */
16416 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16417
16418 if (!const_ok_for_arm (INTVAL (y_lo)))
16419 y_lo = force_reg (SImode, y_lo);
16420
16421 /* Perform a reverse subtract and compare. */
16422 rtx cmp1
16423 = gen_rtx_LTU (DImode,
16424 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16425 const0_rtx);
16426 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16427 x_hi, cmp1));
16428 return SET_DEST (single_set (insn));
16429 }
16430
16431 case LTU:
16432 case GEU:
16433 {
16434 if (y_lo == const0_rtx)
16435 {
16436 /* If the low word of y is 0, then this is simply a normal
16437 compare of the upper words. */
16438 if (!arm_add_operand (y_hi, SImode))
16439 y_hi = force_reg (SImode, y_hi);
16440
16441 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16442 }
16443
16444 if (!arm_add_operand (y_lo, SImode))
16445 y_lo = force_reg (SImode, y_lo);
16446
16447 rtx cmp1
16448 = gen_rtx_LTU (DImode,
16449 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16450 const0_rtx);
16451
16452 if (!scratch)
16453 scratch = gen_rtx_SCRATCH (SImode);
16454 if (!arm_not_operand (y_hi, SImode))
16455 y_hi = force_reg (SImode, y_hi);
16456
16457 rtx_insn *insn;
16458 if (y_hi == const0_rtx)
16459 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16460 cmp1));
16461 else if (CONST_INT_P (y_hi))
16462 {
16463 /* Constant is viewed as unsigned when zero-extended. */
16464 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16465 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16466 y_hi, cmp1));
16467 }
16468 else
16469 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16470 cmp1));
16471 return SET_DEST (single_set (insn));
16472 }
16473
16474 case LEU:
16475 case GTU:
16476 {
16477 /* During expansion, we only expect to get here if y is a
16478 constant that we want to handle; otherwise we should have
16479 swapped the operands already. */
16480 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16481
16482 if (!const_ok_for_arm (INTVAL (y_lo)))
16483 y_lo = force_reg (SImode, y_lo);
16484
16485 /* Perform a reverse subtract and compare. */
16486 rtx cmp1
16487 = gen_rtx_LTU (DImode,
16488 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16489 const0_rtx);
16490 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16491 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16492 x_hi, cmp1));
16493 return SET_DEST (single_set (insn));
16494 }
16495
16496 default:
16497 gcc_unreachable ();
16498 }
16499 }
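
/* For example, when Y is the constant zero the EQ/NE case above emits a
   single comparison of the form
     (parallel [(set (reg:CC_NZ CC_REGNUM)
		     (compare:CC_NZ (ior:SI x_lo x_hi) (const_int 0)))
		(clobber (scratch:SI))])
   which is normally matched by a single ORR-and-compare pattern.  */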
16500
16501 /* X and Y are two things to compare using CODE. Emit the compare insn and
16502 return the rtx for register 0 in the proper mode. */
16503 rtx
16504 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16505 {
16506 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16507 return arm_gen_dicompare_reg (code, x, y, scratch);
16508
16509 machine_mode mode = SELECT_CC_MODE (code, x, y);
16510 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16511 if (mode == CC_RSBmode)
16512 {
16513 if (!scratch)
16514 scratch = gen_rtx_SCRATCH (SImode);
16515 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16516 GEN_INT (~UINTVAL (x)), y));
16517 }
16518 else
16519 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16520
16521 return cc_reg;
16522 }
16523
16524 /* Generate a sequence of insns that will generate the correct return
16525 address mask depending on the physical architecture that the program
16526 is running on. */
16527 rtx
16528 arm_gen_return_addr_mask (void)
16529 {
16530 rtx reg = gen_reg_rtx (Pmode);
16531
16532 emit_insn (gen_return_addr_mask (reg));
16533 return reg;
16534 }
16535
16536 void
16537 arm_reload_in_hi (rtx *operands)
16538 {
16539 rtx ref = operands[1];
16540 rtx base, scratch;
16541 HOST_WIDE_INT offset = 0;
16542
16543 if (SUBREG_P (ref))
16544 {
16545 offset = SUBREG_BYTE (ref);
16546 ref = SUBREG_REG (ref);
16547 }
16548
16549 if (REG_P (ref))
16550 {
16551 /* We have a pseudo which has been spilt onto the stack; there
16552 are two cases here: the first where there is a simple
16553 stack-slot replacement and a second where the stack-slot is
16554 out of range, or is used as a subreg. */
16555 if (reg_equiv_mem (REGNO (ref)))
16556 {
16557 ref = reg_equiv_mem (REGNO (ref));
16558 base = find_replacement (&XEXP (ref, 0));
16559 }
16560 else
16561 /* The slot is out of range, or was dressed up in a SUBREG. */
16562 base = reg_equiv_address (REGNO (ref));
16563
16564 /* PR 62554: If there is no equivalent memory location then just move
16565 the value as an SImode register move. This happens when the target
16566 architecture variant does not have an HImode register move. */
16567 if (base == NULL)
16568 {
16569 gcc_assert (REG_P (operands[0]));
16570 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16571 gen_rtx_SUBREG (SImode, ref, 0)));
16572 return;
16573 }
16574 }
16575 else
16576 base = find_replacement (&XEXP (ref, 0));
16577
16578 /* Handle the case where the address is too complex to be offset by 1. */
16579 if (GET_CODE (base) == MINUS
16580 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16581 {
16582 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16583
16584 emit_set_insn (base_plus, base);
16585 base = base_plus;
16586 }
16587 else if (GET_CODE (base) == PLUS)
16588 {
16589 /* The addend must be CONST_INT, or we would have dealt with it above. */
16590 HOST_WIDE_INT hi, lo;
16591
16592 offset += INTVAL (XEXP (base, 1));
16593 base = XEXP (base, 0);
16594
16595 /* Rework the address into a legal sequence of insns. */
16596 /* Valid range for lo is -4095 -> 4095 */
16597 lo = (offset >= 0
16598 ? (offset & 0xfff)
16599 : -((-offset) & 0xfff));
16600
16601 /* Corner case: if lo is the max offset, then we would be out of range
16602 once we have added the additional 1 below, so bump the msb into the
16603 pre-loading insn(s). */
16604 if (lo == 4095)
16605 lo &= 0x7ff;
16606
16607 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16608 ^ (HOST_WIDE_INT) 0x80000000)
16609 - (HOST_WIDE_INT) 0x80000000);
16610
16611 gcc_assert (hi + lo == offset);
16612
16613 if (hi != 0)
16614 {
16615 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16616
16617 /* Get the base address; addsi3 knows how to handle constants
16618 that require more than one insn. */
16619 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16620 base = base_plus;
16621 offset = lo;
16622 }
16623 }
16624
16625 /* Operands[2] may overlap operands[0] (though it won't overlap
16626 operands[1]); that's why we asked for a DImode reg -- so we can
16627 use the half that does not overlap. */
16628 if (REGNO (operands[2]) == REGNO (operands[0]))
16629 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16630 else
16631 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16632
16633 emit_insn (gen_zero_extendqisi2 (scratch,
16634 gen_rtx_MEM (QImode,
16635 plus_constant (Pmode, base,
16636 offset))));
16637 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16638 gen_rtx_MEM (QImode,
16639 plus_constant (Pmode, base,
16640 offset + 1))));
16641 if (!BYTES_BIG_ENDIAN)
16642 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16643 gen_rtx_IOR (SImode,
16644 gen_rtx_ASHIFT
16645 (SImode,
16646 gen_rtx_SUBREG (SImode, operands[0], 0),
16647 GEN_INT (8)),
16648 scratch));
16649 else
16650 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16651 gen_rtx_IOR (SImode,
16652 gen_rtx_ASHIFT (SImode, scratch,
16653 GEN_INT (8)),
16654 gen_rtx_SUBREG (SImode, operands[0], 0)));
16655 }
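
/* For illustration, on a little-endian target the function above
   synthesizes the halfword load roughly as (register names are
   illustrative, OFF is the resolved offset):

	ldrb	rS, [rB, #OFF]		@ low byte into the scratch
	ldrb	rD, [rB, #OFF+1]	@ high byte into the destination
	orr	rD, rS, rD, lsl #8  */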
16656
16657 /* Handle storing a half-word to memory during reload by synthesizing as two
16658 byte stores. Take care not to clobber the input values until after we
16659 have moved them somewhere safe. This code assumes that if the DImode
16660 scratch in operands[2] overlaps either the input value or output address
16661 in some way, then that value must die in this insn (we absolutely need
16662 two scratch registers for some corner cases). */
16663 void
16664 arm_reload_out_hi (rtx *operands)
16665 {
16666 rtx ref = operands[0];
16667 rtx outval = operands[1];
16668 rtx base, scratch;
16669 HOST_WIDE_INT offset = 0;
16670
16671 if (SUBREG_P (ref))
16672 {
16673 offset = SUBREG_BYTE (ref);
16674 ref = SUBREG_REG (ref);
16675 }
16676
16677 if (REG_P (ref))
16678 {
16679 /* We have a pseudo which has been spilt onto the stack; there
16680 are two cases here: the first where there is a simple
16681 stack-slot replacement and a second where the stack-slot is
16682 out of range, or is used as a subreg. */
16683 if (reg_equiv_mem (REGNO (ref)))
16684 {
16685 ref = reg_equiv_mem (REGNO (ref));
16686 base = find_replacement (&XEXP (ref, 0));
16687 }
16688 else
16689 /* The slot is out of range, or was dressed up in a SUBREG. */
16690 base = reg_equiv_address (REGNO (ref));
16691
16692 /* PR 62254: If there is no equivalent memory location then just move
16693 the value as an SImode register move. This happens when the target
16694 architecture variant does not have an HImode register move. */
16695 if (base == NULL)
16696 {
16697 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16698
16699 if (REG_P (outval))
16700 {
16701 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16702 gen_rtx_SUBREG (SImode, outval, 0)));
16703 }
16704 else /* SUBREG_P (outval) */
16705 {
16706 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16707 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16708 SUBREG_REG (outval)));
16709 else
16710 /* FIXME: Handle other cases ? */
16711 gcc_unreachable ();
16712 }
16713 return;
16714 }
16715 }
16716 else
16717 base = find_replacement (&XEXP (ref, 0));
16718
16719 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16720
16721 /* Handle the case where the address is too complex to be offset by 1. */
16722 if (GET_CODE (base) == MINUS
16723 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16724 {
16725 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16726
16727 /* Be careful not to destroy OUTVAL. */
16728 if (reg_overlap_mentioned_p (base_plus, outval))
16729 {
16730 /* Updating base_plus might destroy outval, see if we can
16731 swap the scratch and base_plus. */
16732 if (!reg_overlap_mentioned_p (scratch, outval))
16733 std::swap (scratch, base_plus);
16734 else
16735 {
16736 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16737
16738 /* Be conservative and copy OUTVAL into the scratch now,
16739 this should only be necessary if outval is a subreg
16740 of something larger than a word. */
16741 /* XXX Might this clobber base? I can't see how it can,
16742 since scratch is known to overlap with OUTVAL, and
16743 must be wider than a word. */
16744 emit_insn (gen_movhi (scratch_hi, outval));
16745 outval = scratch_hi;
16746 }
16747 }
16748
16749 emit_set_insn (base_plus, base);
16750 base = base_plus;
16751 }
16752 else if (GET_CODE (base) == PLUS)
16753 {
16754 /* The addend must be CONST_INT, or we would have dealt with it above. */
16755 HOST_WIDE_INT hi, lo;
16756
16757 offset += INTVAL (XEXP (base, 1));
16758 base = XEXP (base, 0);
16759
16760 /* Rework the address into a legal sequence of insns. */
16761 /* Valid range for lo is -4095 -> 4095 */
16762 lo = (offset >= 0
16763 ? (offset & 0xfff)
16764 : -((-offset) & 0xfff));
16765
16766 /* Corner case: if lo is the max offset, then we would be out of range
16767 once we have added the additional 1 below, so bump the msb into the
16768 pre-loading insn(s). */
16769 if (lo == 4095)
16770 lo &= 0x7ff;
16771
16772 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16773 ^ (HOST_WIDE_INT) 0x80000000)
16774 - (HOST_WIDE_INT) 0x80000000);
16775
16776 gcc_assert (hi + lo == offset);
16777
16778 if (hi != 0)
16779 {
16780 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16781
16782 /* Be careful not to destroy OUTVAL. */
16783 if (reg_overlap_mentioned_p (base_plus, outval))
16784 {
16785 /* Updating base_plus might destroy outval, see if we
16786 can swap the scratch and base_plus. */
16787 if (!reg_overlap_mentioned_p (scratch, outval))
16788 std::swap (scratch, base_plus);
16789 else
16790 {
16791 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16792
16793 /* Be conservative and copy outval into scratch now,
16794 this should only be necessary if outval is a
16795 subreg of something larger than a word. */
16796 /* XXX Might this clobber base? I can't see how it
16797 can, since scratch is known to overlap with
16798 outval. */
16799 emit_insn (gen_movhi (scratch_hi, outval));
16800 outval = scratch_hi;
16801 }
16802 }
16803
16804 /* Get the base address; addsi3 knows how to handle constants
16805 that require more than one insn. */
16806 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16807 base = base_plus;
16808 offset = lo;
16809 }
16810 }
16811
16812 if (BYTES_BIG_ENDIAN)
16813 {
16814 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16815 plus_constant (Pmode, base,
16816 offset + 1)),
16817 gen_lowpart (QImode, outval)));
16818 emit_insn (gen_lshrsi3 (scratch,
16819 gen_rtx_SUBREG (SImode, outval, 0),
16820 GEN_INT (8)));
16821 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16822 offset)),
16823 gen_lowpart (QImode, scratch)));
16824 }
16825 else
16826 {
16827 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16828 offset)),
16829 gen_lowpart (QImode, outval)));
16830 emit_insn (gen_lshrsi3 (scratch,
16831 gen_rtx_SUBREG (SImode, outval, 0),
16832 GEN_INT (8)));
16833 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16834 plus_constant (Pmode, base,
16835 offset + 1)),
16836 gen_lowpart (QImode, scratch)));
16837 }
16838 }
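
/* For illustration, on a little-endian target the function above
   synthesizes the halfword store roughly as (register names are
   illustrative, OFF is the resolved offset):

	strb	rV, [rB, #OFF]		@ low byte of the value
	mov	rS, rV, lsr #8		@ move the high byte into the scratch
	strb	rS, [rB, #OFF+1]  */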
16839
16840 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16841 (padded to the size of a word) should be passed in a register. */
16842
16843 static bool
16844 arm_must_pass_in_stack (const function_arg_info &arg)
16845 {
16846 if (TARGET_AAPCS_BASED)
16847 return must_pass_in_stack_var_size (arg);
16848 else
16849 return must_pass_in_stack_var_size_or_pad (arg);
16850 }
16851
16852
16853 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16854 byte of a stack argument has useful data. For legacy APCS ABIs we use
16855 the default. For AAPCS based ABIs small aggregate types are placed
16856 in the lowest memory address. */
16857
16858 static pad_direction
16859 arm_function_arg_padding (machine_mode mode, const_tree type)
16860 {
16861 if (!TARGET_AAPCS_BASED)
16862 return default_function_arg_padding (mode, type);
16863
16864 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16865 return PAD_DOWNWARD;
16866
16867 return PAD_UPWARD;
16868 }
16869
16870
16871 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16872 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16873 register has useful data, and return the opposite if the most
16874 significant byte does. */
16875
16876 bool
16877 arm_pad_reg_upward (machine_mode mode,
16878 tree type, int first ATTRIBUTE_UNUSED)
16879 {
16880 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16881 {
16882 /* For AAPCS, small aggregates, small fixed-point types,
16883 and small complex types are always padded upwards. */
16884 if (type)
16885 {
16886 if ((AGGREGATE_TYPE_P (type)
16887 || TREE_CODE (type) == COMPLEX_TYPE
16888 || FIXED_POINT_TYPE_P (type))
16889 && int_size_in_bytes (type) <= 4)
16890 return true;
16891 }
16892 else
16893 {
16894 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16895 && GET_MODE_SIZE (mode) <= 4)
16896 return true;
16897 }
16898 }
16899
16900 /* Otherwise, use default padding. */
16901 return !BYTES_BIG_ENDIAN;
16902 }
16903
16904 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16905 assuming that the address in the base register is word aligned. */
16906 bool
16907 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16908 {
16909 HOST_WIDE_INT max_offset;
16910
16911 /* Offset must be a multiple of 4 in Thumb mode. */
16912 if (TARGET_THUMB2 && ((offset & 3) != 0))
16913 return false;
16914
16915 if (TARGET_THUMB2)
16916 max_offset = 1020;
16917 else if (TARGET_ARM)
16918 max_offset = 255;
16919 else
16920 return false;
16921
16922 return ((offset <= max_offset) && (offset >= -max_offset));
16923 }
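
/* For example, in Thumb-2 state an offset of 1020 is accepted, 1022 is
   rejected (not a multiple of 4) and 1024 is rejected (out of range);
   in ARM state any offset in the range [-255, 255] passes this check.  */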
16924
16925 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16926 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16927 Assumes that the address in the base register RN is word aligned. Pattern
16928 guarantees that both memory accesses use the same base register,
16929 the offsets are constants within the range, and the gap between the offsets is 4.
16930 If reload has completed, also check that the registers are legal. WBACK indicates
16931 whether the address is updated. LOAD indicates whether the memory access is a load or a store. */
16932 bool
16933 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16934 bool wback, bool load)
16935 {
16936 unsigned int t, t2, n;
16937
16938 if (!reload_completed)
16939 return true;
16940
16941 if (!offset_ok_for_ldrd_strd (offset))
16942 return false;
16943
16944 t = REGNO (rt);
16945 t2 = REGNO (rt2);
16946 n = REGNO (rn);
16947
16948 if ((TARGET_THUMB2)
16949 && ((wback && (n == t || n == t2))
16950 || (t == SP_REGNUM)
16951 || (t == PC_REGNUM)
16952 || (t2 == SP_REGNUM)
16953 || (t2 == PC_REGNUM)
16954 || (!load && (n == PC_REGNUM))
16955 || (load && (t == t2))
16956 /* Triggers Cortex-M3 LDRD errata. */
16957 || (!wback && load && fix_cm3_ldrd && (n == t))))
16958 return false;
16959
16960 if ((TARGET_ARM)
16961 && ((wback && (n == t || n == t2))
16962 || (t2 == PC_REGNUM)
16963 || (t % 2 != 0) /* First destination register is not even. */
16964 || (t2 != t + 1)
16965 /* PC can be used as base register (for offset addressing only),
16966 but it is deprecated. */
16967 || (n == PC_REGNUM)))
16968 return false;
16969
16970 return true;
16971 }
16972
16973 /* Return true if a 64-bit access with alignment ALIGN and with a
16974 constant offset OFFSET from the base pointer is permitted on this
16975 architecture. */
16976 static bool
16977 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16978 {
16979 return (unaligned_access
16980 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16981 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16982 }
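
/* For example, if unaligned accesses are permitted then word alignment
   with a word-multiple offset suffices (align == 32, offset == 4 is OK);
   otherwise doubleword alignment and a doubleword-multiple offset are
   required, so the same case is rejected.  */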
16983
16984 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16985 operand MEM's address contains an immediate offset from the base
16986 register and has no side effects, in which case it sets BASE,
16987 OFFSET and ALIGN accordingly. */
16988 static bool
16989 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16990 {
16991 rtx addr;
16992
16993 gcc_assert (base != NULL && offset != NULL);
16994
16995 /* TODO: Handle more general memory operand patterns, such as
16996 PRE_DEC and PRE_INC. */
16997
16998 if (side_effects_p (mem))
16999 return false;
17000
17001 /* Can't deal with subregs. */
17002 if (SUBREG_P (mem))
17003 return false;
17004
17005 gcc_assert (MEM_P (mem));
17006
17007 *offset = const0_rtx;
17008 *align = MEM_ALIGN (mem);
17009
17010 addr = XEXP (mem, 0);
17011
17012 /* If addr isn't valid for DImode, then we can't handle it. */
17013 if (!arm_legitimate_address_p (DImode, addr,
17014 reload_in_progress || reload_completed))
17015 return false;
17016
17017 if (REG_P (addr))
17018 {
17019 *base = addr;
17020 return true;
17021 }
17022 else if (GET_CODE (addr) == PLUS)
17023 {
17024 *base = XEXP (addr, 0);
17025 *offset = XEXP (addr, 1);
17026 return (REG_P (*base) && CONST_INT_P (*offset));
17027 }
17028
17029 return false;
17030 }
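
/* For example, (mem:SI (plus:SI (reg:SI r4) (const_int 8))) yields
   *BASE == (reg:SI r4) and *OFFSET == (const_int 8), assuming the
   address is legitimate for DImode; an auto-increment address such as
   (post_inc ...) is rejected by the side_effects_p check above.  */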
17031
17032 /* Called from a peephole2 to replace two word-size accesses with a
17033 single LDRD/STRD instruction. Returns true iff we can generate a
17034 new instruction sequence. That is, both accesses use the same base
17035 register and the gap between constant offsets is 4. This function
17036 may reorder its operands to match ldrd/strd RTL templates.
17037 OPERANDS are the operands found by the peephole matcher;
17038 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17039 corresponding memory operands. LOAD indicates whether the access
17040 is a load or a store. CONST_STORE indicates a store of constant
17041 integer values held in OPERANDS[4,5], and assumes that the pattern
17042 is four insns long, for the purpose of checking dead registers.
17043 COMMUTE indicates that register operands may be reordered. */
17044 bool
17045 gen_operands_ldrd_strd (rtx *operands, bool load,
17046 bool const_store, bool commute)
17047 {
17048 int nops = 2;
17049 HOST_WIDE_INT offsets[2], offset, align[2];
17050 rtx base = NULL_RTX;
17051 rtx cur_base, cur_offset, tmp;
17052 int i, gap;
17053 HARD_REG_SET regset;
17054
17055 gcc_assert (!const_store || !load);
17056 /* Check that the memory references are immediate offsets from the
17057 same base register. Extract the base register, the destination
17058 registers, and the corresponding memory offsets. */
17059 for (i = 0; i < nops; i++)
17060 {
17061 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17062 &align[i]))
17063 return false;
17064
17065 if (i == 0)
17066 base = cur_base;
17067 else if (REGNO (base) != REGNO (cur_base))
17068 return false;
17069
17070 offsets[i] = INTVAL (cur_offset);
17071 if (GET_CODE (operands[i]) == SUBREG)
17072 {
17073 tmp = SUBREG_REG (operands[i]);
17074 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17075 operands[i] = tmp;
17076 }
17077 }
17078
17079 /* Make sure there is no dependency between the individual loads. */
17080 if (load && REGNO (operands[0]) == REGNO (base))
17081 return false; /* RAW */
17082
17083 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17084 return false; /* WAW */
17085
17086 /* If the same input register is used in both stores
17087 when storing different constants, try to find a free register.
17088 For example, the code
17089 mov r0, 0
17090 str r0, [r2]
17091 mov r0, 1
17092 str r0, [r2, #4]
17093 can be transformed into
17094 mov r1, 0
17095 mov r0, 1
17096 strd r1, r0, [r2]
17097 in Thumb mode assuming that r1 is free.
17098 For ARM mode do the same but only if the starting register
17099 can be made to be even. */
17100 if (const_store
17101 && REGNO (operands[0]) == REGNO (operands[1])
17102 && INTVAL (operands[4]) != INTVAL (operands[5]))
17103 {
17104 if (TARGET_THUMB2)
17105 {
17106 CLEAR_HARD_REG_SET (regset);
17107 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17108 if (tmp == NULL_RTX)
17109 return false;
17110
17111 /* Use the new register in the first load to ensure that
17112 if the original input register is not dead after peephole,
17113 then it will have the correct constant value. */
17114 operands[0] = tmp;
17115 }
17116 else if (TARGET_ARM)
17117 {
17118 int regno = REGNO (operands[0]);
17119 if (!peep2_reg_dead_p (4, operands[0]))
17120 {
17121 /* When the input register is even and is not dead after the
17122 pattern, it has to hold the second constant but we cannot
17123 form a legal STRD in ARM mode with this register as the second
17124 register. */
17125 if (regno % 2 == 0)
17126 return false;
17127
17128 /* Is regno-1 free? */
17129 SET_HARD_REG_SET (regset);
17130 CLEAR_HARD_REG_BIT (regset, regno - 1);
17131 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17132 if (tmp == NULL_RTX)
17133 return false;
17134
17135 operands[0] = tmp;
17136 }
17137 else
17138 {
17139 /* Find a DImode register. */
17140 CLEAR_HARD_REG_SET (regset);
17141 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17142 if (tmp != NULL_RTX)
17143 {
17144 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17145 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17146 }
17147 else
17148 {
17149 /* Can we use the input register to form a DI register? */
17150 SET_HARD_REG_SET (regset);
17151 CLEAR_HARD_REG_BIT (regset,
17152 regno % 2 == 0 ? regno + 1 : regno - 1);
17153 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17154 if (tmp == NULL_RTX)
17155 return false;
17156 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17157 }
17158 }
17159
17160 gcc_assert (operands[0] != NULL_RTX);
17161 gcc_assert (operands[1] != NULL_RTX);
17162 gcc_assert (REGNO (operands[0]) % 2 == 0);
17163 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17164 }
17165 }
17166
17167 /* Make sure the instructions are ordered with lower memory access first. */
17168 if (offsets[0] > offsets[1])
17169 {
17170 gap = offsets[0] - offsets[1];
17171 offset = offsets[1];
17172
17173 /* Swap the instructions such that lower memory is accessed first. */
17174 std::swap (operands[0], operands[1]);
17175 std::swap (operands[2], operands[3]);
17176 std::swap (align[0], align[1]);
17177 if (const_store)
17178 std::swap (operands[4], operands[5]);
17179 }
17180 else
17181 {
17182 gap = offsets[1] - offsets[0];
17183 offset = offsets[0];
17184 }
17185
17186 /* Make sure accesses are to consecutive memory locations. */
17187 if (gap != GET_MODE_SIZE (SImode))
17188 return false;
17189
17190 if (!align_ok_ldrd_strd (align[0], offset))
17191 return false;
17192
17193 /* Make sure we generate legal instructions. */
17194 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17195 false, load))
17196 return true;
17197
17198 /* In Thumb state, where the registers are almost unconstrained, there
17199 is little hope of fixing it. */
17200 if (TARGET_THUMB2)
17201 return false;
17202
17203 if (load && commute)
17204 {
17205 /* Try reordering registers. */
17206 std::swap (operands[0], operands[1]);
17207 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17208 false, load))
17209 return true;
17210 }
17211
17212 if (const_store)
17213 {
17214 /* If input registers are dead after this pattern, they can be
17215 reordered or replaced by other registers that are free in the
17216 current pattern. */
17217 if (!peep2_reg_dead_p (4, operands[0])
17218 || !peep2_reg_dead_p (4, operands[1]))
17219 return false;
17220
17221 /* Try to reorder the input registers. */
17222 /* For example, the code
17223 mov r0, 0
17224 mov r1, 1
17225 str r1, [r2]
17226 str r0, [r2, #4]
17227 can be transformed into
17228 mov r1, 0
17229 mov r0, 1
17230 strd r0, r1, [r2]
17231 */
17232 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17233 false, false))
17234 {
17235 std::swap (operands[0], operands[1]);
17236 return true;
17237 }
17238
17239 /* Try to find a free DI register. */
17240 CLEAR_HARD_REG_SET (regset);
17241 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17242 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17243 while (true)
17244 {
17245 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17246 if (tmp == NULL_RTX)
17247 return false;
17248
17249 /* DREG must be an even-numbered register in DImode.
17250 Split it into SI registers. */
17251 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17252 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17253 gcc_assert (operands[0] != NULL_RTX);
17254 gcc_assert (operands[1] != NULL_RTX);
17255 gcc_assert (REGNO (operands[0]) % 2 == 0);
17256 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17257
17258 return (operands_ok_ldrd_strd (operands[0], operands[1],
17259 base, offset,
17260 false, load));
17261 }
17262 }
17263
17264 return false;
17265 }
17266
17267
17268 /* Return true if parallel execution of the two word-size accesses provided
17269 could be satisfied with a single LDRD/STRD instruction. Two word-size
17270 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17271 register operands and OPERANDS[2,3] are the corresponding memory operands.
17272 */
17273 bool
17274 valid_operands_ldrd_strd (rtx *operands, bool load)
17275 {
17276 int nops = 2;
17277 HOST_WIDE_INT offsets[2], offset, align[2];
17278 rtx base = NULL_RTX;
17279 rtx cur_base, cur_offset;
17280 int i, gap;
17281
17282 /* Check that the memory references are immediate offsets from the
17283 same base register. Extract the base register, the destination
17284 registers, and the corresponding memory offsets. */
17285 for (i = 0; i < nops; i++)
17286 {
17287 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17288 &align[i]))
17289 return false;
17290
17291 if (i == 0)
17292 base = cur_base;
17293 else if (REGNO (base) != REGNO (cur_base))
17294 return false;
17295
17296 offsets[i] = INTVAL (cur_offset);
17297 if (GET_CODE (operands[i]) == SUBREG)
17298 return false;
17299 }
17300
17301 if (offsets[0] > offsets[1])
17302 return false;
17303
17304 gap = offsets[1] - offsets[0];
17305 offset = offsets[0];
17306
17307 /* Make sure accesses are to consecutive memory locations. */
17308 if (gap != GET_MODE_SIZE (SImode))
17309 return false;
17310
17311 if (!align_ok_ldrd_strd (align[0], offset))
17312 return false;
17313
17314 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17315 false, load);
17316 }
17317
17318 \f
17319 /* Print a symbolic form of X to the debug file, F. */
17320 static void
17321 arm_print_value (FILE *f, rtx x)
17322 {
17323 switch (GET_CODE (x))
17324 {
17325 case CONST_INT:
17326 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17327 return;
17328
17329 case CONST_DOUBLE:
17330 {
17331 char fpstr[20];
17332 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17333 sizeof (fpstr), 0, 1);
17334 fputs (fpstr, f);
17335 }
17336 return;
17337
17338 case CONST_VECTOR:
17339 {
17340 int i;
17341
17342 fprintf (f, "<");
17343 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17344 {
17345 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17346 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17347 fputc (',', f);
17348 }
17349 fprintf (f, ">");
17350 }
17351 return;
17352
17353 case CONST_STRING:
17354 fprintf (f, "\"%s\"", XSTR (x, 0));
17355 return;
17356
17357 case SYMBOL_REF:
17358 fprintf (f, "`%s'", XSTR (x, 0));
17359 return;
17360
17361 case LABEL_REF:
17362 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17363 return;
17364
17365 case CONST:
17366 arm_print_value (f, XEXP (x, 0));
17367 return;
17368
17369 case PLUS:
17370 arm_print_value (f, XEXP (x, 0));
17371 fprintf (f, "+");
17372 arm_print_value (f, XEXP (x, 1));
17373 return;
17374
17375 case PC:
17376 fprintf (f, "pc");
17377 return;
17378
17379 default:
17380 fprintf (f, "????");
17381 return;
17382 }
17383 }
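
/* For example, (const (plus (symbol_ref "foo") (const_int 4))) is
   printed as `foo'+0x4, and an unhandled code prints "????".  */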
17384 \f
17385 /* Routines for manipulation of the constant pool. */
17386
17387 /* Arm instructions cannot load a large constant directly into a
17388 register; they have to come from a pc relative load. The constant
17389 must therefore be placed in the addressable range of the pc
17390 relative load. Depending on the precise pc relative load
17391 instruction the range is somewhere between 256 bytes and 4k. This
17392 means that we often have to dump a constant inside a function, and
17393 generate code to branch around it.
17394
17395 It is important to minimize this, since the branches will slow
17396 things down and make the code larger.
17397
17398 Normally we can hide the table after an existing unconditional
17399 branch so that there is no interruption of the flow, but in the
17400 worst case the code looks like this:
17401
17402 ldr rn, L1
17403 ...
17404 b L2
17405 align
17406 L1: .long value
17407 L2:
17408 ...
17409
17410 ldr rn, L3
17411 ...
17412 b L4
17413 align
17414 L3: .long value
17415 L4:
17416 ...
17417
17418 We fix this by performing a scan after scheduling, which notices
17419 which instructions need to have their operands fetched from the
17420 constant table and builds the table.
17421
17422 The algorithm starts by building a table of all the constants that
17423 need fixing up and all the natural barriers in the function (places
17424 where a constant table can be dropped without breaking the flow).
17425 For each fixup we note how far the pc-relative replacement will be
17426 able to reach and the offset of the instruction into the function.
17427
17428 Having built the table we then group the fixes together to form
17429 tables that are as large as possible (subject to addressing
17430 constraints) and emit each table of constants after the last
17431 barrier that is within range of all the instructions in the group.
17432 If a group does not contain a barrier, then we forcibly create one
17433 by inserting a jump instruction into the flow. Once the table has
17434 been inserted, the insns are then modified to reference the
17435 relevant entry in the pool.
17436
17437 Possible enhancements to the algorithm (not implemented) are:
17438
17439 1) For some processors and object formats, there may be benefit in
17440 aligning the pools to the start of cache lines; this alignment
17441 would need to be taken into account when calculating addressability
17442 of a pool. */
17443
17444 /* These typedefs are located at the start of this file, so that
17445 they can be used in the prototypes there. This comment is to
17446 remind readers of that fact so that the following structures
17447 can be understood more easily.
17448
17449 typedef struct minipool_node Mnode;
17450 typedef struct minipool_fixup Mfix; */
17451
17452 struct minipool_node
17453 {
17454 /* Doubly linked chain of entries. */
17455 Mnode * next;
17456 Mnode * prev;
17457 /* The maximum offset into the code at which this entry can be placed. While
17458 pushing fixes for forward references, all entries are sorted in order
17459 of increasing max_address. */
17460 HOST_WIDE_INT max_address;
17461 /* Similarly for an entry inserted for a backwards ref. */
17462 HOST_WIDE_INT min_address;
17463 /* The number of fixes referencing this entry. This can become zero
17464 if we "unpush" an entry. In this case we ignore the entry when we
17465 come to emit the code. */
17466 int refcount;
17467 /* The offset from the start of the minipool. */
17468 HOST_WIDE_INT offset;
17469 /* The value in the table. */
17470 rtx value;
17471 /* The mode of value. */
17472 machine_mode mode;
17473 /* The size of the value. With iWMMXt enabled
17474 sizes > 4 also imply an alignment of 8 bytes. */
17475 int fix_size;
17476 };
17477
17478 struct minipool_fixup
17479 {
17480 Mfix * next;
17481 rtx_insn * insn;
17482 HOST_WIDE_INT address;
17483 rtx * loc;
17484 machine_mode mode;
17485 int fix_size;
17486 rtx value;
17487 Mnode * minipool;
17488 HOST_WIDE_INT forwards;
17489 HOST_WIDE_INT backwards;
17490 };
17491
17492 /* Fixes less than a word need padding out to a word boundary. */
17493 #define MINIPOOL_FIX_SIZE(mode) \
17494 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
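
/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DFmode) is 8.  */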
17495
17496 static Mnode * minipool_vector_head;
17497 static Mnode * minipool_vector_tail;
17498 static rtx_code_label *minipool_vector_label;
17499 static int minipool_pad;
17500
17501 /* The linked list of all minipool fixes required for this function. */
17502 Mfix * minipool_fix_head;
17503 Mfix * minipool_fix_tail;
17504 /* The fix entry for the current minipool, once it has been placed. */
17505 Mfix * minipool_barrier;
17506
17507 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17508 #define JUMP_TABLES_IN_TEXT_SECTION 0
17509 #endif
17510
17511 static HOST_WIDE_INT
17512 get_jump_table_size (rtx_jump_table_data *insn)
17513 {
17514 /* ADDR_VECs only take room if read-only data goes into the text
17515 section. */
17516 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17517 {
17518 rtx body = PATTERN (insn);
17519 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17520 HOST_WIDE_INT size;
17521 HOST_WIDE_INT modesize;
17522
17523 modesize = GET_MODE_SIZE (GET_MODE (body));
17524 size = modesize * XVECLEN (body, elt);
17525 switch (modesize)
17526 {
17527 case 1:
17528 /* Round up size of TBB table to a halfword boundary. */
17529 size = (size + 1) & ~HOST_WIDE_INT_1;
17530 break;
17531 case 2:
17532 /* No padding necessary for TBH. */
17533 break;
17534 case 4:
17535 /* Add two bytes for alignment on Thumb. */
17536 if (TARGET_THUMB)
17537 size += 2;
17538 break;
17539 default:
17540 gcc_unreachable ();
17541 }
17542 return size;
17543 }
17544
17545 return 0;
17546 }
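
/* For example, when jump tables live in the text section, a TBB-style
   ADDR_DIFF_VEC with QImode entries and five labels occupies 5 bytes,
   rounded up to 6; a TBH-style table with HImode entries and five labels
   occupies exactly 10 bytes.  */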
17547
17548 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17549 function descriptor) into a register and the GOT address into the
17550 FDPIC register, returning an rtx for the register holding the
17551 function address. */
17552
17553 rtx
17554 arm_load_function_descriptor (rtx funcdesc)
17555 {
17556 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17557 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17558 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17559 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17560
17561 emit_move_insn (fnaddr_reg, fnaddr);
17562
17563 /* The ABI requires the entry point address to be loaded first, but
17564 since we cannot support lazy binding for lack of atomic load of
17565 two 32-bits values, we do not need to bother to prevent the
17566 previous load from being moved after that of the GOT address. */
17567 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17568
17569 return fnaddr_reg;
17570 }
17571
17572 /* Return the maximum amount of padding that will be inserted before
17573 label LABEL. */
17574 static HOST_WIDE_INT
17575 get_label_padding (rtx label)
17576 {
17577 HOST_WIDE_INT align, min_insn_size;
17578
17579 align = 1 << label_to_alignment (label).levels[0].log;
17580 min_insn_size = TARGET_THUMB ? 2 : 4;
17581 return align > min_insn_size ? align - min_insn_size : 0;
17582 }
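
/* For example, if LABEL is aligned to 8 bytes then on Thumb (minimum
   insn size 2) up to 6 bytes of padding may be inserted before it,
   while in ARM state (minimum insn size 4) the bound is 4 bytes.  */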
17583
17584 /* Move a minipool fix MP from its current location to before MAX_MP.
17585 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17586 constraints may need updating. */
17587 static Mnode *
17588 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17589 HOST_WIDE_INT max_address)
17590 {
17591 /* The code below assumes these are different. */
17592 gcc_assert (mp != max_mp);
17593
17594 if (max_mp == NULL)
17595 {
17596 if (max_address < mp->max_address)
17597 mp->max_address = max_address;
17598 }
17599 else
17600 {
17601 if (max_address > max_mp->max_address - mp->fix_size)
17602 mp->max_address = max_mp->max_address - mp->fix_size;
17603 else
17604 mp->max_address = max_address;
17605
17606 /* Unlink MP from its current position. Since max_mp is non-null,
17607 mp->prev must be non-null. */
17608 mp->prev->next = mp->next;
17609 if (mp->next != NULL)
17610 mp->next->prev = mp->prev;
17611 else
17612 minipool_vector_tail = mp->prev;
17613
17614 /* Re-insert it before MAX_MP. */
17615 mp->next = max_mp;
17616 mp->prev = max_mp->prev;
17617 max_mp->prev = mp;
17618
17619 if (mp->prev != NULL)
17620 mp->prev->next = mp;
17621 else
17622 minipool_vector_head = mp;
17623 }
17624
17625 /* Save the new entry. */
17626 max_mp = mp;
17627
17628 /* Scan over the preceding entries and adjust their addresses as
17629 required. */
17630 while (mp->prev != NULL
17631 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17632 {
17633 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17634 mp = mp->prev;
17635 }
17636
17637 return max_mp;
17638 }
17639
17640 /* Add a constant to the minipool for a forward reference. Returns the
17641 node added or NULL if the constant will not fit in this pool. */
17642 static Mnode *
17643 add_minipool_forward_ref (Mfix *fix)
17644 {
17645 /* If set, max_mp is the first pool_entry that has a lower
17646 constraint than the one we are trying to add. */
17647 Mnode * max_mp = NULL;
17648 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17649 Mnode * mp;
17650
17651 /* If the minipool starts before the end of FIX->INSN then this FIX
17652 cannot be placed into the current pool. Furthermore, adding the
17653 new constant pool entry may cause the pool to start FIX_SIZE bytes
17654 earlier. */
17655 if (minipool_vector_head &&
17656 (fix->address + get_attr_length (fix->insn)
17657 >= minipool_vector_head->max_address - fix->fix_size))
17658 return NULL;
17659
17660 /* Scan the pool to see if a constant with the same value has
17661 already been added. While we are doing this, also note the
17662 location where we must insert the constant if it doesn't already
17663 exist. */
17664 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17665 {
17666 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17667 && fix->mode == mp->mode
17668 && (!LABEL_P (fix->value)
17669 || (CODE_LABEL_NUMBER (fix->value)
17670 == CODE_LABEL_NUMBER (mp->value)))
17671 && rtx_equal_p (fix->value, mp->value))
17672 {
17673 /* More than one fix references this entry. */
17674 mp->refcount++;
17675 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17676 }
17677
17678 /* Note the insertion point if necessary. */
17679 if (max_mp == NULL
17680 && mp->max_address > max_address)
17681 max_mp = mp;
17682
17683 /* If we are inserting an 8-byte aligned quantity and
17684 we have not already found an insertion point, then
17685 make sure that all such 8-byte aligned quantities are
17686 placed at the start of the pool. */
17687 if (ARM_DOUBLEWORD_ALIGN
17688 && max_mp == NULL
17689 && fix->fix_size >= 8
17690 && mp->fix_size < 8)
17691 {
17692 max_mp = mp;
17693 max_address = mp->max_address;
17694 }
17695 }
17696
17697 /* The value is not currently in the minipool, so we need to create
17698 a new entry for it. If MAX_MP is NULL, the entry will be put on
17699 the end of the list since the placement is less constrained than
17700 any existing entry. Otherwise, we insert the new fix before
17701 MAX_MP and, if necessary, adjust the constraints on the other
17702 entries. */
17703 mp = XNEW (Mnode);
17704 mp->fix_size = fix->fix_size;
17705 mp->mode = fix->mode;
17706 mp->value = fix->value;
17707 mp->refcount = 1;
17708 /* Not yet required for a backwards ref. */
17709 mp->min_address = -65536;
17710
17711 if (max_mp == NULL)
17712 {
17713 mp->max_address = max_address;
17714 mp->next = NULL;
17715 mp->prev = minipool_vector_tail;
17716
17717 if (mp->prev == NULL)
17718 {
17719 minipool_vector_head = mp;
17720 minipool_vector_label = gen_label_rtx ();
17721 }
17722 else
17723 mp->prev->next = mp;
17724
17725 minipool_vector_tail = mp;
17726 }
17727 else
17728 {
17729 if (max_address > max_mp->max_address - mp->fix_size)
17730 mp->max_address = max_mp->max_address - mp->fix_size;
17731 else
17732 mp->max_address = max_address;
17733
17734 mp->next = max_mp;
17735 mp->prev = max_mp->prev;
17736 max_mp->prev = mp;
17737 if (mp->prev != NULL)
17738 mp->prev->next = mp;
17739 else
17740 minipool_vector_head = mp;
17741 }
17742
17743 /* Save the new entry. */
17744 max_mp = mp;
17745
17746 /* Scan over the preceding entries and adjust their addresses as
17747 required. */
17748 while (mp->prev != NULL
17749 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17750 {
17751 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17752 mp = mp->prev;
17753 }
17754
17755 return max_mp;
17756 }
17757
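/* Like move_minipool_fix_forward_ref, but for a backwards reference: tighten
   MP's minimum-address constraint to MIN_ADDRESS and, if MIN_MP is non-NULL,
   move MP so that it sits immediately after MIN_MP, then recompute the
   offsets and minimum-address constraints of the entries.  */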
17758 static Mnode *
17759 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17760 HOST_WIDE_INT min_address)
17761 {
17762 HOST_WIDE_INT offset;
17763
17764 /* The code below assumes these are different. */
17765 gcc_assert (mp != min_mp);
17766
17767 if (min_mp == NULL)
17768 {
17769 if (min_address > mp->min_address)
17770 mp->min_address = min_address;
17771 }
17772 else
17773 {
17774 /* We will adjust this below if it is too loose. */
17775 mp->min_address = min_address;
17776
17777 /* Unlink MP from its current position. Since min_mp is non-null,
17778 mp->next must be non-null. */
17779 mp->next->prev = mp->prev;
17780 if (mp->prev != NULL)
17781 mp->prev->next = mp->next;
17782 else
17783 minipool_vector_head = mp->next;
17784
17785 /* Reinsert it after MIN_MP. */
17786 mp->prev = min_mp;
17787 mp->next = min_mp->next;
17788 min_mp->next = mp;
17789 if (mp->next != NULL)
17790 mp->next->prev = mp;
17791 else
17792 minipool_vector_tail = mp;
17793 }
17794
17795 min_mp = mp;
17796
17797 offset = 0;
17798 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17799 {
17800 mp->offset = offset;
17801 if (mp->refcount > 0)
17802 offset += mp->fix_size;
17803
17804 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17805 mp->next->min_address = mp->min_address + mp->fix_size;
17806 }
17807
17808 return min_mp;
17809 }
17810
17811 /* Add a constant to the minipool for a backward reference. Returns the
17812 node added or NULL if the constant will not fit in this pool.
17813
17814 Note that the code for inserting a backwards reference can be
17815 somewhat confusing because the calculated offsets for each fix do
17816 not take into account the size of the pool (which is still under
17817 construction). */
17818 static Mnode *
17819 add_minipool_backward_ref (Mfix *fix)
17820 {
17821 /* If set, min_mp is the last pool_entry that has a lower constraint
17822 than the one we are trying to add. */
17823 Mnode *min_mp = NULL;
17824 /* This can be negative, since it is only a constraint. */
17825 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17826 Mnode *mp;
17827
17828 /* If we can't reach the current pool from this insn, or if we can't
17829 insert this entry at the end of the pool without pushing other
17830 fixes out of range, then we don't try. This ensures that we
17831 can't fail later on. */
17832 if (min_address >= minipool_barrier->address
17833 || (minipool_vector_tail->min_address + fix->fix_size
17834 >= minipool_barrier->address))
17835 return NULL;
17836
17837 /* Scan the pool to see if a constant with the same value has
17838 already been added. While we are doing this, also note the
17839 location where we must insert the constant if it doesn't already
17840 exist. */
17841 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17842 {
17843 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17844 && fix->mode == mp->mode
17845 && (!LABEL_P (fix->value)
17846 || (CODE_LABEL_NUMBER (fix->value)
17847 == CODE_LABEL_NUMBER (mp->value)))
17848 && rtx_equal_p (fix->value, mp->value)
17849 /* Check that there is enough slack to move this entry to the
17850 end of the table (this is conservative). */
17851 && (mp->max_address
17852 > (minipool_barrier->address
17853 + minipool_vector_tail->offset
17854 + minipool_vector_tail->fix_size)))
17855 {
17856 mp->refcount++;
17857 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17858 }
17859
17860 if (min_mp != NULL)
17861 mp->min_address += fix->fix_size;
17862 else
17863 {
17864 /* Note the insertion point if necessary. */
17865 if (mp->min_address < min_address)
17866 {
17867 /* For now, we do not allow the insertion of 8-byte alignment
17868 requiring nodes anywhere but at the start of the pool. */
17869 if (ARM_DOUBLEWORD_ALIGN
17870 && fix->fix_size >= 8 && mp->fix_size < 8)
17871 return NULL;
17872 else
17873 min_mp = mp;
17874 }
17875 else if (mp->max_address
17876 < minipool_barrier->address + mp->offset + fix->fix_size)
17877 {
17878 /* Inserting before this entry would push the fix beyond
17879 its maximum address (which can happen if we have
17880 re-located a forwards fix); force the new fix to come
17881 after it. */
17882 if (ARM_DOUBLEWORD_ALIGN
17883 && fix->fix_size >= 8 && mp->fix_size < 8)
17884 return NULL;
17885 else
17886 {
17887 min_mp = mp;
17888 min_address = mp->min_address + fix->fix_size;
17889 }
17890 }
17891 /* Do not insert a non-8-byte aligned quantity before 8-byte
17892 aligned quantities. */
17893 else if (ARM_DOUBLEWORD_ALIGN
17894 && fix->fix_size < 8
17895 && mp->fix_size >= 8)
17896 {
17897 min_mp = mp;
17898 min_address = mp->min_address + fix->fix_size;
17899 }
17900 }
17901 }
17902
17903 /* We need to create a new entry. */
17904 mp = XNEW (Mnode);
17905 mp->fix_size = fix->fix_size;
17906 mp->mode = fix->mode;
17907 mp->value = fix->value;
17908 mp->refcount = 1;
17909 mp->max_address = minipool_barrier->address + 65536;
17910
17911 mp->min_address = min_address;
17912
17913 if (min_mp == NULL)
17914 {
17915 mp->prev = NULL;
17916 mp->next = minipool_vector_head;
17917
17918 if (mp->next == NULL)
17919 {
17920 minipool_vector_tail = mp;
17921 minipool_vector_label = gen_label_rtx ();
17922 }
17923 else
17924 mp->next->prev = mp;
17925
17926 minipool_vector_head = mp;
17927 }
17928 else
17929 {
17930 mp->next = min_mp->next;
17931 mp->prev = min_mp;
17932 min_mp->next = mp;
17933
17934 if (mp->next != NULL)
17935 mp->next->prev = mp;
17936 else
17937 minipool_vector_tail = mp;
17938 }
17939
17940 /* Save the new entry. */
17941 min_mp = mp;
17942
17943 if (mp->prev)
17944 mp = mp->prev;
17945 else
17946 mp->offset = 0;
17947
17948 /* Scan over the following entries and adjust their offsets. */
17949 while (mp->next != NULL)
17950 {
17951 if (mp->next->min_address < mp->min_address + mp->fix_size)
17952 mp->next->min_address = mp->min_address + mp->fix_size;
17953
17954 if (mp->refcount)
17955 mp->next->offset = mp->offset + mp->fix_size;
17956 else
17957 mp->next->offset = mp->offset;
17958
17959 mp = mp->next;
17960 }
17961
17962 return min_mp;
17963 }
17964
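/* Record BARRIER as the barrier for the current minipool and assign an
   offset within the pool to each entry that is still referenced.  */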
17965 static void
17966 assign_minipool_offsets (Mfix *barrier)
17967 {
17968 HOST_WIDE_INT offset = 0;
17969 Mnode *mp;
17970
17971 minipool_barrier = barrier;
17972
17973 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17974 {
17975 mp->offset = offset;
17976
17977 if (mp->refcount > 0)
17978 offset += mp->fix_size;
17979 }
17980 }
17981
17982 /* Output the literal table */
17983 static void
17984 dump_minipool (rtx_insn *scan)
17985 {
17986 Mnode * mp;
17987 Mnode * nmp;
17988 int align64 = 0;
17989
17990 if (ARM_DOUBLEWORD_ALIGN)
17991 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17992 if (mp->refcount > 0 && mp->fix_size >= 8)
17993 {
17994 align64 = 1;
17995 break;
17996 }
17997
17998 if (dump_file)
17999 fprintf (dump_file,
18000 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18001 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18002
18003 scan = emit_label_after (gen_label_rtx (), scan);
18004 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18005 scan = emit_label_after (minipool_vector_label, scan);
18006
18007 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18008 {
18009 if (mp->refcount > 0)
18010 {
18011 if (dump_file)
18012 {
18013 fprintf (dump_file,
18014 ";; Offset %u, min %ld, max %ld ",
18015 (unsigned) mp->offset, (unsigned long) mp->min_address,
18016 (unsigned long) mp->max_address);
18017 arm_print_value (dump_file, mp->value);
18018 fputc ('\n', dump_file);
18019 }
18020
18021 rtx val = copy_rtx (mp->value);
18022
18023 switch (GET_MODE_SIZE (mp->mode))
18024 {
18025 #ifdef HAVE_consttable_1
18026 case 1:
18027 scan = emit_insn_after (gen_consttable_1 (val), scan);
18028 break;
18029
18030 #endif
18031 #ifdef HAVE_consttable_2
18032 case 2:
18033 scan = emit_insn_after (gen_consttable_2 (val), scan);
18034 break;
18035
18036 #endif
18037 #ifdef HAVE_consttable_4
18038 case 4:
18039 scan = emit_insn_after (gen_consttable_4 (val), scan);
18040 break;
18041
18042 #endif
18043 #ifdef HAVE_consttable_8
18044 case 8:
18045 scan = emit_insn_after (gen_consttable_8 (val), scan);
18046 break;
18047
18048 #endif
18049 #ifdef HAVE_consttable_16
18050 case 16:
18051 scan = emit_insn_after (gen_consttable_16 (val), scan);
18052 break;
18053
18054 #endif
18055 default:
18056 gcc_unreachable ();
18057 }
18058 }
18059
18060 nmp = mp->next;
18061 free (mp);
18062 }
18063
18064 minipool_vector_head = minipool_vector_tail = NULL;
18065 scan = emit_insn_after (gen_consttable_end (), scan);
18066 scan = emit_barrier_after (scan);
18067 }
18068
18069 /* Return the cost of forcibly inserting a barrier after INSN. */
18070 static int
18071 arm_barrier_cost (rtx_insn *insn)
18072 {
18073 /* Basing the location of the pool on the loop depth is preferable,
18074 but at the moment the basic block information seems to be
18075 corrupted by this stage of the compilation. */
18076 int base_cost = 50;
18077 rtx_insn *next = next_nonnote_insn (insn);
18078
18079 if (next != NULL && LABEL_P (next))
18080 base_cost -= 20;
18081
18082 switch (GET_CODE (insn))
18083 {
18084 case CODE_LABEL:
18085 /* It will always be better to place the table before the label, rather
18086 than after it. */
18087 return 50;
18088
18089 case INSN:
18090 case CALL_INSN:
18091 return base_cost;
18092
18093 case JUMP_INSN:
18094 return base_cost - 10;
18095
18096 default:
18097 return base_cost + 10;
18098 }
18099 }
18100
18101 /* Find the best place in the insn stream in the range
18102 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18103 Create the barrier by inserting a jump and add a new fix entry for
18104 it. */
18105 static Mfix *
18106 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18107 {
18108 HOST_WIDE_INT count = 0;
18109 rtx_barrier *barrier;
18110 rtx_insn *from = fix->insn;
18111 /* The instruction after which we will insert the jump. */
18112 rtx_insn *selected = NULL;
18113 int selected_cost;
18114 /* The address at which the jump instruction will be placed. */
18115 HOST_WIDE_INT selected_address;
18116 Mfix * new_fix;
18117 HOST_WIDE_INT max_count = max_address - fix->address;
18118 rtx_code_label *label = gen_label_rtx ();
18119
18120 selected_cost = arm_barrier_cost (from);
18121 selected_address = fix->address;
18122
18123 while (from && count < max_count)
18124 {
18125 rtx_jump_table_data *tmp;
18126 int new_cost;
18127
18128 /* This code shouldn't have been called if there was a natural barrier
18129 within range. */
18130 gcc_assert (!BARRIER_P (from));
18131
18132 /* Count the length of this insn. This must stay in sync with the
18133 code that pushes minipool fixes. */
18134 if (LABEL_P (from))
18135 count += get_label_padding (from);
18136 else
18137 count += get_attr_length (from);
18138
18139 /* If there is a jump table, add its length. */
18140 if (tablejump_p (from, NULL, &tmp))
18141 {
18142 count += get_jump_table_size (tmp);
18143
18144 /* Jump tables aren't in a basic block, so base the cost on
18145 the dispatch insn. If we select this location, we will
18146 still put the pool after the table. */
18147 new_cost = arm_barrier_cost (from);
18148
18149 if (count < max_count
18150 && (!selected || new_cost <= selected_cost))
18151 {
18152 selected = tmp;
18153 selected_cost = new_cost;
18154 selected_address = fix->address + count;
18155 }
18156
18157 /* Continue after the dispatch table. */
18158 from = NEXT_INSN (tmp);
18159 continue;
18160 }
18161
18162 new_cost = arm_barrier_cost (from);
18163
18164 if (count < max_count
18165 && (!selected || new_cost <= selected_cost))
18166 {
18167 selected = from;
18168 selected_cost = new_cost;
18169 selected_address = fix->address + count;
18170 }
18171
18172 from = NEXT_INSN (from);
18173 }
18174
18175 /* Make sure that we found a place to insert the jump. */
18176 gcc_assert (selected);
18177
18178 /* Create a new JUMP_INSN that branches around a barrier. */
18179 from = emit_jump_insn_after (gen_jump (label), selected);
18180 JUMP_LABEL (from) = label;
18181 barrier = emit_barrier_after (from);
18182 emit_label_after (label, barrier);
18183
18184 /* Create a minipool barrier entry for the new barrier. */
18185 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18186 new_fix->insn = barrier;
18187 new_fix->address = selected_address;
18188 new_fix->next = fix->next;
18189 fix->next = new_fix;
18190
18191 return new_fix;
18192 }
18193
18194 /* Record that there is a natural barrier in the insn stream at
18195 ADDRESS. */
18196 static void
18197 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18198 {
18199 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18200
18201 fix->insn = insn;
18202 fix->address = address;
18203
18204 fix->next = NULL;
18205 if (minipool_fix_head != NULL)
18206 minipool_fix_tail->next = fix;
18207 else
18208 minipool_fix_head = fix;
18209
18210 minipool_fix_tail = fix;
18211 }
18212
18213 /* Record INSN, which will need fixing up to load a value from the
18214 minipool. ADDRESS is the offset of the insn since the start of the
18215 function; LOC is a pointer to the part of the insn which requires
18216 fixing; VALUE is the constant that must be loaded, which is of type
18217 MODE. */
18218 static void
18219 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18220 machine_mode mode, rtx value)
18221 {
18222 gcc_assert (!arm_disable_literal_pool);
18223 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18224
18225 fix->insn = insn;
18226 fix->address = address;
18227 fix->loc = loc;
18228 fix->mode = mode;
18229 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18230 fix->value = value;
18231 fix->forwards = get_attr_pool_range (insn);
18232 fix->backwards = get_attr_neg_pool_range (insn);
18233 fix->minipool = NULL;
18234
18235 /* If an insn doesn't have a range defined for it, then it isn't
18236 expecting to be reworked by this code. Better to stop now than
18237 to generate duff assembly code. */
18238 gcc_assert (fix->forwards || fix->backwards);
18239
18240 /* If an entry requires 8-byte alignment then assume all constant pools
18241 require 4 bytes of padding. Trying to do this later on a per-pool
18242 basis is awkward because existing pool entries have to be modified. */
18243 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18244 minipool_pad = 4;
18245
18246 if (dump_file)
18247 {
18248 fprintf (dump_file,
18249 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18250 GET_MODE_NAME (mode),
18251 INSN_UID (insn), (unsigned long) address,
18252 -1 * (long)fix->backwards, (long)fix->forwards);
18253 arm_print_value (dump_file, fix->value);
18254 fprintf (dump_file, "\n");
18255 }
18256
18257 /* Add it to the chain of fixes. */
18258 fix->next = NULL;
18259
18260 if (minipool_fix_head != NULL)
18261 minipool_fix_tail->next = fix;
18262 else
18263 minipool_fix_head = fix;
18264
18265 minipool_fix_tail = fix;
18266 }
18267
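/* In other words, each fix constrains the pool to lie roughly in the window
   [ADDRESS - BACKWARDS, ADDRESS + FORWARDS] around the instruction (the
   exact bounds also account for the insn length and minipool_pad); the two
   range values come from the pool_range and neg_pool_range attributes of
   the insn.  */
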
18268 /* Return the maximum cost (in insns) that we allow for synthesizing a
18269 64-bit constant inline; a return value of 99 would mean that we always
18270 want to synthesize the value. */
18271 int
18272 arm_max_const_double_inline_cost ()
18273 {
18274 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18275 }
18276
18277 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18278 Returns the number of insns needed, or 99 if we don't know how to
18279 do it. */
18280 int
18281 arm_const_double_inline_cost (rtx val)
18282 {
18283 rtx lowpart, highpart;
18284 machine_mode mode;
18285
18286 mode = GET_MODE (val);
18287
18288 if (mode == VOIDmode)
18289 mode = DImode;
18290
18291 gcc_assert (GET_MODE_SIZE (mode) == 8);
18292
18293 lowpart = gen_lowpart (SImode, val);
18294 highpart = gen_highpart_mode (SImode, mode, val);
18295
18296 gcc_assert (CONST_INT_P (lowpart));
18297 gcc_assert (CONST_INT_P (highpart));
18298
18299 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18300 NULL_RTX, NULL_RTX, 0, 0)
18301 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18302 NULL_RTX, NULL_RTX, 0, 0));
18303 }
18304
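/* A worked example under the usual ARM immediate rules: for the 64-bit
   constant 0x000000000000ff00 the low part 0x0000ff00 and the high part 0
   each take a single instruction, so the function returns 2, which is within
   arm_max_const_double_inline_cost (), and such a constant would normally be
   synthesized inline rather than placed in a literal pool.  */
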
18305 /* Cost of loading a SImode constant. */
18306 static inline int
18307 arm_const_inline_cost (enum rtx_code code, rtx val)
18308 {
18309 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18310 NULL_RTX, NULL_RTX, 1, 0);
18311 }
18312
18313 /* Return true if it is worthwhile to split a 64-bit constant into two
18314 32-bit operations. This is the case if optimizing for size, or
18315 if we have load delay slots, or if one 32-bit part can be done with
18316 a single data operation. */
18317 bool
18318 arm_const_double_by_parts (rtx val)
18319 {
18320 machine_mode mode = GET_MODE (val);
18321 rtx part;
18322
18323 if (optimize_size || arm_ld_sched)
18324 return true;
18325
18326 if (mode == VOIDmode)
18327 mode = DImode;
18328
18329 part = gen_highpart_mode (SImode, mode, val);
18330
18331 gcc_assert (CONST_INT_P (part));
18332
18333 if (const_ok_for_arm (INTVAL (part))
18334 || const_ok_for_arm (~INTVAL (part)))
18335 return true;
18336
18337 part = gen_lowpart (SImode, val);
18338
18339 gcc_assert (CONST_INT_P (part));
18340
18341 if (const_ok_for_arm (INTVAL (part))
18342 || const_ok_for_arm (~INTVAL (part)))
18343 return true;
18344
18345 return false;
18346 }
18347
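/* For instance, leaving the early optimize_size/arm_ld_sched return aside
   and relying on the usual rule that an ARM immediate is an 8-bit value
   rotated right by an even amount: for 0x000003fc00000000 the high part
   0x000003fc is 0xff shifted left by two and hence encodable, so the
   function returns true; for 0x0101010101010101 neither 32-bit part nor its
   complement is encodable and the function returns false.  */
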
18348 /* Return true if it is possible to inline both the high and low parts
18349 of a 64-bit constant into 32-bit data processing instructions. */
18350 bool
18351 arm_const_double_by_immediates (rtx val)
18352 {
18353 machine_mode mode = GET_MODE (val);
18354 rtx part;
18355
18356 if (mode == VOIDmode)
18357 mode = DImode;
18358
18359 part = gen_highpart_mode (SImode, mode, val);
18360
18361 gcc_assert (CONST_INT_P (part));
18362
18363 if (!const_ok_for_arm (INTVAL (part)))
18364 return false;
18365
18366 part = gen_lowpart (SImode, val);
18367
18368 gcc_assert (CONST_INT_P (part));
18369
18370 if (!const_ok_for_arm (INTVAL (part)))
18371 return false;
18372
18373 return true;
18374 }
18375
18376 /* Scan INSN and note any of its operands that need fixing.
18377 If DO_PUSHES is false we do not actually push any of the fixups
18378 needed. */
18379 static void
18380 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18381 {
18382 int opno;
18383
18384 extract_constrain_insn (insn);
18385
18386 if (recog_data.n_alternatives == 0)
18387 return;
18388
18389 /* Fill in recog_op_alt with information about the constraints of
18390 this insn. */
18391 preprocess_constraints (insn);
18392
18393 const operand_alternative *op_alt = which_op_alt ();
18394 for (opno = 0; opno < recog_data.n_operands; opno++)
18395 {
18396 /* Things we need to fix can only occur in inputs. */
18397 if (recog_data.operand_type[opno] != OP_IN)
18398 continue;
18399
18400 /* If this alternative is a memory reference, then any mention
18401 of constants in this alternative is really to fool reload
18402 into allowing us to accept one there. We need to fix them up
18403 now so that we output the right code. */
18404 if (op_alt[opno].memory_ok)
18405 {
18406 rtx op = recog_data.operand[opno];
18407
18408 if (CONSTANT_P (op))
18409 {
18410 if (do_pushes)
18411 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18412 recog_data.operand_mode[opno], op);
18413 }
18414 else if (MEM_P (op)
18415 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18416 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18417 {
18418 if (do_pushes)
18419 {
18420 rtx cop = avoid_constant_pool_reference (op);
18421
18422 /* Casting the address of something to a mode narrower
18423 than a word can cause avoid_constant_pool_reference()
18424 to return the pool reference itself. That's no good to
18425 us here. Let's just hope that we can use the
18426 constant pool value directly. */
18427 if (op == cop)
18428 cop = get_pool_constant (XEXP (op, 0));
18429
18430 push_minipool_fix (insn, address,
18431 recog_data.operand_loc[opno],
18432 recog_data.operand_mode[opno], cop);
18433 }
18434
18435 }
18436 }
18437 }
18438
18439 return;
18440 }
18441
18442 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18443 and unions in the context of ARMv8-M Security Extensions. It is used as a
18444 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18445 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18446 or four masks, depending on whether it is being computed for a
18447 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18448 respectively. ARG_TYPE is the tree for the type of the argument or of a
18449 field within an argument. REGNO points to the register this argument or
18450 field starts in and is updated accordingly. STARTING_BIT is the bit this
18451 argument or field starts at, and LAST_USED_BIT tracks the last used bit
18452 and is likewise updated. */
18453
18454 static unsigned HOST_WIDE_INT
18455 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18456 uint32_t * padding_bits_to_clear,
18457 unsigned starting_bit, int * last_used_bit)
18458
18459 {
18460 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18461
18462 if (TREE_CODE (arg_type) == RECORD_TYPE)
18463 {
18464 unsigned current_bit = starting_bit;
18465 tree field;
18466 long int offset, size;
18467
18468
18469 field = TYPE_FIELDS (arg_type);
18470 while (field)
18471 {
18472 /* The offset within a structure is always an offset from
18473 the start of that structure. Make sure we take that into the
18474 calculation of the register based offset that we use here. */
18475 offset = starting_bit;
18476 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18477 offset %= 32;
18478
18479 /* This is the actual size of the field, for bitfields this is the
18480 bitfield width and not the container size. */
18481 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18482
18483 if (*last_used_bit != offset)
18484 {
18485 if (offset < *last_used_bit)
18486 {
18487 /* This field's offset is before the 'last_used_bit', that
18488 means this field goes on the next register. So we need to
18489 pad the rest of the current register and increase the
18490 register number. */
18491 uint32_t mask;
18492 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18493 mask++;
18494
18495 padding_bits_to_clear[*regno] |= mask;
18496 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18497 (*regno)++;
18498 }
18499 else
18500 {
18501 /* Otherwise we pad the bits between the last field's end and
18502 the start of the new field. */
18503 uint32_t mask;
18504
18505 mask = ((uint32_t)-1) >> (32 - offset);
18506 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18507 padding_bits_to_clear[*regno] |= mask;
18508 }
18509 current_bit = offset;
18510 }
18511
18512 /* Calculate further padding bits for inner structs/unions too. */
18513 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18514 {
18515 *last_used_bit = current_bit;
18516 not_to_clear_reg_mask
18517 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18518 padding_bits_to_clear, offset,
18519 last_used_bit);
18520 }
18521 else
18522 {
18523 /* Update 'current_bit' with this field's size. If the
18524 'current_bit' lies in a subsequent register, update 'regno' and
18525 reset 'current_bit' to point to the current bit in that new
18526 register. */
18527 current_bit += size;
18528 while (current_bit >= 32)
18529 {
18530 current_bit-=32;
18531 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18532 (*regno)++;
18533 }
18534 *last_used_bit = current_bit;
18535 }
18536
18537 field = TREE_CHAIN (field);
18538 }
18539 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18540 }
18541 else if (TREE_CODE (arg_type) == UNION_TYPE)
18542 {
18543 tree field, field_t;
18544 int i, regno_t, field_size;
18545 int max_reg = -1;
18546 int max_bit = -1;
18547 uint32_t mask;
18548 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18549 = {-1, -1, -1, -1};
18550
18551 /* To compute the padding bits in a union we only consider a bit as
18552 padding if, for every field in the union, it is either a padding bit in
18553 that field or falls outside that field's size. */
18554 field = TYPE_FIELDS (arg_type);
18555 while (field)
18556 {
18557 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18558 = {0U, 0U, 0U, 0U};
18559 int last_used_bit_t = *last_used_bit;
18560 regno_t = *regno;
18561 field_t = TREE_TYPE (field);
18562
18563 /* If the field's type is either a record or a union make sure to
18564 compute their padding bits too. */
18565 if (RECORD_OR_UNION_TYPE_P (field_t))
18566 not_to_clear_reg_mask
18567 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18568 &padding_bits_to_clear_t[0],
18569 starting_bit, &last_used_bit_t);
18570 else
18571 {
18572 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18573 regno_t = (field_size / 32) + *regno;
18574 last_used_bit_t = (starting_bit + field_size) % 32;
18575 }
18576
18577 for (i = *regno; i < regno_t; i++)
18578 {
18579 /* For all but the last register used by this field only keep the
18580 padding bits that were padding bits in this field. */
18581 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18582 }
18583
18584 /* For the last register, keep all padding bits that were padding
18585 bits in this field and any padding bits that are still valid
18586 as padding bits but fall outside of this field's size. */
18587 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18588 padding_bits_to_clear_res[regno_t]
18589 &= padding_bits_to_clear_t[regno_t] | mask;
18590
18591 /* Update the maximum size of the fields in terms of registers used
18592 ('max_reg') and the 'last_used_bit' in said register. */
18593 if (max_reg < regno_t)
18594 {
18595 max_reg = regno_t;
18596 max_bit = last_used_bit_t;
18597 }
18598 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18599 max_bit = last_used_bit_t;
18600
18601 field = TREE_CHAIN (field);
18602 }
18603
18604 /* Update the current padding_bits_to_clear using the intersection of the
18605 padding bits of all the fields. */
18606 for (i=*regno; i < max_reg; i++)
18607 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18608
18609 /* Do not keep trailing padding bits; we do not know yet whether this
18610 is the end of the argument. */
18611 mask = ((uint32_t) 1 << max_bit) - 1;
18612 padding_bits_to_clear[max_reg]
18613 |= padding_bits_to_clear_res[max_reg] & mask;
18614
18615 *regno = max_reg;
18616 *last_used_bit = max_bit;
18617 }
18618 else
18619 /* This function should only be used for structs and unions. */
18620 gcc_unreachable ();
18621
18622 return not_to_clear_reg_mask;
18623 }
18624
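/* A worked example, ignoring bit-fields and over-aligned members: for

       struct s { unsigned char a; unsigned short b; };

   passed or returned in r0, field A occupies bits 0-7, bits 8-15 are
   alignment padding and field B occupies bits 16-31.  The function records
   0x0000ff00 in padding_bits_to_clear[0] and leaves *LAST_USED_BIT at 0
   because the struct ends exactly on a register boundary, so the caller
   below then drops the bit for the unused next register and only r0 remains
   marked as holding part of the value.  */
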
18625 /* In the context of ARMv8-M Security Extensions, this function is used for both
18626 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18627 registers are used when returning or passing arguments, which is then
18628 returned as a mask. It will also compute a mask to indicate padding/unused
18629 bits for each of these registers, passing it back through the
18630 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18631 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18632 the starting register used to pass this argument or return value is passed
18633 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18634 for struct and union types. */
18635
18636 static unsigned HOST_WIDE_INT
18637 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18638 uint32_t * padding_bits_to_clear)
18639
18640 {
18641 int last_used_bit = 0;
18642 unsigned HOST_WIDE_INT not_to_clear_mask;
18643
18644 if (RECORD_OR_UNION_TYPE_P (arg_type))
18645 {
18646 not_to_clear_mask
18647 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18648 padding_bits_to_clear, 0,
18649 &last_used_bit);
18650
18651
18652 /* If the 'last_used_bit' is not zero, that means we are still using a
18653 part of the last 'regno'. In such cases we must clear the trailing
18654 bits. Otherwise we are not using regno and we should mark it as to
18655 clear. */
18656 if (last_used_bit != 0)
18657 padding_bits_to_clear[regno]
18658 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18659 else
18660 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18661 }
18662 else
18663 {
18664 not_to_clear_mask = 0;
18665 /* We are not dealing with structs nor unions. So these arguments may be
18666 passed in floating point registers too. In some cases a BLKmode is
18667 used when returning or passing arguments in multiple VFP registers. */
18668 if (GET_MODE (arg_rtx) == BLKmode)
18669 {
18670 int i, arg_regs;
18671 rtx reg;
18672
18673 /* This should really only occur when dealing with the hard-float
18674 ABI. */
18675 gcc_assert (TARGET_HARD_FLOAT_ABI);
18676
18677 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18678 {
18679 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18680 gcc_assert (REG_P (reg));
18681
18682 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18683
18684 /* If we are dealing with DF mode, make sure we don't
18685 clear either of the registers it addresses. */
18686 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18687 if (arg_regs > 1)
18688 {
18689 unsigned HOST_WIDE_INT mask;
18690 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18691 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18692 not_to_clear_mask |= mask;
18693 }
18694 }
18695 }
18696 else
18697 {
18698 /* Otherwise we can rely on the MODE to determine how many registers
18699 are being used by this argument. */
18700 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18701 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18702 if (arg_regs > 1)
18703 {
18704 unsigned HOST_WIDE_INT
18705 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18706 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18707 not_to_clear_mask |= mask;
18708 }
18709 }
18710 }
18711
18712 return not_to_clear_mask;
18713 }
18714
18715 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call
18716 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18717 which registers are to be fully cleared, using the value in register
18718 CLEARING_REG if more efficient. The PADDING_BITS_TO_CLEAR array of
18719 PADDING_BITS_LEN entries gives the bits that need to be cleared in
18720 caller-saved core registers, with SCRATCH_REG used as a scratch register.
18721
18722 NOTE: one of the following three conditions must hold:
18723 - SCRATCH_REG is a low register
18724 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18725 in TO_CLEAR_BITMAP)
18726 - CLEARING_REG is a low register. */
18727
18728 static void
18729 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18730 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18731 {
18732 bool saved_clearing = false;
18733 rtx saved_clearing_reg = NULL_RTX;
18734 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18735
18736 gcc_assert (arm_arch_cmse);
18737
18738 if (!bitmap_empty_p (to_clear_bitmap))
18739 {
18740 minregno = bitmap_first_set_bit (to_clear_bitmap);
18741 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18742 }
18743 clearing_regno = REGNO (clearing_reg);
18744
18745 /* Clear padding bits. */
18746 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18747 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18748 {
18749 uint64_t mask;
18750 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18751
18752 if (padding_bits_to_clear[i] == 0)
18753 continue;
18754
18755 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18756 CLEARING_REG as scratch. */
18757 if (TARGET_THUMB1
18758 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18759 {
18760 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18761 such that we can use clearing_reg to clear the unused bits in the
18762 arguments. */
18763 if ((clearing_regno > maxregno
18764 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18765 && !saved_clearing)
18766 {
18767 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18768 emit_move_insn (scratch_reg, clearing_reg);
18769 saved_clearing = true;
18770 saved_clearing_reg = scratch_reg;
18771 }
18772 scratch_reg = clearing_reg;
18773 }
18774
18775 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18776 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18777 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18778
18779 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18780 mask = (~padding_bits_to_clear[i]) >> 16;
18781 rtx16 = gen_int_mode (16, SImode);
18782 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18783 if (mask)
18784 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18785
18786 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18787 }
18788 if (saved_clearing)
18789 emit_move_insn (clearing_reg, saved_clearing_reg);
18790
18791
18792 /* Clear full registers. */
18793
18794 if (TARGET_HAVE_FPCXT_CMSE)
18795 {
18796 rtvec vunspec_vec;
18797 int i, j, k, nb_regs;
18798 rtx use_seq, par, reg, set, vunspec;
18799 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18800 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18801 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18802
18803 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18804 {
18805 /* Find next register to clear and exit if none. */
18806 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18807 if (i > maxregno)
18808 break;
18809
18810 /* Compute number of consecutive registers to clear. */
18811 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18812 j++);
18813 nb_regs = j - i;
18814
18815 /* Create VSCCLRM RTX pattern. */
18816 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18817 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18818 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18819 VUNSPEC_VSCCLRM_VPR);
18820 XVECEXP (par, 0, 0) = vunspec;
18821
18822 /* Insert VFP register clearing RTX in the pattern. */
18823 start_sequence ();
18824 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18825 {
18826 if (!bitmap_bit_p (to_clear_bitmap, j))
18827 continue;
18828
18829 reg = gen_rtx_REG (SFmode, j);
18830 set = gen_rtx_SET (reg, const0_rtx);
18831 XVECEXP (par, 0, k++) = set;
18832 emit_use (reg);
18833 }
18834 use_seq = get_insns ();
18835 end_sequence ();
18836
18837 emit_insn_after (use_seq, emit_insn (par));
18838 }
18839
18840 /* Get set of core registers to clear. */
18841 bitmap_clear (core_regs_bitmap);
18842 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18843 IP_REGNUM - R0_REGNUM + 1);
18844 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18845 core_regs_bitmap);
18846 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18847
18848 if (bitmap_empty_p (to_clear_core_bitmap))
18849 return;
18850
18851 /* Create clrm RTX pattern. */
18852 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18853 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18854
18855 /* Insert core register clearing RTX in the pattern. */
18856 start_sequence ();
18857 for (j = 0, i = minregno; j < nb_regs; i++)
18858 {
18859 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18860 continue;
18861
18862 reg = gen_rtx_REG (SImode, i);
18863 set = gen_rtx_SET (reg, const0_rtx);
18864 XVECEXP (par, 0, j++) = set;
18865 emit_use (reg);
18866 }
18867
18868 /* Insert APSR register clearing RTX in the pattern
18869 along with clobbering CC. */
18870 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18871 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18872 VUNSPEC_CLRM_APSR);
18873
18874 XVECEXP (par, 0, j++) = vunspec;
18875
18876 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18877 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18878 XVECEXP (par, 0, j) = clobber;
18879
18880 use_seq = get_insns ();
18881 end_sequence ();
18882
18883 emit_insn_after (use_seq, emit_insn (par));
18884 }
18885 else
18886 {
18887 /* If not marked for clearing, clearing_reg already does not contain
18888 any secret. */
18889 if (clearing_regno <= maxregno
18890 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18891 {
18892 emit_move_insn (clearing_reg, const0_rtx);
18893 emit_use (clearing_reg);
18894 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18895 }
18896
18897 for (regno = minregno; regno <= maxregno; regno++)
18898 {
18899 if (!bitmap_bit_p (to_clear_bitmap, regno))
18900 continue;
18901
18902 if (IS_VFP_REGNUM (regno))
18903 {
18904 /* If regno is an even vfp register and its successor is also to
18905 be cleared, use vmov. */
18906 if (TARGET_VFP_DOUBLE
18907 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18908 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18909 {
18910 emit_move_insn (gen_rtx_REG (DFmode, regno),
18911 CONST1_RTX (DFmode));
18912 emit_use (gen_rtx_REG (DFmode, regno));
18913 regno++;
18914 }
18915 else
18916 {
18917 emit_move_insn (gen_rtx_REG (SFmode, regno),
18918 CONST1_RTX (SFmode));
18919 emit_use (gen_rtx_REG (SFmode, regno));
18920 }
18921 }
18922 else
18923 {
18924 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18925 emit_use (gen_rtx_REG (SImode, regno));
18926 }
18927 }
18928 }
18929 }
18930
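/* For one argument register rN, the padding-clearing loop above emits
   roughly the following (an illustrative sketch only; the exact mnemonics
   depend on the target and on register allocation):

       movw  scratch, #lo16 (~padding_bits_to_clear[i])
       movt  scratch, #hi16 (~padding_bits_to_clear[i])   @ only if non-zero
       and   rN, rN, scratch

   i.e. every bit known to be padding in rN is forced to zero while the
   argument bits themselves are left untouched.  */
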
18931 /* Clear core and caller-saved VFP registers not used to pass arguments before
18932 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18933 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18934 libgcc/config/arm/cmse_nonsecure_call.S. */
18935
18936 static void
18937 cmse_nonsecure_call_inline_register_clear (void)
18938 {
18939 basic_block bb;
18940
18941 FOR_EACH_BB_FN (bb, cfun)
18942 {
18943 rtx_insn *insn;
18944
18945 FOR_BB_INSNS (bb, insn)
18946 {
18947 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18948 /* frame = VFP regs + FPSCR + VPR. */
18949 unsigned lazy_store_stack_frame_size
18950 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18951 unsigned long callee_saved_mask
18952 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18953 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18954 unsigned address_regnum, regno;
18955 unsigned max_int_regno
18956 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18957 unsigned max_fp_regno
18958 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18959 unsigned maxregno
18960 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18961 auto_sbitmap to_clear_bitmap (maxregno + 1);
18962 rtx_insn *seq;
18963 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18964 rtx address;
18965 CUMULATIVE_ARGS args_so_far_v;
18966 cumulative_args_t args_so_far;
18967 tree arg_type, fntype;
18968 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18969 function_args_iterator args_iter;
18970 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18971
18972 if (!NONDEBUG_INSN_P (insn))
18973 continue;
18974
18975 if (!CALL_P (insn))
18976 continue;
18977
18978 pat = PATTERN (insn);
18979 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18980 call = XVECEXP (pat, 0, 0);
18981
18982 /* Get the real call RTX if the insn sets a value, ie. returns. */
18983 if (GET_CODE (call) == SET)
18984 call = SET_SRC (call);
18985
18986 /* Check if it is a cmse_nonsecure_call. */
18987 unspec = XEXP (call, 0);
18988 if (GET_CODE (unspec) != UNSPEC
18989 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18990 continue;
18991
18992 /* Mark registers that need to be cleared. Those that hold a
18993 parameter are removed from the set further below. */
18994 bitmap_clear (to_clear_bitmap);
18995 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18996 max_int_regno - R0_REGNUM + 1);
18997
18998 /* Only look at the caller-saved floating point registers in case of
18999 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19000 lazy store and loads which clear both caller- and callee-saved
19001 registers. */
19002 if (!lazy_fpclear)
19003 {
19004 auto_sbitmap float_bitmap (maxregno + 1);
19005
19006 bitmap_clear (float_bitmap);
19007 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19008 max_fp_regno - FIRST_VFP_REGNUM + 1);
19009 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19010 }
19011
19012 /* Make sure the register used to hold the function address is not
19013 cleared. */
19014 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19015 gcc_assert (MEM_P (address));
19016 gcc_assert (REG_P (XEXP (address, 0)));
19017 address_regnum = REGNO (XEXP (address, 0));
19018 if (address_regnum <= max_int_regno)
19019 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19020
19021 /* Set basic block of call insn so that df rescan is performed on
19022 insns inserted here. */
19023 set_block_for_insn (insn, bb);
19024 df_set_flags (DF_DEFER_INSN_RESCAN);
19025 start_sequence ();
19026
19027 /* Make sure the scheduler doesn't schedule other insns beyond
19028 here. */
19029 emit_insn (gen_blockage ());
19030
19031 /* Walk through all arguments and clear registers
19032 appropriately. */
19033 fntype = TREE_TYPE (MEM_EXPR (address));
19034 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19035 NULL_TREE);
19036 args_so_far = pack_cumulative_args (&args_so_far_v);
19037 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19038 {
19039 rtx arg_rtx;
19040 uint64_t to_clear_args_mask;
19041
19042 if (VOID_TYPE_P (arg_type))
19043 continue;
19044
19045 function_arg_info arg (arg_type, /*named=*/true);
19046 if (!first_param)
19047 /* ??? We should advance after processing the argument and pass
19048 the argument we're advancing past. */
19049 arm_function_arg_advance (args_so_far, arg);
19050
19051 arg_rtx = arm_function_arg (args_so_far, arg);
19052 gcc_assert (REG_P (arg_rtx));
19053 to_clear_args_mask
19054 = compute_not_to_clear_mask (arg_type, arg_rtx,
19055 REGNO (arg_rtx),
19056 &padding_bits_to_clear[0]);
19057 if (to_clear_args_mask)
19058 {
19059 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19060 {
19061 if (to_clear_args_mask & (1ULL << regno))
19062 bitmap_clear_bit (to_clear_bitmap, regno);
19063 }
19064 }
19065
19066 first_param = false;
19067 }
19068
19069 /* We use right shift and left shift to clear the LSB of the address
19070 we jump to instead of using bic, to avoid having to use an extra
19071 register on Thumb-1. */
19072 clearing_reg = XEXP (address, 0);
19073 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19074 emit_insn (gen_rtx_SET (clearing_reg, shift));
19075 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19076 emit_insn (gen_rtx_SET (clearing_reg, shift));
19077
19078 if (clear_callee_saved)
19079 {
19080 rtx push_insn =
19081 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19082 /* Disable frame debug info in push because it needs to be
19083 disabled for pop (see below). */
19084 RTX_FRAME_RELATED_P (push_insn) = 0;
19085
19086 /* Lazy store multiple. */
19087 if (lazy_fpclear)
19088 {
19089 rtx imm;
19090 rtx_insn *add_insn;
19091
19092 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19093 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19094 stack_pointer_rtx, imm));
19095 /* If we have the frame pointer, then it will be the
19096 CFA reg. Otherwise, the stack pointer is the CFA
19097 reg, so we need to emit a CFA adjust. */
19098 if (!frame_pointer_needed)
19099 arm_add_cfa_adjust_cfa_note (add_insn,
19100 - lazy_store_stack_frame_size,
19101 stack_pointer_rtx,
19102 stack_pointer_rtx);
19103 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19104 }
19105 /* Save VFP callee-saved registers. */
19106 else
19107 {
19108 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19109 (max_fp_regno - D7_VFP_REGNUM) / 2);
19110 /* Disable frame debug info in push because it needs to be
19111 disabled for vpop (see below). */
19112 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19113 }
19114 }
19115
19116 /* Clear caller-saved registers that leak before doing a non-secure
19117 call. */
19118 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19119 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19120 NUM_ARG_REGS, ip_reg, clearing_reg);
19121
19122 seq = get_insns ();
19123 end_sequence ();
19124 emit_insn_before (seq, insn);
19125
19126 if (TARGET_HAVE_FPCXT_CMSE)
19127 {
19128 rtx_insn *last, *pop_insn, *after = insn;
19129
19130 start_sequence ();
19131
19132 /* Lazy load multiple done as part of libcall in Armv8-M. */
19133 if (lazy_fpclear)
19134 {
19135 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19136 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19137 rtx_insn *add_insn =
19138 emit_insn (gen_addsi3 (stack_pointer_rtx,
19139 stack_pointer_rtx, imm));
19140 if (!frame_pointer_needed)
19141 arm_add_cfa_adjust_cfa_note (add_insn,
19142 lazy_store_stack_frame_size,
19143 stack_pointer_rtx,
19144 stack_pointer_rtx);
19145 }
19146 /* Restore VFP callee-saved registers. */
19147 else
19148 {
19149 int nb_callee_saved_vfp_regs =
19150 (max_fp_regno - D7_VFP_REGNUM) / 2;
19151 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19152 nb_callee_saved_vfp_regs,
19153 stack_pointer_rtx);
19154 /* Disable frame debug info in vpop because the SP adjustment
19155 is made using a CFA adjustment note while CFA used is
19156 sometimes R7. This then causes an assert failure in the
19157 CFI note creation code. */
19158 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19159 }
19160
19161 arm_emit_multi_reg_pop (callee_saved_mask);
19162 pop_insn = get_last_insn ();
19163
19164 /* Disable frame debug info in pop because they reset the state
19165 of popped registers to what it was at the beginning of the
19166 function, before the prologue. This leads to incorrect state
19167 when doing the pop after the nonsecure call for registers that
19168 are pushed both in prologue and before the nonsecure call.
19169
19170 It also occasionally triggers an assert failure in CFI note
19171 creation code when there are two codepaths to the epilogue,
19172 one of which does not go through the nonsecure call.
19173 Obviously this means that debugging between the push and pop is
19174 not reliable. */
19175 RTX_FRAME_RELATED_P (pop_insn) = 0;
19176
19177 seq = get_insns ();
19178 last = get_last_insn ();
19179 end_sequence ();
19180
19181 emit_insn_after (seq, after);
19182
19183 /* Skip pop we have just inserted after nonsecure call, we know
19184 it does not contain a nonsecure call. */
19185 insn = last;
19186 }
19187 }
19188 }
19189 }
19190
19191 /* Rewrite move insn into subtract of 0 if the condition codes will
19192 be useful in the next conditional jump insn. */
19193
19194 static void
19195 thumb1_reorg (void)
19196 {
19197 basic_block bb;
19198
19199 FOR_EACH_BB_FN (bb, cfun)
19200 {
19201 rtx dest, src;
19202 rtx cmp, op0, op1, set = NULL;
19203 rtx_insn *prev, *insn = BB_END (bb);
19204 bool insn_clobbered = false;
19205
19206 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19207 insn = PREV_INSN (insn);
19208
19209 /* Find the last cbranchsi4_insn in basic block BB. */
19210 if (insn == BB_HEAD (bb)
19211 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19212 continue;
19213
19214 /* Get the register with which we are comparing. */
19215 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19216 op0 = XEXP (cmp, 0);
19217 op1 = XEXP (cmp, 1);
19218
19219 /* Check that comparison is against ZERO. */
19220 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19221 continue;
19222
19223 /* Find the first flag setting insn before INSN in basic block BB. */
19224 gcc_assert (insn != BB_HEAD (bb));
19225 for (prev = PREV_INSN (insn);
19226 (!insn_clobbered
19227 && prev != BB_HEAD (bb)
19228 && (NOTE_P (prev)
19229 || DEBUG_INSN_P (prev)
19230 || ((set = single_set (prev)) != NULL
19231 && get_attr_conds (prev) == CONDS_NOCOND)));
19232 prev = PREV_INSN (prev))
19233 {
19234 if (reg_set_p (op0, prev))
19235 insn_clobbered = true;
19236 }
19237
19238 /* Skip if op0 is clobbered by insn other than prev. */
19239 if (insn_clobbered)
19240 continue;
19241
19242 if (!set)
19243 continue;
19244
19245 dest = SET_DEST (set);
19246 src = SET_SRC (set);
19247 if (!low_register_operand (dest, SImode)
19248 || !low_register_operand (src, SImode))
19249 continue;
19250
19251 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19252 in INSN. Both src and dest of the move insn are checked. */
19253 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19254 {
19255 dest = copy_rtx (dest);
19256 src = copy_rtx (src);
19257 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19258 PATTERN (prev) = gen_rtx_SET (dest, src);
19259 INSN_CODE (prev) = -1;
19260 /* Set test register in INSN to dest. */
19261 XEXP (cmp, 0) = copy_rtx (dest);
19262 INSN_CODE (insn) = -1;
19263 }
19264 }
19265 }
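/* Concretely, the transformation above turns RTL of the form

       (set (reg:SI Rd) (reg:SI Rs))
       ...
       (cbranchsi4_insn comparing Rd or Rs against zero)

   into

       (set (reg:SI Rd) (minus:SI (reg:SI Rs) (const_int 0)))

   with the comparison in the branch redirected to Rd, the idea being that
   the move can then be emitted as a flag-setting SUBS so the conditional
   branch can reuse the flags instead of materializing a separate compare.  */
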
19266
19267 /* Convert instructions to their cc-clobbering variant if possible, since
19268 that allows us to use smaller encodings. */
19269
19270 static void
19271 thumb2_reorg (void)
19272 {
19273 basic_block bb;
19274 regset_head live;
19275
19276 INIT_REG_SET (&live);
19277
19278 /* We are freeing block_for_insn in the toplev to keep compatibility
19279 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19280 compute_bb_for_insn ();
19281 df_analyze ();
19282
19283 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19284
19285 FOR_EACH_BB_FN (bb, cfun)
19286 {
19287 if ((current_tune->disparage_flag_setting_t16_encodings
19288 == tune_params::DISPARAGE_FLAGS_ALL)
19289 && optimize_bb_for_speed_p (bb))
19290 continue;
19291
19292 rtx_insn *insn;
19293 Convert_Action action = SKIP;
19294 Convert_Action action_for_partial_flag_setting
19295 = ((current_tune->disparage_flag_setting_t16_encodings
19296 != tune_params::DISPARAGE_FLAGS_NEITHER)
19297 && optimize_bb_for_speed_p (bb))
19298 ? SKIP : CONV;
19299
19300 COPY_REG_SET (&live, DF_LR_OUT (bb));
19301 df_simulate_initialize_backwards (bb, &live);
19302 FOR_BB_INSNS_REVERSE (bb, insn)
19303 {
19304 if (NONJUMP_INSN_P (insn)
19305 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19306 && GET_CODE (PATTERN (insn)) == SET)
19307 {
19308 action = SKIP;
19309 rtx pat = PATTERN (insn);
19310 rtx dst = XEXP (pat, 0);
19311 rtx src = XEXP (pat, 1);
19312 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19313
19314 if (UNARY_P (src) || BINARY_P (src))
19315 op0 = XEXP (src, 0);
19316
19317 if (BINARY_P (src))
19318 op1 = XEXP (src, 1);
19319
19320 if (low_register_operand (dst, SImode))
19321 {
19322 switch (GET_CODE (src))
19323 {
19324 case PLUS:
19325 /* Adding two registers and storing the result
19326 in the first source is already a 16-bit
19327 operation. */
19328 if (rtx_equal_p (dst, op0)
19329 && register_operand (op1, SImode))
19330 break;
19331
19332 if (low_register_operand (op0, SImode))
19333 {
19334 /* ADDS <Rd>,<Rn>,<Rm> */
19335 if (low_register_operand (op1, SImode))
19336 action = CONV;
19337 /* ADDS <Rdn>,#<imm8> */
19338 /* SUBS <Rdn>,#<imm8> */
19339 else if (rtx_equal_p (dst, op0)
19340 && CONST_INT_P (op1)
19341 && IN_RANGE (INTVAL (op1), -255, 255))
19342 action = CONV;
19343 /* ADDS <Rd>,<Rn>,#<imm3> */
19344 /* SUBS <Rd>,<Rn>,#<imm3> */
19345 else if (CONST_INT_P (op1)
19346 && IN_RANGE (INTVAL (op1), -7, 7))
19347 action = CONV;
19348 }
19349 /* ADCS <Rd>, <Rn> */
19350 else if (GET_CODE (XEXP (src, 0)) == PLUS
19351 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19352 && low_register_operand (XEXP (XEXP (src, 0), 1),
19353 SImode)
19354 && COMPARISON_P (op1)
19355 && cc_register (XEXP (op1, 0), VOIDmode)
19356 && maybe_get_arm_condition_code (op1) == ARM_CS
19357 && XEXP (op1, 1) == const0_rtx)
19358 action = CONV;
19359 break;
19360
19361 case MINUS:
19362 /* RSBS <Rd>,<Rn>,#0
19363 Not handled here: see NEG below. */
19364 /* SUBS <Rd>,<Rn>,#<imm3>
19365 SUBS <Rdn>,#<imm8>
19366 Not handled here: see PLUS above. */
19367 /* SUBS <Rd>,<Rn>,<Rm> */
19368 if (low_register_operand (op0, SImode)
19369 && low_register_operand (op1, SImode))
19370 action = CONV;
19371 break;
19372
19373 case MULT:
19374 /* MULS <Rdm>,<Rn>,<Rdm>
19375 As an exception to the rule, this is only used
19376 when optimizing for size since MULS is slow on all
19377 known implementations. We do not even want to use
19378 MULS in cold code, if optimizing for speed, so we
19379 test the global flag here. */
19380 if (!optimize_size)
19381 break;
19382 /* Fall through. */
19383 case AND:
19384 case IOR:
19385 case XOR:
19386 /* ANDS <Rdn>,<Rm> */
19387 if (rtx_equal_p (dst, op0)
19388 && low_register_operand (op1, SImode))
19389 action = action_for_partial_flag_setting;
19390 else if (rtx_equal_p (dst, op1)
19391 && low_register_operand (op0, SImode))
19392 action = action_for_partial_flag_setting == SKIP
19393 ? SKIP : SWAP_CONV;
19394 break;
19395
19396 case ASHIFTRT:
19397 case ASHIFT:
19398 case LSHIFTRT:
19399 /* ASRS <Rdn>,<Rm> */
19400 /* LSRS <Rdn>,<Rm> */
19401 /* LSLS <Rdn>,<Rm> */
19402 if (rtx_equal_p (dst, op0)
19403 && low_register_operand (op1, SImode))
19404 action = action_for_partial_flag_setting;
19405 /* ASRS <Rd>,<Rm>,#<imm5> */
19406 /* LSRS <Rd>,<Rm>,#<imm5> */
19407 /* LSLS <Rd>,<Rm>,#<imm5> */
19408 else if (low_register_operand (op0, SImode)
19409 && CONST_INT_P (op1)
19410 && IN_RANGE (INTVAL (op1), 0, 31))
19411 action = action_for_partial_flag_setting;
19412 break;
19413
19414 case ROTATERT:
19415 /* RORS <Rdn>,<Rm> */
19416 if (rtx_equal_p (dst, op0)
19417 && low_register_operand (op1, SImode))
19418 action = action_for_partial_flag_setting;
19419 break;
19420
19421 case NOT:
19422 /* MVNS <Rd>,<Rm> */
19423 if (low_register_operand (op0, SImode))
19424 action = action_for_partial_flag_setting;
19425 break;
19426
19427 case NEG:
19428 	    /* NEGS <Rd>,<Rm>  (a.k.a. RSBS)  */
19429 if (low_register_operand (op0, SImode))
19430 action = CONV;
19431 break;
19432
19433 case CONST_INT:
19434 /* MOVS <Rd>,#<imm8> */
19435 if (CONST_INT_P (src)
19436 && IN_RANGE (INTVAL (src), 0, 255))
19437 action = action_for_partial_flag_setting;
19438 break;
19439
19440 case REG:
19441 /* MOVS and MOV<c> with registers have different
19442 encodings, so are not relevant here. */
19443 break;
19444
19445 default:
19446 break;
19447 }
19448 }
19449
19450 if (action != SKIP)
19451 {
19452 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19453 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19454 rtvec vec;
19455
19456 if (action == SWAP_CONV)
19457 {
19458 src = copy_rtx (src);
19459 XEXP (src, 0) = op1;
19460 XEXP (src, 1) = op0;
19461 pat = gen_rtx_SET (dst, src);
19462 vec = gen_rtvec (2, pat, clobber);
19463 }
19464 else /* action == CONV */
19465 vec = gen_rtvec (2, pat, clobber);
19466
19467 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19468 INSN_CODE (insn) = -1;
19469 }
19470 }
19471
19472 if (NONDEBUG_INSN_P (insn))
19473 df_simulate_one_insn_backwards (bb, insn, &live);
19474 }
19475 }
19476
19477 CLEAR_REG_SET (&live);
19478 }
19479
19480 /* GCC puts the pool in the wrong place for ARM, since we can only
19481 load addresses a limited distance around the pc. We do some
19482 special munging to move the constant pool values to the correct
19483 point in the code. */
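/* As an illustrative sketch (the label name below is made up), an
   out-of-range constant ends up being loaded PC-relative from a nearby
   minipool:

	ldr	r0, .Lminipool_entry	@ must stay within the load's range
	...
	.Lminipool_entry:
	.word	0x12345678

   The pass below records each such reference as a "fix" and then places
   the pools, plus any barriers needed to branch around them, so that
   every reference remains in range.  */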
19484 static void
19485 arm_reorg (void)
19486 {
19487 rtx_insn *insn;
19488 HOST_WIDE_INT address = 0;
19489 Mfix * fix;
19490
19491 if (use_cmse)
19492 cmse_nonsecure_call_inline_register_clear ();
19493
19494 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19495 if (cfun->is_thunk)
19496 ;
19497 else if (TARGET_THUMB1)
19498 thumb1_reorg ();
19499 else if (TARGET_THUMB2)
19500 thumb2_reorg ();
19501
19502 /* Ensure all insns that must be split have been split at this point.
19503 Otherwise, the pool placement code below may compute incorrect
19504 insn lengths. Note that when optimizing, all insns have already
19505 been split at this point. */
19506 if (!optimize)
19507 split_all_insns_noflow ();
19508
19509   /* Literal pools have been disabled; none should be needed now, so make
19510      sure we never attempt to create one.  */
19511 if (arm_disable_literal_pool)
19512 return ;
19513
19514 minipool_fix_head = minipool_fix_tail = NULL;
19515
19516 /* The first insn must always be a note, or the code below won't
19517 scan it properly. */
19518 insn = get_insns ();
19519 gcc_assert (NOTE_P (insn));
19520 minipool_pad = 0;
19521
19522 /* Scan all the insns and record the operands that will need fixing. */
19523 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19524 {
19525 if (BARRIER_P (insn))
19526 push_minipool_barrier (insn, address);
19527 else if (INSN_P (insn))
19528 {
19529 rtx_jump_table_data *table;
19530
19531 note_invalid_constants (insn, address, true);
19532 address += get_attr_length (insn);
19533
19534 /* If the insn is a vector jump, add the size of the table
19535 and skip the table. */
19536 if (tablejump_p (insn, NULL, &table))
19537 {
19538 address += get_jump_table_size (table);
19539 insn = table;
19540 }
19541 }
19542 else if (LABEL_P (insn))
19543 /* Add the worst-case padding due to alignment. We don't add
19544 the _current_ padding because the minipool insertions
19545 themselves might change it. */
19546 address += get_label_padding (insn);
19547 }
19548
19549 fix = minipool_fix_head;
19550
19551 /* Now scan the fixups and perform the required changes. */
19552 while (fix)
19553 {
19554 Mfix * ftmp;
19555 Mfix * fdel;
19556 Mfix * last_added_fix;
19557 Mfix * last_barrier = NULL;
19558 Mfix * this_fix;
19559
19560 /* Skip any further barriers before the next fix. */
19561 while (fix && BARRIER_P (fix->insn))
19562 fix = fix->next;
19563
19564 /* No more fixes. */
19565 if (fix == NULL)
19566 break;
19567
19568 last_added_fix = NULL;
19569
19570 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19571 {
19572 if (BARRIER_P (ftmp->insn))
19573 {
19574 if (ftmp->address >= minipool_vector_head->max_address)
19575 break;
19576
19577 last_barrier = ftmp;
19578 }
19579 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19580 break;
19581
19582 last_added_fix = ftmp; /* Keep track of the last fix added. */
19583 }
19584
19585 /* If we found a barrier, drop back to that; any fixes that we
19586 could have reached but come after the barrier will now go in
19587 the next mini-pool. */
19588 if (last_barrier != NULL)
19589 {
19590 /* Reduce the refcount for those fixes that won't go into this
19591 pool after all. */
19592 for (fdel = last_barrier->next;
19593 fdel && fdel != ftmp;
19594 fdel = fdel->next)
19595 {
19596 fdel->minipool->refcount--;
19597 fdel->minipool = NULL;
19598 }
19599
19600 ftmp = last_barrier;
19601 }
19602 else
19603 {
19604 	  /* ftmp is the first fix that we can't fit into this pool and
19605 	     there are no natural barriers that we could use.  Insert a
19606 new barrier in the code somewhere between the previous
19607 fix and this one, and arrange to jump around it. */
19608 HOST_WIDE_INT max_address;
19609
19610 /* The last item on the list of fixes must be a barrier, so
19611 we can never run off the end of the list of fixes without
19612 last_barrier being set. */
19613 gcc_assert (ftmp);
19614
19615 max_address = minipool_vector_head->max_address;
19616 /* Check that there isn't another fix that is in range that
19617 we couldn't fit into this pool because the pool was
19618 already too large: we need to put the pool before such an
19619 instruction. The pool itself may come just after the
19620 fix because create_fix_barrier also allows space for a
19621 jump instruction. */
19622 if (ftmp->address < max_address)
19623 max_address = ftmp->address + 1;
19624
19625 last_barrier = create_fix_barrier (last_added_fix, max_address);
19626 }
19627
19628 assign_minipool_offsets (last_barrier);
19629
19630 while (ftmp)
19631 {
19632 if (!BARRIER_P (ftmp->insn)
19633 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19634 == NULL))
19635 break;
19636
19637 ftmp = ftmp->next;
19638 }
19639
19640 /* Scan over the fixes we have identified for this pool, fixing them
19641 up and adding the constants to the pool itself. */
19642 for (this_fix = fix; this_fix && ftmp != this_fix;
19643 this_fix = this_fix->next)
19644 if (!BARRIER_P (this_fix->insn))
19645 {
19646 rtx addr
19647 = plus_constant (Pmode,
19648 gen_rtx_LABEL_REF (VOIDmode,
19649 minipool_vector_label),
19650 this_fix->minipool->offset);
19651 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19652 }
19653
19654 dump_minipool (last_barrier->insn);
19655 fix = ftmp;
19656 }
19657
19658 /* From now on we must synthesize any constants that we can't handle
19659 directly. This can happen if the RTL gets split during final
19660 instruction generation. */
19661 cfun->machine->after_arm_reorg = 1;
19662
19663 /* Free the minipool memory. */
19664 obstack_free (&minipool_obstack, minipool_startobj);
19665 }
19666 \f
19667 /* Routines to output assembly language. */
19668
19669 /* Return string representation of passed in real value. */
19670 static const char *
19671 fp_const_from_val (REAL_VALUE_TYPE *r)
19672 {
19673 if (!fp_consts_inited)
19674 init_fp_table ();
19675
19676 gcc_assert (real_equal (r, &value_fp0));
19677 return "0";
19678 }
19679
19680 /* OPERANDS[0] is the entire list of insns that constitute pop,
19681 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19682 is in the list, UPDATE is true iff the list contains explicit
19683 update of base register. */
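/* For example (purely illustrative register choices): restoring r4, r5 and
   the return address with an SP writeback produces "pop {r4, r5, pc}",
   while the same registers loaded through another base register without
   writeback come out as "ldm rN, {r4, r5, pc}".  */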
19684 void
19685 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19686 bool update)
19687 {
19688 int i;
19689 char pattern[100];
19690 int offset;
19691 const char *conditional;
19692 int num_saves = XVECLEN (operands[0], 0);
19693 unsigned int regno;
19694 unsigned int regno_base = REGNO (operands[1]);
19695 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19696
19697 offset = 0;
19698 offset += update ? 1 : 0;
19699 offset += return_pc ? 1 : 0;
19700
19701 /* Is the base register in the list? */
19702 for (i = offset; i < num_saves; i++)
19703 {
19704 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19705 /* If SP is in the list, then the base register must be SP. */
19706 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19707 /* If base register is in the list, there must be no explicit update. */
19708 if (regno == regno_base)
19709 gcc_assert (!update);
19710 }
19711
19712 conditional = reverse ? "%?%D0" : "%?%d0";
19713 /* Can't use POP if returning from an interrupt. */
19714 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19715 sprintf (pattern, "pop%s\t{", conditional);
19716 else
19717 {
19718 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19719          It's just a convention; their semantics are identical.  */
19720 if (regno_base == SP_REGNUM)
19721 sprintf (pattern, "ldmfd%s\t", conditional);
19722 else if (update)
19723 sprintf (pattern, "ldmia%s\t", conditional);
19724 else
19725 sprintf (pattern, "ldm%s\t", conditional);
19726
19727 strcat (pattern, reg_names[regno_base]);
19728 if (update)
19729 strcat (pattern, "!, {");
19730 else
19731 strcat (pattern, ", {");
19732 }
19733
19734 /* Output the first destination register. */
19735 strcat (pattern,
19736 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19737
19738 /* Output the rest of the destination registers. */
19739 for (i = offset + 1; i < num_saves; i++)
19740 {
19741 strcat (pattern, ", ");
19742 strcat (pattern,
19743 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19744 }
19745
19746 strcat (pattern, "}");
19747
19748 if (interrupt_p && return_pc)
19749 strcat (pattern, "^");
19750
19751 output_asm_insn (pattern, &cond);
19752 }
19753
19754
19755 /* Output the assembly for a store multiple. */
19756
19757 const char *
19758 vfp_output_vstmd (rtx * operands)
19759 {
19760 char pattern[100];
19761 int p;
19762 int base;
19763 int i;
19764 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19765 ? XEXP (operands[0], 0)
19766 : XEXP (XEXP (operands[0], 0), 0);
19767 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19768
19769 if (push_p)
19770 strcpy (pattern, "vpush%?.64\t{%P1");
19771 else
19772 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19773
19774 p = strlen (pattern);
19775
19776 gcc_assert (REG_P (operands[1]));
19777
19778 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19779 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19780 {
19781 p += sprintf (&pattern[p], ", d%d", base + i);
19782 }
19783 strcpy (&pattern[p], "}");
19784
19785 output_asm_insn (pattern, operands);
19786 return "";
19787 }
19788
19789
19790 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19791 number of bytes pushed. */
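/* For instance, a call with BASE_REG naming d8 and COUNT == 4 emits a single
   store-multiple of d8-d11, drops SP by 32 bytes and returns 32.  */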
19792
19793 static int
19794 vfp_emit_fstmd (int base_reg, int count)
19795 {
19796 rtx par;
19797 rtx dwarf;
19798 rtx tmp, reg;
19799 int i;
19800
19801 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19802 register pairs are stored by a store multiple insn. We avoid this
19803 by pushing an extra pair. */
19804 if (count == 2 && !arm_arch6)
19805 {
19806 if (base_reg == LAST_VFP_REGNUM - 3)
19807 base_reg -= 2;
19808 count++;
19809 }
19810
19811 /* FSTMD may not store more than 16 doubleword registers at once. Split
19812 larger stores into multiple parts (up to a maximum of two, in
19813 practice). */
19814 if (count > 16)
19815 {
19816 int saved;
19817 /* NOTE: base_reg is an internal register number, so each D register
19818 counts as 2. */
19819 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19820 saved += vfp_emit_fstmd (base_reg, 16);
19821 return saved;
19822 }
19823
19824 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19825 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19826
19827 reg = gen_rtx_REG (DFmode, base_reg);
19828 base_reg += 2;
19829
19830 XVECEXP (par, 0, 0)
19831 = gen_rtx_SET (gen_frame_mem
19832 (BLKmode,
19833 gen_rtx_PRE_MODIFY (Pmode,
19834 stack_pointer_rtx,
19835 plus_constant
19836 (Pmode, stack_pointer_rtx,
19837 - (count * 8)))
19838 ),
19839 gen_rtx_UNSPEC (BLKmode,
19840 gen_rtvec (1, reg),
19841 UNSPEC_PUSH_MULT));
19842
19843 tmp = gen_rtx_SET (stack_pointer_rtx,
19844 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19845 RTX_FRAME_RELATED_P (tmp) = 1;
19846 XVECEXP (dwarf, 0, 0) = tmp;
19847
19848 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19849 RTX_FRAME_RELATED_P (tmp) = 1;
19850 XVECEXP (dwarf, 0, 1) = tmp;
19851
19852 for (i = 1; i < count; i++)
19853 {
19854 reg = gen_rtx_REG (DFmode, base_reg);
19855 base_reg += 2;
19856 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19857
19858 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19859 plus_constant (Pmode,
19860 stack_pointer_rtx,
19861 i * 8)),
19862 reg);
19863 RTX_FRAME_RELATED_P (tmp) = 1;
19864 XVECEXP (dwarf, 0, i + 1) = tmp;
19865 }
19866
19867 par = emit_insn (par);
19868 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19869 RTX_FRAME_RELATED_P (par) = 1;
19870
19871 return count * 8;
19872 }
19873
19874 /* Return true if -mcmse has been passed and the function pointed to by ADDR
19875    has the cmse_nonsecure_call attribute; return false otherwise.  */
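/* A sketch of the C-level construct this detects (the typedef name is just
   for illustration):

	typedef void __attribute__ ((cmse_nonsecure_call)) ns_call_t (void);
	ns_call_t *callback;

   With -mcmse, a call through 'callback' carries the attribute on its
   function type and is recognised here.  */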
19876
19877 bool
19878 detect_cmse_nonsecure_call (tree addr)
19879 {
19880 if (!addr)
19881 return FALSE;
19882
19883 tree fntype = TREE_TYPE (addr);
19884 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19885 TYPE_ATTRIBUTES (fntype)))
19886 return TRUE;
19887 return FALSE;
19888 }
19889
19890
19891 /* Emit a call instruction with pattern PAT. ADDR is the address of
19892    the call target.  SIBCALL is true if this is a sibling call.  */
19893
19894 void
19895 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19896 {
19897 rtx insn;
19898
19899 insn = emit_call_insn (pat);
19900
19901 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19902 If the call might use such an entry, add a use of the PIC register
19903 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19904 if (TARGET_VXWORKS_RTP
19905 && flag_pic
19906 && !sibcall
19907 && SYMBOL_REF_P (addr)
19908 && (SYMBOL_REF_DECL (addr)
19909 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19910 : !SYMBOL_REF_LOCAL_P (addr)))
19911 {
19912 require_pic_register (NULL_RTX, false /*compute_now*/);
19913 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19914 }
19915
19916 if (TARGET_FDPIC)
19917 {
19918 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19919 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19920 }
19921
19922 if (TARGET_AAPCS_BASED)
19923 {
19924 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19925 linker. We need to add an IP clobber to allow setting
19926 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19927 is not needed since it's a fixed register. */
19928 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19929 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19930 }
19931 }
19932
19933 /* Output a 'call' insn. */
19934 const char *
19935 output_call (rtx *operands)
19936 {
19937 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19938
19939 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19940 if (REGNO (operands[0]) == LR_REGNUM)
19941 {
19942 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19943 output_asm_insn ("mov%?\t%0, %|lr", operands);
19944 }
19945
19946 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19947
19948 if (TARGET_INTERWORK || arm_arch4t)
19949 output_asm_insn ("bx%?\t%0", operands);
19950 else
19951 output_asm_insn ("mov%?\t%|pc, %0", operands);
19952
19953 return "";
19954 }
19955
19956 /* Output a move from arm registers to arm registers of a long double.
19957 OPERANDS[0] is the destination.
19958 OPERANDS[1] is the source. */
19959 const char *
19960 output_mov_long_double_arm_from_arm (rtx *operands)
19961 {
19962 /* We have to be careful here because the two might overlap. */
19963 int dest_start = REGNO (operands[0]);
19964 int src_start = REGNO (operands[1]);
19965 rtx ops[2];
19966 int i;
19967
19968 if (dest_start < src_start)
19969 {
19970 for (i = 0; i < 3; i++)
19971 {
19972 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19973 ops[1] = gen_rtx_REG (SImode, src_start + i);
19974 output_asm_insn ("mov%?\t%0, %1", ops);
19975 }
19976 }
19977 else
19978 {
19979 for (i = 2; i >= 0; i--)
19980 {
19981 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19982 ops[1] = gen_rtx_REG (SImode, src_start + i);
19983 output_asm_insn ("mov%?\t%0, %1", ops);
19984 }
19985 }
19986
19987 return "";
19988 }
19989
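/* Emit DEST = SRC as a pair of 16-bit moves (movw/movt style).  A constant
   has its low 16 bits set first and then, if non-zero, its high 16 bits via
   a ZERO_EXTRACT of DEST; other sources go through HIGH/LO_SUM.  Where
   needed, a REG_EQUAL note recording the full value is attached to the
   final insn.  */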
19990 void
19991 arm_emit_movpair (rtx dest, rtx src)
19992 {
19993 /* If the src is an immediate, simplify it. */
19994 if (CONST_INT_P (src))
19995 {
19996 HOST_WIDE_INT val = INTVAL (src);
19997 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19998 if ((val >> 16) & 0x0000ffff)
19999 {
20000 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20001 GEN_INT (16)),
20002 GEN_INT ((val >> 16) & 0x0000ffff));
20003 rtx_insn *insn = get_last_insn ();
20004 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20005 }
20006 return;
20007 }
20008 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20009 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20010 rtx_insn *insn = get_last_insn ();
20011 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20012 }
20013
20014 /* Output a move between double words. It must be REG<-MEM
20015 or MEM<-REG. */
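/* For instance (illustrative registers), loading the pair starting at r0
   from the address in r2 is emitted as "ldrd r0, [r2]" when LDRD can be used
   (Thumb-2, or an even destination register in ARM state), and as a
   load-multiple of the two registers otherwise.  */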
20016 const char *
20017 output_move_double (rtx *operands, bool emit, int *count)
20018 {
20019 enum rtx_code code0 = GET_CODE (operands[0]);
20020 enum rtx_code code1 = GET_CODE (operands[1]);
20021 rtx otherops[3];
20022 if (count)
20023 *count = 1;
20024
20025 /* The only case when this might happen is when
20026 you are looking at the length of a DImode instruction
20027 that has an invalid constant in it. */
20028 if (code0 == REG && code1 != MEM)
20029 {
20030 gcc_assert (!emit);
20031 *count = 2;
20032 return "";
20033 }
20034
20035 if (code0 == REG)
20036 {
20037 unsigned int reg0 = REGNO (operands[0]);
20038 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20039
20040 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20041
20042 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20043
20044 switch (GET_CODE (XEXP (operands[1], 0)))
20045 {
20046 case REG:
20047
20048 if (emit)
20049 {
20050 if (can_ldrd
20051 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20052 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20053 else
20054 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20055 }
20056 break;
20057
20058 case PRE_INC:
20059 gcc_assert (can_ldrd);
20060 if (emit)
20061 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20062 break;
20063
20064 case PRE_DEC:
20065 if (emit)
20066 {
20067 if (can_ldrd)
20068 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20069 else
20070 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20071 }
20072 break;
20073
20074 case POST_INC:
20075 if (emit)
20076 {
20077 if (can_ldrd)
20078 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20079 else
20080 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20081 }
20082 break;
20083
20084 case POST_DEC:
20085 gcc_assert (can_ldrd);
20086 if (emit)
20087 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20088 break;
20089
20090 case PRE_MODIFY:
20091 case POST_MODIFY:
20092 	  /* Autoincrement addressing modes should never have overlapping
20093 base and destination registers, and overlapping index registers
20094 are already prohibited, so this doesn't need to worry about
20095 fix_cm3_ldrd. */
20096 otherops[0] = operands[0];
20097 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20098 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20099
20100 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20101 {
20102 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20103 {
20104 /* Registers overlap so split out the increment. */
20105 if (emit)
20106 {
20107 gcc_assert (can_ldrd);
20108 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20109 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20110 }
20111 if (count)
20112 *count = 2;
20113 }
20114 else
20115 {
20116 /* Use a single insn if we can.
20117 FIXME: IWMMXT allows offsets larger than ldrd can
20118 handle, fix these up with a pair of ldr. */
20119 if (can_ldrd
20120 && (TARGET_THUMB2
20121 || !CONST_INT_P (otherops[2])
20122 || (INTVAL (otherops[2]) > -256
20123 && INTVAL (otherops[2]) < 256)))
20124 {
20125 if (emit)
20126 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20127 }
20128 else
20129 {
20130 if (emit)
20131 {
20132 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20133 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20134 }
20135 if (count)
20136 *count = 2;
20137
20138 }
20139 }
20140 }
20141 else
20142 {
20143 /* Use a single insn if we can.
20144 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20145 fix these up with a pair of ldr. */
20146 if (can_ldrd
20147 && (TARGET_THUMB2
20148 || !CONST_INT_P (otherops[2])
20149 || (INTVAL (otherops[2]) > -256
20150 && INTVAL (otherops[2]) < 256)))
20151 {
20152 if (emit)
20153 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20154 }
20155 else
20156 {
20157 if (emit)
20158 {
20159 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20160 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20161 }
20162 if (count)
20163 *count = 2;
20164 }
20165 }
20166 break;
20167
20168 case LABEL_REF:
20169 case CONST:
20170 /* We might be able to use ldrd %0, %1 here. However the range is
20171 different to ldr/adr, and it is broken on some ARMv7-M
20172 implementations. */
20173 /* Use the second register of the pair to avoid problematic
20174 overlap. */
20175 otherops[1] = operands[1];
20176 if (emit)
20177 output_asm_insn ("adr%?\t%0, %1", otherops);
20178 operands[1] = otherops[0];
20179 if (emit)
20180 {
20181 if (can_ldrd)
20182 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20183 else
20184 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20185 }
20186
20187 if (count)
20188 *count = 2;
20189 break;
20190
20191 /* ??? This needs checking for thumb2. */
20192 default:
20193 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20194 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20195 {
20196 otherops[0] = operands[0];
20197 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20198 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20199
20200 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20201 {
20202 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20203 {
20204 switch ((int) INTVAL (otherops[2]))
20205 {
20206 case -8:
20207 if (emit)
20208 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20209 return "";
20210 case -4:
20211 if (TARGET_THUMB2)
20212 break;
20213 if (emit)
20214 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20215 return "";
20216 case 4:
20217 if (TARGET_THUMB2)
20218 break;
20219 if (emit)
20220 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20221 return "";
20222 }
20223 }
20224 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20225 operands[1] = otherops[0];
20226 if (can_ldrd
20227 && (REG_P (otherops[2])
20228 || TARGET_THUMB2
20229 || (CONST_INT_P (otherops[2])
20230 && INTVAL (otherops[2]) > -256
20231 && INTVAL (otherops[2]) < 256)))
20232 {
20233 if (reg_overlap_mentioned_p (operands[0],
20234 otherops[2]))
20235 {
20236 /* Swap base and index registers over to
20237 avoid a conflict. */
20238 std::swap (otherops[1], otherops[2]);
20239 }
20240 /* If both registers conflict, it will usually
20241 have been fixed by a splitter. */
20242 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20243 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20244 {
20245 if (emit)
20246 {
20247 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20248 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20249 }
20250 if (count)
20251 *count = 2;
20252 }
20253 else
20254 {
20255 otherops[0] = operands[0];
20256 if (emit)
20257 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20258 }
20259 return "";
20260 }
20261
20262 if (CONST_INT_P (otherops[2]))
20263 {
20264 if (emit)
20265 {
20266 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20267 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20268 else
20269 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20270 }
20271 }
20272 else
20273 {
20274 if (emit)
20275 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20276 }
20277 }
20278 else
20279 {
20280 if (emit)
20281 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20282 }
20283
20284 if (count)
20285 *count = 2;
20286
20287 if (can_ldrd)
20288 return "ldrd%?\t%0, [%1]";
20289
20290 return "ldmia%?\t%1, %M0";
20291 }
20292 else
20293 {
20294 otherops[1] = adjust_address (operands[1], SImode, 4);
20295 /* Take care of overlapping base/data reg. */
20296 if (reg_mentioned_p (operands[0], operands[1]))
20297 {
20298 if (emit)
20299 {
20300 output_asm_insn ("ldr%?\t%0, %1", otherops);
20301 output_asm_insn ("ldr%?\t%0, %1", operands);
20302 }
20303 if (count)
20304 *count = 2;
20305
20306 }
20307 else
20308 {
20309 if (emit)
20310 {
20311 output_asm_insn ("ldr%?\t%0, %1", operands);
20312 output_asm_insn ("ldr%?\t%0, %1", otherops);
20313 }
20314 if (count)
20315 *count = 2;
20316 }
20317 }
20318 }
20319 }
20320 else
20321 {
20322 /* Constraints should ensure this. */
20323 gcc_assert (code0 == MEM && code1 == REG);
20324 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20325 || (TARGET_ARM && TARGET_LDRD));
20326
20327 /* For TARGET_ARM the first source register of an STRD
20328 must be even. This is usually the case for double-word
20329 values but user assembly constraints can force an odd
20330 starting register. */
20331 bool allow_strd = TARGET_LDRD
20332 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20333 switch (GET_CODE (XEXP (operands[0], 0)))
20334 {
20335 case REG:
20336 if (emit)
20337 {
20338 if (allow_strd)
20339 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20340 else
20341 output_asm_insn ("stm%?\t%m0, %M1", operands);
20342 }
20343 break;
20344
20345 case PRE_INC:
20346 gcc_assert (allow_strd);
20347 if (emit)
20348 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20349 break;
20350
20351 case PRE_DEC:
20352 if (emit)
20353 {
20354 if (allow_strd)
20355 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20356 else
20357 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20358 }
20359 break;
20360
20361 case POST_INC:
20362 if (emit)
20363 {
20364 if (allow_strd)
20365 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20366 else
20367 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20368 }
20369 break;
20370
20371 case POST_DEC:
20372 gcc_assert (allow_strd);
20373 if (emit)
20374 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20375 break;
20376
20377 case PRE_MODIFY:
20378 case POST_MODIFY:
20379 otherops[0] = operands[1];
20380 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20381 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20382
20383 /* IWMMXT allows offsets larger than strd can handle,
20384 fix these up with a pair of str. */
20385 if (!TARGET_THUMB2
20386 && CONST_INT_P (otherops[2])
20387 && (INTVAL(otherops[2]) <= -256
20388 || INTVAL(otherops[2]) >= 256))
20389 {
20390 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20391 {
20392 if (emit)
20393 {
20394 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20395 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20396 }
20397 if (count)
20398 *count = 2;
20399 }
20400 else
20401 {
20402 if (emit)
20403 {
20404 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20405 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20406 }
20407 if (count)
20408 *count = 2;
20409 }
20410 }
20411 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20412 {
20413 if (emit)
20414 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20415 }
20416 else
20417 {
20418 if (emit)
20419 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20420 }
20421 break;
20422
20423 case PLUS:
20424 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20425 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20426 {
20427 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20428 {
20429 case -8:
20430 if (emit)
20431 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20432 return "";
20433
20434 case -4:
20435 if (TARGET_THUMB2)
20436 break;
20437 if (emit)
20438 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20439 return "";
20440
20441 case 4:
20442 if (TARGET_THUMB2)
20443 break;
20444 if (emit)
20445 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20446 return "";
20447 }
20448 }
20449 if (allow_strd
20450 && (REG_P (otherops[2])
20451 || TARGET_THUMB2
20452 || (CONST_INT_P (otherops[2])
20453 && INTVAL (otherops[2]) > -256
20454 && INTVAL (otherops[2]) < 256)))
20455 {
20456 otherops[0] = operands[1];
20457 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20458 if (emit)
20459 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20460 return "";
20461 }
20462 /* Fall through */
20463
20464 default:
20465 otherops[0] = adjust_address (operands[0], SImode, 4);
20466 otherops[1] = operands[1];
20467 if (emit)
20468 {
20469 output_asm_insn ("str%?\t%1, %0", operands);
20470 output_asm_insn ("str%?\t%H1, %0", otherops);
20471 }
20472 if (count)
20473 *count = 2;
20474 }
20475 }
20476
20477 return "";
20478 }
20479
20480 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20481 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20482
20483 const char *
20484 output_move_quad (rtx *operands)
20485 {
20486 if (REG_P (operands[0]))
20487 {
20488 /* Load, or reg->reg move. */
20489
20490 if (MEM_P (operands[1]))
20491 {
20492 switch (GET_CODE (XEXP (operands[1], 0)))
20493 {
20494 case REG:
20495 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20496 break;
20497
20498 case LABEL_REF:
20499 case CONST:
20500 output_asm_insn ("adr%?\t%0, %1", operands);
20501 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20502 break;
20503
20504 default:
20505 gcc_unreachable ();
20506 }
20507 }
20508 else
20509 {
20510 rtx ops[2];
20511 int dest, src, i;
20512
20513 gcc_assert (REG_P (operands[1]));
20514
20515 dest = REGNO (operands[0]);
20516 src = REGNO (operands[1]);
20517
20518 /* This seems pretty dumb, but hopefully GCC won't try to do it
20519 very often. */
20520 if (dest < src)
20521 for (i = 0; i < 4; i++)
20522 {
20523 ops[0] = gen_rtx_REG (SImode, dest + i);
20524 ops[1] = gen_rtx_REG (SImode, src + i);
20525 output_asm_insn ("mov%?\t%0, %1", ops);
20526 }
20527 else
20528 for (i = 3; i >= 0; i--)
20529 {
20530 ops[0] = gen_rtx_REG (SImode, dest + i);
20531 ops[1] = gen_rtx_REG (SImode, src + i);
20532 output_asm_insn ("mov%?\t%0, %1", ops);
20533 }
20534 }
20535 }
20536 else
20537 {
20538 gcc_assert (MEM_P (operands[0]));
20539 gcc_assert (REG_P (operands[1]));
20540 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20541
20542 switch (GET_CODE (XEXP (operands[0], 0)))
20543 {
20544 case REG:
20545 output_asm_insn ("stm%?\t%m0, %M1", operands);
20546 break;
20547
20548 default:
20549 gcc_unreachable ();
20550 }
20551 }
20552
20553 return "";
20554 }
20555
20556 /* Output a VFP load or store instruction. */
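/* Typical outputs (illustrative operands): "vldr.64 d8, [r1]" for a
   double-precision load, "vstr.32 s0, [r2]" for a single-precision store,
   and "vldmia.64 r3!, {d8}" for a post-incremented load.  */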
20557
20558 const char *
20559 output_move_vfp (rtx *operands)
20560 {
20561 rtx reg, mem, addr, ops[2];
20562 int load = REG_P (operands[0]);
20563 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20564 int sp = (!TARGET_VFP_FP16INST
20565 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20566 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20567 const char *templ;
20568 char buff[50];
20569 machine_mode mode;
20570
20571 reg = operands[!load];
20572 mem = operands[load];
20573
20574 mode = GET_MODE (reg);
20575
20576 gcc_assert (REG_P (reg));
20577 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20578 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20579 || mode == SFmode
20580 || mode == DFmode
20581 || mode == HImode
20582 || mode == SImode
20583 || mode == DImode
20584 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20585 gcc_assert (MEM_P (mem));
20586
20587 addr = XEXP (mem, 0);
20588
20589 switch (GET_CODE (addr))
20590 {
20591 case PRE_DEC:
20592 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20593 ops[0] = XEXP (addr, 0);
20594 ops[1] = reg;
20595 break;
20596
20597 case POST_INC:
20598 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20599 ops[0] = XEXP (addr, 0);
20600 ops[1] = reg;
20601 break;
20602
20603 default:
20604 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20605 ops[0] = reg;
20606 ops[1] = mem;
20607 break;
20608 }
20609
20610 sprintf (buff, templ,
20611 load ? "ld" : "st",
20612 dp ? "64" : sp ? "32" : "16",
20613 dp ? "P" : "",
20614 integer_p ? "\t%@ int" : "");
20615 output_asm_insn (buff, ops);
20616
20617 return "";
20618 }
20619
20620 /* Output a Neon double-word or quad-word load or store, or a load
20621 or store for larger structure modes.
20622
20623    WARNING: The ordering of elements is weird in big-endian mode,
20624    because the EABI requires that vectors stored in memory appear
20625    as though they were stored by a VSTM instruction.
20626 GCC RTL defines element ordering based on in-memory order.
20627 This can be different from the architectural ordering of elements
20628 within a NEON register. The intrinsics defined in arm_neon.h use the
20629 NEON register element ordering, not the GCC RTL element ordering.
20630
20631    For example, the in-memory ordering of a big-endian quadword
20632 vector with 16-bit elements when stored from register pair {d0,d1}
20633 will be (lowest address first, d0[N] is NEON register element N):
20634
20635 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20636
20637 When necessary, quadword registers (dN, dN+1) are moved to ARM
20638 registers from rN in the order:
20639
20640 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20641
20642 So that STM/LDM can be used on vectors in ARM registers, and the
20643 same memory layout will result as if VSTM/VLDM were used.
20644
20645 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20646 possible, which allows use of appropriate alignment tags.
20647 Note that the choice of "64" is independent of the actual vector
20648 element size; this size simply ensures that the behavior is
20649 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20650
20651 Due to limitations of those instructions, use of VST1.64/VLD1.64
20652 is not possible if:
20653 - the address contains PRE_DEC, or
20654 - the mode refers to more than 4 double-word registers
20655
20656 In those cases, it would be possible to replace VSTM/VLDM by a
20657 sequence of instructions; this is not currently implemented since
20658 this is not certain to actually improve performance. */
20659
20660 const char *
20661 output_move_neon (rtx *operands)
20662 {
20663 rtx reg, mem, addr, ops[2];
20664 int regno, nregs, load = REG_P (operands[0]);
20665 const char *templ;
20666 char buff[50];
20667 machine_mode mode;
20668
20669 reg = operands[!load];
20670 mem = operands[load];
20671
20672 mode = GET_MODE (reg);
20673
20674 gcc_assert (REG_P (reg));
20675 regno = REGNO (reg);
20676 nregs = REG_NREGS (reg) / 2;
20677 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20678 || NEON_REGNO_OK_FOR_QUAD (regno));
20679 gcc_assert (VALID_NEON_DREG_MODE (mode)
20680 || VALID_NEON_QREG_MODE (mode)
20681 || VALID_NEON_STRUCT_MODE (mode));
20682 gcc_assert (MEM_P (mem));
20683
20684 addr = XEXP (mem, 0);
20685
20686 /* Strip off const from addresses like (const (plus (...))). */
20687 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20688 addr = XEXP (addr, 0);
20689
20690 switch (GET_CODE (addr))
20691 {
20692 case POST_INC:
20693 /* We have to use vldm / vstm for too-large modes. */
20694 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20695 {
20696 templ = "v%smia%%?\t%%0!, %%h1";
20697 ops[0] = XEXP (addr, 0);
20698 }
20699 else
20700 {
20701 templ = "v%s1.64\t%%h1, %%A0";
20702 ops[0] = mem;
20703 }
20704 ops[1] = reg;
20705 break;
20706
20707 case PRE_DEC:
20708 /* We have to use vldm / vstm in this case, since there is no
20709 pre-decrement form of the vld1 / vst1 instructions. */
20710 templ = "v%smdb%%?\t%%0!, %%h1";
20711 ops[0] = XEXP (addr, 0);
20712 ops[1] = reg;
20713 break;
20714
20715 case POST_MODIFY:
20716 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20717 gcc_unreachable ();
20718
20719 case REG:
20720 /* We have to use vldm / vstm for too-large modes. */
20721 if (nregs > 1)
20722 {
20723 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20724 templ = "v%smia%%?\t%%m0, %%h1";
20725 else
20726 templ = "v%s1.64\t%%h1, %%A0";
20727
20728 ops[0] = mem;
20729 ops[1] = reg;
20730 break;
20731 }
20732 /* Fall through. */
20733 case PLUS:
20734 if (GET_CODE (addr) == PLUS)
20735 addr = XEXP (addr, 0);
20736 /* Fall through. */
20737 case LABEL_REF:
20738 {
20739 int i;
20740 int overlap = -1;
20741 for (i = 0; i < nregs; i++)
20742 {
20743 /* We're only using DImode here because it's a convenient
20744 size. */
20745 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20746 ops[1] = adjust_address (mem, DImode, 8 * i);
20747 if (reg_overlap_mentioned_p (ops[0], mem))
20748 {
20749 gcc_assert (overlap == -1);
20750 overlap = i;
20751 }
20752 else
20753 {
20754 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20755 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20756 else
20757 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20758 output_asm_insn (buff, ops);
20759 }
20760 }
20761 if (overlap != -1)
20762 {
20763 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20764 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20765 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20766 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20767 else
20768 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20769 output_asm_insn (buff, ops);
20770 }
20771
20772 return "";
20773 }
20774
20775 default:
20776 gcc_unreachable ();
20777 }
20778
20779 sprintf (buff, templ, load ? "ld" : "st");
20780 output_asm_insn (buff, ops);
20781
20782 return "";
20783 }
20784
20785 /* Compute and return the length of neon_mov<mode>, where <mode> is
20786 one of VSTRUCT modes: EI, OI, CI or XI. */
20787 int
20788 arm_attr_length_move_neon (rtx_insn *insn)
20789 {
20790 rtx reg, mem, addr;
20791 int load;
20792 machine_mode mode;
20793
20794 extract_insn_cached (insn);
20795
20796 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20797 {
20798 mode = GET_MODE (recog_data.operand[0]);
20799 switch (mode)
20800 {
20801 case E_EImode:
20802 case E_OImode:
20803 return 8;
20804 case E_CImode:
20805 return 12;
20806 case E_XImode:
20807 return 16;
20808 default:
20809 gcc_unreachable ();
20810 }
20811 }
20812
20813 load = REG_P (recog_data.operand[0]);
20814 reg = recog_data.operand[!load];
20815 mem = recog_data.operand[load];
20816
20817 gcc_assert (MEM_P (mem));
20818
20819 addr = XEXP (mem, 0);
20820
20821 /* Strip off const from addresses like (const (plus (...))). */
20822 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20823 addr = XEXP (addr, 0);
20824
20825 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20826 {
20827 int insns = REG_NREGS (reg) / 2;
20828 return insns * 4;
20829 }
20830 else
20831 return 4;
20832 }
20833
20834 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20835 return zero. */
20836
20837 int
20838 arm_address_offset_is_imm (rtx_insn *insn)
20839 {
20840 rtx mem, addr;
20841
20842 extract_insn_cached (insn);
20843
20844 if (REG_P (recog_data.operand[0]))
20845 return 0;
20846
20847 mem = recog_data.operand[0];
20848
20849 gcc_assert (MEM_P (mem));
20850
20851 addr = XEXP (mem, 0);
20852
20853 if (REG_P (addr)
20854 || (GET_CODE (addr) == PLUS
20855 && REG_P (XEXP (addr, 0))
20856 && CONST_INT_P (XEXP (addr, 1))))
20857 return 1;
20858 else
20859 return 0;
20860 }
20861
20862 /* Output an ADD r, s, #n where n may be too big for one instruction.
20863 If adding zero to one register, output nothing. */
20864 const char *
20865 output_add_immediate (rtx *operands)
20866 {
20867 HOST_WIDE_INT n = INTVAL (operands[2]);
20868
20869 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20870 {
20871 if (n < 0)
20872 output_multi_immediate (operands,
20873 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20874 -n);
20875 else
20876 output_multi_immediate (operands,
20877 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20878 n);
20879 }
20880
20881 return "";
20882 }
20883
20884 /* Output a multiple immediate operation.
20885 OPERANDS is the vector of operands referred to in the output patterns.
20886 INSTR1 is the output pattern to use for the first constant.
20887 INSTR2 is the output pattern to use for subsequent constants.
20888 IMMED_OP is the index of the constant slot in OPERANDS.
20889 N is the constant value. */
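/* A worked example (register names chosen for illustration): splitting
   N == 257 for an add with operands r0 and r1 emits

	add	r0, r1, #1
	add	r0, r0, #256

   since each step handles one 8-bit chunk aligned on an even bit position,
   matching what an ARM data-processing immediate can encode.  */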
20890 static const char *
20891 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20892 int immed_op, HOST_WIDE_INT n)
20893 {
20894 #if HOST_BITS_PER_WIDE_INT > 32
20895 n &= 0xffffffff;
20896 #endif
20897
20898 if (n == 0)
20899 {
20900 /* Quick and easy output. */
20901 operands[immed_op] = const0_rtx;
20902 output_asm_insn (instr1, operands);
20903 }
20904 else
20905 {
20906 int i;
20907 const char * instr = instr1;
20908
20909 /* Note that n is never zero here (which would give no output). */
20910 for (i = 0; i < 32; i += 2)
20911 {
20912 if (n & (3 << i))
20913 {
20914 operands[immed_op] = GEN_INT (n & (255 << i));
20915 output_asm_insn (instr, operands);
20916 instr = instr2;
20917 i += 6;
20918 }
20919 }
20920 }
20921
20922 return "";
20923 }
20924
20925 /* Return the name of a shifter operation. */
20926 static const char *
20927 arm_shift_nmem(enum rtx_code code)
20928 {
20929 switch (code)
20930 {
20931 case ASHIFT:
20932 return ARM_LSL_NAME;
20933
20934 case ASHIFTRT:
20935 return "asr";
20936
20937 case LSHIFTRT:
20938 return "lsr";
20939
20940 case ROTATERT:
20941 return "ror";
20942
20943 default:
20944 abort();
20945 }
20946 }
20947
20948 /* Return the appropriate ARM instruction for the operation code.
20949 The returned result should not be overwritten. OP is the rtx of the
20950 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20951 was shifted. */
20952 const char *
20953 arithmetic_instr (rtx op, int shift_first_arg)
20954 {
20955 switch (GET_CODE (op))
20956 {
20957 case PLUS:
20958 return "add";
20959
20960 case MINUS:
20961 return shift_first_arg ? "rsb" : "sub";
20962
20963 case IOR:
20964 return "orr";
20965
20966 case XOR:
20967 return "eor";
20968
20969 case AND:
20970 return "and";
20971
20972 case ASHIFT:
20973 case ASHIFTRT:
20974 case LSHIFTRT:
20975 case ROTATERT:
20976 return arm_shift_nmem(GET_CODE(op));
20977
20978 default:
20979 gcc_unreachable ();
20980 }
20981 }
20982
20983 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20984 for the operation code. The returned result should not be overwritten.
20985 OP is the rtx code of the shift.
20986    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20987    constant shift amount otherwise.  */
20988 static const char *
20989 shift_op (rtx op, HOST_WIDE_INT *amountp)
20990 {
20991 const char * mnem;
20992 enum rtx_code code = GET_CODE (op);
20993
20994 switch (code)
20995 {
20996 case ROTATE:
20997 if (!CONST_INT_P (XEXP (op, 1)))
20998 {
20999 output_operand_lossage ("invalid shift operand");
21000 return NULL;
21001 }
21002
21003 code = ROTATERT;
21004 *amountp = 32 - INTVAL (XEXP (op, 1));
21005 mnem = "ror";
21006 break;
21007
21008 case ASHIFT:
21009 case ASHIFTRT:
21010 case LSHIFTRT:
21011 case ROTATERT:
21012 mnem = arm_shift_nmem(code);
21013 if (CONST_INT_P (XEXP (op, 1)))
21014 {
21015 *amountp = INTVAL (XEXP (op, 1));
21016 }
21017 else if (REG_P (XEXP (op, 1)))
21018 {
21019 *amountp = -1;
21020 return mnem;
21021 }
21022 else
21023 {
21024 output_operand_lossage ("invalid shift operand");
21025 return NULL;
21026 }
21027 break;
21028
21029 case MULT:
21030 /* We never have to worry about the amount being other than a
21031 power of 2, since this case can never be reloaded from a reg. */
21032 if (!CONST_INT_P (XEXP (op, 1)))
21033 {
21034 output_operand_lossage ("invalid shift operand");
21035 return NULL;
21036 }
21037
21038 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21039
21040 /* Amount must be a power of two. */
21041 if (*amountp & (*amountp - 1))
21042 {
21043 output_operand_lossage ("invalid shift operand");
21044 return NULL;
21045 }
21046
21047 *amountp = exact_log2 (*amountp);
21048 gcc_assert (IN_RANGE (*amountp, 0, 31));
21049 return ARM_LSL_NAME;
21050
21051 default:
21052 output_operand_lossage ("invalid shift operand");
21053 return NULL;
21054 }
21055
21056 /* This is not 100% correct, but follows from the desire to merge
21057 multiplication by a power of 2 with the recognizer for a
21058      shift.  >=32 is not a valid shift for "lsl", so we must try to
21059 output a shift that produces the correct arithmetical result.
21060 Using lsr #32 is identical except for the fact that the carry bit
21061 is not set correctly if we set the flags; but we never use the
21062 carry bit from such an operation, so we can ignore that. */
21063 if (code == ROTATERT)
21064 /* Rotate is just modulo 32. */
21065 *amountp &= 31;
21066 else if (*amountp != (*amountp & 31))
21067 {
21068 if (code == ASHIFT)
21069 mnem = "lsr";
21070 *amountp = 32;
21071 }
21072
21073 /* Shifts of 0 are no-ops. */
21074 if (*amountp == 0)
21075 return NULL;
21076
21077 return mnem;
21078 }
21079
21080 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21081 because /bin/as is horribly restrictive. The judgement about
21082 whether or not each character is 'printable' (and can be output as
21083 is) or not (and must be printed with an octal escape) must be made
21084 with reference to the *host* character set -- the situation is
21085 similar to that discussed in the comments above pp_c_char in
21086 c-pretty-print.cc. */
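/* For example, the four input bytes  a " b \n  come out as

	.ascii	"a\"b\012"

   printable characters are passed through (escaping '\' and '"') and
   anything else becomes a three-digit octal escape.  */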
21087
21088 #define MAX_ASCII_LEN 51
21089
21090 void
21091 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21092 {
21093 int i;
21094 int len_so_far = 0;
21095
21096 fputs ("\t.ascii\t\"", stream);
21097
21098 for (i = 0; i < len; i++)
21099 {
21100 int c = p[i];
21101
21102 if (len_so_far >= MAX_ASCII_LEN)
21103 {
21104 fputs ("\"\n\t.ascii\t\"", stream);
21105 len_so_far = 0;
21106 }
21107
21108 if (ISPRINT (c))
21109 {
21110 if (c == '\\' || c == '\"')
21111 {
21112 putc ('\\', stream);
21113 len_so_far++;
21114 }
21115 putc (c, stream);
21116 len_so_far++;
21117 }
21118 else
21119 {
21120 fprintf (stream, "\\%03o", c);
21121 len_so_far += 4;
21122 }
21123 }
21124
21125 fputs ("\"\n", stream);
21126 }
21127 \f
21128
21129 /* Compute the register save mask for registers 0 through 12
21130 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21131
21132 static unsigned long
21133 arm_compute_save_reg0_reg12_mask (void)
21134 {
21135 unsigned long func_type = arm_current_func_type ();
21136 unsigned long save_reg_mask = 0;
21137 unsigned int reg;
21138
21139 if (IS_INTERRUPT (func_type))
21140 {
21141 unsigned int max_reg;
21142 /* Interrupt functions must not corrupt any registers,
21143 even call clobbered ones. If this is a leaf function
21144 we can just examine the registers used by the RTL, but
21145 otherwise we have to assume that whatever function is
21146 called might clobber anything, and so we have to save
21147 all the call-clobbered registers as well. */
21148 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21149 /* FIQ handlers have registers r8 - r12 banked, so
21150 	 we only need to check r0 - r7; normal ISRs only
21151 bank r14 and r15, so we must check up to r12.
21152 r13 is the stack pointer which is always preserved,
21153 so we do not need to consider it here. */
21154 max_reg = 7;
21155 else
21156 max_reg = 12;
21157
21158 for (reg = 0; reg <= max_reg; reg++)
21159 if (reg_needs_saving_p (reg))
21160 save_reg_mask |= (1 << reg);
21161
21162 /* Also save the pic base register if necessary. */
21163 if (PIC_REGISTER_MAY_NEED_SAVING
21164 && crtl->uses_pic_offset_table)
21165 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21166 }
21167 else if (IS_VOLATILE(func_type))
21168 {
21169 /* For noreturn functions we historically omitted register saves
21170 altogether. However this really messes up debugging. As a
21171 	 compromise, save just the frame pointers.  Combined with the link
21172 register saved elsewhere this should be sufficient to get
21173 a backtrace. */
21174 if (frame_pointer_needed)
21175 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21176 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21177 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21178 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21179 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21180 }
21181 else
21182 {
21183 /* In the normal case we only need to save those registers
21184 which are call saved and which are used by this function. */
21185 for (reg = 0; reg <= 11; reg++)
21186 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21187 save_reg_mask |= (1 << reg);
21188
21189 /* Handle the frame pointer as a special case. */
21190 if (frame_pointer_needed)
21191 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21192
21193 /* If we aren't loading the PIC register,
21194 don't stack it even though it may be live. */
21195 if (PIC_REGISTER_MAY_NEED_SAVING
21196 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21197 || crtl->uses_pic_offset_table))
21198 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21199
21200 /* The prologue will copy SP into R0, so save it. */
21201 if (IS_STACKALIGN (func_type))
21202 save_reg_mask |= 1;
21203 }
21204
21205 /* Save registers so the exception handler can modify them. */
21206 if (crtl->calls_eh_return)
21207 {
21208 unsigned int i;
21209
21210 for (i = 0; ; i++)
21211 {
21212 reg = EH_RETURN_DATA_REGNO (i);
21213 if (reg == INVALID_REGNUM)
21214 break;
21215 save_reg_mask |= 1 << reg;
21216 }
21217 }
21218
21219 return save_reg_mask;
21220 }
21221
21222 /* Return true if r3 is live at the start of the function. */
21223
21224 static bool
21225 arm_r3_live_at_start_p (void)
21226 {
21227 /* Just look at cfg info, which is still close enough to correct at this
21228 point. This gives false positives for broken functions that might use
21229 uninitialized data that happens to be allocated in r3, but who cares? */
21230 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21231 }
21232
21233 /* Compute the number of bytes used to store the static chain register on the
21234 stack, above the stack frame. We need to know this accurately to get the
21235 alignment of the rest of the stack frame correct. */
21236
21237 static int
21238 arm_compute_static_chain_stack_bytes (void)
21239 {
21240 /* Once the value is updated from the init value of -1, do not
21241 re-compute. */
21242 if (cfun->machine->static_chain_stack_bytes != -1)
21243 return cfun->machine->static_chain_stack_bytes;
21244
21245 /* See the defining assertion in arm_expand_prologue. */
21246 if (IS_NESTED (arm_current_func_type ())
21247 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21248 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21249 || flag_stack_clash_protection)
21250 && !df_regs_ever_live_p (LR_REGNUM)))
21251 && arm_r3_live_at_start_p ()
21252 && crtl->args.pretend_args_size == 0)
21253 return 4;
21254
21255 return 0;
21256 }
21257
21258 /* Compute a bit mask of which core registers need to be
21259 saved on the stack for the current function.
21260 This is used by arm_compute_frame_layout, which may add extra registers. */
21261
21262 static unsigned long
21263 arm_compute_save_core_reg_mask (void)
21264 {
21265 unsigned int save_reg_mask = 0;
21266 unsigned long func_type = arm_current_func_type ();
21267 unsigned int reg;
21268
21269 if (IS_NAKED (func_type))
21270 /* This should never really happen. */
21271 return 0;
21272
21273 /* If we are creating a stack frame, then we must save the frame pointer,
21274 IP (which will hold the old stack pointer), LR and the PC. */
21275 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21276 save_reg_mask |=
21277 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21278 | (1 << IP_REGNUM)
21279 | (1 << LR_REGNUM)
21280 | (1 << PC_REGNUM);
21281
21282 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21283
21284 if (arm_current_function_pac_enabled_p ())
21285 save_reg_mask |= 1 << IP_REGNUM;
21286
21287 /* Decide if we need to save the link register.
21288 Interrupt routines have their own banked link register,
21289 so they never need to save it.
21290 Otherwise if we do not use the link register we do not need to save
21291 it. If we are pushing other registers onto the stack however, we
21292 can save an instruction in the epilogue by pushing the link register
21293 now and then popping it back into the PC. This incurs extra memory
21294 accesses though, so we only do it when optimizing for size, and only
21295 if we know that we will not need a fancy return sequence. */
21296 if (df_regs_ever_live_p (LR_REGNUM)
21297 || (save_reg_mask
21298 && optimize_size
21299 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21300 && !crtl->tail_call_emit
21301 && !crtl->calls_eh_return))
21302 save_reg_mask |= 1 << LR_REGNUM;
21303
21304 if (cfun->machine->lr_save_eliminated)
21305 save_reg_mask &= ~ (1 << LR_REGNUM);
21306
21307 if (TARGET_REALLY_IWMMXT
21308 && ((bit_count (save_reg_mask)
21309 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21310 arm_compute_static_chain_stack_bytes())
21311 ) % 2) != 0)
21312 {
21313 /* The total number of registers that are going to be pushed
21314 onto the stack is odd. We need to ensure that the stack
21315 is 64-bit aligned before we start to save iWMMXt registers,
21316 and also before we start to create locals. (A local variable
21317 might be a double or long long which we will load/store using
21318 an iWMMXt instruction). Therefore we need to push another
21319 ARM register, so that the stack will be 64-bit aligned. We
21320 try to avoid using the arg registers (r0 -r3) as they might be
21321 used to pass values in a tail call. */
21322 for (reg = 4; reg <= 12; reg++)
21323 if ((save_reg_mask & (1 << reg)) == 0)
21324 break;
21325
21326 if (reg <= 12)
21327 save_reg_mask |= (1 << reg);
21328 else
21329 {
21330 cfun->machine->sibcall_blocked = 1;
21331 save_reg_mask |= (1 << 3);
21332 }
21333 }
21334
21335 /* We may need to push an additional register for use initializing the
21336 PIC base register. */
21337 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21338 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21339 {
21340 reg = thumb_find_work_register (1 << 4);
21341 if (!call_used_or_fixed_reg_p (reg))
21342 save_reg_mask |= (1 << reg);
21343 }
21344
21345 return save_reg_mask;
21346 }
21347
21348 /* Compute a bit mask of which core registers need to be
21349 saved on the stack for the current function. */
21350 static unsigned long
21351 thumb1_compute_save_core_reg_mask (void)
21352 {
21353 unsigned long mask;
21354 unsigned reg;
21355
21356 mask = 0;
21357 for (reg = 0; reg < 12; reg ++)
21358 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21359 mask |= 1 << reg;
21360
21361 /* Handle the frame pointer as a special case. */
21362 if (frame_pointer_needed)
21363 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21364
21365 if (flag_pic
21366 && !TARGET_SINGLE_PIC_BASE
21367 && arm_pic_register != INVALID_REGNUM
21368 && crtl->uses_pic_offset_table)
21369 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21370
21371 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21372 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21373 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21374
21375 /* LR will also be pushed if any lo regs are pushed. */
21376 if (mask & 0xff || thumb_force_lr_save ())
21377 mask |= (1 << LR_REGNUM);
21378
21379 bool call_clobbered_scratch
21380 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21381 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21382
21383 /* Make sure we have a low work register if we need one. We will
21384 need one if we are going to push a high register, but we are not
21385 currently intending to push a low register. However if both the
21386 prologue and epilogue have a spare call-clobbered low register,
21387 then we won't need to find an additional work register. It does
21388 not need to be the same register in the prologue and
21389 epilogue. */
21390 if ((mask & 0xff) == 0
21391 && !call_clobbered_scratch
21392 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21393 {
21394 /* Use thumb_find_work_register to choose which register
21395 we will use. If the register is live then we will
21396 have to push it. Use LAST_LO_REGNUM as our fallback
21397 choice for the register to select. */
21398 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21399 /* Make sure the register returned by thumb_find_work_register is
21400 not part of the return value. */
21401 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21402 reg = LAST_LO_REGNUM;
21403
21404 if (callee_saved_reg_p (reg))
21405 mask |= 1 << reg;
21406 }
21407
21408 /* The 504 below is 8 bytes less than 512 because there are two possible
21409 alignment words. We can't tell here if they will be present or not so we
21410 have to play it safe and assume that they are. */
21411 if ((CALLER_INTERWORKING_SLOT_SIZE +
21412 ROUND_UP_WORD (get_frame_size ()) +
21413 crtl->outgoing_args_size) >= 504)
21414 {
21415 /* This is the same as the code in thumb1_expand_prologue() which
21416 determines which register to use for stack decrement. */
21417 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21418 if (mask & (1 << reg))
21419 break;
21420
21421 if (reg > LAST_LO_REGNUM)
21422 {
21423 /* Make sure we have a register available for stack decrement. */
21424 mask |= 1 << LAST_LO_REGNUM;
21425 }
21426 }
21427
21428 return mask;
21429 }
21430
21431 /* Return the number of bytes required to save VFP registers. */
21432 static int
21433 arm_get_vfp_saved_size (void)
21434 {
21435 unsigned int regno;
21436 int count;
21437 int saved;
21438
21439 saved = 0;
21440 /* Space for saved VFP registers. */
21441 if (TARGET_VFP_BASE)
21442 {
21443 count = 0;
21444 for (regno = FIRST_VFP_REGNUM;
21445 regno < LAST_VFP_REGNUM;
21446 regno += 2)
21447 {
21448 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21449 {
21450 if (count > 0)
21451 {
21452 /* Workaround ARM10 VFPr1 bug. */
21453 if (count == 2 && !arm_arch6)
21454 count++;
21455 saved += count * 8;
21456 }
21457 count = 0;
21458 }
21459 else
21460 count++;
21461 }
21462 if (count > 0)
21463 {
21464 if (count == 2 && !arm_arch6)
21465 count++;
21466 saved += count * 8;
21467 }
21468 }
21469 return saved;
21470 }
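
/* For example, if only d8 and d9 need saving on a core without arm_arch6,
   the run of two D registers is padded to three by the ARM10 VFPr1
   workaround above, so 24 bytes are reserved rather than 16; a run of
   four D registers is counted as-is and takes 32 bytes. */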
21471
21472
21473 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21474 everything bar the final return instruction. If SIMPLE_RETURN is true,
21475 then do not output the epilogue, because it has already been emitted in RTL.
21476
21477 Note: do not forget to update length attribute of corresponding insn pattern
21478 when changing assembly output (eg. length attribute of
21479 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21480 register clearing sequences). */
21481 const char *
21482 output_return_instruction (rtx operand, bool really_return, bool reverse,
21483 bool simple_return)
21484 {
21485 char conditional[10];
21486 char instr[100];
21487 unsigned reg;
21488 unsigned long live_regs_mask;
21489 unsigned long func_type;
21490 arm_stack_offsets *offsets;
21491
21492 func_type = arm_current_func_type ();
21493
21494 if (IS_NAKED (func_type))
21495 return "";
21496
21497 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21498 {
21499 /* If this function was declared non-returning, and we have
21500 found a tail call, then we have to trust that the called
21501 function won't return. */
21502 if (really_return)
21503 {
21504 rtx ops[2];
21505
21506 /* Otherwise, trap an attempted return by aborting. */
21507 ops[0] = operand;
21508 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21509 : "abort");
21510 assemble_external_libcall (ops[1]);
21511 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21512 }
21513
21514 return "";
21515 }
21516
21517 gcc_assert (!cfun->calls_alloca || really_return);
21518
21519 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21520
21521 cfun->machine->return_used_this_function = 1;
21522
21523 offsets = arm_get_frame_offsets ();
21524 live_regs_mask = offsets->saved_regs_mask;
21525
21526 if (!simple_return && live_regs_mask)
21527 {
21528 const char * return_reg;
21529
21530 /* If we do not have any special requirements for function exit
21531 (e.g. interworking) then we can load the return address
21532 directly into the PC. Otherwise we must load it into LR. */
21533 if (really_return
21534 && !IS_CMSE_ENTRY (func_type)
21535 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21536 return_reg = reg_names[PC_REGNUM];
21537 else
21538 return_reg = reg_names[LR_REGNUM];
21539
21540 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21541 {
21542 /* There are three possible reasons for the IP register
21543 being saved: 1) a stack frame was created, in which case
21544 IP contains the old stack pointer; 2) an ISR routine
21545 corrupted it; or 3) it was saved to align the stack on
21546 iWMMXt. In case 1, restore IP into SP; otherwise just
21547 restore IP. */
21548 if (frame_pointer_needed)
21549 {
21550 live_regs_mask &= ~ (1 << IP_REGNUM);
21551 live_regs_mask |= (1 << SP_REGNUM);
21552 }
21553 else
21554 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21555 }
21556
21557 /* On some ARM architectures it is faster to use LDR rather than
21558 LDM to load a single register. On other architectures, the
21559 cost is the same. In 26 bit mode, or for exception handlers,
21560 we have to use LDM to load the PC so that the CPSR is also
21561 restored. */
21562 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21563 if (live_regs_mask == (1U << reg))
21564 break;
21565
21566 if (reg <= LAST_ARM_REGNUM
21567 && (reg != LR_REGNUM
21568 || ! really_return
21569 || ! IS_INTERRUPT (func_type)))
21570 {
21571 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21572 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21573 }
21574 else
21575 {
21576 char *p;
21577 int first = 1;
21578
21579 /* Generate the load multiple instruction to restore the
21580 registers. Note we can get here, even if
21581 frame_pointer_needed is true, but only if sp already
21582 points to the base of the saved core registers. */
21583 if (live_regs_mask & (1 << SP_REGNUM))
21584 {
21585 unsigned HOST_WIDE_INT stack_adjust;
21586
21587 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21588 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21589
21590 if (stack_adjust && arm_arch5t && TARGET_ARM)
21591 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21592 else
21593 {
21594 /* If we can't use ldmib (SA110 bug),
21595 then try to pop r3 instead. */
21596 if (stack_adjust)
21597 live_regs_mask |= 1 << 3;
21598
21599 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21600 }
21601 }
21602 /* For interrupt returns we have to use an LDM rather than
21603 a POP so that we can use the exception return variant. */
21604 else if (IS_INTERRUPT (func_type))
21605 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21606 else
21607 sprintf (instr, "pop%s\t{", conditional);
21608
21609 p = instr + strlen (instr);
21610
21611 for (reg = 0; reg <= SP_REGNUM; reg++)
21612 if (live_regs_mask & (1 << reg))
21613 {
21614 int l = strlen (reg_names[reg]);
21615
21616 if (first)
21617 first = 0;
21618 else
21619 {
21620 memcpy (p, ", ", 2);
21621 p += 2;
21622 }
21623
21624 memcpy (p, "%|", 2);
21625 memcpy (p + 2, reg_names[reg], l);
21626 p += l + 2;
21627 }
21628
21629 if (live_regs_mask & (1 << LR_REGNUM))
21630 {
21631 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21632 /* If returning from an interrupt, restore the CPSR. */
21633 if (IS_INTERRUPT (func_type))
21634 strcat (p, "^");
21635 }
21636 else
21637 strcpy (p, "}");
21638 }
21639
21640 output_asm_insn (instr, & operand);
21641
21642 /* See if we need to generate an extra instruction to
21643 perform the actual function return. */
21644 if (really_return
21645 && func_type != ARM_FT_INTERWORKED
21646 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21647 {
21648 /* The return has already been handled
21649 by loading the LR into the PC. */
21650 return "";
21651 }
21652 }
21653
21654 if (really_return)
21655 {
21656 switch ((int) ARM_FUNC_TYPE (func_type))
21657 {
21658 case ARM_FT_ISR:
21659 case ARM_FT_FIQ:
21660 /* ??? This is wrong for unified assembly syntax. */
21661 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21662 break;
21663
21664 case ARM_FT_INTERWORKED:
21665 gcc_assert (arm_arch5t || arm_arch4t);
21666 sprintf (instr, "bx%s\t%%|lr", conditional);
21667 break;
21668
21669 case ARM_FT_EXCEPTION:
21670 /* ??? This is wrong for unified assembly syntax. */
21671 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21672 break;
21673
21674 default:
21675 if (IS_CMSE_ENTRY (func_type))
21676 {
21677 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21678 emitted by cmse_nonsecure_entry_clear_before_return () and the
21679 VSTR/VLDR instructions in the prologue and epilogue. */
21680 if (!TARGET_HAVE_FPCXT_CMSE)
21681 {
21682 /* Check if we have to clear the 'GE bits', which are only used if
21683 parallel add and subtraction instructions are available. */
21684 if (TARGET_INT_SIMD)
21685 snprintf (instr, sizeof (instr),
21686 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21687 else
21688 snprintf (instr, sizeof (instr),
21689 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21690
21691 output_asm_insn (instr, & operand);
21692 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21693 care of it. */
21694 if (TARGET_HARD_FLOAT)
21695 {
21696 /* Clear the cumulative exception-status bits (0-4,7) and
21697 the condition code bits (28-31) of the FPSCR. We need
21698 to remember to clear the first scratch register used
21699 (IP) and save and restore the second (r4).
21700
21701 Important note: the length of the
21702 thumb2_cmse_entry_return insn pattern must account for
21703 the size of the below instructions. */
21704 output_asm_insn ("push\t{%|r4}", & operand);
21705 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21706 output_asm_insn ("movw\t%|r4, #65376", & operand);
21707 output_asm_insn ("movt\t%|r4, #4095", & operand);
21708 output_asm_insn ("and\t%|ip, %|r4", & operand);
21709 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21710 output_asm_insn ("pop\t{%|r4}", & operand);
21711 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21712 }
21713 }
21714 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21715 }
21716 /* Use bx if it's available. */
21717 else if (arm_arch5t || arm_arch4t)
21718 sprintf (instr, "bx%s\t%%|lr", conditional);
21719 else
21720 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21721 break;
21722 }
21723
21724 output_asm_insn (instr, & operand);
21725 }
21726
21727 return "";
21728 }
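
/* As a rough illustration of the sequences built above (the exact forms
   depend on the condition, the function type and the live register mask):

     normal function, v4T/v5T or later:  bx    lr
     normal function, earlier cores:     mov   pc, lr
     interworked return:                 bx    lr
     ISR or FIQ handler:                 subs  pc, lr, #4
     exception handler:                  movs  pc, lr
     CMSE entry function:                bxns  lr  (after the APSR/FPSCR
                                                    clearing shown above)

   When saved registers are popped and LR is in the mask, the return is
   instead folded into the load, e.g. "pop {r4, r5, pc}" or, for interrupt
   returns, "ldmfd sp!, {r4, r5, pc}^". */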
21729
21730 /* Output in FILE asm statements needed to declare the NAME of the function
21731 defined by its DECL node. */
21732
21733 void
21734 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21735 {
21736 size_t cmse_name_len;
21737 char *cmse_name = 0;
21738 char cmse_prefix[] = "__acle_se_";
21739
21740 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21741 extra function label for each function with the 'cmse_nonsecure_entry'
21742 attribute. This extra function label should be prepended with
21743 '__acle_se_', telling the linker that it needs to create secure gateway
21744 veneers for this function. */
21745 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21746 DECL_ATTRIBUTES (decl)))
21747 {
21748 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21749 cmse_name = XALLOCAVEC (char, cmse_name_len);
21750 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21751 targetm.asm_out.globalize_label (file, cmse_name);
21752
21753 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21754 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21755 }
21756
21757 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21758 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21759 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21760 ASM_OUTPUT_LABEL (file, name);
21761
21762 if (cmse_name)
21763 ASM_OUTPUT_LABEL (file, cmse_name);
21764
21765 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21766 }
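
/* For a hypothetical function "foo" carrying the cmse_nonsecure_entry
   attribute, the output produced above looks roughly like this (the exact
   directive spellings depend on the target's assembler macros):

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   Both labels name the same address, which lets the linker create the
   secure gateway veneer. */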
21767
21768 /* Write the function name into the code section, directly preceding
21769 the function prologue.
21770
21771 Code will be output similar to this:
21772 t0
21773 .ascii "arm_poke_function_name", 0
21774 .align
21775 t1
21776 .word 0xff000000 + (t1 - t0)
21777 arm_poke_function_name
21778 mov ip, sp
21779 stmfd sp!, {fp, ip, lr, pc}
21780 sub fp, ip, #4
21781
21782 When performing a stack backtrace, code can inspect the value
21783 of 'pc' stored at 'fp' + 0. If the trace function then looks
21784 at location pc - 12 and the top 8 bits are set, then we know
21785 that there is a function name embedded immediately preceding this
21786 location, whose length is ((pc[-3]) & ~0xff000000).
21787
21788 We assume that pc is declared as a pointer to an unsigned long.
21789
21790 It is of no benefit to output the function name if we are assembling
21791 a leaf function. These function types will not contain a stack
21792 backtrace structure, therefore it is not possible to determine the
21793 function name. */
21794 void
21795 arm_poke_function_name (FILE *stream, const char *name)
21796 {
21797 unsigned long alignlength;
21798 unsigned long length;
21799 rtx x;
21800
21801 length = strlen (name) + 1;
21802 alignlength = ROUND_UP_WORD (length);
21803
21804 ASM_OUTPUT_ASCII (stream, name, length);
21805 ASM_OUTPUT_ALIGN (stream, 2);
21806 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21807 assemble_aligned_integer (UNITS_PER_WORD, x);
21808 }
21809
21810 /* Place some comments into the assembler stream
21811 describing the current function. */
21812 static void
21813 arm_output_function_prologue (FILE *f)
21814 {
21815 unsigned long func_type;
21816
21817 /* Sanity check. */
21818 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21819
21820 func_type = arm_current_func_type ();
21821
21822 switch ((int) ARM_FUNC_TYPE (func_type))
21823 {
21824 default:
21825 case ARM_FT_NORMAL:
21826 break;
21827 case ARM_FT_INTERWORKED:
21828 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21829 break;
21830 case ARM_FT_ISR:
21831 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21832 break;
21833 case ARM_FT_FIQ:
21834 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21835 break;
21836 case ARM_FT_EXCEPTION:
21837 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21838 break;
21839 }
21840
21841 if (IS_NAKED (func_type))
21842 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21843
21844 if (IS_VOLATILE (func_type))
21845 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21846
21847 if (IS_NESTED (func_type))
21848 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21849 if (IS_STACKALIGN (func_type))
21850 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21851 if (IS_CMSE_ENTRY (func_type))
21852 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21853
21854 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21855 (HOST_WIDE_INT) crtl->args.size,
21856 crtl->args.pretend_args_size,
21857 (HOST_WIDE_INT) get_frame_size ());
21858
21859 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21860 frame_pointer_needed,
21861 cfun->machine->uses_anonymous_args);
21862
21863 if (cfun->machine->lr_save_eliminated)
21864 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21865
21866 if (crtl->calls_eh_return)
21867 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21868
21869 }
21870
21871 static void
21872 arm_output_function_epilogue (FILE *)
21873 {
21874 arm_stack_offsets *offsets;
21875
21876 if (TARGET_THUMB1)
21877 {
21878 int regno;
21879
21880 /* Emit any call-via-reg trampolines that are needed for v4t support
21881 of call_reg and call_value_reg type insns. */
21882 for (regno = 0; regno < LR_REGNUM; regno++)
21883 {
21884 rtx label = cfun->machine->call_via[regno];
21885
21886 if (label != NULL)
21887 {
21888 switch_to_section (function_section (current_function_decl));
21889 targetm.asm_out.internal_label (asm_out_file, "L",
21890 CODE_LABEL_NUMBER (label));
21891 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21892 }
21893 }
21894
21895 /* ??? Probably not safe to set this here, since it assumes that a
21896 function will be emitted as assembly immediately after we generate
21897 RTL for it. This does not happen for inline functions. */
21898 cfun->machine->return_used_this_function = 0;
21899 }
21900 else /* TARGET_32BIT */
21901 {
21902 /* We need to take into account any stack-frame rounding. */
21903 offsets = arm_get_frame_offsets ();
21904
21905 gcc_assert (!use_return_insn (FALSE, NULL)
21906 || (cfun->machine->return_used_this_function != 0)
21907 || offsets->saved_regs == offsets->outgoing_args
21908 || frame_pointer_needed);
21909 }
21910 }
21911
21912 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21913 STR and STRD. If an even number of registers is being pushed, an
21914 STRD pattern is created for each register pair. If an odd number
21915 of registers is pushed, emit an initial STR followed by as many
21916 STRD instructions as are needed. This works best when the stack
21917 is initially 64-bit aligned (the normal case), since it ensures
21918 that each STRD is also 64-bit aligned. */
21919 static void
21920 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21921 {
21922 int num_regs = 0;
21923 int i;
21924 int regno;
21925 rtx par = NULL_RTX;
21926 rtx dwarf = NULL_RTX;
21927 rtx tmp;
21928 bool first = true;
21929
21930 num_regs = bit_count (saved_regs_mask);
21931
21932 /* Must be at least one register to save, and can't save SP or PC. */
21933 gcc_assert (num_regs > 0 && num_regs <= 14);
21934 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21935 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21936
21937 /* Create sequence for DWARF info. All the frame-related data for
21938 debugging is held in this wrapper. */
21939 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21940
21941 /* Describe the stack adjustment. */
21942 tmp = gen_rtx_SET (stack_pointer_rtx,
21943 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21944 RTX_FRAME_RELATED_P (tmp) = 1;
21945 XVECEXP (dwarf, 0, 0) = tmp;
21946
21947 /* Find the first register. */
21948 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21949 ;
21950
21951 i = 0;
21952
21953 /* If there's an odd number of registers to push, start off by
21954 pushing a single register. This ensures that subsequent strd
21955 operations are dword aligned (assuming that SP was originally
21956 64-bit aligned). */
21957 if ((num_regs & 1) != 0)
21958 {
21959 rtx reg, mem, insn;
21960
21961 reg = gen_rtx_REG (SImode, regno);
21962 if (num_regs == 1)
21963 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21964 stack_pointer_rtx));
21965 else
21966 mem = gen_frame_mem (Pmode,
21967 gen_rtx_PRE_MODIFY
21968 (Pmode, stack_pointer_rtx,
21969 plus_constant (Pmode, stack_pointer_rtx,
21970 -4 * num_regs)));
21971
21972 tmp = gen_rtx_SET (mem, reg);
21973 RTX_FRAME_RELATED_P (tmp) = 1;
21974 insn = emit_insn (tmp);
21975 RTX_FRAME_RELATED_P (insn) = 1;
21976 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21977 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21978 RTX_FRAME_RELATED_P (tmp) = 1;
21979 i++;
21980 regno++;
21981 XVECEXP (dwarf, 0, i) = tmp;
21982 first = false;
21983 }
21984
21985 while (i < num_regs)
21986 if (saved_regs_mask & (1 << regno))
21987 {
21988 rtx reg1, reg2, mem1, mem2;
21989 rtx tmp0, tmp1, tmp2;
21990 int regno2;
21991
21992 /* Find the register to pair with this one. */
21993 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21994 regno2++)
21995 ;
21996
21997 reg1 = gen_rtx_REG (SImode, regno);
21998 reg2 = gen_rtx_REG (SImode, regno2);
21999
22000 if (first)
22001 {
22002 rtx insn;
22003
22004 first = false;
22005 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22006 stack_pointer_rtx,
22007 -4 * num_regs));
22008 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22009 stack_pointer_rtx,
22010 -4 * (num_regs - 1)));
22011 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22012 plus_constant (Pmode, stack_pointer_rtx,
22013 -4 * (num_regs)));
22014 tmp1 = gen_rtx_SET (mem1, reg1);
22015 tmp2 = gen_rtx_SET (mem2, reg2);
22016 RTX_FRAME_RELATED_P (tmp0) = 1;
22017 RTX_FRAME_RELATED_P (tmp1) = 1;
22018 RTX_FRAME_RELATED_P (tmp2) = 1;
22019 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22020 XVECEXP (par, 0, 0) = tmp0;
22021 XVECEXP (par, 0, 1) = tmp1;
22022 XVECEXP (par, 0, 2) = tmp2;
22023 insn = emit_insn (par);
22024 RTX_FRAME_RELATED_P (insn) = 1;
22025 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22026 }
22027 else
22028 {
22029 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22030 stack_pointer_rtx,
22031 4 * i));
22032 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22033 stack_pointer_rtx,
22034 4 * (i + 1)));
22035 tmp1 = gen_rtx_SET (mem1, reg1);
22036 tmp2 = gen_rtx_SET (mem2, reg2);
22037 RTX_FRAME_RELATED_P (tmp1) = 1;
22038 RTX_FRAME_RELATED_P (tmp2) = 1;
22039 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22040 XVECEXP (par, 0, 0) = tmp1;
22041 XVECEXP (par, 0, 1) = tmp2;
22042 emit_insn (par);
22043 }
22044
22045 /* Create unwind information. This is an approximation. */
22046 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22047 plus_constant (Pmode,
22048 stack_pointer_rtx,
22049 4 * i)),
22050 reg1);
22051 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22052 plus_constant (Pmode,
22053 stack_pointer_rtx,
22054 4 * (i + 1))),
22055 reg2);
22056
22057 RTX_FRAME_RELATED_P (tmp1) = 1;
22058 RTX_FRAME_RELATED_P (tmp2) = 1;
22059 XVECEXP (dwarf, 0, i + 1) = tmp1;
22060 XVECEXP (dwarf, 0, i + 2) = tmp2;
22061 i += 2;
22062 regno = regno2 + 1;
22063 }
22064 else
22065 regno++;
22066
22067 return;
22068 }
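
/* A sketch of the effect for a hypothetical mask of {r4, r5, r6} (three
   registers, so the odd-count path above is taken):

	str	r4, [sp, #-12]!		@ single store allocates all 12 bytes
	strd	r5, r6, [sp, #4]	@ remaining pair, kept dword aligned

   The exact instructions depend on how the emitted RTL is matched by the
   strd patterns. */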
22069
22070 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22071 whenever possible, otherwise it emits single-word stores. The first store
22072 also allocates stack space for all saved registers, using writeback with
22073 post-addressing mode. All other stores use offset addressing. If no STRD
22074 can be emitted, this function emits a sequence of single-word stores,
22075 and not an STM as before, because single-word stores provide more scheduling
22076 freedom and can be turned into an STM by peephole optimizations. */
22077 static void
22078 arm_emit_strd_push (unsigned long saved_regs_mask)
22079 {
22080 int num_regs = 0;
22081 int i, j, dwarf_index = 0;
22082 int offset = 0;
22083 rtx dwarf = NULL_RTX;
22084 rtx insn = NULL_RTX;
22085 rtx tmp, mem;
22086
22087 /* TODO: More efficient code can be emitted by changing the
22088 layout, e.g., first push all pairs that can use STRD to keep the
22089 stack aligned, and then push all other registers. */
22090 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22091 if (saved_regs_mask & (1 << i))
22092 num_regs++;
22093
22094 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22095 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22096 gcc_assert (num_regs > 0);
22097
22098 /* Create sequence for DWARF info. */
22099 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22100
22101 /* For dwarf info, we generate an explicit stack update. */
22102 tmp = gen_rtx_SET (stack_pointer_rtx,
22103 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22104 RTX_FRAME_RELATED_P (tmp) = 1;
22105 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22106
22107 /* Save registers. */
22108 offset = - 4 * num_regs;
22109 j = 0;
22110 while (j <= LAST_ARM_REGNUM)
22111 if (saved_regs_mask & (1 << j))
22112 {
22113 if ((j % 2 == 0)
22114 && (saved_regs_mask & (1 << (j + 1))))
22115 {
22116 /* Current register and next register form a register pair for
22117 which STRD can be generated. */
22118 if (offset < 0)
22119 {
22120 /* Allocate stack space for all saved registers. */
22121 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22122 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22123 mem = gen_frame_mem (DImode, tmp);
22124 offset = 0;
22125 }
22126 else if (offset > 0)
22127 mem = gen_frame_mem (DImode,
22128 plus_constant (Pmode,
22129 stack_pointer_rtx,
22130 offset));
22131 else
22132 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22133
22134 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22135 RTX_FRAME_RELATED_P (tmp) = 1;
22136 tmp = emit_insn (tmp);
22137
22138 /* Record the first store insn. */
22139 if (dwarf_index == 1)
22140 insn = tmp;
22141
22142 /* Generate dwarf info. */
22143 mem = gen_frame_mem (SImode,
22144 plus_constant (Pmode,
22145 stack_pointer_rtx,
22146 offset));
22147 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22148 RTX_FRAME_RELATED_P (tmp) = 1;
22149 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22150
22151 mem = gen_frame_mem (SImode,
22152 plus_constant (Pmode,
22153 stack_pointer_rtx,
22154 offset + 4));
22155 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22156 RTX_FRAME_RELATED_P (tmp) = 1;
22157 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22158
22159 offset += 8;
22160 j += 2;
22161 }
22162 else
22163 {
22164 /* Emit a single word store. */
22165 if (offset < 0)
22166 {
22167 /* Allocate stack space for all saved registers. */
22168 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22169 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22170 mem = gen_frame_mem (SImode, tmp);
22171 offset = 0;
22172 }
22173 else if (offset > 0)
22174 mem = gen_frame_mem (SImode,
22175 plus_constant (Pmode,
22176 stack_pointer_rtx,
22177 offset));
22178 else
22179 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22180
22181 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22182 RTX_FRAME_RELATED_P (tmp) = 1;
22183 tmp = emit_insn (tmp);
22184
22185 /* Record the first store insn. */
22186 if (dwarf_index == 1)
22187 insn = tmp;
22188
22189 /* Generate dwarf info. */
22190 mem = gen_frame_mem (SImode,
22191 plus_constant(Pmode,
22192 stack_pointer_rtx,
22193 offset));
22194 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22195 RTX_FRAME_RELATED_P (tmp) = 1;
22196 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22197
22198 offset += 4;
22199 j += 1;
22200 }
22201 }
22202 else
22203 j++;
22204
22205 /* Attach dwarf info to the first insn we generate. */
22206 gcc_assert (insn != NULL_RTX);
22207 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22208 RTX_FRAME_RELATED_P (insn) = 1;
22209 }
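
/* A sketch of the effect for a hypothetical mask of {r4, r5, r7}:

	strd	r4, r5, [sp, #-12]!	@ first store allocates all 12 bytes
	str	r7, [sp, #8]		@ r6 is not saved, so r7 gets a
					@ single-word store at offset 8

   As with the Thumb-2 variant, the final assembly depends on how the
   emitted RTL is matched. */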
22210
22211 /* Generate and emit an insn that we will recognize as a push_multi.
22212 Unfortunately, since this insn does not reflect very well the actual
22213 semantics of the operation, we need to annotate the insn for the benefit
22214 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22215 MASK for registers that should be annotated for DWARF2 frame unwind
22216 information. */
22217 static rtx
22218 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22219 {
22220 int num_regs = 0;
22221 int num_dwarf_regs = 0;
22222 int i, j;
22223 rtx par;
22224 rtx dwarf;
22225 int dwarf_par_index;
22226 rtx tmp, reg;
22227
22228 /* We don't record the PC in the dwarf frame information. */
22229 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22230
22231 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22232 {
22233 if (mask & (1 << i))
22234 num_regs++;
22235 if (dwarf_regs_mask & (1 << i))
22236 num_dwarf_regs++;
22237 }
22238
22239 gcc_assert (num_regs && num_regs <= 16);
22240 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22241
22242 /* For the body of the insn we are going to generate an UNSPEC in
22243 parallel with several USEs. This allows the insn to be recognized
22244 by the push_multi pattern in the arm.md file.
22245
22246 The body of the insn looks something like this:
22247
22248 (parallel [
22249 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22250 (const_int:SI <num>)))
22251 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22252 (use (reg:SI XX))
22253 (use (reg:SI YY))
22254 ...
22255 ])
22256
22257 For the frame note however, we try to be more explicit and actually
22258 show each register being stored into the stack frame, plus a (single)
22259 decrement of the stack pointer. We do it this way in order to be
22260 friendly to the stack unwinding code, which only wants to see a single
22261 stack decrement per instruction. The RTL we generate for the note looks
22262 something like this:
22263
22264 (sequence [
22265 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22266 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22267 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22268 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22269 ...
22270 ])
22271
22272 FIXME: In an ideal world the PRE_MODIFY would not exist and
22273 instead we'd have a parallel expression detailing all
22274 the stores to the various memory addresses so that debug
22275 information is more up-to-date. Remember however while writing
22276 this to take care of the constraints with the push instruction.
22277
22278 Note also that this has to be taken care of for the VFP registers.
22279
22280 For more see PR43399. */
22281
22282 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22283 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22284 dwarf_par_index = 1;
22285
22286 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22287 {
22288 if (mask & (1 << i))
22289 {
22290 /* NOTE: The dwarf code emitter handles reg-reg copies correctly and in the
22291 following example the reg-reg copy of SP to the IP register is handled
22292 through the .cfi_def_cfa_register directive and the .cfi_offset
22293 directive for the IP register is skipped by the dwarf code emitter.
22294 Example:
22295 mov ip, sp
22296 .cfi_def_cfa_register 12
22297 push {fp, ip, lr, pc}
22298 .cfi_offset 11, -16
22299 .cfi_offset 13, -12
22300 .cfi_offset 14, -8
22301
22302 Whereas the Arm-specific .save directive handling is different to that
22303 of the dwarf code emitter and doesn't consider reg-reg copies while
22304 updating the register list. When PACBTI is enabled, we manually
22305 update the .save directive register list to use "ra_auth_code"
22306 (pseudo register 143) instead of the IP register, as shown in the
22307 following pseudo code.
22308 Example:
22309 pacbti ip, lr, sp
22310 .cfi_register 143, 12
22311 push {r3, r7, ip, lr}
22312 .save {r3, r7, ra_auth_code, lr}
22313 */
22314 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22315 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22316 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22317
22318 XVECEXP (par, 0, 0)
22319 = gen_rtx_SET (gen_frame_mem
22320 (BLKmode,
22321 gen_rtx_PRE_MODIFY (Pmode,
22322 stack_pointer_rtx,
22323 plus_constant
22324 (Pmode, stack_pointer_rtx,
22325 -4 * num_regs))
22326 ),
22327 gen_rtx_UNSPEC (BLKmode,
22328 gen_rtvec (1, reg),
22329 UNSPEC_PUSH_MULT));
22330
22331 if (dwarf_regs_mask & (1 << i))
22332 {
22333 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22334 dwarf_reg);
22335 RTX_FRAME_RELATED_P (tmp) = 1;
22336 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22337 }
22338
22339 break;
22340 }
22341 }
22342
22343 for (j = 1, i++; j < num_regs; i++)
22344 {
22345 if (mask & (1 << i))
22346 {
22347 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22348 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22349 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22350
22351 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22352
22353 if (dwarf_regs_mask & (1 << i))
22354 {
22355 tmp
22356 = gen_rtx_SET (gen_frame_mem
22357 (SImode,
22358 plus_constant (Pmode, stack_pointer_rtx,
22359 4 * j)),
22360 dwarf_reg);
22361 RTX_FRAME_RELATED_P (tmp) = 1;
22362 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22363 }
22364
22365 j++;
22366 }
22367 }
22368
22369 par = emit_insn (par);
22370
22371 tmp = gen_rtx_SET (stack_pointer_rtx,
22372 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22373 RTX_FRAME_RELATED_P (tmp) = 1;
22374 XVECEXP (dwarf, 0, 0) = tmp;
22375
22376 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22377
22378 return par;
22379 }
22380
22381 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22382 SIZE is the offset to be adjusted.
22383 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22384 static void
22385 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22386 {
22387 rtx dwarf;
22388
22389 RTX_FRAME_RELATED_P (insn) = 1;
22390 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22391 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22392 }
22393
22394 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22395 SAVED_REGS_MASK shows which registers need to be restored.
22396
22397 Unfortunately, since this insn does not reflect very well the actual
22398 semantics of the operation, we need to annotate the insn for the benefit
22399 of DWARF2 frame unwind information. */
22400 static void
22401 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22402 {
22403 int num_regs = 0;
22404 int i, j;
22405 rtx par;
22406 rtx dwarf = NULL_RTX;
22407 rtx tmp, reg;
22408 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22409 int offset_adj;
22410 int emit_update;
22411
22412 offset_adj = return_in_pc ? 1 : 0;
22413 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22414 if (saved_regs_mask & (1 << i))
22415 num_regs++;
22416
22417 gcc_assert (num_regs && num_regs <= 16);
22418
22419 /* If SP is in the reglist, then we don't emit an SP update insn. */
22420 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22421
22422 /* The parallel needs to hold num_regs SETs
22423 and one SET for the stack update. */
22424 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22425
22426 if (return_in_pc)
22427 XVECEXP (par, 0, 0) = ret_rtx;
22428
22429 if (emit_update)
22430 {
22431 /* Increment the stack pointer, based on there being
22432 num_regs 4-byte registers to restore. */
22433 tmp = gen_rtx_SET (stack_pointer_rtx,
22434 plus_constant (Pmode,
22435 stack_pointer_rtx,
22436 4 * num_regs));
22437 RTX_FRAME_RELATED_P (tmp) = 1;
22438 XVECEXP (par, 0, offset_adj) = tmp;
22439 }
22440
22441 /* Now restore every reg, which may include PC. */
22442 for (j = 0, i = 0; j < num_regs; i++)
22443 if (saved_regs_mask & (1 << i))
22444 {
22445 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22446 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22447 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22448 if ((num_regs == 1) && emit_update && !return_in_pc)
22449 {
22450 /* Emit single load with writeback. */
22451 tmp = gen_frame_mem (SImode,
22452 gen_rtx_POST_INC (Pmode,
22453 stack_pointer_rtx));
22454 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22455 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22456 dwarf);
22457 return;
22458 }
22459
22460 tmp = gen_rtx_SET (reg,
22461 gen_frame_mem
22462 (SImode,
22463 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22464 RTX_FRAME_RELATED_P (tmp) = 1;
22465 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22466
22467 /* We need to maintain a sequence for DWARF info too. As dwarf info
22468 should not have PC, skip PC. */
22469 if (i != PC_REGNUM)
22470 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22471
22472 j++;
22473 }
22474
22475 if (return_in_pc)
22476 par = emit_jump_insn (par);
22477 else
22478 par = emit_insn (par);
22479
22480 REG_NOTES (par) = dwarf;
22481 if (!return_in_pc)
22482 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22483 stack_pointer_rtx, stack_pointer_rtx);
22484 }
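
/* A sketch of the parallel built above for a hypothetical mask of
   {r4, r5, pc}, so return_in_pc is true and an SP update is emitted:

	(parallel [(return)
		   (set (reg sp) (plus (reg sp) (const_int 12)))
		   (set (reg r4) (mem (reg sp)))
		   (set (reg r5) (mem (plus (reg sp) (const_int 4))))
		   (set (reg pc) (mem (plus (reg sp) (const_int 8))))])

   which the pop_multi pattern typically renders as "pop {r4, r5, pc}". */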
22485
22486 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22487 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22488
22489 Unfortunately, since this insn does not reflect very well the actual
22490 semantics of the operation, we need to annotate the insn for the benefit
22491 of DWARF2 frame unwind information. */
22492 static void
22493 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22494 {
22495 int i, j;
22496 rtx par;
22497 rtx dwarf = NULL_RTX;
22498 rtx tmp, reg;
22499
22500 gcc_assert (num_regs && num_regs <= 32);
22501
22502 /* Workaround ARM10 VFPr1 bug. */
22503 if (num_regs == 2 && !arm_arch6)
22504 {
22505 if (first_reg == 15)
22506 first_reg--;
22507
22508 num_regs++;
22509 }
22510
22511 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22512 there could be up to 32 D-registers to restore.
22513 If there are more than 16 D-registers, make two recursive calls,
22514 each of which emits one pop_multi instruction. */
22515 if (num_regs > 16)
22516 {
22517 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22518 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22519 return;
22520 }
22521
22522 /* The parallel needs to hold num_regs SETs
22523 and one SET for the stack update. */
22524 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22525
22526 /* Increment the stack pointer, based on there being
22527 num_regs 8-byte registers to restore. */
22528 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22529 RTX_FRAME_RELATED_P (tmp) = 1;
22530 XVECEXP (par, 0, 0) = tmp;
22531
22532 /* Now show every reg that will be restored, using a SET for each. */
22533 for (j = 0, i=first_reg; j < num_regs; i += 2)
22534 {
22535 reg = gen_rtx_REG (DFmode, i);
22536
22537 tmp = gen_rtx_SET (reg,
22538 gen_frame_mem
22539 (DFmode,
22540 plus_constant (Pmode, base_reg, 8 * j)));
22541 RTX_FRAME_RELATED_P (tmp) = 1;
22542 XVECEXP (par, 0, j + 1) = tmp;
22543
22544 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22545
22546 j++;
22547 }
22548
22549 par = emit_insn (par);
22550 REG_NOTES (par) = dwarf;
22551
22552 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22553 if (REGNO (base_reg) == IP_REGNUM)
22554 {
22555 RTX_FRAME_RELATED_P (par) = 1;
22556 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22557 }
22558 else
22559 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22560 base_reg, base_reg);
22561 }
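
/* For example, restoring 20 consecutive D registers cannot be done with a
   single pop_multi, so the recursion above emits one pop of 16 registers,
   which also advances the base register by 128 bytes, followed by a second
   pop of the remaining 4 registers. */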
22562
22563 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22564 even number of registers is being popped, multiple LDRD patterns are created
22565 for all register pairs. If an odd number of registers is popped, the last
22566 register is loaded using an LDR pattern. */
22567 static void
22568 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22569 {
22570 int num_regs = 0;
22571 int i, j;
22572 rtx par = NULL_RTX;
22573 rtx dwarf = NULL_RTX;
22574 rtx tmp, reg, tmp1;
22575 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22576
22577 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22578 if (saved_regs_mask & (1 << i))
22579 num_regs++;
22580
22581 gcc_assert (num_regs && num_regs <= 16);
22582
22583 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22584 to be popped. So, if num_regs is even, now it will become odd,
22585 and we can generate pop with PC. If num_regs is odd, it will be
22586 even now, and ldr with return can be generated for PC. */
22587 if (return_in_pc)
22588 num_regs--;
22589
22590 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22591
22592 /* Var j iterates over all the registers to gather all the registers in
22593 saved_regs_mask. Var i gives the index of saved registers in the stack
22594 frame. A PARALLEL RTX of a register pair is created here, so that the
22595 pattern for LDRD can be matched. As PC is always the last register to be
22596 popped, and we have already decremented num_regs if PC is in the mask,
22597 we don't have to worry about PC in this loop. */
22598 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22599 if (saved_regs_mask & (1 << j))
22600 {
22601 /* Create RTX for memory load. */
22602 reg = gen_rtx_REG (SImode, j);
22603 tmp = gen_rtx_SET (reg,
22604 gen_frame_mem (SImode,
22605 plus_constant (Pmode,
22606 stack_pointer_rtx, 4 * i)));
22607 RTX_FRAME_RELATED_P (tmp) = 1;
22608
22609 if (i % 2 == 0)
22610 {
22611 /* When saved-register index (i) is even, the RTX to be emitted is
22612 yet to be created. Hence create it first. The LDRD pattern we
22613 are generating is :
22614 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22615 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22616 where target registers need not be consecutive. */
22617 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22618 dwarf = NULL_RTX;
22619 }
22620
22621 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22622 added as 0th element and if i is odd, reg_i is added as 1st element
22623 of LDRD pattern shown above. */
22624 XVECEXP (par, 0, (i % 2)) = tmp;
22625 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22626
22627 if ((i % 2) == 1)
22628 {
22629 /* When saved-register index (i) is odd, RTXs for both the registers
22630 to be loaded are generated in above given LDRD pattern, and the
22631 pattern can be emitted now. */
22632 par = emit_insn (par);
22633 REG_NOTES (par) = dwarf;
22634 RTX_FRAME_RELATED_P (par) = 1;
22635 }
22636
22637 i++;
22638 }
22639
22640 /* If the number of registers popped is odd AND return_in_pc is false, OR the
22641 number of registers is even AND return_in_pc is true, the last register is
22642 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22643 then LDR with post increment. */
22644
22645 /* Increment the stack pointer, based on there being
22646 num_regs 4-byte registers to restore. */
22647 tmp = gen_rtx_SET (stack_pointer_rtx,
22648 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22649 RTX_FRAME_RELATED_P (tmp) = 1;
22650 tmp = emit_insn (tmp);
22651 if (!return_in_pc)
22652 {
22653 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22654 stack_pointer_rtx, stack_pointer_rtx);
22655 }
22656
22657 dwarf = NULL_RTX;
22658
22659 if (((num_regs % 2) == 1 && !return_in_pc)
22660 || ((num_regs % 2) == 0 && return_in_pc))
22661 {
22662 /* Scan for the single register to be popped. Skip until the saved
22663 register is found. */
22664 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22665
22666 /* Gen LDR with post increment here. */
22667 tmp1 = gen_rtx_MEM (SImode,
22668 gen_rtx_POST_INC (SImode,
22669 stack_pointer_rtx));
22670 set_mem_alias_set (tmp1, get_frame_alias_set ());
22671
22672 reg = gen_rtx_REG (SImode, j);
22673 tmp = gen_rtx_SET (reg, tmp1);
22674 RTX_FRAME_RELATED_P (tmp) = 1;
22675 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22676
22677 if (return_in_pc)
22678 {
22679 /* If return_in_pc, j must be PC_REGNUM. */
22680 gcc_assert (j == PC_REGNUM);
22681 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22682 XVECEXP (par, 0, 0) = ret_rtx;
22683 XVECEXP (par, 0, 1) = tmp;
22684 par = emit_jump_insn (par);
22685 }
22686 else
22687 {
22688 par = emit_insn (tmp);
22689 REG_NOTES (par) = dwarf;
22690 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22691 stack_pointer_rtx, stack_pointer_rtx);
22692 }
22693
22694 }
22695 else if ((num_regs % 2) == 1 && return_in_pc)
22696 {
22697 /* There are 2 registers to be popped. So, generate the pattern
22698 pop_multiple_with_stack_update_and_return to pop in PC. */
22699 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22700 }
22701
22702 return;
22703 }
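
/* A sketch of the effect for a hypothetical mask of {r4, r5, r6} with no
   PC in the list:

	ldrd	r4, r5, [sp]		@ one LDRD pattern for the pair
	add	sp, sp, #8
	ldr	r6, [sp], #4		@ odd register, with post-increment

   (In Thumb-2 the two LDRD targets need not be consecutive registers.) */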
22704
22705 /* LDRD in ARM mode needs consecutive registers as operands. This function
22706 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22707 offset addressing and then generates one separate stack update. This provides
22708 more scheduling freedom, compared to writeback on every load. However,
22709 if the function returns using load into PC directly
22710 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22711 before the last load. TODO: Add a peephole optimization to recognize
22712 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22713 peephole optimization to merge the load at stack-offset zero
22714 with the stack update instruction using load with writeback
22715 in post-index addressing mode. */
22716 static void
22717 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22718 {
22719 int j = 0;
22720 int offset = 0;
22721 rtx par = NULL_RTX;
22722 rtx dwarf = NULL_RTX;
22723 rtx tmp, mem;
22724
22725 /* Restore saved registers. */
22726 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22727 j = 0;
22728 while (j <= LAST_ARM_REGNUM)
22729 if (saved_regs_mask & (1 << j))
22730 {
22731 if ((j % 2) == 0
22732 && (saved_regs_mask & (1 << (j + 1)))
22733 && (j + 1) != PC_REGNUM)
22734 {
22735 /* Current register and next register form register pair for which
22736 LDRD can be generated. PC is always the last register popped, and
22737 we handle it separately. */
22738 if (offset > 0)
22739 mem = gen_frame_mem (DImode,
22740 plus_constant (Pmode,
22741 stack_pointer_rtx,
22742 offset));
22743 else
22744 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22745
22746 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22747 tmp = emit_insn (tmp);
22748 RTX_FRAME_RELATED_P (tmp) = 1;
22749
22750 /* Generate dwarf info. */
22751
22752 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22753 gen_rtx_REG (SImode, j),
22754 NULL_RTX);
22755 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22756 gen_rtx_REG (SImode, j + 1),
22757 dwarf);
22758
22759 REG_NOTES (tmp) = dwarf;
22760
22761 offset += 8;
22762 j += 2;
22763 }
22764 else if (j != PC_REGNUM)
22765 {
22766 /* Emit a single word load. */
22767 if (offset > 0)
22768 mem = gen_frame_mem (SImode,
22769 plus_constant (Pmode,
22770 stack_pointer_rtx,
22771 offset));
22772 else
22773 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22774
22775 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22776 tmp = emit_insn (tmp);
22777 RTX_FRAME_RELATED_P (tmp) = 1;
22778
22779 /* Generate dwarf info. */
22780 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22781 gen_rtx_REG (SImode, j),
22782 NULL_RTX);
22783
22784 offset += 4;
22785 j += 1;
22786 }
22787 else /* j == PC_REGNUM */
22788 j++;
22789 }
22790 else
22791 j++;
22792
22793 /* Update the stack. */
22794 if (offset > 0)
22795 {
22796 tmp = gen_rtx_SET (stack_pointer_rtx,
22797 plus_constant (Pmode,
22798 stack_pointer_rtx,
22799 offset));
22800 tmp = emit_insn (tmp);
22801 arm_add_cfa_adjust_cfa_note (tmp, offset,
22802 stack_pointer_rtx, stack_pointer_rtx);
22803 offset = 0;
22804 }
22805
22806 if (saved_regs_mask & (1 << PC_REGNUM))
22807 {
22808 /* Only PC is to be popped. */
22809 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22810 XVECEXP (par, 0, 0) = ret_rtx;
22811 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22812 gen_frame_mem (SImode,
22813 gen_rtx_POST_INC (SImode,
22814 stack_pointer_rtx)));
22815 RTX_FRAME_RELATED_P (tmp) = 1;
22816 XVECEXP (par, 0, 1) = tmp;
22817 par = emit_jump_insn (par);
22818
22819 /* Generate dwarf info. */
22820 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22821 gen_rtx_REG (SImode, PC_REGNUM),
22822 NULL_RTX);
22823 REG_NOTES (par) = dwarf;
22824 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22825 stack_pointer_rtx, stack_pointer_rtx);
22826 }
22827 }
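
/* A sketch of the effect for a hypothetical mask of {r4, r5, r6, pc}:

	ldrd	r4, r5, [sp]		@ consecutive even/odd pair
	ldr	r6, [sp, #8]		@ no partner for r6, single-word load
	add	sp, sp, #12
	ldr	pc, [sp], #4		@ final load doubles as the return  */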
22828
22829 /* Calculate the size of the return value that is passed in registers. */
22830 static unsigned
22831 arm_size_return_regs (void)
22832 {
22833 machine_mode mode;
22834
22835 if (crtl->return_rtx != 0)
22836 mode = GET_MODE (crtl->return_rtx);
22837 else
22838 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22839
22840 return GET_MODE_SIZE (mode);
22841 }
22842
22843 /* Return true if the current function needs to save/restore LR. */
22844 static bool
22845 thumb_force_lr_save (void)
22846 {
22847 return !cfun->machine->lr_save_eliminated
22848 && (!crtl->is_leaf
22849 || thumb_far_jump_used_p ()
22850 || df_regs_ever_live_p (LR_REGNUM));
22851 }
22852
22853 /* We do not know whether r3 will be available, because
22854 there is an indirect tailcall happening in this
22855 particular case. */
22856 static bool
22857 is_indirect_tailcall_p (rtx call)
22858 {
22859 rtx pat = PATTERN (call);
22860
22861 /* Indirect tail call. */
22862 pat = XVECEXP (pat, 0, 0);
22863 if (GET_CODE (pat) == SET)
22864 pat = SET_SRC (pat);
22865
22866 pat = XEXP (XEXP (pat, 0), 0);
22867 return REG_P (pat);
22868 }
22869
22870 /* Return true if r3 is used by any of the tail call insns in the
22871 current function. */
22872 static bool
22873 any_sibcall_could_use_r3 (void)
22874 {
22875 edge_iterator ei;
22876 edge e;
22877
22878 if (!crtl->tail_call_emit)
22879 return false;
22880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22881 if (e->flags & EDGE_SIBCALL)
22882 {
22883 rtx_insn *call = BB_END (e->src);
22884 if (!CALL_P (call))
22885 call = prev_nonnote_nondebug_insn (call);
22886 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22887 if (find_regno_fusage (call, USE, 3)
22888 || is_indirect_tailcall_p (call))
22889 return true;
22890 }
22891 return false;
22892 }
22893
22894
22895 /* Compute the distance from register FROM to register TO.
22896 These can be the arg pointer (26), the soft frame pointer (25),
22897 the stack pointer (13) or the hard frame pointer (11).
22898 In thumb mode r7 is used as the soft frame pointer, if needed.
22899 Typical stack layout looks like this:
22900
22901 old stack pointer -> | |
22902 ----
22903 | | \
22904 | | saved arguments for
22905 | | vararg functions
22906 | | /
22907 --
22908 hard FP & arg pointer -> | | \
22909 | | stack
22910 | | frame
22911 | | /
22912 --
22913 | | \
22914 | | call saved
22915 | | registers
22916 soft frame pointer -> | | /
22917 --
22918 | | \
22919 | | local
22920 | | variables
22921 locals base pointer -> | | /
22922 --
22923 | | \
22924 | | outgoing
22925 | | arguments
22926 current stack pointer -> | | /
22927 --
22928
22929 For a given function some or all of these stack components
22930 may not be needed, giving rise to the possibility of
22931 eliminating some of the registers.
22932
22933 The values returned by this function must reflect the behavior
22934 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22935
22936 The sign of the number returned reflects the direction of stack
22937 growth, so the values are positive for all eliminations except
22938 from the soft frame pointer to the hard frame pointer.
22939
22940 SFP may point just inside the local variables block to ensure correct
22941 alignment. */
22942
22943
22944 /* Return cached stack offsets. */
22945
22946 static arm_stack_offsets *
22947 arm_get_frame_offsets (void)
22948 {
22949 struct arm_stack_offsets *offsets;
22950
22951 offsets = &cfun->machine->stack_offsets;
22952
22953 return offsets;
22954 }
22955
22956
22957 /* Calculate stack offsets. These are used to calculate register elimination
22958 offsets and in prologue/epilogue code. Also calculates which registers
22959 should be saved. */
22960
22961 static void
22962 arm_compute_frame_layout (void)
22963 {
22964 struct arm_stack_offsets *offsets;
22965 unsigned long func_type;
22966 int saved;
22967 int core_saved;
22968 HOST_WIDE_INT frame_size;
22969 int i;
22970
22971 offsets = &cfun->machine->stack_offsets;
22972
22973 /* Initially this is the size of the local variables. It will be translated
22974 into an offset once we have determined the size of preceding data. */
22975 frame_size = ROUND_UP_WORD (get_frame_size ());
22976
22977 /* Space for variadic functions. */
22978 offsets->saved_args = crtl->args.pretend_args_size;
22979
22980 /* In Thumb mode this is incorrect, but never used. */
22981 offsets->frame
22982 = (offsets->saved_args
22983 + arm_compute_static_chain_stack_bytes ()
22984 + (frame_pointer_needed ? 4 : 0));
22985
22986 if (TARGET_32BIT)
22987 {
22988 unsigned int regno;
22989
22990 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22991 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22992 saved = core_saved;
22993
22994 /* We know that SP will be doubleword aligned on entry, and we must
22995 preserve that condition at any subroutine call. We also require the
22996 soft frame pointer to be doubleword aligned. */
22997
22998 if (TARGET_REALLY_IWMMXT)
22999 {
23000 /* Check for the call-saved iWMMXt registers. */
23001 for (regno = FIRST_IWMMXT_REGNUM;
23002 regno <= LAST_IWMMXT_REGNUM;
23003 regno++)
23004 if (reg_needs_saving_p (regno))
23005 saved += 8;
23006 }
23007
23008 func_type = arm_current_func_type ();
23009 /* Space for saved VFP registers. */
23010 if (! IS_VOLATILE (func_type)
23011 && TARGET_VFP_BASE)
23012 saved += arm_get_vfp_saved_size ();
23013
23014 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23015 nonsecure entry functions with VSTR/VLDR. */
23016 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23017 saved += 4;
23018 }
23019 else /* TARGET_THUMB1 */
23020 {
23021 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23022 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23023 saved = core_saved;
23024 if (TARGET_BACKTRACE)
23025 saved += 16;
23026 }
23027
23028 /* Saved registers include the stack frame. */
23029 offsets->saved_regs
23030 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23031 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23032
23033 /* A leaf function does not need any stack alignment if it has nothing
23034 on the stack. */
23035 if (crtl->is_leaf && frame_size == 0
23036 /* However if it calls alloca(), we have a dynamically allocated
23037 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23038 && ! cfun->calls_alloca)
23039 {
23040 offsets->outgoing_args = offsets->soft_frame;
23041 offsets->locals_base = offsets->soft_frame;
23042 return;
23043 }
23044
23045 /* Ensure SFP has the correct alignment. */
23046 if (ARM_DOUBLEWORD_ALIGN
23047 && (offsets->soft_frame & 7))
23048 {
23049 offsets->soft_frame += 4;
23050 /* Try to align stack by pushing an extra reg. Don't bother doing this
23051 when there is a stack frame as the alignment will be rolled into
23052 the normal stack adjustment. */
23053 if (frame_size + crtl->outgoing_args_size == 0)
23054 {
23055 int reg = -1;
23056
23057 /* Register r3 is caller-saved. Normally it does not need to be
23058 saved on entry by the prologue. However if we choose to save
23059 it for padding then we may confuse the compiler into thinking
23060 a prologue sequence is required when in fact it is not. This
23061 will occur when shrink-wrapping if r3 is used as a scratch
23062 register and there are no other callee-saved writes.
23063
23064 This situation can be avoided when other callee-saved registers
23065 are available: r3 is not mandatory, so we can choose a callee-saved
23066 register for padding instead. */
23067 bool prefer_callee_reg_p = false;
23068
23069 /* If it is safe to use r3, then do so. This sometimes
23070 generates better code on Thumb-2 by avoiding the need to
23071 use 32-bit push/pop instructions. */
23072 if (! any_sibcall_could_use_r3 ()
23073 && arm_size_return_regs () <= 12
23074 && (offsets->saved_regs_mask & (1 << 3)) == 0
23075 && (TARGET_THUMB2
23076 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23077 {
23078 reg = 3;
23079 if (!TARGET_THUMB2)
23080 prefer_callee_reg_p = true;
23081 }
23082 if (reg == -1
23083 || prefer_callee_reg_p)
23084 {
23085 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23086 {
23087 /* Avoid fixed registers; they may be changed at
23088 arbitrary times so it's unsafe to restore them
23089 during the epilogue. */
23090 if (!fixed_regs[i]
23091 && (offsets->saved_regs_mask & (1 << i)) == 0)
23092 {
23093 reg = i;
23094 break;
23095 }
23096 }
23097 }
23098
23099 if (reg != -1)
23100 {
23101 offsets->saved_regs += 4;
23102 offsets->saved_regs_mask |= (1 << reg);
23103 }
23104 }
23105 }
23106
23107 offsets->locals_base = offsets->soft_frame + frame_size;
23108 offsets->outgoing_args = (offsets->locals_base
23109 + crtl->outgoing_args_size);
23110
23111 if (ARM_DOUBLEWORD_ALIGN)
23112 {
23113 /* Ensure SP remains doubleword aligned. */
23114 if (offsets->outgoing_args & 7)
23115 offsets->outgoing_args += 4;
23116 gcc_assert (!(offsets->outgoing_args & 7));
23117 }
23118 }
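
/* An illustrative worked example of the layout computed above (values are
   assumed, not taken from a real compilation): an ARM function that saves
   {r4, r5, lr} (12 bytes), has 16 bytes of locals and 8 bytes of outgoing
   arguments, with no static chain and no interworking slot, gives

     saved_args    = 0
     saved_regs    = 0 + 0 + 12 = 12
     soft_frame    = 12         -> padded to 16 for doubleword alignment
     locals_base   = 16 + 16    = 32
     outgoing_args = 32 + 8     = 40  (already 8-byte aligned)

   Because frame_size + outgoing_args_size is non-zero, no extra register
   is pushed for the padding; the 4 bytes are folded into the normal stack
   adjustment.  */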
23119
23120
23121 /* Calculate the relative offsets for the different stack pointers. Positive
23122 offsets are in the direction of stack growth. */
23123
23124 HOST_WIDE_INT
23125 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23126 {
23127 arm_stack_offsets *offsets;
23128
23129 offsets = arm_get_frame_offsets ();
23130
23131 /* OK, now we have enough information to compute the distances.
23132 There must be an entry in these switch tables for each pair
23133 of registers in ELIMINABLE_REGS, even if some of the entries
23134 seem to be redundant or useless. */
23135 switch (from)
23136 {
23137 case ARG_POINTER_REGNUM:
23138 switch (to)
23139 {
23140 case THUMB_HARD_FRAME_POINTER_REGNUM:
23141 return 0;
23142
23143 case FRAME_POINTER_REGNUM:
23144 /* This is the reverse of the soft frame pointer
23145 to hard frame pointer elimination below. */
23146 return offsets->soft_frame - offsets->saved_args;
23147
23148 case ARM_HARD_FRAME_POINTER_REGNUM:
23149 /* This is only non-zero in the case where the static chain register
23150 is stored above the frame. */
23151 return offsets->frame - offsets->saved_args - 4;
23152
23153 case STACK_POINTER_REGNUM:
23154 /* If nothing has been pushed on the stack at all
23155 then this will return -4. This *is* correct! */
23156 return offsets->outgoing_args - (offsets->saved_args + 4);
23157
23158 default:
23159 gcc_unreachable ();
23160 }
23161 gcc_unreachable ();
23162
23163 case FRAME_POINTER_REGNUM:
23164 switch (to)
23165 {
23166 case THUMB_HARD_FRAME_POINTER_REGNUM:
23167 return 0;
23168
23169 case ARM_HARD_FRAME_POINTER_REGNUM:
23170 /* The hard frame pointer points to the top entry in the
23171 stack frame. The soft frame pointer to the bottom entry
23172 in the stack frame. If there is no stack frame at all,
23173 then they are identical. */
23174
23175 return offsets->frame - offsets->soft_frame;
23176
23177 case STACK_POINTER_REGNUM:
23178 return offsets->outgoing_args - offsets->soft_frame;
23179
23180 default:
23181 gcc_unreachable ();
23182 }
23183 gcc_unreachable ();
23184
23185 default:
23186 /* You cannot eliminate from the stack pointer.
23187 In theory you could eliminate from the hard frame
23188 pointer to the stack pointer, but this will never
23189 happen, since if a stack frame is not needed the
23190 hard frame pointer will never be used. */
23191 gcc_unreachable ();
23192 }
23193 }
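
/* Continuing the illustrative layout sketched above (saved_args = 0,
   soft_frame = 16, outgoing_args = 40; values assumed for illustration),
   the eliminations computed here would be:

     ARG_POINTER   -> FRAME_POINTER : soft_frame - saved_args          = 16
     ARG_POINTER   -> STACK_POINTER : outgoing_args - (saved_args + 4) = 36
     FRAME_POINTER -> STACK_POINTER : outgoing_args - soft_frame       = 24  */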
23194
23195 /* Given FROM and TO register numbers, say whether this elimination is
23196 allowed. Frame pointer elimination is automatically handled.
23197
23198 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23199 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23200 pointer, we must eliminate FRAME_POINTER_REGNUM into
23201 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23202 ARG_POINTER_REGNUM. */
23203
23204 bool
23205 arm_can_eliminate (const int from, const int to)
23206 {
23207 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23208 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23209 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23210 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23211 true);
23212 }
23213
23214 /* Emit RTL to save coprocessor registers on function entry. Returns the
23215 number of bytes pushed. */
23216
23217 static int
23218 arm_save_coproc_regs(void)
23219 {
23220 int saved_size = 0;
23221 unsigned reg;
23222 unsigned start_reg;
23223 rtx insn;
23224
23225 if (TARGET_REALLY_IWMMXT)
23226 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23227 if (reg_needs_saving_p (reg))
23228 {
23229 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23230 insn = gen_rtx_MEM (V2SImode, insn);
23231 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23232 RTX_FRAME_RELATED_P (insn) = 1;
23233 saved_size += 8;
23234 }
23235
23236 if (TARGET_VFP_BASE)
23237 {
23238 start_reg = FIRST_VFP_REGNUM;
23239
23240 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23241 {
23242 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23243 {
23244 if (start_reg != reg)
23245 saved_size += vfp_emit_fstmd (start_reg,
23246 (reg - start_reg) / 2);
23247 start_reg = reg + 2;
23248 }
23249 }
23250 if (start_reg != reg)
23251 saved_size += vfp_emit_fstmd (start_reg,
23252 (reg - start_reg) / 2);
23253 }
23254 return saved_size;
23255 }
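
/* An illustrative note on the VFP loop above (register choices assumed):
   contiguous runs of D registers are batched into a single store-multiple.
   If, say, d8-d10 need saving but d11-d13 do not, the run is flushed as
   roughly vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 3), i.e. one push of
   d8, d9 and d10; a later disjoint run such as d14-d15 gets its own
   store-multiple.  */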
23256
23257
23258 /* Set the Thumb frame pointer from the stack pointer. */
23259
23260 static void
23261 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23262 {
23263 HOST_WIDE_INT amount;
23264 rtx insn, dwarf;
23265
23266 amount = offsets->outgoing_args - offsets->locals_base;
23267 if (amount < 1024)
23268 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23269 stack_pointer_rtx, GEN_INT (amount)));
23270 else
23271 {
23272 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23273 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23274 expects the first two operands to be the same. */
23275 if (TARGET_THUMB2)
23276 {
23277 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23278 stack_pointer_rtx,
23279 hard_frame_pointer_rtx));
23280 }
23281 else
23282 {
23283 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23284 hard_frame_pointer_rtx,
23285 stack_pointer_rtx));
23286 }
23287 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23288 plus_constant (Pmode, stack_pointer_rtx, amount));
23289 RTX_FRAME_RELATED_P (dwarf) = 1;
23290 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23291 }
23292
23293 RTX_FRAME_RELATED_P (insn) = 1;
23294 }
23295
23296 struct scratch_reg {
23297 rtx reg;
23298 bool saved;
23299 };
23300
23301 /* Return a short-lived scratch register for use as a 2nd scratch register on
23302 function entry after the registers are saved in the prologue. This register
23303 must be released by means of release_scratch_register_on_entry. IP is not
23304 considered since it is always used as the 1st scratch register if available.
23305
23306 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23307 mask of live registers. */
23308
23309 static void
23310 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23311 unsigned long live_regs)
23312 {
23313 int regno = -1;
23314
23315 sr->saved = false;
23316
23317 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23318 regno = LR_REGNUM;
23319 else
23320 {
23321 unsigned int i;
23322
23323 for (i = 4; i < 11; i++)
23324 if (regno1 != i && (live_regs & (1 << i)) != 0)
23325 {
23326 regno = i;
23327 break;
23328 }
23329
23330 if (regno < 0)
23331 {
23332 /* If IP is used as the 1st scratch register for a nested function,
23333 then either r3 wasn't available or is used to preserve IP. */
23334 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23335 regno1 = 3;
23336 regno = (regno1 == 3 ? 2 : 3);
23337 sr->saved
23338 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23339 regno);
23340 }
23341 }
23342
23343 sr->reg = gen_rtx_REG (SImode, regno);
23344 if (sr->saved)
23345 {
23346 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23347 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23348 rtx x = gen_rtx_SET (stack_pointer_rtx,
23349 plus_constant (Pmode, stack_pointer_rtx, -4));
23350 RTX_FRAME_RELATED_P (insn) = 1;
23351 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23352 }
23353 }
23354
23355 /* Release a scratch register obtained from the preceding function. */
23356
23357 static void
23358 release_scratch_register_on_entry (struct scratch_reg *sr)
23359 {
23360 if (sr->saved)
23361 {
23362 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23363 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23364 rtx x = gen_rtx_SET (stack_pointer_rtx,
23365 plus_constant (Pmode, stack_pointer_rtx, 4));
23366 RTX_FRAME_RELATED_P (insn) = 1;
23367 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23368 }
23369 }
23370
23371 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23372
23373 #if PROBE_INTERVAL > 4096
23374 #error Cannot use indexed addressing mode for stack probing
23375 #endif
23376
23377 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23378 inclusive. These are offsets from the current stack pointer. REGNO1
23379 is the index number of the 1st scratch register and LIVE_REGS is the
23380 mask of live registers. */
23381
23382 static void
23383 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23384 unsigned int regno1, unsigned long live_regs)
23385 {
23386 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23387
23388 /* See if we have a constant small number of probes to generate. If so,
23389 that's the easy case. */
23390 if (size <= PROBE_INTERVAL)
23391 {
23392 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23393 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23394 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23395 }
23396
23397 /* The run-time loop is made up of 10 insns in the generic case while the
23398 unrolled compile-time sequence is made up of 4+2*(n-2) insns for n intervals. */
23399 else if (size <= 5 * PROBE_INTERVAL)
23400 {
23401 HOST_WIDE_INT i, rem;
23402
23403 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23404 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23405 emit_stack_probe (reg1);
23406
23407 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23408 it exceeds SIZE. If only two probes are needed, this will not
23409 generate any code. Then probe at FIRST + SIZE. */
23410 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23411 {
23412 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23413 emit_stack_probe (reg1);
23414 }
23415
23416 rem = size - (i - PROBE_INTERVAL);
23417 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23418 {
23419 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23420 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23421 }
23422 else
23423 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23424 }
23425
23426 /* Otherwise, do the same as above, but in a loop. Note that we must be
23427 extra careful with variables wrapping around because we might be at
23428 the very top (or the very bottom) of the address space and we have
23429 to be able to handle this case properly; in particular, we use an
23430 equality test for the loop condition. */
23431 else
23432 {
23433 HOST_WIDE_INT rounded_size;
23434 struct scratch_reg sr;
23435
23436 get_scratch_register_on_entry (&sr, regno1, live_regs);
23437
23438 emit_move_insn (reg1, GEN_INT (first));
23439
23440
23441 /* Step 1: round SIZE to the previous multiple of the interval. */
23442
23443 rounded_size = size & -PROBE_INTERVAL;
23444 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23445
23446
23447 /* Step 2: compute initial and final value of the loop counter. */
23448
23449 /* TEST_ADDR = SP + FIRST. */
23450 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23451
23452 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23453 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23454
23455
23456 /* Step 3: the loop
23457
23458 do
23459 {
23460 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23461 probe at TEST_ADDR
23462 }
23463 while (TEST_ADDR != LAST_ADDR)
23464
23465 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23466 until it is equal to ROUNDED_SIZE. */
23467
23468 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23469
23470
23471 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23472 that SIZE is equal to ROUNDED_SIZE. */
23473
23474 if (size != rounded_size)
23475 {
23476 HOST_WIDE_INT rem = size - rounded_size;
23477
23478 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23479 {
23480 emit_set_insn (sr.reg,
23481 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23482 emit_stack_probe (plus_constant (Pmode, sr.reg,
23483 PROBE_INTERVAL - rem));
23484 }
23485 else
23486 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23487 }
23488
23489 release_scratch_register_on_entry (&sr);
23490 }
23491
23492 /* Make sure nothing is scheduled before we are done. */
23493 emit_insn (gen_blockage ());
23494 }
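
/* An illustrative worked example, assuming PROBE_INTERVAL is 4096 and
   taking FIRST = 4096, SIZE = 10000 (values assumed): the middle case
   above probes at SP - 8192 (FIRST + 4096), SP - 12288 (FIRST + 8192)
   and, via the remainder path, SP - 14096 (FIRST + SIZE).  Only when
   SIZE exceeds 5 * PROBE_INTERVAL is the run-time loop form used.  */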
23495
23496 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23497 absolute addresses. */
23498
23499 const char *
23500 output_probe_stack_range (rtx reg1, rtx reg2)
23501 {
23502 static int labelno = 0;
23503 char loop_lab[32];
23504 rtx xops[2];
23505
23506 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23507
23508 /* Loop. */
23509 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23510
23511 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23512 xops[0] = reg1;
23513 xops[1] = GEN_INT (PROBE_INTERVAL);
23514 output_asm_insn ("sub\t%0, %0, %1", xops);
23515
23516 /* Probe at TEST_ADDR. */
23517 output_asm_insn ("str\tr0, [%0, #0]", xops);
23518
23519 /* Test if TEST_ADDR == LAST_ADDR. */
23520 xops[1] = reg2;
23521 output_asm_insn ("cmp\t%0, %1", xops);
23522
23523 /* Branch. */
23524 fputs ("\tbne\t", asm_out_file);
23525 assemble_name_raw (asm_out_file, loop_lab);
23526 fputc ('\n', asm_out_file);
23527
23528 return "";
23529 }
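
/* A sketch of the loop emitted by output_probe_stack_range (register
   numbers and the 4096-byte interval are assumed for illustration):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
*/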
23530
23531 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23532 function. */
23533 void
23534 arm_expand_prologue (void)
23535 {
23536 rtx amount;
23537 rtx insn;
23538 rtx ip_rtx;
23539 unsigned long live_regs_mask;
23540 unsigned long func_type;
23541 int fp_offset = 0;
23542 int saved_pretend_args = 0;
23543 int saved_regs = 0;
23544 unsigned HOST_WIDE_INT args_to_push;
23545 HOST_WIDE_INT size;
23546 arm_stack_offsets *offsets;
23547 bool clobber_ip;
23548
23549 func_type = arm_current_func_type ();
23550
23551 /* Naked functions don't have prologues. */
23552 if (IS_NAKED (func_type))
23553 {
23554 if (flag_stack_usage_info)
23555 current_function_static_stack_size = 0;
23556 return;
23557 }
23558
23559 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23560 args_to_push = crtl->args.pretend_args_size;
23561
23562 /* Compute which register we will have to save onto the stack. */
23563 offsets = arm_get_frame_offsets ();
23564 live_regs_mask = offsets->saved_regs_mask;
23565
23566 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23567
23568 if (IS_STACKALIGN (func_type))
23569 {
23570 rtx r0, r1;
23571
23572 /* Handle a word-aligned stack pointer. We generate the following:
23573
23574 mov r0, sp
23575 bic r1, r0, #7
23576 mov sp, r1
23577 <save and restore r0 in normal prologue/epilogue>
23578 mov sp, r0
23579 bx lr
23580
23581 The unwinder doesn't need to know about the stack realignment.
23582 Just tell it we saved SP in r0. */
23583 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23584
23585 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23586 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23587
23588 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23589 RTX_FRAME_RELATED_P (insn) = 1;
23590 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23591
23592 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23593
23594 /* ??? The CFA changes here, which may cause GDB to conclude that it
23595 has entered a different function. That said, the unwind info is
23596 correct, individually, before and after this instruction because
23597 we've described the save of SP, which will override the default
23598 handling of SP as restoring from the CFA. */
23599 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23600 }
23601
23602 /* Let's compute the static_chain_stack_bytes required and store it. Right
23603 now the value must be -1 as stored by arm_init_machine_status (). */
23604 cfun->machine->static_chain_stack_bytes
23605 = arm_compute_static_chain_stack_bytes ();
23606
23607 /* The static chain register is the same as the IP register. If it is
23608 clobbered when creating the frame, we need to save and restore it. */
23609 clobber_ip = (IS_NESTED (func_type)
23610 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23611 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23612 || flag_stack_clash_protection)
23613 && !df_regs_ever_live_p (LR_REGNUM)
23614 && arm_r3_live_at_start_p ()))
23615 || arm_current_function_pac_enabled_p ()));
23616
23617 /* Find somewhere to store IP whilst the frame is being created.
23618 We try the following places in order:
23619
23620 1. The last argument register r3 if it is available.
23621 2. A slot on the stack above the frame if there are no
23622 arguments to push onto the stack.
23623 3. Register r3 again, after pushing the argument registers
23624 onto the stack, if this is a varargs function.
23625 4. The last slot on the stack created for the arguments to
23626 push, if this isn't a varargs function.
23627
23628 Note - we only need to tell the dwarf2 backend about the SP
23629 adjustment in the second variant; the static chain register
23630 doesn't need to be unwound, as it doesn't contain a value
23631 inherited from the caller. */
23632 if (clobber_ip)
23633 {
23634 if (!arm_r3_live_at_start_p ())
23635 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23636 else if (args_to_push == 0)
23637 {
23638 rtx addr, dwarf;
23639
23640 saved_regs += 4;
23641
23642 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23643 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23644 fp_offset = 4;
23645
23646 /* Just tell the dwarf backend that we adjusted SP. */
23647 dwarf = gen_rtx_SET (stack_pointer_rtx,
23648 plus_constant (Pmode, stack_pointer_rtx,
23649 -fp_offset));
23650 RTX_FRAME_RELATED_P (insn) = 1;
23651 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23652 if (arm_current_function_pac_enabled_p ())
23653 cfun->machine->pacspval_needed = 1;
23654 }
23655 else
23656 {
23657 /* Store the args on the stack. */
23658 if (cfun->machine->uses_anonymous_args)
23659 {
23660 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23661 (0xf0 >> (args_to_push / 4)) & 0xf);
23662 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23663 saved_pretend_args = 1;
23664 }
23665 else
23666 {
23667 rtx addr, dwarf;
23668
23669 if (args_to_push == 4)
23670 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23671 else
23672 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23673 plus_constant (Pmode,
23674 stack_pointer_rtx,
23675 -args_to_push));
23676
23677 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23678
23679 /* Just tell the dwarf backend that we adjusted SP. */
23680 dwarf = gen_rtx_SET (stack_pointer_rtx,
23681 plus_constant (Pmode, stack_pointer_rtx,
23682 -args_to_push));
23683 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23684 }
23685
23686 RTX_FRAME_RELATED_P (insn) = 1;
23687 fp_offset = args_to_push;
23688 args_to_push = 0;
23689 if (arm_current_function_pac_enabled_p ())
23690 cfun->machine->pacspval_needed = 1;
23691 }
23692 }
23693
23694 if (arm_current_function_pac_enabled_p ())
23695 {
23696 /* If IP was clobbered we only emit a PAC instruction as the BTI
23697 one will be added before the push of the clobbered IP (if
23698 necessary) by the bti pass. */
23699 if (aarch_bti_enabled () && !clobber_ip)
23700 insn = emit_insn (gen_pacbti_nop ());
23701 else
23702 insn = emit_insn (gen_pac_nop ());
23703
23704 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23705 RTX_FRAME_RELATED_P (insn) = 1;
23706 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23707 }
23708
23709 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23710 {
23711 if (IS_INTERRUPT (func_type))
23712 {
23713 /* Interrupt functions must not corrupt any registers.
23714 Creating a frame pointer however, corrupts the IP
23715 register, so we must push it first. */
23716 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23717
23718 /* Do not set RTX_FRAME_RELATED_P on this insn.
23719 The dwarf stack unwinding code only wants to see one
23720 stack decrement per function, and this is not it. If
23721 this instruction is labeled as being part of the frame
23722 creation sequence then dwarf2out_frame_debug_expr will
23723 die when it encounters the assignment of IP to FP
23724 later on, since the use of SP here establishes SP as
23725 the CFA register and not IP.
23726
23727 Anyway this instruction is not really part of the stack
23728 frame creation although it is part of the prologue. */
23729 }
23730
23731 insn = emit_set_insn (ip_rtx,
23732 plus_constant (Pmode, stack_pointer_rtx,
23733 fp_offset));
23734 RTX_FRAME_RELATED_P (insn) = 1;
23735 }
23736
23737 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23738 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23739 {
23740 saved_regs += 4;
23741 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23742 GEN_INT (FPCXTNS_ENUM)));
23743 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23744 plus_constant (Pmode, stack_pointer_rtx, -4));
23745 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23746 RTX_FRAME_RELATED_P (insn) = 1;
23747 }
23748
23749 if (args_to_push)
23750 {
23751 /* Push the argument registers, or reserve space for them. */
23752 if (cfun->machine->uses_anonymous_args)
23753 insn = emit_multi_reg_push
23754 ((0xf0 >> (args_to_push / 4)) & 0xf,
23755 (0xf0 >> (args_to_push / 4)) & 0xf);
23756 else
23757 insn = emit_insn
23758 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23759 GEN_INT (- args_to_push)));
23760 RTX_FRAME_RELATED_P (insn) = 1;
23761 }
23762
23763 /* If this is an interrupt service routine, and the link register
23764 is going to be pushed, and we're not generating the extra push
23765 of IP (needed when a frame pointer is required and the APCS frame
23766 layout is used), then subtracting four from LR now means that the
23767 function return can be done with a single instruction. */
23768 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23769 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23770 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23771 && TARGET_ARM)
23772 {
23773 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23774
23775 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23776 }
23777
23778 if (live_regs_mask)
23779 {
23780 unsigned long dwarf_regs_mask = live_regs_mask;
23781
23782 saved_regs += bit_count (live_regs_mask) * 4;
23783 if (optimize_size && !frame_pointer_needed
23784 && saved_regs == offsets->saved_regs - offsets->saved_args)
23785 {
23786 /* If no coprocessor registers are being pushed and we don't have
23787 to worry about a frame pointer then push extra registers to
23788 create the stack frame. This is done in a way that does not
23789 alter the frame layout, so is independent of the epilogue. */
23790 int n;
23791 int frame;
23792 n = 0;
23793 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23794 n++;
23795 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23796 if (frame && n * 4 >= frame)
23797 {
23798 n = frame / 4;
23799 live_regs_mask |= (1 << n) - 1;
23800 saved_regs += frame;
23801 }
23802 }
23803
23804 if (TARGET_LDRD
23805 && current_tune->prefer_ldrd_strd
23806 && !optimize_function_for_size_p (cfun))
23807 {
23808 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23809 if (TARGET_THUMB2)
23810 thumb2_emit_strd_push (live_regs_mask);
23811 else if (TARGET_ARM
23812 && !TARGET_APCS_FRAME
23813 && !IS_INTERRUPT (func_type))
23814 arm_emit_strd_push (live_regs_mask);
23815 else
23816 {
23817 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23818 RTX_FRAME_RELATED_P (insn) = 1;
23819 }
23820 }
23821 else
23822 {
23823 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23824 RTX_FRAME_RELATED_P (insn) = 1;
23825 }
23826 }
23827
23828 if (! IS_VOLATILE (func_type))
23829 saved_regs += arm_save_coproc_regs ();
23830
23831 if (frame_pointer_needed && TARGET_ARM)
23832 {
23833 /* Create the new frame pointer. */
23834 if (TARGET_APCS_FRAME)
23835 {
23836 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23837 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23838 RTX_FRAME_RELATED_P (insn) = 1;
23839 }
23840 else
23841 {
23842 insn = GEN_INT (saved_regs - (4 + fp_offset));
23843 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23844 stack_pointer_rtx, insn));
23845 RTX_FRAME_RELATED_P (insn) = 1;
23846 }
23847 }
23848
23849 size = offsets->outgoing_args - offsets->saved_args;
23850 if (flag_stack_usage_info)
23851 current_function_static_stack_size = size;
23852
23853 /* If this isn't an interrupt service routine and we have a frame, then do
23854 stack checking. We use IP as the first scratch register, except for the
23855 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23856 if (!IS_INTERRUPT (func_type)
23857 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23858 || flag_stack_clash_protection))
23859 {
23860 unsigned int regno;
23861
23862 if (!IS_NESTED (func_type) || clobber_ip)
23863 regno = IP_REGNUM;
23864 else if (df_regs_ever_live_p (LR_REGNUM))
23865 regno = LR_REGNUM;
23866 else
23867 regno = 3;
23868
23869 if (crtl->is_leaf && !cfun->calls_alloca)
23870 {
23871 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23872 arm_emit_probe_stack_range (get_stack_check_protect (),
23873 size - get_stack_check_protect (),
23874 regno, live_regs_mask);
23875 }
23876 else if (size > 0)
23877 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23878 regno, live_regs_mask);
23879 }
23880
23881 /* Recover the static chain register. */
23882 if (clobber_ip)
23883 {
23884 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23885 insn = gen_rtx_REG (SImode, 3);
23886 else
23887 {
23888 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23889 insn = gen_frame_mem (SImode, insn);
23890 }
23891 emit_set_insn (ip_rtx, insn);
23892 emit_insn (gen_force_register_use (ip_rtx));
23893 }
23894
23895 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23896 {
23897 /* This add can produce multiple insns for a large constant, so we
23898 need to get tricky. */
23899 rtx_insn *last = get_last_insn ();
23900
23901 amount = GEN_INT (offsets->saved_args + saved_regs
23902 - offsets->outgoing_args);
23903
23904 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23905 amount));
23906 do
23907 {
23908 last = last ? NEXT_INSN (last) : get_insns ();
23909 RTX_FRAME_RELATED_P (last) = 1;
23910 }
23911 while (last != insn);
23912
23913 /* If the frame pointer is needed, emit a special barrier that
23914 will prevent the scheduler from moving stores to the frame
23915 before the stack adjustment. */
23916 if (frame_pointer_needed)
23917 emit_insn (gen_stack_tie (stack_pointer_rtx,
23918 hard_frame_pointer_rtx));
23919 }
23920
23921
23922 if (frame_pointer_needed && TARGET_THUMB2)
23923 thumb_set_frame_pointer (offsets);
23924
23925 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23926 {
23927 unsigned long mask;
23928
23929 mask = live_regs_mask;
23930 mask &= THUMB2_WORK_REGS;
23931 if (!IS_NESTED (func_type))
23932 mask |= (1 << IP_REGNUM);
23933 arm_load_pic_register (mask, NULL_RTX);
23934 }
23935
23936 /* If we are profiling, make sure no instructions are scheduled before
23937 the call to mcount. Similarly if the user has requested no
23938 scheduling in the prolog. Similarly if we want non-call exceptions
23939 using the EABI unwinder, to prevent faulting instructions from being
23940 swapped with a stack adjustment. */
23941 if (crtl->profile || !TARGET_SCHED_PROLOG
23942 || (arm_except_unwind_info (&global_options) == UI_TARGET
23943 && cfun->can_throw_non_call_exceptions))
23944 emit_insn (gen_blockage ());
23945
23946 /* If the link register is being kept alive, with the return address in it,
23947 then make sure that it does not get reused by the ce2 pass. */
23948 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23949 cfun->machine->lr_save_eliminated = 1;
23950 }
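
/* A minimal sketch of the kind of prologue the code above produces for a
   simple ARM function that saves r4 and lr and needs 8 bytes of locals
   (no frame pointer, no stack checking; the exact output depends on the
   target options and tuning, so treat this as assumed, not definitive):

	push	{r4, lr}
	sub	sp, sp, #8
*/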
23951 \f
23952 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23953 static void
23954 arm_print_condition (FILE *stream)
23955 {
23956 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23957 {
23958 /* Branch conversion is not implemented for Thumb-2. */
23959 if (TARGET_THUMB)
23960 {
23961 output_operand_lossage ("predicated Thumb instruction");
23962 return;
23963 }
23964 if (current_insn_predicate != NULL)
23965 {
23966 output_operand_lossage
23967 ("predicated instruction in conditional sequence");
23968 return;
23969 }
23970
23971 fputs (arm_condition_codes[arm_current_cc], stream);
23972 }
23973 else if (current_insn_predicate)
23974 {
23975 enum arm_cond_code code;
23976
23977 if (TARGET_THUMB1)
23978 {
23979 output_operand_lossage ("predicated Thumb instruction");
23980 return;
23981 }
23982
23983 code = get_arm_condition_code (current_insn_predicate);
23984 fputs (arm_condition_codes[code], stream);
23985 }
23986 }
23987
23988
23989 /* Globally reserved letters: acln
23990 Punctuation letters currently used: @_|?().!#
23991 Lower case letters currently used: bcdefhimpqtvwxyz
23992 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23993 Letters previously used, but now deprecated/obsolete: sWXYZ.
23994
23995 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23996
23997 If CODE is 'd', then the X is a condition operand and the instruction
23998 should only be executed if the condition is true.
23999 If CODE is 'D', then the X is a condition operand and the instruction
24000 should only be executed if the condition is false: however, if the mode
24001 of the comparison is CCFPEmode, then always execute the instruction -- we
24002 do this because in these circumstances !GE does not necessarily imply LT;
24003 in these cases the instruction pattern will take care to make sure that
24004 an instruction containing %d will follow, thereby undoing the effects of
24005 doing this instruction unconditionally.
24006 If CODE is 'N' then X is a floating point operand that must be negated
24007 before output.
24008 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24009 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24010 If CODE is 'V', then the operand must be a CONST_INT representing
24011 the bits to preserve in the modified register (Rd) of a BFI or BFC
24012 instruction: print out both the width and lsb (shift) fields. */
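/* A few illustrative expansions of the codes above (operands assumed):
   %c on (const_int 42) prints "42" (no '#'); %x on (const_int 255) prints
   "#0xff"; %B on (const_int 0) prints "-1"; %b on (const_int 8) prints
   "#3"; %M on r4 holding a DImode value prints "{r4-r5}".  */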
24013 static void
24014 arm_print_operand (FILE *stream, rtx x, int code)
24015 {
24016 switch (code)
24017 {
24018 case '@':
24019 fputs (ASM_COMMENT_START, stream);
24020 return;
24021
24022 case '_':
24023 fputs (user_label_prefix, stream);
24024 return;
24025
24026 case '|':
24027 fputs (REGISTER_PREFIX, stream);
24028 return;
24029
24030 case '?':
24031 arm_print_condition (stream);
24032 return;
24033
24034 case '.':
24035 /* The current condition code for a condition code setting instruction.
24036 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24037 fputc ('s', stream);
24038 arm_print_condition (stream);
24039 return;
24040
24041 case '!':
24042 /* If the instruction is conditionally executed then print
24043 the current condition code, otherwise print 's'. */
24044 gcc_assert (TARGET_THUMB2);
24045 if (current_insn_predicate)
24046 arm_print_condition (stream);
24047 else
24048 fputc ('s', stream);
24049 break;
24050
24051 /* %# is a "break" sequence. It doesn't output anything, but is used to
24052 separate e.g. operand numbers from following text, if that text consists
24053 of further digits which we don't want to be part of the operand
24054 number. */
24055 case '#':
24056 return;
24057
24058 case 'N':
24059 {
24060 REAL_VALUE_TYPE r;
24061 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24062 fprintf (stream, "%s", fp_const_from_val (&r));
24063 }
24064 return;
24065
24066 /* An integer or symbol address without a preceding # sign. */
24067 case 'c':
24068 switch (GET_CODE (x))
24069 {
24070 case CONST_INT:
24071 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24072 break;
24073
24074 case SYMBOL_REF:
24075 output_addr_const (stream, x);
24076 break;
24077
24078 case CONST:
24079 if (GET_CODE (XEXP (x, 0)) == PLUS
24080 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24081 {
24082 output_addr_const (stream, x);
24083 break;
24084 }
24085 /* Fall through. */
24086
24087 default:
24088 output_operand_lossage ("Unsupported operand for code '%c'", code);
24089 }
24090 return;
24091
24092 /* An integer that we want to print in HEX. */
24093 case 'x':
24094 switch (GET_CODE (x))
24095 {
24096 case CONST_INT:
24097 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24098 break;
24099
24100 default:
24101 output_operand_lossage ("Unsupported operand for code '%c'", code);
24102 }
24103 return;
24104
24105 case 'B':
24106 if (CONST_INT_P (x))
24107 {
24108 HOST_WIDE_INT val;
24109 val = ARM_SIGN_EXTEND (~INTVAL (x));
24110 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24111 }
24112 else
24113 {
24114 putc ('~', stream);
24115 output_addr_const (stream, x);
24116 }
24117 return;
24118
24119 case 'b':
24120 /* Print the log2 of a CONST_INT. */
24121 {
24122 HOST_WIDE_INT val;
24123
24124 if (!CONST_INT_P (x)
24125 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24126 output_operand_lossage ("Unsupported operand for code '%c'", code);
24127 else
24128 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24129 }
24130 return;
24131
24132 case 'L':
24133 /* The low 16 bits of an immediate constant. */
24134 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
24135 return;
24136
24137 case 'i':
24138 fprintf (stream, "%s", arithmetic_instr (x, 1));
24139 return;
24140
24141 case 'I':
24142 fprintf (stream, "%s", arithmetic_instr (x, 0));
24143 return;
24144
24145 case 'S':
24146 {
24147 HOST_WIDE_INT val;
24148 const char *shift;
24149
24150 shift = shift_op (x, &val);
24151
24152 if (shift)
24153 {
24154 fprintf (stream, ", %s ", shift);
24155 if (val == -1)
24156 arm_print_operand (stream, XEXP (x, 1), 0);
24157 else
24158 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24159 }
24160 }
24161 return;
24162
24163 /* An explanation of the 'Q', 'R' and 'H' register operands:
24164
24165 In a pair of registers containing a DI or DF value the 'Q'
24166 operand returns the register number of the register containing
24167 the least significant part of the value. The 'R' operand returns
24168 the register number of the register containing the most
24169 significant part of the value.
24170
24171 The 'H' operand returns the higher of the two register numbers.
24172 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
24173 same as the 'Q' operand, since the most significant part of the
24174 value is held in the lower number register. The reverse is true
24175 on systems where WORDS_BIG_ENDIAN is false.
24176
24177 The purpose of these operands is to distinguish between cases
24178 where the endian-ness of the values is important (for example
24179 when they are added together), and cases where the endian-ness
24180 is irrelevant, but the order of register operations is important.
24181 For example when loading a value from memory into a register
24182 pair, the endian-ness does not matter. Provided that the value
24183 from the lower memory address is put into the lower numbered
24184 register, and the value from the higher address is put into the
24185 higher numbered register, the load will work regardless of whether
24186 the value being loaded is big-wordian or little-wordian. The
24187 order of the two register loads can matter however, if the address
24188 of the memory location is actually held in one of the registers
24189 being overwritten by the load.
24190
24191 The 'Q' and 'R' constraints are also available for 64-bit
24192 constants. */
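/* Illustrative example (register pair assumed): for a DImode value held
   in r0/r1 on a target where WORDS_BIG_ENDIAN is false, %Q prints r0,
   %R prints r1 and %H prints r1; with WORDS_BIG_ENDIAN true, %Q and %H
   both print r1 while %R prints r0.  */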
24193 case 'Q':
24194 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24195 {
24196 rtx part = gen_lowpart (SImode, x);
24197 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24198 return;
24199 }
24200
24201 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24202 {
24203 output_operand_lossage ("invalid operand for code '%c'", code);
24204 return;
24205 }
24206
24207 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24208 return;
24209
24210 case 'R':
24211 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24212 {
24213 machine_mode mode = GET_MODE (x);
24214 rtx part;
24215
24216 if (mode == VOIDmode)
24217 mode = DImode;
24218 part = gen_highpart_mode (SImode, mode, x);
24219 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24220 return;
24221 }
24222
24223 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24224 {
24225 output_operand_lossage ("invalid operand for code '%c'", code);
24226 return;
24227 }
24228
24229 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24230 return;
24231
24232 case 'H':
24233 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24234 {
24235 output_operand_lossage ("invalid operand for code '%c'", code);
24236 return;
24237 }
24238
24239 asm_fprintf (stream, "%r", REGNO (x) + 1);
24240 return;
24241
24242 case 'J':
24243 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24244 {
24245 output_operand_lossage ("invalid operand for code '%c'", code);
24246 return;
24247 }
24248
24249 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24250 return;
24251
24252 case 'K':
24253 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24254 {
24255 output_operand_lossage ("invalid operand for code '%c'", code);
24256 return;
24257 }
24258
24259 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24260 return;
24261
24262 case 'm':
24263 asm_fprintf (stream, "%r",
24264 REG_P (XEXP (x, 0))
24265 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24266 return;
24267
24268 case 'M':
24269 asm_fprintf (stream, "{%r-%r}",
24270 REGNO (x),
24271 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24272 return;
24273
24274 /* Like 'M', but writing doubleword vector registers, for use by Neon
24275 insns. */
24276 case 'h':
24277 {
24278 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24279 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24280 if (numregs == 1)
24281 asm_fprintf (stream, "{d%d}", regno);
24282 else
24283 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24284 }
24285 return;
24286
24287 case 'd':
24288 /* CONST_TRUE_RTX means always -- that's the default. */
24289 if (x == const_true_rtx)
24290 return;
24291
24292 if (!COMPARISON_P (x))
24293 {
24294 output_operand_lossage ("invalid operand for code '%c'", code);
24295 return;
24296 }
24297
24298 fputs (arm_condition_codes[get_arm_condition_code (x)],
24299 stream);
24300 return;
24301
24302 case 'D':
24303 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24304 want to do that. */
24305 if (x == const_true_rtx)
24306 {
24307 output_operand_lossage ("instruction never executed");
24308 return;
24309 }
24310 if (!COMPARISON_P (x))
24311 {
24312 output_operand_lossage ("invalid operand for code '%c'", code);
24313 return;
24314 }
24315
24316 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24317 (get_arm_condition_code (x))],
24318 stream);
24319 return;
24320
24321 case 'V':
24322 {
24323 /* Output the LSB (shift) and width for a bitmask instruction
24324 based on a literal mask. The LSB is printed first,
24325 followed by the width.
24326
24327 E.g. for 0b1...1110001, the result is #1, #3. */
24328 if (!CONST_INT_P (x))
24329 {
24330 output_operand_lossage ("invalid operand for code '%c'", code);
24331 return;
24332 }
24333
24334 unsigned HOST_WIDE_INT val
24335 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24336 int lsb = exact_log2 (val & -val);
24337 asm_fprintf (stream, "#%d, #%d", lsb,
24338 (exact_log2 (val + (val & -val)) - lsb));
24339 }
24340 return;
24341
24342 case 's':
24343 case 'W':
24344 case 'X':
24345 case 'Y':
24346 case 'Z':
24347 /* Former Maverick support, removed after GCC-4.7. */
24348 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24349 return;
24350
24351 case 'U':
24352 if (!REG_P (x)
24353 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24354 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24355 /* Bad value for wCG register number. */
24356 {
24357 output_operand_lossage ("invalid operand for code '%c'", code);
24358 return;
24359 }
24360
24361 else
24362 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24363 return;
24364
24365 /* Print an iWMMXt control register name. */
24366 case 'w':
24367 if (!CONST_INT_P (x)
24368 || INTVAL (x) < 0
24369 || INTVAL (x) >= 16)
24370 /* Bad value for wC register number. */
24371 {
24372 output_operand_lossage ("invalid operand for code '%c'", code);
24373 return;
24374 }
24375
24376 else
24377 {
24378 static const char * wc_reg_names [16] =
24379 {
24380 "wCID", "wCon", "wCSSF", "wCASF",
24381 "wC4", "wC5", "wC6", "wC7",
24382 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24383 "wC12", "wC13", "wC14", "wC15"
24384 };
24385
24386 fputs (wc_reg_names [INTVAL (x)], stream);
24387 }
24388 return;
24389
24390 /* Print the high single-precision register of a VFP double-precision
24391 register. */
24392 case 'p':
24393 {
24394 machine_mode mode = GET_MODE (x);
24395 int regno;
24396
24397 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24398 {
24399 output_operand_lossage ("invalid operand for code '%c'", code);
24400 return;
24401 }
24402
24403 regno = REGNO (x);
24404 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24405 {
24406 output_operand_lossage ("invalid operand for code '%c'", code);
24407 return;
24408 }
24409
24410 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24411 }
24412 return;
24413
24414 /* Print a VFP/Neon double precision or quad precision register name. */
24415 case 'P':
24416 case 'q':
24417 {
24418 machine_mode mode = GET_MODE (x);
24419 int is_quad = (code == 'q');
24420 int regno;
24421
24422 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24423 {
24424 output_operand_lossage ("invalid operand for code '%c'", code);
24425 return;
24426 }
24427
24428 if (!REG_P (x)
24429 || !IS_VFP_REGNUM (REGNO (x)))
24430 {
24431 output_operand_lossage ("invalid operand for code '%c'", code);
24432 return;
24433 }
24434
24435 regno = REGNO (x);
24436 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24437 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24438 {
24439 output_operand_lossage ("invalid operand for code '%c'", code);
24440 return;
24441 }
24442
24443 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24444 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24445 }
24446 return;
24447
24448 /* These two codes print the low/high doubleword register of a Neon quad
24449 register, respectively. For pair-structure types, can also print
24450 low/high quadword registers. */
24451 case 'e':
24452 case 'f':
24453 {
24454 machine_mode mode = GET_MODE (x);
24455 int regno;
24456
24457 if ((GET_MODE_SIZE (mode) != 16
24458 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24459 {
24460 output_operand_lossage ("invalid operand for code '%c'", code);
24461 return;
24462 }
24463
24464 regno = REGNO (x);
24465 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24466 {
24467 output_operand_lossage ("invalid operand for code '%c'", code);
24468 return;
24469 }
24470
24471 if (GET_MODE_SIZE (mode) == 16)
24472 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24473 + (code == 'f' ? 1 : 0));
24474 else
24475 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24476 + (code == 'f' ? 1 : 0));
24477 }
24478 return;
24479
24480 /* Print a VFPv3 floating-point constant, represented as an integer
24481 index. */
24482 case 'G':
24483 {
24484 int index = vfp3_const_double_index (x);
24485 gcc_assert (index != -1);
24486 fprintf (stream, "%d", index);
24487 }
24488 return;
24489
24490 /* Print bits representing opcode features for Neon.
24491
24492 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24493 and polynomials as unsigned.
24494
24495 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24496
24497 Bit 2 is 1 for rounding functions, 0 otherwise. */
24498
24499 /* Identify the type as 's', 'u', 'p' or 'f'. */
24500 case 'T':
24501 {
24502 HOST_WIDE_INT bits = INTVAL (x);
24503 fputc ("uspf"[bits & 3], stream);
24504 }
24505 return;
24506
24507 /* Likewise, but signed and unsigned integers are both 'i'. */
24508 case 'F':
24509 {
24510 HOST_WIDE_INT bits = INTVAL (x);
24511 fputc ("iipf"[bits & 3], stream);
24512 }
24513 return;
24514
24515 /* As for 'T', but emit 'u' instead of 'p'. */
24516 case 't':
24517 {
24518 HOST_WIDE_INT bits = INTVAL (x);
24519 fputc ("usuf"[bits & 3], stream);
24520 }
24521 return;
24522
24523 /* Bit 2: rounding (vs none). */
24524 case 'O':
24525 {
24526 HOST_WIDE_INT bits = INTVAL (x);
24527 fputs ((bits & 4) != 0 ? "r" : "", stream);
24528 }
24529 return;
24530
24531 /* Memory operand for vld1/vst1 instruction. */
24532 case 'A':
24533 {
24534 rtx addr;
24535 bool postinc = false;
24536 rtx postinc_reg = NULL;
24537 unsigned align, memsize, align_bits;
24538
24539 gcc_assert (MEM_P (x));
24540 addr = XEXP (x, 0);
24541 if (GET_CODE (addr) == POST_INC)
24542 {
24543 postinc = true;
24544 addr = XEXP (addr, 0);
24545 }
24546 if (GET_CODE (addr) == POST_MODIFY)
24547 {
24548 postinc_reg = XEXP (XEXP (addr, 1), 1);
24549 addr = XEXP (addr, 0);
24550 }
24551 asm_fprintf (stream, "[%r", REGNO (addr));
24552
24553 /* We know the alignment of this access, so we can emit a hint in the
24554 instruction (for some alignments) as an aid to the memory subsystem
24555 of the target. */
24556 align = MEM_ALIGN (x) >> 3;
24557 memsize = MEM_SIZE (x);
24558
24559 /* Only certain alignment specifiers are supported by the hardware. */
24560 if (memsize == 32 && (align % 32) == 0)
24561 align_bits = 256;
24562 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24563 align_bits = 128;
24564 else if (memsize >= 8 && (align % 8) == 0)
24565 align_bits = 64;
24566 else
24567 align_bits = 0;
24568
24569 if (align_bits != 0)
24570 asm_fprintf (stream, ":%d", align_bits);
24571
24572 asm_fprintf (stream, "]");
24573
24574 if (postinc)
24575 fputs ("!", stream);
24576 if (postinc_reg)
24577 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24578 }
24579 return;
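
/* Illustrative output for the 'A' code (base register assumed): a 16-byte
   access known to be 16-byte aligned prints as "[r0:128]"; with only
   8-byte alignment it prints "[r0:64]"; a post-increment form appends
   '!', giving "[r0:128]!".  */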
24580
24581 /* Print a memory operand with the "Ux" or "Uj" constraint. Depending on
24582 the rtx_code, the memory operand is output in one of the following forms.
24583 1. [Rn], #+/-<imm>
24584 2. [Rn, #+/-<imm>]!
24585 3. [Rn, #+/-<imm>]
24586 4. [Rn]. */
24587 case 'E':
24588 {
24589 rtx addr;
24590 rtx postinc_reg = NULL;
24591 unsigned inc_val = 0;
24592 enum rtx_code code;
24593
24594 gcc_assert (MEM_P (x));
24595 addr = XEXP (x, 0);
24596 code = GET_CODE (addr);
24597 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24598 || code == PRE_DEC)
24599 {
24600 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24601 inc_val = GET_MODE_SIZE (GET_MODE (x));
24602 if (code == POST_INC || code == POST_DEC)
24603 asm_fprintf (stream, "], #%s%d", (code == POST_INC)
24604 ? "" : "-", inc_val);
24605 else
24606 asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
24607 ? "" : "-", inc_val);
24608 }
24609 else if (code == POST_MODIFY || code == PRE_MODIFY)
24610 {
24611 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24612 postinc_reg = XEXP (XEXP (addr, 1), 1);
24613 if (postinc_reg && CONST_INT_P (postinc_reg))
24614 {
24615 if (code == POST_MODIFY)
24616 asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
24617 else
24618 asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
24619 }
24620 }
24621 else if (code == PLUS)
24622 {
24623 rtx base = XEXP (addr, 0);
24624 rtx index = XEXP (addr, 1);
24625
24626 gcc_assert (REG_P (base) && CONST_INT_P (index));
24627
24628 HOST_WIDE_INT offset = INTVAL (index);
24629 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24630 }
24631 else
24632 {
24633 gcc_assert (REG_P (addr));
24634 asm_fprintf (stream, "[%r]", REGNO (addr));
24635 }
24636 }
24637 return;
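
/* Illustrative output for the forms listed above, assuming a 16-byte
   vector mode and base register r2: POST_INC prints "[r2], #16", PRE_DEC
   prints "[r2, #-16]!", PLUS with offset 8 prints "[r2, #8]" and a plain
   REG prints "[r2]".  */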
24638
24639 case 'C':
24640 {
24641 rtx addr;
24642
24643 gcc_assert (MEM_P (x));
24644 addr = XEXP (x, 0);
24645 gcc_assert (REG_P (addr));
24646 asm_fprintf (stream, "[%r]", REGNO (addr));
24647 }
24648 return;
24649
24650 /* Translate an S register number into a D register number and element index. */
24651 case 'y':
24652 {
24653 machine_mode mode = GET_MODE (x);
24654 int regno;
24655
24656 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24657 {
24658 output_operand_lossage ("invalid operand for code '%c'", code);
24659 return;
24660 }
24661
24662 regno = REGNO (x);
24663 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24664 {
24665 output_operand_lossage ("invalid operand for code '%c'", code);
24666 return;
24667 }
24668
24669 regno = regno - FIRST_VFP_REGNUM;
24670 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24671 }
24672 return;
24673
24674 case 'v':
24675 gcc_assert (CONST_DOUBLE_P (x));
24676 int result;
24677 result = vfp3_const_double_for_fract_bits (x);
24678 if (result == 0)
24679 result = vfp3_const_double_for_bits (x);
24680 fprintf (stream, "#%d", result);
24681 return;
24682
24683 /* Register specifier for vld1.16/vst1.16. Translate the S register
24684 number into a D register number and element index. */
24685 case 'z':
24686 {
24687 machine_mode mode = GET_MODE (x);
24688 int regno;
24689
24690 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24691 {
24692 output_operand_lossage ("invalid operand for code '%c'", code);
24693 return;
24694 }
24695
24696 regno = REGNO (x);
24697 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24698 {
24699 output_operand_lossage ("invalid operand for code '%c'", code);
24700 return;
24701 }
24702
24703 regno = regno - FIRST_VFP_REGNUM;
24704 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
24705 }
24706 return;
24707
24708 default:
24709 if (x == 0)
24710 {
24711 output_operand_lossage ("missing operand");
24712 return;
24713 }
24714
24715 switch (GET_CODE (x))
24716 {
24717 case REG:
24718 asm_fprintf (stream, "%r", REGNO (x));
24719 break;
24720
24721 case MEM:
24722 output_address (GET_MODE (x), XEXP (x, 0));
24723 break;
24724
24725 case CONST_DOUBLE:
24726 {
24727 char fpstr[20];
24728 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24729 sizeof (fpstr), 0, 1);
24730 fprintf (stream, "#%s", fpstr);
24731 }
24732 break;
24733
24734 default:
24735 gcc_assert (GET_CODE (x) != NEG);
24736 fputc ('#', stream);
24737 if (GET_CODE (x) == HIGH)
24738 {
24739 fputs (":lower16:", stream);
24740 x = XEXP (x, 0);
24741 }
24742
24743 output_addr_const (stream, x);
24744 break;
24745 }
24746 }
24747 }
24748 \f
24749 /* Target hook for printing a memory address. */
24750 static void
24751 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24752 {
24753 if (TARGET_32BIT)
24754 {
24755 int is_minus = GET_CODE (x) == MINUS;
24756
24757 if (REG_P (x))
24758 asm_fprintf (stream, "[%r]", REGNO (x));
24759 else if (GET_CODE (x) == PLUS || is_minus)
24760 {
24761 rtx base = XEXP (x, 0);
24762 rtx index = XEXP (x, 1);
24763 HOST_WIDE_INT offset = 0;
24764 if (!REG_P (base)
24765 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24766 {
24767 /* Ensure that BASE is a register. */
24768 /* (one of them must be). */
24769 /* Also ensure the SP is not used as an index register. */
24770 std::swap (base, index);
24771 }
24772 switch (GET_CODE (index))
24773 {
24774 case CONST_INT:
24775 offset = INTVAL (index);
24776 if (is_minus)
24777 offset = -offset;
24778 asm_fprintf (stream, "[%r, #%wd]",
24779 REGNO (base), offset);
24780 break;
24781
24782 case REG:
24783 asm_fprintf (stream, "[%r, %s%r]",
24784 REGNO (base), is_minus ? "-" : "",
24785 REGNO (index));
24786 break;
24787
24788 case MULT:
24789 case ASHIFTRT:
24790 case LSHIFTRT:
24791 case ASHIFT:
24792 case ROTATERT:
24793 {
24794 asm_fprintf (stream, "[%r, %s%r",
24795 REGNO (base), is_minus ? "-" : "",
24796 REGNO (XEXP (index, 0)));
24797 arm_print_operand (stream, index, 'S');
24798 fputs ("]", stream);
24799 break;
24800 }
24801
24802 default:
24803 gcc_unreachable ();
24804 }
24805 }
24806 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24807 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24808 {
24809 gcc_assert (REG_P (XEXP (x, 0)));
24810
24811 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24812 asm_fprintf (stream, "[%r, #%s%d]!",
24813 REGNO (XEXP (x, 0)),
24814 GET_CODE (x) == PRE_DEC ? "-" : "",
24815 GET_MODE_SIZE (mode));
24816 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24817 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x, 0)));
24818 else
24819 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24820 GET_CODE (x) == POST_DEC ? "-" : "",
24821 GET_MODE_SIZE (mode));
24822 }
24823 else if (GET_CODE (x) == PRE_MODIFY)
24824 {
24825 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24826 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24827 asm_fprintf (stream, "#%wd]!",
24828 INTVAL (XEXP (XEXP (x, 1), 1)));
24829 else
24830 asm_fprintf (stream, "%r]!",
24831 REGNO (XEXP (XEXP (x, 1), 1)));
24832 }
24833 else if (GET_CODE (x) == POST_MODIFY)
24834 {
24835 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24836 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24837 asm_fprintf (stream, "#%wd",
24838 INTVAL (XEXP (XEXP (x, 1), 1)));
24839 else
24840 asm_fprintf (stream, "%r",
24841 REGNO (XEXP (XEXP (x, 1), 1)));
24842 }
24843 else output_addr_const (stream, x);
24844 }
24845 else
24846 {
24847 if (REG_P (x))
24848 asm_fprintf (stream, "[%r]", REGNO (x));
24849 else if (GET_CODE (x) == POST_INC)
24850 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24851 else if (GET_CODE (x) == PLUS)
24852 {
24853 gcc_assert (REG_P (XEXP (x, 0)));
24854 if (CONST_INT_P (XEXP (x, 1)))
24855 asm_fprintf (stream, "[%r, #%wd]",
24856 REGNO (XEXP (x, 0)),
24857 INTVAL (XEXP (x, 1)));
24858 else
24859 asm_fprintf (stream, "[%r, %r]",
24860 REGNO (XEXP (x, 0)),
24861 REGNO (XEXP (x, 1)));
24862 }
24863 else
24864 output_addr_const (stream, x);
24865 }
24866 }
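
/* Illustrative 32-bit address output (registers and offsets assumed):
   (plus r0 (const_int 8)) prints "[r0, #8]", (minus r0 r1) prints
   "[r0, -r1]", (pre_dec r3) in SImode prints "[r3, #-4]!" and
   (post_inc r3) prints "[r3], #4".  */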
24867 \f
24868 /* Target hook for indicating whether a punctuation character for
24869 TARGET_PRINT_OPERAND is valid. */
24870 static bool
24871 arm_print_operand_punct_valid_p (unsigned char code)
24872 {
24873 return (code == '@' || code == '|' || code == '.'
24874 || code == '(' || code == ')' || code == '#'
24875 || (TARGET_32BIT && (code == '?'))
24876 || (TARGET_THUMB2 && (code == '!'))
24877 || (TARGET_THUMB && (code == '_')));
24878 }
24879 \f
24880 /* Target hook for assembling integer objects. The ARM version needs to
24881 handle word-sized values specially. */
24882 static bool
24883 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24884 {
24885 machine_mode mode;
24886
24887 if (size == UNITS_PER_WORD && aligned_p)
24888 {
24889 fputs ("\t.word\t", asm_out_file);
24890 output_addr_const (asm_out_file, x);
24891
24892 /* Mark symbols as position independent. We only do this in the
24893 .text segment, not in the .data segment. */
24894 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24895 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24896 {
24897 /* See legitimize_pic_address for an explanation of the
24898 TARGET_VXWORKS_RTP check. */
24899 /* References to weak symbols cannot be resolved locally:
24900 they may be overridden by a non-weak definition at link
24901 time. */
24902 if (!arm_pic_data_is_text_relative
24903 || (SYMBOL_REF_P (x)
24904 && (!SYMBOL_REF_LOCAL_P (x)
24905 || (SYMBOL_REF_DECL (x)
24906 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24907 || (SYMBOL_REF_FUNCTION_P (x)
24908 && !arm_fdpic_local_funcdesc_p (x)))))
24909 {
24910 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24911 fputs ("(GOTFUNCDESC)", asm_out_file);
24912 else
24913 fputs ("(GOT)", asm_out_file);
24914 }
24915 else
24916 {
24917 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24918 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24919 else
24920 {
24921 bool is_readonly;
24922
24923 if (!TARGET_FDPIC
24924 || arm_is_segment_info_known (x, &is_readonly))
24925 fputs ("(GOTOFF)", asm_out_file);
24926 else
24927 fputs ("(GOT)", asm_out_file);
24928 }
24929 }
24930 }
24931
24932 /* For FDPIC we also have to mark symbol for .data section. */
24933 if (TARGET_FDPIC
24934 && !making_const_table
24935 && SYMBOL_REF_P (x)
24936 && SYMBOL_REF_FUNCTION_P (x))
24937 fputs ("(FUNCDESC)", asm_out_file);
24938
24939 fputc ('\n', asm_out_file);
24940 return true;
24941 }
24942
24943 mode = GET_MODE (x);
24944
24945 if (arm_vector_mode_supported_p (mode))
24946 {
24947 int i, units;
24948
24949 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24950
24951 units = CONST_VECTOR_NUNITS (x);
24952 size = GET_MODE_UNIT_SIZE (mode);
24953
24954 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24955 for (i = 0; i < units; i++)
24956 {
24957 rtx elt = CONST_VECTOR_ELT (x, i);
24958 assemble_integer
24959 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24960 }
24961 else
24962 for (i = 0; i < units; i++)
24963 {
24964 rtx elt = CONST_VECTOR_ELT (x, i);
24965 assemble_real
24966 (*CONST_DOUBLE_REAL_VALUE (elt),
24967 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24968 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24969 }
24970
24971 return true;
24972 }
24973
24974 return default_assemble_integer (x, size, aligned_p);
24975 }
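
/* For illustration only (assumed assembler output, not taken from a build):
   with -fPIC, a word in the constant pool referring to a non-local symbol
   would be emitted roughly as

	.word	sym(GOT)

   while a reference to a local, text-relative symbol would use

	.word	sym(GOTOFF)

   The exact suffix depends on the FDPIC and locality checks above.  */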
24976
24977 static void
24978 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24979 {
24980 section *s;
24981
24982 if (!TARGET_AAPCS_BASED)
24983 {
24984 (is_ctor ?
24985 default_named_section_asm_out_constructor
24986 : default_named_section_asm_out_destructor) (symbol, priority);
24987 return;
24988 }
24989
24990 /* Put these in the .init_array section, using a special relocation. */
24991 if (priority != DEFAULT_INIT_PRIORITY)
24992 {
24993 char buf[18];
24994 sprintf (buf, "%s.%.5u",
24995 is_ctor ? ".init_array" : ".fini_array",
24996 priority);
24997 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24998 }
24999 else if (is_ctor)
25000 s = ctors_section;
25001 else
25002 s = dtors_section;
25003
25004 switch_to_section (s);
25005 assemble_align (POINTER_SIZE);
25006 fputs ("\t.word\t", asm_out_file);
25007 output_addr_const (asm_out_file, symbol);
25008 fputs ("(target1)\n", asm_out_file);
25009 }
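
/* For illustration only (assumed assembler output): on an AAPCS target a
   constructor with priority 101 would be placed roughly as

	.section	.init_array.00101
	.word	my_ctor(target1)

   where "my_ctor" is a hypothetical symbol.  The (target1) relocation lets
   the linker choose between an absolute and a PC-relative encoding.  */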
25010
25011 /* Add a function to the list of static constructors. */
25012
25013 static void
25014 arm_elf_asm_constructor (rtx symbol, int priority)
25015 {
25016 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25017 }
25018
25019 /* Add a function to the list of static destructors. */
25020
25021 static void
25022 arm_elf_asm_destructor (rtx symbol, int priority)
25023 {
25024 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25025 }
25026 \f
25027 /* A finite state machine takes care of noticing whether or not instructions
25028 can be conditionally executed, and thus decrease execution time and code
25029 size by deleting branch instructions. The fsm is controlled by
25030 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25031
25032 /* The states of the fsm controlling condition codes are:
25033 0: normal, do nothing special
25034 1: make ASM_OUTPUT_OPCODE not output this instruction
25035 2: make ASM_OUTPUT_OPCODE not output this instruction
25036 3: make instructions conditional
25037 4: make instructions conditional
25038
25039 State transitions (state->state by whom under condition):
25040 0 -> 1 final_prescan_insn if the `target' is a label
25041 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25042 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25043 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25044 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25045 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25046 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25047 (the target insn is arm_target_insn).
25048
25049 If the jump clobbers the conditions then we use states 2 and 4.
25050
25051 A similar thing can be done with conditional return insns.
25052
25053 XXX In case the `target' is an unconditional branch, this conditionalising
25054 of the instructions always reduces code size, but not always execution
25055 time. But then, I want to reduce the code size to somewhere near what
25056 /bin/cc produces. */
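
/* For illustration only (a hypothetical fragment, not actual compiler
   output): a branch skipping a couple of simple insns, e.g.

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	mov	r2, #5
   .L1:

   can instead be emitted with the skipped insns conditionalised on the
   inverse condition, removing the branch entirely:

	cmp	r0, #0
	addne	r1, r1, #1
	movne	r2, #5  */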
25057
25058 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25059 instructions. When a COND_EXEC instruction is seen the subsequent
25060 instructions are scanned so that multiple conditional instructions can be
25061 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25062 specify the length and true/false mask for the IT block. These will be
25063 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
25064
25065 /* Returns the index of the ARM condition code string in
25066 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25067 COMPARISON should be an rtx like `(eq (...) (...))'. */
25068
25069 enum arm_cond_code
25070 maybe_get_arm_condition_code (rtx comparison)
25071 {
25072 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25073 enum arm_cond_code code;
25074 enum rtx_code comp_code = GET_CODE (comparison);
25075
25076 if (GET_MODE_CLASS (mode) != MODE_CC)
25077 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25078 XEXP (comparison, 1));
25079
25080 switch (mode)
25081 {
25082 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25083 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25084 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25085 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25086 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25087 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25088 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25089 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25090 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25091 case E_CC_DLTUmode: code = ARM_CC;
25092
25093 dominance:
25094 if (comp_code == EQ)
25095 return ARM_INVERSE_CONDITION_CODE (code);
25096 if (comp_code == NE)
25097 return code;
25098 return ARM_NV;
25099
25100 case E_CC_NZmode:
25101 switch (comp_code)
25102 {
25103 case NE: return ARM_NE;
25104 case EQ: return ARM_EQ;
25105 case GE: return ARM_PL;
25106 case LT: return ARM_MI;
25107 default: return ARM_NV;
25108 }
25109
25110 case E_CC_Zmode:
25111 switch (comp_code)
25112 {
25113 case NE: return ARM_NE;
25114 case EQ: return ARM_EQ;
25115 default: return ARM_NV;
25116 }
25117
25118 case E_CC_Nmode:
25119 switch (comp_code)
25120 {
25121 case NE: return ARM_MI;
25122 case EQ: return ARM_PL;
25123 default: return ARM_NV;
25124 }
25125
25126 case E_CCFPEmode:
25127 case E_CCFPmode:
25128 /* We can handle all cases except UNEQ and LTGT. */
25129 switch (comp_code)
25130 {
25131 case GE: return ARM_GE;
25132 case GT: return ARM_GT;
25133 case LE: return ARM_LS;
25134 case LT: return ARM_MI;
25135 case NE: return ARM_NE;
25136 case EQ: return ARM_EQ;
25137 case ORDERED: return ARM_VC;
25138 case UNORDERED: return ARM_VS;
25139 case UNLT: return ARM_LT;
25140 case UNLE: return ARM_LE;
25141 case UNGT: return ARM_HI;
25142 case UNGE: return ARM_PL;
25143 /* UNEQ and LTGT do not have a representation. */
25144 case UNEQ: /* Fall through. */
25145 case LTGT: /* Fall through. */
25146 default: return ARM_NV;
25147 }
25148
25149 case E_CC_SWPmode:
25150 switch (comp_code)
25151 {
25152 case NE: return ARM_NE;
25153 case EQ: return ARM_EQ;
25154 case GE: return ARM_LE;
25155 case GT: return ARM_LT;
25156 case LE: return ARM_GE;
25157 case LT: return ARM_GT;
25158 case GEU: return ARM_LS;
25159 case GTU: return ARM_CC;
25160 case LEU: return ARM_CS;
25161 case LTU: return ARM_HI;
25162 default: return ARM_NV;
25163 }
25164
25165 case E_CC_Cmode:
25166 switch (comp_code)
25167 {
25168 case LTU: return ARM_CS;
25169 case GEU: return ARM_CC;
25170 default: return ARM_NV;
25171 }
25172
25173 case E_CC_NVmode:
25174 switch (comp_code)
25175 {
25176 case GE: return ARM_GE;
25177 case LT: return ARM_LT;
25178 default: return ARM_NV;
25179 }
25180
25181 case E_CC_Bmode:
25182 switch (comp_code)
25183 {
25184 case GEU: return ARM_CS;
25185 case LTU: return ARM_CC;
25186 default: return ARM_NV;
25187 }
25188
25189 case E_CC_Vmode:
25190 switch (comp_code)
25191 {
25192 case NE: return ARM_VS;
25193 case EQ: return ARM_VC;
25194 default: return ARM_NV;
25195 }
25196
25197 case E_CC_ADCmode:
25198 switch (comp_code)
25199 {
25200 case GEU: return ARM_CS;
25201 case LTU: return ARM_CC;
25202 default: return ARM_NV;
25203 }
25204
25205 case E_CCmode:
25206 case E_CC_RSBmode:
25207 switch (comp_code)
25208 {
25209 case NE: return ARM_NE;
25210 case EQ: return ARM_EQ;
25211 case GE: return ARM_GE;
25212 case GT: return ARM_GT;
25213 case LE: return ARM_LE;
25214 case LT: return ARM_LT;
25215 case GEU: return ARM_CS;
25216 case GTU: return ARM_HI;
25217 case LEU: return ARM_LS;
25218 case LTU: return ARM_CC;
25219 default: return ARM_NV;
25220 }
25221
25222 default: gcc_unreachable ();
25223 }
25224 }
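
/* For example (illustrative): a comparison rtx such as
   (eq (reg:CC_Z cc) (const_int 0)) yields ARM_EQ, whereas in CC_SWPmode,
   where the operands of the original comparison were swapped, (gt ...)
   maps to ARM_LT, and so on.  */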
25225
25226 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25227 static enum arm_cond_code
25228 get_arm_condition_code (rtx comparison)
25229 {
25230 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25231 gcc_assert (code != ARM_NV);
25232 return code;
25233 }
25234
25235 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25236 code registers when not targeting Thumb1. The VFP condition register
25237 only exists when generating hard-float code. */
25238 static bool
25239 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25240 {
25241 if (!TARGET_32BIT)
25242 return false;
25243
25244 *p1 = CC_REGNUM;
25245 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25246 return true;
25247 }
25248
25249 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25250 instructions. */
25251 void
25252 thumb2_final_prescan_insn (rtx_insn *insn)
25253 {
25254 rtx_insn *first_insn = insn;
25255 rtx body = PATTERN (insn);
25256 rtx predicate;
25257 enum arm_cond_code code;
25258 int n;
25259 int mask;
25260 int max;
25261
25262 /* max_insns_skipped in the tune was already taken into account in the
25263 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
25264 just emit the IT blocks as best we can. It does not make sense to split
25265 the IT blocks. */
25266 max = MAX_INSN_PER_IT_BLOCK;
25267
25268 /* Remove the previous insn from the count of insns to be output. */
25269 if (arm_condexec_count)
25270 arm_condexec_count--;
25271
25272 /* Nothing to do if we are already inside a conditional block. */
25273 if (arm_condexec_count)
25274 return;
25275
25276 if (GET_CODE (body) != COND_EXEC)
25277 return;
25278
25279 /* Conditional jumps are implemented directly. */
25280 if (JUMP_P (insn))
25281 return;
25282
25283 predicate = COND_EXEC_TEST (body);
25284 arm_current_cc = get_arm_condition_code (predicate);
25285
25286 n = get_attr_ce_count (insn);
25287 arm_condexec_count = 1;
25288 arm_condexec_mask = (1 << n) - 1;
25289 arm_condexec_masklen = n;
25290 /* See if subsequent instructions can be combined into the same block. */
25291 for (;;)
25292 {
25293 insn = next_nonnote_insn (insn);
25294
25295 /* Jumping into the middle of an IT block is illegal, so a label or
25296 barrier terminates the block. */
25297 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25298 break;
25299
25300 body = PATTERN (insn);
25301 /* USE and CLOBBER aren't really insns, so just skip them. */
25302 if (GET_CODE (body) == USE
25303 || GET_CODE (body) == CLOBBER)
25304 continue;
25305
25306 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25307 if (GET_CODE (body) != COND_EXEC)
25308 break;
25309 /* Maximum number of conditionally executed instructions in a block. */
25310 n = get_attr_ce_count (insn);
25311 if (arm_condexec_masklen + n > max)
25312 break;
25313
25314 predicate = COND_EXEC_TEST (body);
25315 code = get_arm_condition_code (predicate);
25316 mask = (1 << n) - 1;
25317 if (arm_current_cc == code)
25318 arm_condexec_mask |= (mask << arm_condexec_masklen);
25319 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25320 break;
25321
25322 arm_condexec_count++;
25323 arm_condexec_masklen += n;
25324
25325 /* A jump must be the last instruction in a conditional block. */
25326 if (JUMP_P (insn))
25327 break;
25328 }
25329 /* Restore recog_data (getting the attributes of other insns can
25330 destroy this array, but final.cc assumes that it remains intact
25331 across this call). */
25332 extract_constrain_insn_cached (first_insn);
25333 }
25334
25335 void
25336 arm_final_prescan_insn (rtx_insn *insn)
25337 {
25338 /* BODY will hold the body of INSN. */
25339 rtx body = PATTERN (insn);
25340
25341 /* This will be 1 if trying to repeat the trick, and things need to be
25342 reversed if it appears to fail. */
25343 int reverse = 0;
25344
25345 /* If we start with a return insn, we only succeed if we find another one. */
25346 int seeking_return = 0;
25347 enum rtx_code return_code = UNKNOWN;
25348
25349 /* START_INSN will hold the insn from where we start looking. This is the
25350 first insn after the following code_label if REVERSE is true. */
25351 rtx_insn *start_insn = insn;
25352
25353 /* If in state 4, check if the target branch is reached, in order to
25354 change back to state 0. */
25355 if (arm_ccfsm_state == 4)
25356 {
25357 if (insn == arm_target_insn)
25358 {
25359 arm_target_insn = NULL;
25360 arm_ccfsm_state = 0;
25361 }
25362 return;
25363 }
25364
25365 /* If in state 3, it is possible to repeat the trick, if this insn is an
25366 unconditional branch to a label, and immediately following this branch
25367 is the previous target label which is only used once, and the label this
25368 branch jumps to is not too far off. */
25369 if (arm_ccfsm_state == 3)
25370 {
25371 if (simplejump_p (insn))
25372 {
25373 start_insn = next_nonnote_insn (start_insn);
25374 if (BARRIER_P (start_insn))
25375 {
25376 /* XXX Isn't this always a barrier? */
25377 start_insn = next_nonnote_insn (start_insn);
25378 }
25379 if (LABEL_P (start_insn)
25380 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25381 && LABEL_NUSES (start_insn) == 1)
25382 reverse = TRUE;
25383 else
25384 return;
25385 }
25386 else if (ANY_RETURN_P (body))
25387 {
25388 start_insn = next_nonnote_insn (start_insn);
25389 if (BARRIER_P (start_insn))
25390 start_insn = next_nonnote_insn (start_insn);
25391 if (LABEL_P (start_insn)
25392 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25393 && LABEL_NUSES (start_insn) == 1)
25394 {
25395 reverse = TRUE;
25396 seeking_return = 1;
25397 return_code = GET_CODE (body);
25398 }
25399 else
25400 return;
25401 }
25402 else
25403 return;
25404 }
25405
25406 gcc_assert (!arm_ccfsm_state || reverse);
25407 if (!JUMP_P (insn))
25408 return;
25409
25410 /* This jump might be paralleled with a clobber of the condition codes;
25411 the jump should always come first. */
25412 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25413 body = XVECEXP (body, 0, 0);
25414
25415 if (reverse
25416 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25417 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25418 {
25419 int insns_skipped;
25420 int fail = FALSE, succeed = FALSE;
25421 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25422 int then_not_else = TRUE;
25423 rtx_insn *this_insn = start_insn;
25424 rtx label = 0;
25425
25426 /* Register the insn jumped to. */
25427 if (reverse)
25428 {
25429 if (!seeking_return)
25430 label = XEXP (SET_SRC (body), 0);
25431 }
25432 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25433 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25434 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25435 {
25436 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25437 then_not_else = FALSE;
25438 }
25439 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25440 {
25441 seeking_return = 1;
25442 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25443 }
25444 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25445 {
25446 seeking_return = 1;
25447 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25448 then_not_else = FALSE;
25449 }
25450 else
25451 gcc_unreachable ();
25452
25453 /* See how many insns this branch skips, and what kind of insns. If all
25454 insns are okay, and the label or unconditional branch to the same
25455 label is not too far away, succeed. */
25456 for (insns_skipped = 0;
25457 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25458 {
25459 rtx scanbody;
25460
25461 this_insn = next_nonnote_insn (this_insn);
25462 if (!this_insn)
25463 break;
25464
25465 switch (GET_CODE (this_insn))
25466 {
25467 case CODE_LABEL:
25468 /* Succeed if it is the target label, otherwise fail since
25469 control falls in from somewhere else. */
25470 if (this_insn == label)
25471 {
25472 arm_ccfsm_state = 1;
25473 succeed = TRUE;
25474 }
25475 else
25476 fail = TRUE;
25477 break;
25478
25479 case BARRIER:
25480 /* Succeed if the following insn is the target label.
25481 Otherwise fail.
25482 If return insns are used then the last insn in a function
25483 will be a barrier. */
25484 this_insn = next_nonnote_insn (this_insn);
25485 if (this_insn && this_insn == label)
25486 {
25487 arm_ccfsm_state = 1;
25488 succeed = TRUE;
25489 }
25490 else
25491 fail = TRUE;
25492 break;
25493
25494 case CALL_INSN:
25495 /* The AAPCS says that conditional calls should not be
25496 used since they make interworking inefficient (the
25497 linker can't transform BL<cond> into BLX). That's
25498 only a problem if the machine has BLX. */
25499 if (arm_arch5t)
25500 {
25501 fail = TRUE;
25502 break;
25503 }
25504
25505 /* Succeed if the following insn is the target label, or
25506 if the following two insns are a barrier and the
25507 target label. */
25508 this_insn = next_nonnote_insn (this_insn);
25509 if (this_insn && BARRIER_P (this_insn))
25510 this_insn = next_nonnote_insn (this_insn);
25511
25512 if (this_insn && this_insn == label
25513 && insns_skipped < max_insns_skipped)
25514 {
25515 arm_ccfsm_state = 1;
25516 succeed = TRUE;
25517 }
25518 else
25519 fail = TRUE;
25520 break;
25521
25522 case JUMP_INSN:
25523 /* If this is an unconditional branch to the same label, succeed.
25524 If it is to another label, do nothing. If it is conditional,
25525 fail. */
25526 /* XXX Probably, the tests for SET and the PC are
25527 unnecessary. */
25528
25529 scanbody = PATTERN (this_insn);
25530 if (GET_CODE (scanbody) == SET
25531 && GET_CODE (SET_DEST (scanbody)) == PC)
25532 {
25533 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25534 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25535 {
25536 arm_ccfsm_state = 2;
25537 succeed = TRUE;
25538 }
25539 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25540 fail = TRUE;
25541 }
25542 /* Fail if a conditional return is undesirable (e.g. on a
25543 StrongARM), but still allow this if optimizing for size. */
25544 else if (GET_CODE (scanbody) == return_code
25545 && !use_return_insn (TRUE, NULL)
25546 && !optimize_size)
25547 fail = TRUE;
25548 else if (GET_CODE (scanbody) == return_code)
25549 {
25550 arm_ccfsm_state = 2;
25551 succeed = TRUE;
25552 }
25553 else if (GET_CODE (scanbody) == PARALLEL)
25554 {
25555 switch (get_attr_conds (this_insn))
25556 {
25557 case CONDS_NOCOND:
25558 break;
25559 default:
25560 fail = TRUE;
25561 break;
25562 }
25563 }
25564 else
25565 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25566
25567 break;
25568
25569 case INSN:
25570 /* Instructions using or affecting the condition codes make it
25571 fail. */
25572 scanbody = PATTERN (this_insn);
25573 if (!(GET_CODE (scanbody) == SET
25574 || GET_CODE (scanbody) == PARALLEL)
25575 || get_attr_conds (this_insn) != CONDS_NOCOND)
25576 fail = TRUE;
25577 break;
25578
25579 default:
25580 break;
25581 }
25582 }
25583 if (succeed)
25584 {
25585 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25586 arm_target_label = CODE_LABEL_NUMBER (label);
25587 else
25588 {
25589 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25590
25591 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25592 {
25593 this_insn = next_nonnote_insn (this_insn);
25594 gcc_assert (!this_insn
25595 || (!BARRIER_P (this_insn)
25596 && !LABEL_P (this_insn)));
25597 }
25598 if (!this_insn)
25599 {
25600 /* Oh dear! We ran off the end; give up. */
25601 extract_constrain_insn_cached (insn);
25602 arm_ccfsm_state = 0;
25603 arm_target_insn = NULL;
25604 return;
25605 }
25606 arm_target_insn = this_insn;
25607 }
25608
25609 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25610 what it was. */
25611 if (!reverse)
25612 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25613
25614 if (reverse || then_not_else)
25615 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25616 }
25617
25618 /* Restore recog_data (getting the attributes of other insns can
25619 destroy this array, but final.cc assumes that it remains intact
25620 across this call). */
25621 extract_constrain_insn_cached (insn);
25622 }
25623 }
25624
25625 /* Output IT instructions. */
25626 void
25627 thumb2_asm_output_opcode (FILE * stream)
25628 {
25629 char buff[5];
25630 int n;
25631
25632 if (arm_condexec_mask)
25633 {
25634 for (n = 0; n < arm_condexec_masklen; n++)
25635 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25636 buff[n] = 0;
25637 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25638 arm_condition_codes[arm_current_cc]);
25639 arm_condexec_mask = 0;
25640 }
25641 }
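
/* For illustration only (simplified, assumed output): three consecutive
   COND_EXEC insns predicated on eq, eq and ne give
   arm_condexec_masklen == 3 and arm_condexec_mask == 0b011, so the block
   is emitted roughly as

	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #1
	movne	r0, #0  */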
25642
25643 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25644 UNITS_PER_WORD bytes wide. */
25645 static unsigned int
25646 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25647 {
25648 if (IS_VPR_REGNUM (regno))
25649 return CEIL (GET_MODE_SIZE (mode), 2);
25650
25651 if (TARGET_32BIT
25652 && regno > PC_REGNUM
25653 && regno != FRAME_POINTER_REGNUM
25654 && regno != ARG_POINTER_REGNUM
25655 && !IS_VFP_REGNUM (regno))
25656 return 1;
25657
25658 return ARM_NUM_REGS (mode);
25659 }
25660
25661 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25662 static bool
25663 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25664 {
25665 if (GET_MODE_CLASS (mode) == MODE_CC)
25666 return (regno == CC_REGNUM
25667 || (TARGET_VFP_BASE
25668 && regno == VFPCC_REGNUM));
25669
25670 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25671 return false;
25672
25673 if (IS_VPR_REGNUM (regno))
25674 return VALID_MVE_PRED_MODE (mode);
25675
25676 if (TARGET_THUMB1)
25677 /* For the Thumb we only allow values bigger than SImode in
25678 registers 0 - 6, so that there is always a second low
25679 register available to hold the upper part of the value.
25680 We probably ought to ensure that the register is the
25681 start of an even-numbered register pair.
25682 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25683
25684 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25685 {
25686 if (mode == DFmode || mode == DImode)
25687 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25688
25689 if (mode == HFmode || mode == BFmode || mode == HImode
25690 || mode == SFmode || mode == SImode)
25691 return VFP_REGNO_OK_FOR_SINGLE (regno);
25692
25693 if (TARGET_NEON)
25694 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25695 || (VALID_NEON_QREG_MODE (mode)
25696 && NEON_REGNO_OK_FOR_QUAD (regno))
25697 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25698 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25699 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25700 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25701 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25702 if (TARGET_HAVE_MVE)
25703 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25704 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25705 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25706
25707 return false;
25708 }
25709
25710 if (TARGET_REALLY_IWMMXT)
25711 {
25712 if (IS_IWMMXT_GR_REGNUM (regno))
25713 return mode == SImode;
25714
25715 if (IS_IWMMXT_REGNUM (regno))
25716 return VALID_IWMMXT_REG_MODE (mode);
25717 }
25718
25719 /* We allow almost any value to be stored in the general registers.
25720 Restrict doubleword quantities to even register pairs in ARM state
25721 so that we can use ldrd. The same restriction applies for MVE
25722 in order to support Armv8.1-M Mainline instructions.
25723 Do not allow very large Neon structure opaque modes in general
25724 registers; they would use too many. */
25725 if (regno <= LAST_ARM_REGNUM)
25726 {
25727 if (ARM_NUM_REGS (mode) > 4)
25728 return false;
25729
25730 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25731 return true;
25732
25733 return !((TARGET_LDRD || TARGET_CDE)
25734 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25735 }
25736
25737 if (regno == FRAME_POINTER_REGNUM
25738 || regno == ARG_POINTER_REGNUM)
25739 /* We only allow integers in the fake hard registers. */
25740 return GET_MODE_CLASS (mode) == MODE_INT;
25741
25742 return false;
25743 }
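
/* For example (illustrative): in ARM state with TARGET_LDRD, a DImode
   value may live in r0:r1 or r2:r3 but not in r1:r2, so that the pair can
   always be accessed with a single ldrd/strd.  */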
25744
25745 /* Implement TARGET_MODES_TIEABLE_P. */
25746
25747 static bool
25748 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25749 {
25750 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25751 return true;
25752
25753 if (TARGET_HAVE_MVE
25754 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25755 return true;
25756
25757 /* We specifically want to allow elements of "structure" modes to
25758 be tieable to the structure. This more general condition allows
25759 other rarer situations too. */
25760 if ((TARGET_NEON
25761 && (VALID_NEON_DREG_MODE (mode1)
25762 || VALID_NEON_QREG_MODE (mode1)
25763 || VALID_NEON_STRUCT_MODE (mode1))
25764 && (VALID_NEON_DREG_MODE (mode2)
25765 || VALID_NEON_QREG_MODE (mode2)
25766 || VALID_NEON_STRUCT_MODE (mode2)))
25767 || (TARGET_HAVE_MVE
25768 && (VALID_MVE_MODE (mode1)
25769 || VALID_MVE_STRUCT_MODE (mode1))
25770 && (VALID_MVE_MODE (mode2)
25771 || VALID_MVE_STRUCT_MODE (mode2))))
25772 return true;
25773
25774 return false;
25775 }
25776
25777 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25778 not used in arm mode. */
25779
25780 enum reg_class
25781 arm_regno_class (int regno)
25782 {
25783 if (regno == PC_REGNUM)
25784 return NO_REGS;
25785
25786 if (IS_VPR_REGNUM (regno))
25787 return VPR_REG;
25788
25789 if (IS_PAC_REGNUM (regno))
25790 return PAC_REG;
25791
25792 if (TARGET_THUMB1)
25793 {
25794 if (regno == STACK_POINTER_REGNUM)
25795 return STACK_REG;
25796 if (regno == CC_REGNUM)
25797 return CC_REG;
25798 if (regno < 8)
25799 return LO_REGS;
25800 return HI_REGS;
25801 }
25802
25803 if (TARGET_THUMB2 && regno < 8)
25804 return LO_REGS;
25805
25806 if ( regno <= LAST_ARM_REGNUM
25807 || regno == FRAME_POINTER_REGNUM
25808 || regno == ARG_POINTER_REGNUM)
25809 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25810
25811 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25812 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25813
25814 if (IS_VFP_REGNUM (regno))
25815 {
25816 if (regno <= D7_VFP_REGNUM)
25817 return VFP_D0_D7_REGS;
25818 else if (regno <= LAST_LO_VFP_REGNUM)
25819 return VFP_LO_REGS;
25820 else
25821 return VFP_HI_REGS;
25822 }
25823
25824 if (IS_IWMMXT_REGNUM (regno))
25825 return IWMMXT_REGS;
25826
25827 if (IS_IWMMXT_GR_REGNUM (regno))
25828 return IWMMXT_GR_REGS;
25829
25830 return NO_REGS;
25831 }
25832
25833 /* Handle a special case when computing the offset
25834 of an argument from the frame pointer. */
25835 int
25836 arm_debugger_arg_offset (int value, rtx addr)
25837 {
25838 rtx_insn *insn;
25839
25840 /* We are only interested if dbxout_parms() failed to compute the offset. */
25841 if (value != 0)
25842 return 0;
25843
25844 /* We can only cope with the case where the address is held in a register. */
25845 if (!REG_P (addr))
25846 return 0;
25847
25848 /* If we are using the frame pointer to point at the argument, then
25849 an offset of 0 is correct. */
25850 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25851 return 0;
25852
25853 /* If we are using the stack pointer to point at the
25854 argument, then an offset of 0 is correct. */
25855 /* ??? Check this is consistent with thumb2 frame layout. */
25856 if ((TARGET_THUMB || !frame_pointer_needed)
25857 && REGNO (addr) == SP_REGNUM)
25858 return 0;
25859
25860 /* Oh dear. The argument is pointed to by a register rather
25861 than being held in a register, or being stored at a known
25862 offset from the frame pointer. Since GDB only understands
25863 those two kinds of argument we must translate the address
25864 held in the register into an offset from the frame pointer.
25865 We do this by searching through the insns for the function
25866 looking to see where this register gets its value. If the
25867 register is initialized from the frame pointer plus an offset
25868 then we are in luck and we can continue, otherwise we give up.
25869
25870 This code is exercised by producing debugging information
25871 for a function with arguments like this:
25872
25873 double func (double a, double b, int c, double d) {return d;}
25874
25875 Without this code the stab for parameter 'd' will be set to
25876 an offset of 0 from the frame pointer, rather than 8. */
25877
25878 /* The if() statement says:
25879
25880 If the insn is a normal instruction
25881 and if the insn is setting the value in a register
25882 and if the register being set is the register holding the address of the argument
25883 and if the address is computed by an addition
25884 that involves adding to a register
25885 which is the frame pointer
25886 a constant integer
25887
25888 then... */
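
/* In RTL terms, the insn being searched for looks roughly like
   (illustrative):

	(set (reg Rn)
	     (plus (reg hard-frame-pointer) (const_int 8)))

   in which case VALUE becomes 8 below.  */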
25889
25890 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25891 {
25892 if ( NONJUMP_INSN_P (insn)
25893 && GET_CODE (PATTERN (insn)) == SET
25894 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25895 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25896 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25897 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25898 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25899 )
25900 {
25901 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25902
25903 break;
25904 }
25905 }
25906
25907 if (value == 0)
25908 {
25909 debug_rtx (addr);
25910 warning (0, "unable to compute real location of stacked parameter");
25911 value = 8; /* XXX magic hack */
25912 }
25913
25914 return value;
25915 }
25916 \f
25917 /* Implement TARGET_PROMOTED_TYPE. */
25918
25919 static tree
25920 arm_promoted_type (const_tree t)
25921 {
25922 if (SCALAR_FLOAT_TYPE_P (t)
25923 && TYPE_PRECISION (t) == 16
25924 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25925 return float_type_node;
25926 return NULL_TREE;
25927 }
25928
25929 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25930 This simply adds HFmode as a supported mode; even though we don't
25931 implement arithmetic on this type directly, it's supported by
25932 optabs conversions, much the way the double-word arithmetic is
25933 special-cased in the default hook. */
25934
25935 static bool
25936 arm_scalar_mode_supported_p (scalar_mode mode)
25937 {
25938 if (mode == HFmode)
25939 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25940 else if (ALL_FIXED_POINT_MODE_P (mode))
25941 return true;
25942 else
25943 return default_scalar_mode_supported_p (mode);
25944 }
25945
25946 /* Set the value of FLT_EVAL_METHOD.
25947 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25948
25949 0: evaluate all operations and constants, whose semantic type has at
25950 most the range and precision of type float, to the range and
25951 precision of float; evaluate all other operations and constants to
25952 the range and precision of the semantic type;
25953
25954 N, where _FloatN is a supported interchange floating type:
25955 evaluate all operations and constants, whose semantic type has at
25956 most the range and precision of _FloatN type, to the range and
25957 precision of the _FloatN type; evaluate all other operations and
25958 constants to the range and precision of the semantic type;
25959
25960 If we have the ARMv8.2-A extensions then we support _Float16 in native
25961 precision, so we should set this to 16. Otherwise, we support the type,
25962 but want to evaluate expressions in float precision, so set this to
25963 0. */
25964
25965 static enum flt_eval_method
25966 arm_excess_precision (enum excess_precision_type type)
25967 {
25968 switch (type)
25969 {
25970 case EXCESS_PRECISION_TYPE_FAST:
25971 case EXCESS_PRECISION_TYPE_STANDARD:
25972 /* We can calculate either in 16-bit range and precision or
25973 32-bit range and precision. Make that decision based on whether
25974 we have native support for the ARMv8.2-A 16-bit floating-point
25975 instructions or not. */
25976 return (TARGET_VFP_FP16INST
25977 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25978 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25979 case EXCESS_PRECISION_TYPE_IMPLICIT:
25980 case EXCESS_PRECISION_TYPE_FLOAT16:
25981 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25982 default:
25983 gcc_unreachable ();
25984 }
25985 return FLT_EVAL_METHOD_UNPREDICTABLE;
25986 }
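
/* For illustration only (a user-level example, not part of the compiler):
   given

	_Float16 f (_Float16 a, _Float16 b) { return a * b + b; }

   a target with the ARMv8.2-A FP16 instructions evaluates the expression
   entirely in _Float16 (FLT_EVAL_METHOD == 16), while other targets
   promote each operation to float and convert back (FLT_EVAL_METHOD == 0).  */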
25987
25988
25989 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25990 _Float16 if we are using anything other than ieee format for 16-bit
25991 floating point. Otherwise, punt to the default implementation. */
25992 static opt_scalar_float_mode
25993 arm_floatn_mode (int n, bool extended)
25994 {
25995 if (!extended && n == 16)
25996 {
25997 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25998 return HFmode;
25999 return opt_scalar_float_mode ();
26000 }
26001
26002 return default_floatn_mode (n, extended);
26003 }
26004
26005
26006 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26007 not to early-clobber SRC registers in the process.
26008
26009 We assume that the operands described by SRC and DEST represent a
26010 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26011 number of components into which the copy has been decomposed. */
26012 void
26013 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26014 {
26015 unsigned int i;
26016
26017 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26018 || REGNO (operands[0]) < REGNO (operands[1]))
26019 {
26020 for (i = 0; i < count; i++)
26021 {
26022 operands[2 * i] = dest[i];
26023 operands[2 * i + 1] = src[i];
26024 }
26025 }
26026 else
26027 {
26028 for (i = 0; i < count; i++)
26029 {
26030 operands[2 * i] = dest[count - i - 1];
26031 operands[2 * i + 1] = src[count - i - 1];
26032 }
26033 }
26034 }
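
/* For example (illustrative): copying a two-register value from {d1, d2}
   into {d2, d3} overlaps and the destination starts at the higher register
   number, so the copies are ordered d2 -> d3 first, then d1 -> d2, to avoid
   clobbering d2 before it has been read.  */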
26035
26036 /* Split operands into moves from op[1] + op[2] into op[0]. */
26037
26038 void
26039 neon_split_vcombine (rtx operands[3])
26040 {
26041 unsigned int dest = REGNO (operands[0]);
26042 unsigned int src1 = REGNO (operands[1]);
26043 unsigned int src2 = REGNO (operands[2]);
26044 machine_mode halfmode = GET_MODE (operands[1]);
26045 unsigned int halfregs = REG_NREGS (operands[1]);
26046 rtx destlo, desthi;
26047
26048 if (src1 == dest && src2 == dest + halfregs)
26049 {
26050 /* No-op move. Can't split to nothing; emit something. */
26051 emit_note (NOTE_INSN_DELETED);
26052 return;
26053 }
26054
26055 /* Preserve register attributes for variable tracking. */
26056 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26057 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26058 GET_MODE_SIZE (halfmode));
26059
26060 /* Special case of reversed high/low parts. Use VSWP. */
26061 if (src2 == dest && src1 == dest + halfregs)
26062 {
26063 rtx x = gen_rtx_SET (destlo, operands[1]);
26064 rtx y = gen_rtx_SET (desthi, operands[2]);
26065 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26066 return;
26067 }
26068
26069 if (!reg_overlap_mentioned_p (operands[2], destlo))
26070 {
26071 /* Try to avoid unnecessary moves if part of the result
26072 is in the right place already. */
26073 if (src1 != dest)
26074 emit_move_insn (destlo, operands[1]);
26075 if (src2 != dest + halfregs)
26076 emit_move_insn (desthi, operands[2]);
26077 }
26078 else
26079 {
26080 if (src2 != dest + halfregs)
26081 emit_move_insn (desthi, operands[2]);
26082 if (src1 != dest)
26083 emit_move_insn (destlo, operands[1]);
26084 }
26085 }
26086 \f
26087 /* Return the number (counting from 0) of
26088 the least significant set bit in MASK. */
26089
26090 inline static int
26091 number_of_first_bit_set (unsigned mask)
26092 {
26093 return ctz_hwi (mask);
26094 }
26095
26096 /* Like emit_multi_reg_push, but allowing for a different set of
26097 registers to be described as saved. MASK is the set of registers
26098 to be saved; REAL_REGS is the set of registers to be described as
26099 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26100
26101 static rtx_insn *
26102 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26103 {
26104 unsigned long regno;
26105 rtx par[10], tmp, reg;
26106 rtx_insn *insn;
26107 int i, j;
26108
26109 /* Build the parallel of the registers actually being stored. */
26110 for (i = 0; mask; ++i, mask &= mask - 1)
26111 {
26112 regno = ctz_hwi (mask);
26113 reg = gen_rtx_REG (SImode, regno);
26114
26115 if (i == 0)
26116 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26117 else
26118 tmp = gen_rtx_USE (VOIDmode, reg);
26119
26120 par[i] = tmp;
26121 }
26122
26123 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26124 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26125 tmp = gen_frame_mem (BLKmode, tmp);
26126 tmp = gen_rtx_SET (tmp, par[0]);
26127 par[0] = tmp;
26128
26129 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26130 insn = emit_insn (tmp);
26131
26132 /* Always build the stack adjustment note for unwind info. */
26133 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26134 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26135 par[0] = tmp;
26136
26137 /* Build the parallel of the registers recorded as saved for unwind. */
26138 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26139 {
26140 regno = ctz_hwi (real_regs);
26141 reg = gen_rtx_REG (SImode, regno);
26142
26143 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26144 tmp = gen_frame_mem (SImode, tmp);
26145 tmp = gen_rtx_SET (tmp, reg);
26146 RTX_FRAME_RELATED_P (tmp) = 1;
26147 par[j + 1] = tmp;
26148 }
26149
26150 if (j == 0)
26151 tmp = par[0];
26152 else
26153 {
26154 RTX_FRAME_RELATED_P (par[0]) = 1;
26155 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26156 }
26157
26158 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26159
26160 return insn;
26161 }
26162
26163 /* Emit code to push or pop registers to or from the stack. F is the
26164 assembly file. MASK is the registers to pop. */
26165 static void
26166 thumb_pop (FILE *f, unsigned long mask)
26167 {
26168 int regno;
26169 int lo_mask = mask & 0xFF;
26170
26171 gcc_assert (mask);
26172
26173 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26174 {
26175 /* Special case. Do not generate a POP PC statement here; do it in
26176 thumb_exit (). */
26177 thumb_exit (f, -1);
26178 return;
26179 }
26180
26181 fprintf (f, "\tpop\t{");
26182
26183 /* Look at the low registers first. */
26184 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26185 {
26186 if (lo_mask & 1)
26187 {
26188 asm_fprintf (f, "%r", regno);
26189
26190 if ((lo_mask & ~1) != 0)
26191 fprintf (f, ", ");
26192 }
26193 }
26194
26195 if (mask & (1 << PC_REGNUM))
26196 {
26197 /* Catch popping the PC. */
26198 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26199 || IS_CMSE_ENTRY (arm_current_func_type ()))
26200 {
26201 /* The PC is never popped directly; instead
26202 it is popped into r3 and then BX is used. */
26203 fprintf (f, "}\n");
26204
26205 thumb_exit (f, -1);
26206
26207 return;
26208 }
26209 else
26210 {
26211 if (mask & 0xFF)
26212 fprintf (f, ", ");
26213
26214 asm_fprintf (f, "%r", PC_REGNUM);
26215 }
26216 }
26217
26218 fprintf (f, "}\n");
26219 }
26220
26221 /* Generate code to return from a thumb function.
26222 If 'reg_containing_return_addr' is -1, then the return address is
26223 actually on the stack, at the stack pointer.
26224
26225 Note: do not forget to update length attribute of corresponding insn pattern
26226 when changing assembly output (eg. length attribute of epilogue_insns when
26227 updating Armv8-M Baseline Security Extensions register clearing
26228 sequences). */
26229 static void
26230 thumb_exit (FILE *f, int reg_containing_return_addr)
26231 {
26232 unsigned regs_available_for_popping;
26233 unsigned regs_to_pop;
26234 int pops_needed;
26235 unsigned available;
26236 unsigned required;
26237 machine_mode mode;
26238 int size;
26239 int restore_a4 = FALSE;
26240
26241 /* Compute the registers we need to pop. */
26242 regs_to_pop = 0;
26243 pops_needed = 0;
26244
26245 if (reg_containing_return_addr == -1)
26246 {
26247 regs_to_pop |= 1 << LR_REGNUM;
26248 ++pops_needed;
26249 }
26250
26251 if (TARGET_BACKTRACE)
26252 {
26253 /* Restore the (ARM) frame pointer and stack pointer. */
26254 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26255 pops_needed += 2;
26256 }
26257
26258 /* If there is nothing to pop then just emit the BX instruction and
26259 return. */
26260 if (pops_needed == 0)
26261 {
26262 if (crtl->calls_eh_return)
26263 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26264
26265 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26266 {
26267 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26268 emitted by cmse_nonsecure_entry_clear_before_return (). */
26269 if (!TARGET_HAVE_FPCXT_CMSE)
26270 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26271 reg_containing_return_addr);
26272 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26273 }
26274 else
26275 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26276 return;
26277 }
26278 /* Otherwise if we are not supporting interworking and we have not created
26279 a backtrace structure and the function was not entered in ARM mode then
26280 just pop the return address straight into the PC. */
26281 else if (!TARGET_INTERWORK
26282 && !TARGET_BACKTRACE
26283 && !is_called_in_ARM_mode (current_function_decl)
26284 && !crtl->calls_eh_return
26285 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26286 {
26287 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26288 return;
26289 }
26290
26291 /* Find out how many of the (return) argument registers we can corrupt. */
26292 regs_available_for_popping = 0;
26293
26294 /* If returning via __builtin_eh_return, the bottom three registers
26295 all contain information needed for the return. */
26296 if (crtl->calls_eh_return)
26297 size = 12;
26298 else
26299 {
26300 /* Where possible, deduce the registers used from the function's
26301 return value. This is more reliable than examining
26302 df_regs_ever_live_p (), because that will be set if the register is
26303 ever used in the function, not just if the register is used
26304 to hold a return value. */
26305
26306 if (crtl->return_rtx != 0)
26307 mode = GET_MODE (crtl->return_rtx);
26308 else
26309 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26310
26311 size = GET_MODE_SIZE (mode);
26312
26313 if (size == 0)
26314 {
26315 /* In a void function we can use any argument register.
26316 In a function that returns a structure on the stack
26317 we can use the second and third argument registers. */
26318 if (mode == VOIDmode)
26319 regs_available_for_popping =
26320 (1 << ARG_REGISTER (1))
26321 | (1 << ARG_REGISTER (2))
26322 | (1 << ARG_REGISTER (3));
26323 else
26324 regs_available_for_popping =
26325 (1 << ARG_REGISTER (2))
26326 | (1 << ARG_REGISTER (3));
26327 }
26328 else if (size <= 4)
26329 regs_available_for_popping =
26330 (1 << ARG_REGISTER (2))
26331 | (1 << ARG_REGISTER (3));
26332 else if (size <= 8)
26333 regs_available_for_popping =
26334 (1 << ARG_REGISTER (3));
26335 }
26336
26337 /* Match registers to be popped with registers into which we pop them. */
26338 for (available = regs_available_for_popping,
26339 required = regs_to_pop;
26340 required != 0 && available != 0;
26341 available &= ~(available & - available),
26342 required &= ~(required & - required))
26343 -- pops_needed;
26344
26345 /* If we have any popping registers left over, remove them. */
26346 if (available > 0)
26347 regs_available_for_popping &= ~available;
26348
26349 /* Otherwise if we need another popping register we can use
26350 the fourth argument register. */
26351 else if (pops_needed)
26352 {
26353 /* If we have not found any free argument registers and
26354 reg a4 contains the return address, we must move it. */
26355 if (regs_available_for_popping == 0
26356 && reg_containing_return_addr == LAST_ARG_REGNUM)
26357 {
26358 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26359 reg_containing_return_addr = LR_REGNUM;
26360 }
26361 else if (size > 12)
26362 {
26363 /* Register a4 is being used to hold part of the return value,
26364 but we have dire need of a free, low register. */
26365 restore_a4 = TRUE;
26366
26367 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26368 }
26369
26370 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26371 {
26372 /* The fourth argument register is available. */
26373 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26374
26375 --pops_needed;
26376 }
26377 }
26378
26379 /* Pop as many registers as we can. */
26380 thumb_pop (f, regs_available_for_popping);
26381
26382 /* Process the registers we popped. */
26383 if (reg_containing_return_addr == -1)
26384 {
26385 /* The return address was popped into the lowest numbered register. */
26386 regs_to_pop &= ~(1 << LR_REGNUM);
26387
26388 reg_containing_return_addr =
26389 number_of_first_bit_set (regs_available_for_popping);
26390
26391 /* Remove this register from the mask of available registers, so that
26392 the return address will not be corrupted by further pops. */
26393 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26394 }
26395
26396 /* If we popped other registers then handle them here. */
26397 if (regs_available_for_popping)
26398 {
26399 int frame_pointer;
26400
26401 /* Work out which register currently contains the frame pointer. */
26402 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26403
26404 /* Move it into the correct place. */
26405 asm_fprintf (f, "\tmov\t%r, %r\n",
26406 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26407
26408 /* (Temporarily) remove it from the mask of popped registers. */
26409 regs_available_for_popping &= ~(1 << frame_pointer);
26410 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26411
26412 if (regs_available_for_popping)
26413 {
26414 int stack_pointer;
26415
26416 /* We popped the stack pointer as well,
26417 find the register that contains it. */
26418 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26419
26420 /* Move it into the stack register. */
26421 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26422
26423 /* At this point we have popped all necessary registers, so
26424 do not worry about restoring regs_available_for_popping
26425 to its correct value:
26426
26427 assert (pops_needed == 0)
26428 assert (regs_available_for_popping == (1 << frame_pointer))
26429 assert (regs_to_pop == (1 << STACK_POINTER)) */
26430 }
26431 else
26432 {
26433 /* Since we have just moved the popped value into the frame
26434 pointer, the popping register is available for reuse, and
26435 we know that we still have the stack pointer left to pop. */
26436 regs_available_for_popping |= (1 << frame_pointer);
26437 }
26438 }
26439
26440 /* If we still have registers left on the stack, but we no longer have
26441 any registers into which we can pop them, then we must move the return
26442 address into the link register and make available the register that
26443 contained it. */
26444 if (regs_available_for_popping == 0 && pops_needed > 0)
26445 {
26446 regs_available_for_popping |= 1 << reg_containing_return_addr;
26447
26448 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26449 reg_containing_return_addr);
26450
26451 reg_containing_return_addr = LR_REGNUM;
26452 }
26453
26454 /* If we have registers left on the stack then pop some more.
26455 We know that at most we will want to pop FP and SP. */
26456 if (pops_needed > 0)
26457 {
26458 int popped_into;
26459 int move_to;
26460
26461 thumb_pop (f, regs_available_for_popping);
26462
26463 /* We have popped either FP or SP.
26464 Move whichever one it is into the correct register. */
26465 popped_into = number_of_first_bit_set (regs_available_for_popping);
26466 move_to = number_of_first_bit_set (regs_to_pop);
26467
26468 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26469 --pops_needed;
26470 }
26471
26472 /* If we still have not popped everything then we must have only
26473 had one register available to us and we are now popping the SP. */
26474 if (pops_needed > 0)
26475 {
26476 int popped_into;
26477
26478 thumb_pop (f, regs_available_for_popping);
26479
26480 popped_into = number_of_first_bit_set (regs_available_for_popping);
26481
26482 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26483 /*
26484 assert (regs_to_pop == (1 << STACK_POINTER))
26485 assert (pops_needed == 1)
26486 */
26487 }
26488
26489 /* If necessary restore the a4 register. */
26490 if (restore_a4)
26491 {
26492 if (reg_containing_return_addr != LR_REGNUM)
26493 {
26494 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26495 reg_containing_return_addr = LR_REGNUM;
26496 }
26497
26498 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26499 }
26500
26501 if (crtl->calls_eh_return)
26502 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26503
26504 /* Return to caller. */
26505 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26506 {
26507 /* This is for the cases where LR is not being used to contain the return
26508 address. It may therefore contain information that we might not want
26509 to leak, hence it must be cleared. The value in R0 will never be a
26510 secret at this point, so it is safe to use it, see the clearing code
26511 in cmse_nonsecure_entry_clear_before_return (). */
26512 if (reg_containing_return_addr != LR_REGNUM)
26513 asm_fprintf (f, "\tmov\tlr, r0\n");
26514
26515 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26516 by cmse_nonsecure_entry_clear_before_return (). */
26517 if (!TARGET_HAVE_FPCXT_CMSE)
26518 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26519 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26520 }
26521 else
26522 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26523 }
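
/* For illustration only (roughly, for a void interworking function whose
   return address is still on the stack): with r0-r2 free the return
   address is popped into a low register and we return with BX, e.g.

	pop	{r0}
	bx	r0  */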
26524 \f
26525 /* Scan INSN just before assembler is output for it.
26526 For Thumb-1, we track the status of the condition codes; this
26527 information is used in the cbranchsi4_insn pattern. */
26528 void
26529 thumb1_final_prescan_insn (rtx_insn *insn)
26530 {
26531 if (flag_print_asm_name)
26532 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26533 INSN_ADDRESSES (INSN_UID (insn)));
26534 /* Don't overwrite the previous setter when we get to a cbranch. */
26535 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26536 {
26537 enum attr_conds conds;
26538
26539 if (cfun->machine->thumb1_cc_insn)
26540 {
26541 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26542 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26543 CC_STATUS_INIT;
26544 }
26545 conds = get_attr_conds (insn);
26546 if (conds == CONDS_SET)
26547 {
26548 rtx set = single_set (insn);
26549 cfun->machine->thumb1_cc_insn = insn;
26550 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26551 cfun->machine->thumb1_cc_op1 = const0_rtx;
26552 cfun->machine->thumb1_cc_mode = CC_NZmode;
26553 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26554 {
26555 rtx src1 = XEXP (SET_SRC (set), 1);
26556 if (src1 == const0_rtx)
26557 cfun->machine->thumb1_cc_mode = CCmode;
26558 }
26559 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26560 {
26561 /* Record the src register operand instead of dest because
26562 cprop_hardreg pass propagates src. */
26563 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26564 }
26565 }
26566 else if (conds != CONDS_NOCOND)
26567 cfun->machine->thumb1_cc_insn = NULL_RTX;
26568 }
26569
26570 /* Check if unexpected far jump is used. */
26571 if (cfun->machine->lr_save_eliminated
26572 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26573 internal_error("Unexpected thumb1 far jump");
26574 }
26575
26576 int
26577 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26578 {
26579 unsigned HOST_WIDE_INT mask = 0xff;
26580 int i;
26581
26582 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26583 if (val == 0) /* XXX */
26584 return 0;
26585
26586 for (i = 0; i < 25; i++)
26587 if ((val & (mask << i)) == val)
26588 return 1;
26589
26590 return 0;
26591 }
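
/* For example (illustrative): 0x00ff0000 is 0xff << 16, so this returns 1;
   0x00100100 has set bits more than eight positions apart, so it
   returns 0.  */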
26592
26593 /* Returns nonzero if the current function contains,
26594 or might contain a far jump. */
26595 static int
26596 thumb_far_jump_used_p (void)
26597 {
26598 rtx_insn *insn;
26599 bool far_jump = false;
26600 unsigned int func_size = 0;
26601
26602 /* If we have already decided that far jumps may be used,
26603 do not bother checking again, and always return true even if
26604 it turns out that they are not being used. Once we have made
26605 the decision that far jumps are present (and that hence the link
26606 register will be pushed onto the stack) we cannot go back on it. */
26607 if (cfun->machine->far_jump_used)
26608 return 1;
26609
26610 /* If this function is not being called from the prologue/epilogue
26611 generation code then it must be being called from the
26612 INITIAL_ELIMINATION_OFFSET macro. */
26613 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26614 {
26615 /* In this case we know that we are being asked about the elimination
26616 of the arg pointer register. If that register is not being used,
26617 then there are no arguments on the stack, and we do not have to
26618 worry that a far jump might force the prologue to push the link
26619 register, changing the stack offsets. In this case we can just
26620 return false, since the presence of far jumps in the function will
26621 not affect stack offsets.
26622
26623 If the arg pointer is live (or if it was live, but has now been
26624 eliminated and so set to dead) then we do have to test to see if
26625 the function might contain a far jump. This test can lead to some
26626 false negatives, since before reload is completed the length of
26627 branch instructions is not known, so gcc defaults to returning their
26628 longest length, which in turn sets the far jump attribute to true.
26629
26630 A false negative will not result in bad code being generated, but it
26631 will result in a needless push and pop of the link register. We
26632 hope that this does not occur too often.
26633
26634 If we need doubleword stack alignment this could affect the other
26635 elimination offsets so we can't risk getting it wrong. */
26636 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26637 cfun->machine->arg_pointer_live = 1;
26638 else if (!cfun->machine->arg_pointer_live)
26639 return 0;
26640 }
26641
26642 /* We should not change far_jump_used during or after reload, as there is
26643 no chance to change stack frame layout. */
26644 if (reload_in_progress || reload_completed)
26645 return 0;
26646
26647 /* Check to see if the function contains a branch
26648 insn with the far jump attribute set. */
26649 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26650 {
26651 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26652 {
26653 far_jump = true;
26654 }
26655 func_size += get_attr_length (insn);
26656 }
26657
26658 /* The far_jump attribute will always be true for thumb1 before the
26659 shorten_branch pass, so checking the far_jump attribute before
26660 that pass is not very useful.
26661
26662 The following heuristic tries to estimate more accurately whether a far
26663 jump may finally be used. The heuristic is very conservative, as there
26664 is no chance to roll back a decision not to use far jumps.
26665
26666 Thumb1 long branch offsets are -2048 to 2046. The worst case is that
26667 each 2-byte insn is associated with a 4-byte constant pool entry.
26668 Using 2048/3 as the function size threshold is conservative enough. */
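
/* Worked example (illustrative): each 2-byte insn may drag in a 4-byte
   literal pool entry, i.e. up to 6 bytes of code per 2 bytes counted in
   FUNC_SIZE, so 3 * FUNC_SIZE >= 2048 means the function could span the
   whole +/-2KB branch range and a far jump must be assumed.  */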
26669 if (far_jump)
26670 {
26671 if ((func_size * 3) >= 2048)
26672 {
26673 /* Record the fact that we have decided that
26674 the function does use far jumps. */
26675 cfun->machine->far_jump_used = 1;
26676 return 1;
26677 }
26678 }
26679
26680 return 0;
26681 }
26682
26683 /* Return nonzero if FUNC must be entered in ARM mode. */
26684 static bool
26685 is_called_in_ARM_mode (tree func)
26686 {
26687 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26688
26689 /* Ignore the problem about functions whose address is taken. */
26690 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26691 return true;
26692
26693 #ifdef ARM_PE
26694 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26695 #else
26696 return false;
26697 #endif
26698 }
26699
26700 /* Given the stack offsets and register mask in OFFSETS, decide how
26701 many additional registers to push instead of subtracting a constant
26702 from SP. For epilogues the principle is the same except we use pop.
26703 FOR_PROLOGUE indicates which we're generating. */
26704 static int
26705 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26706 {
26707 HOST_WIDE_INT amount;
26708 unsigned long live_regs_mask = offsets->saved_regs_mask;
26709 /* Extract a mask of the ones we can give to the Thumb's push/pop
26710 instruction. */
26711 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26712 /* Then count how many other high registers will need to be pushed. */
26713 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26714 int n_free, reg_base, size;
26715
26716 if (!for_prologue && frame_pointer_needed)
26717 amount = offsets->locals_base - offsets->saved_regs;
26718 else
26719 amount = offsets->outgoing_args - offsets->saved_regs;
26720
26721 /* If the stack frame size is 512 exactly, we can save one load
26722 instruction, which should make this a win even when optimizing
26723 for speed. */
26724 if (!optimize_size && amount != 512)
26725 return 0;
26726
26727 /* Can't do this if there are high registers to push. */
26728 if (high_regs_pushed != 0)
26729 return 0;
26730
26731 /* Shouldn't do it in the prologue if no registers would normally
26732 be pushed at all. In the epilogue, also allow it if we'll have
26733 a pop insn for the PC. */
26734 if (l_mask == 0
26735 && (for_prologue
26736 || TARGET_BACKTRACE
26737 || (live_regs_mask & 1 << LR_REGNUM) == 0
26738 || TARGET_INTERWORK
26739 || crtl->args.pretend_args_size != 0))
26740 return 0;
26741
26742 /* Don't do this if thumb_expand_prologue wants to emit instructions
26743 between the push and the stack frame allocation. */
26744 if (for_prologue
26745 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26746 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26747 return 0;
26748
26749 reg_base = 0;
26750 n_free = 0;
26751 if (!for_prologue)
26752 {
26753 size = arm_size_return_regs ();
26754 reg_base = ARM_NUM_INTS (size);
26755 live_regs_mask >>= reg_base;
26756 }
26757
26758 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26759 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26760 {
26761 live_regs_mask >>= 1;
26762 n_free++;
26763 }
26764
26765 if (n_free == 0)
26766 return 0;
26767 gcc_assert (amount / 4 * 4 == amount);
26768
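/* A Thumb-1 SP add/sub immediate covers 0..508 in steps of 4, so once the
   residual adjustment drops below 512 it fits in a single insn. For
   example, amount == 516 with n_free == 2 returns (516 - 508) / 4 == 2,
   pushing two extra registers and leaving a 508-byte adjustment. */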
26769 if (amount >= 512 && (amount - n_free * 4) < 512)
26770 return (amount - 508) / 4;
26771 if (amount <= n_free * 4)
26772 return amount / 4;
26773 return 0;
26774 }
26775
26776 /* The bits which aren't usefully expanded as rtl. */
26777 const char *
26778 thumb1_unexpanded_epilogue (void)
26779 {
26780 arm_stack_offsets *offsets;
26781 int regno;
26782 unsigned long live_regs_mask = 0;
26783 int high_regs_pushed = 0;
26784 int extra_pop;
26785 int had_to_push_lr;
26786 int size;
26787
26788 if (cfun->machine->return_used_this_function != 0)
26789 return "";
26790
26791 if (IS_NAKED (arm_current_func_type ()))
26792 return "";
26793
26794 offsets = arm_get_frame_offsets ();
26795 live_regs_mask = offsets->saved_regs_mask;
26796 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26797
26798 /* We can deduce the registers used from the function's return value.
26799 This is more reliable than examining df_regs_ever_live_p () because that
26800 will be set if the register is ever used in the function, not just if
26801 the register is used to hold a return value. */
26802 size = arm_size_return_regs ();
26803
26804 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26805 if (extra_pop > 0)
26806 {
26807 unsigned long extra_mask = (1 << extra_pop) - 1;
26808 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26809 }
26810
26811 /* The prolog may have pushed some high registers to use as
26812 work registers. e.g. the testsuite file:
26813 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26814 compiles to produce:
26815 push {r4, r5, r6, r7, lr}
26816 mov r7, r9
26817 mov r6, r8
26818 push {r6, r7}
26819 as part of the prolog. We have to undo that pushing here. */
26820
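/* For the example above, the corresponding undo sequence emitted here is
   roughly:
   pop {r6, r7}
   mov r9, r7
   mov r8, r6
   before the remaining low registers and the PC are popped. */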
26821 if (high_regs_pushed)
26822 {
26823 unsigned long mask = live_regs_mask & 0xff;
26824 int next_hi_reg;
26825
26826 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26827
26828 if (mask == 0)
26829 /* Oh dear! We have no low registers into which we can pop
26830 high registers! */
26831 internal_error
26832 ("no low registers available for popping high registers");
26833
26834 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26835 if (live_regs_mask & (1 << next_hi_reg))
26836 break;
26837
26838 while (high_regs_pushed)
26839 {
26840 /* Find lo register(s) into which the high register(s) can
26841 be popped. */
26842 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26843 {
26844 if (mask & (1 << regno))
26845 high_regs_pushed--;
26846 if (high_regs_pushed == 0)
26847 break;
26848 }
26849
26850 if (high_regs_pushed == 0 && regno >= 0)
26851 mask &= ~((1 << regno) - 1);
26852
26853 /* Pop the values into the low register(s). */
26854 thumb_pop (asm_out_file, mask);
26855
26856 /* Move the value(s) into the high registers. */
26857 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26858 {
26859 if (mask & (1 << regno))
26860 {
26861 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26862 regno);
26863
26864 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26865 next_hi_reg--)
26866 if (live_regs_mask & (1 << next_hi_reg))
26867 break;
26868 }
26869 }
26870 }
26871 live_regs_mask &= ~0x0f00;
26872 }
26873
26874 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26875 live_regs_mask &= 0xff;
26876
26877 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26878 {
26879 /* Pop the return address into the PC. */
26880 if (had_to_push_lr)
26881 live_regs_mask |= 1 << PC_REGNUM;
26882
26883 /* Either no argument registers were pushed or a backtrace
26884 structure was created which includes an adjusted stack
26885 pointer, so just pop everything. */
26886 if (live_regs_mask)
26887 thumb_pop (asm_out_file, live_regs_mask);
26888
26889 /* We have either just popped the return address into the
26890 PC or it was kept in LR for the entire function.
26891 Note that thumb_pop has already called thumb_exit if the
26892 PC was in the list. */
26893 if (!had_to_push_lr)
26894 thumb_exit (asm_out_file, LR_REGNUM);
26895 }
26896 else
26897 {
26898 /* Pop everything but the return address. */
26899 if (live_regs_mask)
26900 thumb_pop (asm_out_file, live_regs_mask);
26901
26902 if (had_to_push_lr)
26903 {
26904 if (size > 12)
26905 {
26906 /* We have no free low regs, so save one. */
26907 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26908 LAST_ARG_REGNUM);
26909 }
26910
26911 /* Get the return address into a temporary register. */
26912 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26913
26914 if (size > 12)
26915 {
26916 /* Move the return address to lr. */
26917 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26918 LAST_ARG_REGNUM);
26919 /* Restore the low register. */
26920 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26921 IP_REGNUM);
26922 regno = LR_REGNUM;
26923 }
26924 else
26925 regno = LAST_ARG_REGNUM;
26926 }
26927 else
26928 regno = LR_REGNUM;
26929
26930 /* Remove the argument registers that were pushed onto the stack. */
26931 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26932 SP_REGNUM, SP_REGNUM,
26933 crtl->args.pretend_args_size);
26934
26935 thumb_exit (asm_out_file, regno);
26936 }
26937
26938 return "";
26939 }
26940
26941 /* Functions to save and restore machine-specific function data. */
26942 static struct machine_function *
26943 arm_init_machine_status (void)
26944 {
26945 struct machine_function *machine;
26946 machine = ggc_cleared_alloc<machine_function> ();
26947
26948 #if ARM_FT_UNKNOWN != 0
26949 machine->func_type = ARM_FT_UNKNOWN;
26950 #endif
26951 machine->static_chain_stack_bytes = -1;
26952 machine->pacspval_needed = 0;
26953 return machine;
26954 }
26955
26956 /* Return an RTX indicating where the return address to the
26957 calling function can be found. */
26958 rtx
26959 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26960 {
26961 if (count != 0)
26962 return NULL_RTX;
26963
26964 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26965 }
26966
26967 /* Do anything needed before RTL is emitted for each function. */
26968 void
26969 arm_init_expanders (void)
26970 {
26971 /* Arrange to initialize and mark the machine per-function status. */
26972 init_machine_status = arm_init_machine_status;
26973
26974 /* This is to stop the combine pass optimizing away the alignment
26975 adjustment of va_arg. */
26976 /* ??? It is claimed that this should not be necessary. */
26977 if (cfun)
26978 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26979 }
26980
26981 /* Return TRUE if FUNC is compiled for a different mode (ARM vs. Thumb). */
26982
26983 bool
26984 arm_change_mode_p (tree func)
26985 {
26986 if (TREE_CODE (func) != FUNCTION_DECL)
26987 return false;
26988
26989 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26990
26991 if (!callee_tree)
26992 callee_tree = target_option_default_node;
26993
26994 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26995 int flags = callee_opts->x_target_flags;
26996
26997 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26998 }
26999
27000 /* Like arm_compute_initial_elimination_offset. Simpler because there
27001 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27002 to point at the base of the local variables after static stack
27003 space for a function has been allocated. */
27004
27005 HOST_WIDE_INT
27006 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27007 {
27008 arm_stack_offsets *offsets;
27009
27010 offsets = arm_get_frame_offsets ();
27011
27012 switch (from)
27013 {
27014 case ARG_POINTER_REGNUM:
27015 switch (to)
27016 {
27017 case STACK_POINTER_REGNUM:
27018 return offsets->outgoing_args - offsets->saved_args;
27019
27020 case FRAME_POINTER_REGNUM:
27021 return offsets->soft_frame - offsets->saved_args;
27022
27023 case ARM_HARD_FRAME_POINTER_REGNUM:
27024 return offsets->saved_regs - offsets->saved_args;
27025
27026 case THUMB_HARD_FRAME_POINTER_REGNUM:
27027 return offsets->locals_base - offsets->saved_args;
27028
27029 default:
27030 gcc_unreachable ();
27031 }
27032 break;
27033
27034 case FRAME_POINTER_REGNUM:
27035 switch (to)
27036 {
27037 case STACK_POINTER_REGNUM:
27038 return offsets->outgoing_args - offsets->soft_frame;
27039
27040 case ARM_HARD_FRAME_POINTER_REGNUM:
27041 return offsets->saved_regs - offsets->soft_frame;
27042
27043 case THUMB_HARD_FRAME_POINTER_REGNUM:
27044 return offsets->locals_base - offsets->soft_frame;
27045
27046 default:
27047 gcc_unreachable ();
27048 }
27049 break;
27050
27051 default:
27052 gcc_unreachable ();
27053 }
27054 }
27055
27056 /* Generate the function's prologue. */
27057
27058 void
27059 thumb1_expand_prologue (void)
27060 {
27061 rtx_insn *insn;
27062
27063 HOST_WIDE_INT amount;
27064 HOST_WIDE_INT size;
27065 arm_stack_offsets *offsets;
27066 unsigned long func_type;
27067 int regno;
27068 unsigned long live_regs_mask;
27069 unsigned long l_mask;
27070 unsigned high_regs_pushed = 0;
27071 bool lr_needs_saving;
27072
27073 func_type = arm_current_func_type ();
27074
27075 /* Naked functions don't have prologues. */
27076 if (IS_NAKED (func_type))
27077 {
27078 if (flag_stack_usage_info)
27079 current_function_static_stack_size = 0;
27080 return;
27081 }
27082
27083 if (IS_INTERRUPT (func_type))
27084 {
27085 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27086 return;
27087 }
27088
27089 if (is_called_in_ARM_mode (current_function_decl))
27090 emit_insn (gen_prologue_thumb1_interwork ());
27091
27092 offsets = arm_get_frame_offsets ();
27093 live_regs_mask = offsets->saved_regs_mask;
27094 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27095
27096 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27097 l_mask = live_regs_mask & 0x40ff;
27098 /* Then count how many other high registers will need to be pushed. */
27099 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27100
27101 if (crtl->args.pretend_args_size)
27102 {
27103 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27104
27105 if (cfun->machine->uses_anonymous_args)
27106 {
27107 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27108 unsigned long mask;
27109
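/* Select the highest NUM_PUSHES argument registers. For example,
   pretend_args_size == 8 gives num_pushes == 2 and mask == 0xc,
   i.e. push {r2, r3}. */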
27110 mask = 1ul << (LAST_ARG_REGNUM + 1);
27111 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27112
27113 insn = thumb1_emit_multi_reg_push (mask, 0);
27114 }
27115 else
27116 {
27117 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27118 stack_pointer_rtx, x));
27119 }
27120 RTX_FRAME_RELATED_P (insn) = 1;
27121 }
27122
27123 if (TARGET_BACKTRACE)
27124 {
27125 HOST_WIDE_INT offset = 0;
27126 unsigned work_register;
27127 rtx work_reg, x, arm_hfp_rtx;
27128
27129 /* We have been asked to create a stack backtrace structure.
27130 The code looks like this:
27131
27132 0 .align 2
27133 0 func:
27134 0 sub SP, #16 Reserve space for 4 registers.
27135 2 push {R7} Push low registers.
27136 4 add R7, SP, #20 Get the stack pointer before the push.
27137 6 str R7, [SP, #8] Store the stack pointer
27138 (before reserving the space).
27139 8 mov R7, PC Get hold of the start of this code + 12.
27140 10 str R7, [SP, #16] Store it.
27141 12 mov R7, FP Get hold of the current frame pointer.
27142 14 str R7, [SP, #4] Store it.
27143 16 mov R7, LR Get hold of the current return address.
27144 18 str R7, [SP, #12] Store it.
27145 20 add R7, SP, #16 Point at the start of the
27146 backtrace structure.
27147 22 mov FP, R7 Put this value into the frame pointer. */
27148
27149 work_register = thumb_find_work_register (live_regs_mask);
27150 work_reg = gen_rtx_REG (SImode, work_register);
27151 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27152
27153 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27154 stack_pointer_rtx, GEN_INT (-16)));
27155 RTX_FRAME_RELATED_P (insn) = 1;
27156
27157 if (l_mask)
27158 {
27159 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27160 RTX_FRAME_RELATED_P (insn) = 1;
27161 lr_needs_saving = false;
27162
27163 offset = bit_count (l_mask) * UNITS_PER_WORD;
27164 }
27165
27166 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27167 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27168
27169 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27170 x = gen_frame_mem (SImode, x);
27171 emit_move_insn (x, work_reg);
27172
27173 /* Make sure that the instruction fetching the PC is in the right place
27174 to calculate "start of backtrace creation code + 12". */
27175 /* ??? The stores using the common WORK_REG ought to be enough to
27176 prevent the scheduler from doing anything weird. Failing that
27177 we could always move all of the following into an UNSPEC_VOLATILE. */
27178 if (l_mask)
27179 {
27180 x = gen_rtx_REG (SImode, PC_REGNUM);
27181 emit_move_insn (work_reg, x);
27182
27183 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27184 x = gen_frame_mem (SImode, x);
27185 emit_move_insn (x, work_reg);
27186
27187 emit_move_insn (work_reg, arm_hfp_rtx);
27188
27189 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27190 x = gen_frame_mem (SImode, x);
27191 emit_move_insn (x, work_reg);
27192 }
27193 else
27194 {
27195 emit_move_insn (work_reg, arm_hfp_rtx);
27196
27197 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27198 x = gen_frame_mem (SImode, x);
27199 emit_move_insn (x, work_reg);
27200
27201 x = gen_rtx_REG (SImode, PC_REGNUM);
27202 emit_move_insn (work_reg, x);
27203
27204 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27205 x = gen_frame_mem (SImode, x);
27206 emit_move_insn (x, work_reg);
27207 }
27208
27209 x = gen_rtx_REG (SImode, LR_REGNUM);
27210 emit_move_insn (work_reg, x);
27211
27212 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27213 x = gen_frame_mem (SImode, x);
27214 emit_move_insn (x, work_reg);
27215
27216 x = GEN_INT (offset + 12);
27217 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27218
27219 emit_move_insn (arm_hfp_rtx, work_reg);
27220 }
27221 /* Optimization: If we are not pushing any low registers but we are going
27222 to push some high registers then delay our first push. This will just
27223 be a push of LR and we can combine it with the push of the first high
27224 register. */
27225 else if ((l_mask & 0xff) != 0
27226 || (high_regs_pushed == 0 && lr_needs_saving))
27227 {
27228 unsigned long mask = l_mask;
27229 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27230 insn = thumb1_emit_multi_reg_push (mask, mask);
27231 RTX_FRAME_RELATED_P (insn) = 1;
27232 lr_needs_saving = false;
27233 }
27234
27235 if (high_regs_pushed)
27236 {
27237 unsigned pushable_regs;
27238 unsigned next_hi_reg;
27239 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27240 : crtl->args.info.nregs;
27241 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27242
27243 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27244 if (live_regs_mask & (1 << next_hi_reg))
27245 break;
27246
27247 /* Here we need to mask out registers used for passing arguments,
27248 even if they could otherwise be pushed. This is to avoid using
27249 them to stash the high registers, since such a stash could
27250 clobber argument values that are still live. */
27251 pushable_regs = l_mask & (~arg_regs_mask);
27252 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27253
27254 /* Normally, LR can be used as a scratch register once it has been
27255 saved; but if the function examines its own return address then
27256 the value is still live and we need to avoid using it. */
27257 bool return_addr_live
27258 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27259 LR_REGNUM);
27260
27261 if (lr_needs_saving || return_addr_live)
27262 pushable_regs &= ~(1 << LR_REGNUM);
27263
27264 if (pushable_regs == 0)
27265 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27266
27267 while (high_regs_pushed > 0)
27268 {
27269 unsigned long real_regs_mask = 0;
27270 unsigned long push_mask = 0;
27271
27272 for (regno = LR_REGNUM; regno >= 0; regno --)
27273 {
27274 if (pushable_regs & (1 << regno))
27275 {
27276 emit_move_insn (gen_rtx_REG (SImode, regno),
27277 gen_rtx_REG (SImode, next_hi_reg));
27278
27279 high_regs_pushed --;
27280 real_regs_mask |= (1 << next_hi_reg);
27281 push_mask |= (1 << regno);
27282
27283 if (high_regs_pushed)
27284 {
27285 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27286 next_hi_reg --)
27287 if (live_regs_mask & (1 << next_hi_reg))
27288 break;
27289 }
27290 else
27291 break;
27292 }
27293 }
27294
27295 /* If we had to find a work register and we have not yet
27296 saved the LR then add it to the list of regs to push. */
27297 if (lr_needs_saving)
27298 {
27299 push_mask |= 1 << LR_REGNUM;
27300 real_regs_mask |= 1 << LR_REGNUM;
27301 lr_needs_saving = false;
27302 /* If the return address is not live at this point, we
27303 can add LR to the list of registers that we can use
27304 for pushes. */
27305 if (!return_addr_live)
27306 pushable_regs |= 1 << LR_REGNUM;
27307 }
27308
27309 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27310 RTX_FRAME_RELATED_P (insn) = 1;
27311 }
27312 }
27313
27314 /* Load the pic register before setting the frame pointer,
27315 so we can use r7 as a temporary work register. */
27316 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27317 arm_load_pic_register (live_regs_mask, NULL_RTX);
27318
27319 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27320 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27321 stack_pointer_rtx);
27322
27323 size = offsets->outgoing_args - offsets->saved_args;
27324 if (flag_stack_usage_info)
27325 current_function_static_stack_size = size;
27326
27327 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27328 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27329 || flag_stack_clash_protection)
27330 && size)
27331 sorry ("%<-fstack-check=specific%> for Thumb-1");
27332
27333 amount = offsets->outgoing_args - offsets->saved_regs;
27334 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27335 if (amount)
27336 {
27337 if (amount < 512)
27338 {
27339 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27340 GEN_INT (- amount)));
27341 RTX_FRAME_RELATED_P (insn) = 1;
27342 }
27343 else
27344 {
27345 rtx reg, dwarf;
27346
27347 /* The stack decrement is too big for an immediate value in a single
27348 insn. In theory we could issue multiple subtracts, but after
27349 three of them it becomes more space efficient to place the full
27350 value in the constant pool and load into a register. (Also the
27351 ARM debugger really likes to see only one stack decrement per
27352 function). So instead we look for a scratch register into which
27353 we can load the decrement, and then we subtract this from the
27354 stack pointer. Unfortunately on the thumb the only available
27355 scratch registers are the argument registers, and we cannot use
27356 these as they may hold arguments to the function. Instead we
27357 attempt to locate a call preserved register which is used by this
27358 function. If we can find one, then we know that it will have
27359 been pushed at the start of the prologue and so we can corrupt
27360 it now. */
27361 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27362 if (live_regs_mask & (1 << regno))
27363 break;
27364
27365 gcc_assert(regno <= LAST_LO_REGNUM);
27366
27367 reg = gen_rtx_REG (SImode, regno);
27368
27369 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27370
27371 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27372 stack_pointer_rtx, reg));
27373
27374 dwarf = gen_rtx_SET (stack_pointer_rtx,
27375 plus_constant (Pmode, stack_pointer_rtx,
27376 -amount));
27377 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27378 RTX_FRAME_RELATED_P (insn) = 1;
27379 }
27380 }
27381
27382 if (frame_pointer_needed)
27383 thumb_set_frame_pointer (offsets);
27384
27385 /* If we are profiling, make sure no instructions are scheduled before
27386 the call to mcount. Similarly if the user has requested no
27387 scheduling in the prolog. Similarly if we want non-call exceptions
27388 using the EABI unwinder, to prevent faulting instructions from being
27389 swapped with a stack adjustment. */
27390 if (crtl->profile || !TARGET_SCHED_PROLOG
27391 || (arm_except_unwind_info (&global_options) == UI_TARGET
27392 && cfun->can_throw_non_call_exceptions))
27393 emit_insn (gen_blockage ());
27394
27395 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27396 if (live_regs_mask & 0xff)
27397 cfun->machine->lr_save_eliminated = 0;
27398 }
27399
27400 /* Clear caller saved registers not used to pass return values and leaked
27401 condition flags before exiting a cmse_nonsecure_entry function. */
27402
27403 void
27404 cmse_nonsecure_entry_clear_before_return (void)
27405 {
27406 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27407 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27408 uint32_t padding_bits_to_clear = 0;
27409 auto_sbitmap to_clear_bitmap (maxregno + 1);
27410 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27411 tree result_type;
27412
27413 bitmap_clear (to_clear_bitmap);
27414 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27415 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
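/* For example, an entry function returning a 32-bit result keeps r0 (it is
   removed from the bitmap below), so r1-r3 and, unless it is needed as a
   scratch below, ip are cleared, along with the low VFP registers when
   VFP is in use. */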
27416
27417 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27418 registers. */
27419 if (clear_vfpregs)
27420 {
27421 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27422
27423 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27424
27425 if (!TARGET_HAVE_FPCXT_CMSE)
27426 {
27427 /* Make sure we don't clear the two scratch registers used to clear
27428 the relevant FPSCR bits in output_return_instruction. */
27429 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27430 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27431 emit_use (gen_rtx_REG (SImode, 4));
27432 bitmap_clear_bit (to_clear_bitmap, 4);
27433 }
27434 }
27435
27436 /* If the user has defined registers to be caller saved, these are no longer
27437 restored by the function before returning and must thus be cleared for
27438 security purposes. */
27439 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27440 {
27441 /* We do not touch registers that can be used to pass arguments as per
27442 the AAPCS, since these should never be made callee-saved by user
27443 options. */
27444 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27445 continue;
27446 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27447 continue;
27448 if (!callee_saved_reg_p (regno)
27449 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27450 || TARGET_HARD_FLOAT))
27451 bitmap_set_bit (to_clear_bitmap, regno);
27452 }
27453
27454 /* Make sure we do not clear the registers used to return the result in. */
27455 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27456 if (!VOID_TYPE_P (result_type))
27457 {
27458 uint64_t to_clear_return_mask;
27459 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27460
27461 /* No need to check that we return in registers, because we don't
27462 support returning on stack yet. */
27463 gcc_assert (REG_P (result_rtl));
27464 to_clear_return_mask
27465 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27466 &padding_bits_to_clear);
27467 if (to_clear_return_mask)
27468 {
27469 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27470 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27471 {
27472 if (to_clear_return_mask & (1ULL << regno))
27473 bitmap_clear_bit (to_clear_bitmap, regno);
27474 }
27475 }
27476 }
27477
27478 if (padding_bits_to_clear != 0)
27479 {
27480 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27481 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27482
27483 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27484 returning a composite type, which only uses r0. Let's make sure that
27485 r1-r3 are cleared too. */
27486 bitmap_clear (to_clear_arg_regs_bitmap);
27487 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27488 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27489 }
27490
27491 /* Clear full registers that leak before returning. */
27492 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27493 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27494 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27495 clearing_reg);
27496 }
27497
27498 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27499 single POP instruction can be generated. LR should be replaced by PC.
27500 All the required checks are already done by USE_RETURN_INSN (); hence
27501 all we really need to decide here is whether a single register or
27502 multiple registers are being popped on return. */
27503 void
27504 thumb2_expand_return (bool simple_return)
27505 {
27506 int i, num_regs;
27507 unsigned long saved_regs_mask;
27508 arm_stack_offsets *offsets;
27509
27510 offsets = arm_get_frame_offsets ();
27511 saved_regs_mask = offsets->saved_regs_mask;
27512
27513 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27514 if (saved_regs_mask & (1 << i))
27515 num_regs++;
27516
27517 if (!simple_return && saved_regs_mask)
27518 {
27519 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27520 functions or adapt code to handle according to ACLE. This path should
27521 not be reachable for cmse_nonsecure_entry functions though we prefer
27522 to assert it for now to ensure that future code changes do not silently
27523 change this behavior. */
27524 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27525 if (arm_current_function_pac_enabled_p ())
27526 {
27527 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27528 arm_emit_multi_reg_pop (saved_regs_mask);
27529 emit_insn (gen_aut_nop ());
27530 emit_jump_insn (simple_return_rtx);
27531 }
27532 else if (num_regs == 1)
27533 {
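/* A single saved register: pop the return address straight into PC as
   part of the return, using a (return) parallel with a post-increment
   load from the stack. */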
27534 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27535 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27536 rtx addr = gen_rtx_MEM (SImode,
27537 gen_rtx_POST_INC (SImode,
27538 stack_pointer_rtx));
27539 set_mem_alias_set (addr, get_frame_alias_set ());
27540 XVECEXP (par, 0, 0) = ret_rtx;
27541 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27542 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27543 emit_jump_insn (par);
27544 }
27545 else
27546 {
27547 saved_regs_mask &= ~ (1 << LR_REGNUM);
27548 saved_regs_mask |= (1 << PC_REGNUM);
27549 arm_emit_multi_reg_pop (saved_regs_mask);
27550 }
27551 }
27552 else
27553 {
27554 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27555 cmse_nonsecure_entry_clear_before_return ();
27556 emit_jump_insn (simple_return_rtx);
27557 }
27558 }
27559
27560 void
27561 thumb1_expand_epilogue (void)
27562 {
27563 HOST_WIDE_INT amount;
27564 arm_stack_offsets *offsets;
27565 int regno;
27566
27567 /* Naked functions don't have epilogues. */
27568 if (IS_NAKED (arm_current_func_type ()))
27569 return;
27570
27571 offsets = arm_get_frame_offsets ();
27572 amount = offsets->outgoing_args - offsets->saved_regs;
27573
27574 if (frame_pointer_needed)
27575 {
27576 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27577 amount = offsets->locals_base - offsets->saved_regs;
27578 }
27579 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27580
27581 gcc_assert (amount >= 0);
27582 if (amount)
27583 {
27584 emit_insn (gen_blockage ());
27585
27586 if (amount < 512)
27587 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27588 GEN_INT (amount)));
27589 else
27590 {
27591 /* r3 is always free in the epilogue. */
27592 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27593
27594 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27595 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27596 }
27597 }
27598
27599 /* Emit a USE (stack_pointer_rtx), so that
27600 the stack adjustment will not be deleted. */
27601 emit_insn (gen_force_register_use (stack_pointer_rtx));
27602
27603 if (crtl->profile || !TARGET_SCHED_PROLOG)
27604 emit_insn (gen_blockage ());
27605
27606 /* Emit a clobber for each insn that will be restored in the epilogue,
27607 so that flow2 will get register lifetimes correct. */
27608 for (regno = 0; regno < 13; regno++)
27609 if (reg_needs_saving_p (regno))
27610 emit_clobber (gen_rtx_REG (SImode, regno));
27611
27612 if (! df_regs_ever_live_p (LR_REGNUM))
27613 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27614
27615 /* Clear all caller-saved regs that are not used to return. */
27616 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27617 cmse_nonsecure_entry_clear_before_return ();
27618 }
27619
27620 /* Epilogue code for APCS frame. */
27621 static void
27622 arm_expand_epilogue_apcs_frame (bool really_return)
27623 {
27624 unsigned long func_type;
27625 unsigned long saved_regs_mask;
27626 int num_regs = 0;
27627 int i;
27628 int floats_from_frame = 0;
27629 arm_stack_offsets *offsets;
27630
27631 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27632 func_type = arm_current_func_type ();
27633
27634 /* Get frame offsets for ARM. */
27635 offsets = arm_get_frame_offsets ();
27636 saved_regs_mask = offsets->saved_regs_mask;
27637
27638 /* Find the offset of the floating-point save area in the frame. */
27639 floats_from_frame
27640 = (offsets->saved_args
27641 + arm_compute_static_chain_stack_bytes ()
27642 - offsets->frame);
27643
27644 /* Compute how many core registers saved and how far away the floats are. */
27645 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27646 if (saved_regs_mask & (1 << i))
27647 {
27648 num_regs++;
27649 floats_from_frame += 4;
27650 }
27651
27652 if (TARGET_VFP_BASE)
27653 {
27654 int start_reg;
27655 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27656
27657 /* The offset is from IP_REGNUM. */
27658 int saved_size = arm_get_vfp_saved_size ();
27659 if (saved_size > 0)
27660 {
27661 rtx_insn *insn;
27662 floats_from_frame += saved_size;
27663 insn = emit_insn (gen_addsi3 (ip_rtx,
27664 hard_frame_pointer_rtx,
27665 GEN_INT (-floats_from_frame)));
27666 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27667 ip_rtx, hard_frame_pointer_rtx);
27668 }
27669
27670 /* Generate VFP register multi-pop. */
27671 start_reg = FIRST_VFP_REGNUM;
27672
27673 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27674 /* Look for a case where a reg does not need restoring. */
27675 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27676 {
27677 if (start_reg != i)
27678 arm_emit_vfp_multi_reg_pop (start_reg,
27679 (i - start_reg) / 2,
27680 gen_rtx_REG (SImode,
27681 IP_REGNUM));
27682 start_reg = i + 2;
27683 }
27684
27685 /* Restore the remaining regs that we have discovered (or possibly
27686 even all of them, if the conditional in the for loop never
27687 fired). */
27688 if (start_reg != i)
27689 arm_emit_vfp_multi_reg_pop (start_reg,
27690 (i - start_reg) / 2,
27691 gen_rtx_REG (SImode, IP_REGNUM));
27692 }
27693
27694 if (TARGET_IWMMXT)
27695 {
27696 /* The frame pointer is guaranteed to be non-double-word aligned, as
27697 it is set to double-word-aligned old_stack_pointer - 4. */
27698 rtx_insn *insn;
27699 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27700
27701 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27702 if (reg_needs_saving_p (i))
27703 {
27704 rtx addr = gen_frame_mem (V2SImode,
27705 plus_constant (Pmode, hard_frame_pointer_rtx,
27706 - lrm_count * 4));
27707 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27708 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27709 gen_rtx_REG (V2SImode, i),
27710 NULL_RTX);
27711 lrm_count += 2;
27712 }
27713 }
27714
27715 /* saved_regs_mask should contain IP, which holds the old stack pointer
27716 from the time the frame was created. Since SP and IP are adjacent registers,
27717 we can restore the value directly into SP. */
27718 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27719 saved_regs_mask &= ~(1 << IP_REGNUM);
27720 saved_regs_mask |= (1 << SP_REGNUM);
27721
27722 /* There are two registers left in saved_regs_mask - LR and PC. We
27723 only need to restore LR (the return address), but to
27724 save time we can load it directly into PC, unless we need a
27725 special function exit sequence, or we are not really returning. */
27726 if (really_return
27727 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27728 && !crtl->calls_eh_return)
27729 /* Delete LR from the register mask, so that LR on
27730 the stack is loaded into the PC in the register mask. */
27731 saved_regs_mask &= ~(1 << LR_REGNUM);
27732 else
27733 saved_regs_mask &= ~(1 << PC_REGNUM);
27734
27735 num_regs = bit_count (saved_regs_mask);
27736 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27737 {
27738 rtx_insn *insn;
27739 emit_insn (gen_blockage ());
27740 /* Unwind the stack to just below the saved registers. */
27741 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27742 hard_frame_pointer_rtx,
27743 GEN_INT (- 4 * num_regs)));
27744
27745 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27746 stack_pointer_rtx, hard_frame_pointer_rtx);
27747 }
27748
27749 arm_emit_multi_reg_pop (saved_regs_mask);
27750
27751 if (IS_INTERRUPT (func_type))
27752 {
27753 /* Interrupt handlers will have pushed the
27754 IP onto the stack, so restore it now. */
27755 rtx_insn *insn;
27756 rtx addr = gen_rtx_MEM (SImode,
27757 gen_rtx_POST_INC (SImode,
27758 stack_pointer_rtx));
27759 set_mem_alias_set (addr, get_frame_alias_set ());
27760 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27761 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27762 gen_rtx_REG (SImode, IP_REGNUM),
27763 NULL_RTX);
27764 }
27765
27766 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27767 return;
27768
27769 if (crtl->calls_eh_return)
27770 emit_insn (gen_addsi3 (stack_pointer_rtx,
27771 stack_pointer_rtx,
27772 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27773
27774 if (IS_STACKALIGN (func_type))
27775 /* Restore the original stack pointer. Before prologue, the stack was
27776 realigned and the original stack pointer saved in r0. For details,
27777 see comment in arm_expand_prologue. */
27778 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27779
27780 emit_jump_insn (simple_return_rtx);
27781 }
27782
27783 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27784 function is not a sibcall. */
27785 void
27786 arm_expand_epilogue (bool really_return)
27787 {
27788 unsigned long func_type;
27789 unsigned long saved_regs_mask;
27790 int num_regs = 0;
27791 int i;
27792 int amount;
27793 arm_stack_offsets *offsets;
27794
27795 func_type = arm_current_func_type ();
27796
27797 /* Naked functions don't have epilogues. Hence, generate the return pattern and
27798 let output_return_instruction take care of instruction emission if any. */
27799 if (IS_NAKED (func_type)
27800 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27801 {
27802 if (really_return)
27803 emit_jump_insn (simple_return_rtx);
27804 return;
27805 }
27806
27807 /* If we are throwing an exception, then we really must be doing a
27808 return, so we can't tail-call. */
27809 gcc_assert (!crtl->calls_eh_return || really_return);
27810
27811 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27812 {
27813 arm_expand_epilogue_apcs_frame (really_return);
27814 return;
27815 }
27816
27817 /* Get frame offsets for ARM. */
27818 offsets = arm_get_frame_offsets ();
27819 saved_regs_mask = offsets->saved_regs_mask;
27820 num_regs = bit_count (saved_regs_mask);
27821
27822 if (frame_pointer_needed)
27823 {
27824 rtx_insn *insn;
27825 /* Restore stack pointer if necessary. */
27826 if (TARGET_ARM)
27827 {
27828 /* In ARM mode, frame pointer points to first saved register.
27829 Restore stack pointer to last saved register. */
27830 amount = offsets->frame - offsets->saved_regs;
27831
27832 /* Force out any pending memory operations that reference stacked data
27833 before stack de-allocation occurs. */
27834 emit_insn (gen_blockage ());
27835 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27836 hard_frame_pointer_rtx,
27837 GEN_INT (amount)));
27838 arm_add_cfa_adjust_cfa_note (insn, amount,
27839 stack_pointer_rtx,
27840 hard_frame_pointer_rtx);
27841
27842 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27843 deleted. */
27844 emit_insn (gen_force_register_use (stack_pointer_rtx));
27845 }
27846 else
27847 {
27848 /* In Thumb-2 mode, the frame pointer points to the last saved
27849 register. */
27850 amount = offsets->locals_base - offsets->saved_regs;
27851 if (amount)
27852 {
27853 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27854 hard_frame_pointer_rtx,
27855 GEN_INT (amount)));
27856 arm_add_cfa_adjust_cfa_note (insn, amount,
27857 hard_frame_pointer_rtx,
27858 hard_frame_pointer_rtx);
27859 }
27860
27861 /* Force out any pending memory operations that reference stacked data
27862 before stack de-allocation occurs. */
27863 emit_insn (gen_blockage ());
27864 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27865 hard_frame_pointer_rtx));
27866 arm_add_cfa_adjust_cfa_note (insn, 0,
27867 stack_pointer_rtx,
27868 hard_frame_pointer_rtx);
27869 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27870 deleted. */
27871 emit_insn (gen_force_register_use (stack_pointer_rtx));
27872 }
27873 }
27874 else
27875 {
27876 /* Pop off outgoing args and local frame to adjust stack pointer to
27877 last saved register. */
27878 amount = offsets->outgoing_args - offsets->saved_regs;
27879 if (amount)
27880 {
27881 rtx_insn *tmp;
27882 /* Force out any pending memory operations that reference stacked data
27883 before stack de-allocation occurs. */
27884 emit_insn (gen_blockage ());
27885 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27886 stack_pointer_rtx,
27887 GEN_INT (amount)));
27888 arm_add_cfa_adjust_cfa_note (tmp, amount,
27889 stack_pointer_rtx, stack_pointer_rtx);
27890 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27891 not deleted. */
27892 emit_insn (gen_force_register_use (stack_pointer_rtx));
27893 }
27894 }
27895
27896 if (TARGET_VFP_BASE)
27897 {
27898 /* Generate VFP register multi-pop. */
27899 int end_reg = LAST_VFP_REGNUM + 1;
27900
27901 /* Scan the registers in reverse order. We need to match
27902 any groupings made in the prologue and generate matching
27903 vldm operations. Groups must be matched because,
27904 unlike pop, vldm can only handle consecutive registers. */
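/* For example, if d8-d9 and d11-d12 were saved but d10 was not, two
   separate VLDM pops are generated, one covering d11-d12 and one
   covering d8-d9. */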
27905 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27906 /* Look for a case where a reg does not need restoring. */
27907 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27908 {
27909 /* Restore the regs discovered so far (from reg+2 to
27910 end_reg). */
27911 if (end_reg > i + 2)
27912 arm_emit_vfp_multi_reg_pop (i + 2,
27913 (end_reg - (i + 2)) / 2,
27914 stack_pointer_rtx);
27915 end_reg = i;
27916 }
27917
27918 /* Restore the remaining regs that we have discovered (or possibly
27919 even all of them, if the conditional in the for loop never
27920 fired). */
27921 if (end_reg > i + 2)
27922 arm_emit_vfp_multi_reg_pop (i + 2,
27923 (end_reg - (i + 2)) / 2,
27924 stack_pointer_rtx);
27925 }
27926
27927 if (TARGET_IWMMXT)
27928 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27929 if (reg_needs_saving_p (i))
27930 {
27931 rtx_insn *insn;
27932 rtx addr = gen_rtx_MEM (V2SImode,
27933 gen_rtx_POST_INC (SImode,
27934 stack_pointer_rtx));
27935 set_mem_alias_set (addr, get_frame_alias_set ());
27936 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27937 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27938 gen_rtx_REG (V2SImode, i),
27939 NULL_RTX);
27940 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27941 stack_pointer_rtx, stack_pointer_rtx);
27942 }
27943
27944 if (saved_regs_mask)
27945 {
27946 rtx insn;
27947 bool return_in_pc = false;
27948
27949 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27950 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27951 && !IS_CMSE_ENTRY (func_type)
27952 && !IS_STACKALIGN (func_type)
27953 && really_return
27954 && crtl->args.pretend_args_size == 0
27955 && saved_regs_mask & (1 << LR_REGNUM)
27956 && !crtl->calls_eh_return
27957 && !arm_current_function_pac_enabled_p ())
27958 {
27959 saved_regs_mask &= ~(1 << LR_REGNUM);
27960 saved_regs_mask |= (1 << PC_REGNUM);
27961 return_in_pc = true;
27962 }
27963
27964 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27965 {
27966 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27967 if (saved_regs_mask & (1 << i))
27968 {
27969 rtx addr = gen_rtx_MEM (SImode,
27970 gen_rtx_POST_INC (SImode,
27971 stack_pointer_rtx));
27972 set_mem_alias_set (addr, get_frame_alias_set ());
27973
27974 if (i == PC_REGNUM)
27975 {
27976 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27977 XVECEXP (insn, 0, 0) = ret_rtx;
27978 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27979 addr);
27980 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27981 insn = emit_jump_insn (insn);
27982 }
27983 else
27984 {
27985 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27986 addr));
27987 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27988 gen_rtx_REG (SImode, i),
27989 NULL_RTX);
27990 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27991 stack_pointer_rtx,
27992 stack_pointer_rtx);
27993 }
27994 }
27995 }
27996 else
27997 {
27998 if (TARGET_LDRD
27999 && current_tune->prefer_ldrd_strd
28000 && !optimize_function_for_size_p (cfun))
28001 {
28002 if (TARGET_THUMB2)
28003 thumb2_emit_ldrd_pop (saved_regs_mask);
28004 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28005 arm_emit_ldrd_pop (saved_regs_mask);
28006 else
28007 arm_emit_multi_reg_pop (saved_regs_mask);
28008 }
28009 else
28010 arm_emit_multi_reg_pop (saved_regs_mask);
28011 }
28012
28013 if (return_in_pc)
28014 return;
28015 }
28016
28017 amount
28018 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28019 if (amount)
28020 {
28021 int i, j;
28022 rtx dwarf = NULL_RTX;
28023 rtx_insn *tmp =
28024 emit_insn (gen_addsi3 (stack_pointer_rtx,
28025 stack_pointer_rtx,
28026 GEN_INT (amount)));
28027
28028 RTX_FRAME_RELATED_P (tmp) = 1;
28029
28030 if (cfun->machine->uses_anonymous_args)
28031 {
28032 /* Restore pretend args. Refer to arm_expand_prologue for how the
28033 pretend args are saved on the stack. */
28034 int num_regs = crtl->args.pretend_args_size / 4;
28035 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
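/* For example, num_regs == 2 gives (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and
   r3 were saved as pretend args and get REG_CFA_RESTORE notes. */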
28036 for (j = 0, i = 0; j < num_regs; i++)
28037 if (saved_regs_mask & (1 << i))
28038 {
28039 rtx reg = gen_rtx_REG (SImode, i);
28040 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28041 j++;
28042 }
28043 REG_NOTES (tmp) = dwarf;
28044 }
28045 arm_add_cfa_adjust_cfa_note (tmp, amount,
28046 stack_pointer_rtx, stack_pointer_rtx);
28047 }
28048
28049 if (IS_CMSE_ENTRY (func_type))
28050 {
28051 /* CMSE_ENTRY always returns. */
28052 gcc_assert (really_return);
28053 /* Clear all caller-saved regs that are not used to return. */
28054 cmse_nonsecure_entry_clear_before_return ();
28055
28056 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28057 VLDR. */
28058 if (TARGET_HAVE_FPCXT_CMSE)
28059 {
28060 rtx_insn *insn;
28061
28062 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28063 GEN_INT (FPCXTNS_ENUM)));
28064 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28065 plus_constant (Pmode, stack_pointer_rtx, 4));
28066 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28067 RTX_FRAME_RELATED_P (insn) = 1;
28068 }
28069 }
28070
28071 if (arm_current_function_pac_enabled_p ())
28072 emit_insn (gen_aut_nop ());
28073
28074 if (!really_return)
28075 return;
28076
28077 if (crtl->calls_eh_return)
28078 emit_insn (gen_addsi3 (stack_pointer_rtx,
28079 stack_pointer_rtx,
28080 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28081
28082 if (IS_STACKALIGN (func_type))
28083 /* Restore the original stack pointer. Before prologue, the stack was
28084 realigned and the original stack pointer saved in r0. For details,
28085 see comment in arm_expand_prologue. */
28086 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28087
28088 emit_jump_insn (simple_return_rtx);
28089 }
28090
28091 /* Implementation of insn prologue_thumb1_interwork. This is the first
28092 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28093
28094 const char *
28095 thumb1_output_interwork (void)
28096 {
28097 const char * name;
28098 FILE *f = asm_out_file;
28099
28100 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28101 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28102 == SYMBOL_REF);
28103 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28104
28105 /* Generate code sequence to switch us into Thumb mode. */
28106 /* The .code 32 directive has already been emitted by
28107 ASM_DECLARE_FUNCTION_NAME. */
28108 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28109 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28110
28111 /* Generate a label, so that the debugger will notice the
28112 change in instruction sets. This label is also used by
28113 the assembler to bypass the ARM code when this function
28114 is called from a Thumb encoded function elsewhere in the
28115 same file. Hence the definition of STUB_NAME here must
28116 agree with the definition in gas/config/tc-arm.c. */
28117
28118 #define STUB_NAME ".real_start_of"
28119
28120 fprintf (f, "\t.code\t16\n");
28121 #ifdef ARM_PE
28122 if (arm_dllexport_name_p (name))
28123 name = arm_strip_name_encoding (name);
28124 #endif
28125 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28126 fprintf (f, "\t.thumb_func\n");
28127 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28128
28129 return "";
28130 }
28131
28132 /* Handle the case of a double word load into a low register from
28133 a computed memory address. The computed address may involve a
28134 register which is overwritten by the load. */
28135 const char *
28136 thumb_load_double_from_address (rtx *operands)
28137 {
28138 rtx addr;
28139 rtx base;
28140 rtx offset;
28141 rtx arg1;
28142 rtx arg2;
28143
28144 gcc_assert (REG_P (operands[0]));
28145 gcc_assert (MEM_P (operands[1]));
28146
28147 /* Get the memory address. */
28148 addr = XEXP (operands[1], 0);
28149
28150 /* Work out how the memory address is computed. */
28151 switch (GET_CODE (addr))
28152 {
28153 case REG:
28154 operands[2] = adjust_address (operands[1], SImode, 4);
28155
28156 if (REGNO (operands[0]) == REGNO (addr))
28157 {
28158 output_asm_insn ("ldr\t%H0, %2", operands);
28159 output_asm_insn ("ldr\t%0, %1", operands);
28160 }
28161 else
28162 {
28163 output_asm_insn ("ldr\t%0, %1", operands);
28164 output_asm_insn ("ldr\t%H0, %2", operands);
28165 }
28166 break;
28167
28168 case CONST:
28169 /* Compute <address> + 4 for the high order load. */
28170 operands[2] = adjust_address (operands[1], SImode, 4);
28171
28172 output_asm_insn ("ldr\t%0, %1", operands);
28173 output_asm_insn ("ldr\t%H0, %2", operands);
28174 break;
28175
28176 case PLUS:
28177 arg1 = XEXP (addr, 0);
28178 arg2 = XEXP (addr, 1);
28179
28180 if (CONSTANT_P (arg1))
28181 base = arg2, offset = arg1;
28182 else
28183 base = arg1, offset = arg2;
28184
28185 gcc_assert (REG_P (base));
28186
28187 /* Catch the case of <address> = <reg> + <reg> */
28188 if (REG_P (offset))
28189 {
28190 int reg_offset = REGNO (offset);
28191 int reg_base = REGNO (base);
28192 int reg_dest = REGNO (operands[0]);
28193
28194 /* Add the base and offset registers together into the
28195 higher destination register. */
28196 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r\n",
28197 reg_dest + 1, reg_base, reg_offset);
28198
28199 /* Load the lower destination register from the address in
28200 the higher destination register. */
28201 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]\n",
28202 reg_dest, reg_dest + 1);
28203
28204 /* Load the higher destination register from its own address
28205 plus 4. */
28206 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]\n",
28207 reg_dest + 1, reg_dest + 1);
28208 }
28209 else
28210 {
28211 /* Compute <address> + 4 for the high order load. */
28212 operands[2] = adjust_address (operands[1], SImode, 4);
28213
28214 /* If the computed address is held in the low order register
28215 then load the high order register first, otherwise always
28216 load the low order register first. */
28217 if (REGNO (operands[0]) == REGNO (base))
28218 {
28219 output_asm_insn ("ldr\t%H0, %2", operands);
28220 output_asm_insn ("ldr\t%0, %1", operands);
28221 }
28222 else
28223 {
28224 output_asm_insn ("ldr\t%0, %1", operands);
28225 output_asm_insn ("ldr\t%H0, %2", operands);
28226 }
28227 }
28228 break;
28229
28230 case LABEL_REF:
28231 /* With no registers to worry about we can just load the value
28232 directly. */
28233 operands[2] = adjust_address (operands[1], SImode, 4);
28234
28235 output_asm_insn ("ldr\t%H0, %2", operands);
28236 output_asm_insn ("ldr\t%0, %1", operands);
28237 break;
28238
28239 default:
28240 gcc_unreachable ();
28241 }
28242
28243 return "";
28244 }
28245
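/* Output an ldmia/stmia pair that copies N (2 or 3) words through the
   scratch registers in OPERANDS, sorting them first so that the register
   lists are in ascending order. */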
28246 const char *
28247 thumb_output_move_mem_multiple (int n, rtx *operands)
28248 {
28249 switch (n)
28250 {
28251 case 2:
28252 if (REGNO (operands[4]) > REGNO (operands[5]))
28253 std::swap (operands[4], operands[5]);
28254
28255 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28256 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28257 break;
28258
28259 case 3:
28260 if (REGNO (operands[4]) > REGNO (operands[5]))
28261 std::swap (operands[4], operands[5]);
28262 if (REGNO (operands[5]) > REGNO (operands[6]))
28263 std::swap (operands[5], operands[6]);
28264 if (REGNO (operands[4]) > REGNO (operands[5]))
28265 std::swap (operands[4], operands[5]);
28266
28267 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28268 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28269 break;
28270
28271 default:
28272 gcc_unreachable ();
28273 }
28274
28275 return "";
28276 }
28277
28278 /* Output a call-via instruction for thumb state. */
28279 const char *
28280 thumb_call_via_reg (rtx reg)
28281 {
28282 int regno = REGNO (reg);
28283 rtx *labelp;
28284
28285 gcc_assert (regno < LR_REGNUM);
28286
28287 /* If we are in the normal text section we can use a single instance
28288 per compilation unit. If we are doing function sections, then we need
28289 an entry per section, since we can't rely on reachability. */
28290 if (in_section == text_section)
28291 {
28292 thumb_call_reg_needed = 1;
28293
28294 if (thumb_call_via_label[regno] == NULL)
28295 thumb_call_via_label[regno] = gen_label_rtx ();
28296 labelp = thumb_call_via_label + regno;
28297 }
28298 else
28299 {
28300 if (cfun->machine->call_via[regno] == NULL)
28301 cfun->machine->call_via[regno] = gen_label_rtx ();
28302 labelp = cfun->machine->call_via + regno;
28303 }
28304
28305 output_asm_insn ("bl\t%a0", labelp);
28306 return "";
28307 }
28308
28309 /* Routines for generating rtl. */
28310 void
28311 thumb_expand_cpymemqi (rtx *operands)
28312 {
28313 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28314 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28315 HOST_WIDE_INT len = INTVAL (operands[2]);
28316 HOST_WIDE_INT offset = 0;
28317
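/* Copy in 12- and 8-byte blocks (which post-increment IN and OUT), then
   finish with word, halfword and byte copies at increasing offsets. For
   example, a 19-byte copy becomes one 12-byte block followed by a word,
   a halfword and a byte at offsets 0, 4 and 6. */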
28318 while (len >= 12)
28319 {
28320 emit_insn (gen_cpymem12b (out, in, out, in));
28321 len -= 12;
28322 }
28323
28324 if (len >= 8)
28325 {
28326 emit_insn (gen_cpymem8b (out, in, out, in));
28327 len -= 8;
28328 }
28329
28330 if (len >= 4)
28331 {
28332 rtx reg = gen_reg_rtx (SImode);
28333 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28334 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28335 len -= 4;
28336 offset += 4;
28337 }
28338
28339 if (len >= 2)
28340 {
28341 rtx reg = gen_reg_rtx (HImode);
28342 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28343 plus_constant (Pmode, in,
28344 offset))));
28345 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28346 offset)),
28347 reg));
28348 len -= 2;
28349 offset += 2;
28350 }
28351
28352 if (len)
28353 {
28354 rtx reg = gen_reg_rtx (QImode);
28355 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28356 plus_constant (Pmode, in,
28357 offset))));
28358 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28359 offset)),
28360 reg));
28361 }
28362 }
28363
28364 void
28365 thumb_reload_out_hi (rtx *operands)
28366 {
28367 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28368 }
28369
28370 /* Return the length of a function name prefix encoding that starts
28371 with the character C, or zero if C does not start an encoding. */
28372 static int
28373 arm_get_strip_length (int c)
28374 {
28375 switch (c)
28376 {
28377 ARM_NAME_ENCODING_LENGTHS
28378 default: return 0;
28379 }
28380 }
28381
28382 /* Return a pointer to a function's name with any
28383 and all prefix encodings stripped from it. */
28384 const char *
28385 arm_strip_name_encoding (const char *name)
28386 {
28387 int skip;
28388
28389 while ((skip = arm_get_strip_length (* name)))
28390 name += skip;
28391
28392 return name;
28393 }
28394
28395 /* If there is a '*' anywhere in the name's prefix, then
28396 emit the stripped name verbatim, otherwise prepend an
28397 underscore if leading underscores are being used. */
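/* For example, a name encoded as "*foo" is emitted verbatim as "foo",
   while a plain "foo" goes through %U and picks up any user-label
   prefix. */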
28398 void
28399 arm_asm_output_labelref (FILE *stream, const char *name)
28400 {
28401 int skip;
28402 int verbatim = 0;
28403
28404 while ((skip = arm_get_strip_length (* name)))
28405 {
28406 verbatim |= (*name == '*');
28407 name += skip;
28408 }
28409
28410 if (verbatim)
28411 fputs (name, stream);
28412 else
28413 asm_fprintf (stream, "%U%s", name);
28414 }
28415
28416 /* This function is used to emit an EABI tag and its associated value.
28417 We emit the numerical value of the tag in case the assembler does not
28418 support textual tags (e.g. gas prior to 2.20). If requested we include
28419 the tag name in a comment so that anyone reading the assembler output
28420 will know which tag is being set.
28421
28422 This function is not static because arm-c.cc needs it too. */
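/* For instance, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   emits ".eabi_attribute 20, 1", followed under -fverbose-asm by an
   "@ Tag_ABI_FP_denormal" comment. */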
28423
28424 void
28425 arm_emit_eabi_attribute (const char *name, int num, int val)
28426 {
28427 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28428 if (flag_verbose_asm || flag_debug_asm)
28429 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28430 asm_fprintf (asm_out_file, "\n");
28431 }
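/* Example output (added commentary; assumes ASM_COMMENT_START is "@" as on
   typical ARM ELF configurations).  With -fverbose-asm,
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) emits

       .eabi_attribute 28, 1   @ Tag_ABI_VFP_args

   and without it only the numeric form ".eabi_attribute 28, 1".  */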
28432
28433 /* This function is used to print CPU tuning information as comment
28434 in assembler file. Pointers are not printed for now. */
28435
28436 void
28437 arm_print_tune_info (void)
28438 {
28439 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28440 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28441 current_tune->constant_limit);
28442 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28443 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28444 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28445 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28446 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28447 "prefetch.l1_cache_size:\t%d\n",
28448 current_tune->prefetch.l1_cache_size);
28449 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28450 "prefetch.l1_cache_line_size:\t%d\n",
28451 current_tune->prefetch.l1_cache_line_size);
28452 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28453 "prefer_constant_pool:\t%d\n",
28454 (int) current_tune->prefer_constant_pool);
28455 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28456 "branch_cost:\t(s:speed, p:predictable)\n");
28457 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28458 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28459 current_tune->branch_cost (false, false));
28460 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28461 current_tune->branch_cost (false, true));
28462 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28463 current_tune->branch_cost (true, false));
28464 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28465 current_tune->branch_cost (true, true));
28466 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28467 "prefer_ldrd_strd:\t%d\n",
28468 (int) current_tune->prefer_ldrd_strd);
28469 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28470 "logical_op_non_short_circuit:\t[%d,%d]\n",
28471 (int) current_tune->logical_op_non_short_circuit_thumb,
28472 (int) current_tune->logical_op_non_short_circuit_arm);
28473 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28474 "disparage_flag_setting_t16_encodings:\t%d\n",
28475 (int) current_tune->disparage_flag_setting_t16_encodings);
28476 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28477 "string_ops_prefer_neon:\t%d\n",
28478 (int) current_tune->string_ops_prefer_neon);
28479 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28480 "max_insns_inline_memset:\t%d\n",
28481 current_tune->max_insns_inline_memset);
28482 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28483 current_tune->fusible_ops);
28484 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28485 (int) current_tune->sched_autopref);
28486 }
28487
28488 /* The last set of target options used to emit .arch directives, etc. This
28489 could be a function-local static if it were not required to expose it as a
28490 root to the garbage collector. */
28491 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28492
28493 /* Print .arch and .arch_extension directives corresponding to the
28494 current architecture configuration. */
28495 static void
28496 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28497 {
28498 arm_build_target build_target;
28499 /* If the target options haven't changed since the last time we were called
28500 there is nothing to do. This should be sufficient to suppress the
28501 majority of redundant work. */
28502 if (last_asm_targ_options == targ_options)
28503 return;
28504
28505 last_asm_targ_options = targ_options;
28506
28507 build_target.isa = sbitmap_alloc (isa_num_bits);
28508 arm_configure_build_target (&build_target, targ_options, false);
28509
28510 if (build_target.core_name
28511 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28512 {
28513 const char* truncated_name
28514 = arm_rewrite_selected_cpu (build_target.core_name);
28515 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28516 }
28517
28518 const arch_option *arch
28519 = arm_parse_arch_option_name (all_architectures, "-march",
28520 build_target.arch_name);
28521 auto_sbitmap opt_bits (isa_num_bits);
28522
28523 gcc_assert (arch);
28524
28525 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28526 {
28527 /* Keep backward compatibility for assemblers which don't support
28528 armv7ve. Fortunately, none of the following extensions are reset
28529 by a .fpu directive. */
28530 asm_fprintf (stream, "\t.arch armv7-a\n");
28531 asm_fprintf (stream, "\t.arch_extension virt\n");
28532 asm_fprintf (stream, "\t.arch_extension idiv\n");
28533 asm_fprintf (stream, "\t.arch_extension sec\n");
28534 asm_fprintf (stream, "\t.arch_extension mp\n");
28535 }
28536 else
28537 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28538
28539 /* The .fpu directive will reset any architecture extensions from the
28540 assembler that relate to the fp/vector extensions. So put this out before
28541 any .arch_extension directives. */
28542 const char *fpu_name = (TARGET_SOFT_FLOAT
28543 ? "softvfp"
28544 : arm_identify_fpu_from_isa (build_target.isa));
28545 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28546
28547 if (!arch->common.extensions)
28548 return;
28549
28550 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28551 opt->name != NULL;
28552 opt++)
28553 {
28554 if (!opt->remove)
28555 {
28556 arm_initialize_isa (opt_bits, opt->isa_bits);
28557
28558 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28559 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28560 floating-point instructions are disabled. So the following check
28561 restricts the printing of ".arch_extension mve" and
28562 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28563 this special behaviour because the feature bits "mve" and
28564 "mve_float" are not part of the "fpu bits", so they are not cleared
28565 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28566 TARGET_HAVE_MVE_FLOAT are disabled. */
28567 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28568 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28569 && !TARGET_HAVE_MVE_FLOAT))
28570 continue;
28571
28572 /* If every feature bit of this option is set in the target ISA
28573 specification, print out the option name. However, don't print
28574 anything if all the bits are part of the FPU specification. */
28575 if (bitmap_subset_p (opt_bits, build_target.isa)
28576 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28577 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28578 }
28579 }
28580 }
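/* Example output (added commentary): for -march=armv7ve the special case
   above produces

       .arch armv7-a
       .arch_extension virt
       .arch_extension idiv
       .arch_extension sec
       .arch_extension mp
       .fpu <fpu-name or softvfp>

   For any other architecture a single ".arch <name>" is emitted, followed
   by ".fpu" and then ".arch_extension" lines for each extension whose
   feature bits are present in the target ISA and are not purely FPU
   bits.  */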
28581
28582 static void
28583 arm_file_start (void)
28584 {
28585 int val;
28586 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28587 bool bti = (aarch_enable_bti == 1);
28588
28589 arm_print_asm_arch_directives
28590 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28591
28592 if (TARGET_BPABI)
28593 {
28594 /* If we have a named cpu, but the assembler does not support that
28595 name via .cpu, put out a cpu name attribute; but don't do this if the
28596 name starts with the fictitious prefix, 'generic'. */
28597 if (arm_active_target.core_name
28598 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28599 && !startswith (arm_active_target.core_name, "generic"))
28600 {
28601 const char* truncated_name
28602 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28603 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28604 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28605 truncated_name);
28606 }
28607
28608 if (print_tune_info)
28609 arm_print_tune_info ();
28610
28611 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28612 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28613
28614 if (TARGET_HARD_FLOAT_ABI)
28615 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28616
28617 /* Some of these attributes only apply when the corresponding features
28618 are used. However we don't have any easy way of figuring this out.
28619 Conservatively record the setting that would have been used. */
28620
28621 if (flag_rounding_math)
28622 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28623
28624 if (!flag_unsafe_math_optimizations)
28625 {
28626 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28627 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28628 }
28629 if (flag_signaling_nans)
28630 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28631
28632 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28633 flag_finite_math_only ? 1 : 3);
28634
28635 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28636 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28637 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28638 flag_short_enums ? 1 : 2);
28639
28640 /* Tag_ABI_optimization_goals. */
28641 if (optimize_size)
28642 val = 4;
28643 else if (optimize >= 2)
28644 val = 2;
28645 else if (optimize)
28646 val = 1;
28647 else
28648 val = 6;
28649 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28650
28651 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28652 unaligned_access);
28653
28654 if (arm_fp16_format)
28655 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28656 (int) arm_fp16_format);
28657
28658 if (TARGET_HAVE_PACBTI)
28659 {
28660 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28661 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28662 }
28663 else if (pac || bti)
28664 {
28665 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28666 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28667 }
28668
28669 if (bti)
28670 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28671 if (pac)
28672 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28673
28674 if (arm_lang_output_object_attributes_hook)
28675 arm_lang_output_object_attributes_hook();
28676 }
28677
28678 default_file_start ();
28679 }
28680
28681 static void
28682 arm_file_end (void)
28683 {
28684 int regno;
28685
28686 /* Just in case the last function output in the assembler had non-default
28687 architecture directives, we force the assembler state back to the default
28688 set, so that any 'calculated' build attributes are based on the default
28689 options rather than the special options for that function. */
28690 arm_print_asm_arch_directives
28691 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28692
28693 if (NEED_INDICATE_EXEC_STACK)
28694 /* Add .note.GNU-stack. */
28695 file_end_indicate_exec_stack ();
28696
28697 if (! thumb_call_reg_needed)
28698 return;
28699
28700 switch_to_section (text_section);
28701 asm_fprintf (asm_out_file, "\t.code 16\n");
28702 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28703
28704 for (regno = 0; regno < LR_REGNUM; regno++)
28705 {
28706 rtx label = thumb_call_via_label[regno];
28707
28708 if (label != 0)
28709 {
28710 targetm.asm_out.internal_label (asm_out_file, "L",
28711 CODE_LABEL_NUMBER (label));
28712 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28713 }
28714 }
28715 }
28716
28717 #ifndef ARM_PE
28718 /* Symbols in the text segment can be accessed without indirecting via the
28719 constant pool; it may take an extra binary operation, but this is still
28720 faster than indirecting via memory. Don't do this when not optimizing,
28721 since we won't be calculating all of the offsets necessary to do this
28722 simplification. */
28723
28724 static void
28725 arm_encode_section_info (tree decl, rtx rtl, int first)
28726 {
28727 if (optimize > 0 && TREE_CONSTANT (decl))
28728 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28729
28730 default_encode_section_info (decl, rtl, first);
28731 }
28732 #endif /* !ARM_PE */
28733
28734 static void
28735 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28736 {
28737 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28738 && !strcmp (prefix, "L"))
28739 {
28740 arm_ccfsm_state = 0;
28741 arm_target_insn = NULL;
28742 }
28743 default_internal_label (stream, prefix, labelno);
28744 }
28745
28746 /* Define classes to generate code as RTL or output asm to a file.
28747 Using templates then allows us to use the same code to output code
28748 sequences in the two formats. */
28749 class thumb1_const_rtl
28750 {
28751 public:
28752 thumb1_const_rtl (rtx dst) : dst (dst) {}
28753
28754 void mov (HOST_WIDE_INT val)
28755 {
28756 emit_set_insn (dst, GEN_INT (val));
28757 }
28758
28759 void add (HOST_WIDE_INT val)
28760 {
28761 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28762 }
28763
28764 void ashift (HOST_WIDE_INT shift)
28765 {
28766 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28767 }
28768
28769 void neg ()
28770 {
28771 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28772 }
28773
28774 private:
28775 rtx dst;
28776 };
28777
28778 class thumb1_const_print
28779 {
28780 public:
28781 thumb1_const_print (FILE *f, int regno)
28782 {
28783 t_file = f;
28784 dst_regname = reg_names[regno];
28785 }
28786
28787 void mov (HOST_WIDE_INT val)
28788 {
28789 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28790 dst_regname, val);
28791 }
28792
28793 void add (HOST_WIDE_INT val)
28794 {
28795 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28796 dst_regname, val);
28797 }
28798
28799 void ashift (HOST_WIDE_INT shift)
28800 {
28801 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28802 dst_regname, shift);
28803 }
28804
28805 void neg ()
28806 {
28807 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28808 }
28809
28810 private:
28811 FILE *t_file;
28812 const char *dst_regname;
28813 };
28814
28815 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28816 Avoid generating useless code when one of the bytes is zero. */
28817 template <class T>
28818 void
28819 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28820 {
28821 bool mov_done_p = false;
28822 unsigned HOST_WIDE_INT val = op1;
28823 int shift = 0;
28824 int i;
28825
28826 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28827
28828 if (val <= 255)
28829 {
28830 dst.mov (val);
28831 return;
28832 }
28833
28834 /* For negative numbers with the first nine bits set, build the
28835 opposite of OP1, then negate it; the result is generally shorter
28836 and never longer. */
28837 if ((val & 0xFF800000) == 0xFF800000)
28838 {
28839 thumb1_gen_const_int_1 (dst, -op1);
28840 dst.neg ();
28841 return;
28842 }
28843
28844 /* In the general case, we need 7 instructions to build
28845 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28846 do better if VAL is small enough, or
28847 right-shiftable by a suitable amount. If the
28848 right-shift enables us to encode at least one byte fewer,
28849 it's worth it: we save an adds and an lsls at the
28850 expense of a final lsls. */
28851 int final_shift = number_of_first_bit_set (val);
28852
28853 int leading_zeroes = clz_hwi (val);
28854 int number_of_bytes_needed
28855 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28856 / BITS_PER_UNIT) + 1;
28857 int number_of_bytes_needed2
28858 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28859 / BITS_PER_UNIT) + 1;
28860
28861 if (number_of_bytes_needed2 < number_of_bytes_needed)
28862 val >>= final_shift;
28863 else
28864 final_shift = 0;
28865
28866 /* If we are in a very small range, we can use either a single movs
28867 or movs+adds. */
28868 if (val <= 510)
28869 {
28870 if (val > 255)
28871 {
28872 unsigned HOST_WIDE_INT high = val - 255;
28873
28874 dst.mov (high);
28875 dst.add (255);
28876 }
28877 else
28878 dst.mov (val);
28879
28880 if (final_shift > 0)
28881 dst.ashift (final_shift);
28882 }
28883 else
28884 {
28885 /* General case, emit upper 3 bytes as needed. */
28886 for (i = 0; i < 3; i++)
28887 {
28888 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28889
28890 if (byte)
28891 {
28892 /* We are about to emit new bits, stop accumulating a
28893 shift amount, and left-shift only if we have already
28894 emitted some upper bits. */
28895 if (mov_done_p)
28896 {
28897 dst.ashift (shift);
28898 dst.add (byte);
28899 }
28900 else
28901 dst.mov (byte);
28902
28903 /* Stop accumulating shift amount since we've just
28904 emitted some bits. */
28905 shift = 0;
28906
28907 mov_done_p = true;
28908 }
28909
28910 if (mov_done_p)
28911 shift += 8;
28912 }
28913
28914 /* Emit lower byte. */
28915 if (!mov_done_p)
28916 dst.mov (val & 0xff);
28917 else
28918 {
28919 dst.ashift (shift);
28920 if (val & 0xff)
28921 dst.add (val & 0xff);
28922 }
28923
28924 if (final_shift > 0)
28925 dst.ashift (final_shift);
28926 }
28927 }
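/* Worked examples (added commentary), shown in the print form for r3.

   0x12345678 takes the full 7-instruction general case:
       movs r3, #18     @ 0x12
       lsls r3, #8
       adds r3, #52     @ 0x34
       lsls r3, #8
       adds r3, #86     @ 0x56
       lsls r3, #8
       adds r3, #120    @ 0x78

   0x44000 benefits from the final right-shift trick: the value is first
   shifted right by 14, leaving 17, so we emit
       movs r3, #17
       lsls r3, #14
   instead of the four instructions the byte-by-byte general case would
   need.  */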
28928
28929 /* Proxies for thumb1.md, since the thumb1_const_print and
28930 thumb1_const_rtl classes are not exported. */
28931 void
28932 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28933 {
28934 thumb1_const_rtl t (dst);
28935 thumb1_gen_const_int_1 (t, op1);
28936 }
28937
28938 void
28939 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28940 {
28941 thumb1_const_print t (asm_out_file, REGNO (dst));
28942 thumb1_gen_const_int_1 (t, op1);
28943 }
28944
28945 /* Output code to add DELTA to the first argument, and then jump
28946 to FUNCTION. Used for C++ multiple inheritance. */
28947
28948 static void
28949 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28950 HOST_WIDE_INT, tree function)
28951 {
28952 static int thunk_label = 0;
28953 char label[256];
28954 char labelpc[256];
28955 int mi_delta = delta;
28956 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28957 int shift = 0;
28958 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28959 ? 1 : 0);
28960 if (mi_delta < 0)
28961 mi_delta = - mi_delta;
28962
28963 final_start_function (emit_barrier (), file, 1);
28964
28965 if (TARGET_THUMB1)
28966 {
28967 int labelno = thunk_label++;
28968 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28969 /* Thunks are entered in arm mode when available. */
28970 if (TARGET_THUMB1_ONLY)
28971 {
28972 /* push r3 so we can use it as a temporary. */
28973 /* TODO: Omit this save if r3 is not used. */
28974 fputs ("\tpush {r3}\n", file);
28975
28976 /* With -mpure-code, we cannot load the address from the
28977 constant pool: we build it explicitly. */
28978 if (target_pure_code)
28979 {
28980 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28981 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28982 fputc ('\n', file);
28983 fputs ("\tlsls r3, #8\n", file);
28984 fputs ("\tadds\tr3, #:upper0_7:#", file);
28985 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28986 fputc ('\n', file);
28987 fputs ("\tlsls r3, #8\n", file);
28988 fputs ("\tadds\tr3, #:lower8_15:#", file);
28989 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28990 fputc ('\n', file);
28991 fputs ("\tlsls r3, #8\n", file);
28992 fputs ("\tadds\tr3, #:lower0_7:#", file);
28993 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28994 fputc ('\n', file);
28995 }
28996 else
28997 fputs ("\tldr\tr3, ", file);
28998 }
28999 else
29000 {
29001 fputs ("\tldr\tr12, ", file);
29002 }
29003
29004 if (!target_pure_code)
29005 {
29006 assemble_name (file, label);
29007 fputc ('\n', file);
29008 }
29009
29010 if (flag_pic)
29011 {
29012 /* If we are generating PIC, the ldr instruction below loads
29013 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29014 the address of the add + 8, so we have:
29015
29016 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29017 = target + 1.
29018
29019 Note that we have "+ 1" because some versions of GNU ld
29020 don't set the low bit of the result for R_ARM_REL32
29021 relocations against thumb function symbols.
29022 On ARMv6M this is +4, not +8. */
29023 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29024 assemble_name (file, labelpc);
29025 fputs (":\n", file);
29026 if (TARGET_THUMB1_ONLY)
29027 {
29028 /* This is 2 insns after the start of the thunk, so we know it
29029 is 4-byte aligned. */
29030 fputs ("\tadd\tr3, pc, r3\n", file);
29031 fputs ("\tmov r12, r3\n", file);
29032 }
29033 else
29034 fputs ("\tadd\tr12, pc, r12\n", file);
29035 }
29036 else if (TARGET_THUMB1_ONLY)
29037 fputs ("\tmov r12, r3\n", file);
29038 }
29039 if (TARGET_THUMB1_ONLY)
29040 {
29041 if (mi_delta > 255)
29042 {
29043 /* With -mpure-code, we cannot load MI_DELTA from the
29044 constant pool: we build it explicitly. */
29045 if (target_pure_code)
29046 {
29047 thumb1_const_print r3 (file, 3);
29048 thumb1_gen_const_int_1 (r3, mi_delta);
29049 }
29050 else
29051 {
29052 fputs ("\tldr\tr3, ", file);
29053 assemble_name (file, label);
29054 fputs ("+4\n", file);
29055 }
29056 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29057 mi_op, this_regno, this_regno);
29058 }
29059 else if (mi_delta != 0)
29060 {
29061 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
29062 when one of the operands is an immediate. */
29063 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29064 mi_op, this_regno, this_regno,
29065 mi_delta);
29066 }
29067 }
29068 else
29069 {
29070 /* TODO: Use movw/movt for large constants when available. */
29071 while (mi_delta != 0)
29072 {
29073 if ((mi_delta & (3 << shift)) == 0)
29074 shift += 2;
29075 else
29076 {
29077 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29078 mi_op, this_regno, this_regno,
29079 mi_delta & (0xff << shift));
29080 mi_delta &= ~(0xff << shift);
29081 shift += 8;
29082 }
29083 }
29084 }
29085 if (TARGET_THUMB1)
29086 {
29087 if (TARGET_THUMB1_ONLY)
29088 fputs ("\tpop\t{r3}\n", file);
29089
29090 fprintf (file, "\tbx\tr12\n");
29091
29092 /* With -mpure-code, we don't need to emit literals for the
29093 function address and delta since we emitted code to build
29094 them. */
29095 if (!target_pure_code)
29096 {
29097 ASM_OUTPUT_ALIGN (file, 2);
29098 assemble_name (file, label);
29099 fputs (":\n", file);
29100 if (flag_pic)
29101 {
29102 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29103 rtx tem = XEXP (DECL_RTL (function), 0);
29104 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29105 pipeline offset is four rather than eight. Adjust the offset
29106 accordingly. */
29107 tem = plus_constant (GET_MODE (tem), tem,
29108 TARGET_THUMB1_ONLY ? -3 : -7);
29109 tem = gen_rtx_MINUS (GET_MODE (tem),
29110 tem,
29111 gen_rtx_SYMBOL_REF (Pmode,
29112 ggc_strdup (labelpc)));
29113 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29114 }
29115 else
29116 /* Output ".word .LTHUNKn". */
29117 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29118
29119 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29120 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29121 }
29122 }
29123 else
29124 {
29125 fputs ("\tb\t", file);
29126 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29127 if (NEED_PLT_RELOC)
29128 fputs ("(PLT)", file);
29129 fputc ('\n', file);
29130 }
29131
29132 final_end_function ();
29133 }
29134
29135 /* MI thunk handling for TARGET_32BIT. */
29136
29137 static void
29138 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29139 HOST_WIDE_INT vcall_offset, tree function)
29140 {
29141 const bool long_call_p = arm_is_long_call_p (function);
29142
29143 /* On ARM, this_regno is R0 or R1 depending on
29144 whether the function returns an aggregate or not.
29145 */
29146 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29147 function)
29148 ? R1_REGNUM : R0_REGNUM);
29149
29150 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29151 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29152 reload_completed = 1;
29153 emit_note (NOTE_INSN_PROLOGUE_END);
29154
29155 /* Add DELTA to THIS_RTX. */
29156 if (delta != 0)
29157 arm_split_constant (PLUS, Pmode, NULL_RTX,
29158 delta, this_rtx, this_rtx, false);
29159
29160 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29161 if (vcall_offset != 0)
29162 {
29163 /* Load *THIS_RTX. */
29164 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29165 /* Compute *THIS_RTX + VCALL_OFFSET. */
29166 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29167 false);
29168 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29169 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29170 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29171 }
29172
29173 /* Generate a tail call to the target function. */
29174 if (!TREE_USED (function))
29175 {
29176 assemble_external (function);
29177 TREE_USED (function) = 1;
29178 }
29179 rtx funexp = XEXP (DECL_RTL (function), 0);
29180 if (long_call_p)
29181 {
29182 emit_move_insn (temp, funexp);
29183 funexp = temp;
29184 }
29185 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29186 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29187 SIBLING_CALL_P (insn) = 1;
29188 emit_barrier ();
29189
29190 /* Indirect calls require a bit of fixup in PIC mode. */
29191 if (long_call_p)
29192 {
29193 split_all_insns_noflow ();
29194 arm_reorg ();
29195 }
29196
29197 insn = get_insns ();
29198 shorten_branches (insn);
29199 final_start_function (insn, file, 1);
29200 final (insn, file, 1);
29201 final_end_function ();
29202
29203 /* Stop pretending this is a post-reload pass. */
29204 reload_completed = 0;
29205 }
29206
29207 /* Output code to add DELTA to the first argument, and then jump
29208 to FUNCTION. Used for C++ multiple inheritance. */
29209
29210 static void
29211 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29212 HOST_WIDE_INT vcall_offset, tree function)
29213 {
29214 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29215
29216 assemble_start_function (thunk, fnname);
29217 if (TARGET_32BIT)
29218 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29219 else
29220 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29221 assemble_end_function (thunk, fnname);
29222 }
29223
29224 int
29225 arm_emit_vector_const (FILE *file, rtx x)
29226 {
29227 int i;
29228 const char * pattern;
29229
29230 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29231
29232 switch (GET_MODE (x))
29233 {
29234 case E_V2SImode: pattern = "%08x"; break;
29235 case E_V4HImode: pattern = "%04x"; break;
29236 case E_V8QImode: pattern = "%02x"; break;
29237 default: gcc_unreachable ();
29238 }
29239
29240 fprintf (file, "0x");
29241 for (i = CONST_VECTOR_NUNITS (x); i--;)
29242 {
29243 rtx element;
29244
29245 element = CONST_VECTOR_ELT (x, i);
29246 fprintf (file, pattern, INTVAL (element));
29247 }
29248
29249 return 1;
29250 }
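/* Example (added commentary): a V4HImode CONST_VECTOR {1, 2, 3, 4} is
   printed highest element first with the "%04x" pattern, giving
   "0x0004000300020001".  */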
29251
29252 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29253 HFmode constant pool entries are actually loaded with ldr. */
29254 void
29255 arm_emit_fp16_const (rtx c)
29256 {
29257 long bits;
29258
29259 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29260 if (WORDS_BIG_ENDIAN)
29261 assemble_zeros (2);
29262 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29263 if (!WORDS_BIG_ENDIAN)
29264 assemble_zeros (2);
29265 }
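/* Example (added commentary): the HFmode constant 1.0 has the encoding
   0x3c00, so on a little-endian target this emits the half-word 0x3c00
   followed by two bytes of zero padding; on a WORDS_BIG_ENDIAN target the
   padding is emitted first.  */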
29266
29267 const char *
29268 arm_output_load_gr (rtx *operands)
29269 {
29270 rtx reg;
29271 rtx offset;
29272 rtx wcgr;
29273 rtx sum;
29274
29275 if (!MEM_P (operands [1])
29276 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29277 || !REG_P (reg = XEXP (sum, 0))
29278 || !CONST_INT_P (offset = XEXP (sum, 1))
29279 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29280 return "wldrw%?\t%0, %1";
29281
29282 /* Fix up an out-of-range load of a GR register. */
29283 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29284 wcgr = operands[0];
29285 operands[0] = reg;
29286 output_asm_insn ("ldr%?\t%0, %1", operands);
29287
29288 operands[0] = wcgr;
29289 operands[1] = reg;
29290 output_asm_insn ("tmcr%?\t%0, %1", operands);
29291 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29292
29293 return "";
29294 }
29295
29296 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29297
29298 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29299 named arg and all anonymous args onto the stack.
29300 XXX I know the prologue shouldn't be pushing registers, but it is faster
29301 that way. */
29302
29303 static void
29304 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29305 const function_arg_info &arg,
29306 int *pretend_size,
29307 int second_time ATTRIBUTE_UNUSED)
29308 {
29309 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29310 int nregs;
29311
29312 cfun->machine->uses_anonymous_args = 1;
29313 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29314 {
29315 nregs = pcum->aapcs_ncrn;
29316 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29317 && (nregs & 1))
29318 {
29319 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29320 if (res < 0 && warn_psabi)
29321 inform (input_location, "parameter passing for argument of "
29322 "type %qT changed in GCC 7.1", arg.type);
29323 else if (res > 0)
29324 {
29325 nregs++;
29326 if (res > 1 && warn_psabi)
29327 inform (input_location,
29328 "parameter passing for argument of type "
29329 "%qT changed in GCC 9.1", arg.type);
29330 }
29331 }
29332 }
29333 else
29334 nregs = pcum->nregs;
29335
29336 if (nregs < NUM_ARG_REGS)
29337 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29338 }
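/* Example (added commentary): for an AAPCS variadic function such as
   "int f (int fmt, ...)", a single core register is used by the named
   argument, so nregs is 1 and *pretend_size becomes
   (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12 with the usual four argument
   registers, causing the prologue to push r1-r3 next to the stacked
   anonymous arguments.  */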
29339
29340 /* We can't rely on the caller doing the proper promotion when
29341 using APCS or ATPCS. */
29342
29343 static bool
29344 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29345 {
29346 return !TARGET_AAPCS_BASED;
29347 }
29348
29349 static machine_mode
29350 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29351 machine_mode mode,
29352 int *punsignedp ATTRIBUTE_UNUSED,
29353 const_tree fntype ATTRIBUTE_UNUSED,
29354 int for_return ATTRIBUTE_UNUSED)
29355 {
29356 if (GET_MODE_CLASS (mode) == MODE_INT
29357 && GET_MODE_SIZE (mode) < 4)
29358 return SImode;
29359
29360 return mode;
29361 }
29362
29363
29364 static bool
29365 arm_default_short_enums (void)
29366 {
29367 return ARM_DEFAULT_SHORT_ENUMS;
29368 }
29369
29370
29371 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29372
29373 static bool
29374 arm_align_anon_bitfield (void)
29375 {
29376 return TARGET_AAPCS_BASED;
29377 }
29378
29379
29380 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29381
29382 static tree
29383 arm_cxx_guard_type (void)
29384 {
29385 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29386 }
29387
29388
29389 /* The EABI says test the least significant bit of a guard variable. */
29390
29391 static bool
29392 arm_cxx_guard_mask_bit (void)
29393 {
29394 return TARGET_AAPCS_BASED;
29395 }
29396
29397
29398 /* The EABI specifies that all array cookies are 8 bytes long. */
29399
29400 static tree
29401 arm_get_cookie_size (tree type)
29402 {
29403 tree size;
29404
29405 if (!TARGET_AAPCS_BASED)
29406 return default_cxx_get_cookie_size (type);
29407
29408 size = build_int_cst (sizetype, 8);
29409 return size;
29410 }
29411
29412
29413 /* The EABI says that array cookies should also contain the element size. */
29414
29415 static bool
29416 arm_cookie_has_size (void)
29417 {
29418 return TARGET_AAPCS_BASED;
29419 }
29420
29421
29422 /* The EABI says constructors and destructors should return a pointer to
29423 the object constructed/destroyed. */
29424
29425 static bool
29426 arm_cxx_cdtor_returns_this (void)
29427 {
29428 return TARGET_AAPCS_BASED;
29429 }
29430
29431 /* The EABI says that an inline function may never be the key
29432 method. */
29433
29434 static bool
29435 arm_cxx_key_method_may_be_inline (void)
29436 {
29437 return !TARGET_AAPCS_BASED;
29438 }
29439
29440 static void
29441 arm_cxx_determine_class_data_visibility (tree decl)
29442 {
29443 if (!TARGET_AAPCS_BASED
29444 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29445 return;
29446
29447 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29448 is exported. However, on systems without dynamic vague linkage,
29449 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29450 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29451 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29452 else
29453 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29454 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29455 }
29456
29457 static bool
29458 arm_cxx_class_data_always_comdat (void)
29459 {
29460 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29461 vague linkage if the class has no key function. */
29462 return !TARGET_AAPCS_BASED;
29463 }
29464
29465
29466 /* The EABI says __aeabi_atexit should be used to register static
29467 destructors. */
29468
29469 static bool
29470 arm_cxx_use_aeabi_atexit (void)
29471 {
29472 return TARGET_AAPCS_BASED;
29473 }
29474
29475
29476 void
29477 arm_set_return_address (rtx source, rtx scratch)
29478 {
29479 arm_stack_offsets *offsets;
29480 HOST_WIDE_INT delta;
29481 rtx addr, mem;
29482 unsigned long saved_regs;
29483
29484 offsets = arm_get_frame_offsets ();
29485 saved_regs = offsets->saved_regs_mask;
29486
29487 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29488 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29489 else
29490 {
29491 if (frame_pointer_needed)
29492 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29493 else
29494 {
29495 /* LR will be the first saved register. */
29496 delta = offsets->outgoing_args - (offsets->frame + 4);
29497
29498
29499 if (delta >= 4096)
29500 {
29501 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29502 GEN_INT (delta & ~4095)));
29503 addr = scratch;
29504 delta &= 4095;
29505 }
29506 else
29507 addr = stack_pointer_rtx;
29508
29509 addr = plus_constant (Pmode, addr, delta);
29510 }
29511
29512 /* The store needs to be marked to prevent DSE from deleting
29513 it as dead if it is based on fp. */
29514 mem = gen_frame_mem (Pmode, addr);
29515 MEM_VOLATILE_P (mem) = true;
29516 emit_move_insn (mem, source);
29517 }
29518 }
29519
29520
29521 void
29522 thumb_set_return_address (rtx source, rtx scratch)
29523 {
29524 arm_stack_offsets *offsets;
29525 HOST_WIDE_INT delta;
29526 HOST_WIDE_INT limit;
29527 int reg;
29528 rtx addr, mem;
29529 unsigned long mask;
29530
29531 emit_use (source);
29532
29533 offsets = arm_get_frame_offsets ();
29534 mask = offsets->saved_regs_mask;
29535 if (mask & (1 << LR_REGNUM))
29536 {
29537 limit = 1024;
29538 /* Find the saved regs. */
29539 if (frame_pointer_needed)
29540 {
29541 delta = offsets->soft_frame - offsets->saved_args;
29542 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29543 if (TARGET_THUMB1)
29544 limit = 128;
29545 }
29546 else
29547 {
29548 delta = offsets->outgoing_args - offsets->saved_args;
29549 reg = SP_REGNUM;
29550 }
29551 /* Allow for the stack frame. */
29552 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29553 delta -= 16;
29554 /* The link register is always the first saved register. */
29555 delta -= 4;
29556
29557 /* Construct the address. */
29558 addr = gen_rtx_REG (SImode, reg);
29559 if (delta > limit)
29560 {
29561 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29562 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29563 addr = scratch;
29564 }
29565 else
29566 addr = plus_constant (Pmode, addr, delta);
29567
29568 /* The store needs to be marked to prevent DSE from deleting
29569 it as dead if it is based on fp. */
29570 mem = gen_frame_mem (Pmode, addr);
29571 MEM_VOLATILE_P (mem) = true;
29572 emit_move_insn (mem, source);
29573 }
29574 else
29575 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29576 }
29577
29578 /* Implements target hook vector_mode_supported_p. */
29579 bool
29580 arm_vector_mode_supported_p (machine_mode mode)
29581 {
29582 /* Neon also supports V2SImode, etc. listed in the clause below. */
29583 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29584 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29585 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29586 || mode == V8BFmode))
29587 return true;
29588
29589 if ((TARGET_NEON || TARGET_IWMMXT)
29590 && ((mode == V2SImode)
29591 || (mode == V4HImode)
29592 || (mode == V8QImode)))
29593 return true;
29594
29595 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29596 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29597 || mode == V2HAmode))
29598 return true;
29599
29600 if (TARGET_HAVE_MVE
29601 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29602 return true;
29603
29604 if (TARGET_HAVE_MVE_FLOAT
29605 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29606 return true;
29607
29608 return false;
29609 }
29610
29611 /* Implements target hook array_mode_supported_p. */
29612
29613 static bool
29614 arm_array_mode_supported_p (machine_mode mode,
29615 unsigned HOST_WIDE_INT nelems)
29616 {
29617 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29618 for now, as the lane-swapping logic needs to be extended in the expanders.
29619 See PR target/82518. */
29620 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29621 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29622 && (nelems >= 2 && nelems <= 4))
29623 return true;
29624
29625 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29626 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29627 return true;
29628
29629 return false;
29630 }
29631
29632 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29633 registers when autovectorizing for Neon, at least until multiple vector
29634 widths are supported properly by the middle-end. */
29635
29636 static machine_mode
29637 arm_preferred_simd_mode (scalar_mode mode)
29638 {
29639 if (TARGET_NEON)
29640 switch (mode)
29641 {
29642 case E_HFmode:
29643 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29644 case E_SFmode:
29645 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29646 case E_SImode:
29647 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29648 case E_HImode:
29649 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29650 case E_QImode:
29651 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29652 case E_DImode:
29653 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29654 return V2DImode;
29655 break;
29656
29657 default:;
29658 }
29659
29660 if (TARGET_REALLY_IWMMXT)
29661 switch (mode)
29662 {
29663 case E_SImode:
29664 return V2SImode;
29665 case E_HImode:
29666 return V4HImode;
29667 case E_QImode:
29668 return V8QImode;
29669
29670 default:;
29671 }
29672
29673 if (TARGET_HAVE_MVE)
29674 switch (mode)
29675 {
29676 case E_QImode:
29677 return V16QImode;
29678 case E_HImode:
29679 return V8HImode;
29680 case E_SImode:
29681 return V4SImode;
29682
29683 default:;
29684 }
29685
29686 if (TARGET_HAVE_MVE_FLOAT)
29687 switch (mode)
29688 {
29689 case E_HFmode:
29690 return V8HFmode;
29691 case E_SFmode:
29692 return V4SFmode;
29693
29694 default:;
29695 }
29696
29697 return word_mode;
29698 }
29699
29700 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29701
29702 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29703 using r0-r4 for function arguments, r7 for the stack frame, and not have
29704 enough left over to do doubleword arithmetic. For Thumb-2 all the
29705 potentially problematic instructions accept high registers so this is not
29706 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29707 that require many low registers. */
29708 static bool
29709 arm_class_likely_spilled_p (reg_class_t rclass)
29710 {
29711 if ((TARGET_THUMB1 && rclass == LO_REGS)
29712 || rclass == CC_REG)
29713 return true;
29714
29715 return default_class_likely_spilled_p (rclass);
29716 }
29717
29718 /* Implements target hook small_register_classes_for_mode_p. */
29719 bool
29720 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29721 {
29722 return TARGET_THUMB1;
29723 }
29724
29725 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29726 ARM insns and therefore guarantee that the shift count is modulo 256.
29727 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29728 guarantee no particular behavior for out-of-range counts. */
29729
29730 static unsigned HOST_WIDE_INT
29731 arm_shift_truncation_mask (machine_mode mode)
29732 {
29733 return mode == SImode ? 255 : 0;
29734 }
29735
29736
29737 /* Map internal gcc register numbers to DWARF2 register numbers. */
29738
29739 unsigned int
29740 arm_debugger_regno (unsigned int regno)
29741 {
29742 if (regno < 16)
29743 return regno;
29744
29745 if (IS_VFP_REGNUM (regno))
29746 {
29747 /* See comment in arm_dwarf_register_span. */
29748 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29749 return 64 + regno - FIRST_VFP_REGNUM;
29750 else
29751 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29752 }
29753
29754 if (IS_IWMMXT_GR_REGNUM (regno))
29755 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29756
29757 if (IS_IWMMXT_REGNUM (regno))
29758 return 112 + regno - FIRST_IWMMXT_REGNUM;
29759
29760 if (IS_PAC_REGNUM (regno))
29761 return DWARF_PAC_REGNUM;
29762
29763 return DWARF_FRAME_REGISTERS;
29764 }
29765
29766 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29767 GCC models them as 64 32-bit registers, so we need to describe this to
29768 the DWARF generation code. Other registers can use the default. */
29769 static rtx
29770 arm_dwarf_register_span (rtx rtl)
29771 {
29772 machine_mode mode;
29773 unsigned regno;
29774 rtx parts[16];
29775 int nregs;
29776 int i;
29777
29778 regno = REGNO (rtl);
29779 if (!IS_VFP_REGNUM (regno))
29780 return NULL_RTX;
29781
29782 /* XXX FIXME: The EABI defines two VFP register ranges:
29783 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29784 256-287: D0-D31
29785 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29786 corresponding D register. Until GDB supports this, we shall use the
29787 legacy encodings. We also use these encodings for D0-D15 for
29788 compatibility with older debuggers. */
29789 mode = GET_MODE (rtl);
29790 if (GET_MODE_SIZE (mode) < 8)
29791 return NULL_RTX;
29792
29793 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29794 {
29795 nregs = GET_MODE_SIZE (mode) / 4;
29796 for (i = 0; i < nregs; i += 2)
29797 if (TARGET_BIG_END)
29798 {
29799 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29800 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29801 }
29802 else
29803 {
29804 parts[i] = gen_rtx_REG (SImode, regno + i);
29805 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29806 }
29807 }
29808 else
29809 {
29810 nregs = GET_MODE_SIZE (mode) / 8;
29811 for (i = 0; i < nregs; i++)
29812 parts[i] = gen_rtx_REG (DImode, regno + i);
29813 }
29814
29815 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29816 }
29817
29818 #if ARM_UNWIND_INFO
29819 /* Emit unwind directives for a store-multiple instruction or stack pointer
29820 push during alignment.
29821 These should only ever be generated by the function prologue code, so
29822 expect them to have a particular form.
29823 The store-multiple instruction sometimes pushes pc as the last register,
29824 although it should not be tracked in the unwind information, or for -Os
29825 sometimes pushes some dummy registers before the first register that needs
29826 to be tracked in the unwind information; such dummy registers are there just
29827 to avoid a separate stack adjustment, and will not be restored in the
29828 epilogue. */
29829
29830 static void
29831 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29832 {
29833 int i;
29834 HOST_WIDE_INT offset;
29835 HOST_WIDE_INT nregs;
29836 int reg_size;
29837 unsigned reg;
29838 unsigned lastreg;
29839 unsigned padfirst = 0, padlast = 0;
29840 rtx e;
29841
29842 e = XVECEXP (p, 0, 0);
29843 gcc_assert (GET_CODE (e) == SET);
29844
29845 /* First insn will adjust the stack pointer. */
29846 gcc_assert (GET_CODE (e) == SET
29847 && REG_P (SET_DEST (e))
29848 && REGNO (SET_DEST (e)) == SP_REGNUM
29849 && GET_CODE (SET_SRC (e)) == PLUS);
29850
29851 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29852 nregs = XVECLEN (p, 0) - 1;
29853 gcc_assert (nregs);
29854
29855 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29856 if (reg < 16 || IS_PAC_REGNUM (reg))
29857 {
29858 /* For -Os dummy registers can be pushed at the beginning to
29859 avoid separate stack pointer adjustment. */
29860 e = XVECEXP (p, 0, 1);
29861 e = XEXP (SET_DEST (e), 0);
29862 if (GET_CODE (e) == PLUS)
29863 padfirst = INTVAL (XEXP (e, 1));
29864 gcc_assert (padfirst == 0 || optimize_size);
29865 /* The function prologue may also push pc, but not annotate it as it is
29866 never restored. We turn this into a stack pointer adjustment. */
29867 e = XVECEXP (p, 0, nregs);
29868 e = XEXP (SET_DEST (e), 0);
29869 if (GET_CODE (e) == PLUS)
29870 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29871 else
29872 padlast = offset - 4;
29873 gcc_assert (padlast == 0 || padlast == 4);
29874 if (padlast == 4)
29875 fprintf (out_file, "\t.pad #4\n");
29876 reg_size = 4;
29877 fprintf (out_file, "\t.save {");
29878 }
29879 else if (IS_VFP_REGNUM (reg))
29880 {
29881 reg_size = 8;
29882 fprintf (out_file, "\t.vsave {");
29883 }
29884 else
29885 /* Unknown register type. */
29886 gcc_unreachable ();
29887
29888 /* If the stack increment doesn't match the size of the saved registers,
29889 something has gone horribly wrong. */
29890 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29891
29892 offset = padfirst;
29893 lastreg = 0;
29894 /* The remaining insns will describe the stores. */
29895 for (i = 1; i <= nregs; i++)
29896 {
29897 /* Expect (set (mem <addr>) (reg)).
29898 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29899 e = XVECEXP (p, 0, i);
29900 gcc_assert (GET_CODE (e) == SET
29901 && MEM_P (SET_DEST (e))
29902 && REG_P (SET_SRC (e)));
29903
29904 reg = REGNO (SET_SRC (e));
29905 gcc_assert (reg >= lastreg);
29906
29907 if (i != 1)
29908 fprintf (out_file, ", ");
29909 /* We can't use %r for vfp because we need to use the
29910 double precision register names. */
29911 if (IS_VFP_REGNUM (reg))
29912 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29913 else if (IS_PAC_REGNUM (reg))
29914 asm_fprintf (asm_out_file, "ra_auth_code");
29915 else
29916 asm_fprintf (out_file, "%r", reg);
29917
29918 if (flag_checking)
29919 {
29920 /* Check that the addresses are consecutive. */
29921 e = XEXP (SET_DEST (e), 0);
29922 if (GET_CODE (e) == PLUS)
29923 gcc_assert (REG_P (XEXP (e, 0))
29924 && REGNO (XEXP (e, 0)) == SP_REGNUM
29925 && CONST_INT_P (XEXP (e, 1))
29926 && offset == INTVAL (XEXP (e, 1)));
29927 else
29928 gcc_assert (i == 1
29929 && REG_P (e)
29930 && REGNO (e) == SP_REGNUM);
29931 offset += reg_size;
29932 }
29933 }
29934 fprintf (out_file, "}\n");
29935 if (padfirst)
29936 fprintf (out_file, "\t.pad #%d\n", padfirst);
29937 }
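/* Example (added commentary): a prologue store-multiple such as
   "push {r4, r5, lr}" (a 12-byte stack decrement storing three core
   registers) is annotated as

       .save {r4, r5, lr}

   while a push that also includes pc as an unrestored dummy register gets
   an extra ".pad #4" printed before the ".save" directive.  */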
29938
29939 /* Emit unwind directives for a SET. */
29940
29941 static void
29942 arm_unwind_emit_set (FILE * out_file, rtx p)
29943 {
29944 rtx e0;
29945 rtx e1;
29946 unsigned reg;
29947
29948 e0 = XEXP (p, 0);
29949 e1 = XEXP (p, 1);
29950 switch (GET_CODE (e0))
29951 {
29952 case MEM:
29953 /* Pushing a single register. */
29954 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29955 || !REG_P (XEXP (XEXP (e0, 0), 0))
29956 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29957 abort ();
29958
29959 asm_fprintf (out_file, "\t.save ");
29960 if (IS_VFP_REGNUM (REGNO (e1)))
29961 asm_fprintf(out_file, "{d%d}\n",
29962 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29963 else
29964 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29965 break;
29966
29967 case REG:
29968 if (REGNO (e0) == SP_REGNUM)
29969 {
29970 /* A stack increment. */
29971 if (GET_CODE (e1) != PLUS
29972 || !REG_P (XEXP (e1, 0))
29973 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29974 || !CONST_INT_P (XEXP (e1, 1)))
29975 abort ();
29976
29977 asm_fprintf (out_file, "\t.pad #%wd\n",
29978 -INTVAL (XEXP (e1, 1)));
29979 }
29980 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29981 {
29982 HOST_WIDE_INT offset;
29983
29984 if (GET_CODE (e1) == PLUS)
29985 {
29986 if (!REG_P (XEXP (e1, 0))
29987 || !CONST_INT_P (XEXP (e1, 1)))
29988 abort ();
29989 reg = REGNO (XEXP (e1, 0));
29990 offset = INTVAL (XEXP (e1, 1));
29991 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29992 HARD_FRAME_POINTER_REGNUM, reg,
29993 offset);
29994 }
29995 else if (REG_P (e1))
29996 {
29997 reg = REGNO (e1);
29998 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29999 HARD_FRAME_POINTER_REGNUM, reg);
30000 }
30001 else
30002 abort ();
30003 }
30004 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30005 {
30006 /* Move from sp to reg. */
30007 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30008 }
30009 else if (GET_CODE (e1) == PLUS
30010 && REG_P (XEXP (e1, 0))
30011 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30012 && CONST_INT_P (XEXP (e1, 1)))
30013 {
30014 /* Set reg to offset from sp. */
30015 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30016 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30017 }
30018 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30019 {
30020 if (cfun->machine->pacspval_needed)
30021 asm_fprintf (out_file, "\t.pacspval\n");
30022 }
30023 else
30024 abort ();
30025 break;
30026
30027 default:
30028 abort ();
30029 }
30030 }
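/* Examples (added commentary) of the SET forms handled above:

     (set (mem (pre_dec sp)) (reg r4))    ->  .save {r4}
     (set sp (plus sp (const_int -16)))   ->  .pad #16
     (set fp (plus sp (const_int 8)))     ->  .setfp <fp>, sp, #8
     (set (reg rN) sp)                    ->  .movsp rN

   where <fp> is the hard frame pointer register for the current
   instruction set.  */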
30031
30032
30033 /* Emit unwind directives for the given insn. */
30034
30035 static void
30036 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30037 {
30038 rtx note, pat;
30039 bool handled_one = false;
30040
30041 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30042 return;
30043
30044 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30045 && (TREE_NOTHROW (current_function_decl)
30046 || crtl->all_throwers_are_sibcalls))
30047 return;
30048
30049 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30050 return;
30051
30052 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30053 {
30054 switch (REG_NOTE_KIND (note))
30055 {
30056 case REG_FRAME_RELATED_EXPR:
30057 pat = XEXP (note, 0);
30058 goto found;
30059
30060 case REG_CFA_REGISTER:
30061 pat = XEXP (note, 0);
30062 if (pat == NULL)
30063 {
30064 pat = PATTERN (insn);
30065 if (GET_CODE (pat) == PARALLEL)
30066 pat = XVECEXP (pat, 0, 0);
30067 }
30068
30069 /* Only emitted for IS_STACKALIGN re-alignment. */
30070 {
30071 rtx dest, src;
30072 unsigned reg;
30073
30074 src = SET_SRC (pat);
30075 dest = SET_DEST (pat);
30076
30077 gcc_assert (src == stack_pointer_rtx
30078 || IS_PAC_REGNUM (REGNO (src)));
30079 reg = REGNO (dest);
30080
30081 if (IS_PAC_REGNUM (REGNO (src)))
30082 arm_unwind_emit_set (out_file, PATTERN (insn));
30083 else
30084 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30085 reg + 0x90, reg);
30086 }
30087 handled_one = true;
30088 break;
30089
30090 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
30091 to get correct dwarf information for shrink-wrap. We should not
30092 emit unwind information for it because these are used either for
30093 pretend arguments or notes to adjust sp and restore registers from
30094 stack. */
30095 case REG_CFA_DEF_CFA:
30096 case REG_CFA_ADJUST_CFA:
30097 case REG_CFA_RESTORE:
30098 return;
30099
30100 case REG_CFA_EXPRESSION:
30101 case REG_CFA_OFFSET:
30102 /* ??? Only handling here what we actually emit. */
30103 gcc_unreachable ();
30104
30105 default:
30106 break;
30107 }
30108 }
30109 if (handled_one)
30110 return;
30111 pat = PATTERN (insn);
30112 found:
30113
30114 switch (GET_CODE (pat))
30115 {
30116 case SET:
30117 arm_unwind_emit_set (out_file, pat);
30118 break;
30119
30120 case SEQUENCE:
30121 /* Store multiple. */
30122 arm_unwind_emit_sequence (out_file, pat);
30123 break;
30124
30125 default:
30126 abort();
30127 }
30128 }
30129
30130
30131 /* Output a reference from a function exception table to the type_info
30132 object X. The EABI specifies that the symbol should be relocated by
30133 an R_ARM_TARGET2 relocation. */
30134
30135 static bool
30136 arm_output_ttype (rtx x)
30137 {
30138 fputs ("\t.word\t", asm_out_file);
30139 output_addr_const (asm_out_file, x);
30140 /* Use special relocations for symbol references. */
30141 if (!CONST_INT_P (x))
30142 fputs ("(TARGET2)", asm_out_file);
30143 fputc ('\n', asm_out_file);
30144
30145 return TRUE;
30146 }
30147
30148 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30149
30150 static void
30151 arm_asm_emit_except_personality (rtx personality)
30152 {
30153 fputs ("\t.personality\t", asm_out_file);
30154 output_addr_const (asm_out_file, personality);
30155 fputc ('\n', asm_out_file);
30156 }
30157 #endif /* ARM_UNWIND_INFO */
30158
30159 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30160
30161 static void
30162 arm_asm_init_sections (void)
30163 {
30164 #if ARM_UNWIND_INFO
30165 exception_section = get_unnamed_section (0, output_section_asm_op,
30166 "\t.handlerdata");
30167 #endif /* ARM_UNWIND_INFO */
30168
30169 #ifdef OBJECT_FORMAT_ELF
30170 if (target_pure_code)
30171 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30172 #endif
30173 }
30174
30175 /* Output unwind directives for the start/end of a function. */
30176
30177 void
30178 arm_output_fn_unwind (FILE * f, bool prologue)
30179 {
30180 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30181 return;
30182
30183 if (prologue)
30184 fputs ("\t.fnstart\n", f);
30185 else
30186 {
30187 /* If this function will never be unwound, then mark it as such.
30188 The same condition is used in arm_unwind_emit to suppress
30189 the frame annotations. */
30190 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30191 && (TREE_NOTHROW (current_function_decl)
30192 || crtl->all_throwers_are_sibcalls))
30193 fputs("\t.cantunwind\n", f);
30194
30195 fputs ("\t.fnend\n", f);
30196 }
30197 }
30198
30199 static bool
30200 arm_emit_tls_decoration (FILE *fp, rtx x)
30201 {
30202 enum tls_reloc reloc;
30203 rtx val;
30204
30205 val = XVECEXP (x, 0, 0);
30206 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30207
30208 output_addr_const (fp, val);
30209
30210 switch (reloc)
30211 {
30212 case TLS_GD32:
30213 fputs ("(tlsgd)", fp);
30214 break;
30215 case TLS_GD32_FDPIC:
30216 fputs ("(tlsgd_fdpic)", fp);
30217 break;
30218 case TLS_LDM32:
30219 fputs ("(tlsldm)", fp);
30220 break;
30221 case TLS_LDM32_FDPIC:
30222 fputs ("(tlsldm_fdpic)", fp);
30223 break;
30224 case TLS_LDO32:
30225 fputs ("(tlsldo)", fp);
30226 break;
30227 case TLS_IE32:
30228 fputs ("(gottpoff)", fp);
30229 break;
30230 case TLS_IE32_FDPIC:
30231 fputs ("(gottpoff_fdpic)", fp);
30232 break;
30233 case TLS_LE32:
30234 fputs ("(tpoff)", fp);
30235 break;
30236 case TLS_DESCSEQ:
30237 fputs ("(tlsdesc)", fp);
30238 break;
30239 default:
30240 gcc_unreachable ();
30241 }
30242
30243 switch (reloc)
30244 {
30245 case TLS_GD32:
30246 case TLS_LDM32:
30247 case TLS_IE32:
30248 case TLS_DESCSEQ:
30249 fputs (" + (. - ", fp);
30250 output_addr_const (fp, XVECEXP (x, 0, 2));
30251 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
30252 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30253 output_addr_const (fp, XVECEXP (x, 0, 3));
30254 fputc (')', fp);
30255 break;
30256 default:
30257 break;
30258 }
30259
30260 return TRUE;
30261 }
30262
30263 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30264
30265 static void
30266 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30267 {
30268 gcc_assert (size == 4);
30269 fputs ("\t.word\t", file);
30270 output_addr_const (file, x);
30271 fputs ("(tlsldo)", file);
30272 }
30273
30274 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30275
30276 static bool
30277 arm_output_addr_const_extra (FILE *fp, rtx x)
30278 {
30279 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30280 return arm_emit_tls_decoration (fp, x);
30281 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30282 {
30283 char label[256];
30284 int labelno = INTVAL (XVECEXP (x, 0, 0));
30285
30286 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30287 assemble_name_raw (fp, label);
30288
30289 return TRUE;
30290 }
30291 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30292 {
30293 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30294 if (GOT_PCREL)
30295 fputs ("+.", fp);
30296 fputs ("-(", fp);
30297 output_addr_const (fp, XVECEXP (x, 0, 0));
30298 fputc (')', fp);
30299 return TRUE;
30300 }
30301 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30302 {
30303 output_addr_const (fp, XVECEXP (x, 0, 0));
30304 if (GOT_PCREL)
30305 fputs ("+.", fp);
30306 fputs ("-(", fp);
30307 output_addr_const (fp, XVECEXP (x, 0, 1));
30308 fputc (')', fp);
30309 return TRUE;
30310 }
30311 else if (GET_CODE (x) == CONST_VECTOR)
30312 return arm_emit_vector_const (fp, x);
30313
30314 return FALSE;
30315 }
30316
30317 /* Output assembly for a shift instruction.
30318 SET_FLAGS determines how the instruction modifies the condition codes.
30319 0 - Do not set condition codes.
30320 1 - Set condition codes.
30321 2 - Use smallest instruction. */
30322 const char *
30323 arm_output_shift(rtx * operands, int set_flags)
30324 {
30325 char pattern[100];
30326 static const char flag_chars[3] = {'?', '.', '!'};
30327 const char *shift;
30328 HOST_WIDE_INT val;
30329 char c;
30330
30331 c = flag_chars[set_flags];
30332 shift = shift_op(operands[3], &val);
30333 if (shift)
30334 {
30335 if (val != -1)
30336 operands[2] = GEN_INT(val);
30337 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30338 }
30339 else
30340 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30341
30342 output_asm_insn (pattern, operands);
30343 return "";
30344 }
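/* For example (illustrative only): with operands describing a left shift by
   a constant, the template above typically assembles to something like
   "lsl r0, r1, #3", or "lsls r0, r1, #3" when the flag-setting variant is
   requested; when no shift operation applies, a plain "mov" is emitted.  */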
30345
30346 /* Output assembly for a WMMX immediate shift instruction. */
30347 const char *
30348 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30349 {
30350 int shift = INTVAL (operands[2]);
30351 char templ[50];
30352 machine_mode opmode = GET_MODE (operands[0]);
30353
30354 gcc_assert (shift >= 0);
30355
30356 /* Handle the case where the shift value exceeds the element width: > 63 (for
30357 the D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
30358 if (((opmode == V4HImode) && (shift > 15))
30359 || ((opmode == V2SImode) && (shift > 31))
30360 || ((opmode == DImode) && (shift > 63)))
30361 {
30362 if (wror_or_wsra)
30363 {
30364 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30365 output_asm_insn (templ, operands);
30366 if (opmode == DImode)
30367 {
30368 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30369 output_asm_insn (templ, operands);
30370 }
30371 }
30372 else
30373 {
30374 /* The destination register will contain all zeros. */
30375 sprintf (templ, "wzero\t%%0");
30376 output_asm_insn (templ, operands);
30377 }
30378 return "";
30379 }
30380
30381 if ((opmode == DImode) && (shift > 32))
30382 {
30383 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30384 output_asm_insn (templ, operands);
30385 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30386 output_asm_insn (templ, operands);
30387 }
30388 else
30389 {
30390 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30391 output_asm_insn (templ, operands);
30392 }
30393 return "";
30394 }
30395
30396 /* Output assembly for a WMMX tinsr instruction. */
30397 const char *
30398 arm_output_iwmmxt_tinsr (rtx *operands)
30399 {
30400 int mask = INTVAL (operands[3]);
30401 int i;
30402 char templ[50];
30403 int units = mode_nunits[GET_MODE (operands[0])];
30404 gcc_assert ((mask & (mask - 1)) == 0);
30405 for (i = 0; i < units; ++i)
30406 {
30407 if ((mask & 0x01) == 1)
30408 {
30409 break;
30410 }
30411 mask >>= 1;
30412 }
30413 gcc_assert (i < units);
30414 {
30415 switch (GET_MODE (operands[0]))
30416 {
30417 case E_V8QImode:
30418 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30419 break;
30420 case E_V4HImode:
30421 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30422 break;
30423 case E_V2SImode:
30424 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30425 break;
30426 default:
30427 gcc_unreachable ();
30428 break;
30429 }
30430 output_asm_insn (templ, operands);
30431 }
30432 return "";
30433 }
30434
30435 /* Output a Thumb-1 casesi dispatch sequence. */
30436 const char *
30437 thumb1_output_casesi (rtx *operands)
30438 {
30439 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30440
30441 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30442
30443 switch (GET_MODE(diff_vec))
30444 {
30445 case E_QImode:
30446 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30447 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30448 case E_HImode:
30449 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30450 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30451 case E_SImode:
30452 return "bl\t%___gnu_thumb1_case_si";
30453 default:
30454 gcc_unreachable ();
30455 }
30456 }
30457
30458 /* Output a Thumb-2 casesi instruction. */
30459 const char *
30460 thumb2_output_casesi (rtx *operands)
30461 {
30462 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30463
30464 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30465
30466 output_asm_insn ("cmp\t%0, %1", operands);
30467 output_asm_insn ("bhi\t%l3", operands);
30468 switch (GET_MODE(diff_vec))
30469 {
30470 case E_QImode:
30471 return "tbb\t[%|pc, %0]";
30472 case E_HImode:
30473 return "tbh\t[%|pc, %0, lsl #1]";
30474 case E_SImode:
30475 if (flag_pic)
30476 {
30477 output_asm_insn ("adr\t%4, %l2", operands);
30478 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30479 output_asm_insn ("add\t%4, %4, %5", operands);
30480 return "bx\t%4";
30481 }
30482 else
30483 {
30484 output_asm_insn ("adr\t%4, %l2", operands);
30485 return "ldr\t%|pc, [%4, %0, lsl #2]";
30486 }
30487 default:
30488 gcc_unreachable ();
30489 }
30490 }
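/* As a sketch of what the above emits for a byte-offset dispatch table
   (register numbers are illustrative):

	cmp	r0, #<max-index>
	bhi	.Ldefault
	tbb	[pc, r0]

   with "tbh [pc, r0, lsl #1]" for half-word tables, and an adr/ldr sequence
   for word-sized (SImode) tables.  */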
30491
30492 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30493 per-core tuning structs. */
30494 static int
30495 arm_issue_rate (void)
30496 {
30497 return current_tune->issue_rate;
30498 }
30499
30500 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30501 static int
30502 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30503 {
30504 if (DEBUG_INSN_P (insn))
30505 return more;
30506
30507 rtx_code code = GET_CODE (PATTERN (insn));
30508 if (code == USE || code == CLOBBER)
30509 return more;
30510
30511 if (get_attr_type (insn) == TYPE_NO_INSN)
30512 return more;
30513
30514 return more - 1;
30515 }
30516
30517 /* Return how many instructions the scheduler should look ahead to choose the
30518 best one. */
30519 static int
30520 arm_first_cycle_multipass_dfa_lookahead (void)
30521 {
30522 int issue_rate = arm_issue_rate ();
30523
30524 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30525 }
30526
30527 /* Enable modeling of L2 auto-prefetcher. */
30528 static int
30529 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30530 {
30531 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30532 }
30533
30534 const char *
30535 arm_mangle_type (const_tree type)
30536 {
30537 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30538 has to be mangled as if it is in the "std" namespace. */
30539 if (TARGET_AAPCS_BASED
30540 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30541 return "St9__va_list";
30542
30543 /* Half-precision floating point types. */
30544 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30545 {
30546 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30547 return NULL;
30548 if (TYPE_MODE (type) == BFmode)
30549 return "u6__bf16";
30550 else
30551 return "Dh";
30552 }
30553
30554 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30555 builtin type. */
30556 if (TYPE_NAME (type) != NULL)
30557 return arm_mangle_builtin_type (type);
30558
30559 /* Use the default mangling. */
30560 return NULL;
30561 }
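/* Examples of the mangling above: an AAPCS "va_list" argument mangles as
   "St9__va_list", "__bf16" as "u6__bf16" and "__fp16" as "Dh", while
   _Float16 (float16_type_node) falls through to the default mangling.  */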
30562
30563 /* Order of allocation of core registers for Thumb: this allocation is
30564 written over the corresponding initial entries of the array
30565 initialized with REG_ALLOC_ORDER. We allocate all low registers
30566 first. Saving and restoring a low register is usually cheaper than
30567 using a call-clobbered high register. */
30568
30569 static const int thumb_core_reg_alloc_order[] =
30570 {
30571 3, 2, 1, 0, 4, 5, 6, 7,
30572 12, 14, 8, 9, 10, 11
30573 };
30574
30575 /* Adjust register allocation order when compiling for Thumb. */
30576
30577 void
30578 arm_order_regs_for_local_alloc (void)
30579 {
30580 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30581 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30582 if (TARGET_THUMB)
30583 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30584 sizeof (thumb_core_reg_alloc_order));
30585 }
30586
30587 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30588
30589 bool
30590 arm_frame_pointer_required (void)
30591 {
30592 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30593 return true;
30594
30595 /* If the function receives nonlocal gotos, it needs to save the frame
30596 pointer in the nonlocal_goto_save_area object. */
30597 if (cfun->has_nonlocal_label)
30598 return true;
30599
30600 /* The frame pointer is required for non-leaf APCS frames. */
30601 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30602 return true;
30603
30604 /* If we are probing the stack in the prologue, we will have a faulting
30605 instruction prior to the stack adjustment and this requires a frame
30606 pointer if we want to catch the exception using the EABI unwinder. */
30607 if (!IS_INTERRUPT (arm_current_func_type ())
30608 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30609 || flag_stack_clash_protection)
30610 && arm_except_unwind_info (&global_options) == UI_TARGET
30611 && cfun->can_throw_non_call_exceptions)
30612 {
30613 HOST_WIDE_INT size = get_frame_size ();
30614
30615 /* That's irrelevant if there is no stack adjustment. */
30616 if (size <= 0)
30617 return false;
30618
30619 /* That's relevant only if there is a stack probe. */
30620 if (crtl->is_leaf && !cfun->calls_alloca)
30621 {
30622 /* We don't have the final size of the frame so adjust. */
30623 size += 32 * UNITS_PER_WORD;
30624 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30625 return true;
30626 }
30627 else
30628 return true;
30629 }
30630
30631 return false;
30632 }
30633
30634 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30635 All modes except THUMB1 have conditional execution.
30636 If we have conditional arithmetic, return false before reload to
30637 enable some ifcvt transformations. */
30638 static bool
30639 arm_have_conditional_execution (void)
30640 {
30641 bool has_cond_exec, enable_ifcvt_trans;
30642
30643 /* Only THUMB1 cannot support conditional execution. */
30644 has_cond_exec = !TARGET_THUMB1;
30645
30646 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30647 before reload. */
30648 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30649
30650 return has_cond_exec && !enable_ifcvt_trans;
30651 }
30652
30653 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30654 static HOST_WIDE_INT
30655 arm_vector_alignment (const_tree type)
30656 {
30657 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30658
30659 if (TARGET_AAPCS_BASED)
30660 align = MIN (align, 64);
30661
30662 return align;
30663 }
30664
30665 static unsigned int
30666 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30667 {
30668 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30669 {
30670 modes->safe_push (V16QImode);
30671 modes->safe_push (V8QImode);
30672 }
30673 return 0;
30674 }
30675
30676 static bool
30677 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30678 {
30679 /* Vectors which aren't in packed structures will not be less aligned than
30680 the natural alignment of their element type, so this is safe. */
30681 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30682 return !is_packed;
30683
30684 return default_builtin_vector_alignment_reachable (type, is_packed);
30685 }
30686
30687 static bool
30688 arm_builtin_support_vector_misalignment (machine_mode mode,
30689 const_tree type, int misalignment,
30690 bool is_packed)
30691 {
30692 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30693 {
30694 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30695
30696 if (is_packed)
30697 return align == 1;
30698
30699 /* If the misalignment is unknown, we should be able to handle the access
30700 so long as it is not to a member of a packed data structure. */
30701 if (misalignment == -1)
30702 return true;
30703
30704 /* Return true if the misalignment is a multiple of the natural alignment
30705 of the vector's element type. This is probably always going to be
30706 true in practice, since we've already established that this isn't a
30707 packed access. */
30708 return ((misalignment % align) == 0);
30709 }
30710
30711 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30712 is_packed);
30713 }
30714
30715 static void
30716 arm_conditional_register_usage (void)
30717 {
30718 int regno;
30719
30720 if (TARGET_THUMB1 && optimize_size)
30721 {
30722 /* When optimizing for size on Thumb-1, it's better not
30723 to use the HI regs, because of the overhead of
30724 stacking them. */
30725 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30726 fixed_regs[regno] = call_used_regs[regno] = 1;
30727 }
30728
30729 /* The link register can be clobbered by any branch insn,
30730 but we have no way to track that at present, so mark
30731 it as unavailable. */
30732 if (TARGET_THUMB1)
30733 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30734
30735 if (TARGET_32BIT && TARGET_VFP_BASE)
30736 {
30737 /* VFPv3 registers are disabled when earlier VFP
30738 versions are selected due to the definition of
30739 LAST_VFP_REGNUM. */
30740 for (regno = FIRST_VFP_REGNUM;
30741 regno <= LAST_VFP_REGNUM; ++ regno)
30742 {
30743 fixed_regs[regno] = 0;
30744 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30745 || regno >= FIRST_VFP_REGNUM + 32;
30746 }
30747 if (TARGET_HAVE_MVE)
30748 fixed_regs[VPR_REGNUM] = 0;
30749 }
30750
30751 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30752 {
30753 regno = FIRST_IWMMXT_GR_REGNUM;
30754 /* The 2002/10/09 revision of the XScale ABI has wCG0
30755 and wCG1 as call-preserved registers. The 2002/11/21
30756 revision changed this so that all wCG registers are
30757 scratch registers. */
30758 for (regno = FIRST_IWMMXT_GR_REGNUM;
30759 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30760 fixed_regs[regno] = 0;
30761 /* The XScale ABI has wR0 - wR9 as scratch registers,
30762 the rest as call-preserved registers. */
30763 for (regno = FIRST_IWMMXT_REGNUM;
30764 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30765 {
30766 fixed_regs[regno] = 0;
30767 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30768 }
30769 }
30770
30771 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30772 {
30773 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30774 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30775 }
30776 else if (TARGET_APCS_STACK)
30777 {
30778 fixed_regs[10] = 1;
30779 call_used_regs[10] = 1;
30780 }
30781 /* -mcaller-super-interworking reserves r11 for calls to
30782 _interwork_r11_call_via_rN(). Making the register global
30783 is an easy way of ensuring that it remains valid for all
30784 calls. */
30785 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30786 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30787 {
30788 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30789 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30790 if (TARGET_CALLER_INTERWORKING)
30791 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30792 }
30793
30794 /* The Q and GE bits are only accessed via special ACLE patterns. */
30795 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30796 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30797
30798 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30799 }
30800
30801 static reg_class_t
30802 arm_preferred_rename_class (reg_class_t rclass)
30803 {
30804 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30805 using GENERAL_REGS.  During the register rename pass we therefore prefer
30806 LO_REGS, which can reduce code size. */
30807 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30808 return LO_REGS;
30809 else
30810 return NO_REGS;
30811 }
30812
30813 /* Compute the attribute "length" of insn "*push_multi".
30814 So this function MUST be kept in sync with that insn pattern. */
30815 int
30816 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30817 {
30818 int i, regno, hi_reg;
30819 int num_saves = XVECLEN (parallel_op, 0);
30820
30821 /* ARM mode. */
30822 if (TARGET_ARM)
30823 return 4;
30824 /* Thumb1 mode. */
30825 if (TARGET_THUMB1)
30826 return 2;
30827
30828 /* Thumb2 mode. */
30829 regno = REGNO (first_op);
30830 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
30831 list fits in 8 bits.  Normally this means all registers in the list must be
30832 LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we must use 32-bit
30833 encodings, with one exception: for PUSH, LR (a HI_REG) can still be used
30834 with the 16-bit encoding. */
30835 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30836 for (i = 1; i < num_saves && !hi_reg; i++)
30837 {
30838 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30839 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30840 }
30841
30842 if (!hi_reg)
30843 return 2;
30844 return 4;
30845 }
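/* Illustrative Thumb-2 examples of the length computation above:
   "push {r0-r7, lr}" uses only LO_REGS plus LR, so it gets the 16-bit
   encoding (length 2), whereas "push {r4, r8}" includes a high register
   other than LR and therefore needs the 32-bit encoding (length 4).  */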
30846
30847 /* Compute the attribute "length" of insn. Currently, this function is used
30848 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30849 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30850 rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
30851 true if OPERANDS contains an insn which explicitly updates the base register. */
30852
30853 int
30854 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30855 {
30856 /* ARM mode. */
30857 if (TARGET_ARM)
30858 return 4;
30859 /* Thumb1 mode. */
30860 if (TARGET_THUMB1)
30861 return 2;
30862
30863 rtx parallel_op = operands[0];
30864 /* Start from the index of the last element of the PARALLEL. */
30865 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30866 /* Initialize REGNO to the number of the base register. */
30867 unsigned regno = REGNO (operands[1]);
30868 /* Skip return and write back pattern.
30869 We only need register pop pattern for later analysis. */
30870 unsigned first_indx = 0;
30871 first_indx += return_pc ? 1 : 0;
30872 first_indx += write_back_p ? 1 : 0;
30873
30874 /* A pop operation can be done through LDM or POP. If the base register is SP
30875 and write back is used, then LDM is an alias of POP. */
30876 bool pop_p = (regno == SP_REGNUM && write_back_p);
30877 bool ldm_p = !pop_p;
30878
30879 /* Check base register for LDM. */
30880 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30881 return 4;
30882
30883 /* Check each register in the list. */
30884 for (; indx >= first_indx; indx--)
30885 {
30886 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30887 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30888 comment in arm_attr_length_push_multi. */
30889 if (REGNO_REG_CLASS (regno) == HI_REGS
30890 && (regno != PC_REGNUM || ldm_p))
30891 return 4;
30892 }
30893
30894 return 2;
30895 }
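/* Illustrative Thumb-2 examples of the above: "pop {r4-r7, pc}" (SP base
   with write back, LO_REGS plus PC) gets the 16-bit encoding (length 2),
   while an LDM from a high base register such as "ldmia r8!, {r0-r3}"
   needs the 32-bit encoding (length 4).  */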
30896
30897 /* Compute the number of instructions emitted by output_move_double. */
30898 int
30899 arm_count_output_move_double_insns (rtx *operands)
30900 {
30901 int count;
30902 rtx ops[2];
30903 /* output_move_double may modify the operands array, so call it
30904 here on a copy of the array. */
30905 ops[0] = operands[0];
30906 ops[1] = operands[1];
30907 output_move_double (ops, false, &count);
30908 return count;
30909 }
30910
30911 /* Same as above, but operands are a register/memory pair in SImode.
30912 Assumes operands has the base register in position 0 and memory in position
30913 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30914 int
30915 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30916 {
30917 int count;
30918 rtx ops[2];
30919 int regnum, memnum;
30920 if (load)
30921 regnum = 0, memnum = 1;
30922 else
30923 regnum = 1, memnum = 0;
30924 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30925 ops[memnum] = adjust_address (operands[2], DImode, 0);
30926 output_move_double (ops, false, &count);
30927 return count;
30928 }
30929
30930
30931 int
30932 vfp3_const_double_for_fract_bits (rtx operand)
30933 {
30934 REAL_VALUE_TYPE r0;
30935
30936 if (!CONST_DOUBLE_P (operand))
30937 return 0;
30938
30939 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30940 if (exact_real_inverse (DFmode, &r0)
30941 && !REAL_VALUE_NEGATIVE (r0))
30942 {
30943 if (exact_real_truncate (DFmode, &r0))
30944 {
30945 HOST_WIDE_INT value = real_to_integer (&r0);
30946 value = value & 0xffffffff;
30947 if ((value != 0) && ( (value & (value - 1)) == 0))
30948 {
30949 int ret = exact_log2 (value);
30950 gcc_assert (IN_RANGE (ret, 0, 31));
30951 return ret;
30952 }
30953 }
30954 }
30955 return 0;
30956 }
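/* For example: a CONST_DOUBLE of 0.03125 (1/32) has the exact inverse 32.0,
   a power of two, so the function returns 5; a value such as 0.3 has no
   exact power-of-two inverse and yields 0.  */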
30957
30958 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30959 log2 is in [1, 32], return that log2. Otherwise return -1.
30960 This is used in the patterns for vcvt.s32.f32 floating-point to
30961 fixed-point conversions. */
30962
30963 int
30964 vfp3_const_double_for_bits (rtx x)
30965 {
30966 const REAL_VALUE_TYPE *r;
30967
30968 if (!CONST_DOUBLE_P (x))
30969 return -1;
30970
30971 r = CONST_DOUBLE_REAL_VALUE (x);
30972
30973 if (REAL_VALUE_NEGATIVE (*r)
30974 || REAL_VALUE_ISNAN (*r)
30975 || REAL_VALUE_ISINF (*r)
30976 || !real_isinteger (r, SFmode))
30977 return -1;
30978
30979 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30980
30981 /* The exact_log2 above will have returned -1 if this is
30982 not an exact log2. */
30983 if (!IN_RANGE (hwint, 1, 32))
30984 return -1;
30985
30986 return hwint;
30987 }
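/* For example: a CONST_DOUBLE of 65536.0 returns 16, while 1.0 (whose log2
   is 0, outside [1, 32]) and 3.0 (not a power of two) both return -1.  */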
30988
30989 \f
30990 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30991
30992 static void
30993 arm_pre_atomic_barrier (enum memmodel model)
30994 {
30995 if (need_atomic_barrier_p (model, true))
30996 emit_insn (gen_memory_barrier ());
30997 }
30998
30999 static void
31000 arm_post_atomic_barrier (enum memmodel model)
31001 {
31002 if (need_atomic_barrier_p (model, false))
31003 emit_insn (gen_memory_barrier ());
31004 }
31005
31006 /* Emit the load-exclusive and store-exclusive instructions.
31007 Use acquire and release versions if necessary. */
31008
31009 static void
31010 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31011 {
31012 rtx (*gen) (rtx, rtx);
31013
31014 if (acq)
31015 {
31016 switch (mode)
31017 {
31018 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31019 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31020 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31021 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31022 default:
31023 gcc_unreachable ();
31024 }
31025 }
31026 else
31027 {
31028 switch (mode)
31029 {
31030 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31031 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31032 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31033 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31034 default:
31035 gcc_unreachable ();
31036 }
31037 }
31038
31039 emit_insn (gen (rval, mem));
31040 }
31041
31042 static void
31043 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31044 rtx mem, bool rel)
31045 {
31046 rtx (*gen) (rtx, rtx, rtx);
31047
31048 if (rel)
31049 {
31050 switch (mode)
31051 {
31052 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31053 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31054 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31055 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31056 default:
31057 gcc_unreachable ();
31058 }
31059 }
31060 else
31061 {
31062 switch (mode)
31063 {
31064 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31065 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31066 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31067 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31068 default:
31069 gcc_unreachable ();
31070 }
31071 }
31072
31073 emit_insn (gen (bval, rval, mem));
31074 }
31075
31076 /* Mark the previous jump instruction as unlikely. */
31077
31078 static void
31079 emit_unlikely_jump (rtx insn)
31080 {
31081 rtx_insn *jump = emit_jump_insn (insn);
31082 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31083 }
31084
31085 /* Expand a compare and swap pattern. */
31086
31087 void
31088 arm_expand_compare_and_swap (rtx operands[])
31089 {
31090 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31091 machine_mode mode, cmp_mode;
31092
31093 bval = operands[0];
31094 rval = operands[1];
31095 mem = operands[2];
31096 oldval = operands[3];
31097 newval = operands[4];
31098 is_weak = operands[5];
31099 mod_s = operands[6];
31100 mod_f = operands[7];
31101 mode = GET_MODE (mem);
31102
31103 /* Normally the succ memory model must be stronger than fail, but in the
31104 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31105 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31106
31107 if (TARGET_HAVE_LDACQ
31108 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31109 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31110 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31111
31112 switch (mode)
31113 {
31114 case E_QImode:
31115 case E_HImode:
31116 /* For narrow modes, we're going to perform the comparison in SImode,
31117 so do the zero-extension now. */
31118 rval = gen_reg_rtx (SImode);
31119 oldval = convert_modes (SImode, mode, oldval, true);
31120 /* FALLTHRU */
31121
31122 case E_SImode:
31123 /* Force the value into a register if needed. We waited until after
31124 the zero-extension above to do this properly. */
31125 if (!arm_add_operand (oldval, SImode))
31126 oldval = force_reg (SImode, oldval);
31127 break;
31128
31129 case E_DImode:
31130 if (!cmpdi_operand (oldval, mode))
31131 oldval = force_reg (mode, oldval);
31132 break;
31133
31134 default:
31135 gcc_unreachable ();
31136 }
31137
31138 if (TARGET_THUMB1)
31139 cmp_mode = E_SImode;
31140 else
31141 cmp_mode = CC_Zmode;
31142
31143 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31144 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31145 oldval, newval, is_weak, mod_s, mod_f));
31146
31147 if (mode == QImode || mode == HImode)
31148 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31149
31150 /* In all cases, we arrange for success to be signaled by Z set.
31151 This arrangement allows for the boolean result to be used directly
31152 in a subsequent branch, post optimization. For Thumb-1 targets, the
31153 boolean negation of the result is also stored in bval because Thumb-1
31154 backend lacks dependency tracking for CC flag due to flag-setting not
31155 being represented at RTL level. */
31156 if (TARGET_THUMB1)
31157 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31158 else
31159 {
31160 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31161 emit_insn (gen_rtx_SET (bval, x));
31162 }
31163 }
31164
31165 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31166 another memory store between the load-exclusive and store-exclusive can
31167 reset the monitor from Exclusive to Open state. This means we must wait
31168 until after reload to split the pattern, lest we get a register spill in
31169 the middle of the atomic sequence. Success of the compare and swap is
31170 indicated by the Z flag set for 32bit targets and by neg_bval being zero
31171 for Thumb-1 targets (ie. negation of the boolean value returned by
31172 atomic_compare_and_swapmode standard pattern in operand 0). */
31173
31174 void
31175 arm_split_compare_and_swap (rtx operands[])
31176 {
31177 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31178 machine_mode mode;
31179 enum memmodel mod_s, mod_f;
31180 bool is_weak;
31181 rtx_code_label *label1, *label2;
31182 rtx x, cond;
31183
31184 rval = operands[1];
31185 mem = operands[2];
31186 oldval = operands[3];
31187 newval = operands[4];
31188 is_weak = (operands[5] != const0_rtx);
31189 mod_s_rtx = operands[6];
31190 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31191 mod_f = memmodel_from_int (INTVAL (operands[7]));
31192 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31193 mode = GET_MODE (mem);
31194
31195 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31196
31197 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31198 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31199
31200 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31201 a full barrier is emitted after the store-release. */
31202 if (is_armv8_sync)
31203 use_acquire = false;
31204
31205 /* Checks whether a barrier is needed and emits one accordingly. */
31206 if (!(use_acquire || use_release))
31207 arm_pre_atomic_barrier (mod_s);
31208
31209 label1 = NULL;
31210 if (!is_weak)
31211 {
31212 label1 = gen_label_rtx ();
31213 emit_label (label1);
31214 }
31215 label2 = gen_label_rtx ();
31216
31217 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31218
31219 /* Z is set to 0 for 32bit targets (resp. neg_bval set nonzero for Thumb-1) if oldval != rval,
31220 as required to communicate with arm_expand_compare_and_swap. */
31221 if (TARGET_32BIT)
31222 {
31223 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31224 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31225 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31226 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31227 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31228 }
31229 else
31230 {
31231 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31232 if (thumb1_cmpneg_operand (oldval, SImode))
31233 {
31234 rtx src = rval;
31235 if (!satisfies_constraint_L (oldval))
31236 {
31237 gcc_assert (satisfies_constraint_J (oldval));
31238
31239 /* For such immediates, ADDS needs the source and destination regs
31240 to be the same.
31241
31242 Normally this would be handled by RA, but this is all happening
31243 after RA. */
31244 emit_move_insn (neg_bval, rval);
31245 src = neg_bval;
31246 }
31247
31248 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31249 label2, cond));
31250 }
31251 else
31252 {
31253 emit_move_insn (neg_bval, const1_rtx);
31254 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31255 }
31256 }
31257
31258 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31259
31260 /* Weak or strong, we want EQ to be true for success, so that we
31261 match the flags that we got from the compare above. */
31262 if (TARGET_32BIT)
31263 {
31264 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31265 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31266 emit_insn (gen_rtx_SET (cond, x));
31267 }
31268
31269 if (!is_weak)
31270 {
31271 /* Z is set to boolean value of !neg_bval, as required to communicate
31272 with arm_expand_compare_and_swap. */
31273 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31274 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31275 }
31276
31277 if (!is_mm_relaxed (mod_f))
31278 emit_label (label2);
31279
31280 /* Checks whether a barrier is needed and emits one accordingly. */
31281 if (is_armv8_sync
31282 || !(use_acquire || use_release))
31283 arm_post_atomic_barrier (mod_s);
31284
31285 if (is_mm_relaxed (mod_f))
31286 emit_label (label2);
31287 }
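/* As a rough sketch (register names are illustrative; barriers and the
   acquire/release variants are omitted), the strong SImode case above
   expands to a loop of the form:

	.Lretry:
		ldrex	r_val, [r_mem]
		cmp	r_val, r_old
		bne	.Ldone
		strex	r_fail, r_new, [r_mem]
		cmp	r_fail, #0
		bne	.Lretry
	.Ldone:
 */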
31288
31289 /* Split an atomic operation pattern. Operation is given by CODE and is one
31290 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31291 operation). Operation is performed on the content at MEM and on VALUE
31292 following the memory model MODEL_RTX. The content at MEM before and after
31293 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31294 success of the operation is returned in COND. Using a scratch register or
31295 an operand register for these determines what result is returned for that
31296 pattern. */
31297
31298 void
31299 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31300 rtx value, rtx model_rtx, rtx cond)
31301 {
31302 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31303 machine_mode mode = GET_MODE (mem);
31304 machine_mode wmode = (mode == DImode ? DImode : SImode);
31305 rtx_code_label *label;
31306 bool all_low_regs, bind_old_new;
31307 rtx x;
31308
31309 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31310
31311 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31312 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31313
31314 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31315 a full barrier is emitted after the store-release. */
31316 if (is_armv8_sync)
31317 use_acquire = false;
31318
31319 /* Checks whether a barrier is needed and emits one accordingly. */
31320 if (!(use_acquire || use_release))
31321 arm_pre_atomic_barrier (model);
31322
31323 label = gen_label_rtx ();
31324 emit_label (label);
31325
31326 if (new_out)
31327 new_out = gen_lowpart (wmode, new_out);
31328 if (old_out)
31329 old_out = gen_lowpart (wmode, old_out);
31330 else
31331 old_out = new_out;
31332 value = simplify_gen_subreg (wmode, value, mode, 0);
31333
31334 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31335
31336 /* Does the operation require destination and first operand to use the same
31337 register? This is decided by register constraints of relevant insn
31338 patterns in thumb1.md. */
31339 gcc_assert (!new_out || REG_P (new_out));
31340 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31341 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31342 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31343 bind_old_new =
31344 (TARGET_THUMB1
31345 && code != SET
31346 && code != MINUS
31347 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31348
31349 /* We want to return the old value while putting the result of the operation
31350 in the same register as the old value so copy the old value over to the
31351 destination register and use that register for the operation. */
31352 if (old_out && bind_old_new)
31353 {
31354 emit_move_insn (new_out, old_out);
31355 old_out = new_out;
31356 }
31357
31358 switch (code)
31359 {
31360 case SET:
31361 new_out = value;
31362 break;
31363
31364 case NOT:
31365 x = gen_rtx_AND (wmode, old_out, value);
31366 emit_insn (gen_rtx_SET (new_out, x));
31367 x = gen_rtx_NOT (wmode, new_out);
31368 emit_insn (gen_rtx_SET (new_out, x));
31369 break;
31370
31371 case MINUS:
31372 if (CONST_INT_P (value))
31373 {
31374 value = gen_int_mode (-INTVAL (value), wmode);
31375 code = PLUS;
31376 }
31377 /* FALLTHRU */
31378
31379 case PLUS:
31380 if (mode == DImode)
31381 {
31382 /* DImode plus/minus need to clobber flags. */
31383 /* The adddi3 and subdi3 patterns are incorrectly written so that
31384 they require matching operands, even when we could easily support
31385 three operands. Thankfully, this can be fixed up post-splitting,
31386 as the individual add+adc patterns do accept three operands and
31387 post-reload cprop can make these moves go away. */
31388 emit_move_insn (new_out, old_out);
31389 if (code == PLUS)
31390 x = gen_adddi3 (new_out, new_out, value);
31391 else
31392 x = gen_subdi3 (new_out, new_out, value);
31393 emit_insn (x);
31394 break;
31395 }
31396 /* FALLTHRU */
31397
31398 default:
31399 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31400 emit_insn (gen_rtx_SET (new_out, x));
31401 break;
31402 }
31403
31404 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31405 use_release);
31406
31407 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31408 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31409
31410 /* Checks whether a barrier is needed and emits one accordingly. */
31411 if (is_armv8_sync
31412 || !(use_acquire || use_release))
31413 arm_post_atomic_barrier (model);
31414 }
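/* As a rough sketch (illustrative register names, barriers omitted), an
   atomic fetch-and-add split by the function above takes the form:

	.Lretry:
		ldrex	r_old, [r_mem]
		add	r_new, r_old, r_value
		strex	r_cond, r_new, [r_mem]
		cmp	r_cond, #0
		bne	.Lretry
 */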
31415 \f
31416 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31417 opt_machine_mode
31418 arm_mode_to_pred_mode (machine_mode mode)
31419 {
31420 switch (GET_MODE_NUNITS (mode))
31421 {
31422 case 16: return V16BImode;
31423 case 8: return V8BImode;
31424 case 4: return V4BImode;
31425 case 2: return V2QImode;
31426 }
31427 return opt_machine_mode ();
31428 }
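/* For example, the mapping above gives V16BImode for V16QImode, V8BImode for
   V8HImode, V4BImode for V4SImode and V2QImode for V2DImode; any other
   element count yields no predicate mode.  */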
31429
31430 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31431 If CAN_INVERT, store either the result or its inverse in TARGET
31432 and return true if TARGET contains the inverse. If !CAN_INVERT,
31433 always store the result in TARGET, never its inverse.
31434
31435 Note that the handling of floating-point comparisons is not
31436 IEEE compliant. */
31437
31438 bool
31439 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31440 bool can_invert)
31441 {
31442 machine_mode cmp_result_mode = GET_MODE (target);
31443 machine_mode cmp_mode = GET_MODE (op0);
31444
31445 bool inverted;
31446
31447 /* MVE supports more comparisons than Neon. */
31448 if (TARGET_HAVE_MVE)
31449 inverted = false;
31450 else
31451 switch (code)
31452 {
31453 /* For these we need to compute the inverse of the requested
31454 comparison. */
31455 case UNORDERED:
31456 case UNLT:
31457 case UNLE:
31458 case UNGT:
31459 case UNGE:
31460 case UNEQ:
31461 case NE:
31462 code = reverse_condition_maybe_unordered (code);
31463 if (!can_invert)
31464 {
31465 /* Recursively emit the inverted comparison into a temporary
31466 and then store its inverse in TARGET. This avoids reusing
31467 TARGET (which for integer NE could be one of the inputs). */
31468 rtx tmp = gen_reg_rtx (cmp_result_mode);
31469 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31470 gcc_unreachable ();
31471 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31472 return false;
31473 }
31474 inverted = true;
31475 break;
31476
31477 default:
31478 inverted = false;
31479 break;
31480 }
31481
31482 switch (code)
31483 {
31484 /* These are natively supported by Neon for zero comparisons, but otherwise
31485 require the operands to be swapped. For MVE, we can only compare
31486 registers. */
31487 case LE:
31488 case LT:
31489 if (!TARGET_HAVE_MVE)
31490 if (op1 != CONST0_RTX (cmp_mode))
31491 {
31492 code = swap_condition (code);
31493 std::swap (op0, op1);
31494 }
31495 /* Fall through. */
31496
31497 /* These are natively supported by Neon for both register and zero
31498 operands. MVE supports registers only. */
31499 case EQ:
31500 case GE:
31501 case GT:
31502 case NE:
31503 if (TARGET_HAVE_MVE)
31504 {
31505 switch (GET_MODE_CLASS (cmp_mode))
31506 {
31507 case MODE_VECTOR_INT:
31508 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31509 op0, force_reg (cmp_mode, op1)));
31510 break;
31511 case MODE_VECTOR_FLOAT:
31512 if (TARGET_HAVE_MVE_FLOAT)
31513 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31514 op0, force_reg (cmp_mode, op1)));
31515 else
31516 gcc_unreachable ();
31517 break;
31518 default:
31519 gcc_unreachable ();
31520 }
31521 }
31522 else
31523 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31524 return inverted;
31525
31526 /* These are natively supported for register operands only.
31527 Comparisons with zero aren't useful and should be folded
31528 or canonicalized by target-independent code. */
31529 case GEU:
31530 case GTU:
31531 if (TARGET_HAVE_MVE)
31532 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31533 op0, force_reg (cmp_mode, op1)));
31534 else
31535 emit_insn (gen_neon_vc (code, cmp_mode, target,
31536 op0, force_reg (cmp_mode, op1)));
31537 return inverted;
31538
31539 /* These require the operands to be swapped and likewise do not
31540 support comparisons with zero. */
31541 case LEU:
31542 case LTU:
31543 if (TARGET_HAVE_MVE)
31544 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31545 force_reg (cmp_mode, op1), op0));
31546 else
31547 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31548 target, force_reg (cmp_mode, op1), op0));
31549 return inverted;
31550
31551 /* These need a combination of two comparisons. */
31552 case LTGT:
31553 case ORDERED:
31554 {
31555 /* Operands are LTGT iff (a > b || a < b).
31556 Operands are ORDERED iff (a > b || a <= b). */
31557 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31558 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31559 rtx_code alt_code = (code == LTGT ? LT : LE);
31560 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31561 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31562 gcc_unreachable ();
31563 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31564 gt_res, alt_res)));
31565 return inverted;
31566 }
31567
31568 default:
31569 gcc_unreachable ();
31570 }
31571 }
31572
31573 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31574 CMP_RESULT_MODE is the mode of the comparison result. */
31575
31576 void
31577 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31578 {
31579 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31580 arm_expand_vector_compare, and another one here. */
31581 rtx mask;
31582
31583 if (TARGET_HAVE_MVE)
31584 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31585 else
31586 mask = gen_reg_rtx (cmp_result_mode);
31587
31588 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31589 operands[4], operands[5], true);
31590 if (inverted)
31591 std::swap (operands[1], operands[2]);
31592 if (TARGET_NEON)
31593 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31594 mask, operands[1], operands[2]));
31595 else
31596 {
31597 machine_mode cmp_mode = GET_MODE (operands[0]);
31598
31599 switch (GET_MODE_CLASS (cmp_mode))
31600 {
31601 case MODE_VECTOR_INT:
31602 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31603 operands[1], operands[2], mask));
31604 break;
31605 case MODE_VECTOR_FLOAT:
31606 if (TARGET_HAVE_MVE_FLOAT)
31607 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31608 operands[1], operands[2], mask));
31609 else
31610 gcc_unreachable ();
31611 break;
31612 default:
31613 gcc_unreachable ();
31614 }
31615 }
31616 }
31617 \f
31618 #define MAX_VECT_LEN 16
31619
31620 struct expand_vec_perm_d
31621 {
31622 rtx target, op0, op1;
31623 vec_perm_indices perm;
31624 machine_mode vmode;
31625 bool one_vector_p;
31626 bool testing_p;
31627 };
31628
31629 /* Generate a variable permutation. */
31630
31631 static void
31632 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31633 {
31634 machine_mode vmode = GET_MODE (target);
31635 bool one_vector_p = rtx_equal_p (op0, op1);
31636
31637 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31638 gcc_checking_assert (GET_MODE (op0) == vmode);
31639 gcc_checking_assert (GET_MODE (op1) == vmode);
31640 gcc_checking_assert (GET_MODE (sel) == vmode);
31641 gcc_checking_assert (TARGET_NEON);
31642
31643 if (one_vector_p)
31644 {
31645 if (vmode == V8QImode)
31646 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31647 else
31648 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31649 }
31650 else
31651 {
31652 rtx pair;
31653
31654 if (vmode == V8QImode)
31655 {
31656 pair = gen_reg_rtx (V16QImode);
31657 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31658 pair = gen_lowpart (TImode, pair);
31659 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31660 }
31661 else
31662 {
31663 pair = gen_reg_rtx (OImode);
31664 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31665 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31666 }
31667 }
31668 }
31669
31670 void
31671 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31672 {
31673 machine_mode vmode = GET_MODE (target);
31674 unsigned int nelt = GET_MODE_NUNITS (vmode);
31675 bool one_vector_p = rtx_equal_p (op0, op1);
31676 rtx mask;
31677
31678 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31679 numbering of elements for big-endian, we must reverse the order. */
31680 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31681
31682 /* The VTBL instruction does not use a modulo index, so we must take care
31683 of that ourselves. */
31684 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31685 mask = gen_const_vec_duplicate (vmode, mask);
31686 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31687
31688 arm_expand_vec_perm_1 (target, op0, op1, sel);
31689 }
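/* For instance, with a single V8QImode input the selector bytes are masked
   with 7 before the VTBL, and with two V16QImode inputs they are masked
   with 31, so out-of-range indices wrap as VEC_PERM_EXPR requires.  */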
31690
31691 /* Map between the architectural lane order and GCC's lane order, taking the
31692 ABI into account.  See comment above output_move_neon for details. */
31693
31694 static int
31695 neon_endian_lane_map (machine_mode mode, int lane)
31696 {
31697 if (BYTES_BIG_ENDIAN)
31698 {
31699 int nelems = GET_MODE_NUNITS (mode);
31700 /* Reverse lane order. */
31701 lane = (nelems - 1 - lane);
31702 /* Reverse D register order, to match ABI. */
31703 if (GET_MODE_SIZE (mode) == 16)
31704 lane = lane ^ (nelems / 2);
31705 }
31706 return lane;
31707 }
31708
31709 /* Some permutations index into pairs of vectors, this is a helper function
31710 to map indexes into those pairs of vectors. */
31711
31712 static int
31713 neon_pair_endian_lane_map (machine_mode mode, int lane)
31714 {
31715 int nelem = GET_MODE_NUNITS (mode);
31716 if (BYTES_BIG_ENDIAN)
31717 lane =
31718 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31719 return lane;
31720 }
31721
31722 /* Generate or test for an insn that supports a constant permutation. */
31723
31724 /* Recognize patterns for the VUZP insns. */
31725
31726 static bool
31727 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31728 {
31729 unsigned int i, odd, mask, nelt = d->perm.length ();
31730 rtx out0, out1, in0, in1;
31731 int first_elem;
31732 int swap_nelt;
31733
31734 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31735 return false;
31736
31737 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31738 big endian pattern on 64 bit vectors, so we correct for that. */
31739 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31740 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31741
31742 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31743
31744 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31745 odd = 0;
31746 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31747 odd = 1;
31748 else
31749 return false;
31750 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31751
31752 for (i = 0; i < nelt; i++)
31753 {
31754 unsigned elt =
31755 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31756 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31757 return false;
31758 }
31759
31760 /* Success! */
31761 if (d->testing_p)
31762 return true;
31763
31764 in0 = d->op0;
31765 in1 = d->op1;
31766 if (swap_nelt != 0)
31767 std::swap (in0, in1);
31768
31769 out0 = d->target;
31770 out1 = gen_reg_rtx (d->vmode);
31771 if (odd)
31772 std::swap (out0, out1);
31773
31774 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31775 return true;
31776 }
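/* For example (little-endian), a two-operand V8QImode permutation selecting
   the even elements {0, 2, 4, 6, 8, 10, 12, 14} is recognised here as a
   VUZP with odd == 0.  */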
31777
31778 /* Recognize patterns for the VZIP insns. */
31779
31780 static bool
31781 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31782 {
31783 unsigned int i, high, mask, nelt = d->perm.length ();
31784 rtx out0, out1, in0, in1;
31785 int first_elem;
31786 bool is_swapped;
31787
31788 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31789 return false;
31790
31791 is_swapped = BYTES_BIG_ENDIAN;
31792
31793 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31794
31795 high = nelt / 2;
31796 if (first_elem == neon_endian_lane_map (d->vmode, high))
31797 ;
31798 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31799 high = 0;
31800 else
31801 return false;
31802 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31803
31804 for (i = 0; i < nelt / 2; i++)
31805 {
31806 unsigned elt =
31807 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31808 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31809 != elt)
31810 return false;
31811 elt =
31812 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31813 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31814 != elt)
31815 return false;
31816 }
31817
31818 /* Success! */
31819 if (d->testing_p)
31820 return true;
31821
31822 in0 = d->op0;
31823 in1 = d->op1;
31824 if (is_swapped)
31825 std::swap (in0, in1);
31826
31827 out0 = d->target;
31828 out1 = gen_reg_rtx (d->vmode);
31829 if (high)
31830 std::swap (out0, out1);
31831
31832 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31833 return true;
31834 }
31835
31836 /* Recognize patterns for the VREV insns. */
31837 static bool
31838 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31839 {
31840 unsigned int i, j, diff, nelt = d->perm.length ();
31841 rtx (*gen) (machine_mode, rtx, rtx);
31842
31843 if (!d->one_vector_p)
31844 return false;
31845
31846 diff = d->perm[0];
31847 switch (diff)
31848 {
31849 case 7:
31850 switch (d->vmode)
31851 {
31852 case E_V16QImode:
31853 case E_V8QImode:
31854 gen = gen_neon_vrev64;
31855 break;
31856 default:
31857 return false;
31858 }
31859 break;
31860 case 3:
31861 switch (d->vmode)
31862 {
31863 case E_V16QImode:
31864 case E_V8QImode:
31865 gen = gen_neon_vrev32;
31866 break;
31867 case E_V8HImode:
31868 case E_V4HImode:
31869 case E_V8HFmode:
31870 case E_V4HFmode:
31871 gen = gen_neon_vrev64;
31872 break;
31873 default:
31874 return false;
31875 }
31876 break;
31877 case 1:
31878 switch (d->vmode)
31879 {
31880 case E_V16QImode:
31881 case E_V8QImode:
31882 gen = gen_neon_vrev16;
31883 break;
31884 case E_V8HImode:
31885 case E_V4HImode:
31886 gen = gen_neon_vrev32;
31887 break;
31888 case E_V4SImode:
31889 case E_V2SImode:
31890 case E_V4SFmode:
31891 case E_V2SFmode:
31892 gen = gen_neon_vrev64;
31893 break;
31894 default:
31895 return false;
31896 }
31897 break;
31898 default:
31899 return false;
31900 }
31901
31902 for (i = 0; i < nelt ; i += diff + 1)
31903 for (j = 0; j <= diff; j += 1)
31904 {
31905 /* This is guaranteed to be true as the value of diff
31906 is 7, 3, 1 and we should have enough elements in the
31907 queue to generate this. Getting a vector mask with a
31908 value of diff other than these values implies that
31909 something is wrong by the time we get here. */
31910 gcc_assert (i + j < nelt);
31911 if (d->perm[i + j] != i + diff - j)
31912 return false;
31913 }
31914
31915 /* Success! */
31916 if (d->testing_p)
31917 return true;
31918
31919 emit_insn (gen (d->vmode, d->target, d->op0));
31920 return true;
31921 }
31922
31923 /* Recognize patterns for the VTRN insns. */
31924
31925 static bool
31926 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31927 {
31928 unsigned int i, odd, mask, nelt = d->perm.length ();
31929 rtx out0, out1, in0, in1;
31930
31931 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31932 return false;
31933
31934 /* Note that these are little-endian tests. Adjust for big-endian later. */
31935 if (d->perm[0] == 0)
31936 odd = 0;
31937 else if (d->perm[0] == 1)
31938 odd = 1;
31939 else
31940 return false;
31941 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31942
31943 for (i = 0; i < nelt; i += 2)
31944 {
31945 if (d->perm[i] != i + odd)
31946 return false;
31947 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31948 return false;
31949 }
31950
31951 /* Success! */
31952 if (d->testing_p)
31953 return true;
31954
31955 in0 = d->op0;
31956 in1 = d->op1;
31957 if (BYTES_BIG_ENDIAN)
31958 {
31959 std::swap (in0, in1);
31960 odd = !odd;
31961 }
31962
31963 out0 = d->target;
31964 out1 = gen_reg_rtx (d->vmode);
31965 if (odd)
31966 std::swap (out0, out1);
31967
31968 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31969 return true;
31970 }
31971
31972 /* Recognize patterns for the VEXT insns. */
31973
31974 static bool
31975 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31976 {
31977 unsigned int i, nelt = d->perm.length ();
31978 rtx offset;
31979
31980 unsigned int location;
31981
31982 unsigned int next = d->perm[0] + 1;
31983
31984 /* TODO: Handle GCC's numbering of elements for big-endian. */
31985 if (BYTES_BIG_ENDIAN)
31986 return false;
31987
31988 /* Check if the extracted indexes are increasing by one. */
31989 for (i = 1; i < nelt; next++, i++)
31990 {
31991 /* If we hit the most significant element of the 2nd vector in
31992 the previous iteration, no need to test further. */
31993 if (next == 2 * nelt)
31994 return false;
31995
31996 /* If we are operating on only one vector: it could be a
31997 rotation. If there are only two elements of size < 64, let
31998 arm_evpc_neon_vrev catch it. */
31999 if (d->one_vector_p && (next == nelt))
32000 {
32001 if ((nelt == 2) && (d->vmode != V2DImode))
32002 return false;
32003 else
32004 next = 0;
32005 }
32006
32007 if (d->perm[i] != next)
32008 return false;
32009 }
32010
32011 location = d->perm[0];
32012
32013 /* Success! */
32014 if (d->testing_p)
32015 return true;
32016
32017 offset = GEN_INT (location);
32018
32019 if (d->vmode == E_DImode)
32020 return false;
32021
32022 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32023 return true;
32024 }
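/* For example (little-endian), a two-operand V4SImode permutation with
   selector {1, 2, 3, 4} has consecutive indices starting at 1 and is
   matched here as a VEXT with offset #1.  */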
32025
32026 /* The NEON VTBL instruction is a fully variable permutation that's even
32027 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32028 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32029 can do slightly better by expanding this as a constant where we don't
32030 have to apply a mask. */
32031
32032 static bool
32033 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32034 {
32035 rtx rperm[MAX_VECT_LEN], sel;
32036 machine_mode vmode = d->vmode;
32037 unsigned int i, nelt = d->perm.length ();
32038
32039 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32040 numbering of elements for big-endian, we must reverse the order. */
32041 if (BYTES_BIG_ENDIAN)
32042 return false;
32043
32044 if (d->testing_p)
32045 return true;
32046
32047 /* Generic code will try constant permutation twice: once with the
32048 original mode and again with the elements lowered to QImode.
32049 So wait and don't do the selector expansion ourselves. */
32050 if (vmode != V8QImode && vmode != V16QImode)
32051 return false;
32052
32053 for (i = 0; i < nelt; ++i)
32054 rperm[i] = GEN_INT (d->perm[i]);
32055 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32056 sel = force_reg (vmode, sel);
32057
32058 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32059 return true;
32060 }
32061
32062 static bool
32063 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32064 {
32065 /* Check if the input mask matches vext before reordering the
32066 operands. */
32067 if (TARGET_NEON)
32068 if (arm_evpc_neon_vext (d))
32069 return true;
32070
32071 /* The pattern matching functions above are written to look for a small
32072 number to begin the sequence (0, 1, N/2). If we begin with an index
32073 from the second operand, we can swap the operands. */
32074 unsigned int nelt = d->perm.length ();
32075 if (d->perm[0] >= nelt)
32076 {
32077 d->perm.rotate_inputs (1);
32078 std::swap (d->op0, d->op1);
32079 }
32080
32081 if (TARGET_NEON)
32082 {
32083 if (arm_evpc_neon_vuzp (d))
32084 return true;
32085 if (arm_evpc_neon_vzip (d))
32086 return true;
32087 if (arm_evpc_neon_vrev (d))
32088 return true;
32089 if (arm_evpc_neon_vtrn (d))
32090 return true;
32091 return arm_evpc_neon_vtbl (d);
32092 }
32093 return false;
32094 }
32095
32096 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32097
32098 static bool
32099 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32100 rtx target, rtx op0, rtx op1,
32101 const vec_perm_indices &sel)
32102 {
32103 if (vmode != op_mode)
32104 return false;
32105
32106 struct expand_vec_perm_d d;
32107 int i, nelt, which;
32108
32109 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32110 return false;
32111
32112 d.target = target;
32113 if (op0)
32114 {
32115 rtx nop0 = force_reg (vmode, op0);
32116 if (op0 == op1)
32117 op1 = nop0;
32118 op0 = nop0;
32119 }
32120 if (op1)
32121 op1 = force_reg (vmode, op1);
32122 d.op0 = op0;
32123 d.op1 = op1;
32124
32125 d.vmode = vmode;
32126 gcc_assert (VECTOR_MODE_P (d.vmode));
32127 d.testing_p = !target;
32128
32129 nelt = GET_MODE_NUNITS (d.vmode);
32130 for (i = which = 0; i < nelt; ++i)
32131 {
32132 int ei = sel[i] & (2 * nelt - 1);
32133 which |= (ei < nelt ? 1 : 2);
32134 }
32135
32136 switch (which)
32137 {
32138 default:
32139 gcc_unreachable();
32140
32141 case 3:
32142 d.one_vector_p = false;
32143 if (d.testing_p || !rtx_equal_p (op0, op1))
32144 break;
32145
32146 /* The elements of PERM do not suggest that only the first operand
32147 is used, but both operands are identical. Allow easier matching
32148 of the permutation by folding the permutation into the single
32149 input vector. */
32150 /* FALLTHRU */
32151 case 2:
32152 d.op0 = op1;
32153 d.one_vector_p = true;
32154 break;
32155
32156 case 1:
32157 d.op1 = op0;
32158 d.one_vector_p = true;
32159 break;
32160 }
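/* Illustrative example: for V4SImode with SEL = { 5, 4, 7, 6 } every index
   selects from the second vector, so WHICH == 2 above; the permutation is
   then treated as the single-input permutation { 1, 0, 3, 2 } applied to
   the second operand, which the vrev matcher can recognise. */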
32161
32162 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32163
32164 if (!d.testing_p)
32165 return arm_expand_vec_perm_const_1 (&d);
32166
32167 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32168 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32169 if (!d.one_vector_p)
32170 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32171
32172 start_sequence ();
32173 bool ret = arm_expand_vec_perm_const_1 (&d);
32174 end_sequence ();
32175
32176 return ret;
32177 }
32178
32179 bool
32180 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32181 {
32182 /* If we are soft float and we do not have ldrd
32183 then all auto increment forms are ok. */
32184 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32185 return true;
32186
32187 switch (code)
32188 {
32189 /* Post increment and Pre Decrement are supported for all
32190 instruction forms except for vector forms. */
32191 case ARM_POST_INC:
32192 case ARM_PRE_DEC:
32193 if (VECTOR_MODE_P (mode))
32194 {
32195 if (code != ARM_PRE_DEC)
32196 return true;
32197 else
32198 return false;
32199 }
32200
32201 return true;
32202
32203 case ARM_POST_DEC:
32204 case ARM_PRE_INC:
32205 /* Without LDRD and mode size greater than
32206 word size, there is no point in auto-incrementing
32207 because ldm and stm will not have these forms. */
32208 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32209 return false;
32210
32211 /* Vector and floating point modes do not support
32212 these auto increment forms. */
32213 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32214 return false;
32215
32216 return true;
32217
32218 default:
32219 return false;
32220
32221 }
32222
32223 return false;
32224 }
32225
32226 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32227 on ARM, since we know that shifts by negative amounts are no-ops.
32228 Additionally, the default expansion code is not available or suitable
32229 for post-reload insn splits (this can occur when the register allocator
32230 chooses not to do a shift in NEON).
32231
32232 This function is used in both initial expand and post-reload splits, and
32233 handles all kinds of 64-bit shifts.
32234
32235 Input requirements:
32236 - It is safe for the input and output to be the same register, but
32237 early-clobber rules apply for the shift amount and scratch registers.
32238 - Shift by register requires both scratch registers. In all other cases
32239 the scratch registers may be NULL.
32240 - Ashiftrt by a register also clobbers the CC register. */
32241 void
32242 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32243 rtx amount, rtx scratch1, rtx scratch2)
32244 {
32245 rtx out_high = gen_highpart (SImode, out);
32246 rtx out_low = gen_lowpart (SImode, out);
32247 rtx in_high = gen_highpart (SImode, in);
32248 rtx in_low = gen_lowpart (SImode, in);
32249
32250 /* Terminology:
32251 in = the register pair containing the input value.
32252 out = the destination register pair.
32253 up = the high- or low-part of each pair.
32254 down = the opposite part to "up".
32255 In a shift, we can consider bits to shift from "up"-stream to
32256 "down"-stream, so in a left-shift "up" is the low-part and "down"
32257 is the high-part of each register pair. */
32258
32259 rtx out_up = code == ASHIFT ? out_low : out_high;
32260 rtx out_down = code == ASHIFT ? out_high : out_low;
32261 rtx in_up = code == ASHIFT ? in_low : in_high;
32262 rtx in_down = code == ASHIFT ? in_high : in_low;
32263
32264 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32265 gcc_assert (out
32266 && (REG_P (out) || SUBREG_P (out))
32267 && GET_MODE (out) == DImode);
32268 gcc_assert (in
32269 && (REG_P (in) || SUBREG_P (in))
32270 && GET_MODE (in) == DImode);
32271 gcc_assert (amount
32272 && (((REG_P (amount) || SUBREG_P (amount))
32273 && GET_MODE (amount) == SImode)
32274 || CONST_INT_P (amount)));
32275 gcc_assert (scratch1 == NULL
32276 || (GET_CODE (scratch1) == SCRATCH)
32277 || (GET_MODE (scratch1) == SImode
32278 && REG_P (scratch1)));
32279 gcc_assert (scratch2 == NULL
32280 || (GET_CODE (scratch2) == SCRATCH)
32281 || (GET_MODE (scratch2) == SImode
32282 && REG_P (scratch2)));
32283 gcc_assert (!REG_P (out) || !REG_P (amount)
32284 || !HARD_REGISTER_P (out)
32285 || (REGNO (out) != REGNO (amount)
32286 && REGNO (out) + 1 != REGNO (amount)));
32287
32288 /* Macros to make following code more readable. */
32289 #define SUB_32(DEST,SRC) \
32290 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32291 #define RSB_32(DEST,SRC) \
32292 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32293 #define SUB_S_32(DEST,SRC) \
32294 gen_addsi3_compare0 ((DEST), (SRC), \
32295 GEN_INT (-32))
32296 #define SET(DEST,SRC) \
32297 gen_rtx_SET ((DEST), (SRC))
32298 #define SHIFT(CODE,SRC,AMOUNT) \
32299 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32300 #define LSHIFT(CODE,SRC,AMOUNT) \
32301 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32302 SImode, (SRC), (AMOUNT))
32303 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32304 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32305 SImode, (SRC), (AMOUNT))
32306 #define ORR(A,B) \
32307 gen_rtx_IOR (SImode, (A), (B))
32308 #define BRANCH(COND,LABEL) \
32309 gen_arm_cond_branch ((LABEL), \
32310 gen_rtx_ ## COND (CCmode, cc_reg, \
32311 const0_rtx), \
32312 cc_reg)
32313
32314 /* Shifts by register and shifts by constant are handled separately. */
32315 if (CONST_INT_P (amount))
32316 {
32317 /* We have a shift-by-constant. */
32318
32319 /* First, handle out-of-range shift amounts.
32320 In both cases we try to match the result an ARM instruction in a
32321 shift-by-register would give. This helps reduce execution
32322 differences between optimization levels, but it won't stop other
32323 parts of the compiler doing different things. This is undefined
32324 behavior, in any case. */
32325 if (INTVAL (amount) <= 0)
32326 emit_insn (gen_movdi (out, in));
32327 else if (INTVAL (amount) >= 64)
32328 {
32329 if (code == ASHIFTRT)
32330 {
32331 rtx const31_rtx = GEN_INT (31);
32332 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32333 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32334 }
32335 else
32336 emit_insn (gen_movdi (out, const0_rtx));
32337 }
32338
32339 /* Now handle valid shifts. */
32340 else if (INTVAL (amount) < 32)
32341 {
32342 /* Shifts by a constant less than 32. */
32343 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32344
32345 /* Clearing the out register in DImode first avoids lots
32346 of spilling and results in less stack usage.
32347 Later this redundant insn is completely removed.
32348 Do that only if "in" and "out" are different registers. */
32349 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32350 emit_insn (SET (out, const0_rtx));
32351 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32352 emit_insn (SET (out_down,
32353 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32354 out_down)));
32355 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32356 }
32357 else
32358 {
32359 /* Shifts by a constant greater than 31. */
32360 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32361
32362 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32363 emit_insn (SET (out, const0_rtx));
32364 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32365 if (code == ASHIFTRT)
32366 emit_insn (gen_ashrsi3 (out_up, in_up,
32367 GEN_INT (31)));
32368 else
32369 emit_insn (SET (out_up, const0_rtx));
32370 }
32371 }
32372 else
32373 {
32374 /* We have a shift-by-register. */
32375 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32376
32377 /* This alternative requires the scratch registers. */
32378 gcc_assert (scratch1 && REG_P (scratch1));
32379 gcc_assert (scratch2 && REG_P (scratch2));
32380
32381 /* We will need the values "amount-32" and "32-amount" later.
32382 Swapping them around now allows the later code to be more general. */
32383 switch (code)
32384 {
32385 case ASHIFT:
32386 emit_insn (SUB_32 (scratch1, amount));
32387 emit_insn (RSB_32 (scratch2, amount));
32388 break;
32389 case ASHIFTRT:
32390 emit_insn (RSB_32 (scratch1, amount));
32391 /* Also set CC = amount > 32. */
32392 emit_insn (SUB_S_32 (scratch2, amount));
32393 break;
32394 case LSHIFTRT:
32395 emit_insn (RSB_32 (scratch1, amount));
32396 emit_insn (SUB_32 (scratch2, amount));
32397 break;
32398 default:
32399 gcc_unreachable ();
32400 }
32401
32402 /* Emit code like this:
32403
32404 arithmetic-left:
32405 out_down = in_down << amount;
32406 out_down = (in_up << (amount - 32)) | out_down;
32407 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32408 out_up = in_up << amount;
32409
32410 arithmetic-right:
32411 out_down = in_down >> amount;
32412 out_down = (in_up << (32 - amount)) | out_down;
32413 if (amount < 32)
32414 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32415 out_up = in_up >> amount;
32416
32417 logical-right:
32418 out_down = in_down >> amount;
32419 out_down = (in_up << (32 - amount)) | out_down;
32420 if (amount < 32)
32421 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32422 out_up = in_up >> amount;
32423
32424 The ARM and Thumb2 variants are the same but implemented slightly
32425 differently. If this were only called during expand we could just
32426 use the Thumb2 case and let combine do the right thing, but this
32427 can also be called from post-reload splitters. */
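/* As a rough illustration (the register assignment here is assumed, not
   fixed by this function): a 64-bit left shift of r1:r0 by the amount in
   r2 into r5:r4, with scratches r3 and ip, comes out in ARM mode as
   something like:

   sub r3, r2, #32 @ scratch1 = amount - 32
   rsb ip, r2, #32 @ scratch2 = 32 - amount
   lsl r5, r1, r2 @ out_down = in_down << amount
   orr r5, r5, r0, lsl r3 @ |= in_up << (amount - 32)
   orr r5, r5, r0, lsr ip @ |= (unsigned) in_up >> (32 - amount)
   lsl r4, r0, r2 @ out_up = in_up << amount

   Thumb-2 cannot fold a register-controlled shift into ORR, so the two
   middle steps become separate shift and ORR instructions, as handled
   below. */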
32428
32429 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32430
32431 if (!TARGET_THUMB2)
32432 {
32433 /* Emit code for ARM mode. */
32434 emit_insn (SET (out_down,
32435 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32436 if (code == ASHIFTRT)
32437 {
32438 rtx_code_label *done_label = gen_label_rtx ();
32439 emit_jump_insn (BRANCH (LT, done_label));
32440 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32441 out_down)));
32442 emit_label (done_label);
32443 }
32444 else
32445 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32446 out_down)));
32447 }
32448 else
32449 {
32450 /* Emit code for Thumb2 mode.
32451 Thumb2 can't do shift and or in one insn. */
32452 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32453 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32454
32455 if (code == ASHIFTRT)
32456 {
32457 rtx_code_label *done_label = gen_label_rtx ();
32458 emit_jump_insn (BRANCH (LT, done_label));
32459 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32460 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32461 emit_label (done_label);
32462 }
32463 else
32464 {
32465 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32466 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32467 }
32468 }
32469
32470 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32471 }
32472
32473 #undef SUB_32
32474 #undef RSB_32
32475 #undef SUB_S_32
32476 #undef SET
32477 #undef SHIFT
32478 #undef LSHIFT
32479 #undef REV_LSHIFT
32480 #undef ORR
32481 #undef BRANCH
32482 }
32483
32484 /* Returns true if the pattern is a valid symbolic address, which is either a
32485 symbol_ref or (symbol_ref + addend).
32486
32487 According to the ARM ELF ABI, the initial addend of REL-type relocations
32488 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32489 literal field of the instruction as a 16-bit signed value in the range
32490 -32768 <= A < 32768.
32491
32492 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32493 unsigned range of 0 <= A < 256 as described in the AAELF32
32494 relocation handling documentation: REL-type relocations are encoded
32495 as unsigned in this case. */
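/* For instance (illustrative addresses): (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 100))) are accepted, while an
   addend of 0x12345 is rejected because it does not fit the signed 16-bit
   (or, for Thumb-1 without MOVT, unsigned 8-bit) range checked below. */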
32496
32497 bool
32498 arm_valid_symbolic_address_p (rtx addr)
32499 {
32500 rtx xop0, xop1 = NULL_RTX;
32501 rtx tmp = addr;
32502
32503 if (target_word_relocations)
32504 return false;
32505
32506 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32507 return true;
32508
32509 /* (const (plus: symbol_ref const_int)) */
32510 if (GET_CODE (addr) == CONST)
32511 tmp = XEXP (addr, 0);
32512
32513 if (GET_CODE (tmp) == PLUS)
32514 {
32515 xop0 = XEXP (tmp, 0);
32516 xop1 = XEXP (tmp, 1);
32517
32518 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32519 {
32520 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32521 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32522 else
32523 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32524 }
32525 }
32526
32527 return false;
32528 }
32529
32530 /* Returns true if COMPARISON is a valid comparison operation, and
32531 puts the operands into a form that is valid for it. */
32532 bool
32533 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32534 {
32535 enum rtx_code code = GET_CODE (*comparison);
32536 int code_int;
32537 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32538 ? GET_MODE (*op2) : GET_MODE (*op1);
32539
32540 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32541
32542 if (code == UNEQ || code == LTGT)
32543 return false;
32544
32545 code_int = (int)code;
32546 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32547 PUT_CODE (*comparison, (enum rtx_code)code_int);
32548
32549 switch (mode)
32550 {
32551 case E_SImode:
32552 if (!arm_add_operand (*op1, mode))
32553 *op1 = force_reg (mode, *op1);
32554 if (!arm_add_operand (*op2, mode))
32555 *op2 = force_reg (mode, *op2);
32556 return true;
32557
32558 case E_DImode:
32559 /* gen_compare_reg() will sort out any invalid operands. */
32560 return true;
32561
32562 case E_HFmode:
32563 if (!TARGET_VFP_FP16INST)
32564 break;
32565 /* FP16 comparisons are done in SF mode. */
32566 mode = SFmode;
32567 *op1 = convert_to_mode (mode, *op1, 1);
32568 *op2 = convert_to_mode (mode, *op2, 1);
32569 /* Fall through. */
32570 case E_SFmode:
32571 case E_DFmode:
32572 if (!vfp_compare_operand (*op1, mode))
32573 *op1 = force_reg (mode, *op1);
32574 if (!vfp_compare_operand (*op2, mode))
32575 *op2 = force_reg (mode, *op2);
32576 return true;
32577 default:
32578 break;
32579 }
32580
32581 return false;
32582
32583 }
32584
32585 /* Maximum number of instructions to set block of memory. */
32586 static int
32587 arm_block_set_max_insns (void)
32588 {
32589 if (optimize_function_for_size_p (cfun))
32590 return 4;
32591 else
32592 return current_tune->max_insns_inline_memset;
32593 }
32594
32595 /* Return TRUE if it's profitable to set block of memory for
32596 non-vectorized case. VAL is the value to set the memory
32597 with. LENGTH is the number of bytes to set. ALIGN is the
32598 alignment of the destination memory in bytes. UNALIGNED_P
32599 is TRUE if we can only set the memory with instructions
32600 meeting alignment requirements. USE_STRD_P is TRUE if we
32601 can use strd to set the memory. */
32602 static bool
32603 arm_block_set_non_vect_profit_p (rtx val,
32604 unsigned HOST_WIDE_INT length,
32605 unsigned HOST_WIDE_INT align,
32606 bool unaligned_p, bool use_strd_p)
32607 {
32608 int num = 0;
32609 /* For leftovers in bytes of 0-7, we can set the memory block using
32610 strb/strh/str with minimum instruction number. */
32611 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
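/* For example, leftover[5] == 2: a 5-byte tail can be stored with one STR
   plus one STRB, while leftover[7] == 3 needs STR + STRH + STRB. */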
32612
32613 if (unaligned_p)
32614 {
32615 num = arm_const_inline_cost (SET, val);
32616 num += length / align + length % align;
32617 }
32618 else if (use_strd_p)
32619 {
32620 num = arm_const_double_inline_cost (val);
32621 num += (length >> 3) + leftover[length & 7];
32622 }
32623 else
32624 {
32625 num = arm_const_inline_cost (SET, val);
32626 num += (length >> 2) + leftover[length & 3];
32627 }
32628
32629 /* We may be able to combine last pair STRH/STRB into a single STR
32630 by shifting one byte back. */
32631 if (unaligned_access && length > 3 && (length & 3) == 3)
32632 num--;
32633
32634 return (num <= arm_block_set_max_insns ());
32635 }
32636
32637 /* Return TRUE if it's profitable to set block of memory for
32638 vectorized case. LENGTH is the number of bytes to set.
32639 ALIGN is the alignment of destination memory in bytes.
32640 MODE is the vector mode used to set the memory. */
32641 static bool
32642 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32643 unsigned HOST_WIDE_INT align,
32644 machine_mode mode)
32645 {
32646 int num;
32647 bool unaligned_p = ((align & 3) != 0);
32648 unsigned int nelt = GET_MODE_NUNITS (mode);
32649
32650 /* Instruction loading constant value. */
32651 num = 1;
32652 /* Instructions storing the memory. */
32653 num += (length + nelt - 1) / nelt;
32654 /* Instructions adjusting the address expression. Only need to
32655 adjust address expression if it's 4 bytes aligned and bytes
32656 leftover can only be stored by mis-aligned store instruction. */
32657 if (!unaligned_p && (length & 3) != 0)
32658 num++;
32659
32660 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32661 if (!unaligned_p && mode == V16QImode)
32662 num--;
32663
32664 return (num <= arm_block_set_max_insns ());
32665 }
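/* Worked example (figures are illustrative): LENGTH == 17 with a
   word-aligned destination and MODE == V16QImode gives num = 1 (load the
   constant) + 2 (stores) + 1 (address adjustment for the tail) - 1 (the
   first 16 bytes use vst1), i.e. 3 instructions, which is within the
   usual limit. */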
32666
32667 /* Set a block of memory using vectorization instructions for the
32668 unaligned case. We fill the first LENGTH bytes of the memory
32669 area starting from DSTBASE with byte constant VALUE. ALIGN is
32670 the alignment requirement of memory. Return TRUE if succeeded. */
32671 static bool
32672 arm_block_set_unaligned_vect (rtx dstbase,
32673 unsigned HOST_WIDE_INT length,
32674 unsigned HOST_WIDE_INT value,
32675 unsigned HOST_WIDE_INT align)
32676 {
32677 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32678 rtx dst, mem;
32679 rtx val_vec, reg;
32680 rtx (*gen_func) (rtx, rtx);
32681 machine_mode mode;
32682 unsigned HOST_WIDE_INT v = value;
32683 unsigned int offset = 0;
32684 gcc_assert ((align & 0x3) != 0);
32685 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32686 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32687 if (length >= nelt_v16)
32688 {
32689 mode = V16QImode;
32690 gen_func = gen_movmisalignv16qi;
32691 }
32692 else
32693 {
32694 mode = V8QImode;
32695 gen_func = gen_movmisalignv8qi;
32696 }
32697 nelt_mode = GET_MODE_NUNITS (mode);
32698 gcc_assert (length >= nelt_mode);
32699 /* Skip if it isn't profitable. */
32700 if (!arm_block_set_vect_profit_p (length, align, mode))
32701 return false;
32702
32703 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32704 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32705
32706 v = sext_hwi (v, BITS_PER_WORD);
32707
32708 reg = gen_reg_rtx (mode);
32709 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32710 /* Emit instruction loading the constant value. */
32711 emit_move_insn (reg, val_vec);
32712
32713 /* Handle nelt_mode bytes in a vector. */
32714 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32715 {
32716 emit_insn ((*gen_func) (mem, reg));
32717 if (i + 2 * nelt_mode <= length)
32718 {
32719 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32720 offset += nelt_mode;
32721 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32722 }
32723 }
32724
32725 /* If at least nelt_v8 bytes are left over, we must be in
32726 V16QI mode. */
32727 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32728
32729 /* Handle (8, 16) bytes leftover. */
32730 if (i + nelt_v8 < length)
32731 {
32732 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32733 offset += length - i;
32734 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32735
32736 /* We are shifting bytes back, set the alignment accordingly. */
32737 if ((length & 1) != 0 && align >= 2)
32738 set_mem_align (mem, BITS_PER_UNIT);
32739
32740 emit_insn (gen_movmisalignv16qi (mem, reg));
32741 }
32742 /* Handle (0, 8] bytes leftover. */
32743 else if (i < length && i + nelt_v8 >= length)
32744 {
32745 if (mode == V16QImode)
32746 reg = gen_lowpart (V8QImode, reg);
32747
32748 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32749 + (nelt_mode - nelt_v8))));
32750 offset += (length - i) + (nelt_mode - nelt_v8);
32751 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32752
32753 /* We are shifting bytes back, set the alignment accordingly. */
32754 if ((length & 1) != 0 && align >= 2)
32755 set_mem_align (mem, BITS_PER_UNIT);
32756
32757 emit_insn (gen_movmisalignv8qi (mem, reg));
32758 }
32759
32760 return true;
32761 }
32762
32763 /* Set a block of memory using vectorization instructions for the
32764 aligned case. We fill the first LENGTH bytes of the memory area
32765 starting from DSTBASE with byte constant VALUE. ALIGN is the
32766 alignment requirement of memory. Return TRUE if succeeded. */
32767 static bool
32768 arm_block_set_aligned_vect (rtx dstbase,
32769 unsigned HOST_WIDE_INT length,
32770 unsigned HOST_WIDE_INT value,
32771 unsigned HOST_WIDE_INT align)
32772 {
32773 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32774 rtx dst, addr, mem;
32775 rtx val_vec, reg;
32776 machine_mode mode;
32777 unsigned int offset = 0;
32778
32779 gcc_assert ((align & 0x3) == 0);
32780 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32781 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32782 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32783 mode = V16QImode;
32784 else
32785 mode = V8QImode;
32786
32787 nelt_mode = GET_MODE_NUNITS (mode);
32788 gcc_assert (length >= nelt_mode);
32789 /* Skip if it isn't profitable. */
32790 if (!arm_block_set_vect_profit_p (length, align, mode))
32791 return false;
32792
32793 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32794
32795 reg = gen_reg_rtx (mode);
32796 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32797 /* Emit instruction loading the constant value. */
32798 emit_move_insn (reg, val_vec);
32799
32800 i = 0;
32801 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32802 if (mode == V16QImode)
32803 {
32804 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32805 emit_insn (gen_movmisalignv16qi (mem, reg));
32806 i += nelt_mode;
32807 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32808 if (i + nelt_v8 < length && i + nelt_v16 > length)
32809 {
32810 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32811 offset += length - nelt_mode;
32812 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32813 /* We are shifting bytes back, set the alignment accordingly. */
32814 if ((length & 0x3) == 0)
32815 set_mem_align (mem, BITS_PER_UNIT * 4);
32816 else if ((length & 0x1) == 0)
32817 set_mem_align (mem, BITS_PER_UNIT * 2);
32818 else
32819 set_mem_align (mem, BITS_PER_UNIT);
32820
32821 emit_insn (gen_movmisalignv16qi (mem, reg));
32822 return true;
32823 }
32824 /* Fall through for bytes leftover. */
32825 mode = V8QImode;
32826 nelt_mode = GET_MODE_NUNITS (mode);
32827 reg = gen_lowpart (V8QImode, reg);
32828 }
32829
32830 /* Handle 8 bytes in a vector. */
32831 for (; (i + nelt_mode <= length); i += nelt_mode)
32832 {
32833 addr = plus_constant (Pmode, dst, i);
32834 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32835 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32836 emit_move_insn (mem, reg);
32837 else
32838 emit_insn (gen_unaligned_storev8qi (mem, reg));
32839 }
32840
32841 /* Handle single word leftover by shifting 4 bytes back. We can
32842 use aligned access for this case. */
32843 if (i + UNITS_PER_WORD == length)
32844 {
32845 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32846 offset += i - UNITS_PER_WORD;
32847 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32848 /* We are shifting 4 bytes back, set the alignment accordingly. */
32849 if (align > UNITS_PER_WORD)
32850 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32851
32852 emit_insn (gen_unaligned_storev8qi (mem, reg));
32853 }
32854 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32855 We have to use unaligned access for this case. */
32856 else if (i < length)
32857 {
32858 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32859 offset += length - nelt_mode;
32860 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32861 /* We are shifting bytes back, set the alignment accordingly. */
32862 if ((length & 1) == 0)
32863 set_mem_align (mem, BITS_PER_UNIT * 2);
32864 else
32865 set_mem_align (mem, BITS_PER_UNIT);
32866
32867 emit_insn (gen_movmisalignv8qi (mem, reg));
32868 }
32869
32870 return true;
32871 }
32872
32873 /* Set a block of memory using plain strh/strb instructions, only
32874 using instructions allowed by ALIGN on the processor. We fill the
32875 first LENGTH bytes of the memory area starting from DSTBASE
32876 with byte constant VALUE. ALIGN is the alignment requirement
32877 of memory. */
32878 static bool
32879 arm_block_set_unaligned_non_vect (rtx dstbase,
32880 unsigned HOST_WIDE_INT length,
32881 unsigned HOST_WIDE_INT value,
32882 unsigned HOST_WIDE_INT align)
32883 {
32884 unsigned int i;
32885 rtx dst, addr, mem;
32886 rtx val_exp, val_reg, reg;
32887 machine_mode mode;
32888 HOST_WIDE_INT v = value;
32889
32890 gcc_assert (align == 1 || align == 2);
32891
32892 if (align == 2)
32893 v |= (value << BITS_PER_UNIT);
32894
32895 v = sext_hwi (v, BITS_PER_WORD);
32896 val_exp = GEN_INT (v);
32897 /* Skip if it isn't profitable. */
32898 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32899 align, true, false))
32900 return false;
32901
32902 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32903 mode = (align == 2 ? HImode : QImode);
32904 val_reg = force_reg (SImode, val_exp);
32905 reg = gen_lowpart (mode, val_reg);
32906
32907 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32908 {
32909 addr = plus_constant (Pmode, dst, i);
32910 mem = adjust_automodify_address (dstbase, mode, addr, i);
32911 emit_move_insn (mem, reg);
32912 }
32913
32914 /* Handle single byte leftover. */
32915 if (i + 1 == length)
32916 {
32917 reg = gen_lowpart (QImode, val_reg);
32918 addr = plus_constant (Pmode, dst, i);
32919 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32920 emit_move_insn (mem, reg);
32921 i++;
32922 }
32923
32924 gcc_assert (i == length);
32925 return true;
32926 }
32927
32928 /* Set a block of memory using plain strd/str/strh/strb instructions,
32929 to permit unaligned copies on processors which support unaligned
32930 semantics for those instructions. We fill the first LENGTH bytes
32931 of the memory area starting from DSTBASE with byte constant VALUE.
32932 ALIGN is the alignment requirement of memory. */
32933 static bool
32934 arm_block_set_aligned_non_vect (rtx dstbase,
32935 unsigned HOST_WIDE_INT length,
32936 unsigned HOST_WIDE_INT value,
32937 unsigned HOST_WIDE_INT align)
32938 {
32939 unsigned int i;
32940 rtx dst, addr, mem;
32941 rtx val_exp, val_reg, reg;
32942 unsigned HOST_WIDE_INT v;
32943 bool use_strd_p;
32944
32945 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32946 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32947
32948 v = (value | (value << 8) | (value << 16) | (value << 24));
32949 if (length < UNITS_PER_WORD)
32950 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32951
32952 if (use_strd_p)
32953 v |= (v << BITS_PER_WORD);
32954 else
32955 v = sext_hwi (v, BITS_PER_WORD);
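/* E.g. (illustrative): VALUE == 0xab is replicated to 0xabababab for
   word-sized stores, or to 0xabababababababab when STRD is usable. */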
32956
32957 val_exp = GEN_INT (v);
32958 /* Skip if it isn't profitable. */
32959 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32960 align, false, use_strd_p))
32961 {
32962 if (!use_strd_p)
32963 return false;
32964
32965 /* Try without strd. */
32966 v = (v >> BITS_PER_WORD);
32967 v = sext_hwi (v, BITS_PER_WORD);
32968 val_exp = GEN_INT (v);
32969 use_strd_p = false;
32970 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32971 align, false, use_strd_p))
32972 return false;
32973 }
32974
32975 i = 0;
32976 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32977 /* Handle double words using strd if possible. */
32978 if (use_strd_p)
32979 {
32980 val_reg = force_reg (DImode, val_exp);
32981 reg = val_reg;
32982 for (; (i + 8 <= length); i += 8)
32983 {
32984 addr = plus_constant (Pmode, dst, i);
32985 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32986 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32987 emit_move_insn (mem, reg);
32988 else
32989 emit_insn (gen_unaligned_storedi (mem, reg));
32990 }
32991 }
32992 else
32993 val_reg = force_reg (SImode, val_exp);
32994
32995 /* Handle words. */
32996 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32997 for (; (i + 4 <= length); i += 4)
32998 {
32999 addr = plus_constant (Pmode, dst, i);
33000 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33001 if ((align & 3) == 0)
33002 emit_move_insn (mem, reg);
33003 else
33004 emit_insn (gen_unaligned_storesi (mem, reg));
33005 }
33006
33007 /* Merge last pair of STRH and STRB into a STR if possible. */
33008 if (unaligned_access && i > 0 && (i + 3) == length)
33009 {
33010 addr = plus_constant (Pmode, dst, i - 1);
33011 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33012 /* We are shifting one byte back, set the alignment accordingly. */
33013 if ((align & 1) == 0)
33014 set_mem_align (mem, BITS_PER_UNIT);
33015
33016 /* Most likely this is an unaligned access, and we can't tell at
33017 compilation time. */
33018 emit_insn (gen_unaligned_storesi (mem, reg));
33019 return true;
33020 }
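/* Illustrative case: LENGTH == 11 leaves i == 8 after the word loop, so a
   single unaligned STR at offset 7 covers bytes 7-10 (byte 7 is simply
   rewritten), saving the STRH + STRB pair. */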
33021
33022 /* Handle half word leftover. */
33023 if (i + 2 <= length)
33024 {
33025 reg = gen_lowpart (HImode, val_reg);
33026 addr = plus_constant (Pmode, dst, i);
33027 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33028 if ((align & 1) == 0)
33029 emit_move_insn (mem, reg);
33030 else
33031 emit_insn (gen_unaligned_storehi (mem, reg));
33032
33033 i += 2;
33034 }
33035
33036 /* Handle single byte leftover. */
33037 if (i + 1 == length)
33038 {
33039 reg = gen_lowpart (QImode, val_reg);
33040 addr = plus_constant (Pmode, dst, i);
33041 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33042 emit_move_insn (mem, reg);
33043 }
33044
33045 return true;
33046 }
33047
33048 /* Set a block of memory using vectorization instructions for both
33049 aligned and unaligned cases. We fill the first LENGTH bytes of
33050 the memory area starting from DSTBASE with byte constant VALUE.
33051 ALIGN is the alignment requirement of memory. */
33052 static bool
33053 arm_block_set_vect (rtx dstbase,
33054 unsigned HOST_WIDE_INT length,
33055 unsigned HOST_WIDE_INT value,
33056 unsigned HOST_WIDE_INT align)
33057 {
33058 /* Check whether we need to use unaligned store instruction. */
33059 if (((align & 3) != 0 || (length & 3) != 0)
33060 /* Check whether unaligned store instruction is available. */
33061 && (!unaligned_access || BYTES_BIG_ENDIAN))
33062 return false;
33063
33064 if ((align & 3) == 0)
33065 return arm_block_set_aligned_vect (dstbase, length, value, align);
33066 else
33067 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33068 }
33069
33070 /* Expand string store operation. First we try to do that by using
33071 vectorization instructions, then try with ARM unaligned access and
33072 double-word store if profitable. OPERANDS[0] is the destination,
33073 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33074 initialize the memory, OPERANDS[3] is the known alignment of the
33075 destination. */
33076 bool
33077 arm_gen_setmem (rtx *operands)
33078 {
33079 rtx dstbase = operands[0];
33080 unsigned HOST_WIDE_INT length;
33081 unsigned HOST_WIDE_INT value;
33082 unsigned HOST_WIDE_INT align;
33083
33084 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33085 return false;
33086
33087 length = UINTVAL (operands[1]);
33088 if (length > 64)
33089 return false;
33090
33091 value = (UINTVAL (operands[2]) & 0xFF);
33092 align = UINTVAL (operands[3]);
33093 if (TARGET_NEON && length >= 8
33094 && current_tune->string_ops_prefer_neon
33095 && arm_block_set_vect (dstbase, length, value, align))
33096 return true;
33097
33098 if (!unaligned_access && (align & 3) != 0)
33099 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33100
33101 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33102 }
33103
33104
33105 static bool
33106 arm_macro_fusion_p (void)
33107 {
33108 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33109 }
33110
33111 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33112 for MOVW / MOVT macro fusion. */
33113
33114 static bool
33115 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33116 {
33117 /* We are trying to fuse
33118 movw imm / movt imm
33119 instructions as a group that gets scheduled together. */
33120
33121 rtx set_dest = SET_DEST (curr_set);
33122
33123 if (GET_MODE (set_dest) != SImode)
33124 return false;
33125
33126 /* We are trying to match:
33127 prev (movw) == (set (reg r0) (const_int imm16))
33128 curr (movt) == (set (zero_extract (reg r0)
33129 (const_int 16)
33130 (const_int 16))
33131 (const_int imm16_1))
33132 or
33133 prev (movw) == (set (reg r1)
33134 (high (symbol_ref ("SYM"))))
33135 curr (movt) == (set (reg r0)
33136 (lo_sum (reg r1)
33137 (symbol_ref ("SYM")))) */
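/* In assembly terms, a fusible pair is e.g. (operands are illustrative):
   movw r0, #:lower16:sym
   movt r0, #:upper16:sym
   or a pair building a constant, such as movw r0, #0x1234 followed by
   movt r0, #0xabcd to form 0xabcd1234 in r0. */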
33138
33139 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33140 {
33141 if (CONST_INT_P (SET_SRC (curr_set))
33142 && CONST_INT_P (SET_SRC (prev_set))
33143 && REG_P (XEXP (set_dest, 0))
33144 && REG_P (SET_DEST (prev_set))
33145 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33146 return true;
33147
33148 }
33149 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33150 && REG_P (SET_DEST (curr_set))
33151 && REG_P (SET_DEST (prev_set))
33152 && GET_CODE (SET_SRC (prev_set)) == HIGH
33153 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33154 return true;
33155
33156 return false;
33157 }
33158
33159 static bool
33160 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33161 {
33162 rtx prev_set = single_set (prev);
33163 rtx curr_set = single_set (curr);
33164
33165 if (!prev_set
33166 || !curr_set)
33167 return false;
33168
33169 if (any_condjump_p (curr))
33170 return false;
33171
33172 if (!arm_macro_fusion_p ())
33173 return false;
33174
33175 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33176 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33177 return true;
33178
33179 return false;
33180 }
33181
33182 /* Return true iff the instruction fusion described by OP is enabled. */
33183 bool
33184 arm_fusion_enabled_p (tune_params::fuse_ops op)
33185 {
33186 return current_tune->fusible_ops & op;
33187 }
33188
33189 /* Return TRUE if return address signing mechanism is enabled. */
33190 bool
33191 arm_current_function_pac_enabled_p (void)
33192 {
33193 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33194 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33195 && !crtl->is_leaf));
33196 }
33197
33198 /* Raise an error if the current target arch is not bti compatible. */
33199 void aarch_bti_arch_check (void)
33200 {
33201 if (!arm_arch8m_main)
33202 error ("This architecture does not support branch protection instructions");
33203 }
33204
33205 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33206 bool
33207 aarch_bti_enabled (void)
33208 {
33209 return aarch_enable_bti != 0;
33210 }
33211
33212 /* Check if INSN is a BTI J insn. */
33213 bool
33214 aarch_bti_j_insn_p (rtx_insn *insn)
33215 {
33216 if (!insn || !INSN_P (insn))
33217 return false;
33218
33219 rtx pat = PATTERN (insn);
33220 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33221 }
33222
33223 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33224 bool
33225 aarch_pac_insn_p (rtx x)
33226 {
33227 if (!x || !INSN_P (x))
33228 return false;
33229
33230 rtx pat = PATTERN (x);
33231
33232 if (GET_CODE (pat) == SET)
33233 {
33234 rtx tmp = XEXP (pat, 1);
33235 if (tmp
33236 && ((GET_CODE (tmp) == UNSPEC
33237 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33238 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33239 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33240 return true;
33241 }
33242
33243 return false;
33244 }
33245
33246 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33247 For Arm, both of these map to a simple BTI instruction. */
33248
33249 rtx
33250 aarch_gen_bti_c (void)
33251 {
33252 return gen_bti_nop ();
33253 }
33254
33255 rtx
33256 aarch_gen_bti_j (void)
33257 {
33258 return gen_bti_nop ();
33259 }
33260
33261 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33262 scheduled for speculative execution. Reject the long-running division
33263 and square-root instructions. */
33264
33265 static bool
33266 arm_sched_can_speculate_insn (rtx_insn *insn)
33267 {
33268 switch (get_attr_type (insn))
33269 {
33270 case TYPE_SDIV:
33271 case TYPE_UDIV:
33272 case TYPE_FDIVS:
33273 case TYPE_FDIVD:
33274 case TYPE_FSQRTS:
33275 case TYPE_FSQRTD:
33276 case TYPE_NEON_FP_SQRT_S:
33277 case TYPE_NEON_FP_SQRT_D:
33278 case TYPE_NEON_FP_SQRT_S_Q:
33279 case TYPE_NEON_FP_SQRT_D_Q:
33280 case TYPE_NEON_FP_DIV_S:
33281 case TYPE_NEON_FP_DIV_D:
33282 case TYPE_NEON_FP_DIV_S_Q:
33283 case TYPE_NEON_FP_DIV_D_Q:
33284 return false;
33285 default:
33286 return true;
33287 }
33288 }
33289
33290 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33291
33292 static unsigned HOST_WIDE_INT
33293 arm_asan_shadow_offset (void)
33294 {
33295 return HOST_WIDE_INT_1U << 29;
33296 }
33297
33298
33299 /* This is a temporary fix for PR60655. Ideally we need
33300 to handle most of these cases in the generic part but
33301 currently we reject minus (..) (sym_ref). We try to
33302 ameliorate the case with minus (sym_ref1) (sym_ref2)
33303 where they are in the same section. */
33304
33305 static bool
33306 arm_const_not_ok_for_debug_p (rtx p)
33307 {
33308 tree decl_op0 = NULL;
33309 tree decl_op1 = NULL;
33310
33311 if (GET_CODE (p) == UNSPEC)
33312 return true;
33313 if (GET_CODE (p) == MINUS)
33314 {
33315 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33316 {
33317 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33318 if (decl_op1
33319 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33320 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33321 {
33322 if ((VAR_P (decl_op1)
33323 || TREE_CODE (decl_op1) == CONST_DECL)
33324 && (VAR_P (decl_op0)
33325 || TREE_CODE (decl_op0) == CONST_DECL))
33326 return (get_variable_section (decl_op1, false)
33327 != get_variable_section (decl_op0, false));
33328
33329 if (TREE_CODE (decl_op1) == LABEL_DECL
33330 && TREE_CODE (decl_op0) == LABEL_DECL)
33331 return (DECL_CONTEXT (decl_op1)
33332 != DECL_CONTEXT (decl_op0));
33333 }
33334
33335 return true;
33336 }
33337 }
33338
33339 return false;
33340 }
33341
33342 /* Return TRUE if X is a reference to a value in a constant pool. */
33343 extern bool
33344 arm_is_constant_pool_ref (rtx x)
33345 {
33346 return (MEM_P (x)
33347 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33348 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33349 }
33350
33351 /* Remember the last target of arm_set_current_function. */
33352 static GTY(()) tree arm_previous_fndecl;
33353
33354 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33355
33356 void
33357 save_restore_target_globals (tree new_tree)
33358 {
33359 /* If we have a previous state, use it. */
33360 if (TREE_TARGET_GLOBALS (new_tree))
33361 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33362 else if (new_tree == target_option_default_node)
33363 restore_target_globals (&default_target_globals);
33364 else
33365 {
33366 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33367 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33368 }
33369
33370 arm_option_params_internal ();
33371 }
33372
33373 /* Invalidate arm_previous_fndecl. */
33374
33375 void
33376 arm_reset_previous_fndecl (void)
33377 {
33378 arm_previous_fndecl = NULL_TREE;
33379 }
33380
33381 /* Establish appropriate back-end context for processing the function
33382 FNDECL. The argument might be NULL to indicate processing at top
33383 level, outside of any function scope. */
33384
33385 static void
33386 arm_set_current_function (tree fndecl)
33387 {
33388 if (!fndecl || fndecl == arm_previous_fndecl)
33389 return;
33390
33391 tree old_tree = (arm_previous_fndecl
33392 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33393 : NULL_TREE);
33394
33395 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33396
33397 /* If current function has no attributes but previous one did,
33398 use the default node. */
33399 if (! new_tree && old_tree)
33400 new_tree = target_option_default_node;
33401
33402 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33403 the default have been handled by save_restore_target_globals from
33404 arm_pragma_target_parse. */
33405 if (old_tree == new_tree)
33406 return;
33407
33408 arm_previous_fndecl = fndecl;
33409
33410 /* First set the target options. */
33411 cl_target_option_restore (&global_options, &global_options_set,
33412 TREE_TARGET_OPTION (new_tree));
33413
33414 save_restore_target_globals (new_tree);
33415
33416 arm_override_options_after_change_1 (&global_options, &global_options_set);
33417 }
33418
33419 /* Implement TARGET_OPTION_PRINT. */
33420
33421 static void
33422 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33423 {
33424 int flags = ptr->x_target_flags;
33425 const char *fpu_name;
33426
33427 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33428 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33429
33430 fprintf (file, "%*sselected isa %s\n", indent, "",
33431 TARGET_THUMB2_P (flags) ? "thumb2" :
33432 TARGET_THUMB_P (flags) ? "thumb1" :
33433 "arm");
33434
33435 if (ptr->x_arm_arch_string)
33436 fprintf (file, "%*sselected architecture %s\n", indent, "",
33437 ptr->x_arm_arch_string);
33438
33439 if (ptr->x_arm_cpu_string)
33440 fprintf (file, "%*sselected CPU %s\n", indent, "",
33441 ptr->x_arm_cpu_string);
33442
33443 if (ptr->x_arm_tune_string)
33444 fprintf (file, "%*sselected tune %s\n", indent, "",
33445 ptr->x_arm_tune_string);
33446
33447 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33448 }
33449
33450 /* Hook to determine if one function can safely inline another. */
33451
33452 static bool
33453 arm_can_inline_p (tree caller, tree callee)
33454 {
33455 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33456 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33457 bool can_inline = true;
33458
33459 struct cl_target_option *caller_opts
33460 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33461 : target_option_default_node);
33462
33463 struct cl_target_option *callee_opts
33464 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33465 : target_option_default_node);
33466
33467 if (callee_opts == caller_opts)
33468 return true;
33469
33470 /* Callee's ISA features should be a subset of the caller's. */
33471 struct arm_build_target caller_target;
33472 struct arm_build_target callee_target;
33473 caller_target.isa = sbitmap_alloc (isa_num_bits);
33474 callee_target.isa = sbitmap_alloc (isa_num_bits);
33475
33476 arm_configure_build_target (&caller_target, caller_opts, false);
33477 arm_configure_build_target (&callee_target, callee_opts, false);
33478 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33479 can_inline = false;
33480
33481 sbitmap_free (caller_target.isa);
33482 sbitmap_free (callee_target.isa);
33483
33484 /* OK to inline between different modes.
33485 Function with mode specific instructions, e.g using asm,
33486 must be explicitly protected with noinline. */
33487 return can_inline;
33488 }
33489
33490 /* Hook to fix function's alignment affected by target attribute. */
33491
33492 static void
33493 arm_relayout_function (tree fndecl)
33494 {
33495 if (DECL_USER_ALIGN (fndecl))
33496 return;
33497
33498 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33499
33500 if (!callee_tree)
33501 callee_tree = target_option_default_node;
33502
33503 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33504 SET_DECL_ALIGN
33505 (fndecl,
33506 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33507 }
33508
33509 /* Inner function to process the attribute((target(...))), take an argument and
33510 set the current options from the argument. If we have a list, recursively
33511 go over the list. */
33512
33513 static bool
33514 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33515 {
33516 if (TREE_CODE (args) == TREE_LIST)
33517 {
33518 bool ret = true;
33519
33520 for (; args; args = TREE_CHAIN (args))
33521 if (TREE_VALUE (args)
33522 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33523 ret = false;
33524 return ret;
33525 }
33526
33527 else if (TREE_CODE (args) != STRING_CST)
33528 {
33529 error ("attribute %<target%> argument not a string");
33530 return false;
33531 }
33532
33533 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33534 char *q;
33535
33536 while ((q = strtok (argstr, ",")) != NULL)
33537 {
33538 argstr = NULL;
33539 if (!strcmp (q, "thumb"))
33540 {
33541 opts->x_target_flags |= MASK_THUMB;
33542 if (TARGET_FDPIC && !arm_arch_thumb2)
33543 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33544 }
33545
33546 else if (!strcmp (q, "arm"))
33547 opts->x_target_flags &= ~MASK_THUMB;
33548
33549 else if (!strcmp (q, "general-regs-only"))
33550 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33551
33552 else if (startswith (q, "fpu="))
33553 {
33554 int fpu_index;
33555 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33556 &fpu_index, CL_TARGET))
33557 {
33558 error ("invalid fpu for target attribute or pragma %qs", q);
33559 return false;
33560 }
33561 if (fpu_index == TARGET_FPU_auto)
33562 {
33563 /* This doesn't really make sense until we support
33564 general dynamic selection of the architecture and all
33565 sub-features. */
33566 sorry ("auto fpu selection not currently permitted here");
33567 return false;
33568 }
33569 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33570 }
33571 else if (startswith (q, "arch="))
33572 {
33573 char *arch = q + 5;
33574 const arch_option *arm_selected_arch
33575 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33576
33577 if (!arm_selected_arch)
33578 {
33579 error ("invalid architecture for target attribute or pragma %qs",
33580 q);
33581 return false;
33582 }
33583
33584 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33585 }
33586 else if (q[0] == '+')
33587 {
33588 opts->x_arm_arch_string
33589 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33590 }
33591 else
33592 {
33593 error ("unknown target attribute or pragma %qs", q);
33594 return false;
33595 }
33596 }
33597
33598 return true;
33599 }
33600
33601 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33602
33603 tree
33604 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33605 struct gcc_options *opts_set)
33606 {
33607 struct cl_target_option cl_opts;
33608
33609 if (!arm_valid_target_attribute_rec (args, opts))
33610 return NULL_TREE;
33611
33612 cl_target_option_save (&cl_opts, opts, opts_set);
33613 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33614 arm_option_check_internal (opts);
33615 /* Do any overrides, such as global options arch=xxx.
33616 We do this since arm_active_target was overridden. */
33617 arm_option_reconfigure_globals ();
33618 arm_options_perform_arch_sanity_checks ();
33619 arm_option_override_internal (opts, opts_set);
33620
33621 return build_target_option_node (opts, opts_set);
33622 }
33623
33624 static void
33625 add_attribute (const char * mode, tree *attributes)
33626 {
33627 size_t len = strlen (mode);
33628 tree value = build_string (len, mode);
33629
33630 TREE_TYPE (value) = build_array_type (char_type_node,
33631 build_index_type (size_int (len)));
33632
33633 *attributes = tree_cons (get_identifier ("target"),
33634 build_tree_list (NULL_TREE, value),
33635 *attributes);
33636 }
33637
33638 /* For testing. Insert thumb or arm modes alternatively on functions. */
33639
33640 static void
33641 arm_insert_attributes (tree fndecl, tree * attributes)
33642 {
33643 const char *mode;
33644
33645 if (! TARGET_FLIP_THUMB)
33646 return;
33647
33648 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33649 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33650 return;
33651
33652 /* Nested definitions must inherit mode. */
33653 if (current_function_decl)
33654 {
33655 mode = TARGET_THUMB ? "thumb" : "arm";
33656 add_attribute (mode, attributes);
33657 return;
33658 }
33659
33660 /* If there is already a setting don't change it. */
33661 if (lookup_attribute ("target", *attributes) != NULL)
33662 return;
33663
33664 mode = thumb_flipper ? "thumb" : "arm";
33665 add_attribute (mode, attributes);
33666
33667 thumb_flipper = !thumb_flipper;
33668 }
33669
33670 /* Hook to validate attribute((target("string"))). */
33671
33672 static bool
33673 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33674 tree args, int ARG_UNUSED (flags))
33675 {
33676 bool ret = true;
33677 struct gcc_options func_options, func_options_set;
33678 tree cur_tree, new_optimize;
33679 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33680
33681 /* Get the optimization options of the current function. */
33682 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33683
33684 /* If the function changed the optimization levels as well as setting target
33685 options, start with the optimizations specified. */
33686 if (!func_optimize)
33687 func_optimize = optimization_default_node;
33688
33689 /* Init func_options. */
33690 memset (&func_options, 0, sizeof (func_options));
33691 init_options_struct (&func_options, NULL);
33692 lang_hooks.init_options_struct (&func_options);
33693 memset (&func_options_set, 0, sizeof (func_options_set));
33694
33695 /* Initialize func_options to the defaults. */
33696 cl_optimization_restore (&func_options, &func_options_set,
33697 TREE_OPTIMIZATION (func_optimize));
33698
33699 cl_target_option_restore (&func_options, &func_options_set,
33700 TREE_TARGET_OPTION (target_option_default_node));
33701
33702 /* Set func_options flags with new target mode. */
33703 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33704 &func_options_set);
33705
33706 if (cur_tree == NULL_TREE)
33707 ret = false;
33708
33709 new_optimize = build_optimization_node (&func_options, &func_options_set);
33710
33711 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33712
33713 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33714
33715 return ret;
33716 }
33717
33718 /* Match an ISA feature bitmap to a named FPU. We always use the
33719 first entry that exactly matches the feature set, so that we
33720 effectively canonicalize the FPU name for the assembler. */
33721 static const char*
33722 arm_identify_fpu_from_isa (sbitmap isa)
33723 {
33724 auto_sbitmap fpubits (isa_num_bits);
33725 auto_sbitmap cand_fpubits (isa_num_bits);
33726
33727 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33728
33729 /* If there are no ISA feature bits relating to the FPU, we must be
33730 doing soft-float. */
33731 if (bitmap_empty_p (fpubits))
33732 return "softvfp";
33733
33734 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33735 {
33736 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33737 if (bitmap_equal_p (fpubits, cand_fpubits))
33738 return all_fpus[i].name;
33739 }
33740 /* We must find an entry, or things have gone wrong. */
33741 gcc_unreachable ();
33742 }
33743
33744 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33745 by the function fndecl. */
33746 void
33747 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33748 {
33749 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33750
33751 struct cl_target_option *targ_options;
33752 if (target_parts)
33753 targ_options = TREE_TARGET_OPTION (target_parts);
33754 else
33755 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33756 gcc_assert (targ_options);
33757
33758 arm_print_asm_arch_directives (stream, targ_options);
33759
33760 fprintf (stream, "\t.syntax unified\n");
33761
33762 if (TARGET_THUMB)
33763 {
33764 if (is_called_in_ARM_mode (decl)
33765 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33766 && cfun->is_thunk))
33767 fprintf (stream, "\t.code 32\n");
33768 else if (TARGET_THUMB1)
33769 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33770 else
33771 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33772 }
33773 else
33774 fprintf (stream, "\t.arm\n");
33775
33776 if (TARGET_POKE_FUNCTION_NAME)
33777 arm_poke_function_name (stream, (const char *) name);
33778 }
33779
33780 /* If MEM is in the form of [base+offset], extract the two parts
33781 of the address and set them in BASE and OFFSET; otherwise return
33782 false after clearing BASE and OFFSET. */
33783
33784 static bool
33785 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33786 {
33787 rtx addr;
33788
33789 gcc_assert (MEM_P (mem));
33790
33791 addr = XEXP (mem, 0);
33792
33793 /* Strip off const from addresses like (const (addr)). */
33794 if (GET_CODE (addr) == CONST)
33795 addr = XEXP (addr, 0);
33796
33797 if (REG_P (addr))
33798 {
33799 *base = addr;
33800 *offset = const0_rtx;
33801 return true;
33802 }
33803
33804 if (GET_CODE (addr) == PLUS
33805 && GET_CODE (XEXP (addr, 0)) == REG
33806 && CONST_INT_P (XEXP (addr, 1)))
33807 {
33808 *base = XEXP (addr, 0);
33809 *offset = XEXP (addr, 1);
33810 return true;
33811 }
33812
33813 *base = NULL_RTX;
33814 *offset = NULL_RTX;
33815
33816 return false;
33817 }
33818
33819 /* If INSN is a load or store of an address in the form of [base+offset],
33820 extract the two parts and set them in BASE and OFFSET. IS_LOAD is set
33821 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33822 otherwise return FALSE. */
33823
33824 static bool
33825 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33826 {
33827 rtx x, dest, src;
33828
33829 gcc_assert (INSN_P (insn));
33830 x = PATTERN (insn);
33831 if (GET_CODE (x) != SET)
33832 return false;
33833
33834 src = SET_SRC (x);
33835 dest = SET_DEST (x);
33836 if (REG_P (src) && MEM_P (dest))
33837 {
33838 *is_load = false;
33839 extract_base_offset_in_addr (dest, base, offset);
33840 }
33841 else if (MEM_P (src) && REG_P (dest))
33842 {
33843 *is_load = true;
33844 extract_base_offset_in_addr (src, base, offset);
33845 }
33846 else
33847 return false;
33848
33849 return (*base != NULL_RTX && *offset != NULL_RTX);
33850 }
33851
33852 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33853
33854 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33855 and PRI are only calculated for these instructions. For other instructions,
33856 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33857 instruction fusion can be supported by returning different priorities.
33858
33859 It's important that irrelevant instructions get the largest FUSION_PRI. */
33860
33861 static void
33862 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33863 int *fusion_pri, int *pri)
33864 {
33865 int tmp, off_val;
33866 bool is_load;
33867 rtx base, offset;
33868
33869 gcc_assert (INSN_P (insn));
33870
33871 tmp = max_pri - 1;
33872 if (!fusion_load_store (insn, &base, &offset, &is_load))
33873 {
33874 *pri = tmp;
33875 *fusion_pri = tmp;
33876 return;
33877 }
33878
33879 /* Load goes first. */
33880 if (is_load)
33881 *fusion_pri = tmp - 1;
33882 else
33883 *fusion_pri = tmp - 2;
33884
33885 tmp /= 2;
33886
33887 /* INSN with smaller base register goes first. */
33888 tmp -= ((REGNO (base) & 0xff) << 20);
33889
33890 /* INSN with smaller offset goes first. */
33891 off_val = (int)(INTVAL (offset));
33892 if (off_val >= 0)
33893 tmp -= (off_val & 0xfffff);
33894 else
33895 tmp += ((- off_val) & 0xfffff);
33896
33897 *pri = tmp;
33898 return;
33899 }
33900
33901
33902 /* Construct and return a PARALLEL RTX vector with elements numbering the
33903 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33904 the vector - from the perspective of the architecture. This does not
33905 line up with GCC's perspective on lane numbers, so we end up with
33906 different masks depending on our target endian-ness. The diagram
33907 below may help. We must draw the distinction when building masks
33908 which select one half of the vector. An instruction selecting
33909 architectural low-lanes for a big-endian target, must be described using
33910 a mask selecting GCC high-lanes.
33911
33912 Big-Endian Little-Endian
33913
33914 GCC 0 1 2 3 3 2 1 0
33915 | x | x | x | x | | x | x | x | x |
33916 Architecture 3 2 1 0 3 2 1 0
33917
33918 Low Mask: { 2, 3 } { 0, 1 }
33919 High Mask: { 0, 1 } { 2, 3 }
33920 */
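/* For instance, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian, but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   table above. */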
33921
33922 rtx
33923 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33924 {
33925 int nunits = GET_MODE_NUNITS (mode);
33926 rtvec v = rtvec_alloc (nunits / 2);
33927 int high_base = nunits / 2;
33928 int low_base = 0;
33929 int base;
33930 rtx t1;
33931 int i;
33932
33933 if (BYTES_BIG_ENDIAN)
33934 base = high ? low_base : high_base;
33935 else
33936 base = high ? high_base : low_base;
33937
33938 for (i = 0; i < nunits / 2; i++)
33939 RTVEC_ELT (v, i) = GEN_INT (base + i);
33940
33941 t1 = gen_rtx_PARALLEL (mode, v);
33942 return t1;
33943 }
33944
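/* For example, for V4SImode with HIGH == true this returns
   (parallel:V4SI [(const_int 2) (const_int 3)]) on a little-endian target
   and (parallel:V4SI [(const_int 0) (const_int 1)]) on a big-endian target,
   matching the "High Mask" row of the diagram above.  */
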
33945 /* Check OP for validity as a PARALLEL RTX vector with elements
33946 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33947 from the perspective of the architecture. See the diagram above
33948 arm_simd_vect_par_cnst_half for more details. */
33949
33950 bool
33951 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33952 bool high)
33953 {
33954 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33955 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33956 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33957 int i = 0;
33958
33959 if (!VECTOR_MODE_P (mode))
33960 return false;
33961
33962 if (count_op != count_ideal)
33963 return false;
33964
33965 for (i = 0; i < count_ideal; i++)
33966 {
33967 rtx elt_op = XVECEXP (op, 0, i);
33968 rtx elt_ideal = XVECEXP (ideal, 0, i);
33969
33970 if (!CONST_INT_P (elt_op)
33971 || INTVAL (elt_ideal) != INTVAL (elt_op))
33972 return false;
33973 }
33974 return true;
33975 }
33976
33977 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
33978 in Thumb1. */
33979 static bool
33980 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33981 const_tree)
33982 {
33983 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33984 if (vcall_offset && TARGET_THUMB1)
33985 return false;
33986
33987 /* Otherwise ok. */
33988 return true;
33989 }
33990
33991 /* Generate RTL for a conditional branch with rtx comparison CODE in
33992 mode CC_MODE. The destination of the unlikely conditional branch
33993 is LABEL_REF. */
33994
33995 void
33996 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33997 rtx label_ref)
33998 {
33999 rtx x;
34000 x = gen_rtx_fmt_ee (code, VOIDmode,
34001 gen_rtx_REG (cc_mode, CC_REGNUM),
34002 const0_rtx);
34003
34004 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34005 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34006 pc_rtx);
34007 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34008 }
34009
34010 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34011
34012 For pure-code sections there is no letter code for this attribute, so
34013 output all the section flags numerically when this is needed. */
34014
34015 static bool
34016 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34017 {
34018
34019 if (flags & SECTION_ARM_PURECODE)
34020 {
34021 *num = 0x20000000;
34022
34023 if (!(flags & SECTION_DEBUG))
34024 *num |= 0x2;
34025 if (flags & SECTION_EXCLUDE)
34026 *num |= 0x80000000;
34027 if (flags & SECTION_WRITE)
34028 *num |= 0x1;
34029 if (flags & SECTION_CODE)
34030 *num |= 0x4;
34031 if (flags & SECTION_MERGE)
34032 *num |= 0x10;
34033 if (flags & SECTION_STRINGS)
34034 *num |= 0x20;
34035 if (flags & SECTION_TLS)
34036 *num |= 0x400;
34037 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34038 *num |= 0x200;
34039
34040 return true;
34041 }
34042
34043 return false;
34044 }
34045
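/* For example, a plain -mpure-code text section (SECTION_CODE set,
   SECTION_DEBUG and the other flags above clear) is emitted with
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together
   with the usual SHF_ALLOC and SHF_EXECINSTR bits.  */
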
34046 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34047
34048 If pure-code is passed as an option, make sure all functions are in
34049 sections that have the SHF_ARM_PURECODE attribute. */
34050
34051 static section *
34052 arm_function_section (tree decl, enum node_frequency freq,
34053 bool startup, bool exit)
34054 {
34055 const char * section_name;
34056 section * sec;
34057
34058 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34059 return default_function_section (decl, freq, startup, exit);
34060
34061 if (!target_pure_code)
34062 return default_function_section (decl, freq, startup, exit);
34063
34064
34065 section_name = DECL_SECTION_NAME (decl);
34066
34067 /* If a function is not in a named section then it falls under the 'default'
34068 text section, also known as '.text'. We can preserve previous behavior as
34069 the default text section already has the SHF_ARM_PURECODE section
34070 attribute. */
34071 if (!section_name)
34072 {
34073 section *default_sec = default_function_section (decl, freq, startup,
34074 exit);
34075
34076 /* If default_sec is not null, then it must be a special section like for
34077 example .text.startup. We set the pure-code attribute and return the
34078 same section to preserve existing behavior. */
34079 if (default_sec)
34080 default_sec->common.flags |= SECTION_ARM_PURECODE;
34081 return default_sec;
34082 }
34083
34084 /* Otherwise look whether a section has already been created with
34085 'section_name'. */
34086 sec = get_named_section (decl, section_name, 0);
34087 if (!sec)
34088 /* If that is not the case, passing NULL as the section's name to
34089 'get_named_section' will create a section with the declaration's
34090 section name. */
34091 sec = get_named_section (decl, NULL, 0);
34092
34093 /* Set the SHF_ARM_PURECODE attribute. */
34094 sec->common.flags |= SECTION_ARM_PURECODE;
34095
34096 return sec;
34097 }
34098
34099 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
34100
34101 If DECL is a function declaration and pure-code is passed as an option
34102 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
34103 section's name and RELOC indicates whether the declaration's initializer may
34104 contain runtime relocations. */
34105
34106 static unsigned int
34107 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34108 {
34109 unsigned int flags = default_section_type_flags (decl, name, reloc);
34110
34111 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34112 flags |= SECTION_ARM_PURECODE;
34113
34114 return flags;
34115 }
34116
34117 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34118
34119 static void
34120 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34121 rtx op0, rtx op1,
34122 rtx *quot_p, rtx *rem_p)
34123 {
34124 if (mode == SImode)
34125 gcc_assert (!TARGET_IDIV);
34126
34127 scalar_int_mode libval_mode
34128 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34129
34130 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34131 libval_mode, op0, mode, op1, mode);
34132
34133 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34134 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34135 GET_MODE_SIZE (mode));
34136
34137 gcc_assert (quotient);
34138 gcc_assert (remainder);
34139
34140 *quot_p = quotient;
34141 *rem_p = remainder;
34142 }
34143
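/* For example, for an SImode division on a core without hardware integer
   divide, LIBVAL_MODE is DImode: the library call returns the quotient and
   remainder packed into a single DImode value, from which the quotient is
   extracted as the subreg at byte 0 and the remainder as the subreg at byte
   GET_MODE_SIZE (SImode), i.e. 4.  */
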
34144 /* This function checks for the availability of the coprocessor builtin passed
34145 in BUILTIN for the current target. Returns true if it is available and
34146 false otherwise. If a BUILTIN is passed for which this function has not
34147 been implemented it will cause an internal compiler error. */
34148
34149 bool
34150 arm_coproc_builtin_available (enum unspecv builtin)
34151 {
34152 /* None of these builtins are available in Thumb mode if the target only
34153 supports Thumb-1. */
34154 if (TARGET_THUMB1)
34155 return false;
34156
34157 switch (builtin)
34158 {
34159 case VUNSPEC_CDP:
34160 case VUNSPEC_LDC:
34161 case VUNSPEC_LDCL:
34162 case VUNSPEC_STC:
34163 case VUNSPEC_STCL:
34164 case VUNSPEC_MCR:
34165 case VUNSPEC_MRC:
34166 if (arm_arch4)
34167 return true;
34168 break;
34169 case VUNSPEC_CDP2:
34170 case VUNSPEC_LDC2:
34171 case VUNSPEC_LDC2L:
34172 case VUNSPEC_STC2:
34173 case VUNSPEC_STC2L:
34174 case VUNSPEC_MCR2:
34175 case VUNSPEC_MRC2:
34176 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34177 ARMv8-{A,M}. */
34178 if (arm_arch5t)
34179 return true;
34180 break;
34181 case VUNSPEC_MCRR:
34182 case VUNSPEC_MRRC:
34183 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34184 ARMv8-{A,M}. */
34185 if (arm_arch6 || arm_arch5te)
34186 return true;
34187 break;
34188 case VUNSPEC_MCRR2:
34189 case VUNSPEC_MRRC2:
34190 if (arm_arch6)
34191 return true;
34192 break;
34193 default:
34194 gcc_unreachable ();
34195 }
34196 return false;
34197 }
34198
34199 /* This function returns true if OP is a valid memory operand for the ldc and
34200 stc coprocessor instructions and false otherwise. */
34201
34202 bool
34203 arm_coproc_ldc_stc_legitimate_address (rtx op)
34204 {
34205 HOST_WIDE_INT range;
34206 /* Has to be a memory operand. */
34207 if (!MEM_P (op))
34208 return false;
34209
34210 op = XEXP (op, 0);
34211
34212 /* We accept registers. */
34213 if (REG_P (op))
34214 return true;
34215
34216 switch (GET_CODE (op))
34217 {
34218 case PLUS:
34219 {
34220 /* Or registers with an offset. */
34221 if (!REG_P (XEXP (op, 0)))
34222 return false;
34223
34224 op = XEXP (op, 1);
34225
34226 /* The offset must be an immediate though. */
34227 if (!CONST_INT_P (op))
34228 return false;
34229
34230 range = INTVAL (op);
34231
34232 /* Within the range of [-1020,1020]. */
34233 if (!IN_RANGE (range, -1020, 1020))
34234 return false;
34235
34236 /* And a multiple of 4. */
34237 return (range % 4) == 0;
34238 }
34239 case PRE_INC:
34240 case POST_INC:
34241 case PRE_DEC:
34242 case POST_DEC:
34243 return REG_P (XEXP (op, 0));
34244 default:
34245 gcc_unreachable ();
34246 }
34247 return false;
34248 }
34249
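/* For example, [r0], [r2, #8] and [r2, #-1020] are legitimate here, while
   [r2, #2] is rejected (not a multiple of 4) and [r2, #1024] is rejected
   (outside [-1020, 1020]); pre/post increment and decrement forms are
   accepted as long as they use a plain register.  */
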
34250 /* Return the diagnostic message string if conversion from FROMTYPE to
34251 TOTYPE is not allowed, NULL otherwise. */
34252
34253 static const char *
34254 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34255 {
34256 if (element_mode (fromtype) != element_mode (totype))
34257 {
34258 /* Do not allow conversions to/from BFmode scalar types. */
34259 if (TYPE_MODE (fromtype) == BFmode)
34260 return N_("invalid conversion from type %<bfloat16_t%>");
34261 if (TYPE_MODE (totype) == BFmode)
34262 return N_("invalid conversion to type %<bfloat16_t%>");
34263 }
34264
34265 /* Conversion allowed. */
34266 return NULL;
34267 }
34268
34269 /* Return the diagnostic message string if the unary operation OP is
34270 not permitted on TYPE, NULL otherwise. */
34271
34272 static const char *
34273 arm_invalid_unary_op (int op, const_tree type)
34274 {
34275 /* Reject all single-operand operations on BFmode except for &. */
34276 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34277 return N_("operation not permitted on type %<bfloat16_t%>");
34278
34279 /* Operation allowed. */
34280 return NULL;
34281 }
34282
34283 /* Return the diagnostic message string if the binary operation OP is
34284 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34285
34286 static const char *
34287 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34288 const_tree type2)
34289 {
34290 /* Reject all 2-operand operations on BFmode. */
34291 if (element_mode (type1) == BFmode
34292 || element_mode (type2) == BFmode)
34293 return N_("operation not permitted on type %<bfloat16_t%>");
34294
34295 /* Operation allowed. */
34296 return NULL;
34297 }
34298
34299 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34300
34301 In VFPv1, VFP registers could only be accessed in the mode they were
34302 set, so subregs would be invalid there. However, we don't support
34303 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34304
34305 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34306 VFP registers in little-endian order. We can't describe that accurately to
34307 GCC, so avoid taking subregs of such values.
34308
34309 The only exception is going from a 128-bit to a 64-bit type. In that
34310 case the data layout happens to be consistent for big-endian, so we
34311 explicitly allow that case. */
34312
34313 static bool
34314 arm_can_change_mode_class (machine_mode from, machine_mode to,
34315 reg_class_t rclass)
34316 {
34317 if (TARGET_BIG_END
34318 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34319 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34320 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34321 && reg_classes_intersect_p (VFP_REGS, rclass))
34322 return false;
34323 return true;
34324 }
34325
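/* For example, on a big-endian target a subreg that accesses a DFmode VFP
   value as SImode is rejected (8 bytes is wider than UNITS_PER_WORD), while
   the 128-bit to 64-bit case such as V2DImode to DImode remains allowed, and
   on little-endian targets every mode change is accepted.  */
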
34326 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34327 strcpy from constants will be faster. */
34328
34329 static HOST_WIDE_INT
34330 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34331 {
34332 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34333 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34334 return MAX (align, BITS_PER_WORD * factor);
34335 return align;
34336 }
34337
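/* For example, when not optimizing for size a string constant gets at least
   BITS_PER_WORD (32-bit) alignment, or twice that when tuning for XScale in
   ARM mode; other constants keep whatever alignment they already had.  */
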
34338 /* Emit a speculation barrier on target architectures that do not have
34339 DSB/ISB directly. Such systems probably don't need a barrier
34340 themselves, but if the code is ever run on a later architecture, it
34341 might become a problem. */
34342 void
34343 arm_emit_speculation_barrier_function ()
34344 {
34345 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34346 }
34347
34348 /* Have we recorded an explicit access to the Q bit of APSR? */
34349 bool
34350 arm_q_bit_access (void)
34351 {
34352 if (cfun && cfun->decl)
34353 return lookup_attribute ("acle qbit",
34354 DECL_ATTRIBUTES (cfun->decl));
34355 return true;
34356 }
34357
34358 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34359 bool
34360 arm_ge_bits_access (void)
34361 {
34362 if (cfun && cfun->decl)
34363 return lookup_attribute ("acle gebits",
34364 DECL_ATTRIBUTES (cfun->decl));
34365 return true;
34366 }
34367
34368 /* Return NULL if insn INSN is valid within a low-overhead loop.
34369 Otherwise return a string explaining why doloop cannot be applied. */
34370
34371 static const char *
34372 arm_invalid_within_doloop (const rtx_insn *insn)
34373 {
34374 if (!TARGET_HAVE_LOB)
34375 return default_invalid_within_doloop (insn);
34376
34377 if (CALL_P (insn))
34378 return "Function call in the loop.";
34379
34380 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34381 return "LR is used inside loop.";
34382
34383 return NULL;
34384 }
34385
34386 bool
34387 arm_target_insn_ok_for_lob (rtx insn)
34388 {
34389 basic_block bb = BLOCK_FOR_INSN (insn);
34390 /* Make sure the basic block of the target insn is a simple latch
34391 whose single predecessor and single successor are the body of the loop
34392 itself. Only simple loops with a single basic block as body are
34393 supported for 'low-overhead loop', making sure that the LE target is
34394 above LE itself in the generated code. */
34395
34396 return single_succ_p (bb)
34397 && single_pred_p (bb)
34398 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34399 && contains_no_active_insn_p (bb);
34400 }
34401
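/* For example, a doloop whose body is a single basic block ending in the LE
   branch qualifies when the branch target is an empty latch block whose only
   predecessor and only successor are both that body block; any extra blocks,
   or active insns in the latch, make the loop unsuitable.  */
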
34402 #if CHECKING_P
34403 namespace selftest {
34404
34405 /* Scan the static data tables generated by parsecpu.awk looking for
34406 potential issues with the data. We primarily check for
34407 inconsistencies in the option extensions at present (extensions
34408 that duplicate others but aren't marked as aliases). Furthermore,
34409 for correct canonicalization later options must never be a subset
34410 of an earlier option. Any extension should also only specify other
34411 feature bits and never an architecture bit. The architecture is inferred
34412 from the declaration of the extension. */
34413 static void
34414 arm_test_cpu_arch_data (void)
34415 {
34416 const arch_option *arch;
34417 const cpu_option *cpu;
34418 auto_sbitmap target_isa (isa_num_bits);
34419 auto_sbitmap isa1 (isa_num_bits);
34420 auto_sbitmap isa2 (isa_num_bits);
34421
34422 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34423 {
34424 const cpu_arch_extension *ext1, *ext2;
34425
34426 if (arch->common.extensions == NULL)
34427 continue;
34428
34429 arm_initialize_isa (target_isa, arch->common.isa_bits);
34430
34431 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34432 {
34433 if (ext1->alias)
34434 continue;
34435
34436 arm_initialize_isa (isa1, ext1->isa_bits);
34437 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34438 {
34439 if (ext2->alias || ext1->remove != ext2->remove)
34440 continue;
34441
34442 arm_initialize_isa (isa2, ext2->isa_bits);
34443 /* If the option is a subset of the parent option, it doesn't
34444 add anything and so isn't useful. */
34445 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34446
34447 /* If the extension specifies any architectural bits then
34448 disallow it. Extensions should only specify feature bits. */
34449 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34450 }
34451 }
34452 }
34453
34454 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34455 {
34456 const cpu_arch_extension *ext1, *ext2;
34457
34458 if (cpu->common.extensions == NULL)
34459 continue;
34460
34461 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34462
34463 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34464 {
34465 if (ext1->alias)
34466 continue;
34467
34468 arm_initialize_isa (isa1, ext1->isa_bits);
34469 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34470 {
34471 if (ext2->alias || ext1->remove != ext2->remove)
34472 continue;
34473
34474 arm_initialize_isa (isa2, ext2->isa_bits);
34475 /* If the option is a subset of the parent option, it doesn't
34476 add anything and so isn't useful. */
34477 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34478
34479 /* If the extension specifies any architectural bits then
34480 disallow it. Extensions should only specify feature bits. */
34481 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34482 }
34483 }
34484 }
34485 }
34486
34487 /* Scan the static data tables generated by parsecpu.awk looking for
34488 potential issues with the data. Here we check for consistency between the
34489 fpu bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34490 a feature bit that is not defined by any FPU flag. */
34491 static void
34492 arm_test_fpu_data (void)
34493 {
34494 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34495 auto_sbitmap fpubits (isa_num_bits);
34496 auto_sbitmap tmpset (isa_num_bits);
34497
34498 static const enum isa_feature fpu_bitlist_internal[]
34499 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34500 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34501
34502 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34503 {
34504 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34505 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34506 bitmap_clear (isa_all_fpubits_internal);
34507 bitmap_copy (isa_all_fpubits_internal, tmpset);
34508 }
34509
34510 if (!bitmap_empty_p (isa_all_fpubits_internal))
34511 {
34512 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34513 " group that are not defined by any FPU.\n"
34514 " Check your arm-cpus.in.\n");
34515 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34516 }
34517 }
34518
34519 static void
34520 arm_run_selftests (void)
34521 {
34522 arm_test_cpu_arch_data ();
34523 arm_test_fpu_data ();
34524 }
34525 } /* Namespace selftest. */
34526
34527 #undef TARGET_RUN_TARGET_SELFTESTS
34528 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34529 #endif /* CHECKING_P */
34530
34531 /* Implement TARGET_STACK_PROTECT_GUARD. In the case of a
34532 global-variable-based guard, use the default; otherwise
34533 return a null tree. */
34534 static tree
34535 arm_stack_protect_guard (void)
34536 {
34537 if (arm_stack_protector_guard == SSP_GLOBAL)
34538 return default_stack_protect_guard ();
34539
34540 return NULL_TREE;
34541 }
34542
34543 /* Worker function for TARGET_MD_ASM_ADJUST when in Thumb1 mode.
34544 Unlike the arm version, we do NOT implement asm flag outputs. */
34545
34546 rtx_insn *
34547 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34548 vec<machine_mode> & /*input_modes*/,
34549 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34550 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34551 {
34552 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34553 if (startswith (constraints[i], "=@cc"))
34554 {
34555 sorry ("%<asm%> flags not supported in thumb1 mode");
34556 break;
34557 }
34558 return NULL;
34559 }
34560
34561 /* Generate code to enable conditional branches in functions over 1 MiB.
34562 Parameters are:
34563 operands: the operands list of the asm insn (see arm_cond_branch or
34564 arm_cond_branch_reversed).
34565 pos_label: an index into the operands array; operands[pos_label] is
34566 the asm label of the final jump destination.
34567 dest: a string used to generate the asm label of the intermediate
34568 destination.
34569 branch_format: a string denoting the intermediate branch format, e.g.
34570 "beq", "bne", etc. */
34571
34572 const char *
34573 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34574 const char * branch_format)
34575 {
34576 rtx_code_label * tmp_label = gen_label_rtx ();
34577 char label_buf[256];
34578 char buffer[128];
34579 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
34580 CODE_LABEL_NUMBER (tmp_label));
34581 const char *label_ptr = arm_strip_name_encoding (label_buf);
34582 rtx dest_label = operands[pos_label];
34583 operands[pos_label] = tmp_label;
34584
34585 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
34586 output_asm_insn (buffer, operands);
34587
34588 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34589 operands[pos_label] = dest_label;
34590 output_asm_insn (buffer, operands);
34591 return "";
34592 }
34593
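/* For example, with BRANCH_FORMAT "beq\t" this first emits the short
   conditional branch to a freshly generated local label (named from DEST),
   then an unconditional "b" to the original far destination in
   operands[POS_LABEL], and finally the local label itself; the condition in
   BRANCH_FORMAT therefore guards the fall-through around the far branch,
   i.e. it is the inverse of the condition under which the far destination
   is reached.  */
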
34594 /* If the given mode matches, restrict base registers for loads from memory
34595 to LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34596 enum reg_class
34597 arm_mode_base_reg_class (machine_mode mode)
34598 {
34599 if (TARGET_HAVE_MVE
34600 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34601 return LO_REGS;
34602
34603 return MODE_BASE_REG_REG_CLASS (mode);
34604 }
34605
34606 struct gcc_target targetm = TARGET_INITIALIZER;
34607
34608 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34609
34610 opt_machine_mode
34611 arm_get_mask_mode (machine_mode mode)
34612 {
34613 if (TARGET_HAVE_MVE)
34614 return arm_mode_to_pred_mode (mode);
34615
34616 return default_get_mask_mode (mode);
34617 }
34618
34619 #include "gt-arm.h"