gcc/config/arm/arm.cc (thirdparty/gcc.git, as of "[PATCH 10/15] arm: Implement cortex-M return signing address codegen")
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75 #include "aarch-common.h"
76 #include "aarch-common-protos.h"
77
78 /* This file should be included last. */
79 #include "target-def.h"
80
81 /* Forward definitions of types. */
82 typedef struct minipool_node Mnode;
83 typedef struct minipool_fixup Mfix;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
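/* A group of four integers, used by optimal_immediate_sequence (declared
   below) to return the immediates of a constant-building sequence.  */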
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_insn_cost (rtx_insn *, bool);
185 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
186 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
187 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
188 static void emit_constant_insn (rtx cond, rtx pattern);
189 static rtx_insn *emit_set_insn (rtx, rtx);
190 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
191 static rtx emit_multi_reg_push (unsigned long, unsigned long);
192 static void arm_emit_multi_reg_pop (unsigned long);
193 static int vfp_emit_fstmd (int, int);
194 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
195 static int arm_arg_partial_bytes (cumulative_args_t,
196 const function_arg_info &);
197 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
198 static void arm_function_arg_advance (cumulative_args_t,
199 const function_arg_info &);
200 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
201 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
202 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
203 const_tree);
204 static rtx aapcs_libcall_value (machine_mode);
205 static int aapcs_select_return_coproc (const_tree, const_tree);
206
207 #ifdef OBJECT_FORMAT_ELF
208 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
209 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
210 #endif
211 #ifndef ARM_PE
212 static void arm_encode_section_info (tree, rtx, int);
213 #endif
214
215 static void arm_file_end (void);
216 static void arm_file_start (void);
217 static void arm_insert_attributes (tree, tree *);
218
219 static void arm_setup_incoming_varargs (cumulative_args_t,
220 const function_arg_info &, int *, int);
221 static bool arm_pass_by_reference (cumulative_args_t,
222 const function_arg_info &);
223 static bool arm_promote_prototypes (const_tree);
224 static bool arm_default_short_enums (void);
225 static bool arm_align_anon_bitfield (void);
226 static bool arm_return_in_msb (const_tree);
227 static bool arm_must_pass_in_stack (const function_arg_info &);
228 static bool arm_return_in_memory (const_tree, const_tree);
229 #if ARM_UNWIND_INFO
230 static void arm_unwind_emit (FILE *, rtx_insn *);
231 static bool arm_output_ttype (rtx);
232 static void arm_asm_emit_except_personality (rtx);
233 #endif
234 static void arm_asm_init_sections (void);
235 static rtx arm_dwarf_register_span (rtx);
236
237 static tree arm_cxx_guard_type (void);
238 static bool arm_cxx_guard_mask_bit (void);
239 static tree arm_get_cookie_size (tree);
240 static bool arm_cookie_has_size (void);
241 static bool arm_cxx_cdtor_returns_this (void);
242 static bool arm_cxx_key_method_may_be_inline (void);
243 static void arm_cxx_determine_class_data_visibility (tree);
244 static bool arm_cxx_class_data_always_comdat (void);
245 static bool arm_cxx_use_aeabi_atexit (void);
246 static void arm_init_libfuncs (void);
247 static tree arm_build_builtin_va_list (void);
248 static void arm_expand_builtin_va_start (tree, rtx);
249 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
250 static void arm_option_override (void);
251 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
252 struct cl_target_option *);
253 static void arm_override_options_after_change (void);
254 static void arm_option_print (FILE *, int, struct cl_target_option *);
255 static void arm_set_current_function (tree);
256 static bool arm_can_inline_p (tree, tree);
257 static void arm_relayout_function (tree);
258 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_sched_can_speculate_insn (rtx_insn *);
261 static bool arm_macro_fusion_p (void);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
265 static int arm_first_cycle_multipass_dfa_lookahead (void);
266 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
267 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
268 static bool arm_output_addr_const_extra (FILE *, rtx);
269 static bool arm_allocate_stack_slots_for_args (void);
270 static bool arm_warn_func_return (tree);
271 static tree arm_promoted_type (const_tree t);
272 static bool arm_scalar_mode_supported_p (scalar_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (scalar_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
300
301 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
302 rtx, const vec_perm_indices &);
303
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
305
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
309
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
328 vec<machine_mode> &,
329 vec<const char *> &, vec<rtx> &,
330 HARD_REG_SET &, location_t);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
332 \f
333 /* Table of machine attributes. */
334 static const struct attribute_spec arm_attribute_table[] =
335 {
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 /* Function calls made to this symbol must be done indirectly, because
339 it may lie outside of the 26 bit addressing range of a normal function
340 call. */
341 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Whereas these functions are always known to reside within the 26 bit
343 addressing range. */
344 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Specify the procedure call conventions for a function. */
346 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
347 NULL },
348 /* Interrupt Service Routines have special prologue and epilogue requirements. */
349 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "naked", 0, 0, true, false, false, false,
354 arm_handle_fndecl_attribute, NULL },
355 #ifdef ARM_PE
356 /* ARM/PE has three new attributes:
357 interfacearm - ?
358 dllexport - for exporting a function/variable that will live in a dll
359 dllimport - for importing a function/variable from a dll
360
361 Microsoft allows multiple declspecs in one __declspec, separating
362 them with spaces. We do NOT support this. Instead, use __declspec
363 multiple times.
364 */
365 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
366 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
367 { "interfacearm", 0, 0, true, false, false, false,
368 arm_handle_fndecl_attribute, NULL },
369 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
370 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
373 NULL },
374 { "notshared", 0, 0, false, true, false, false,
375 arm_handle_notshared_attribute, NULL },
376 #endif
377 /* ARMv8-M Security Extensions support. */
378 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
379 arm_handle_cmse_nonsecure_entry, NULL },
380 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
381 arm_handle_cmse_nonsecure_call, NULL },
382 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
383 { NULL, 0, 0, false, false, false, false, NULL, NULL }
384 };
385 \f
386 /* Initialize the GCC target structure. */
387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
388 #undef TARGET_MERGE_DECL_ATTRIBUTES
389 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #endif
391
392 #undef TARGET_CHECK_BUILTIN_CALL
393 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
394
395 #undef TARGET_LEGITIMIZE_ADDRESS
396 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
397
398 #undef TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
400
401 #undef TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
403
404 #undef TARGET_ASM_FILE_START
405 #define TARGET_ASM_FILE_START arm_file_start
406 #undef TARGET_ASM_FILE_END
407 #define TARGET_ASM_FILE_END arm_file_end
408
409 #undef TARGET_ASM_ALIGNED_SI_OP
410 #define TARGET_ASM_ALIGNED_SI_OP NULL
411 #undef TARGET_ASM_INTEGER
412 #define TARGET_ASM_INTEGER arm_assemble_integer
413
414 #undef TARGET_PRINT_OPERAND
415 #define TARGET_PRINT_OPERAND arm_print_operand
416 #undef TARGET_PRINT_OPERAND_ADDRESS
417 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
418 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
419 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
420
421 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
422 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
423
424 #undef TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
426
427 #undef TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
429
430 #undef TARGET_CAN_INLINE_P
431 #define TARGET_CAN_INLINE_P arm_can_inline_p
432
433 #undef TARGET_RELAYOUT_FUNCTION
434 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
435
436 #undef TARGET_OPTION_OVERRIDE
437 #define TARGET_OPTION_OVERRIDE arm_option_override
438
439 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
440 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
441
442 #undef TARGET_OPTION_RESTORE
443 #define TARGET_OPTION_RESTORE arm_option_restore
444
445 #undef TARGET_OPTION_PRINT
446 #define TARGET_OPTION_PRINT arm_option_print
447
448 #undef TARGET_COMP_TYPE_ATTRIBUTES
449 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
450
451 #undef TARGET_SCHED_CAN_SPECULATE_INSN
452 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
453
454 #undef TARGET_SCHED_MACRO_FUSION_P
455 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
456
457 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
458 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
459
460 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
461 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
462
463 #undef TARGET_SCHED_ADJUST_COST
464 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
465
466 #undef TARGET_SET_CURRENT_FUNCTION
467 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
468
469 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
470 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
471
472 #undef TARGET_SCHED_REORDER
473 #define TARGET_SCHED_REORDER arm_sched_reorder
474
475 #undef TARGET_REGISTER_MOVE_COST
476 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
477
478 #undef TARGET_MEMORY_MOVE_COST
479 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
480
481 #undef TARGET_ENCODE_SECTION_INFO
482 #ifdef ARM_PE
483 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
484 #else
485 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
486 #endif
487
488 #undef TARGET_STRIP_NAME_ENCODING
489 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
490
491 #undef TARGET_ASM_INTERNAL_LABEL
492 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
493
494 #undef TARGET_FLOATN_MODE
495 #define TARGET_FLOATN_MODE arm_floatn_mode
496
497 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
498 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
499
500 #undef TARGET_FUNCTION_VALUE
501 #define TARGET_FUNCTION_VALUE arm_function_value
502
503 #undef TARGET_LIBCALL_VALUE
504 #define TARGET_LIBCALL_VALUE arm_libcall_value
505
506 #undef TARGET_FUNCTION_VALUE_REGNO_P
507 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
508
509 #undef TARGET_ASM_OUTPUT_MI_THUNK
510 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
511 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
512 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
513
514 #undef TARGET_RTX_COSTS
515 #define TARGET_RTX_COSTS arm_rtx_costs
516 #undef TARGET_ADDRESS_COST
517 #define TARGET_ADDRESS_COST arm_address_cost
518 #undef TARGET_INSN_COST
519 #define TARGET_INSN_COST arm_insn_cost
520
521 #undef TARGET_SHIFT_TRUNCATION_MASK
522 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
523 #undef TARGET_VECTOR_MODE_SUPPORTED_P
524 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
525 #undef TARGET_ARRAY_MODE_SUPPORTED_P
526 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
527 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
528 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
529 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
530 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
531 arm_autovectorize_vector_modes
532
533 #undef TARGET_MACHINE_DEPENDENT_REORG
534 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
535
536 #undef TARGET_INIT_BUILTINS
537 #define TARGET_INIT_BUILTINS arm_init_builtins
538 #undef TARGET_EXPAND_BUILTIN
539 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
540 #undef TARGET_BUILTIN_DECL
541 #define TARGET_BUILTIN_DECL arm_builtin_decl
542
543 #undef TARGET_INIT_LIBFUNCS
544 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
545
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
548 #undef TARGET_PROMOTE_PROTOTYPES
549 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
550 #undef TARGET_PASS_BY_REFERENCE
551 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
552 #undef TARGET_ARG_PARTIAL_BYTES
553 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
554 #undef TARGET_FUNCTION_ARG
555 #define TARGET_FUNCTION_ARG arm_function_arg
556 #undef TARGET_FUNCTION_ARG_ADVANCE
557 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
558 #undef TARGET_FUNCTION_ARG_PADDING
559 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
560 #undef TARGET_FUNCTION_ARG_BOUNDARY
561 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
562
563 #undef TARGET_SETUP_INCOMING_VARARGS
564 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
565
566 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
567 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
568
569 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
570 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
571 #undef TARGET_TRAMPOLINE_INIT
572 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
573 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
574 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
575
576 #undef TARGET_WARN_FUNC_RETURN
577 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
578
579 #undef TARGET_DEFAULT_SHORT_ENUMS
580 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
581
582 #undef TARGET_ALIGN_ANON_BITFIELD
583 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
584
585 #undef TARGET_NARROW_VOLATILE_BITFIELD
586 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
587
588 #undef TARGET_CXX_GUARD_TYPE
589 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
590
591 #undef TARGET_CXX_GUARD_MASK_BIT
592 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
593
594 #undef TARGET_CXX_GET_COOKIE_SIZE
595 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
596
597 #undef TARGET_CXX_COOKIE_HAS_SIZE
598 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
599
600 #undef TARGET_CXX_CDTOR_RETURNS_THIS
601 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
602
603 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
604 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
605
606 #undef TARGET_CXX_USE_AEABI_ATEXIT
607 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
608
609 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
610 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
611 arm_cxx_determine_class_data_visibility
612
613 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
614 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
615
616 #undef TARGET_RETURN_IN_MSB
617 #define TARGET_RETURN_IN_MSB arm_return_in_msb
618
619 #undef TARGET_RETURN_IN_MEMORY
620 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
621
622 #undef TARGET_MUST_PASS_IN_STACK
623 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
624
625 #if ARM_UNWIND_INFO
626 #undef TARGET_ASM_UNWIND_EMIT
627 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
628
629 /* EABI unwinding tables use a different format for the typeinfo tables. */
630 #undef TARGET_ASM_TTYPE
631 #define TARGET_ASM_TTYPE arm_output_ttype
632
633 #undef TARGET_ARM_EABI_UNWINDER
634 #define TARGET_ARM_EABI_UNWINDER true
635
636 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
637 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
638
639 #endif /* ARM_UNWIND_INFO */
640
641 #undef TARGET_ASM_INIT_SECTIONS
642 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
643
644 #undef TARGET_DWARF_REGISTER_SPAN
645 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
646
647 #undef TARGET_CANNOT_COPY_INSN_P
648 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
649
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_HAVE_TLS
652 #define TARGET_HAVE_TLS true
653 #endif
654
655 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
656 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
657
658 #undef TARGET_LEGITIMATE_CONSTANT_P
659 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
660
661 #undef TARGET_CANNOT_FORCE_CONST_MEM
662 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
663
664 #undef TARGET_MAX_ANCHOR_OFFSET
665 #define TARGET_MAX_ANCHOR_OFFSET 4095
666
667 /* The minimum is set such that the total size of the block
668 for a particular anchor is -4088 + 1 + 4095 bytes, which is
669 divisible by eight, ensuring natural spacing of anchors. */
670 #undef TARGET_MIN_ANCHOR_OFFSET
671 #define TARGET_MIN_ANCHOR_OFFSET -4088
672
673 #undef TARGET_SCHED_ISSUE_RATE
674 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
675
676 #undef TARGET_SCHED_VARIABLE_ISSUE
677 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
678
679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
681 arm_first_cycle_multipass_dfa_lookahead
682
683 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
684 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
685 arm_first_cycle_multipass_dfa_lookahead_guard
686
687 #undef TARGET_MANGLE_TYPE
688 #define TARGET_MANGLE_TYPE arm_mangle_type
689
690 #undef TARGET_INVALID_CONVERSION
691 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
692
693 #undef TARGET_INVALID_UNARY_OP
694 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
695
696 #undef TARGET_INVALID_BINARY_OP
697 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
698
699 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
700 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
701
702 #undef TARGET_BUILD_BUILTIN_VA_LIST
703 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
704 #undef TARGET_EXPAND_BUILTIN_VA_START
705 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
707 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
708
709 #ifdef HAVE_AS_TLS
710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
712 #endif
713
714 #undef TARGET_LEGITIMATE_ADDRESS_P
715 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
716
717 #undef TARGET_PREFERRED_RELOAD_CLASS
718 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
719
720 #undef TARGET_PROMOTED_TYPE
721 #define TARGET_PROMOTED_TYPE arm_promoted_type
722
723 #undef TARGET_SCALAR_MODE_SUPPORTED_P
724 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
725
726 #undef TARGET_COMPUTE_FRAME_LAYOUT
727 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
728
729 #undef TARGET_FRAME_POINTER_REQUIRED
730 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
731
732 #undef TARGET_CAN_ELIMINATE
733 #define TARGET_CAN_ELIMINATE arm_can_eliminate
734
735 #undef TARGET_CONDITIONAL_REGISTER_USAGE
736 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
737
738 #undef TARGET_CLASS_LIKELY_SPILLED_P
739 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
740
741 #undef TARGET_VECTORIZE_BUILTINS
742 #define TARGET_VECTORIZE_BUILTINS
743
744 #undef TARGET_VECTOR_ALIGNMENT
745 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
746
747 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
748 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
749 arm_vector_alignment_reachable
750
751 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
752 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
753 arm_builtin_support_vector_misalignment
754
755 #undef TARGET_PREFERRED_RENAME_CLASS
756 #define TARGET_PREFERRED_RENAME_CLASS \
757 arm_preferred_rename_class
758
759 #undef TARGET_VECTORIZE_VEC_PERM_CONST
760 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
761
762 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
763 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
764 arm_builtin_vectorization_cost
765
766 #undef TARGET_CANONICALIZE_COMPARISON
767 #define TARGET_CANONICALIZE_COMPARISON \
768 arm_canonicalize_comparison
769
770 #undef TARGET_ASAN_SHADOW_OFFSET
771 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
772
773 #undef MAX_INSN_PER_IT_BLOCK
774 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
775
776 #undef TARGET_CAN_USE_DOLOOP_P
777 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
778
779 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
780 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
781
782 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
783 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
784
785 #undef TARGET_SCHED_FUSION_PRIORITY
786 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
787
788 #undef TARGET_ASM_FUNCTION_SECTION
789 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
790
791 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
792 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
793
794 #undef TARGET_SECTION_TYPE_FLAGS
795 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
796
797 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
798 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
799
800 #undef TARGET_C_EXCESS_PRECISION
801 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
802
803 /* Although the architecture reserves bits 0 and 1, only the former is
804 used for ARM/Thumb ISA selection in v7 and earlier versions. */
805 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
806 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
807
808 #undef TARGET_FIXED_CONDITION_CODE_REGS
809 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
810
811 #undef TARGET_HARD_REGNO_NREGS
812 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
813 #undef TARGET_HARD_REGNO_MODE_OK
814 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
815
816 #undef TARGET_MODES_TIEABLE_P
817 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
818
819 #undef TARGET_CAN_CHANGE_MODE_CLASS
820 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
821
822 #undef TARGET_CONSTANT_ALIGNMENT
823 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
824
825 #undef TARGET_INVALID_WITHIN_DOLOOP
826 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
827
828 #undef TARGET_MD_ASM_ADJUST
829 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
830
831 #undef TARGET_STACK_PROTECT_GUARD
832 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
833
834 #undef TARGET_VECTORIZE_GET_MASK_MODE
835 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
836 \f
837 /* Obstack for minipool constant handling. */
838 static struct obstack minipool_obstack;
839 static char * minipool_startobj;
840
841 /* The maximum number of insns skipped which
842 will be conditionalised if possible. */
843 static int max_insns_skipped = 5;
844
845 /* True if we are currently building a constant table. */
846 int making_const_table;
847
848 /* The processor for which instructions should be scheduled. */
849 enum processor_type arm_tune = TARGET_CPU_arm_none;
850
851 /* The current tuning set. */
852 const struct tune_params *current_tune;
853
854 /* Which floating point hardware to schedule for. */
855 int arm_fpu_attr;
856
857 /* Used for Thumb call_via trampolines. */
858 rtx thumb_call_via_label[14];
859 static int thumb_call_reg_needed;
860
861 /* The bits in this mask specify which instruction scheduling options should
862 be used. */
863 unsigned int tune_flags = 0;
864
865 /* The highest ARM architecture version supported by the
866 target. */
867 enum base_architecture arm_base_arch = BASE_ARCH_0;
868
869 /* Active target architecture and tuning. */
870
871 struct arm_build_target arm_active_target;
872
873 /* The following are used in the arm.md file as equivalents to bits
874 in the above two flag variables. */
875
876 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
877 int arm_arch4 = 0;
878
879 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
880 int arm_arch4t = 0;
881
882 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
883 int arm_arch5t = 0;
884
885 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
886 int arm_arch5te = 0;
887
888 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
889 int arm_arch6 = 0;
890
891 /* Nonzero if this chip supports the ARM 6K extensions. */
892 int arm_arch6k = 0;
893
894 /* Nonzero if this chip supports the ARM 6KZ extensions. */
895 int arm_arch6kz = 0;
896
897 /* Nonzero if instructions present in ARMv6-M can be used. */
898 int arm_arch6m = 0;
899
900 /* Nonzero if this chip supports the ARM 7 extensions. */
901 int arm_arch7 = 0;
902
903 /* Nonzero if this chip supports the Large Physical Address Extension. */
904 int arm_arch_lpae = 0;
905
906 /* Nonzero if instructions not present in the 'M' profile can be used. */
907 int arm_arch_notm = 0;
908
909 /* Nonzero if instructions present in ARMv7E-M can be used. */
910 int arm_arch7em = 0;
911
912 /* Nonzero if instructions present in ARMv8 can be used. */
913 int arm_arch8 = 0;
914
915 /* Nonzero if this chip supports the ARMv8.1 extensions. */
916 int arm_arch8_1 = 0;
917
918 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
919 int arm_arch8_2 = 0;
920
921 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
922 int arm_arch8_3 = 0;
923
924 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
925 int arm_arch8_4 = 0;
926
927 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
928 extensions. */
929 int arm_arch8m_main = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
932 extensions. */
933 int arm_arch8_1m_main = 0;
934
935 /* Nonzero if this chip supports the FP16 instructions extension of ARM
936 Architecture 8.2. */
937 int arm_fp16_inst = 0;
938
939 /* Nonzero if this chip can benefit from load scheduling. */
940 int arm_ld_sched = 0;
941
942 /* Nonzero if this chip is a StrongARM. */
943 int arm_tune_strongarm = 0;
944
945 /* Nonzero if this chip supports Intel Wireless MMX technology. */
946 int arm_arch_iwmmxt = 0;
947
948 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
949 int arm_arch_iwmmxt2 = 0;
950
951 /* Nonzero if this chip is an XScale. */
952 int arm_arch_xscale = 0;
953
954 /* Nonzero if tuning for XScale */
955 int arm_tune_xscale = 0;
956
957 /* Nonzero if we want to tune for stores that access the write-buffer.
958 This typically means an ARM6 or ARM7 with MMU or MPU. */
959 int arm_tune_wbuf = 0;
960
961 /* Nonzero if tuning for Cortex-A9. */
962 int arm_tune_cortex_a9 = 0;
963
964 /* Nonzero if we should define __THUMB_INTERWORK__ in the
965 preprocessor.
966 XXX This is a bit of a hack, it's intended to help work around
967 problems in GLD which doesn't understand that armv5t code is
968 interworking clean. */
969 int arm_cpp_interwork = 0;
970
971 /* Nonzero if chip supports Thumb 1. */
972 int arm_arch_thumb1;
973
974 /* Nonzero if chip supports Thumb 2. */
975 int arm_arch_thumb2;
976
977 /* Nonzero if chip supports integer division instruction. */
978 int arm_arch_arm_hwdiv;
979 int arm_arch_thumb_hwdiv;
980
981 /* Nonzero if chip disallows volatile memory access in IT block. */
982 int arm_arch_no_volatile_ce;
983
984 /* Nonzero if we shouldn't use literal pools. */
985 bool arm_disable_literal_pool = false;
986
987 /* The register number to be used for the PIC offset register. */
988 unsigned arm_pic_register = INVALID_REGNUM;
989
990 enum arm_pcs arm_pcs_default;
991
992 /* For an explanation of these variables, see final_prescan_insn below. */
993 int arm_ccfsm_state;
994 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
995 enum arm_cond_code arm_current_cc;
996
997 rtx arm_target_insn;
998 int arm_target_label;
999 /* The number of conditionally executed insns, including the current insn. */
1000 int arm_condexec_count = 0;
1001 /* A bitmask specifying the patterns for the IT block.
1002 Zero means do not output an IT block before this insn. */
1003 int arm_condexec_mask = 0;
1004 /* The number of bits used in arm_condexec_mask. */
1005 int arm_condexec_masklen = 0;
1006
1007 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1008 int arm_arch_crc = 0;
1009
1010 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1011 int arm_arch_dotprod = 0;
1012
1013 /* Nonzero if chip supports the ARMv8-M security extensions. */
1014 int arm_arch_cmse = 0;
1015
1016 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1017 int arm_m_profile_small_mul = 0;
1018
1019 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1020 int arm_arch_i8mm = 0;
1021
1022 /* Nonzero if chip supports the BFloat16 instructions. */
1023 int arm_arch_bf16 = 0;
1024
1025 /* Nonzero if chip supports the Custom Datapath Extension. */
1026 int arm_arch_cde = 0;
1027 int arm_arch_cde_coproc = 0;
1028 const int arm_arch_cde_coproc_bits[] = {
1029 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1030 };
1031
1032 /* The condition codes of the ARM, and the inverse function. */
1033 static const char * const arm_condition_codes[] =
1034 {
1035 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1036 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1037 };
1038
1039 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1040 int arm_regs_in_sequence[] =
1041 {
1042 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1043 };
1044
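/* Table of printable names for the FP system registers, built by
   stringizing each entry of the FP_SYSREGS list via DEF_FP_SYSREG.  */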
1045 #define DEF_FP_SYSREG(reg) #reg,
1046 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1047 FP_SYSREGS
1048 };
1049 #undef DEF_FP_SYSREG
1050
1051 #define ARM_LSL_NAME "lsl"
1052 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1053
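/* Mask of the core registers r0-r7 that may be used as work (scratch)
   registers in Thumb-2 code: the hard frame pointer, SP, PC and, when one
   is in use, the PIC offset table register are excluded.  */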
1054 #define THUMB2_WORK_REGS \
1055 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1056 | (1 << SP_REGNUM) \
1057 | (1 << PC_REGNUM) \
1058 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1059 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1060 : 0)))
1061 \f
1062 /* Initialization code. */
1063
1064 struct cpu_tune
1065 {
1066 enum processor_type scheduler;
1067 unsigned int tune_flags;
1068 const struct tune_params *tune;
1069 };
1070
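/* Initializers for the prefetch tuning fields: number of prefetch slots,
   L1 cache size and L1 cache line size (-1 where the value is not
   meaningful, as in the NOT_BENEFICIAL case).  */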
1071 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1072 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1073 { \
1074 num_slots, \
1075 l1_size, \
1076 l1_line_size \
1077 }
1078
1079 /* arm generic vectorizer costs. */
1080 static const
1081 struct cpu_vec_costs arm_default_vec_cost = {
1082 1, /* scalar_stmt_cost. */
1083 1, /* scalar load_cost. */
1084 1, /* scalar_store_cost. */
1085 1, /* vec_stmt_cost. */
1086 1, /* vec_to_scalar_cost. */
1087 1, /* scalar_to_vec_cost. */
1088 1, /* vec_align_load_cost. */
1089 1, /* vec_unalign_load_cost. */
1090 1, /* vec_unalign_store_cost. */
1091 1, /* vec_store_cost. */
1092 3, /* cond_taken_branch_cost. */
1093 1, /* cond_not_taken_branch_cost. */
1094 };
1095
1096 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1097 #include "aarch-cost-tables.h"
1098
1099
1100
1101 const struct cpu_cost_table cortexa9_extra_costs =
1102 {
1103 /* ALU */
1104 {
1105 0, /* arith. */
1106 0, /* logical. */
1107 0, /* shift. */
1108 COSTS_N_INSNS (1), /* shift_reg. */
1109 COSTS_N_INSNS (1), /* arith_shift. */
1110 COSTS_N_INSNS (2), /* arith_shift_reg. */
1111 0, /* log_shift. */
1112 COSTS_N_INSNS (1), /* log_shift_reg. */
1113 COSTS_N_INSNS (1), /* extend. */
1114 COSTS_N_INSNS (2), /* extend_arith. */
1115 COSTS_N_INSNS (1), /* bfi. */
1116 COSTS_N_INSNS (1), /* bfx. */
1117 0, /* clz. */
1118 0, /* rev. */
1119 0, /* non_exec. */
1120 true /* non_exec_costs_exec. */
1121 },
1122 {
1123 /* MULT SImode */
1124 {
1125 COSTS_N_INSNS (3), /* simple. */
1126 COSTS_N_INSNS (3), /* flag_setting. */
1127 COSTS_N_INSNS (2), /* extend. */
1128 COSTS_N_INSNS (3), /* add. */
1129 COSTS_N_INSNS (2), /* extend_add. */
1130 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1131 },
1132 /* MULT DImode */
1133 {
1134 0, /* simple (N/A). */
1135 0, /* flag_setting (N/A). */
1136 COSTS_N_INSNS (4), /* extend. */
1137 0, /* add (N/A). */
1138 COSTS_N_INSNS (4), /* extend_add. */
1139 0 /* idiv (N/A). */
1140 }
1141 },
1142 /* LD/ST */
1143 {
1144 COSTS_N_INSNS (2), /* load. */
1145 COSTS_N_INSNS (2), /* load_sign_extend. */
1146 COSTS_N_INSNS (2), /* ldrd. */
1147 COSTS_N_INSNS (2), /* ldm_1st. */
1148 1, /* ldm_regs_per_insn_1st. */
1149 2, /* ldm_regs_per_insn_subsequent. */
1150 COSTS_N_INSNS (5), /* loadf. */
1151 COSTS_N_INSNS (5), /* loadd. */
1152 COSTS_N_INSNS (1), /* load_unaligned. */
1153 COSTS_N_INSNS (2), /* store. */
1154 COSTS_N_INSNS (2), /* strd. */
1155 COSTS_N_INSNS (2), /* stm_1st. */
1156 1, /* stm_regs_per_insn_1st. */
1157 2, /* stm_regs_per_insn_subsequent. */
1158 COSTS_N_INSNS (1), /* storef. */
1159 COSTS_N_INSNS (1), /* stored. */
1160 COSTS_N_INSNS (1), /* store_unaligned. */
1161 COSTS_N_INSNS (1), /* loadv. */
1162 COSTS_N_INSNS (1) /* storev. */
1163 },
1164 {
1165 /* FP SFmode */
1166 {
1167 COSTS_N_INSNS (14), /* div. */
1168 COSTS_N_INSNS (4), /* mult. */
1169 COSTS_N_INSNS (7), /* mult_addsub. */
1170 COSTS_N_INSNS (30), /* fma. */
1171 COSTS_N_INSNS (3), /* addsub. */
1172 COSTS_N_INSNS (1), /* fpconst. */
1173 COSTS_N_INSNS (1), /* neg. */
1174 COSTS_N_INSNS (3), /* compare. */
1175 COSTS_N_INSNS (3), /* widen. */
1176 COSTS_N_INSNS (3), /* narrow. */
1177 COSTS_N_INSNS (3), /* toint. */
1178 COSTS_N_INSNS (3), /* fromint. */
1179 COSTS_N_INSNS (3) /* roundint. */
1180 },
1181 /* FP DFmode */
1182 {
1183 COSTS_N_INSNS (24), /* div. */
1184 COSTS_N_INSNS (5), /* mult. */
1185 COSTS_N_INSNS (8), /* mult_addsub. */
1186 COSTS_N_INSNS (30), /* fma. */
1187 COSTS_N_INSNS (3), /* addsub. */
1188 COSTS_N_INSNS (1), /* fpconst. */
1189 COSTS_N_INSNS (1), /* neg. */
1190 COSTS_N_INSNS (3), /* compare. */
1191 COSTS_N_INSNS (3), /* widen. */
1192 COSTS_N_INSNS (3), /* narrow. */
1193 COSTS_N_INSNS (3), /* toint. */
1194 COSTS_N_INSNS (3), /* fromint. */
1195 COSTS_N_INSNS (3) /* roundint. */
1196 }
1197 },
1198 /* Vector */
1199 {
1200 COSTS_N_INSNS (1), /* alu. */
1201 COSTS_N_INSNS (4), /* mult. */
1202 COSTS_N_INSNS (1), /* movi. */
1203 COSTS_N_INSNS (2), /* dup. */
1204 COSTS_N_INSNS (2) /* extract. */
1205 }
1206 };
1207
1208 const struct cpu_cost_table cortexa8_extra_costs =
1209 {
1210 /* ALU */
1211 {
1212 0, /* arith. */
1213 0, /* logical. */
1214 COSTS_N_INSNS (1), /* shift. */
1215 0, /* shift_reg. */
1216 COSTS_N_INSNS (1), /* arith_shift. */
1217 0, /* arith_shift_reg. */
1218 COSTS_N_INSNS (1), /* log_shift. */
1219 0, /* log_shift_reg. */
1220 0, /* extend. */
1221 0, /* extend_arith. */
1222 0, /* bfi. */
1223 0, /* bfx. */
1224 0, /* clz. */
1225 0, /* rev. */
1226 0, /* non_exec. */
1227 true /* non_exec_costs_exec. */
1228 },
1229 {
1230 /* MULT SImode */
1231 {
1232 COSTS_N_INSNS (1), /* simple. */
1233 COSTS_N_INSNS (1), /* flag_setting. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* add. */
1236 COSTS_N_INSNS (1), /* extend_add. */
1237 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1238 },
1239 /* MULT DImode */
1240 {
1241 0, /* simple (N/A). */
1242 0, /* flag_setting (N/A). */
1243 COSTS_N_INSNS (2), /* extend. */
1244 0, /* add (N/A). */
1245 COSTS_N_INSNS (2), /* extend_add. */
1246 0 /* idiv (N/A). */
1247 }
1248 },
1249 /* LD/ST */
1250 {
1251 COSTS_N_INSNS (1), /* load. */
1252 COSTS_N_INSNS (1), /* load_sign_extend. */
1253 COSTS_N_INSNS (1), /* ldrd. */
1254 COSTS_N_INSNS (1), /* ldm_1st. */
1255 1, /* ldm_regs_per_insn_1st. */
1256 2, /* ldm_regs_per_insn_subsequent. */
1257 COSTS_N_INSNS (1), /* loadf. */
1258 COSTS_N_INSNS (1), /* loadd. */
1259 COSTS_N_INSNS (1), /* load_unaligned. */
1260 COSTS_N_INSNS (1), /* store. */
1261 COSTS_N_INSNS (1), /* strd. */
1262 COSTS_N_INSNS (1), /* stm_1st. */
1263 1, /* stm_regs_per_insn_1st. */
1264 2, /* stm_regs_per_insn_subsequent. */
1265 COSTS_N_INSNS (1), /* storef. */
1266 COSTS_N_INSNS (1), /* stored. */
1267 COSTS_N_INSNS (1), /* store_unaligned. */
1268 COSTS_N_INSNS (1), /* loadv. */
1269 COSTS_N_INSNS (1) /* storev. */
1270 },
1271 {
1272 /* FP SFmode */
1273 {
1274 COSTS_N_INSNS (36), /* div. */
1275 COSTS_N_INSNS (11), /* mult. */
1276 COSTS_N_INSNS (20), /* mult_addsub. */
1277 COSTS_N_INSNS (30), /* fma. */
1278 COSTS_N_INSNS (9), /* addsub. */
1279 COSTS_N_INSNS (3), /* fpconst. */
1280 COSTS_N_INSNS (3), /* neg. */
1281 COSTS_N_INSNS (6), /* compare. */
1282 COSTS_N_INSNS (4), /* widen. */
1283 COSTS_N_INSNS (4), /* narrow. */
1284 COSTS_N_INSNS (8), /* toint. */
1285 COSTS_N_INSNS (8), /* fromint. */
1286 COSTS_N_INSNS (8) /* roundint. */
1287 },
1288 /* FP DFmode */
1289 {
1290 COSTS_N_INSNS (64), /* div. */
1291 COSTS_N_INSNS (16), /* mult. */
1292 COSTS_N_INSNS (25), /* mult_addsub. */
1293 COSTS_N_INSNS (30), /* fma. */
1294 COSTS_N_INSNS (9), /* addsub. */
1295 COSTS_N_INSNS (3), /* fpconst. */
1296 COSTS_N_INSNS (3), /* neg. */
1297 COSTS_N_INSNS (6), /* compare. */
1298 COSTS_N_INSNS (6), /* widen. */
1299 COSTS_N_INSNS (6), /* narrow. */
1300 COSTS_N_INSNS (8), /* toint. */
1301 COSTS_N_INSNS (8), /* fromint. */
1302 COSTS_N_INSNS (8) /* roundint. */
1303 }
1304 },
1305 /* Vector */
1306 {
1307 COSTS_N_INSNS (1), /* alu. */
1308 COSTS_N_INSNS (4), /* mult. */
1309 COSTS_N_INSNS (1), /* movi. */
1310 COSTS_N_INSNS (2), /* dup. */
1311 COSTS_N_INSNS (2) /* extract. */
1312 }
1313 };
1314
1315 const struct cpu_cost_table cortexa5_extra_costs =
1316 {
1317 /* ALU */
1318 {
1319 0, /* arith. */
1320 0, /* logical. */
1321 COSTS_N_INSNS (1), /* shift. */
1322 COSTS_N_INSNS (1), /* shift_reg. */
1323 COSTS_N_INSNS (1), /* arith_shift. */
1324 COSTS_N_INSNS (1), /* arith_shift_reg. */
1325 COSTS_N_INSNS (1), /* log_shift. */
1326 COSTS_N_INSNS (1), /* log_shift_reg. */
1327 COSTS_N_INSNS (1), /* extend. */
1328 COSTS_N_INSNS (1), /* extend_arith. */
1329 COSTS_N_INSNS (1), /* bfi. */
1330 COSTS_N_INSNS (1), /* bfx. */
1331 COSTS_N_INSNS (1), /* clz. */
1332 COSTS_N_INSNS (1), /* rev. */
1333 0, /* non_exec. */
1334 true /* non_exec_costs_exec. */
1335 },
1336
1337 {
1338 /* MULT SImode */
1339 {
1340 0, /* simple. */
1341 COSTS_N_INSNS (1), /* flag_setting. */
1342 COSTS_N_INSNS (1), /* extend. */
1343 COSTS_N_INSNS (1), /* add. */
1344 COSTS_N_INSNS (1), /* extend_add. */
1345 COSTS_N_INSNS (7) /* idiv. */
1346 },
1347 /* MULT DImode */
1348 {
1349 0, /* simple (N/A). */
1350 0, /* flag_setting (N/A). */
1351 COSTS_N_INSNS (1), /* extend. */
1352 0, /* add. */
1353 COSTS_N_INSNS (2), /* extend_add. */
1354 0 /* idiv (N/A). */
1355 }
1356 },
1357 /* LD/ST */
1358 {
1359 COSTS_N_INSNS (1), /* load. */
1360 COSTS_N_INSNS (1), /* load_sign_extend. */
1361 COSTS_N_INSNS (6), /* ldrd. */
1362 COSTS_N_INSNS (1), /* ldm_1st. */
1363 1, /* ldm_regs_per_insn_1st. */
1364 2, /* ldm_regs_per_insn_subsequent. */
1365 COSTS_N_INSNS (2), /* loadf. */
1366 COSTS_N_INSNS (4), /* loadd. */
1367 COSTS_N_INSNS (1), /* load_unaligned. */
1368 COSTS_N_INSNS (1), /* store. */
1369 COSTS_N_INSNS (3), /* strd. */
1370 COSTS_N_INSNS (1), /* stm_1st. */
1371 1, /* stm_regs_per_insn_1st. */
1372 2, /* stm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* storef. */
1374 COSTS_N_INSNS (2), /* stored. */
1375 COSTS_N_INSNS (1), /* store_unaligned. */
1376 COSTS_N_INSNS (1), /* loadv. */
1377 COSTS_N_INSNS (1) /* storev. */
1378 },
1379 {
1380 /* FP SFmode */
1381 {
1382 COSTS_N_INSNS (15), /* div. */
1383 COSTS_N_INSNS (3), /* mult. */
1384 COSTS_N_INSNS (7), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1395 },
1396 /* FP DFmode */
1397 {
1398 COSTS_N_INSNS (30), /* div. */
1399 COSTS_N_INSNS (6), /* mult. */
1400 COSTS_N_INSNS (10), /* mult_addsub. */
1401 COSTS_N_INSNS (7), /* fma. */
1402 COSTS_N_INSNS (3), /* addsub. */
1403 COSTS_N_INSNS (3), /* fpconst. */
1404 COSTS_N_INSNS (3), /* neg. */
1405 COSTS_N_INSNS (3), /* compare. */
1406 COSTS_N_INSNS (3), /* widen. */
1407 COSTS_N_INSNS (3), /* narrow. */
1408 COSTS_N_INSNS (3), /* toint. */
1409 COSTS_N_INSNS (3), /* fromint. */
1410 COSTS_N_INSNS (3) /* roundint. */
1411 }
1412 },
1413 /* Vector */
1414 {
1415 COSTS_N_INSNS (1), /* alu. */
1416 COSTS_N_INSNS (4), /* mult. */
1417 COSTS_N_INSNS (1), /* movi. */
1418 COSTS_N_INSNS (2), /* dup. */
1419 COSTS_N_INSNS (2) /* extract. */
1420 }
1421 };
1422
1423
1424 const struct cpu_cost_table cortexa7_extra_costs =
1425 {
1426 /* ALU */
1427 {
1428 0, /* arith. */
1429 0, /* logical. */
1430 COSTS_N_INSNS (1), /* shift. */
1431 COSTS_N_INSNS (1), /* shift_reg. */
1432 COSTS_N_INSNS (1), /* arith_shift. */
1433 COSTS_N_INSNS (1), /* arith_shift_reg. */
1434 COSTS_N_INSNS (1), /* log_shift. */
1435 COSTS_N_INSNS (1), /* log_shift_reg. */
1436 COSTS_N_INSNS (1), /* extend. */
1437 COSTS_N_INSNS (1), /* extend_arith. */
1438 COSTS_N_INSNS (1), /* bfi. */
1439 COSTS_N_INSNS (1), /* bfx. */
1440 COSTS_N_INSNS (1), /* clz. */
1441 COSTS_N_INSNS (1), /* rev. */
1442 0, /* non_exec. */
1443 true /* non_exec_costs_exec. */
1444 },
1445
1446 {
1447 /* MULT SImode */
1448 {
1449 0, /* simple. */
1450 COSTS_N_INSNS (1), /* flag_setting. */
1451 COSTS_N_INSNS (1), /* extend. */
1452 COSTS_N_INSNS (1), /* add. */
1453 COSTS_N_INSNS (1), /* extend_add. */
1454 COSTS_N_INSNS (7) /* idiv. */
1455 },
1456 /* MULT DImode */
1457 {
1458 0, /* simple (N/A). */
1459 0, /* flag_setting (N/A). */
1460 COSTS_N_INSNS (1), /* extend. */
1461 0, /* add. */
1462 COSTS_N_INSNS (2), /* extend_add. */
1463 0 /* idiv (N/A). */
1464 }
1465 },
1466 /* LD/ST */
1467 {
1468 COSTS_N_INSNS (1), /* load. */
1469 COSTS_N_INSNS (1), /* load_sign_extend. */
1470 COSTS_N_INSNS (3), /* ldrd. */
1471 COSTS_N_INSNS (1), /* ldm_1st. */
1472 1, /* ldm_regs_per_insn_1st. */
1473 2, /* ldm_regs_per_insn_subsequent. */
1474 COSTS_N_INSNS (2), /* loadf. */
1475 COSTS_N_INSNS (2), /* loadd. */
1476 COSTS_N_INSNS (1), /* load_unaligned. */
1477 COSTS_N_INSNS (1), /* store. */
1478 COSTS_N_INSNS (3), /* strd. */
1479 COSTS_N_INSNS (1), /* stm_1st. */
1480 1, /* stm_regs_per_insn_1st. */
1481 2, /* stm_regs_per_insn_subsequent. */
1482 COSTS_N_INSNS (2), /* storef. */
1483 COSTS_N_INSNS (2), /* stored. */
1484 COSTS_N_INSNS (1), /* store_unaligned. */
1485 COSTS_N_INSNS (1), /* loadv. */
1486 COSTS_N_INSNS (1) /* storev. */
1487 },
1488 {
1489 /* FP SFmode */
1490 {
1491 COSTS_N_INSNS (15), /* div. */
1492 COSTS_N_INSNS (3), /* mult. */
1493 COSTS_N_INSNS (7), /* mult_addsub. */
1494 COSTS_N_INSNS (7), /* fma. */
1495 COSTS_N_INSNS (3), /* addsub. */
1496 COSTS_N_INSNS (3), /* fpconst. */
1497 COSTS_N_INSNS (3), /* neg. */
1498 COSTS_N_INSNS (3), /* compare. */
1499 COSTS_N_INSNS (3), /* widen. */
1500 COSTS_N_INSNS (3), /* narrow. */
1501 COSTS_N_INSNS (3), /* toint. */
1502 COSTS_N_INSNS (3), /* fromint. */
1503 COSTS_N_INSNS (3) /* roundint. */
1504 },
1505 /* FP DFmode */
1506 {
1507 COSTS_N_INSNS (30), /* div. */
1508 COSTS_N_INSNS (6), /* mult. */
1509 COSTS_N_INSNS (10), /* mult_addsub. */
1510 COSTS_N_INSNS (7), /* fma. */
1511 COSTS_N_INSNS (3), /* addsub. */
1512 COSTS_N_INSNS (3), /* fpconst. */
1513 COSTS_N_INSNS (3), /* neg. */
1514 COSTS_N_INSNS (3), /* compare. */
1515 COSTS_N_INSNS (3), /* widen. */
1516 COSTS_N_INSNS (3), /* narrow. */
1517 COSTS_N_INSNS (3), /* toint. */
1518 COSTS_N_INSNS (3), /* fromint. */
1519 COSTS_N_INSNS (3) /* roundint. */
1520 }
1521 },
1522 /* Vector */
1523 {
1524 COSTS_N_INSNS (1), /* alu. */
1525 COSTS_N_INSNS (4), /* mult. */
1526 COSTS_N_INSNS (1), /* movi. */
1527 COSTS_N_INSNS (2), /* dup. */
1528 COSTS_N_INSNS (2) /* extract. */
1529 }
1530 };
1531
1532 const struct cpu_cost_table cortexa12_extra_costs =
1533 {
1534 /* ALU */
1535 {
1536 0, /* arith. */
1537 0, /* logical. */
1538 0, /* shift. */
1539 COSTS_N_INSNS (1), /* shift_reg. */
1540 COSTS_N_INSNS (1), /* arith_shift. */
1541 COSTS_N_INSNS (1), /* arith_shift_reg. */
1542 COSTS_N_INSNS (1), /* log_shift. */
1543 COSTS_N_INSNS (1), /* log_shift_reg. */
1544 0, /* extend. */
1545 COSTS_N_INSNS (1), /* extend_arith. */
1546 0, /* bfi. */
1547 COSTS_N_INSNS (1), /* bfx. */
1548 COSTS_N_INSNS (1), /* clz. */
1549 COSTS_N_INSNS (1), /* rev. */
1550 0, /* non_exec. */
1551 true /* non_exec_costs_exec. */
1552 },
1553 /* MULT SImode */
1554 {
1555 {
1556 COSTS_N_INSNS (2), /* simple. */
1557 COSTS_N_INSNS (3), /* flag_setting. */
1558 COSTS_N_INSNS (2), /* extend. */
1559 COSTS_N_INSNS (3), /* add. */
1560 COSTS_N_INSNS (2), /* extend_add. */
1561 COSTS_N_INSNS (18) /* idiv. */
1562 },
1563 /* MULT DImode */
1564 {
1565 0, /* simple (N/A). */
1566 0, /* flag_setting (N/A). */
1567 COSTS_N_INSNS (3), /* extend. */
1568 0, /* add (N/A). */
1569 COSTS_N_INSNS (3), /* extend_add. */
1570 0 /* idiv (N/A). */
1571 }
1572 },
1573 /* LD/ST */
1574 {
1575 COSTS_N_INSNS (3), /* load. */
1576 COSTS_N_INSNS (3), /* load_sign_extend. */
1577 COSTS_N_INSNS (3), /* ldrd. */
1578 COSTS_N_INSNS (3), /* ldm_1st. */
1579 1, /* ldm_regs_per_insn_1st. */
1580 2, /* ldm_regs_per_insn_subsequent. */
1581 COSTS_N_INSNS (3), /* loadf. */
1582 COSTS_N_INSNS (3), /* loadd. */
1583 0, /* load_unaligned. */
1584 0, /* store. */
1585 0, /* strd. */
1586 0, /* stm_1st. */
1587 1, /* stm_regs_per_insn_1st. */
1588 2, /* stm_regs_per_insn_subsequent. */
1589 COSTS_N_INSNS (2), /* storef. */
1590 COSTS_N_INSNS (2), /* stored. */
1591 0, /* store_unaligned. */
1592 COSTS_N_INSNS (1), /* loadv. */
1593 COSTS_N_INSNS (1) /* storev. */
1594 },
1595 {
1596 /* FP SFmode */
1597 {
1598 COSTS_N_INSNS (17), /* div. */
1599 COSTS_N_INSNS (4), /* mult. */
1600 COSTS_N_INSNS (8), /* mult_addsub. */
1601 COSTS_N_INSNS (8), /* fma. */
1602 COSTS_N_INSNS (4), /* addsub. */
1603 COSTS_N_INSNS (2), /* fpconst. */
1604 COSTS_N_INSNS (2), /* neg. */
1605 COSTS_N_INSNS (2), /* compare. */
1606 COSTS_N_INSNS (4), /* widen. */
1607 COSTS_N_INSNS (4), /* narrow. */
1608 COSTS_N_INSNS (4), /* toint. */
1609 COSTS_N_INSNS (4), /* fromint. */
1610 COSTS_N_INSNS (4) /* roundint. */
1611 },
1612 /* FP DFmode */
1613 {
1614 COSTS_N_INSNS (31), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (2), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1627 }
1628 },
1629 /* Vector */
1630 {
1631 COSTS_N_INSNS (1), /* alu. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (1), /* movi. */
1634 COSTS_N_INSNS (2), /* dup. */
1635 COSTS_N_INSNS (2) /* extract. */
1636 }
1637 };
1638
1639 const struct cpu_cost_table cortexa15_extra_costs =
1640 {
1641 /* ALU */
1642 {
1643 0, /* arith. */
1644 0, /* logical. */
1645 0, /* shift. */
1646 0, /* shift_reg. */
1647 COSTS_N_INSNS (1), /* arith_shift. */
1648 COSTS_N_INSNS (1), /* arith_shift_reg. */
1649 COSTS_N_INSNS (1), /* log_shift. */
1650 COSTS_N_INSNS (1), /* log_shift_reg. */
1651 0, /* extend. */
1652 COSTS_N_INSNS (1), /* extend_arith. */
1653 COSTS_N_INSNS (1), /* bfi. */
1654 0, /* bfx. */
1655 0, /* clz. */
1656 0, /* rev. */
1657 0, /* non_exec. */
1658 true /* non_exec_costs_exec. */
1659 },
1660 /* MULT SImode */
1661 {
1662 {
1663 COSTS_N_INSNS (2), /* simple. */
1664 COSTS_N_INSNS (3), /* flag_setting. */
1665 COSTS_N_INSNS (2), /* extend. */
1666 COSTS_N_INSNS (2), /* add. */
1667 COSTS_N_INSNS (2), /* extend_add. */
1668 COSTS_N_INSNS (18) /* idiv. */
1669 },
1670 /* MULT DImode */
1671 {
1672 0, /* simple (N/A). */
1673 0, /* flag_setting (N/A). */
1674 COSTS_N_INSNS (3), /* extend. */
1675 0, /* add (N/A). */
1676 COSTS_N_INSNS (3), /* extend_add. */
1677 0 /* idiv (N/A). */
1678 }
1679 },
1680 /* LD/ST */
1681 {
1682 COSTS_N_INSNS (3), /* load. */
1683 COSTS_N_INSNS (3), /* load_sign_extend. */
1684 COSTS_N_INSNS (3), /* ldrd. */
1685 COSTS_N_INSNS (4), /* ldm_1st. */
1686 1, /* ldm_regs_per_insn_1st. */
1687 2, /* ldm_regs_per_insn_subsequent. */
1688 COSTS_N_INSNS (4), /* loadf. */
1689 COSTS_N_INSNS (4), /* loadd. */
1690 0, /* load_unaligned. */
1691 0, /* store. */
1692 0, /* strd. */
1693 COSTS_N_INSNS (1), /* stm_1st. */
1694 1, /* stm_regs_per_insn_1st. */
1695 2, /* stm_regs_per_insn_subsequent. */
1696 0, /* storef. */
1697 0, /* stored. */
1698 0, /* store_unaligned. */
1699 COSTS_N_INSNS (1), /* loadv. */
1700 COSTS_N_INSNS (1) /* storev. */
1701 },
1702 {
1703 /* FP SFmode */
1704 {
1705 COSTS_N_INSNS (17), /* div. */
1706 COSTS_N_INSNS (4), /* mult. */
1707 COSTS_N_INSNS (8), /* mult_addsub. */
1708 COSTS_N_INSNS (8), /* fma. */
1709 COSTS_N_INSNS (4), /* addsub. */
1710 COSTS_N_INSNS (2), /* fpconst. */
1711 COSTS_N_INSNS (2), /* neg. */
1712 COSTS_N_INSNS (5), /* compare. */
1713 COSTS_N_INSNS (4), /* widen. */
1714 COSTS_N_INSNS (4), /* narrow. */
1715 COSTS_N_INSNS (4), /* toint. */
1716 COSTS_N_INSNS (4), /* fromint. */
1717 COSTS_N_INSNS (4) /* roundint. */
1718 },
1719 /* FP DFmode */
1720 {
1721 COSTS_N_INSNS (31), /* div. */
1722 COSTS_N_INSNS (4), /* mult. */
1723 COSTS_N_INSNS (8), /* mult_addsub. */
1724 COSTS_N_INSNS (8), /* fma. */
1725 COSTS_N_INSNS (4), /* addsub. */
1726 COSTS_N_INSNS (2), /* fpconst. */
1727 COSTS_N_INSNS (2), /* neg. */
1728 COSTS_N_INSNS (2), /* compare. */
1729 COSTS_N_INSNS (4), /* widen. */
1730 COSTS_N_INSNS (4), /* narrow. */
1731 COSTS_N_INSNS (4), /* toint. */
1732 COSTS_N_INSNS (4), /* fromint. */
1733 COSTS_N_INSNS (4) /* roundint. */
1734 }
1735 },
1736 /* Vector */
1737 {
1738 COSTS_N_INSNS (1), /* alu. */
1739 COSTS_N_INSNS (4), /* mult. */
1740 COSTS_N_INSNS (1), /* movi. */
1741 COSTS_N_INSNS (2), /* dup. */
1742 COSTS_N_INSNS (2) /* extract. */
1743 }
1744 };
1745
1746 const struct cpu_cost_table v7m_extra_costs =
1747 {
1748 /* ALU */
1749 {
1750 0, /* arith. */
1751 0, /* logical. */
1752 0, /* shift. */
1753 0, /* shift_reg. */
1754 0, /* arith_shift. */
1755 COSTS_N_INSNS (1), /* arith_shift_reg. */
1756 0, /* log_shift. */
1757 COSTS_N_INSNS (1), /* log_shift_reg. */
1758 0, /* extend. */
1759 COSTS_N_INSNS (1), /* extend_arith. */
1760 0, /* bfi. */
1761 0, /* bfx. */
1762 0, /* clz. */
1763 0, /* rev. */
1764 COSTS_N_INSNS (1), /* non_exec. */
1765 false /* non_exec_costs_exec. */
1766 },
1767 {
1768 /* MULT SImode */
1769 {
1770 COSTS_N_INSNS (1), /* simple. */
1771 COSTS_N_INSNS (1), /* flag_setting. */
1772 COSTS_N_INSNS (2), /* extend. */
1773 COSTS_N_INSNS (1), /* add. */
1774 COSTS_N_INSNS (3), /* extend_add. */
1775 COSTS_N_INSNS (8) /* idiv. */
1776 },
1777 /* MULT DImode */
1778 {
1779 0, /* simple (N/A). */
1780 0, /* flag_setting (N/A). */
1781 COSTS_N_INSNS (2), /* extend. */
1782 0, /* add (N/A). */
1783 COSTS_N_INSNS (3), /* extend_add. */
1784 0 /* idiv (N/A). */
1785 }
1786 },
1787 /* LD/ST */
1788 {
1789 COSTS_N_INSNS (2), /* load. */
1790 0, /* load_sign_extend. */
1791 COSTS_N_INSNS (3), /* ldrd. */
1792 COSTS_N_INSNS (2), /* ldm_1st. */
1793 1, /* ldm_regs_per_insn_1st. */
1794 1, /* ldm_regs_per_insn_subsequent. */
1795 COSTS_N_INSNS (2), /* loadf. */
1796 COSTS_N_INSNS (3), /* loadd. */
1797 COSTS_N_INSNS (1), /* load_unaligned. */
1798 COSTS_N_INSNS (2), /* store. */
1799 COSTS_N_INSNS (3), /* strd. */
1800 COSTS_N_INSNS (2), /* stm_1st. */
1801 1, /* stm_regs_per_insn_1st. */
1802 1, /* stm_regs_per_insn_subsequent. */
1803 COSTS_N_INSNS (2), /* storef. */
1804 COSTS_N_INSNS (3), /* stored. */
1805 COSTS_N_INSNS (1), /* store_unaligned. */
1806 COSTS_N_INSNS (1), /* loadv. */
1807 COSTS_N_INSNS (1) /* storev. */
1808 },
1809 {
1810 /* FP SFmode */
1811 {
1812 COSTS_N_INSNS (7), /* div. */
1813 COSTS_N_INSNS (2), /* mult. */
1814 COSTS_N_INSNS (5), /* mult_addsub. */
1815 COSTS_N_INSNS (3), /* fma. */
1816 COSTS_N_INSNS (1), /* addsub. */
1817 0, /* fpconst. */
1818 0, /* neg. */
1819 0, /* compare. */
1820 0, /* widen. */
1821 0, /* narrow. */
1822 0, /* toint. */
1823 0, /* fromint. */
1824 0 /* roundint. */
1825 },
1826 /* FP DFmode */
1827 {
1828 COSTS_N_INSNS (15), /* div. */
1829 COSTS_N_INSNS (5), /* mult. */
1830 COSTS_N_INSNS (7), /* mult_addsub. */
1831 COSTS_N_INSNS (7), /* fma. */
1832 COSTS_N_INSNS (3), /* addsub. */
1833 0, /* fpconst. */
1834 0, /* neg. */
1835 0, /* compare. */
1836 0, /* widen. */
1837 0, /* narrow. */
1838 0, /* toint. */
1839 0, /* fromint. */
1840 0 /* roundint. */
1841 }
1842 },
1843 /* Vector */
1844 {
1845 COSTS_N_INSNS (1), /* alu. */
1846 COSTS_N_INSNS (4), /* mult. */
1847 COSTS_N_INSNS (1), /* movi. */
1848 COSTS_N_INSNS (2), /* dup. */
1849 COSTS_N_INSNS (2) /* extract. */
1850 }
1851 };
1852
1853 const struct addr_mode_cost_table generic_addr_mode_costs =
1854 {
1855 /* int. */
1856 {
1857 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1858 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1859 COSTS_N_INSNS (0) /* AMO_WB. */
1860 },
1861 /* float. */
1862 {
1863 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1864 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1865 COSTS_N_INSNS (0) /* AMO_WB. */
1866 },
1867 /* vector. */
1868 {
1869 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1870 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1871 COSTS_N_INSNS (0) /* AMO_WB. */
1872 }
1873 };
1874
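/* Roughly speaking, each entry above records the cost of an operation over
   and above a simple single-cycle instruction.  COSTS_N_INSNS (N) expands to
   (N) * 4 (see rtl.h), so for instance COSTS_N_INSNS (7) == 28, while a
   plain 0 means "no extra cost beyond the baseline instruction".  */
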
1875 const struct tune_params arm_slowmul_tune =
1876 {
1877 &generic_extra_costs, /* Insn extra costs. */
1878 &generic_addr_mode_costs, /* Addressing mode costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 3, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 1, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1895 };
1896
1897 const struct tune_params arm_fastmul_tune =
1898 {
1899 &generic_extra_costs, /* Insn extra costs. */
1900 &generic_addr_mode_costs, /* Addressing mode costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_TRUE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_STRINGOPS_FALSE,
1915 tune_params::FUSE_NOTHING,
1916 tune_params::SCHED_AUTOPREF_OFF
1917 };
1918
1919 /* StrongARM has early execution of branches, so a sequence that is worth
1920 skipping is shorter. Set max_insns_skipped to a lower value. */
1921
1922 const struct tune_params arm_strongarm_tune =
1923 {
1924 &generic_extra_costs, /* Insn extra costs. */
1925 &generic_addr_mode_costs, /* Addressing mode costs. */
1926 NULL, /* Sched adj cost. */
1927 arm_default_branch_cost,
1928 &arm_default_vec_cost,
1929 1, /* Constant limit. */
1930 3, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 1, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 tune_params::PREF_CONST_POOL_TRUE,
1935 tune_params::PREF_LDRD_FALSE,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_NEITHER,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_xscale_tune =
1945 {
1946 &generic_extra_costs, /* Insn extra costs. */
1947 &generic_addr_mode_costs, /* Addressing mode costs. */
1948 xscale_sched_adjust_cost,
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 2, /* Constant limit. */
1952 3, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_TRUE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_STRINGOPS_FALSE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_9e_tune =
1967 {
1968 &generic_extra_costs, /* Insn extra costs. */
1969 &generic_addr_mode_costs, /* Addressing mode costs. */
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 1, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_TRUE,
1979 tune_params::PREF_LDRD_FALSE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER,
1983 tune_params::PREF_NEON_STRINGOPS_FALSE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_marvell_pj4_tune =
1989 {
1990 &generic_extra_costs, /* Insn extra costs. */
1991 &generic_addr_mode_costs, /* Addressing mode costs. */
1992 NULL, /* Sched adj cost. */
1993 arm_default_branch_cost,
1994 &arm_default_vec_cost,
1995 1, /* Constant limit. */
1996 5, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 2, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 tune_params::PREF_CONST_POOL_TRUE,
2001 tune_params::PREF_LDRD_FALSE,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_NEITHER,
2005 tune_params::PREF_NEON_STRINGOPS_FALSE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_OFF
2008 };
2009
2010 const struct tune_params arm_v6t2_tune =
2011 {
2012 &generic_extra_costs, /* Insn extra costs. */
2013 &generic_addr_mode_costs, /* Addressing mode costs. */
2014 NULL, /* Sched adj cost. */
2015 arm_default_branch_cost,
2016 &arm_default_vec_cost,
2017 1, /* Constant limit. */
2018 5, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 1, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL,
2022 tune_params::PREF_CONST_POOL_FALSE,
2023 tune_params::PREF_LDRD_FALSE,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_NEITHER,
2027 tune_params::PREF_NEON_STRINGOPS_FALSE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032
2033 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2034 const struct tune_params arm_cortex_tune =
2035 {
2036 &generic_extra_costs,
2037 &generic_addr_mode_costs, /* Addressing mode costs. */
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_STRINGOPS_FALSE,
2052 tune_params::FUSE_NOTHING,
2053 tune_params::SCHED_AUTOPREF_OFF
2054 };
2055
2056 const struct tune_params arm_cortex_a8_tune =
2057 {
2058 &cortexa8_extra_costs,
2059 &generic_addr_mode_costs, /* Addressing mode costs. */
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 5, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 2, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_FALSE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_NEITHER,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 tune_params::FUSE_NOTHING,
2075 tune_params::SCHED_AUTOPREF_OFF
2076 };
2077
2078 const struct tune_params arm_cortex_a7_tune =
2079 {
2080 &cortexa7_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_STRINGOPS_TRUE,
2096 tune_params::FUSE_NOTHING,
2097 tune_params::SCHED_AUTOPREF_OFF
2098 };
2099
2100 const struct tune_params arm_cortex_a15_tune =
2101 {
2102 &cortexa15_extra_costs,
2103 &generic_addr_mode_costs, /* Addressing mode costs. */
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 8, /* Memset max inline. */
2110 3, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE,
2118 tune_params::FUSE_NOTHING,
2119 tune_params::SCHED_AUTOPREF_FULL
2120 };
2121
2122 const struct tune_params arm_cortex_a35_tune =
2123 {
2124 &cortexa53_extra_costs,
2125 &generic_addr_mode_costs, /* Addressing mode costs. */
2126 NULL, /* Sched adj cost. */
2127 arm_default_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 5, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 1, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_STRINGOPS_TRUE,
2140 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2141 tune_params::SCHED_AUTOPREF_OFF
2142 };
2143
2144 const struct tune_params arm_cortex_a53_tune =
2145 {
2146 &cortexa53_extra_costs,
2147 &generic_addr_mode_costs, /* Addressing mode costs. */
2148 NULL, /* Sched adj cost. */
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_NOT_BENEFICIAL,
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 const struct tune_params arm_cortex_a57_tune =
2167 {
2168 &cortexa57_extra_costs,
2169 &generic_addr_mode_costs, /* addressing mode costs */
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost,
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 3, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE,
2184 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2185 tune_params::SCHED_AUTOPREF_FULL
2186 };
2187
2188 const struct tune_params arm_exynosm1_tune =
2189 {
2190 &exynosm1_extra_costs,
2191 &generic_addr_mode_costs, /* Addressing mode costs. */
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost,
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 3, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_STRINGOPS_TRUE,
2206 tune_params::FUSE_NOTHING,
2207 tune_params::SCHED_AUTOPREF_OFF
2208 };
2209
2210 const struct tune_params arm_xgene1_tune =
2211 {
2212 &xgene1_extra_costs,
2213 &generic_addr_mode_costs, /* Addressing mode costs. */
2214 NULL, /* Sched adj cost. */
2215 arm_default_branch_cost,
2216 &arm_default_vec_cost,
2217 1, /* Constant limit. */
2218 2, /* Max cond insns. */
2219 32, /* Memset max inline. */
2220 4, /* Issue rate. */
2221 ARM_PREFETCH_NOT_BENEFICIAL,
2222 tune_params::PREF_CONST_POOL_FALSE,
2223 tune_params::PREF_LDRD_TRUE,
2224 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2226 tune_params::DISPARAGE_FLAGS_ALL,
2227 tune_params::PREF_NEON_STRINGOPS_FALSE,
2228 tune_params::FUSE_NOTHING,
2229 tune_params::SCHED_AUTOPREF_OFF
2230 };
2231
2232 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2233 less appealing. Set max_insns_skipped to a low value. */
2234
2235 const struct tune_params arm_cortex_a5_tune =
2236 {
2237 &cortexa5_extra_costs,
2238 &generic_addr_mode_costs, /* Addressing mode costs. */
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_a5_branch_cost,
2241 &arm_default_vec_cost,
2242 1, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_FALSE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_STRINGOPS_TRUE,
2253 tune_params::FUSE_NOTHING,
2254 tune_params::SCHED_AUTOPREF_OFF
2255 };
2256
2257 const struct tune_params arm_cortex_a9_tune =
2258 {
2259 &cortexa9_extra_costs,
2260 &generic_addr_mode_costs, /* Addressing mode costs. */
2261 cortex_a9_sched_adjust_cost,
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost,
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 2, /* Issue rate. */
2268 ARM_PREFETCH_BENEFICIAL(4,32,32),
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_STRINGOPS_FALSE,
2275 tune_params::FUSE_NOTHING,
2276 tune_params::SCHED_AUTOPREF_OFF
2277 };
2278
2279 const struct tune_params arm_cortex_a12_tune =
2280 {
2281 &cortexa12_extra_costs,
2282 &generic_addr_mode_costs, /* Addressing mode costs. */
2283 NULL, /* Sched adj cost. */
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost, /* Vectorizer costs. */
2286 1, /* Constant limit. */
2287 2, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_FALSE,
2292 tune_params::PREF_LDRD_TRUE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_ALL,
2296 tune_params::PREF_NEON_STRINGOPS_TRUE,
2297 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2298 tune_params::SCHED_AUTOPREF_OFF
2299 };
2300
2301 const struct tune_params arm_cortex_a73_tune =
2302 {
2303 &cortexa57_extra_costs,
2304 &generic_addr_mode_costs, /* Addressing mode costs. */
2305 NULL, /* Sched adj cost. */
2306 arm_default_branch_cost,
2307 &arm_default_vec_cost, /* Vectorizer costs. */
2308 1, /* Constant limit. */
2309 2, /* Max cond insns. */
2310 8, /* Memset max inline. */
2311 2, /* Issue rate. */
2312 ARM_PREFETCH_NOT_BENEFICIAL,
2313 tune_params::PREF_CONST_POOL_FALSE,
2314 tune_params::PREF_LDRD_TRUE,
2315 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2316 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2317 tune_params::DISPARAGE_FLAGS_ALL,
2318 tune_params::PREF_NEON_STRINGOPS_TRUE,
2319 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2320 tune_params::SCHED_AUTOPREF_FULL
2321 };
2322
2323 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2324 single cycle, so a MOVW/MOVT pair costs two cycles. An LDR from the constant
2325 pool also takes two cycles to execute, but mildly increases pipelining
2326 opportunity (consecutive loads/stores can be pipelined together, saving one
2327 cycle), and may also improve icache utilisation. Hence we prefer the
2328 constant pool for such processors. */
2329
2330 const struct tune_params arm_v7m_tune =
2331 {
2332 &v7m_extra_costs,
2333 &generic_addr_mode_costs, /* Addressing mode costs. */
2334 NULL, /* Sched adj cost. */
2335 arm_cortex_m_branch_cost,
2336 &arm_default_vec_cost,
2337 1, /* Constant limit. */
2338 2, /* Max cond insns. */
2339 8, /* Memset max inline. */
2340 1, /* Issue rate. */
2341 ARM_PREFETCH_NOT_BENEFICIAL,
2342 tune_params::PREF_CONST_POOL_TRUE,
2343 tune_params::PREF_LDRD_FALSE,
2344 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2345 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2346 tune_params::DISPARAGE_FLAGS_NEITHER,
2347 tune_params::PREF_NEON_STRINGOPS_FALSE,
2348 tune_params::FUSE_NOTHING,
2349 tune_params::SCHED_AUTOPREF_OFF
2350 };
2351
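/* As a rough illustration of the constant-pool preference above, materialising
   the (arbitrary) constant 0x12345678 can be done either as

     movw r0, #0x5678   @ 1 cycle
     movt r0, #0x1234   @ 1 cycle

   or as a single literal-pool load

     ldr  r0, .Lconst   @ 2 cycles, but may pipeline with a neighbouring
                        @ load/store

   which is why arm_v7m_tune above selects PREF_CONST_POOL_TRUE.  */
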
2352 /* Cortex-M7 tuning. */
2353
2354 const struct tune_params arm_cortex_m7_tune =
2355 {
2356 &v7m_extra_costs,
2357 &generic_addr_mode_costs, /* Addressing mode costs. */
2358 NULL, /* Sched adj cost. */
2359 arm_cortex_m7_branch_cost,
2360 &arm_default_vec_cost,
2361 0, /* Constant limit. */
2362 1, /* Max cond insns. */
2363 8, /* Memset max inline. */
2364 2, /* Issue rate. */
2365 ARM_PREFETCH_NOT_BENEFICIAL,
2366 tune_params::PREF_CONST_POOL_TRUE,
2367 tune_params::PREF_LDRD_FALSE,
2368 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2369 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2370 tune_params::DISPARAGE_FLAGS_NEITHER,
2371 tune_params::PREF_NEON_STRINGOPS_FALSE,
2372 tune_params::FUSE_NOTHING,
2373 tune_params::SCHED_AUTOPREF_OFF
2374 };
2375
2376 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2377 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2378 cortex-m23. */
2379 const struct tune_params arm_v6m_tune =
2380 {
2381 &generic_extra_costs, /* Insn extra costs. */
2382 &generic_addr_mode_costs, /* Addressing mode costs. */
2383 NULL, /* Sched adj cost. */
2384 arm_default_branch_cost,
2385 &arm_default_vec_cost, /* Vectorizer costs. */
2386 1, /* Constant limit. */
2387 5, /* Max cond insns. */
2388 8, /* Memset max inline. */
2389 1, /* Issue rate. */
2390 ARM_PREFETCH_NOT_BENEFICIAL,
2391 tune_params::PREF_CONST_POOL_FALSE,
2392 tune_params::PREF_LDRD_FALSE,
2393 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2394 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2395 tune_params::DISPARAGE_FLAGS_NEITHER,
2396 tune_params::PREF_NEON_STRINGOPS_FALSE,
2397 tune_params::FUSE_NOTHING,
2398 tune_params::SCHED_AUTOPREF_OFF
2399 };
2400
2401 const struct tune_params arm_fa726te_tune =
2402 {
2403 &generic_extra_costs, /* Insn extra costs. */
2404 &generic_addr_mode_costs, /* Addressing mode costs. */
2405 fa726te_sched_adjust_cost,
2406 arm_default_branch_cost,
2407 &arm_default_vec_cost,
2408 1, /* Constant limit. */
2409 5, /* Max cond insns. */
2410 8, /* Memset max inline. */
2411 2, /* Issue rate. */
2412 ARM_PREFETCH_NOT_BENEFICIAL,
2413 tune_params::PREF_CONST_POOL_TRUE,
2414 tune_params::PREF_LDRD_FALSE,
2415 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2416 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2417 tune_params::DISPARAGE_FLAGS_NEITHER,
2418 tune_params::PREF_NEON_STRINGOPS_FALSE,
2419 tune_params::FUSE_NOTHING,
2420 tune_params::SCHED_AUTOPREF_OFF
2421 };
2422
2423 /* Key type for Pointer Authentication extension. */
2424 enum aarch_key_type aarch_ra_sign_key = AARCH_KEY_A;
2425
2426 char *accepted_branch_protection_string = NULL;
2427
2428 /* Auto-generated CPU, FPU and architecture tables. */
2429 #include "arm-cpu-data.h"
2430
2431 /* The name of the preprocessor macro to define for this architecture. PROFILE
2432 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2433 is thus chosen to be big enough to hold the longest architecture name. */
2434
2435 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2436
2437 /* Supported TLS relocations. */
2438
2439 enum tls_reloc {
2440 TLS_GD32,
2441 TLS_GD32_FDPIC,
2442 TLS_LDM32,
2443 TLS_LDM32_FDPIC,
2444 TLS_LDO32,
2445 TLS_IE32,
2446 TLS_IE32_FDPIC,
2447 TLS_LE32,
2448 TLS_DESCSEQ /* GNU scheme */
2449 };
2450
2451 /* The maximum number of insns to be used when loading a constant. */
2452 inline static int
2453 arm_constant_limit (bool size_p)
2454 {
2455 return size_p ? 1 : current_tune->constant_limit;
2456 }
2457
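/* For example, arm_constant_limit (true) (i.e. optimising for size) always
   returns 1, while arm_constant_limit (false) returns the active tuning's
   constant_limit, e.g. 2 under arm_xscale_tune above.  */
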
2458 /* Emit an insn that's a simple single-set. Both the operands must be known
2459 to be valid. */
2460 inline static rtx_insn *
2461 emit_set_insn (rtx x, rtx y)
2462 {
2463 return emit_insn (gen_rtx_SET (x, y));
2464 }
2465
2466 /* Return the number of bits set in VALUE. */
2467 static unsigned
2468 bit_count (unsigned long value)
2469 {
2470 unsigned long count = 0;
2471
2472 while (value)
2473 {
2474 count++;
2475 value &= value - 1; /* Clear the least-significant set bit. */
2476 }
2477
2478 return count;
2479 }
2480
2481 /* Return the number of bits set in BMAP. */
2482 static unsigned
2483 bitmap_popcount (const sbitmap bmap)
2484 {
2485 unsigned int count = 0;
2486 unsigned int n = 0;
2487 sbitmap_iterator sbi;
2488
2489 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2490 count++;
2491 return count;
2492 }
2493
2494 typedef struct
2495 {
2496 machine_mode mode;
2497 const char *name;
2498 } arm_fixed_mode_set;
2499
2500 /* A small helper for setting fixed-point library libfuncs. */
2501
2502 static void
2503 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2504 const char *funcname, const char *modename,
2505 int num_suffix)
2506 {
2507 char buffer[50];
2508
2509 if (num_suffix == 0)
2510 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2511 else
2512 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2513
2514 set_optab_libfunc (optable, mode, buffer);
2515 }
2516
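/* For instance, the call
     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);
   made from the table-driven loop further down registers the helper name
   "__gnu_addsq3" for signed-fract SQmode addition.  */
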
2517 static void
2518 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2519 machine_mode from, const char *funcname,
2520 const char *toname, const char *fromname)
2521 {
2522 char buffer[50];
2523 const char *maybe_suffix_2 = "";
2524
2525 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2526 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2527 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2528 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2529 maybe_suffix_2 = "2";
2530
2531 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2532 maybe_suffix_2);
2533
2534 set_conv_libfunc (optable, to, from, buffer);
2535 }
2536
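/* Likewise, arm_set_fixed_conv_libfunc (fract_optab, E_SQmode, E_SImode,
   "fract", "sq", "si") registers "__gnu_fractsisq" (no "2" suffix, since
   SImode is not a fixed-point mode), while a conversion between two signed
   fract modes such as SQmode -> DQmode picks up the suffix and becomes
   "__gnu_fractsqdq2".  */
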
2537 static GTY(()) rtx speculation_barrier_libfunc;
2538
2539 /* Record that we have no arithmetic or comparison libfuncs for
2540 machine mode MODE. */
2541
2542 static void
2543 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2544 {
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, mode, NULL);
2547 set_optab_libfunc (sdiv_optab, mode, NULL);
2548 set_optab_libfunc (smul_optab, mode, NULL);
2549 set_optab_libfunc (neg_optab, mode, NULL);
2550 set_optab_libfunc (sub_optab, mode, NULL);
2551
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, mode, NULL);
2554 set_optab_libfunc (ne_optab, mode, NULL);
2555 set_optab_libfunc (lt_optab, mode, NULL);
2556 set_optab_libfunc (le_optab, mode, NULL);
2557 set_optab_libfunc (ge_optab, mode, NULL);
2558 set_optab_libfunc (gt_optab, mode, NULL);
2559 set_optab_libfunc (unord_optab, mode, NULL);
2560 }
2561
2562 /* Set up library functions unique to ARM. */
2563 static void
2564 arm_init_libfuncs (void)
2565 {
2566 machine_mode mode_iter;
2567
2568 /* For Linux, we have access to kernel support for atomic operations. */
2569 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2570 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2571
2572 /* There are no special library functions unless we are using the
2573 ARM BPABI. */
2574 if (!TARGET_BPABI)
2575 return;
2576
2577 /* The functions below are described in Section 4 of the "Run-Time
2578 ABI for the ARM architecture", Version 1.0. */
2579
2580 /* Double-precision floating-point arithmetic. Table 2. */
2581 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2582 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2583 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2584 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2585 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2586
2587 /* Double-precision comparisons. Table 3. */
2588 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2589 set_optab_libfunc (ne_optab, DFmode, NULL);
2590 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2591 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2592 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2593 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2594 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2595
2596 /* Single-precision floating-point arithmetic. Table 4. */
2597 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2598 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2599 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2600 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2601 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2602
2603 /* Single-precision comparisons. Table 5. */
2604 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2605 set_optab_libfunc (ne_optab, SFmode, NULL);
2606 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2607 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2608 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2609 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2610 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2611
2612 /* Floating-point to integer conversions. Table 6. */
2613 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2614 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2615 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2616 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2617 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2618 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2619 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2620 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2621
2622 /* Conversions between floating types. Table 7. */
2623 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2624 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2625
2626 /* Integer to floating-point conversions. Table 8. */
2627 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2628 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2629 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2630 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2631 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2632 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2633 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2634 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2635
2636 /* Long long. Table 9. */
2637 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2638 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2639 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2640 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2641 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2642 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2643 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2644 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2645
2646 /* Integer (32/32->32) division. \S 4.3.1. */
2647 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2648 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2649
2650 /* The divmod functions are designed so that they can be used for
2651 plain division, even though they return both the quotient and the
2652 remainder. The quotient is returned in the usual location (i.e.,
2653 r0 for SImode, {r0, r1} for DImode), just as would be expected
2654 for an ordinary division routine. Because the AAPCS calling
2655 conventions specify that all of { r0, r1, r2, r3 } are
2656 call-clobbered registers, there is no need to tell the compiler
2657 explicitly that those registers are clobbered by these
2658 routines. */
2659 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2660 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2661
2662 /* For SImode division the ABI provides div-without-mod routines,
2663 which are faster. */
2664 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2665 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2666
2667 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2668 divmod libcalls instead. */
2669 set_optab_libfunc (smod_optab, DImode, NULL);
2670 set_optab_libfunc (umod_optab, DImode, NULL);
2671 set_optab_libfunc (smod_optab, SImode, NULL);
2672 set_optab_libfunc (umod_optab, SImode, NULL);
2673
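/* To illustrate: on a core without hardware divide, "q = a / b" for 32-bit
   ints becomes a call to __aeabi_idiv (operands in r0/r1, quotient in r0),
   while "r = a % b" goes through __aeabi_idivmod, which per the RTABI
   returns the quotient in r0 and the remainder in r1.  */
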
2674 /* Half-precision float operations. The compiler handles all operations
2675 with NULL libfuncs by converting to SFmode. */
2676 switch (arm_fp16_format)
2677 {
2678 case ARM_FP16_FORMAT_IEEE:
2679 case ARM_FP16_FORMAT_ALTERNATIVE:
2680
2681 /* Conversions. */
2682 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2683 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2684 ? "__gnu_f2h_ieee"
2685 : "__gnu_f2h_alternative"));
2686 set_conv_libfunc (sext_optab, SFmode, HFmode,
2687 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2688 ? "__gnu_h2f_ieee"
2689 : "__gnu_h2f_alternative"));
2690
2691 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2692 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2693 ? "__gnu_d2h_ieee"
2694 : "__gnu_d2h_alternative"));
2695
2696 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2697 break;
2698
2699 default:
2700 break;
2701 }
2702
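/* The upshot, absent hardware conversion instructions: narrowing a float to
   __fp16 calls __gnu_f2h_ieee (or __gnu_f2h_alternative for the Arm
   alternative format), widening calls the matching __gnu_h2f_* helper, and
   arithmetic on __fp16 values is performed by promoting to SFmode.  */
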
2703 /* For all possible libcalls in BFmode, record NULL. */
2704 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2705 {
2706 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2707 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2708 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2709 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2710 }
2711 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2712
2713 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2714 {
2715 const arm_fixed_mode_set fixed_arith_modes[] =
2716 {
2717 { E_QQmode, "qq" },
2718 { E_UQQmode, "uqq" },
2719 { E_HQmode, "hq" },
2720 { E_UHQmode, "uhq" },
2721 { E_SQmode, "sq" },
2722 { E_USQmode, "usq" },
2723 { E_DQmode, "dq" },
2724 { E_UDQmode, "udq" },
2725 { E_TQmode, "tq" },
2726 { E_UTQmode, "utq" },
2727 { E_HAmode, "ha" },
2728 { E_UHAmode, "uha" },
2729 { E_SAmode, "sa" },
2730 { E_USAmode, "usa" },
2731 { E_DAmode, "da" },
2732 { E_UDAmode, "uda" },
2733 { E_TAmode, "ta" },
2734 { E_UTAmode, "uta" }
2735 };
2736 const arm_fixed_mode_set fixed_conv_modes[] =
2737 {
2738 { E_QQmode, "qq" },
2739 { E_UQQmode, "uqq" },
2740 { E_HQmode, "hq" },
2741 { E_UHQmode, "uhq" },
2742 { E_SQmode, "sq" },
2743 { E_USQmode, "usq" },
2744 { E_DQmode, "dq" },
2745 { E_UDQmode, "udq" },
2746 { E_TQmode, "tq" },
2747 { E_UTQmode, "utq" },
2748 { E_HAmode, "ha" },
2749 { E_UHAmode, "uha" },
2750 { E_SAmode, "sa" },
2751 { E_USAmode, "usa" },
2752 { E_DAmode, "da" },
2753 { E_UDAmode, "uda" },
2754 { E_TAmode, "ta" },
2755 { E_UTAmode, "uta" },
2756 { E_QImode, "qi" },
2757 { E_HImode, "hi" },
2758 { E_SImode, "si" },
2759 { E_DImode, "di" },
2760 { E_TImode, "ti" },
2761 { E_SFmode, "sf" },
2762 { E_DFmode, "df" }
2763 };
2764 unsigned int i, j;
2765
2766 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2767 {
2768 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2769 "add", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2771 "ssadd", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2773 "usadd", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2775 "sub", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2777 "sssub", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2779 "ussub", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2781 "mul", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2783 "ssmul", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2785 "usmul", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2787 "div", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2789 "udiv", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2791 "ssdiv", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2793 "usdiv", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2795 "neg", fixed_arith_modes[i].name, 2);
2796 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2797 "ssneg", fixed_arith_modes[i].name, 2);
2798 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2799 "usneg", fixed_arith_modes[i].name, 2);
2800 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2801 "ashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2803 "ashr", fixed_arith_modes[i].name, 3);
2804 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2805 "lshr", fixed_arith_modes[i].name, 3);
2806 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2807 "ssashl", fixed_arith_modes[i].name, 3);
2808 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2809 "usashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2811 "cmp", fixed_arith_modes[i].name, 2);
2812 }
2813
2814 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2815 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2816 {
2817 if (i == j
2818 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2819 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2820 continue;
2821
2822 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2823 fixed_conv_modes[j].mode, "fract",
2824 fixed_conv_modes[i].name,
2825 fixed_conv_modes[j].name);
2826 arm_set_fixed_conv_libfunc (satfract_optab,
2827 fixed_conv_modes[i].mode,
2828 fixed_conv_modes[j].mode, "satfract",
2829 fixed_conv_modes[i].name,
2830 fixed_conv_modes[j].name);
2831 arm_set_fixed_conv_libfunc (fractuns_optab,
2832 fixed_conv_modes[i].mode,
2833 fixed_conv_modes[j].mode, "fractuns",
2834 fixed_conv_modes[i].name,
2835 fixed_conv_modes[j].name);
2836 arm_set_fixed_conv_libfunc (satfractuns_optab,
2837 fixed_conv_modes[i].mode,
2838 fixed_conv_modes[j].mode, "satfractuns",
2839 fixed_conv_modes[i].name,
2840 fixed_conv_modes[j].name);
2841 }
2842 }
2843
2844 if (TARGET_AAPCS_BASED)
2845 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2846
2847 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2848 }
2849
2850 /* On AAPCS systems, this is the "struct __va_list". */
2851 static GTY(()) tree va_list_type;
2852
2853 /* Return the type to use as __builtin_va_list. */
2854 static tree
2855 arm_build_builtin_va_list (void)
2856 {
2857 tree va_list_name;
2858 tree ap_field;
2859
2860 if (!TARGET_AAPCS_BASED)
2861 return std_build_builtin_va_list ();
2862
2863 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2864 defined as:
2865
2866 struct __va_list
2867 {
2868 void *__ap;
2869 };
2870
2871 The C Library ABI further reinforces this definition in \S
2872 4.1.
2873
2874 We must follow this definition exactly. The structure tag
2875 name is visible in C++ mangled names, and thus forms a part
2876 of the ABI. The field name may be used by people who
2877 #include <stdarg.h>. */
2878 /* Create the type. */
2879 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2880 /* Give it the required name. */
2881 va_list_name = build_decl (BUILTINS_LOCATION,
2882 TYPE_DECL,
2883 get_identifier ("__va_list"),
2884 va_list_type);
2885 DECL_ARTIFICIAL (va_list_name) = 1;
2886 TYPE_NAME (va_list_type) = va_list_name;
2887 TYPE_STUB_DECL (va_list_type) = va_list_name;
2888 /* Create the __ap field. */
2889 ap_field = build_decl (BUILTINS_LOCATION,
2890 FIELD_DECL,
2891 get_identifier ("__ap"),
2892 ptr_type_node);
2893 DECL_ARTIFICIAL (ap_field) = 1;
2894 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2895 TYPE_FIELDS (va_list_type) = ap_field;
2896 /* Compute its layout. */
2897 layout_type (va_list_type);
2898
2899 return va_list_type;
2900 }
2901
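/* One user-visible consequence of the rule above: on AAPCS targets a C++
   declaration such as "void f (va_list);" mangles as "_Z1fSt9__va_list",
   i.e. the tag behaves as std::__va_list, so the name forms part of the
   ABI and must not change.  */
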
2902 /* Return an expression of type "void *" pointing to the next
2903 available argument in a variable-argument list. VALIST is the
2904 user-level va_list object, of type __builtin_va_list. */
2905 static tree
2906 arm_extract_valist_ptr (tree valist)
2907 {
2908 if (TREE_TYPE (valist) == error_mark_node)
2909 return error_mark_node;
2910
2911 /* On an AAPCS target, the pointer is stored within "struct
2912 va_list". */
2913 if (TARGET_AAPCS_BASED)
2914 {
2915 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2916 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2917 valist, ap_field, NULL_TREE);
2918 }
2919
2920 return valist;
2921 }
2922
2923 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2924 static void
2925 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2926 {
2927 valist = arm_extract_valist_ptr (valist);
2928 std_expand_builtin_va_start (valist, nextarg);
2929 }
2930
2931 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2932 static tree
2933 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2934 gimple_seq *post_p)
2935 {
2936 valist = arm_extract_valist_ptr (valist);
2937 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2938 }
2939
2940 /* Check any incompatible options that the user has specified. */
2941 static void
2942 arm_option_check_internal (struct gcc_options *opts)
2943 {
2944 int flags = opts->x_target_flags;
2945
2946 /* iWMMXt and NEON are incompatible. */
2947 if (TARGET_IWMMXT
2948 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2949 error ("iWMMXt and NEON are incompatible");
2950
2951 /* Make sure that the processor choice does not conflict with any of the
2952 other command line choices. */
2953 if (TARGET_ARM_P (flags)
2954 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2955 error ("target CPU does not support ARM mode");
2956
2957 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2958 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2959 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2960
2961 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2962 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2963
2964 /* If this target is normally configured to use APCS frames, warn if they
2965 are turned off and debugging is turned on. */
2966 if (TARGET_ARM_P (flags)
2967 && write_symbols != NO_DEBUG
2968 && !TARGET_APCS_FRAME
2969 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2970 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2971 "debugging");
2972
2973 /* iWMMXt unsupported under Thumb mode. */
2974 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2975 error ("iWMMXt unsupported under Thumb mode");
2976
2977 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2978 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2979
2980 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2981 {
2982 error ("RTP PIC is incompatible with Thumb");
2983 flag_pic = 0;
2984 }
2985
2986 if (target_pure_code || target_slow_flash_data)
2987 {
2988 const char *flag = (target_pure_code ? "-mpure-code" :
2989 "-mslow-flash-data");
2990 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2991
2992 /* We only support -mslow-flash-data on M-profile targets with
2993 MOVT. */
2994 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2995 error ("%s only supports non-pic code on M-profile targets with the "
2996 "MOVT instruction", flag);
2997
2998 /* We only support -mpure-code on M-profile targets. */
2999 if (target_pure_code && common_unsupported_modes)
3000 error ("%s only supports non-pic code on M-profile targets", flag);
3001
3002 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3003 -mword-relocations forbids relocation of MOVT/MOVW. */
3004 if (target_word_relocations)
3005 error ("%s incompatible with %<-mword-relocations%>", flag);
3006 }
3007 }
3008
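/* For example, "-mcpu=cortex-m4 -mpure-code -mword-relocations" is rejected
   by the last check above: without a literal pool the compiler must build
   addresses with MOVW/MOVT, and -mword-relocations forbids relocating those
   instructions.  */
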
3009 /* Recompute the global settings depending on target attribute options. */
3010
3011 static void
3012 arm_option_params_internal (void)
3013 {
3014 /* If we are not using the default (ARM mode) section anchor offset
3015 ranges, then set the correct ranges now. */
3016 if (TARGET_THUMB1)
3017 {
3018 /* Thumb-1 LDR instructions cannot have negative offsets.
3019 Permissible positive offset ranges are 5-bit (for byte loads),
3020 6-bit (for halfword loads), or 7-bit (for word loads).
3021 Empirical results suggest a 7-bit anchor range gives the best
3022 overall code size. */
3023 targetm.min_anchor_offset = 0;
3024 targetm.max_anchor_offset = 127;
3025 }
3026 else if (TARGET_THUMB2)
3027 {
3028 /* The minimum is set such that the total size of the block
3029 for a particular anchor is 248 + 1 + 4095 bytes, which is
3030 divisible by eight, ensuring natural spacing of anchors. */
3031 targetm.min_anchor_offset = -248;
3032 targetm.max_anchor_offset = 4095;
3033 }
3034 else
3035 {
3036 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3037 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3038 }
3039
3040 /* Increase the number of conditional instructions with -Os. */
3041 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3042
3043 /* For THUMB2, we limit the conditional sequence to one IT block. */
3044 if (TARGET_THUMB2)
3045 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3046
3047 if (TARGET_THUMB1)
3048 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3049 else
3050 targetm.md_asm_adjust = arm_md_asm_adjust;
3051 }
3052
3053 /* True if -mflip-thumb should next add an attribute for the default
3054 mode, false if it should next add an attribute for the opposite mode. */
3055 static GTY(()) bool thumb_flipper;
3056
3057 /* Options after initial target override. */
3058 static GTY(()) tree init_optimize;
3059
3060 static void
3061 arm_override_options_after_change_1 (struct gcc_options *opts,
3062 struct gcc_options *opts_set)
3063 {
3064 /* -falign-functions without argument: supply one. */
3065 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3066 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3067 && opts->x_optimize_size ? "2" : "4";
3068 }
3069
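/* Concretely, a bare -falign-functions becomes -falign-functions=2 when
   compiling Thumb code with -Os, and -falign-functions=4 otherwise.  */
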
3070 /* Implement targetm.override_options_after_change. */
3071
3072 static void
3073 arm_override_options_after_change (void)
3074 {
3075 arm_override_options_after_change_1 (&global_options, &global_options_set);
3076 }
3077
3078 /* Implement TARGET_OPTION_RESTORE. */
3079 static void
3080 arm_option_restore (struct gcc_options */* opts */,
3081 struct gcc_options */* opts_set */,
3082 struct cl_target_option *ptr)
3083 {
3084 arm_configure_build_target (&arm_active_target, ptr, false);
3085 arm_option_reconfigure_globals ();
3086 }
3087
3088 /* Reset options between modes that the user has specified. */
3089 static void
3090 arm_option_override_internal (struct gcc_options *opts,
3091 struct gcc_options *opts_set)
3092 {
3093 arm_override_options_after_change_1 (opts, opts_set);
3094
3095 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3096 {
3097 /* The default is to enable interworking, so this warning message would
3098 be confusing to users who have just compiled with
3099 eg, -march=armv4. */
3100 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3101 opts->x_target_flags &= ~MASK_INTERWORK;
3102 }
3103
3104 if (TARGET_THUMB_P (opts->x_target_flags)
3105 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3106 {
3107 warning (0, "target CPU does not support THUMB instructions");
3108 opts->x_target_flags &= ~MASK_THUMB;
3109 }
3110
3111 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3112 {
3113 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3114 opts->x_target_flags &= ~MASK_APCS_FRAME;
3115 }
3116
3117 /* Callee super interworking implies thumb interworking. Adding
3118 this to the flags here simplifies the logic elsewhere. */
3119 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3120 opts->x_target_flags |= MASK_INTERWORK;
3121
3122 /* We need to remember the initial values so that combinations of options like
3123 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3124 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3125
3126 if (! opts_set->x_arm_restrict_it)
3127 opts->x_arm_restrict_it = arm_arch8;
3128
3129 /* ARM execution state and M profile don't have [restrict] IT. */
3130 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3131 opts->x_arm_restrict_it = 0;
3132
3133 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3134 if (!opts_set->x_arm_restrict_it
3135 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3136 opts->x_arm_restrict_it = 0;
3137
3138 /* Enable -munaligned-access by default for
3139 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3140 i.e. Thumb2 and ARM state only.
3141 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3142 - ARMv8 architecture-based processors.
3143
3144 Disable -munaligned-access by default for
3145 - all pre-ARMv6 architecture-based processors
3146 - ARMv6-M architecture-based processors
3147 - ARMv8-M Baseline processors. */
3148
3149 if (! opts_set->x_unaligned_access)
3150 {
3151 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3152 && arm_arch6 && (arm_arch_notm || arm_arch7));
3153 }
3154 else if (opts->x_unaligned_access == 1
3155 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3156 {
3157 warning (0, "target CPU does not support unaligned accesses");
3158 opts->x_unaligned_access = 0;
3159 }
3160
3161 /* Don't warn since it's on by default in -O2. */
3162 if (TARGET_THUMB1_P (opts->x_target_flags))
3163 opts->x_flag_schedule_insns = 0;
3164 else
3165 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3166
3167 /* Disable shrink-wrap when optimizing function for size, since it tends to
3168 generate additional returns. */
3169 if (optimize_function_for_size_p (cfun)
3170 && TARGET_THUMB2_P (opts->x_target_flags))
3171 opts->x_flag_shrink_wrap = false;
3172 else
3173 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3174
3175 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3176 - epilogue_insns - does not accurately model the corresponding insns
3177 emitted in the asm file. In particular, see the comment in thumb_exit
3178 'Find out how many of the (return) argument registers we can corrupt'.
3179 As a consequence, the epilogue may clobber registers without fipa-ra
3180 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3181 TODO: Accurately model clobbers for epilogue_insns and reenable
3182 fipa-ra. */
3183 if (TARGET_THUMB1_P (opts->x_target_flags))
3184 opts->x_flag_ipa_ra = 0;
3185 else
3186 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3187
3188 /* Thumb2 inline assembly code should always use unified syntax.
3189 This will apply to ARM and Thumb1 eventually. */
3190 if (TARGET_THUMB2_P (opts->x_target_flags))
3191 opts->x_inline_asm_unified = true;
3192
3193 if (arm_stack_protector_guard == SSP_GLOBAL
3194 && opts->x_arm_stack_protector_guard_offset_str)
3195 {
3196 error ("incompatible options %<-mstack-protector-guard=global%> and "
3197 "%<-mstack-protector-guard-offset=%s%>",
3198 arm_stack_protector_guard_offset_str);
3199 }
3200
3201 if (opts->x_arm_stack_protector_guard_offset_str)
3202 {
3203 char *end;
3204 const char *str = arm_stack_protector_guard_offset_str;
3205 errno = 0;
3206 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3207 if (!*str || *end || errno)
3208 error ("%qs is not a valid offset in %qs", str,
3209 "-mstack-protector-guard-offset=");
3210 arm_stack_protector_guard_offset = offs;
3211 }
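/* Illustrative usage (editor's example, not from the original source): a
   per-thread canary at a hypothetical offset of 24 bytes into the thread
   block could be requested with
       -mstack-protector-guard=tls -mstack-protector-guard-offset=24
   which requires a hardware TLS register (see the check in
   arm_option_reconfigure_globals), whereas -mstack-protector-guard=global
   keeps the usual __stack_chk_guard symbol and, as enforced above, does
   not accept an offset.  */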
3212
3213 if (arm_current_function_pac_enabled_p ())
3214 {
3215 if (!arm_arch8m_main)
3216 error ("This architecture does not support branch protection "
3217 "instructions");
3218 if (TARGET_TPCS_FRAME)
3219 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3220 }
3221
3222 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3223 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3224 #endif
3225 }
3226
3227 static sbitmap isa_all_fpubits_internal;
3228 static sbitmap isa_all_fpbits;
3229 static sbitmap isa_quirkbits;
3230
3231 /* Configure a build target TARGET from the user-specified options OPTS.
3232 If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3233 architecture have been specified, but the two are not identical. */
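/* Illustrative example (added by the editor): given
   "-march=armv7-a -mcpu=cortex-m4" the two ISA bitmaps differ, so with
   WARN_COMPATIBLE a conflict warning is emitted; code is then generated
   for armv7-a (-march wins) while cortex-m4 only supplies the default
   tuning (-mcpu wins for tuning), as implemented below.  */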
3234 void
3235 arm_configure_build_target (struct arm_build_target *target,
3236 struct cl_target_option *opts,
3237 bool warn_compatible)
3238 {
3239 const cpu_option *arm_selected_tune = NULL;
3240 const arch_option *arm_selected_arch = NULL;
3241 const cpu_option *arm_selected_cpu = NULL;
3242 const arm_fpu_desc *arm_selected_fpu = NULL;
3243 const char *tune_opts = NULL;
3244 const char *arch_opts = NULL;
3245 const char *cpu_opts = NULL;
3246
3247 bitmap_clear (target->isa);
3248 target->core_name = NULL;
3249 target->arch_name = NULL;
3250
3251 if (opts->x_arm_arch_string)
3252 {
3253 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3254 "-march",
3255 opts->x_arm_arch_string);
3256 arch_opts = strchr (opts->x_arm_arch_string, '+');
3257 }
3258
3259 if (opts->x_arm_cpu_string)
3260 {
3261 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3262 opts->x_arm_cpu_string);
3263 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3264 arm_selected_tune = arm_selected_cpu;
3265 /* If taking the tuning from -mcpu, we don't need to rescan the
3266 options for tuning. */
3267 }
3268
3269 if (opts->x_arm_tune_string)
3270 {
3271 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3272 opts->x_arm_tune_string);
3273 tune_opts = strchr (opts->x_arm_tune_string, '+');
3274 }
3275
3276 if (opts->x_arm_branch_protection_string)
3277 {
3278 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3279
3280 if (aarch_ra_sign_key != AARCH_KEY_A)
3281 {
3282 warning (0, "invalid key type for %<-mbranch-protection=%>");
3283 aarch_ra_sign_key = AARCH_KEY_A;
3284 }
3285 }
3286
3287 if (arm_selected_arch)
3288 {
3289 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3290 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3291 arch_opts);
3292
3293 if (arm_selected_cpu)
3294 {
3295 auto_sbitmap cpu_isa (isa_num_bits);
3296 auto_sbitmap isa_delta (isa_num_bits);
3297
3298 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3299 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3300 cpu_opts);
3301 bitmap_xor (isa_delta, cpu_isa, target->isa);
3302 /* Ignore any bits that are quirk bits. */
3303 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3304 /* If the user (or the default configuration) has specified a
3305 specific FPU, then ignore any bits that depend on the FPU
3306 configuration. Do similarly if using the soft-float
3307 ABI. */
3308 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3309 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3310 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3311
3312 if (!bitmap_empty_p (isa_delta))
3313 {
3314 if (warn_compatible)
3315 warning (0, "switch %<-mcpu=%s%> conflicts "
3316 "with switch %<-march=%s%>",
3317 opts->x_arm_cpu_string,
3318 opts->x_arm_arch_string);
3319
3320 /* -march wins for code generation.
3321 -mcpu wins for default tuning. */
3322 if (!arm_selected_tune)
3323 arm_selected_tune = arm_selected_cpu;
3324
3325 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3326 target->arch_name = arm_selected_arch->common.name;
3327 }
3328 else
3329 {
3330 /* Architecture and CPU are essentially the same.
3331 Prefer the CPU setting. */
3332 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3333 target->core_name = arm_selected_cpu->common.name;
3334 /* Copy the CPU's capabilities, so that we inherit the
3335 appropriate extensions and quirks. */
3336 bitmap_copy (target->isa, cpu_isa);
3337 }
3338 }
3339 else
3340 {
3341 /* Pick a CPU based on the architecture. */
3342 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3343 target->arch_name = arm_selected_arch->common.name;
3344 /* Note: target->core_name is left unset in this path. */
3345 }
3346 }
3347 else if (arm_selected_cpu)
3348 {
3349 target->core_name = arm_selected_cpu->common.name;
3350 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3351 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3352 cpu_opts);
3353 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3354 }
3355 /* If the user did not specify a processor or architecture, choose
3356 one for them. */
3357 else
3358 {
3359 const cpu_option *sel;
3360 auto_sbitmap sought_isa (isa_num_bits);
3361 bitmap_clear (sought_isa);
3362 auto_sbitmap default_isa (isa_num_bits);
3363
3364 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3365 TARGET_CPU_DEFAULT);
3366 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3367 gcc_assert (arm_selected_cpu->common.name);
3368
3369 /* RWE: All of the selection logic below (to the end of this
3370 'if' clause) looks somewhat suspect. It appears to be mostly
3371 there to support forcing thumb support when the default CPU
3372 does not have thumb (somewhat dubious in terms of what the
3373 user might be expecting). I think it should be removed once
3374 support for the pre-thumb era cores is removed. */
3375 sel = arm_selected_cpu;
3376 arm_initialize_isa (default_isa, sel->common.isa_bits);
3377 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3378 cpu_opts);
3379
3380 /* Now check to see if the user has specified any command line
3381 switches that require certain abilities from the cpu. */
3382
3383 if (TARGET_INTERWORK || TARGET_THUMB)
3384 bitmap_set_bit (sought_isa, isa_bit_thumb);
3385
3386 /* If there are such requirements and the default CPU does not
3387 satisfy them, we need to run over the complete list of
3388 cores looking for one that is satisfactory. */
3389 if (!bitmap_empty_p (sought_isa)
3390 && !bitmap_subset_p (sought_isa, default_isa))
3391 {
3392 auto_sbitmap candidate_isa (isa_num_bits);
3393 /* We're only interested in a CPU with at least the
3394 capabilities of the default CPU and the required
3395 additional features. */
3396 bitmap_ior (default_isa, default_isa, sought_isa);
3397
3398 /* Try to locate a CPU type that supports all of the abilities
3399 of the default CPU, plus the extra abilities requested by
3400 the user. */
3401 for (sel = all_cores; sel->common.name != NULL; sel++)
3402 {
3403 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3404 /* An exact match? */
3405 if (bitmap_equal_p (default_isa, candidate_isa))
3406 break;
3407 }
3408
3409 if (sel->common.name == NULL)
3410 {
3411 unsigned current_bit_count = isa_num_bits;
3412 const cpu_option *best_fit = NULL;
3413
3414 /* Ideally we would like to issue an error message here
3415 saying that it was not possible to find a CPU compatible
3416 with the default CPU, but which also supports the command
3417 line options specified by the programmer, and so they
3418 ought to use the -mcpu=<name> command line option to
3419 override the default CPU type.
3420
3421 If we cannot find a CPU that has exactly the
3422 characteristics of the default CPU and the given
3423 command line options we scan the array again looking
3424 for a best match. The best match must have at least
3425 the capabilities of the perfect match. */
3426 for (sel = all_cores; sel->common.name != NULL; sel++)
3427 {
3428 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3429
3430 if (bitmap_subset_p (default_isa, candidate_isa))
3431 {
3432 unsigned count;
3433
3434 bitmap_and_compl (candidate_isa, candidate_isa,
3435 default_isa);
3436 count = bitmap_popcount (candidate_isa);
3437
3438 if (count < current_bit_count)
3439 {
3440 best_fit = sel;
3441 current_bit_count = count;
3442 }
3443 }
3444 }
3445 
3446 gcc_assert (best_fit);
3447 sel = best_fit;
3448 }
3449 arm_selected_cpu = sel;
3450 }
3451
3452 /* Now we know the CPU, we can finally initialize the target
3453 structure. */
3454 target->core_name = arm_selected_cpu->common.name;
3455 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3456 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3457 cpu_opts);
3458 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3459 }
3460
3461 gcc_assert (arm_selected_cpu);
3462 gcc_assert (arm_selected_arch);
3463
3464 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3465 {
3466 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3467 auto_sbitmap fpu_bits (isa_num_bits);
3468
3469 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3470 /* This should clear out ALL bits relating to the FPU/simd
3471 extensions, to avoid potentially invalid combinations later on
3472 that we can't match. At present we only clear out those bits
3473 that can be set by -mfpu. This should be fixed in GCC-12. */
3474 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3475 bitmap_ior (target->isa, target->isa, fpu_bits);
3476 }
3477
3478 /* If we have the soft-float ABI, clear any feature bits relating to use of
3479 floating-point operations. They'll just confuse things later on. */
3480 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3481 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3482
3483 /* There may be implied bits which we still need to enable. These are
3484 non-named features which are needed to complete other sets of features,
3485 but cannot be enabled from arm-cpus.in due to being shared between
3486 multiple fgroups. Each entry in all_implied_fbits is of the form
3487 ante -> cons, meaning that if the feature "ante" is enabled, we should
3488 implicitly enable "cons". */
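/* Hypothetical entry, for illustration only: a row such as
     { isa_bit_mve_float, isa_bit_fp16 }
   would mean "whenever MVE floating point is enabled, also enable the
   FP16 bit"; the loop below simply applies every such rule.  */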
3489 const struct fbit_implication *impl = all_implied_fbits;
3490 while (impl->ante)
3491 {
3492 if (bitmap_bit_p (target->isa, impl->ante))
3493 bitmap_set_bit (target->isa, impl->cons);
3494 impl++;
3495 }
3496
3497 if (!arm_selected_tune)
3498 arm_selected_tune = arm_selected_cpu;
3499 else /* Validate the features passed to -mtune. */
3500 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3501
3502 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3503
3504 /* Finish initializing the target structure. */
3505 if (!target->arch_name)
3506 target->arch_name = arm_selected_arch->common.name;
3507 target->arch_pp_name = arm_selected_arch->arch;
3508 target->base_arch = arm_selected_arch->base_arch;
3509 target->profile = arm_selected_arch->profile;
3510
3511 target->tune_flags = tune_data->tune_flags;
3512 target->tune = tune_data->tune;
3513 target->tune_core = tune_data->scheduler;
3514 }
3515
3516 /* Fix up any incompatible options that the user has specified. */
3517 static void
3518 arm_option_override (void)
3519 {
3520 static const enum isa_feature fpu_bitlist_internal[]
3521 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3522 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3523 static const enum isa_feature fp_bitlist[]
3524 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3525 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3526 cl_target_option opts;
3527
3528 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3529 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3530
3531 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3532 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3533 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3534 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3535
3536 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3537
3538 if (!OPTION_SET_P (arm_fpu_index))
3539 {
3540 bool ok;
3541 int fpu_index;
3542
3543 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3544 CL_TARGET);
3545 gcc_assert (ok);
3546 arm_fpu_index = (enum fpu_type) fpu_index;
3547 }
3548
3549 cl_target_option_save (&opts, &global_options, &global_options_set);
3550 arm_configure_build_target (&arm_active_target, &opts, true);
3551
3552 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3553 SUBTARGET_OVERRIDE_OPTIONS;
3554 #endif
3555
3556 /* Initialize boolean versions of the architectural flags, for use
3557 in the arm.md file and for enabling feature flags. */
3558 arm_option_reconfigure_globals ();
3559
3560 arm_tune = arm_active_target.tune_core;
3561 tune_flags = arm_active_target.tune_flags;
3562 current_tune = arm_active_target.tune;
3563
3564 /* TBD: Dwarf info for apcs frame is not handled yet. */
3565 if (TARGET_APCS_FRAME)
3566 flag_shrink_wrap = false;
3567
3568 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3569 {
3570 warning (0, "%<-mapcs-stack-check%> incompatible with "
3571 "%<-mno-apcs-frame%>");
3572 target_flags |= MASK_APCS_FRAME;
3573 }
3574
3575 if (TARGET_POKE_FUNCTION_NAME)
3576 target_flags |= MASK_APCS_FRAME;
3577
3578 if (TARGET_APCS_REENT && flag_pic)
3579 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3580
3581 if (TARGET_APCS_REENT)
3582 warning (0, "APCS reentrant code not supported. Ignored");
3583
3584 /* Set up some tuning parameters. */
3585 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3586 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3587 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3588 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3589 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3590 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3591
3592 /* For arm2/3 there is no need to do any scheduling if we are doing
3593 software floating-point. */
3594 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3595 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3596
3597 /* Override the default structure alignment for AAPCS ABI. */
3598 if (!OPTION_SET_P (arm_structure_size_boundary))
3599 {
3600 if (TARGET_AAPCS_BASED)
3601 arm_structure_size_boundary = 8;
3602 }
3603 else
3604 {
3605 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3606
3607 if (arm_structure_size_boundary != 8
3608 && arm_structure_size_boundary != 32
3609 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3610 {
3611 if (ARM_DOUBLEWORD_ALIGN)
3612 warning (0,
3613 "structure size boundary can only be set to 8, 32 or 64");
3614 else
3615 warning (0, "structure size boundary can only be set to 8 or 32");
3616 arm_structure_size_boundary
3617 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3618 }
3619 }
3620
3621 if (TARGET_VXWORKS_RTP)
3622 {
3623 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3624 arm_pic_data_is_text_relative = 0;
3625 }
3626 else if (flag_pic
3627 && !arm_pic_data_is_text_relative
3628 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3629 /* When text & data segments don't have a fixed displacement, the
3630 intended use is with a single, read only, pic base register.
3631 Unless the user explicitly requested not to do that, set
3632 it. */
3633 target_flags |= MASK_SINGLE_PIC_BASE;
3634
3635 /* If stack checking is disabled, we can use r10 as the PIC register,
3636 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3637 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3638 {
3639 if (TARGET_VXWORKS_RTP)
3640 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3641 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3642 }
3643
3644 if (flag_pic && TARGET_VXWORKS_RTP)
3645 arm_pic_register = 9;
3646
3647 /* If in FDPIC mode then force arm_pic_register to be r9. */
3648 if (TARGET_FDPIC)
3649 {
3650 arm_pic_register = FDPIC_REGNUM;
3651 if (TARGET_THUMB1)
3652 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3653 }
3654
3655 if (arm_pic_register_string != NULL)
3656 {
3657 int pic_register = decode_reg_name (arm_pic_register_string);
3658
3659 if (!flag_pic)
3660 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3661
3662 /* Prevent the user from choosing an obviously stupid PIC register. */
3663 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3664 || pic_register == HARD_FRAME_POINTER_REGNUM
3665 || pic_register == STACK_POINTER_REGNUM
3666 || pic_register >= PC_REGNUM
3667 || (TARGET_VXWORKS_RTP
3668 && (unsigned int) pic_register != arm_pic_register))
3669 error ("unable to use %qs for PIC register", arm_pic_register_string);
3670 else
3671 arm_pic_register = pic_register;
3672 }
3673
3674 if (flag_pic)
3675 target_word_relocations = 1;
3676
3677 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3678 if (fix_cm3_ldrd == 2)
3679 {
3680 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3681 fix_cm3_ldrd = 1;
3682 else
3683 fix_cm3_ldrd = 0;
3684 }
3685
3686 /* Enable fix_vlldm by default if required. */
3687 if (fix_vlldm == 2)
3688 {
3689 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3690 fix_vlldm = 1;
3691 else
3692 fix_vlldm = 0;
3693 }
3694
3695 /* Enable fix_aes by default if required. */
3696 if (fix_aes_erratum_1742098 == 2)
3697 {
3698 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3699 fix_aes_erratum_1742098 = 1;
3700 else
3701 fix_aes_erratum_1742098 = 0;
3702 }
3703
3704 /* Hot/Cold partitioning is not currently supported, since we can't
3705 handle literal pool placement in that case. */
3706 if (flag_reorder_blocks_and_partition)
3707 {
3708 inform (input_location,
3709 "%<-freorder-blocks-and-partition%> not supported "
3710 "on this architecture");
3711 flag_reorder_blocks_and_partition = 0;
3712 flag_reorder_blocks = 1;
3713 }
3714
3715 if (flag_pic)
3716 /* Hoisting PIC address calculations more aggressively provides a small,
3717 but measurable, size reduction for PIC code. Therefore, we decrease
3718 the bar for unrestricted expression hoisting to the cost of PIC address
3719 calculation, which is 2 instructions. */
3720 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3721 param_gcse_unrestricted_cost, 2);
3722
3723 /* ARM EABI defaults to strict volatile bitfields. */
3724 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3725 && abi_version_at_least(2))
3726 flag_strict_volatile_bitfields = 1;
3727
3728 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3729 for which we have deemed it beneficial (signified by setting
3730 prefetch.num_slots to 1 or more). */
3731 if (flag_prefetch_loop_arrays < 0
3732 && HAVE_prefetch
3733 && optimize >= 3
3734 && current_tune->prefetch.num_slots > 0)
3735 flag_prefetch_loop_arrays = 1;
3736
3737 /* Set up parameters to be used in prefetching algorithm. Do not
3738 override the defaults unless we are tuning for a core we have
3739 researched values for. */
3740 if (current_tune->prefetch.num_slots > 0)
3741 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3742 param_simultaneous_prefetches,
3743 current_tune->prefetch.num_slots);
3744 if (current_tune->prefetch.l1_cache_line_size >= 0)
3745 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3746 param_l1_cache_line_size,
3747 current_tune->prefetch.l1_cache_line_size);
3748 if (current_tune->prefetch.l1_cache_line_size >= 0)
3749 {
3750 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3751 param_destruct_interfere_size,
3752 current_tune->prefetch.l1_cache_line_size);
3753 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3754 param_construct_interfere_size,
3755 current_tune->prefetch.l1_cache_line_size);
3756 }
3757 else
3758 {
3759 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3760 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3761 constructive? */
3762 /* More recent Cortex chips have a 64-byte cache line, but are marked
3763 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3764 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3765 param_destruct_interfere_size, 64);
3766 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3767 param_construct_interfere_size, 64);
3768 }
3769
3770 if (current_tune->prefetch.l1_cache_size >= 0)
3771 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3772 param_l1_cache_size,
3773 current_tune->prefetch.l1_cache_size);
3774
3775 /* Look through ready list and all of queue for instructions
3776 relevant for L2 auto-prefetcher. */
3777 int sched_autopref_queue_depth;
3778
3779 switch (current_tune->sched_autopref)
3780 {
3781 case tune_params::SCHED_AUTOPREF_OFF:
3782 sched_autopref_queue_depth = -1;
3783 break;
3784
3785 case tune_params::SCHED_AUTOPREF_RANK:
3786 sched_autopref_queue_depth = 0;
3787 break;
3788
3789 case tune_params::SCHED_AUTOPREF_FULL:
3790 sched_autopref_queue_depth = max_insn_queue_index + 1;
3791 break;
3792
3793 default:
3794 gcc_unreachable ();
3795 }
3796
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_sched_autopref_queue_depth,
3799 sched_autopref_queue_depth);
3800
3801 /* Currently, for slow flash data, we just disable literal pools. We also
3802 disable them for pure-code. */
3803 if (target_slow_flash_data || target_pure_code)
3804 arm_disable_literal_pool = true;
3805
3806 /* Disable scheduling fusion by default unless the target is an ARMv7
3807 processor whose tuning prefers ldrd/strd. */
3808 if (flag_schedule_fusion == 2
3809 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3810 flag_schedule_fusion = 0;
3811
3812 /* Need to remember initial options before they are overridden. */
3813 init_optimize = build_optimization_node (&global_options,
3814 &global_options_set);
3815
3816 arm_options_perform_arch_sanity_checks ();
3817 arm_option_override_internal (&global_options, &global_options_set);
3818 arm_option_check_internal (&global_options);
3819 arm_option_params_internal ();
3820
3821 /* Create the default target_options structure. */
3822 target_option_default_node = target_option_current_node
3823 = build_target_option_node (&global_options, &global_options_set);
3824
3825 /* Register global variables with the garbage collector. */
3826 arm_add_gc_roots ();
3827
3828 /* Init initial mode for testing. */
3829 thumb_flipper = TARGET_THUMB;
3830 }
3831
3832
3833 /* Reconfigure global status flags from the active_target.isa. */
3834 void
3835 arm_option_reconfigure_globals (void)
3836 {
3837 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3838 arm_base_arch = arm_active_target.base_arch;
3839
3840 /* Initialize boolean versions of the architectural flags, for use
3841 in the arm.md file. */
3842 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3843 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3844 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3845 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3846 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3847 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3848 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3849 arm_arch6m = arm_arch6 && !arm_arch_notm;
3850 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3851 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3852 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3853 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3854 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3855 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3856 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3857 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3858 isa_bit_armv8_1m_main);
3859 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3860 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3861 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3862 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3863 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3864 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3865 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3866 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3867 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3868 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3869 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3870 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3871 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3872
3873 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3874 if (arm_fp16_inst)
3875 {
3876 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3877 error ("selected fp16 options are incompatible");
3878 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3879 }
3880
3881 arm_arch_cde = 0;
3882 arm_arch_cde_coproc = 0;
3883 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3884 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3885 isa_bit_cdecp6, isa_bit_cdecp7};
3886 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3887 {
3888 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3889 if (cde_bit)
3890 {
3891 arm_arch_cde |= cde_bit;
3892 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3893 }
3894 }
3895
3896 /* And finally, set up some quirks. */
3897 arm_arch_no_volatile_ce
3898 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3899 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3900 isa_bit_quirk_armv6kz);
3901
3902 /* Use the cp15 method if it is available. */
3903 if (target_thread_pointer == TP_AUTO)
3904 {
3905 if (arm_arch6k && !TARGET_THUMB1)
3906 target_thread_pointer = TP_CP15;
3907 else
3908 target_thread_pointer = TP_SOFT;
3909 }
3910
3911 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3912 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3913 }
3914
3915 /* Perform some validation between the desired architecture and the rest of the
3916 options. */
3917 void
3918 arm_options_perform_arch_sanity_checks (void)
3919 {
3920 /* V5T code we generate is completely interworking capable, so we turn off
3921 TARGET_INTERWORK here to avoid many tests later on. */
3922
3923 /* XXX However, we must pass the right pre-processor defines to CPP
3924 or GLD can get confused. This is a hack. */
3925 if (TARGET_INTERWORK)
3926 arm_cpp_interwork = 1;
3927
3928 if (arm_arch5t)
3929 target_flags &= ~MASK_INTERWORK;
3930
3931 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3932 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3933
3934 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3935 error ("iwmmxt abi requires an iwmmxt capable cpu");
3936
3937 /* BPABI targets use linker tricks to allow interworking on cores
3938 without thumb support. */
3939 if (TARGET_INTERWORK
3940 && !TARGET_BPABI
3941 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3942 {
3943 warning (0, "target CPU does not support interworking");
3944 target_flags &= ~MASK_INTERWORK;
3945 }
3946
3947 /* If soft-float is specified then don't use FPU. */
3948 if (TARGET_SOFT_FLOAT)
3949 arm_fpu_attr = FPU_NONE;
3950 else
3951 arm_fpu_attr = FPU_VFP;
3952
3953 if (TARGET_AAPCS_BASED)
3954 {
3955 if (TARGET_CALLER_INTERWORKING)
3956 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3957 else
3958 if (TARGET_CALLEE_INTERWORKING)
3959 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3960 }
3961
3962 /* __fp16 support currently assumes the core has ldrh. */
3963 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3964 sorry ("%<__fp16%> and no ldrh");
3965
3966 if (use_cmse && !arm_arch_cmse)
3967 error ("target CPU does not support ARMv8-M Security Extensions");
3968
3969 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3970 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3971 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3972 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3973
3974
3975 if (TARGET_AAPCS_BASED)
3976 {
3977 if (arm_abi == ARM_ABI_IWMMXT)
3978 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3979 else if (TARGET_HARD_FLOAT_ABI)
3980 {
3981 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3982 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3983 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3984 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3985 }
3986 else
3987 arm_pcs_default = ARM_PCS_AAPCS;
3988 }
3989 else
3990 {
3991 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3992 sorry ("%<-mfloat-abi=hard%> and VFP");
3993
3994 if (arm_abi == ARM_ABI_APCS)
3995 arm_pcs_default = ARM_PCS_APCS;
3996 else
3997 arm_pcs_default = ARM_PCS_ATPCS;
3998 }
3999 }
4000
4001 /* Test whether a local function descriptor is canonical, i.e.,
4002 whether we can use GOTOFFFUNCDESC to compute the address of the
4003 function. */
4004 static bool
4005 arm_fdpic_local_funcdesc_p (rtx fnx)
4006 {
4007 tree fn;
4008 enum symbol_visibility vis;
4009 bool ret;
4010
4011 if (!TARGET_FDPIC)
4012 return true;
4013
4014 if (! SYMBOL_REF_LOCAL_P (fnx))
4015 return false;
4016
4017 fn = SYMBOL_REF_DECL (fnx);
4018
4019 if (! fn)
4020 return false;
4021
4022 vis = DECL_VISIBILITY (fn);
4023
4024 if (vis == VISIBILITY_PROTECTED)
4025 /* Private function descriptors for protected functions are not
4026 canonical. Temporarily change the visibility to global so that
4027 we can ensure uniqueness of funcdesc pointers. */
4028 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4029
4030 ret = default_binds_local_p_1 (fn, flag_pic);
4031
4032 DECL_VISIBILITY (fn) = vis;
4033
4034 return ret;
4035 }
4036
4037 static void
4038 arm_add_gc_roots (void)
4039 {
4040 gcc_obstack_init(&minipool_obstack);
4041 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4042 }
4043 \f
4044 /* A table of known ARM exception types.
4045 For use with the interrupt function attribute. */
4046
4047 typedef struct
4048 {
4049 const char *const arg;
4050 const unsigned long return_value;
4051 }
4052 isr_attribute_arg;
4053
4054 static const isr_attribute_arg isr_attribute_args [] =
4055 {
4056 { "IRQ", ARM_FT_ISR },
4057 { "irq", ARM_FT_ISR },
4058 { "FIQ", ARM_FT_FIQ },
4059 { "fiq", ARM_FT_FIQ },
4060 { "ABORT", ARM_FT_ISR },
4061 { "abort", ARM_FT_ISR },
4062 { "UNDEF", ARM_FT_EXCEPTION },
4063 { "undef", ARM_FT_EXCEPTION },
4064 { "SWI", ARM_FT_EXCEPTION },
4065 { "swi", ARM_FT_EXCEPTION },
4066 { NULL, ARM_FT_NORMAL }
4067 };
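/* Usage sketch (illustrative, not part of the original source): the table
   above maps the string given to the "isr"/"interrupt" function attribute,
   e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void undef_handler (void) __attribute__ ((isr ("UNDEF")));

   A string not listed here falls through to the NULL sentinel and makes
   arm_isr_value below return ARM_FT_UNKNOWN.  */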
4068
4069 /* Returns the (interrupt) function type of the current
4070 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4071
4072 static unsigned long
4073 arm_isr_value (tree argument)
4074 {
4075 const isr_attribute_arg * ptr;
4076 const char * arg;
4077
4078 if (!arm_arch_notm)
4079 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4080
4081 /* No argument - default to IRQ. */
4082 if (argument == NULL_TREE)
4083 return ARM_FT_ISR;
4084
4085 /* Get the value of the argument. */
4086 if (TREE_VALUE (argument) == NULL_TREE
4087 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4088 return ARM_FT_UNKNOWN;
4089
4090 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4091
4092 /* Check it against the list of known arguments. */
4093 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4094 if (streq (arg, ptr->arg))
4095 return ptr->return_value;
4096
4097 /* An unrecognized interrupt type. */
4098 return ARM_FT_UNKNOWN;
4099 }
4100
4101 /* Computes the type of the current function. */
4102
4103 static unsigned long
4104 arm_compute_func_type (void)
4105 {
4106 unsigned long type = ARM_FT_UNKNOWN;
4107 tree a;
4108 tree attr;
4109
4110 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4111
4112 /* Decide if the current function is volatile. Such functions
4113 never return, and many memory cycles can be saved by not storing
4114 register values that will never be needed again. This optimization
4115 was added to speed up context switching in a kernel application. */
4116 if (optimize > 0
4117 && (TREE_NOTHROW (current_function_decl)
4118 || !(flag_unwind_tables
4119 || (flag_exceptions
4120 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4121 && TREE_THIS_VOLATILE (current_function_decl))
4122 type |= ARM_FT_VOLATILE;
4123
4124 if (cfun->static_chain_decl != NULL)
4125 type |= ARM_FT_NESTED;
4126
4127 attr = DECL_ATTRIBUTES (current_function_decl);
4128
4129 a = lookup_attribute ("naked", attr);
4130 if (a != NULL_TREE)
4131 type |= ARM_FT_NAKED;
4132
4133 a = lookup_attribute ("isr", attr);
4134 if (a == NULL_TREE)
4135 a = lookup_attribute ("interrupt", attr);
4136
4137 if (a == NULL_TREE)
4138 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4139 else
4140 type |= arm_isr_value (TREE_VALUE (a));
4141
4142 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4143 type |= ARM_FT_CMSE_ENTRY;
4144
4145 return type;
4146 }
4147
4148 /* Returns the type of the current function. */
4149
4150 unsigned long
4151 arm_current_func_type (void)
4152 {
4153 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4154 cfun->machine->func_type = arm_compute_func_type ();
4155
4156 return cfun->machine->func_type;
4157 }
4158
4159 bool
4160 arm_allocate_stack_slots_for_args (void)
4161 {
4162 /* Naked functions should not allocate stack slots for arguments. */
4163 return !IS_NAKED (arm_current_func_type ());
4164 }
4165
4166 static bool
4167 arm_warn_func_return (tree decl)
4168 {
4169 /* Naked functions are implemented entirely in assembly, including the
4170 return sequence, so suppress warnings about this. */
4171 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4172 }
4173
4174 \f
4175 /* Output assembler code for a block containing the constant parts
4176 of a trampoline, leaving space for the variable parts.
4177
4178 On the ARM, (if r8 is the static chain regnum, and remembering that
4179 referencing pc adds an offset of 8) the trampoline looks like:
4180 ldr r8, [pc, #0]
4181 ldr pc, [pc]
4182 .word static chain value
4183 .word function's address
4184 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4185
4186 In FDPIC mode, the trampoline looks like:
4187 .word trampoline address
4188 .word trampoline GOT address
4189 ldr r12, [pc, #8] ; #4 for Arm mode
4190 ldr r9, [pc, #8] ; #4 for Arm mode
4191 ldr pc, [pc, #8] ; #4 for Arm mode
4192 .word static chain value
4193 .word GOT address
4194 .word function's address
4195 */
4196
4197 static void
4198 arm_asm_trampoline_template (FILE *f)
4199 {
4200 fprintf (f, "\t.syntax unified\n");
4201
4202 if (TARGET_FDPIC)
4203 {
4204 /* The first two words are a function descriptor pointing to the
4205 trampoline code just below. */
4206 if (TARGET_ARM)
4207 fprintf (f, "\t.arm\n");
4208 else if (TARGET_THUMB2)
4209 fprintf (f, "\t.thumb\n");
4210 else
4211 /* Only ARM and Thumb-2 are supported. */
4212 gcc_unreachable ();
4213
4214 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4215 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4216 /* Trampoline code which sets the static chain register but also
4217 PIC register before jumping into real code. */
4218 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4219 STATIC_CHAIN_REGNUM, PC_REGNUM,
4220 TARGET_THUMB2 ? 8 : 4);
4221 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4222 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4223 TARGET_THUMB2 ? 8 : 4);
4224 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4225 PC_REGNUM, PC_REGNUM,
4226 TARGET_THUMB2 ? 8 : 4);
4227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4228 }
4229 else if (TARGET_ARM)
4230 {
4231 fprintf (f, "\t.arm\n");
4232 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4233 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4234 }
4235 else if (TARGET_THUMB2)
4236 {
4237 fprintf (f, "\t.thumb\n");
4238 /* The Thumb-2 trampoline is similar to the arm implementation.
4239 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4240 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4241 STATIC_CHAIN_REGNUM, PC_REGNUM);
4242 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4243 }
4244 else
4245 {
4246 ASM_OUTPUT_ALIGN (f, 2);
4247 fprintf (f, "\t.code\t16\n");
4248 fprintf (f, ".Ltrampoline_start:\n");
4249 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4250 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4251 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4252 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4253 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4254 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4255 }
4256 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4257 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4258 }
4259
4260 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4261
4262 static void
4263 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4264 {
4265 rtx fnaddr, mem, a_tramp;
4266
4267 emit_block_move (m_tramp, assemble_trampoline_template (),
4268 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4269
4270 if (TARGET_FDPIC)
4271 {
4272 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4273 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4274 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4275 /* The function start address is at offset 8, but in Thumb mode
4276 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4277 below. */
4278 rtx trampoline_code_start
4279 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4280
4281 /* Write initial funcdesc which points to the trampoline. */
4282 mem = adjust_address (m_tramp, SImode, 0);
4283 emit_move_insn (mem, trampoline_code_start);
4284 mem = adjust_address (m_tramp, SImode, 4);
4285 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4286 /* Setup static chain. */
4287 mem = adjust_address (m_tramp, SImode, 20);
4288 emit_move_insn (mem, chain_value);
4289 /* GOT + real function entry point. */
4290 mem = adjust_address (m_tramp, SImode, 24);
4291 emit_move_insn (mem, gotaddr);
4292 mem = adjust_address (m_tramp, SImode, 28);
4293 emit_move_insn (mem, fnaddr);
4294 }
4295 else
4296 {
4297 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4298 emit_move_insn (mem, chain_value);
4299
4300 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4301 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4302 emit_move_insn (mem, fnaddr);
4303 }
4304
4305 a_tramp = XEXP (m_tramp, 0);
4306 maybe_emit_call_builtin___clear_cache (a_tramp,
4307 plus_constant (ptr_mode,
4308 a_tramp,
4309 TRAMPOLINE_SIZE));
4310 }
4311
4312 /* Thumb trampolines should be entered in thumb mode, so set
4313 the bottom bit of the address. */
4314
4315 static rtx
4316 arm_trampoline_adjust_address (rtx addr)
4317 {
4318 /* For FDPIC don't fix trampoline address since it's a function
4319 descriptor and not a function address. */
4320 if (TARGET_THUMB && !TARGET_FDPIC)
4321 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4322 NULL, 0, OPTAB_LIB_WIDEN);
4323 return addr;
4324 }
4325 \f
4326 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4327 includes call-clobbered registers too. If this is a leaf function
4328 we can just examine the registers used by the RTL, but otherwise we
4329 have to assume that whatever function is called might clobber
4330 anything, and so we have to save all the call-clobbered registers
4331 as well. */
4332 static inline bool reg_needs_saving_p (unsigned reg)
4333 {
4334 unsigned long func_type = arm_current_func_type ();
4335
4336 if (IS_INTERRUPT (func_type))
4337 if (df_regs_ever_live_p (reg)
4338 /* Save call-clobbered core registers. */
4339 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4340 return true;
4341 else
4342 return false;
4343 else
4344 if (!df_regs_ever_live_p (reg)
4345 || call_used_or_fixed_reg_p (reg))
4346 return false;
4347 else
4348 return true;
4349 }
4350
4351 /* Return 1 if it is possible to return using a single instruction.
4352 If SIBLING is non-null, this is a test for a return before a sibling
4353 call. SIBLING is the call insn, so we can examine its register usage. */
4354
4355 int
4356 use_return_insn (int iscond, rtx sibling)
4357 {
4358 int regno;
4359 unsigned int func_type;
4360 unsigned long saved_int_regs;
4361 unsigned HOST_WIDE_INT stack_adjust;
4362 arm_stack_offsets *offsets;
4363
4364 /* Never use a return instruction before reload has run. */
4365 if (!reload_completed)
4366 return 0;
4367
4368 /* Never use a return instruction when return address signing
4369 mechanism is enabled as it requires more than one
4370 instruction. */
4371 if (arm_current_function_pac_enabled_p ())
4372 return 0;
4373
4374 func_type = arm_current_func_type ();
4375
4376 /* Naked, volatile and stack alignment functions need special
4377 consideration. */
4378 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4379 return 0;
4380
4381 /* So do interrupt functions that use the frame pointer and Thumb
4382 interrupt functions. */
4383 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4384 return 0;
4385
4386 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4387 && !optimize_function_for_size_p (cfun))
4388 return 0;
4389
4390 offsets = arm_get_frame_offsets ();
4391 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4392
4393 /* As do variadic functions. */
4394 if (crtl->args.pretend_args_size
4395 || cfun->machine->uses_anonymous_args
4396 /* Or if the function calls __builtin_eh_return () */
4397 || crtl->calls_eh_return
4398 /* Or if the function calls alloca */
4399 || cfun->calls_alloca
4400 /* Or if there is a stack adjustment. However, if the stack pointer
4401 is saved on the stack, we can use a pre-incrementing stack load. */
4402 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4403 && stack_adjust == 4))
4404 /* Or if the static chain register was saved above the frame, under the
4405 assumption that the stack pointer isn't saved on the stack. */
4406 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4407 && arm_compute_static_chain_stack_bytes() != 0))
4408 return 0;
4409
4410 saved_int_regs = offsets->saved_regs_mask;
4411
4412 /* Unfortunately, the insn
4413
4414 ldmib sp, {..., sp, ...}
4415
4416 triggers a bug on most SA-110 based devices, such that the stack
4417 pointer won't be correctly restored if the instruction takes a
4418 page fault. We work around this problem by popping r3 along with
4419 the other registers, since that is never slower than executing
4420 another instruction.
4421
4422 We test for !arm_arch5t here, because code for any architecture
4423 less than this could potentially be run on one of the buggy
4424 chips. */
4425 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4426 {
4427 /* Validate that r3 is a call-clobbered register (always true in
4428 the default abi) ... */
4429 if (!call_used_or_fixed_reg_p (3))
4430 return 0;
4431
4432 /* ... that it isn't being used for a return value ... */
4433 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4434 return 0;
4435
4436 /* ... or for a tail-call argument ... */
4437 if (sibling)
4438 {
4439 gcc_assert (CALL_P (sibling));
4440
4441 if (find_regno_fusage (sibling, USE, 3))
4442 return 0;
4443 }
4444
4445 /* ... and that there are no call-saved registers in r0-r2
4446 (always true in the default ABI). */
4447 if (saved_int_regs & 0x7)
4448 return 0;
4449 }
4450
4451 /* Can't be done if interworking with Thumb, and any registers have been
4452 stacked. */
4453 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4454 return 0;
4455
4456 /* On StrongARM, conditional returns are expensive if they aren't
4457 taken and multiple registers have been stacked. */
4458 if (iscond && arm_tune_strongarm)
4459 {
4460 /* Conditional return when just the LR is stored is a simple
4461 conditional-load instruction, that's not expensive. */
4462 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4463 return 0;
4464
4465 if (flag_pic
4466 && arm_pic_register != INVALID_REGNUM
4467 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4468 return 0;
4469 }
4470
4471 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4472 several instructions if anything needs to be popped. Armv8.1-M Mainline
4473 also needs several instructions to save and restore FP context. */
4474 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4475 return 0;
4476
4477 /* If there are saved registers but the LR isn't saved, then we need
4478 two instructions for the return. */
4479 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4480 return 0;
4481
4482 /* Can't be done if any of the VFP regs are pushed,
4483 since this also requires an insn. */
4484 if (TARGET_VFP_BASE)
4485 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4486 if (reg_needs_saving_p (regno))
4487 return 0;
4488
4489 if (TARGET_REALLY_IWMMXT)
4490 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4491 if (reg_needs_saving_p (regno))
4492 return 0;
4493
4494 return 1;
4495 }
4496
4497 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4498 shrink-wrapping if possible. This is the case if we need to emit a
4499 prologue, which we can test by looking at the offsets. */
4500 bool
4501 use_simple_return_p (void)
4502 {
4503 arm_stack_offsets *offsets;
4504
4505 /* Note this function can be called before or after reload. */
4506 if (!reload_completed)
4507 arm_compute_frame_layout ();
4508
4509 offsets = arm_get_frame_offsets ();
4510 return offsets->outgoing_args != 0;
4511 }
4512
4513 /* Return TRUE if int I is a valid immediate ARM constant. */
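/* Worked examples (added for illustration; the reasoning is the editor's):
   an ARM-mode immediate is an 8-bit value rotated right by an even amount,
   so 0x000000ff, 0x00ff0000 and 0xff000000 are valid, 0x000003fc is valid
   (0xff shifted left by 2, an even rotation), 0x000001fe is not (it would
   need an odd rotation), and 0x00ff00ff is rejected here in ARM mode but
   accepted below for Thumb-2 as the replicated pattern 0x00XY00XY.  */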
4514
4515 int
4516 const_ok_for_arm (HOST_WIDE_INT i)
4517 {
4518 int lowbit;
4519
4520 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4521 be all zero, or all one. */
4522 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4523 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4524 != ((~(unsigned HOST_WIDE_INT) 0)
4525 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4526 return FALSE;
4527
4528 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4529
4530 /* Fast return for 0 and small values. We must do this for zero, since
4531 the code below can't handle that one case. */
4532 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4533 return TRUE;
4534
4535 /* Get the number of trailing zeros. */
4536 lowbit = ffs((int) i) - 1;
4537
4538 /* Only even shifts are allowed in ARM mode so round down to the
4539 nearest even number. */
4540 if (TARGET_ARM)
4541 lowbit &= ~1;
4542
4543 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4544 return TRUE;
4545
4546 if (TARGET_ARM)
4547 {
4548 /* Allow rotated constants in ARM mode. */
4549 if (lowbit <= 4
4550 && ((i & ~0xc000003f) == 0
4551 || (i & ~0xf000000f) == 0
4552 || (i & ~0xfc000003) == 0))
4553 return TRUE;
4554 }
4555 else if (TARGET_THUMB2)
4556 {
4557 HOST_WIDE_INT v;
4558
4559 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4560 v = i & 0xff;
4561 v |= v << 16;
4562 if (i == v || i == (v | (v << 8)))
4563 return TRUE;
4564
4565 /* Allow repeated pattern 0xXY00XY00. */
4566 v = i & 0xff00;
4567 v |= v << 16;
4568 if (i == v)
4569 return TRUE;
4570 }
4571 else if (TARGET_HAVE_MOVT)
4572 {
4573 /* Thumb-1 Targets with MOVT. */
4574 if (i > 0xffff)
4575 return FALSE;
4576 else
4577 return TRUE;
4578 }
4579
4580 return FALSE;
4581 }
4582
4583 /* Return true if I is a valid constant for the operation CODE. */
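/* Worked example (illustrative): 0xffffff00 is not itself a valid ARM
   immediate, but for SET it is accepted because its complement 0x000000ff
   is, so a single MVN can load it; similarly the PLUS case accepts values
   whose negation is encodable, letting an ADD be emitted as SUB.  */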
4584 int
4585 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4586 {
4587 if (const_ok_for_arm (i))
4588 return 1;
4589
4590 switch (code)
4591 {
4592 case SET:
4593 /* See if we can use movw. */
4594 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4595 return 1;
4596 else
4597 /* Otherwise, try mvn. */
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4599
4600 case PLUS:
4601 /* See if we can use addw or subw. */
4602 if (TARGET_THUMB2
4603 && ((i & 0xfffff000) == 0
4604 || ((-i) & 0xfffff000) == 0))
4605 return 1;
4606 /* Fall through. */
4607 case COMPARE:
4608 case EQ:
4609 case NE:
4610 case GT:
4611 case LE:
4612 case LT:
4613 case GE:
4614 case GEU:
4615 case LTU:
4616 case GTU:
4617 case LEU:
4618 case UNORDERED:
4619 case ORDERED:
4620 case UNEQ:
4621 case UNGE:
4622 case UNLT:
4623 case UNGT:
4624 case UNLE:
4625 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4626
4627 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4628 case XOR:
4629 return 0;
4630
4631 case IOR:
4632 if (TARGET_THUMB2)
4633 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4634 return 0;
4635
4636 case AND:
4637 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4638
4639 default:
4640 gcc_unreachable ();
4641 }
4642 }
4643
4644 /* Return true if I is a valid di mode constant for the operation CODE. */
4645 int
4646 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4647 {
4648 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4649 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4650 rtx hi = GEN_INT (hi_val);
4651 rtx lo = GEN_INT (lo_val);
4652
4653 if (TARGET_THUMB1)
4654 return 0;
4655
4656 switch (code)
4657 {
4658 case AND:
4659 case IOR:
4660 case XOR:
4661 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4662 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4663 case PLUS:
4664 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4665
4666 default:
4667 return 0;
4668 }
4669 }
4670
4671 /* Emit a sequence of insns to handle a large constant.
4672 CODE is the code of the operation required, it can be any of SET, PLUS,
4673 IOR, AND, XOR, MINUS;
4674 MODE is the mode in which the operation is being performed;
4675 VAL is the integer to operate on;
4676 SOURCE is the other operand (a register, or a null-pointer for SET);
4677 SUBTARGETS means it is safe to create scratch registers if that will
4678 either produce a simpler sequence, or we will want to cse the values.
4679 Return value is the number of insns emitted. */
4680
4681 /* ??? Tweak this for thumb2. */
4682 int
4683 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4684 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4685 {
4686 rtx cond;
4687
4688 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4689 cond = COND_EXEC_TEST (PATTERN (insn));
4690 else
4691 cond = NULL_RTX;
4692
4693 if (subtargets || code == SET
4694 || (REG_P (target) && REG_P (source)
4695 && REGNO (target) != REGNO (source)))
4696 {
4697 /* After arm_reorg has been called, we can't fix up expensive
4698 constants by pushing them into memory so we must synthesize
4699 them in-line, regardless of the cost. This is only likely to
4700 be more costly on chips that have load delay slots and we are
4701 compiling without running the scheduler (so no splitting
4702 occurred before the final instruction emission).
4703
4704 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4705 */
4706 if (!cfun->machine->after_arm_reorg
4707 && !cond
4708 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4709 1, 0)
4710 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4711 + (code != SET))))
4712 {
4713 if (code == SET)
4714 {
4715 /* Currently SET is the only monadic value for CODE; all
4716 the rest are dyadic. */
4717 if (TARGET_USE_MOVT)
4718 arm_emit_movpair (target, GEN_INT (val));
4719 else
4720 emit_set_insn (target, GEN_INT (val));
4721
4722 return 1;
4723 }
4724 else
4725 {
4726 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4727
4728 if (TARGET_USE_MOVT)
4729 arm_emit_movpair (temp, GEN_INT (val));
4730 else
4731 emit_set_insn (temp, GEN_INT (val));
4732
4733 /* For MINUS, the value is subtracted from, since we never
4734 have subtraction of a constant. */
4735 if (code == MINUS)
4736 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4737 else
4738 emit_set_insn (target,
4739 gen_rtx_fmt_ee (code, mode, source, temp));
4740 return 2;
4741 }
4742 }
4743 }
4744
4745 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4746 1);
4747 }
4748
4749 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4750 ARM/Thumb-2 immediates and add up to VAL.
4751 The function return value gives the number of insns required. */
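/* For example (editor's illustration): with CODE == SET and
   VAL == 0x00ff00ff, ARM mode needs two elements (0x000000ff then
   0x00ff0000, i.e. a MOV followed by an ORR), whereas Thumb-2 encodes the
   whole value as the single replicated constant 0x00ff00ff.  */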
4752 static int
4753 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4754 struct four_ints *return_sequence)
4755 {
4756 int best_consecutive_zeros = 0;
4757 int i;
4758 int best_start = 0;
4759 int insns1, insns2;
4760 struct four_ints tmp_sequence;
4761
4762 /* If we aren't targeting ARM, the best place to start is always at
4763 the bottom, otherwise look more closely. */
4764 if (TARGET_ARM)
4765 {
4766 for (i = 0; i < 32; i += 2)
4767 {
4768 int consecutive_zeros = 0;
4769
4770 if (!(val & (3 << i)))
4771 {
4772 while ((i < 32) && !(val & (3 << i)))
4773 {
4774 consecutive_zeros += 2;
4775 i += 2;
4776 }
4777 if (consecutive_zeros > best_consecutive_zeros)
4778 {
4779 best_consecutive_zeros = consecutive_zeros;
4780 best_start = i - consecutive_zeros;
4781 }
4782 i -= 2;
4783 }
4784 }
4785 }
4786
4787 /* So long as it won't require any more insns to do so, it's
4788 desirable to emit a small constant (in bits 0...9) in the last
4789 insn. This way there is more chance that it can be combined with
4790 a later addressing insn to form a pre-indexed load or store
4791 operation. Consider:
4792
4793 *((volatile int *)0xe0000100) = 1;
4794 *((volatile int *)0xe0000110) = 2;
4795
4796 We want this to wind up as:
4797
4798 mov rA, #0xe0000000
4799 mov rB, #1
4800 str rB, [rA, #0x100]
4801 mov rB, #2
4802 str rB, [rA, #0x110]
4803
4804 rather than having to synthesize both large constants from scratch.
4805
4806 Therefore, we calculate how many insns would be required to emit
4807 the constant starting from `best_start', and also starting from
4808 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4809 yield a shorter sequence, we may as well use zero. */
4810 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4811 if (best_start != 0
4812 && ((HOST_WIDE_INT_1U << best_start) < val))
4813 {
4814 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4815 if (insns2 <= insns1)
4816 {
4817 *return_sequence = tmp_sequence;
4818 insns1 = insns2;
4819 }
4820 }
4821
4822 return insns1;
4823 }
4824
4825 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4826 static int
4827 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4828 struct four_ints *return_sequence, int i)
4829 {
4830 int remainder = val & 0xffffffff;
4831 int insns = 0;
4832
4833 /* Try and find a way of doing the job in either two or three
4834 instructions.
4835
4836 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4837 location. We start at position I. This may be the MSB, or
4838 optimal_immediate_sequence may have positioned it at the largest block
4839 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4840 wrapping around to the top of the word when we drop off the bottom.
4841 In the worst case this code should produce no more than four insns.
4842
4843 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4844 constants, shifted to any arbitrary location. We should always start
4845 at the MSB. */
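/* Illustration (rN below is just a placeholder scratch register): in ARM
mode 0x12340000 cannot be encoded as a single rotated 8-bit immediate,
since its set bits span 11 bit positions, but it splits into two valid
immediates, 0x12000000 and 0x00340000, giving e.g.
mov rN, #0x12000000
add rN, rN, #0x00340000
In Thumb-2 mode a replicated constant such as 0x00ff00ff is itself a
single immediate, so fewer insns may be needed there. */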
4846 do
4847 {
4848 int end;
4849 unsigned int b1, b2, b3, b4;
4850 unsigned HOST_WIDE_INT result;
4851 int loc;
4852
4853 gcc_assert (insns < 4);
4854
4855 if (i <= 0)
4856 i += 32;
4857
4858 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4859 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4860 {
4861 loc = i;
4862 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4863 /* We can use addw/subw for the last 12 bits. */
4864 result = remainder;
4865 else
4866 {
4867 /* Use an 8-bit shifted/rotated immediate. */
4868 end = i - 8;
4869 if (end < 0)
4870 end += 32;
4871 result = remainder & ((0x0ff << end)
4872 | ((i < end) ? (0xff >> (32 - end))
4873 : 0));
4874 i -= 8;
4875 }
4876 }
4877 else
4878 {
4879 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4880 arbitrary shifts. */
4881 i -= TARGET_ARM ? 2 : 1;
4882 continue;
4883 }
4884
4885 /* Next, see if we can do a better job with a thumb2 replicated
4886 constant.
4887
4888 We do it this way around to catch the cases like 0x01F001E0 where
4889 two 8-bit immediates would work, but a replicated constant would
4890 make it worse.
4891
4892 TODO: 16-bit constants that don't clear all the bits, but still win.
4893 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
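/* Illustration of the ordering: for 0x01F001E0 the bytes are 0x01, 0xF0,
0x01 and 0xE0, so b1 == b3 and a 16-bit replicated constant could clear
0x01000100, but that leaves 0x00F000E0 and needs two further insns
(three in total), whereas the two 8-bit immediates 0x01F00000 and
0x000001E0 finish the job in two. */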
4894 if (TARGET_THUMB2)
4895 {
4896 b1 = (remainder & 0xff000000) >> 24;
4897 b2 = (remainder & 0x00ff0000) >> 16;
4898 b3 = (remainder & 0x0000ff00) >> 8;
4899 b4 = remainder & 0xff;
4900
4901 if (loc > 24)
4902 {
4903 /* The 8-bit immediate already found clears b1 (and maybe b2),
4904 but must leave b3 and b4 alone. */
4905
4906 /* First try to find a 32-bit replicated constant that clears
4907 almost everything. We can assume that we can't do it in one,
4908 or else we wouldn't be here. */
4909 unsigned int tmp = b1 & b2 & b3 & b4;
4910 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4911 + (tmp << 24);
4912 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4913 + (tmp == b3) + (tmp == b4);
4914 if (tmp
4915 && (matching_bytes >= 3
4916 || (matching_bytes == 2
4917 && const_ok_for_op (remainder & ~tmp2, code))))
4918 {
4919 /* At least 3 of the bytes match, and the fourth has at
4920 least as many bits set, or two of the bytes match
4921 and it will only require one more insn to finish. */
4922 result = tmp2;
4923 i = tmp != b1 ? 32
4924 : tmp != b2 ? 24
4925 : tmp != b3 ? 16
4926 : 8;
4927 }
4928
4929 /* Second, try to find a 16-bit replicated constant that can
4930 leave three of the bytes clear. If b2 or b4 is already
4931 zero, then we can. If the 8-bit from above would not
4932 clear b2 anyway, then we still win. */
4933 else if (b1 == b3 && (!b2 || !b4
4934 || (remainder & 0x00ff0000 & ~result)))
4935 {
4936 result = remainder & 0xff00ff00;
4937 i = 24;
4938 }
4939 }
4940 else if (loc > 16)
4941 {
4942 /* The 8-bit immediate already found clears b2 (and maybe b3)
4943 and we don't get here unless b1 is already clear, but it will
4944 leave b4 unchanged. */
4945
4946 /* If we can clear b2 and b4 at once, then we win, since the
4947 8-bits couldn't possibly reach that far. */
4948 if (b2 == b4)
4949 {
4950 result = remainder & 0x00ff00ff;
4951 i = 16;
4952 }
4953 }
4954 }
4955
4956 return_sequence->i[insns++] = result;
4957 remainder &= ~result;
4958
4959 if (code == SET || code == MINUS)
4960 code = PLUS;
4961 }
4962 while (remainder);
4963
4964 return insns;
4965 }
4966
4967 /* Emit an instruction with the indicated PATTERN. If COND is
4968 non-NULL, conditionalize the execution of the instruction on COND
4969 being true. */
4970
4971 static void
4972 emit_constant_insn (rtx cond, rtx pattern)
4973 {
4974 if (cond)
4975 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4976 emit_insn (pattern);
4977 }
4978
4979 /* As above, but extra parameter GENERATE which, if clear, suppresses
4980 RTL generation. */
4981
4982 static int
4983 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4984 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4985 int subtargets, int generate)
4986 {
4987 int can_invert = 0;
4988 int can_negate = 0;
4989 int final_invert = 0;
4990 int i;
4991 int set_sign_bit_copies = 0;
4992 int clear_sign_bit_copies = 0;
4993 int clear_zero_bit_copies = 0;
4994 int set_zero_bit_copies = 0;
4995 int insns = 0, neg_insns, inv_insns;
4996 unsigned HOST_WIDE_INT temp1, temp2;
4997 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4998 struct four_ints *immediates;
4999 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5000
5001 /* Find out which operations are safe for a given CODE. Also do a quick
5002 check for degenerate cases; these can occur when DImode operations
5003 are split. */
5004 switch (code)
5005 {
5006 case SET:
5007 can_invert = 1;
5008 break;
5009
5010 case PLUS:
5011 can_negate = 1;
5012 break;
5013
5014 case IOR:
5015 if (remainder == 0xffffffff)
5016 {
5017 if (generate)
5018 emit_constant_insn (cond,
5019 gen_rtx_SET (target,
5020 GEN_INT (ARM_SIGN_EXTEND (val))));
5021 return 1;
5022 }
5023
5024 if (remainder == 0)
5025 {
5026 if (reload_completed && rtx_equal_p (target, source))
5027 return 0;
5028
5029 if (generate)
5030 emit_constant_insn (cond, gen_rtx_SET (target, source));
5031 return 1;
5032 }
5033 break;
5034
5035 case AND:
5036 if (remainder == 0)
5037 {
5038 if (generate)
5039 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5040 return 1;
5041 }
5042 if (remainder == 0xffffffff)
5043 {
5044 if (reload_completed && rtx_equal_p (target, source))
5045 return 0;
5046 if (generate)
5047 emit_constant_insn (cond, gen_rtx_SET (target, source));
5048 return 1;
5049 }
5050 can_invert = 1;
5051 break;
5052
5053 case XOR:
5054 if (remainder == 0)
5055 {
5056 if (reload_completed && rtx_equal_p (target, source))
5057 return 0;
5058 if (generate)
5059 emit_constant_insn (cond, gen_rtx_SET (target, source));
5060 return 1;
5061 }
5062
5063 if (remainder == 0xffffffff)
5064 {
5065 if (generate)
5066 emit_constant_insn (cond,
5067 gen_rtx_SET (target,
5068 gen_rtx_NOT (mode, source)));
5069 return 1;
5070 }
5071 final_invert = 1;
5072 break;
5073
5074 case MINUS:
5075 /* We treat MINUS as (val - source), since (source - val) is always
5076 passed as (source + (-val)). */
5077 if (remainder == 0)
5078 {
5079 if (generate)
5080 emit_constant_insn (cond,
5081 gen_rtx_SET (target,
5082 gen_rtx_NEG (mode, source)));
5083 return 1;
5084 }
5085 if (const_ok_for_arm (val))
5086 {
5087 if (generate)
5088 emit_constant_insn (cond,
5089 gen_rtx_SET (target,
5090 gen_rtx_MINUS (mode, GEN_INT (val),
5091 source)));
5092 return 1;
5093 }
5094
5095 break;
5096
5097 default:
5098 gcc_unreachable ();
5099 }
5100
5101 /* If we can do it in one insn get out quickly. */
5102 if (const_ok_for_op (val, code))
5103 {
5104 if (generate)
5105 emit_constant_insn (cond,
5106 gen_rtx_SET (target,
5107 (source
5108 ? gen_rtx_fmt_ee (code, mode, source,
5109 GEN_INT (val))
5110 : GEN_INT (val))));
5111 return 1;
5112 }
5113
5114 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5115 insn. */
5116 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5117 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5118 {
5119 if (generate)
5120 {
5121 if (mode == SImode && i == 16)
5122 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5123 smaller insn. */
5124 emit_constant_insn (cond,
5125 gen_zero_extendhisi2
5126 (target, gen_lowpart (HImode, source)));
5127 else
5128 /* Extzv only supports SImode, but we can coerce the operands
5129 into that mode. */
5130 emit_constant_insn (cond,
5131 gen_extzv_t2 (gen_lowpart (SImode, target),
5132 gen_lowpart (SImode, source),
5133 GEN_INT (i), const0_rtx));
5134 }
5135
5136 return 1;
5137 }
5138
5139 /* Calculate a few attributes that may be useful for specific
5140 optimizations. */
5141 /* Count number of leading zeros. */
5142 for (i = 31; i >= 0; i--)
5143 {
5144 if ((remainder & (1 << i)) == 0)
5145 clear_sign_bit_copies++;
5146 else
5147 break;
5148 }
5149
5150 /* Count number of leading 1's. */
5151 for (i = 31; i >= 0; i--)
5152 {
5153 if ((remainder & (1 << i)) != 0)
5154 set_sign_bit_copies++;
5155 else
5156 break;
5157 }
5158
5159 /* Count number of trailing zeros. */
5160 for (i = 0; i <= 31; i++)
5161 {
5162 if ((remainder & (1 << i)) == 0)
5163 clear_zero_bit_copies++;
5164 else
5165 break;
5166 }
5167
5168 /* Count number of trailing 1's. */
5169 for (i = 0; i <= 31; i++)
5170 {
5171 if ((remainder & (1 << i)) != 0)
5172 set_zero_bit_copies++;
5173 else
5174 break;
5175 }
5176
5177 switch (code)
5178 {
5179 case SET:
5180 /* See if we can do this by sign_extending a constant that is known
5181 to be negative. This is a good way of doing it, since the shift
5182 may well merge into a subsequent insn. */
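/* For example (illustrative; rN is a scratch register): 0xfffffe10 has
23 leading 1s, and 0xfffffe10 << 22 == 0x84000000 is a valid immediate,
so
mov rN, #0x84000000
asr rN, rN, #22
recreates 0xfffffe10 in two insns. */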
5183 if (set_sign_bit_copies > 1)
5184 {
5185 if (const_ok_for_arm
5186 (temp1 = ARM_SIGN_EXTEND (remainder
5187 << (set_sign_bit_copies - 1))))
5188 {
5189 if (generate)
5190 {
5191 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5192 emit_constant_insn (cond,
5193 gen_rtx_SET (new_src, GEN_INT (temp1)));
5194 emit_constant_insn (cond,
5195 gen_ashrsi3 (target, new_src,
5196 GEN_INT (set_sign_bit_copies - 1)));
5197 }
5198 return 2;
5199 }
5200 /* For an inverted constant, we will need to set the low bits;
5201 these will be shifted out of harm's way. */
5202 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5203 if (const_ok_for_arm (~temp1))
5204 {
5205 if (generate)
5206 {
5207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5208 emit_constant_insn (cond,
5209 gen_rtx_SET (new_src, GEN_INT (temp1)));
5210 emit_constant_insn (cond,
5211 gen_ashrsi3 (target, new_src,
5212 GEN_INT (set_sign_bit_copies - 1)));
5213 }
5214 return 2;
5215 }
5216 }
5217
5218 /* See if we can calculate the value as the difference between two
5219 valid immediates. */
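/* E.g. (illustrative; rN is a scratch register) 0x00fffff0 is not a
valid immediate, but it equals 0x01000000 - 0x10, both of which are, so
mov rN, #0x01000000
sub target, rN, #0x10
does it in two insns instead of the three a bytewise split would need. */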
5220 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5221 {
5222 int topshift = clear_sign_bit_copies & ~1;
5223
5224 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5225 & (0xff000000 >> topshift));
5226
5227 /* If temp1 is zero, then that means the 9 most significant
5228 bits of remainder were 1 and we've caused it to overflow.
5229 When topshift is 0 we don't need to do anything since we
5230 can borrow from 'bit 32'. */
5231 if (temp1 == 0 && topshift != 0)
5232 temp1 = 0x80000000 >> (topshift - 1);
5233
5234 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5235
5236 if (const_ok_for_arm (temp2))
5237 {
5238 if (generate)
5239 {
5240 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5241 emit_constant_insn (cond,
5242 gen_rtx_SET (new_src, GEN_INT (temp1)));
5243 emit_constant_insn (cond,
5244 gen_addsi3 (target, new_src,
5245 GEN_INT (-temp2)));
5246 }
5247
5248 return 2;
5249 }
5250 }
5251
5252 /* See if we can generate this by setting the bottom (or the top)
5253 16 bits, and then shifting these into the other half of the
5254 word. We only look for the simplest cases, to do more would cost
5255 too much. Be careful, however, not to generate this when the
5256 alternative would take fewer insns. */
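/* E.g. (illustrative, ARM mode; rN is a scratch register) 0x01010101:
the low half 0x0101 is not a valid immediate, but the remainder equals
0x0101 | (0x0101 << 16), so we can emit something like
mov rN, #0x0100
add rN, rN, #0x01
orr target, rN, rN, lsl #16
for three insns instead of four. (Thumb-2 can instead load 0x01010101
as a single replicated-byte immediate.) */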
5257 if (val & 0xffff0000)
5258 {
5259 temp1 = remainder & 0xffff0000;
5260 temp2 = remainder & 0x0000ffff;
5261
5262 /* Overlaps outside this range are best done using other methods. */
5263 for (i = 9; i < 24; i++)
5264 {
5265 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5266 && !const_ok_for_arm (temp2))
5267 {
5268 rtx new_src = (subtargets
5269 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5270 : target);
5271 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5272 source, subtargets, generate);
5273 source = new_src;
5274 if (generate)
5275 emit_constant_insn
5276 (cond,
5277 gen_rtx_SET
5278 (target,
5279 gen_rtx_IOR (mode,
5280 gen_rtx_ASHIFT (mode, source,
5281 GEN_INT (i)),
5282 source)));
5283 return insns + 1;
5284 }
5285 }
5286
5287 /* Don't duplicate cases already considered. */
5288 for (i = 17; i < 24; i++)
5289 {
5290 if (((temp1 | (temp1 >> i)) == remainder)
5291 && !const_ok_for_arm (temp1))
5292 {
5293 rtx new_src = (subtargets
5294 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5295 : target);
5296 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5297 source, subtargets, generate);
5298 source = new_src;
5299 if (generate)
5300 emit_constant_insn
5301 (cond,
5302 gen_rtx_SET (target,
5303 gen_rtx_IOR
5304 (mode,
5305 gen_rtx_LSHIFTRT (mode, source,
5306 GEN_INT (i)),
5307 source)));
5308 return insns + 1;
5309 }
5310 }
5311 }
5312 break;
5313
5314 case IOR:
5315 case XOR:
5316 /* If we have IOR or XOR, and the constant can be loaded in a
5317 single instruction, and we can find a temporary to put it in,
5318 then this can be done in two instructions instead of 3-4. */
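/* E.g. (illustrative, ARM mode; rT is the temporary) x = y ^ 0xffffff00:
~0xffffff00 == 0xff is a valid immediate, so
mvn rT, #0xff @ rT = 0xffffff00
eor x, y, rT
takes two insns, rather than splitting the constant across three EORs. */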
5319 if (subtargets
5320 /* TARGET can't be NULL if SUBTARGETS is 0 */
5321 || (reload_completed && !reg_mentioned_p (target, source)))
5322 {
5323 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5324 {
5325 if (generate)
5326 {
5327 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5328
5329 emit_constant_insn (cond,
5330 gen_rtx_SET (sub, GEN_INT (val)));
5331 emit_constant_insn (cond,
5332 gen_rtx_SET (target,
5333 gen_rtx_fmt_ee (code, mode,
5334 source, sub)));
5335 }
5336 return 2;
5337 }
5338 }
5339
5340 if (code == XOR)
5341 break;
5342
5343 /* Convert
5344 x = y | constant (where the constant consists of set_sign_bit_copies
5345 leading 1s followed by 0s, e.g. 0xfff00000) into
5346 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5347
5348 This can be done in 2 instructions by using shifts with mov or mvn.
5349 e.g. for
5350 x = x | 0xfff00000;
5351 we generate:
5352 mvn r0, r0, asl #12
5353 mvn r0, r0, lsr #12 */
5354 if (set_sign_bit_copies > 8
5355 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360 rtx shift = GEN_INT (set_sign_bit_copies);
5361
5362 emit_constant_insn
5363 (cond,
5364 gen_rtx_SET (sub,
5365 gen_rtx_NOT (mode,
5366 gen_rtx_ASHIFT (mode,
5367 source,
5368 shift))));
5369 emit_constant_insn
5370 (cond,
5371 gen_rtx_SET (target,
5372 gen_rtx_NOT (mode,
5373 gen_rtx_LSHIFTRT (mode, sub,
5374 shift))));
5375 }
5376 return 2;
5377 }
5378
5379 /* Convert
5380 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5381 to
5382 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5383
5384 E.g. for r0 = r0 | 0xfff
5385 mvn r0, r0, lsr #12
5386 mvn r0, r0, asl #12
5387
5388 */
5389 if (set_zero_bit_copies > 8
5390 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5391 {
5392 if (generate)
5393 {
5394 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5395 rtx shift = GEN_INT (set_zero_bit_copies);
5396
5397 emit_constant_insn
5398 (cond,
5399 gen_rtx_SET (sub,
5400 gen_rtx_NOT (mode,
5401 gen_rtx_LSHIFTRT (mode,
5402 source,
5403 shift))));
5404 emit_constant_insn
5405 (cond,
5406 gen_rtx_SET (target,
5407 gen_rtx_NOT (mode,
5408 gen_rtx_ASHIFT (mode, sub,
5409 shift))));
5410 }
5411 return 2;
5412 }
5413
5414 /* This will never be reached for Thumb2 because orn is a valid
5415 instruction. This is for Thumb1 and the ARM 32 bit cases.
5416
5417 x = y | constant (such that ~constant is a valid constant)
5418 Transform this to
5419 x = ~(~y & ~constant).
5420 */
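/* E.g. (illustrative, ARM mode; rT is the temporary) x = y | 0xffffff0f:
~0xffffff0f == 0xf0 is a valid immediate, so
mvn rT, y
and rT, rT, #0xf0
mvn x, rT
gives x = ~(~y & 0xf0) = y | 0xffffff0f in three insns, where ORRing
the constant in directly would need four immediates. */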
5421 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5422 {
5423 if (generate)
5424 {
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 emit_constant_insn (cond,
5427 gen_rtx_SET (sub,
5428 gen_rtx_NOT (mode, source)));
5429 source = sub;
5430 if (subtargets)
5431 sub = gen_reg_rtx (mode);
5432 emit_constant_insn (cond,
5433 gen_rtx_SET (sub,
5434 gen_rtx_AND (mode, source,
5435 GEN_INT (temp1))));
5436 emit_constant_insn (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode, sub)));
5439 }
5440 return 3;
5441 }
5442 break;
5443
5444 case AND:
5445 /* See if two shifts will do 2 or more insns' worth of work. */
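/* E.g. (illustrative; rN is a scratch register) on a core without
Thumb-2/UBFX, x & 0xfff has 20 clear sign bits, so
lsl rN, x, #20
lsr target, rN, #20
does the masking in two insns, where BICing away the upper 20 bits
would need three immediates. */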
5446 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5447 {
5448 HOST_WIDE_INT shift_mask = ((0xffffffff
5449 << (32 - clear_sign_bit_copies))
5450 & 0xffffffff);
5451
5452 if ((remainder | shift_mask) != 0xffffffff)
5453 {
5454 HOST_WIDE_INT new_val
5455 = ARM_SIGN_EXTEND (remainder | shift_mask);
5456
5457 if (generate)
5458 {
5459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5460 insns = arm_gen_constant (AND, SImode, cond, new_val,
5461 new_src, source, subtargets, 1);
5462 source = new_src;
5463 }
5464 else
5465 {
5466 rtx targ = subtargets ? NULL_RTX : target;
5467 insns = arm_gen_constant (AND, mode, cond, new_val,
5468 targ, source, subtargets, 0);
5469 }
5470 }
5471
5472 if (generate)
5473 {
5474 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5475 rtx shift = GEN_INT (clear_sign_bit_copies);
5476
5477 emit_insn (gen_ashlsi3 (new_src, source, shift));
5478 emit_insn (gen_lshrsi3 (target, new_src, shift));
5479 }
5480
5481 return insns + 2;
5482 }
5483
5484 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5485 {
5486 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5487
5488 if ((remainder | shift_mask) != 0xffffffff)
5489 {
5490 HOST_WIDE_INT new_val
5491 = ARM_SIGN_EXTEND (remainder | shift_mask);
5492 if (generate)
5493 {
5494 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5495
5496 insns = arm_gen_constant (AND, mode, cond, new_val,
5497 new_src, source, subtargets, 1);
5498 source = new_src;
5499 }
5500 else
5501 {
5502 rtx targ = subtargets ? NULL_RTX : target;
5503
5504 insns = arm_gen_constant (AND, mode, cond, new_val,
5505 targ, source, subtargets, 0);
5506 }
5507 }
5508
5509 if (generate)
5510 {
5511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5512 rtx shift = GEN_INT (clear_zero_bit_copies);
5513
5514 emit_insn (gen_lshrsi3 (new_src, source, shift));
5515 emit_insn (gen_ashlsi3 (target, new_src, shift));
5516 }
5517
5518 return insns + 2;
5519 }
5520
5521 break;
5522
5523 default:
5524 break;
5525 }
5526
5527 /* Calculate what the instruction sequences would be if we generated it
5528 normally, negated, or inverted. */
5529 if (code == AND)
5530 /* AND cannot be split into multiple insns, so invert and use BIC. */
5531 insns = 99;
5532 else
5533 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5534
5535 if (can_negate)
5536 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5537 &neg_immediates);
5538 else
5539 neg_insns = 99;
5540
5541 if (can_invert || final_invert)
5542 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5543 &inv_immediates);
5544 else
5545 inv_insns = 99;
5546
5547 immediates = &pos_immediates;
5548
5549 /* Is the negated immediate sequence more efficient? */
5550 if (neg_insns < insns && neg_insns <= inv_insns)
5551 {
5552 insns = neg_insns;
5553 immediates = &neg_immediates;
5554 }
5555 else
5556 can_negate = 0;
5557
5558 /* Is the inverted immediate sequence more efficient?
5559 We must allow for an extra NOT instruction for XOR operations, although
5560 there is some chance that the final 'mvn' will get optimized later. */
5561 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5562 {
5563 insns = inv_insns;
5564 immediates = &inv_immediates;
5565 }
5566 else
5567 {
5568 can_invert = 0;
5569 final_invert = 0;
5570 }
5571
5572 /* Now output the chosen sequence as instructions. */
5573 if (generate)
5574 {
5575 for (i = 0; i < insns; i++)
5576 {
5577 rtx new_src, temp1_rtx;
5578
5579 temp1 = immediates->i[i];
5580
5581 if (code == SET || code == MINUS)
5582 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5583 else if ((final_invert || i < (insns - 1)) && subtargets)
5584 new_src = gen_reg_rtx (mode);
5585 else
5586 new_src = target;
5587
5588 if (can_invert)
5589 temp1 = ~temp1;
5590 else if (can_negate)
5591 temp1 = -temp1;
5592
5593 temp1 = trunc_int_for_mode (temp1, mode);
5594 temp1_rtx = GEN_INT (temp1);
5595
5596 if (code == SET)
5597 ;
5598 else if (code == MINUS)
5599 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5600 else
5601 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5602
5603 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5604 source = new_src;
5605
5606 if (code == SET)
5607 {
5608 can_negate = can_invert;
5609 can_invert = 0;
5610 code = PLUS;
5611 }
5612 else if (code == MINUS)
5613 code = PLUS;
5614 }
5615 }
5616
5617 if (final_invert)
5618 {
5619 if (generate)
5620 emit_constant_insn (cond, gen_rtx_SET (target,
5621 gen_rtx_NOT (mode, source)));
5622 insns++;
5623 }
5624
5625 return insns;
5626 }
5627
5628 /* Return TRUE if op is a constant where both the low and top words are
5629 suitable for RSB/RSC instructions. This is never true for Thumb, since
5630 we do not have RSC in that case. */
5631 static bool
5632 arm_const_double_prefer_rsbs_rsc (rtx op)
5633 {
5634 /* Thumb lacks RSC, so we never prefer that sequence. */
5635 if (TARGET_THUMB || !CONST_INT_P (op))
5636 return false;
5637 HOST_WIDE_INT hi, lo;
5638 lo = UINTVAL (op) & 0xffffffffULL;
5639 hi = UINTVAL (op) >> 32;
5640 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5641 }
5642
5643 /* Canonicalize a comparison so that we are more likely to recognize it.
5644 This can be done for a few constant compares, where we can make the
5645 immediate value easier to load. */
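/* E.g. (illustrative, SImode) x > 0xfff: neither 0xfff nor -0xfff is a
valid immediate, but 0x1000 is, so the comparison is rewritten as
x >= 0x1000 and the constant needs no extra insns to synthesize. */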
5646
5647 static void
5648 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5649 bool op0_preserve_value)
5650 {
5651 machine_mode mode;
5652 unsigned HOST_WIDE_INT i, maxval;
5653
5654 mode = GET_MODE (*op0);
5655 if (mode == VOIDmode)
5656 mode = GET_MODE (*op1);
5657
5658 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5659
5660 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5661 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5662 either reversed or (for constant OP1) adjusted to GE/LT.
5663 Similarly for GTU/LEU in Thumb mode. */
5664 if (mode == DImode)
5665 {
5666
5667 if (*code == GT || *code == LE
5668 || *code == GTU || *code == LEU)
5669 {
5670 /* Missing comparison. First try to use an available
5671 comparison. */
5672 if (CONST_INT_P (*op1))
5673 {
5674 i = INTVAL (*op1);
5675 switch (*code)
5676 {
5677 case GT:
5678 case LE:
5679 if (i != maxval)
5680 {
5681 /* Try to convert to GE/LT, unless that would be more
5682 expensive. */
5683 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5684 && arm_const_double_prefer_rsbs_rsc (*op1))
5685 return;
5686 *op1 = GEN_INT (i + 1);
5687 *code = *code == GT ? GE : LT;
5688 }
5689 else
5690 {
5691 /* GT maxval is always false, LE maxval is always true.
5692 We can't fold that away here as we must make a
5693 comparison, but we can fold them to comparisons
5694 with the same result that can be handled:
5695 op0 GT maxval -> op0 LT minval
5696 op0 LE maxval -> op0 GE minval
5697 where minval = (-maxval - 1). */
5698 *op1 = GEN_INT (-maxval - 1);
5699 *code = *code == GT ? LT : GE;
5700 }
5701 return;
5702
5703 case GTU:
5704 case LEU:
5705 if (i != ~((unsigned HOST_WIDE_INT) 0))
5706 {
5707 /* Try to convert to GEU/LTU, unless that would
5708 be more expensive. */
5709 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5710 && arm_const_double_prefer_rsbs_rsc (*op1))
5711 return;
5712 *op1 = GEN_INT (i + 1);
5713 *code = *code == GTU ? GEU : LTU;
5714 }
5715 else
5716 {
5717 /* GTU ~0 is always false, LEU ~0 is always true.
5718 We can't fold that away here as we must make a
5719 comparison, but we can fold them to comparisons
5720 with the same result that can be handled:
5721 op0 GTU ~0 -> op0 LTU 0
5722 op0 LEU ~0 -> op0 GEU 0. */
5723 *op1 = const0_rtx;
5724 *code = *code == GTU ? LTU : GEU;
5725 }
5726 return;
5727
5728 default:
5729 gcc_unreachable ();
5730 }
5731 }
5732
5733 if (!op0_preserve_value)
5734 {
5735 std::swap (*op0, *op1);
5736 *code = (int)swap_condition ((enum rtx_code)*code);
5737 }
5738 }
5739 return;
5740 }
5741
5742 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5743 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5744 to facilitate possible combining with a cmp into 'ands'. */
5745 if (mode == SImode
5746 && GET_CODE (*op0) == ZERO_EXTEND
5747 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5748 && GET_MODE (XEXP (*op0, 0)) == QImode
5749 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5750 && subreg_lowpart_p (XEXP (*op0, 0))
5751 && *op1 == const0_rtx)
5752 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5753 GEN_INT (255));
5754
5755 /* Comparisons smaller than DImode. Only adjust comparisons against
5756 an out-of-range constant. */
5757 if (!CONST_INT_P (*op1)
5758 || const_ok_for_arm (INTVAL (*op1))
5759 || const_ok_for_arm (- INTVAL (*op1)))
5760 return;
5761
5762 i = INTVAL (*op1);
5763
5764 switch (*code)
5765 {
5766 case EQ:
5767 case NE:
5768 return;
5769
5770 case GT:
5771 case LE:
5772 if (i != maxval
5773 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5774 {
5775 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5776 *code = *code == GT ? GE : LT;
5777 return;
5778 }
5779 break;
5780
5781 case GE:
5782 case LT:
5783 if (i != ~maxval
5784 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5785 {
5786 *op1 = GEN_INT (i - 1);
5787 *code = *code == GE ? GT : LE;
5788 return;
5789 }
5790 break;
5791
5792 case GTU:
5793 case LEU:
5794 if (i != ~((unsigned HOST_WIDE_INT) 0)
5795 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5796 {
5797 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5798 *code = *code == GTU ? GEU : LTU;
5799 return;
5800 }
5801 break;
5802
5803 case GEU:
5804 case LTU:
5805 if (i != 0
5806 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5807 {
5808 *op1 = GEN_INT (i - 1);
5809 *code = *code == GEU ? GTU : LEU;
5810 return;
5811 }
5812 break;
5813
5814 default:
5815 gcc_unreachable ();
5816 }
5817 }
5818
5819
5820 /* Define how to find the value returned by a function. */
5821
5822 static rtx
5823 arm_function_value(const_tree type, const_tree func,
5824 bool outgoing ATTRIBUTE_UNUSED)
5825 {
5826 machine_mode mode;
5827 int unsignedp ATTRIBUTE_UNUSED;
5828 rtx r ATTRIBUTE_UNUSED;
5829
5830 mode = TYPE_MODE (type);
5831
5832 if (TARGET_AAPCS_BASED)
5833 return aapcs_allocate_return_reg (mode, type, func);
5834
5835 /* Promote integer types. */
5836 if (INTEGRAL_TYPE_P (type))
5837 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5838
5839 /* Promote small structs returned in a register to full-word size
5840 for big-endian AAPCS. */
5841 if (arm_return_in_msb (type))
5842 {
5843 HOST_WIDE_INT size = int_size_in_bytes (type);
5844 if (size % UNITS_PER_WORD != 0)
5845 {
5846 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5847 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5848 }
5849 }
5850
5851 return arm_libcall_value_1 (mode);
5852 }
5853
5854 /* libcall hashtable helpers. */
5855
5856 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5857 {
5858 static inline hashval_t hash (const rtx_def *);
5859 static inline bool equal (const rtx_def *, const rtx_def *);
5860 static inline void remove (rtx_def *);
5861 };
5862
5863 inline bool
5864 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5865 {
5866 return rtx_equal_p (p1, p2);
5867 }
5868
5869 inline hashval_t
5870 libcall_hasher::hash (const rtx_def *p1)
5871 {
5872 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5873 }
5874
5875 typedef hash_table<libcall_hasher> libcall_table_type;
5876
5877 static void
5878 add_libcall (libcall_table_type *htab, rtx libcall)
5879 {
5880 *htab->find_slot (libcall, INSERT) = libcall;
5881 }
5882
5883 static bool
5884 arm_libcall_uses_aapcs_base (const_rtx libcall)
5885 {
5886 static bool init_done = false;
5887 static libcall_table_type *libcall_htab = NULL;
5888
5889 if (!init_done)
5890 {
5891 init_done = true;
5892
5893 libcall_htab = new libcall_table_type (31);
5894 add_libcall (libcall_htab,
5895 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5896 add_libcall (libcall_htab,
5897 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5898 add_libcall (libcall_htab,
5899 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5900 add_libcall (libcall_htab,
5901 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5902
5903 add_libcall (libcall_htab,
5904 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5905 add_libcall (libcall_htab,
5906 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5907 add_libcall (libcall_htab,
5908 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5909 add_libcall (libcall_htab,
5910 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5911
5912 add_libcall (libcall_htab,
5913 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5914 add_libcall (libcall_htab,
5915 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5916 add_libcall (libcall_htab,
5917 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5918 add_libcall (libcall_htab,
5919 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5920 add_libcall (libcall_htab,
5921 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5922 add_libcall (libcall_htab,
5923 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5924 add_libcall (libcall_htab,
5925 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5926 add_libcall (libcall_htab,
5927 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5928 add_libcall (libcall_htab,
5929 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5930 add_libcall (libcall_htab,
5931 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5932
5933 /* Values from double-precision helper functions are returned in core
5934 registers if the selected core only supports single-precision
5935 arithmetic, even if we are using the hard-float ABI. The same is
5936 true for single-precision helpers except in case of MVE, because in
5937 MVE we will be using the hard-float ABI on a CPU which doesn't support
5938 single-precision operations in hardware. In MVE the following check
5939 enables use of emulation for the single-precision arithmetic
5940 operations. */
5941 if (TARGET_HAVE_MVE)
5942 {
5943 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5944 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5945 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5946 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5947 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5948 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5949 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5950 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5951 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5952 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5953 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5954 }
5955 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5956 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5957 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5958 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5959 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5960 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5961 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5962 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5963 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5964 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5965 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5966 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5967 SFmode));
5968 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5969 DFmode));
5970 add_libcall (libcall_htab,
5971 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5972 }
5973
5974 return libcall && libcall_htab->find (libcall) != NULL;
5975 }
5976
5977 static rtx
5978 arm_libcall_value_1 (machine_mode mode)
5979 {
5980 if (TARGET_AAPCS_BASED)
5981 return aapcs_libcall_value (mode);
5982 else if (TARGET_IWMMXT_ABI
5983 && arm_vector_mode_supported_p (mode))
5984 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5985 else
5986 return gen_rtx_REG (mode, ARG_REGISTER (1));
5987 }
5988
5989 /* Define how to find the value returned by a library function
5990 assuming the value has mode MODE. */
5991
5992 static rtx
5993 arm_libcall_value (machine_mode mode, const_rtx libcall)
5994 {
5995 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5996 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5997 {
5998 /* The following libcalls return their result in integer registers,
5999 even though they return a floating point value. */
6000 if (arm_libcall_uses_aapcs_base (libcall))
6001 return gen_rtx_REG (mode, ARG_REGISTER(1));
6002
6003 }
6004
6005 return arm_libcall_value_1 (mode);
6006 }
6007
6008 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6009
6010 static bool
6011 arm_function_value_regno_p (const unsigned int regno)
6012 {
6013 if (regno == ARG_REGISTER (1)
6014 || (TARGET_32BIT
6015 && TARGET_AAPCS_BASED
6016 && TARGET_HARD_FLOAT
6017 && regno == FIRST_VFP_REGNUM)
6018 || (TARGET_IWMMXT_ABI
6019 && regno == FIRST_IWMMXT_REGNUM))
6020 return true;
6021
6022 return false;
6023 }
6024
6025 /* Determine the amount of memory needed to store the possible return
6026 registers of an untyped call. */
6027 int
6028 arm_apply_result_size (void)
6029 {
6030 int size = 16;
6031
6032 if (TARGET_32BIT)
6033 {
6034 if (TARGET_HARD_FLOAT_ABI)
6035 size += 32;
6036 if (TARGET_IWMMXT_ABI)
6037 size += 8;
6038 }
6039
6040 return size;
6041 }
6042
6043 /* Decide whether TYPE should be returned in memory (true)
6044 or in a register (false). FNTYPE is the type of the function making
6045 the call. */
6046 static bool
6047 arm_return_in_memory (const_tree type, const_tree fntype)
6048 {
6049 HOST_WIDE_INT size;
6050
6051 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6052
6053 if (TARGET_AAPCS_BASED)
6054 {
6055 /* Simple, non-aggregate types (i.e. not including vectors and
6056 complex) are always returned in a register (or registers).
6057 We don't care about which register here, so we can short-cut
6058 some of the detail. */
6059 if (!AGGREGATE_TYPE_P (type)
6060 && TREE_CODE (type) != VECTOR_TYPE
6061 && TREE_CODE (type) != COMPLEX_TYPE)
6062 return false;
6063
6064 /* Any return value that is no larger than one word can be
6065 returned in r0. */
6066 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6067 return false;
6068
6069 /* Check any available co-processors to see if they accept the
6070 type as a register candidate (VFP, for example, can return
6071 some aggregates in consecutive registers). These aren't
6072 available if the call is variadic. */
6073 if (aapcs_select_return_coproc (type, fntype) >= 0)
6074 return false;
6075
6076 /* Vector values should be returned using ARM registers, not
6077 memory (unless they're over 16 bytes, which will break since
6078 we only have four call-clobbered registers to play with). */
6079 if (TREE_CODE (type) == VECTOR_TYPE)
6080 return (size < 0 || size > (4 * UNITS_PER_WORD));
6081
6082 /* The rest go in memory. */
6083 return true;
6084 }
6085
6086 if (TREE_CODE (type) == VECTOR_TYPE)
6087 return (size < 0 || size > (4 * UNITS_PER_WORD));
6088
6089 if (!AGGREGATE_TYPE_P (type) &&
6090 (TREE_CODE (type) != VECTOR_TYPE))
6091 /* All simple types are returned in registers. */
6092 return false;
6093
6094 if (arm_abi != ARM_ABI_APCS)
6095 {
6096 /* ATPCS and later return aggregate types in memory only if they are
6097 larger than a word (or are variable size). */
6098 return (size < 0 || size > UNITS_PER_WORD);
6099 }
6100
6101 /* For the arm-wince targets we choose to be compatible with Microsoft's
6102 ARM and Thumb compilers, which always return aggregates in memory. */
6103 #ifndef ARM_WINCE
6104 /* All structures/unions bigger than one word are returned in memory.
6105 Also catch the case where int_size_in_bytes returns -1. In this case
6106 the aggregate is either huge or of variable size, and in either case
6107 we will want to return it via memory and not in a register. */
6108 if (size < 0 || size > UNITS_PER_WORD)
6109 return true;
6110
6111 if (TREE_CODE (type) == RECORD_TYPE)
6112 {
6113 tree field;
6114
6115 /* For a struct the APCS says that we only return in a register
6116 if the type is 'integer like' and every addressable element
6117 has an offset of zero. For practical purposes this means
6118 that the structure can have at most one non bit-field element
6119 and that this element must be the first one in the structure. */
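/* Illustrative examples under these APCS rules: struct { int a; } can be
returned in r0, but struct { short a; char b; } cannot (the second
member is addressable) and struct { float f; } cannot (floats are
rejected below). */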
6120
6121 /* Find the first field, ignoring non FIELD_DECL things which will
6122 have been created by C++. */
6123 /* NOTE: This code is deprecated and has not been updated to handle
6124 DECL_FIELD_ABI_IGNORED. */
6125 for (field = TYPE_FIELDS (type);
6126 field && TREE_CODE (field) != FIELD_DECL;
6127 field = DECL_CHAIN (field))
6128 continue;
6129
6130 if (field == NULL)
6131 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6132
6133 /* Check that the first field is valid for returning in a register. */
6134
6135 /* ... Floats are not allowed */
6136 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6137 return true;
6138
6139 /* ... Aggregates that are not themselves valid for returning in
6140 a register are not allowed. */
6141 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6142 return true;
6143
6144 /* Now check the remaining fields, if any. Only bitfields are allowed,
6145 since they are not addressable. */
6146 for (field = DECL_CHAIN (field);
6147 field;
6148 field = DECL_CHAIN (field))
6149 {
6150 if (TREE_CODE (field) != FIELD_DECL)
6151 continue;
6152
6153 if (!DECL_BIT_FIELD_TYPE (field))
6154 return true;
6155 }
6156
6157 return false;
6158 }
6159
6160 if (TREE_CODE (type) == UNION_TYPE)
6161 {
6162 tree field;
6163
6164 /* Unions can be returned in registers if every element is
6165 integral, or can be returned in an integer register. */
6166 for (field = TYPE_FIELDS (type);
6167 field;
6168 field = DECL_CHAIN (field))
6169 {
6170 if (TREE_CODE (field) != FIELD_DECL)
6171 continue;
6172
6173 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6174 return true;
6175
6176 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6177 return true;
6178 }
6179
6180 return false;
6181 }
6182 #endif /* not ARM_WINCE */
6183
6184 /* Return all other types in memory. */
6185 return true;
6186 }
6187
6188 const struct pcs_attribute_arg
6189 {
6190 const char *arg;
6191 enum arm_pcs value;
6192 } pcs_attribute_args[] =
6193 {
6194 {"aapcs", ARM_PCS_AAPCS},
6195 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6196 #if 0
6197 /* We could recognize these, but changes would be needed elsewhere
6198 * to implement them. */
6199 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6200 {"atpcs", ARM_PCS_ATPCS},
6201 {"apcs", ARM_PCS_APCS},
6202 #endif
6203 {NULL, ARM_PCS_UNKNOWN}
6204 };
6205
6206 static enum arm_pcs
6207 arm_pcs_from_attribute (tree attr)
6208 {
6209 const struct pcs_attribute_arg *ptr;
6210 const char *arg;
6211
6212 /* Get the value of the argument. */
6213 if (TREE_VALUE (attr) == NULL_TREE
6214 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6215 return ARM_PCS_UNKNOWN;
6216
6217 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6218
6219 /* Check it against the list of known arguments. */
6220 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6221 if (streq (arg, ptr->arg))
6222 return ptr->value;
6223
6224 /* An unrecognized PCS variant. */
6225 return ARM_PCS_UNKNOWN;
6226 }
6227
6228 /* Get the PCS variant to use for this call. TYPE is the function's type
6229 specification, DECL is the specific declaration. DECL may be null if
6230 the call could be indirect or if this is a library call. */
6231 static enum arm_pcs
6232 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6233 {
6234 bool user_convention = false;
6235 enum arm_pcs user_pcs = arm_pcs_default;
6236 tree attr;
6237
6238 gcc_assert (type);
6239
6240 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6241 if (attr)
6242 {
6243 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6244 user_convention = true;
6245 }
6246
6247 if (TARGET_AAPCS_BASED)
6248 {
6249 /* Detect varargs functions. These always use the base rules
6250 (no argument is ever a candidate for a co-processor
6251 register). */
6252 bool base_rules = stdarg_p (type);
6253
6254 if (user_convention)
6255 {
6256 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6257 sorry ("non-AAPCS derived PCS variant");
6258 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6259 error ("variadic functions must use the base AAPCS variant");
6260 }
6261
6262 if (base_rules)
6263 return ARM_PCS_AAPCS;
6264 else if (user_convention)
6265 return user_pcs;
6266 #if 0
6267 /* Unfortunately, this is not safe and can lead to wrong code
6268 being generated (PR96882). Not all calls into the back-end
6269 pass the DECL, so it is unsafe to make any PCS-changing
6270 decisions based on it. In particular the RETURN_IN_MEMORY
6271 hook is only ever passed a TYPE. This needs revisiting to
6272 see if there are any partial improvements that can be
6273 re-enabled. */
6274 else if (decl && flag_unit_at_a_time)
6275 {
6276 /* Local functions never leak outside this compilation unit,
6277 so we are free to use whatever conventions are
6278 appropriate. */
6279 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6280 cgraph_node *local_info_node
6281 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6282 if (local_info_node && local_info_node->local)
6283 return ARM_PCS_AAPCS_LOCAL;
6284 }
6285 #endif
6286 }
6287 else if (user_convention && user_pcs != arm_pcs_default)
6288 sorry ("PCS variant");
6289
6290 /* For everything else we use the target's default. */
6291 return arm_pcs_default;
6292 }
6293
6294
6295 static void
6296 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6297 const_tree fntype ATTRIBUTE_UNUSED,
6298 rtx libcall ATTRIBUTE_UNUSED,
6299 const_tree fndecl ATTRIBUTE_UNUSED)
6300 {
6301 /* Record the unallocated VFP registers. */
6302 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6303 pcum->aapcs_vfp_reg_alloc = 0;
6304 }
6305
6306 /* Bitmasks that indicate whether earlier versions of GCC would have
6307 taken a different path through the ABI logic. This should result in
6308 a -Wpsabi warning if the earlier path led to a different ABI decision.
6309
6310 WARN_PSABI_EMPTY_CXX17_BASE
6311 Indicates that the type includes an artificial empty C++17 base field
6312 that, prior to GCC 10.1, would prevent the type from being treated as
6313 a HFA or HVA. See PR94711 for details.
6314
6315 WARN_PSABI_NO_UNIQUE_ADDRESS
6316 Indicates that the type includes an empty [[no_unique_address]] field
6317 that, prior to GCC 10.1, would prevent the type from being treated as
6318 a HFA or HVA. */
6319 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6320 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6321 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6322
6323 /* Walk down the type tree of TYPE counting consecutive base elements.
6324 If *MODEP is VOIDmode, then set it to the first valid floating point
6325 type. If a non-floating point type is found, or if a floating point
6326 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6327 otherwise return the count in the sub-tree.
6328
6329 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6330 function has changed its behavior relative to earlier versions of GCC.
6331 Normally the argument should be nonnull and point to a zero-initialized
6332 variable. The function then records whether the ABI decision might
6333 be affected by a known fix to the ABI logic, setting the associated
6334 WARN_PSABI_* bits if so.
6335
6336 When the argument is instead a null pointer, the function tries to
6337 simulate the behavior of GCC before all such ABI fixes were made.
6338 This is useful to check whether the function returns something
6339 different after the ABI fixes. */
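/* Illustrative examples: struct { float x, y, z; } yields *MODEP == SFmode
and a count of 3 (a homogeneous aggregate), whereas
struct { float f; double d; } mixes base modes and returns -1.
A _Complex double field counts as two DFmode elements. */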
6340 static int
6341 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6342 unsigned int *warn_psabi_flags)
6343 {
6344 machine_mode mode;
6345 HOST_WIDE_INT size;
6346
6347 switch (TREE_CODE (type))
6348 {
6349 case REAL_TYPE:
6350 mode = TYPE_MODE (type);
6351 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6352 return -1;
6353
6354 if (*modep == VOIDmode)
6355 *modep = mode;
6356
6357 if (*modep == mode)
6358 return 1;
6359
6360 break;
6361
6362 case COMPLEX_TYPE:
6363 mode = TYPE_MODE (TREE_TYPE (type));
6364 if (mode != DFmode && mode != SFmode)
6365 return -1;
6366
6367 if (*modep == VOIDmode)
6368 *modep = mode;
6369
6370 if (*modep == mode)
6371 return 2;
6372
6373 break;
6374
6375 case VECTOR_TYPE:
6376 /* Use V2SImode and V4SImode as representatives of all 64-bit
6377 and 128-bit vector types, whether or not those modes are
6378 supported with the present options. */
6379 size = int_size_in_bytes (type);
6380 switch (size)
6381 {
6382 case 8:
6383 mode = V2SImode;
6384 break;
6385 case 16:
6386 mode = V4SImode;
6387 break;
6388 default:
6389 return -1;
6390 }
6391
6392 if (*modep == VOIDmode)
6393 *modep = mode;
6394
6395 /* Vector modes are considered to be opaque: two vectors are
6396 equivalent for the purposes of being homogeneous aggregates
6397 if they are the same size. */
6398 if (*modep == mode)
6399 return 1;
6400
6401 break;
6402
6403 case ARRAY_TYPE:
6404 {
6405 int count;
6406 tree index = TYPE_DOMAIN (type);
6407
6408 /* Can't handle incomplete types nor sizes that are not
6409 fixed. */
6410 if (!COMPLETE_TYPE_P (type)
6411 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6412 return -1;
6413
6414 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6415 warn_psabi_flags);
6416 if (count == -1
6417 || !index
6418 || !TYPE_MAX_VALUE (index)
6419 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6420 || !TYPE_MIN_VALUE (index)
6421 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6422 || count < 0)
6423 return -1;
6424
6425 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6426 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6427
6428 /* There must be no padding. */
6429 if (wi::to_wide (TYPE_SIZE (type))
6430 != count * GET_MODE_BITSIZE (*modep))
6431 return -1;
6432
6433 return count;
6434 }
6435
6436 case RECORD_TYPE:
6437 {
6438 int count = 0;
6439 int sub_count;
6440 tree field;
6441
6442 /* Can't handle incomplete types nor sizes that are not
6443 fixed. */
6444 if (!COMPLETE_TYPE_P (type)
6445 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6446 return -1;
6447
6448 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6449 {
6450 if (TREE_CODE (field) != FIELD_DECL)
6451 continue;
6452
6453 if (DECL_FIELD_ABI_IGNORED (field))
6454 {
6455 /* See whether this is something that earlier versions of
6456 GCC failed to ignore. */
6457 unsigned int flag;
6458 if (lookup_attribute ("no_unique_address",
6459 DECL_ATTRIBUTES (field)))
6460 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6461 else if (cxx17_empty_base_field_p (field))
6462 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6463 else
6464 /* No compatibility problem. */
6465 continue;
6466
6467 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6468 if (warn_psabi_flags)
6469 {
6470 *warn_psabi_flags |= flag;
6471 continue;
6472 }
6473 }
6474 /* A zero-width bitfield may affect layout in some
6475 circumstances, but adds no members. The determination
6476 of whether or not a type is an HFA is performed after
6477 layout is complete, so if the type still looks like an
6478 HFA afterwards, it is still classed as one. This is
6479 potentially an ABI break for the hard-float ABI. */
6480 else if (DECL_BIT_FIELD (field)
6481 && integer_zerop (DECL_SIZE (field)))
6482 {
6483 /* Prior to GCC-12 these fields were stripped early,
6484 hiding them from the back-end entirely and
6485 resulting in the correct behaviour for argument
6486 passing. Simulate that old behaviour without
6487 generating a warning. */
6488 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6489 continue;
6490 if (warn_psabi_flags)
6491 {
6492 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6493 continue;
6494 }
6495 }
6496
6497 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6498 warn_psabi_flags);
6499 if (sub_count < 0)
6500 return -1;
6501 count += sub_count;
6502 }
6503
6504 /* There must be no padding. */
6505 if (wi::to_wide (TYPE_SIZE (type))
6506 != count * GET_MODE_BITSIZE (*modep))
6507 return -1;
6508
6509 return count;
6510 }
6511
6512 case UNION_TYPE:
6513 case QUAL_UNION_TYPE:
6514 {
6515 /* These aren't very interesting except in a degenerate case. */
6516 int count = 0;
6517 int sub_count;
6518 tree field;
6519
6520 /* Can't handle incomplete types nor sizes that are not
6521 fixed. */
6522 if (!COMPLETE_TYPE_P (type)
6523 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6524 return -1;
6525
6526 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6527 {
6528 if (TREE_CODE (field) != FIELD_DECL)
6529 continue;
6530
6531 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6532 warn_psabi_flags);
6533 if (sub_count < 0)
6534 return -1;
6535 count = count > sub_count ? count : sub_count;
6536 }
6537
6538 /* There must be no padding. */
6539 if (wi::to_wide (TYPE_SIZE (type))
6540 != count * GET_MODE_BITSIZE (*modep))
6541 return -1;
6542
6543 return count;
6544 }
6545
6546 default:
6547 break;
6548 }
6549
6550 return -1;
6551 }
6552
6553 /* Return true if PCS_VARIANT should use VFP registers. */
6554 static bool
6555 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6556 {
6557 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6558 {
6559 static bool seen_thumb1_vfp = false;
6560
6561 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6562 {
6563 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6564 /* sorry() is not immediately fatal, so only display this once. */
6565 seen_thumb1_vfp = true;
6566 }
6567
6568 return true;
6569 }
6570
6571 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6572 return false;
6573
6574 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6575 (TARGET_VFP_DOUBLE || !is_double));
6576 }
6577
6578 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6579 suitable for passing or returning in VFP registers for the PCS
6580 variant selected. If it is, then *BASE_MODE is updated to contain
6581 a machine mode describing each element of the argument's type and
6582 *COUNT to hold the number of such elements. */
6583 static bool
6584 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6585 machine_mode mode, const_tree type,
6586 machine_mode *base_mode, int *count)
6587 {
6588 machine_mode new_mode = VOIDmode;
6589
6590 /* If we have the type information, prefer that to working things
6591 out from the mode. */
6592 if (type)
6593 {
6594 unsigned int warn_psabi_flags = 0;
6595 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6596 &warn_psabi_flags);
6597 if (ag_count > 0 && ag_count <= 4)
6598 {
6599 static unsigned last_reported_type_uid;
6600 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6601 int alt;
6602 if (warn_psabi
6603 && warn_psabi_flags
6604 && uid != last_reported_type_uid
6605 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6606 != ag_count))
6607 {
6608 const char *url10
6609 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6610 const char *url12
6611 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6612 gcc_assert (alt == -1);
6613 last_reported_type_uid = uid;
6614 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6615 qualification. */
6616 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6617 inform (input_location, "parameter passing for argument of "
6618 "type %qT with %<[[no_unique_address]]%> members "
6619 "changed %{in GCC 10.1%}",
6620 TYPE_MAIN_VARIANT (type), url10);
6621 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6622 inform (input_location, "parameter passing for argument of "
6623 "type %qT when C++17 is enabled changed to match "
6624 "C++14 %{in GCC 10.1%}",
6625 TYPE_MAIN_VARIANT (type), url10);
6626 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6627 inform (input_location, "parameter passing for argument of "
6628 "type %qT changed %{in GCC 12.1%}",
6629 TYPE_MAIN_VARIANT (type), url12);
6630 }
6631 *count = ag_count;
6632 }
6633 else
6634 return false;
6635 }
6636 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6637 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6638 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6639 {
6640 *count = 1;
6641 new_mode = mode;
6642 }
6643 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6644 {
6645 *count = 2;
6646 new_mode = (mode == DCmode ? DFmode : SFmode);
6647 }
6648 else
6649 return false;
6650
6651
6652 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6653 return false;
6654
6655 *base_mode = new_mode;
6656
6657 if (TARGET_GENERAL_REGS_ONLY)
6658 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6659 type);
6660
6661 return true;
6662 }
6663
6664 static bool
6665 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6666 machine_mode mode, const_tree type)
6667 {
6668 int count ATTRIBUTE_UNUSED;
6669 machine_mode ag_mode ATTRIBUTE_UNUSED;
6670
6671 if (!use_vfp_abi (pcs_variant, false))
6672 return false;
6673 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6674 &ag_mode, &count);
6675 }
6676
6677 static bool
6678 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6679 const_tree type)
6680 {
6681 if (!use_vfp_abi (pcum->pcs_variant, false))
6682 return false;
6683
6684 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6685 &pcum->aapcs_vfp_rmode,
6686 &pcum->aapcs_vfp_rcount);
6687 }
6688
6689 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6690 for the behaviour of this function. */
6691
6692 static bool
6693 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6694 const_tree type ATTRIBUTE_UNUSED)
6695 {
6696 int rmode_size
6697 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6698 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6699 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6700 int regno;
6701
6702 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6703 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6704 {
6705 pcum->aapcs_vfp_reg_alloc = mask << regno;
6706 if (mode == BLKmode
6707 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6708 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6709 {
6710 int i;
6711 int rcount = pcum->aapcs_vfp_rcount;
6712 int rshift = shift;
6713 machine_mode rmode = pcum->aapcs_vfp_rmode;
6714 rtx par;
6715 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6716 {
6717 /* Avoid using unsupported vector modes. */
6718 if (rmode == V2SImode)
6719 rmode = DImode;
6720 else if (rmode == V4SImode)
6721 {
6722 rmode = DImode;
6723 rcount *= 2;
6724 rshift /= 2;
6725 }
6726 }
6727 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6728 for (i = 0; i < rcount; i++)
6729 {
6730 rtx tmp = gen_rtx_REG (rmode,
6731 FIRST_VFP_REGNUM + regno + i * rshift);
6732 tmp = gen_rtx_EXPR_LIST
6733 (VOIDmode, tmp,
6734 GEN_INT (i * GET_MODE_SIZE (rmode)));
6735 XVECEXP (par, 0, i) = tmp;
6736 }
6737
6738 pcum->aapcs_reg = par;
6739 }
6740 else
6741 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6742 return true;
6743 }
6744 return false;
6745 }
6746
6747 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6748 comment there for the behaviour of this function. */
6749
6750 static rtx
6751 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6752 machine_mode mode,
6753 const_tree type ATTRIBUTE_UNUSED)
6754 {
6755 if (!use_vfp_abi (pcs_variant, false))
6756 return NULL;
6757
6758 if (mode == BLKmode
6759 || (GET_MODE_CLASS (mode) == MODE_INT
6760 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6761 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6762 {
6763 int count;
6764 machine_mode ag_mode;
6765 int i;
6766 rtx par;
6767 int shift;
6768
6769 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6770 &ag_mode, &count);
6771
6772 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6773 {
6774 if (ag_mode == V2SImode)
6775 ag_mode = DImode;
6776 else if (ag_mode == V4SImode)
6777 {
6778 ag_mode = DImode;
6779 count *= 2;
6780 }
6781 }
6782 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6783 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6784 for (i = 0; i < count; i++)
6785 {
6786 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6787 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6788 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6789 XVECEXP (par, 0, i) = tmp;
6790 }
6791
6792 return par;
6793 }
6794
6795 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6796 }
6797
6798 static void
6799 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6800 machine_mode mode ATTRIBUTE_UNUSED,
6801 const_tree type ATTRIBUTE_UNUSED)
6802 {
6803 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6804 pcum->aapcs_vfp_reg_alloc = 0;
6805 return;
6806 }
6807
6808 #define AAPCS_CP(X) \
6809 { \
6810 aapcs_ ## X ## _cum_init, \
6811 aapcs_ ## X ## _is_call_candidate, \
6812 aapcs_ ## X ## _allocate, \
6813 aapcs_ ## X ## _is_return_candidate, \
6814 aapcs_ ## X ## _allocate_return_reg, \
6815 aapcs_ ## X ## _advance \
6816 }
6817
6818 /* Table of co-processors that can be used to pass arguments in
6819 registers. Ideally no argument should be a candidate for more than
6820 one co-processor table entry, but the table is processed in order
6821 and stops after the first match. If that entry then fails to put
6822 the argument into a co-processor register, the argument will go on
6823 the stack. */
6824 static struct
6825 {
6826 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6827 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6828
6829 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6830 BLKmode) is a candidate for this co-processor's registers; this
6831 function should ignore any position-dependent state in
6832 CUMULATIVE_ARGS and only use call-type dependent information. */
6833 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6834
6835 /* Return true if the argument does get a co-processor register; it
6836 should set aapcs_reg to an RTX of the register allocated as is
6837 required for a return from FUNCTION_ARG. */
6838 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6839
6840 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6841 be returned in this co-processor's registers. */
6842 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6843
6844 /* Allocate and return an RTX element to hold the return type of a call. This
6845 routine must not fail and will only be called if is_return_candidate
6846 returned true with the same parameters. */
6847 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6848
6849 /* Finish processing this argument and prepare to start processing
6850 the next one. */
6851 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6852 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6853 {
6854 AAPCS_CP(vfp)
6855 };
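/* For reference, AAPCS_CP (vfp) above expands, via the macro, to the
   initializer

     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate, aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate, aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance }

   so the VFP register set is currently the only co-processor slot.  */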
6856
6857 #undef AAPCS_CP
6858
6859 static int
6860 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6861 const_tree type)
6862 {
6863 int i;
6864
6865 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6866 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6867 return i;
6868
6869 return -1;
6870 }
6871
6872 static int
6873 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6874 {
6875 /* We aren't passed a decl, so we can't check that a call is local.
6876 However, it isn't clear that that would be a win anyway, since it
6877 might limit some tail-calling opportunities. */
6878 enum arm_pcs pcs_variant;
6879
6880 if (fntype)
6881 {
6882 const_tree fndecl = NULL_TREE;
6883
6884 if (TREE_CODE (fntype) == FUNCTION_DECL)
6885 {
6886 fndecl = fntype;
6887 fntype = TREE_TYPE (fntype);
6888 }
6889
6890 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6891 }
6892 else
6893 pcs_variant = arm_pcs_default;
6894
6895 if (pcs_variant != ARM_PCS_AAPCS)
6896 {
6897 int i;
6898
6899 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6900 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6901 TYPE_MODE (type),
6902 type))
6903 return i;
6904 }
6905 return -1;
6906 }
6907
6908 static rtx
6909 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6910 const_tree fntype)
6911 {
6912 /* We aren't passed a decl, so we can't check that a call is local.
6913 However, it isn't clear that that would be a win anyway, since it
6914 might limit some tail-calling opportunities. */
6915 enum arm_pcs pcs_variant;
6916 int unsignedp ATTRIBUTE_UNUSED;
6917
6918 if (fntype)
6919 {
6920 const_tree fndecl = NULL_TREE;
6921
6922 if (TREE_CODE (fntype) == FUNCTION_DECL)
6923 {
6924 fndecl = fntype;
6925 fntype = TREE_TYPE (fntype);
6926 }
6927
6928 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6929 }
6930 else
6931 pcs_variant = arm_pcs_default;
6932
6933 /* Promote integer types. */
6934 if (type && INTEGRAL_TYPE_P (type))
6935 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6936
6937 if (pcs_variant != ARM_PCS_AAPCS)
6938 {
6939 int i;
6940
6941 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6942 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6943 type))
6944 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6945 mode, type);
6946 }
6947
6948 /* Promote small structs returned in a register to full-word size
6949 for big-endian AAPCS. */
6950 if (type && arm_return_in_msb (type))
6951 {
6952 HOST_WIDE_INT size = int_size_in_bytes (type);
6953 if (size % UNITS_PER_WORD != 0)
6954 {
6955 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6956 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6957 }
6958 }
6959
6960 return gen_rtx_REG (mode, R0_REGNUM);
6961 }
6962
6963 static rtx
6964 aapcs_libcall_value (machine_mode mode)
6965 {
6966 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6967 && GET_MODE_SIZE (mode) <= 4)
6968 mode = SImode;
6969
6970 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6971 }
6972
6973 /* Lay out a function argument using the AAPCS rules. The rule
6974 numbers referred to here are those in the AAPCS. */
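/* A minimal worked example (illustrative only, assuming the base AAPCS
   with software floating-point):

     void f (int a, double b, int c);

   'a' goes in r0; 'b' needs doubleword alignment, so the NCRN is rounded
   up (rule C3) and it occupies r2/r3 (rule C4); 'c' then finds no core
   registers left and is passed on the stack (rules C6-C8).  */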
6975 static void
6976 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6977 const_tree type, bool named)
6978 {
6979 int nregs, nregs2;
6980 int ncrn;
6981
6982 /* We only need to do this once per argument. */
6983 if (pcum->aapcs_arg_processed)
6984 return;
6985
6986 pcum->aapcs_arg_processed = true;
6987
6988 /* Special case: if named is false then we are handling an incoming
6989 anonymous argument which is on the stack. */
6990 if (!named)
6991 return;
6992
6993 /* Is this a potential co-processor register candidate? */
6994 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6995 {
6996 int slot = aapcs_select_call_coproc (pcum, mode, type);
6997 pcum->aapcs_cprc_slot = slot;
6998
6999 /* We don't have to apply any of the rules from part B of the
7000 preparation phase; these are handled elsewhere in the
7001 compiler. */
7002
7003 if (slot >= 0)
7004 {
7005 /* A Co-processor register candidate goes either in its own
7006 class of registers or on the stack. */
7007 if (!pcum->aapcs_cprc_failed[slot])
7008 {
7009 /* C1.cp - Try to allocate the argument to co-processor
7010 registers. */
7011 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7012 return;
7013
7014 /* C2.cp - Put the argument on the stack and note that we
7015 can't assign any more candidates in this slot. We also
7016 need to note that we have allocated stack space, so that
7017 we won't later try to split a non-cprc candidate between
7018 core registers and the stack. */
7019 pcum->aapcs_cprc_failed[slot] = true;
7020 pcum->can_split = false;
7021 }
7022
7023 /* We didn't get a register, so this argument goes on the
7024 stack. */
7025 gcc_assert (pcum->can_split == false);
7026 return;
7027 }
7028 }
7029
7030 /* C3 - For double-word aligned arguments, round the NCRN up to the
7031 next even number. */
7032 ncrn = pcum->aapcs_ncrn;
7033 if (ncrn & 1)
7034 {
7035 int res = arm_needs_doubleword_align (mode, type);
7036 /* Only warn during RTL expansion of call stmts, otherwise we would
7037 warn e.g. during gimplification even on functions that will be
7038 always inlined, and we'd warn multiple times. Don't warn when
7039 called in expand_function_start either, as we warn instead in
7040 arm_function_arg_boundary in that case. */
7041 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7042 inform (input_location, "parameter passing for argument of type "
7043 "%qT changed in GCC 7.1", type);
7044 else if (res > 0)
7045 ncrn++;
7046 }
7047
7048 nregs = ARM_NUM_REGS2 (mode, type);
7049
7050 /* Sigh, this test should really assert that nregs > 0, but a GCC
7051 extension allows empty structs and then gives them empty size; it
7052 then allows such a structure to be passed by value. For some of
7053 the code below we have to pretend that such an argument has
7054 non-zero size so that we 'locate' it correctly either in
7055 registers or on the stack. */
7056 gcc_assert (nregs >= 0);
7057
7058 nregs2 = nregs ? nregs : 1;
7059
7060 /* C4 - Argument fits entirely in core registers. */
7061 if (ncrn + nregs2 <= NUM_ARG_REGS)
7062 {
7063 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7064 pcum->aapcs_next_ncrn = ncrn + nregs;
7065 return;
7066 }
7067
7068 /* C5 - Some core registers left and there are no arguments already
7069 on the stack: split this argument between the remaining core
7070 registers and the stack. */
7071 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7072 {
7073 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7074 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7075 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7076 return;
7077 }
7078
7079 /* C6 - NCRN is set to 4. */
7080 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7081
7082 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7083 return;
7084 }
7085
7086 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7087 for a call to a function whose data type is FNTYPE.
7088 For a library call, FNTYPE is NULL. */
7089 void
7090 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7091 rtx libname,
7092 tree fndecl ATTRIBUTE_UNUSED)
7093 {
7094 /* Long call handling. */
7095 if (fntype)
7096 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7097 else
7098 pcum->pcs_variant = arm_pcs_default;
7099
7100 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7101 {
7102 if (arm_libcall_uses_aapcs_base (libname))
7103 pcum->pcs_variant = ARM_PCS_AAPCS;
7104
7105 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7106 pcum->aapcs_reg = NULL_RTX;
7107 pcum->aapcs_partial = 0;
7108 pcum->aapcs_arg_processed = false;
7109 pcum->aapcs_cprc_slot = -1;
7110 pcum->can_split = true;
7111
7112 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7113 {
7114 int i;
7115
7116 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7117 {
7118 pcum->aapcs_cprc_failed[i] = false;
7119 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7120 }
7121 }
7122 return;
7123 }
7124
7125 /* Legacy ABIs */
7126
7127 /* On the ARM, the offset starts at 0. */
7128 pcum->nregs = 0;
7129 pcum->iwmmxt_nregs = 0;
7130 pcum->can_split = true;
7131
7132 /* Varargs vectors are treated the same as long long.
7133 named_count avoids having to change the way arm handles 'named'. */
7134 pcum->named_count = 0;
7135 pcum->nargs = 0;
7136
7137 if (TARGET_REALLY_IWMMXT && fntype)
7138 {
7139 tree fn_arg;
7140
7141 for (fn_arg = TYPE_ARG_TYPES (fntype);
7142 fn_arg;
7143 fn_arg = TREE_CHAIN (fn_arg))
7144 pcum->named_count += 1;
7145
7146 if (! pcum->named_count)
7147 pcum->named_count = INT_MAX;
7148 }
7149 }
7150
7151 /* Return 2 if double word alignment is required for argument passing,
7152 but wasn't required before the fix for PR88469.
7153 Return 1 if double word alignment is required for argument passing.
7154 Return -1 if double word alignment used to be required for argument
7155 passing before PR77728 ABI fix, but is not required anymore.
7156 Return 0 if double word alignment is not required and wasn't required
7157 before either. */
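/* As an illustration (not an exhaustive statement of the ABI): an argument
   of type 'long long', or of a structure type such as

     struct pair { double d; int i; };

   has 8-byte alignment, so this function returns 1 and the argument is
   passed in an even-numbered core register pair or at an 8-byte aligned
   stack offset.  */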
7158 static int
7159 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7160 {
7161 if (!type)
7162 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7163
7164 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7165 if (!AGGREGATE_TYPE_P (type))
7166 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7167
7168 /* Array types: Use member alignment of element type. */
7169 if (TREE_CODE (type) == ARRAY_TYPE)
7170 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7171
7172 int ret = 0;
7173 int ret2 = 0;
7174 /* Record/aggregate types: Use greatest member alignment of any member.
7175
7176 Note that we explicitly consider zero-sized fields here, even though
7177 they don't map to AAPCS machine types. For example, in:
7178
7179 struct __attribute__((aligned(8))) empty {};
7180
7181 struct s {
7182 [[no_unique_address]] empty e;
7183 int x;
7184 };
7185
7186 "s" contains only one Fundamental Data Type (the int field)
7187 but gains 8-byte alignment and size thanks to "e". */
7188 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7189 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7190 {
7191 if (TREE_CODE (field) == FIELD_DECL)
7192 return 1;
7193 else
7194 /* Before PR77728 fix, we were incorrectly considering also
7195 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7196 Make sure we can warn about that with -Wpsabi. */
7197 ret = -1;
7198 }
7199 else if (TREE_CODE (field) == FIELD_DECL
7200 && DECL_BIT_FIELD_TYPE (field)
7201 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7202 ret2 = 1;
7203
7204 if (ret2)
7205 return 2;
7206
7207 return ret;
7208 }
7209
7210
7211 /* Determine where to put an argument to a function.
7212 Value is zero to push the argument on the stack,
7213 or a hard register in which to store the argument.
7214
7215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7216 the preceding args and about the function being called.
7217 ARG is a description of the argument.
7218
7219 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7220 other arguments are passed on the stack. If (NAMED == 0) (which happens
7221 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7222 defined), say it is passed on the stack (function_prologue will
7223 indeed make it be passed on the stack if necessary). */
7224
7225 static rtx
7226 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7227 {
7228 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7229 int nregs;
7230
7231 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7232 a call insn (op3 of a call_value insn). */
7233 if (arg.end_marker_p ())
7234 return const0_rtx;
7235
7236 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7237 {
7238 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7239 return pcum->aapcs_reg;
7240 }
7241
7242 /* Varargs vectors are treated the same as long long.
7243 named_count avoids having to change the way arm handles 'named'. */
7244 if (TARGET_IWMMXT_ABI
7245 && arm_vector_mode_supported_p (arg.mode)
7246 && pcum->named_count > pcum->nargs + 1)
7247 {
7248 if (pcum->iwmmxt_nregs <= 9)
7249 return gen_rtx_REG (arg.mode,
7250 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7251 else
7252 {
7253 pcum->can_split = false;
7254 return NULL_RTX;
7255 }
7256 }
7257
7258 /* Put doubleword aligned quantities in even register pairs. */
7259 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7260 {
7261 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7262 if (res < 0 && warn_psabi)
7263 inform (input_location, "parameter passing for argument of type "
7264 "%qT changed in GCC 7.1", arg.type);
7265 else if (res > 0)
7266 {
7267 pcum->nregs++;
7268 if (res > 1 && warn_psabi)
7269 inform (input_location, "parameter passing for argument of type "
7270 "%qT changed in GCC 9.1", arg.type);
7271 }
7272 }
7273
7274 /* Only allow splitting an arg between regs and memory if all preceding
7275 args were allocated to regs. For args passed by reference we only count
7276 the reference pointer. */
7277 if (pcum->can_split)
7278 nregs = 1;
7279 else
7280 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7281
7282 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7283 return NULL_RTX;
7284
7285 return gen_rtx_REG (arg.mode, pcum->nregs);
7286 }
7287
7288 static unsigned int
7289 arm_function_arg_boundary (machine_mode mode, const_tree type)
7290 {
7291 if (!ARM_DOUBLEWORD_ALIGN)
7292 return PARM_BOUNDARY;
7293
7294 int res = arm_needs_doubleword_align (mode, type);
7295 if (res < 0 && warn_psabi)
7296 inform (input_location, "parameter passing for argument of type %qT "
7297 "changed in GCC 7.1", type);
7298 if (res > 1 && warn_psabi)
7299 inform (input_location, "parameter passing for argument of type "
7300 "%qT changed in GCC 9.1", type);
7301
7302 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7303 }
7304
7305 static int
7306 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7307 {
7308 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7309 int nregs = pcum->nregs;
7310
7311 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7312 {
7313 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7314 return pcum->aapcs_partial;
7315 }
7316
7317 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7318 return 0;
7319
7320 if (NUM_ARG_REGS > nregs
7321 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7322 && pcum->can_split)
7323 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7324
7325 return 0;
7326 }
7327
7328 /* Update the data in PCUM to advance over argument ARG. */
7329
7330 static void
7331 arm_function_arg_advance (cumulative_args_t pcum_v,
7332 const function_arg_info &arg)
7333 {
7334 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7335
7336 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7337 {
7338 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7339
7340 if (pcum->aapcs_cprc_slot >= 0)
7341 {
7342 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7343 arg.type);
7344 pcum->aapcs_cprc_slot = -1;
7345 }
7346
7347 /* Generic stuff. */
7348 pcum->aapcs_arg_processed = false;
7349 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7350 pcum->aapcs_reg = NULL_RTX;
7351 pcum->aapcs_partial = 0;
7352 }
7353 else
7354 {
7355 pcum->nargs += 1;
7356 if (arm_vector_mode_supported_p (arg.mode)
7357 && pcum->named_count > pcum->nargs
7358 && TARGET_IWMMXT_ABI)
7359 pcum->iwmmxt_nregs += 1;
7360 else
7361 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7362 }
7363 }
7364
7365 /* Variable sized types are passed by reference. This is a GCC
7366 extension to the ARM ABI. */
7367
7368 static bool
7369 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7370 {
7371 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7372 }
7373 \f
7374 /* Encode the current state of the #pragma [no_]long_calls. */
7375 typedef enum
7376 {
7377 OFF, /* No #pragma [no_]long_calls is in effect. */
7378 LONG, /* #pragma long_calls is in effect. */
7379 SHORT /* #pragma no_long_calls is in effect. */
7380 } arm_pragma_enum;
7381
7382 static arm_pragma_enum arm_pragma_long_calls = OFF;
7383
7384 void
7385 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7386 {
7387 arm_pragma_long_calls = LONG;
7388 }
7389
7390 void
7391 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7392 {
7393 arm_pragma_long_calls = SHORT;
7394 }
7395
7396 void
7397 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7398 {
7399 arm_pragma_long_calls = OFF;
7400 }
7401 \f
7402 /* Handle an attribute requiring a FUNCTION_DECL;
7403 arguments as in struct attribute_spec.handler. */
7404 static tree
7405 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7406 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7407 {
7408 if (TREE_CODE (*node) != FUNCTION_DECL)
7409 {
7410 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7411 name);
7412 *no_add_attrs = true;
7413 }
7414
7415 return NULL_TREE;
7416 }
7417
7418 /* Handle an "interrupt" or "isr" attribute;
7419 arguments as in struct attribute_spec.handler. */
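/* Typical uses this handler must accept look like (illustrative
   declarations only):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
     void __attribute__ ((isr)) default_handler (void);

   An unrecognised argument string makes arm_isr_value return
   ARM_FT_UNKNOWN, and the attribute is then ignored with a warning.  */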
7420 static tree
7421 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7422 bool *no_add_attrs)
7423 {
7424 if (DECL_P (*node))
7425 {
7426 if (TREE_CODE (*node) != FUNCTION_DECL)
7427 {
7428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7429 name);
7430 *no_add_attrs = true;
7431 }
7432 else if (TARGET_VFP_BASE)
7433 {
7434 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7435 name);
7436 }
7437 /* FIXME: the argument if any is checked for type attributes;
7438 should it be checked for decl ones? */
7439 }
7440 else
7441 {
7442 if (TREE_CODE (*node) == FUNCTION_TYPE
7443 || TREE_CODE (*node) == METHOD_TYPE)
7444 {
7445 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7446 {
7447 warning (OPT_Wattributes, "%qE attribute ignored",
7448 name);
7449 *no_add_attrs = true;
7450 }
7451 }
7452 else if (TREE_CODE (*node) == POINTER_TYPE
7453 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7454 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7455 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7456 {
7457 *node = build_variant_type_copy (*node);
7458 TREE_TYPE (*node) = build_type_attribute_variant
7459 (TREE_TYPE (*node),
7460 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7461 *no_add_attrs = true;
7462 }
7463 else
7464 {
7465 /* Possibly pass this attribute on from the type to a decl. */
7466 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7467 | (int) ATTR_FLAG_FUNCTION_NEXT
7468 | (int) ATTR_FLAG_ARRAY_NEXT))
7469 {
7470 *no_add_attrs = true;
7471 return tree_cons (name, args, NULL_TREE);
7472 }
7473 else
7474 {
7475 warning (OPT_Wattributes, "%qE attribute ignored",
7476 name);
7477 }
7478 }
7479 }
7480
7481 return NULL_TREE;
7482 }
7483
7484 /* Handle a "pcs" attribute; arguments as in struct
7485 attribute_spec.handler. */
7486 static tree
7487 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7488 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7489 {
7490 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7491 {
7492 warning (OPT_Wattributes, "%qE attribute ignored", name);
7493 *no_add_attrs = true;
7494 }
7495 return NULL_TREE;
7496 }
7497
7498 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7499 /* Handle the "notshared" attribute. This attribute is another way of
7500 requesting hidden visibility. ARM's compiler supports
7501 "__declspec(notshared)"; we support the same thing via an
7502 attribute. */
7503
7504 static tree
7505 arm_handle_notshared_attribute (tree *node,
7506 tree name ATTRIBUTE_UNUSED,
7507 tree args ATTRIBUTE_UNUSED,
7508 int flags ATTRIBUTE_UNUSED,
7509 bool *no_add_attrs)
7510 {
7511 tree decl = TYPE_NAME (*node);
7512
7513 if (decl)
7514 {
7515 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7516 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7517 *no_add_attrs = false;
7518 }
7519 return NULL_TREE;
7520 }
7521 #endif
7522
7523 /* This function returns true if a function with declaration FNDECL and type
7524 FNTYPE uses the stack to pass arguments or return variables and false
7525 otherwise. This is used for functions with the attributes
7526 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7527 diagnostic messages if the stack is used. NAME is the name of the attribute
7528 used. */
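/* For example (illustrative prototype, compiled with -mcmse), an entry
   function such as

     void __attribute__ ((cmse_nonsecure_entry)) f (long long a,
                                                    long long b, int c);

   is rejected here: 'a' and 'b' fill r0-r3, so 'c' would have to be
   passed on the stack.  */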
7529
7530 static bool
7531 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7532 {
7533 function_args_iterator args_iter;
7534 CUMULATIVE_ARGS args_so_far_v;
7535 cumulative_args_t args_so_far;
7536 bool first_param = true;
7537 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7538
7539 /* Error out if any argument is passed on the stack. */
7540 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7541 args_so_far = pack_cumulative_args (&args_so_far_v);
7542 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7543 {
7544 rtx arg_rtx;
7545
7546 prev_arg_type = arg_type;
7547 if (VOID_TYPE_P (arg_type))
7548 continue;
7549
7550 function_arg_info arg (arg_type, /*named=*/true);
7551 if (!first_param)
7552 /* ??? We should advance after processing the argument and pass
7553 the argument we're advancing past. */
7554 arm_function_arg_advance (args_so_far, arg);
7555 arg_rtx = arm_function_arg (args_so_far, arg);
7556 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7557 {
7558 error ("%qE attribute not available to functions with arguments "
7559 "passed on the stack", name);
7560 return true;
7561 }
7562 first_param = false;
7563 }
7564
7565 /* Error out for variadic functions since we cannot control how many
7566 arguments will be passed and thus stack could be used. stdarg_p () is not
7567 used for the checking to avoid browsing arguments twice. */
7568 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7569 {
7570 error ("%qE attribute not available to functions with variable number "
7571 "of arguments", name);
7572 return true;
7573 }
7574
7575 /* Error out if return value is passed on the stack. */
7576 ret_type = TREE_TYPE (fntype);
7577 if (arm_return_in_memory (ret_type, fntype))
7578 {
7579 error ("%qE attribute not available to functions that return value on "
7580 "the stack", name);
7581 return true;
7582 }
7583 return false;
7584 }
7585
7586 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7587 function will check whether the attribute is allowed here and will add the
7588 attribute to the function declaration tree or otherwise issue a warning. */
7589
7590 static tree
7591 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7592 tree /* args */,
7593 int /* flags */,
7594 bool *no_add_attrs)
7595 {
7596 tree fndecl;
7597
7598 if (!use_cmse)
7599 {
7600 *no_add_attrs = true;
7601 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7602 "option", name);
7603 return NULL_TREE;
7604 }
7605
7606 /* Ignore attribute for function types. */
7607 if (TREE_CODE (*node) != FUNCTION_DECL)
7608 {
7609 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7610 name);
7611 *no_add_attrs = true;
7612 return NULL_TREE;
7613 }
7614
7615 fndecl = *node;
7616
7617 /* Warn for static linkage functions. */
7618 if (!TREE_PUBLIC (fndecl))
7619 {
7620 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7621 "with static linkage", name);
7622 *no_add_attrs = true;
7623 return NULL_TREE;
7624 }
7625
7626 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7627 TREE_TYPE (fndecl));
7628 return NULL_TREE;
7629 }
7630
7631
7632 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7633 function will check whether the attribute is allowed here and will add the
7634 attribute to the function type tree or otherwise issue a diagnostic. The
7635 reason we check this at declaration time is to only allow the use of the
7636 attribute with declarations of function pointers and not function
7637 declarations. This function checks NODE is of the expected type and issues
7638 diagnostics otherwise using NAME. If it is not of the expected type
7639 *NO_ADD_ATTRS will be set to true. */
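/* Sketch of the accepted and rejected forms (illustrative declarations):

     void (*ns_fp) (void) __attribute__ ((cmse_nonsecure_call));  accepted
     void foo (void) __attribute__ ((cmse_nonsecure_call));       warned

   i.e. the attribute must end up on the function type reached through a
   pointer, never on a function declaration itself.  */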
7640
7641 static tree
7642 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7643 tree /* args */,
7644 int /* flags */,
7645 bool *no_add_attrs)
7646 {
7647 tree decl = NULL_TREE;
7648 tree fntype, type;
7649
7650 if (!use_cmse)
7651 {
7652 *no_add_attrs = true;
7653 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7654 "option", name);
7655 return NULL_TREE;
7656 }
7657
7658 if (DECL_P (*node))
7659 {
7660 fntype = TREE_TYPE (*node);
7661
7662 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7663 decl = *node;
7664 }
7665 else
7666 fntype = *node;
7667
7668 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7669 fntype = TREE_TYPE (fntype);
7670
7671 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7672 {
7673 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7674 "function pointer", name);
7675 *no_add_attrs = true;
7676 return NULL_TREE;
7677 }
7678
7679 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7680
7681 if (*no_add_attrs)
7682 return NULL_TREE;
7683
7684 /* Prevent trees being shared among function types with and without
7685 cmse_nonsecure_call attribute. */
7686 if (decl)
7687 {
7688 type = build_distinct_type_copy (TREE_TYPE (decl));
7689 TREE_TYPE (decl) = type;
7690 }
7691 else
7692 {
7693 type = build_distinct_type_copy (*node);
7694 *node = type;
7695 }
7696
7697 fntype = type;
7698
7699 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7700 {
7701 type = fntype;
7702 fntype = TREE_TYPE (fntype);
7703 fntype = build_distinct_type_copy (fntype);
7704 TREE_TYPE (type) = fntype;
7705 }
7706
7707 /* Construct a type attribute and add it to the function type. */
7708 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7709 TYPE_ATTRIBUTES (fntype));
7710 TYPE_ATTRIBUTES (fntype) = attrs;
7711 return NULL_TREE;
7712 }
7713
7714 /* Return 0 if the attributes for two types are incompatible, 1 if they
7715 are compatible, and 2 if they are nearly compatible (which causes a
7716 warning to be generated). */
7717 static int
7718 arm_comp_type_attributes (const_tree type1, const_tree type2)
7719 {
7720 int l1, l2, s1, s2;
7721
7722 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7723 TYPE_ATTRIBUTES (type1));
7724 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7725 TYPE_ATTRIBUTES (type2));
7726 if (bool (attrs1) != bool (attrs2))
7727 return 0;
7728 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7729 return 0;
7730
7731 /* Check for mismatch of non-default calling convention. */
7732 if (TREE_CODE (type1) != FUNCTION_TYPE)
7733 return 1;
7734
7735 /* Check for mismatched call attributes. */
7736 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7737 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7738 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7739 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7740
7741 /* Only bother to check if an attribute is defined. */
7742 if (l1 | l2 | s1 | s2)
7743 {
7744 /* If one type has an attribute, the other must have the same attribute. */
7745 if ((l1 != l2) || (s1 != s2))
7746 return 0;
7747
7748 /* Disallow mixed attributes. */
7749 if ((l1 & s2) || (l2 & s1))
7750 return 0;
7751 }
7752
7753 /* Check for mismatched ISR attribute. */
7754 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7755 if (! l1)
7756 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7757 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7758 if (! l2)
7759 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7760 if (l1 != l2)
7761 return 0;
7762
7763 l1 = lookup_attribute ("cmse_nonsecure_call",
7764 TYPE_ATTRIBUTES (type1)) != NULL;
7765 l2 = lookup_attribute ("cmse_nonsecure_call",
7766 TYPE_ATTRIBUTES (type2)) != NULL;
7767
7768 if (l1 != l2)
7769 return 0;
7770
7771 return 1;
7772 }
7773
7774 /* Assign default attributes to a newly defined type. This is used to
7775 set short_call/long_call attributes for function types of
7776 functions defined inside corresponding #pragma scopes. */
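/* For instance (illustrative only):

     #pragma long_calls
     void remote_helper (void);      gets an implicit long_call attribute
     #pragma long_calls_off

   declarations between the two pragmas behave as if they carried
   __attribute__ ((long_call)) explicitly.  */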
7777 static void
7778 arm_set_default_type_attributes (tree type)
7779 {
7780 /* Add __attribute__ ((long_call)) to all functions, when
7781 inside #pragma long_calls or __attribute__ ((short_call)),
7782 when inside #pragma no_long_calls. */
7783 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7784 {
7785 tree type_attr_list, attr_name;
7786 type_attr_list = TYPE_ATTRIBUTES (type);
7787
7788 if (arm_pragma_long_calls == LONG)
7789 attr_name = get_identifier ("long_call");
7790 else if (arm_pragma_long_calls == SHORT)
7791 attr_name = get_identifier ("short_call");
7792 else
7793 return;
7794
7795 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7796 TYPE_ATTRIBUTES (type) = type_attr_list;
7797 }
7798 }
7799 \f
7800 /* Return true if DECL is known to be linked into section SECTION. */
7801
7802 static bool
7803 arm_function_in_section_p (tree decl, section *section)
7804 {
7805 /* We can only be certain about the prevailing symbol definition. */
7806 if (!decl_binds_to_current_def_p (decl))
7807 return false;
7808
7809 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7810 if (!DECL_SECTION_NAME (decl))
7811 {
7812 /* Make sure that we will not create a unique section for DECL. */
7813 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7814 return false;
7815 }
7816
7817 return function_section (decl) == section;
7818 }
7819
7820 /* Return nonzero if a 32-bit "long_call" should be generated for
7821 a call from the current function to DECL. We generate a long_call
7822 if the function:
7823
7824 a. has an __attribute__((long_call))
7825 or b. is within the scope of a #pragma long_calls
7826 or c. the -mlong-calls command line switch has been specified
7827
7828 However we do not generate a long call if the function:
7829
7830 d. has an __attribute__ ((short_call))
7831 or e. is inside the scope of a #pragma no_long_calls
7832 or f. is defined in the same section as the current function. */
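/* So, for example (illustrative declarations):

     void far_func (void) __attribute__ ((long_call));    always long call
     void near_func (void) __attribute__ ((short_call));  never long call

   and compiling with -mlong-calls makes the long form the default for
   anything not covered by b, d, e or f above.  */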
7833
7834 bool
7835 arm_is_long_call_p (tree decl)
7836 {
7837 tree attrs;
7838
7839 if (!decl)
7840 return TARGET_LONG_CALLS;
7841
7842 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7843 if (lookup_attribute ("short_call", attrs))
7844 return false;
7845
7846 /* For "f", be conservative, and only cater for cases in which the
7847 whole of the current function is placed in the same section. */
7848 if (!flag_reorder_blocks_and_partition
7849 && TREE_CODE (decl) == FUNCTION_DECL
7850 && arm_function_in_section_p (decl, current_function_section ()))
7851 return false;
7852
7853 if (lookup_attribute ("long_call", attrs))
7854 return true;
7855
7856 return TARGET_LONG_CALLS;
7857 }
7858
7859 /* Return nonzero if it is ok to make a tail-call to DECL. */
7860 static bool
7861 arm_function_ok_for_sibcall (tree decl, tree exp)
7862 {
7863 unsigned long func_type;
7864
7865 if (cfun->machine->sibcall_blocked)
7866 return false;
7867
7868 if (TARGET_FDPIC)
7869 {
7870 /* In FDPIC, never tailcall something for which we have no decl:
7871 the target function could be in a different module, requiring
7872 a different FDPIC register value. */
7873 if (decl == NULL)
7874 return false;
7875 }
7876
7877 /* Never tailcall something if we are generating code for Thumb-1. */
7878 if (TARGET_THUMB1)
7879 return false;
7880
7881 /* The PIC register is live on entry to VxWorks PLT entries, so we
7882 must make the call before restoring the PIC register. */
7883 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7884 return false;
7885
7886 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7887 may be used both as the target of the call and as the base register for
7888 restoring the VFP registers. */
7889 if (TARGET_APCS_FRAME && TARGET_ARM
7890 && TARGET_HARD_FLOAT
7891 && decl && arm_is_long_call_p (decl))
7892 return false;
7893
7894 /* If we are interworking and the function is not declared static
7895 then we can't tail-call it unless we know that it exists in this
7896 compilation unit (since it might be a Thumb routine). */
7897 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7898 && !TREE_ASM_WRITTEN (decl))
7899 return false;
7900
7901 func_type = arm_current_func_type ();
7902 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7903 if (IS_INTERRUPT (func_type))
7904 return false;
7905
7906 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7907 generated for entry functions themselves. */
7908 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7909 return false;
7910
7911 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7912 this would complicate matters for later code generation. */
7913 if (TREE_CODE (exp) == CALL_EXPR)
7914 {
7915 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7916 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7917 return false;
7918 }
7919
7920 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7921 {
7922 /* Check that the return value locations are the same. For
7923 example that we aren't returning a value from the sibling in
7924 a VFP register but then need to transfer it to a core
7925 register. */
7926 rtx a, b;
7927 tree decl_or_type = decl;
7928
7929 /* If it is an indirect function pointer, get the function type. */
7930 if (!decl)
7931 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7932
7933 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7934 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7935 cfun->decl, false);
7936 if (!rtx_equal_p (a, b))
7937 return false;
7938 }
7939
7940 /* Never tailcall if function may be called with a misaligned SP. */
7941 if (IS_STACKALIGN (func_type))
7942 return false;
7943
7944 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7945 references should become a NOP. Don't convert such calls into
7946 sibling calls. */
7947 if (TARGET_AAPCS_BASED
7948 && arm_abi == ARM_ABI_AAPCS
7949 && decl
7950 && DECL_WEAK (decl))
7951 return false;
7952
7953 /* We cannot do a tailcall for an indirect call by descriptor if all the
7954 argument registers are used because the only register left to load the
7955 address is IP and it will already contain the static chain. */
7956 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7957 {
7958 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7959 CUMULATIVE_ARGS cum;
7960 cumulative_args_t cum_v;
7961
7962 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7963 cum_v = pack_cumulative_args (&cum);
7964
7965 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7966 {
7967 tree type = TREE_VALUE (t);
7968 if (!VOID_TYPE_P (type))
7969 {
7970 function_arg_info arg (type, /*named=*/true);
7971 arm_function_arg_advance (cum_v, arg);
7972 }
7973 }
7974
7975 function_arg_info arg (integer_type_node, /*named=*/true);
7976 if (!arm_function_arg (cum_v, arg))
7977 return false;
7978 }
7979
7980 /* Everything else is ok. */
7981 return true;
7982 }
7983
7984 \f
7985 /* Addressing mode support functions. */
7986
7987 /* Return nonzero if X is a legitimate immediate operand when compiling
7988 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7989 int
7990 legitimate_pic_operand_p (rtx x)
7991 {
7992 if (SYMBOL_REF_P (x)
7993 || (GET_CODE (x) == CONST
7994 && GET_CODE (XEXP (x, 0)) == PLUS
7995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7996 return 0;
7997
7998 return 1;
7999 }
8000
8001 /* Record that the current function needs a PIC register. If PIC_REG is null,
8002 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8003 both cases cfun->machine->pic_reg is initialized if we have not already done
8004 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
8005 the PIC register is reloaded in the current position of the instruction stream
8006 regardless of whether it was loaded before. Otherwise, it is only loaded
8007 if not already done so (crtl->uses_pic_offset_table is null). Note that
8008 a nonnull PIC_REG is supported only if COMPUTE_NOW is true, and a null
8009 PIC_REG only if COMPUTE_NOW is false. */
8010
8011 static void
8012 require_pic_register (rtx pic_reg, bool compute_now)
8013 {
8014 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8015
8016 /* A lot of the logic here is made obscure by the fact that this
8017 routine gets called as part of the rtx cost estimation process.
8018 We don't want those calls to affect any assumptions about the real
8019 function; and further, we can't call entry_of_function() until we
8020 start the real expansion process. */
8021 if (!crtl->uses_pic_offset_table || compute_now)
8022 {
8023 gcc_assert (can_create_pseudo_p ()
8024 || (pic_reg != NULL_RTX
8025 && REG_P (pic_reg)
8026 && GET_MODE (pic_reg) == Pmode));
8027 if (arm_pic_register != INVALID_REGNUM
8028 && !compute_now
8029 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8030 {
8031 if (!cfun->machine->pic_reg)
8032 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8033
8034 /* Play games to avoid marking the function as needing pic
8035 if we are being called as part of the cost-estimation
8036 process. */
8037 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8038 crtl->uses_pic_offset_table = 1;
8039 }
8040 else
8041 {
8042 rtx_insn *seq, *insn;
8043
8044 if (pic_reg == NULL_RTX)
8045 pic_reg = gen_reg_rtx (Pmode);
8046 if (!cfun->machine->pic_reg)
8047 cfun->machine->pic_reg = pic_reg;
8048
8049 /* Play games to avoid marking the function as needing pic
8050 if we are being called as part of the cost-estimation
8051 process. */
8052 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8053 {
8054 crtl->uses_pic_offset_table = 1;
8055 start_sequence ();
8056
8057 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8058 && arm_pic_register > LAST_LO_REGNUM
8059 && !compute_now)
8060 emit_move_insn (cfun->machine->pic_reg,
8061 gen_rtx_REG (Pmode, arm_pic_register));
8062 else
8063 arm_load_pic_register (0UL, pic_reg);
8064
8065 seq = get_insns ();
8066 end_sequence ();
8067
8068 for (insn = seq; insn; insn = NEXT_INSN (insn))
8069 if (INSN_P (insn))
8070 INSN_LOCATION (insn) = prologue_location;
8071
8072 /* We can be called during expansion of PHI nodes, where
8073 we can't yet emit instructions directly in the final
8074 insn stream. Queue the insns on the entry edge, they will
8075 be committed after everything else is expanded. */
8076 if (currently_expanding_to_rtl)
8077 insert_insn_on_edge (seq,
8078 single_succ_edge
8079 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8080 else
8081 emit_insn (seq);
8082 }
8083 }
8084 }
8085 }
8086
8087 /* Generate insns to calculate the address of ORIG in pic mode. */
8088 static rtx_insn *
8089 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8090 {
8091 rtx pat;
8092 rtx mem;
8093
8094 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8095
8096 /* Make the MEM as close to a constant as possible. */
8097 mem = SET_SRC (pat);
8098 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8099 MEM_READONLY_P (mem) = 1;
8100 MEM_NOTRAP_P (mem) = 1;
8101
8102 return emit_insn (pat);
8103 }
8104
8105 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8106 created to hold the result of the load. If not NULL, PIC_REG indicates
8107 which register to use as PIC register, otherwise it is decided by register
8108 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8109 location in the instruction stream, regardless of whether it was loaded
8110 previously. Note that a nonnull PIC_REG is supported only if COMPUTE_NOW is
8111 true, and a null PIC_REG only if COMPUTE_NOW is false.
8112
8113 Returns the register REG into which the PIC load is performed. */
8114
8115 rtx
8116 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8117 bool compute_now)
8118 {
8119 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8120
8121 if (SYMBOL_REF_P (orig)
8122 || LABEL_REF_P (orig))
8123 {
8124 if (reg == 0)
8125 {
8126 gcc_assert (can_create_pseudo_p ());
8127 reg = gen_reg_rtx (Pmode);
8128 }
8129
8130 /* VxWorks does not impose a fixed gap between segments; the run-time
8131 gap can be different from the object-file gap. We therefore can't
8132 use GOTOFF unless we are absolutely sure that the symbol is in the
8133 same segment as the GOT. Unfortunately, the flexibility of linker
8134 scripts means that we can't be sure of that in general, so assume
8135 that GOTOFF is never valid on VxWorks. */
8136 /* References to weak symbols cannot be resolved locally: they
8137 may be overridden by a non-weak definition at link time. */
8138 rtx_insn *insn;
8139 if ((LABEL_REF_P (orig)
8140 || (SYMBOL_REF_P (orig)
8141 && SYMBOL_REF_LOCAL_P (orig)
8142 && (SYMBOL_REF_DECL (orig)
8143 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8144 && (!SYMBOL_REF_FUNCTION_P (orig)
8145 || arm_fdpic_local_funcdesc_p (orig))))
8146 && NEED_GOT_RELOC
8147 && arm_pic_data_is_text_relative)
8148 insn = arm_pic_static_addr (orig, reg);
8149 else
8150 {
8151 /* If this function doesn't have a pic register, create one now. */
8152 require_pic_register (pic_reg, compute_now);
8153
8154 if (pic_reg == NULL_RTX)
8155 pic_reg = cfun->machine->pic_reg;
8156
8157 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8158 }
8159
8160 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8161 by loop. */
8162 set_unique_reg_note (insn, REG_EQUAL, orig);
8163
8164 return reg;
8165 }
8166 else if (GET_CODE (orig) == CONST)
8167 {
8168 rtx base, offset;
8169
8170 if (GET_CODE (XEXP (orig, 0)) == PLUS
8171 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8172 return orig;
8173
8174 /* Handle the case where we have: const (UNSPEC_TLS). */
8175 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8176 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8177 return orig;
8178
8179 /* Handle the case where we have:
8180 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8181 CONST_INT. */
8182 if (GET_CODE (XEXP (orig, 0)) == PLUS
8183 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8184 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8185 {
8186 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8187 return orig;
8188 }
8189
8190 if (reg == 0)
8191 {
8192 gcc_assert (can_create_pseudo_p ());
8193 reg = gen_reg_rtx (Pmode);
8194 }
8195
8196 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8197
8198 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8199 pic_reg, compute_now);
8200 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8201 base == reg ? 0 : reg, pic_reg,
8202 compute_now);
8203
8204 if (CONST_INT_P (offset))
8205 {
8206 /* The base register doesn't really matter, we only want to
8207 test the index for the appropriate mode. */
8208 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8209 {
8210 gcc_assert (can_create_pseudo_p ());
8211 offset = force_reg (Pmode, offset);
8212 }
8213
8214 if (CONST_INT_P (offset))
8215 return plus_constant (Pmode, base, INTVAL (offset));
8216 }
8217
8218 if (GET_MODE_SIZE (mode) > 4
8219 && (GET_MODE_CLASS (mode) == MODE_INT
8220 || TARGET_SOFT_FLOAT))
8221 {
8222 emit_insn (gen_addsi3 (reg, base, offset));
8223 return reg;
8224 }
8225
8226 return gen_rtx_PLUS (Pmode, base, offset);
8227 }
8228
8229 return orig;
8230 }
8231
8232
8233 /* Generate insns that produce the address of the stack canary. */
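/* This path is used when the canary is read relative to the thread pointer,
   e.g. with options along the lines of (shown for illustration only; the
   offset value is arbitrary):

     -fstack-protector-strong -mstack-protector-guard=tls
       -mstack-protector-guard-offset=1296

   in which case the address formed below is TP + 1296.  */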
8234 rtx
8235 arm_stack_protect_tls_canary_mem (bool reload)
8236 {
8237 rtx tp = gen_reg_rtx (SImode);
8238 if (reload)
8239 emit_insn (gen_reload_tp_hard (tp));
8240 else
8241 emit_insn (gen_load_tp_hard (tp));
8242
8243 rtx reg = gen_reg_rtx (SImode);
8244 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8245 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8246 return gen_rtx_MEM (SImode, reg);
8247 }
8248
8249
8250 /* Whether a register is callee saved or not. This is necessary because high
8251 registers are marked as caller saved when optimizing for size on Thumb-1
8252 targets despite being callee saved in order to avoid using them. */
8253 #define callee_saved_reg_p(reg) \
8254 (!call_used_or_fixed_reg_p (reg) \
8255 || (TARGET_THUMB1 && optimize_size \
8256 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8257
8258 /* Return a mask for the call-clobbered low registers that are unused
8259 at the end of the prologue. */
8260 static unsigned long
8261 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8262 {
8263 unsigned long mask = 0;
8264 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8265
8266 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8267 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8268 mask |= 1 << (reg - FIRST_LO_REGNUM);
8269 return mask;
8270 }
8271
8272 /* Similarly for the start of the epilogue. */
8273 static unsigned long
8274 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8275 {
8276 unsigned long mask = 0;
8277 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8278
8279 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8280 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8281 mask |= 1 << (reg - FIRST_LO_REGNUM);
8282 return mask;
8283 }
8284
8285 /* Find a spare register to use during the prolog of a function. */
8286
8287 static int
8288 thumb_find_work_register (unsigned long pushed_regs_mask)
8289 {
8290 int reg;
8291
8292 unsigned long unused_regs
8293 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8294
8295 /* Check the argument registers first as these are call-used. The
8296 register allocation order means that sometimes r3 might be used
8297 but earlier argument registers might not, so check them all. */
8298 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8299 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8300 return reg;
8301
8302 /* Otherwise look for a call-saved register that is going to be pushed. */
8303 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8304 if (pushed_regs_mask & (1 << reg))
8305 return reg;
8306
8307 if (TARGET_THUMB2)
8308 {
8309 /* Thumb-2 can use high regs. */
8310 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8311 if (pushed_regs_mask & (1 << reg))
8312 return reg;
8313 }
8314 /* Something went wrong - thumb_compute_save_reg_mask()
8315 should have arranged for a suitable register to be pushed. */
8316 gcc_unreachable ();
8317 }
8318
8319 static GTY(()) int pic_labelno;
8320
8321 /* Generate code to load the PIC register into PIC_REG. On Thumb-1 a
8322 spare low register taken from SAVED_REGS may be used as a scratch. */
8323
8324 void
8325 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8326 {
8327 rtx l1, labelno, pic_tmp, pic_rtx;
8328
8329 if (crtl->uses_pic_offset_table == 0
8330 || TARGET_SINGLE_PIC_BASE
8331 || TARGET_FDPIC)
8332 return;
8333
8334 gcc_assert (flag_pic);
8335
8336 if (pic_reg == NULL_RTX)
8337 pic_reg = cfun->machine->pic_reg;
8338 if (TARGET_VXWORKS_RTP)
8339 {
8340 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8341 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8342 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8343
8344 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8345
8346 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8347 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8348 }
8349 else
8350 {
8351 /* We use an UNSPEC rather than a LABEL_REF because this label
8352 never appears in the code stream. */
8353
8354 labelno = GEN_INT (pic_labelno++);
8355 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8356 l1 = gen_rtx_CONST (VOIDmode, l1);
8357
8358 /* On the ARM the PC register contains 'dot + 8' at the time of the
8359 addition; on the Thumb it is 'dot + 4'. */
8360 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8361 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8362 UNSPEC_GOTSYM_OFF);
8363 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8364
8365 if (TARGET_32BIT)
8366 {
8367 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8368 }
8369 else /* TARGET_THUMB1 */
8370 {
8371 if (arm_pic_register != INVALID_REGNUM
8372 && REGNO (pic_reg) > LAST_LO_REGNUM)
8373 {
8374 /* We will have pushed the pic register, so we should always be
8375 able to find a work register. */
8376 pic_tmp = gen_rtx_REG (SImode,
8377 thumb_find_work_register (saved_regs));
8378 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8379 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8380 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8381 }
8382 else if (arm_pic_register != INVALID_REGNUM
8383 && arm_pic_register > LAST_LO_REGNUM
8384 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8385 {
8386 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8387 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8388 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8389 }
8390 else
8391 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8392 }
8393 }
8394
8395 /* Need to emit this whether or not we obey regdecls,
8396 since setjmp/longjmp can cause life info to screw up. */
8397 emit_use (pic_reg);
8398 }
8399
8400 /* Try to determine whether an object, referenced via ORIG, will be
8401 placed in the text or data segment. This is used in FDPIC mode, to
8402 decide which relocations to use when accessing ORIG. *IS_READONLY
8403 is set to true if ORIG is a read-only location, false otherwise.
8404 Return true if we could determine the location of ORIG, false
8405 otherwise. *IS_READONLY is valid only when we return true. */
8406 static bool
8407 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8408 {
8409 *is_readonly = false;
8410
8411 if (LABEL_REF_P (orig))
8412 {
8413 *is_readonly = true;
8414 return true;
8415 }
8416
8417 if (SYMBOL_REF_P (orig))
8418 {
8419 if (CONSTANT_POOL_ADDRESS_P (orig))
8420 {
8421 *is_readonly = true;
8422 return true;
8423 }
8424 if (SYMBOL_REF_LOCAL_P (orig)
8425 && !SYMBOL_REF_EXTERNAL_P (orig)
8426 && SYMBOL_REF_DECL (orig)
8427 && (!DECL_P (SYMBOL_REF_DECL (orig))
8428 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8429 {
8430 tree decl = SYMBOL_REF_DECL (orig);
8431 tree init = (TREE_CODE (decl) == VAR_DECL)
8432 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8433 ? decl : 0;
8434 int reloc = 0;
8435 bool named_section, readonly;
8436
8437 if (init && init != error_mark_node)
8438 reloc = compute_reloc_for_constant (init);
8439
8440 named_section = TREE_CODE (decl) == VAR_DECL
8441 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8442 readonly = decl_readonly_section (decl, reloc);
8443
8444 /* We don't know where the link script will put a named
8445 section, so return false in such a case. */
8446 if (named_section)
8447 return false;
8448
8449 *is_readonly = readonly;
8450 return true;
8451 }
8452
8453 /* We don't know. */
8454 return false;
8455 }
8456
8457 gcc_unreachable ();
8458 }
8459
8460 /* Generate code to load the address of a static var when flag_pic is set. */
8461 static rtx_insn *
8462 arm_pic_static_addr (rtx orig, rtx reg)
8463 {
8464 rtx l1, labelno, offset_rtx;
8465 rtx_insn *insn;
8466
8467 gcc_assert (flag_pic);
8468
8469 bool is_readonly = false;
8470 bool info_known = false;
8471
8472 if (TARGET_FDPIC
8473 && SYMBOL_REF_P (orig)
8474 && !SYMBOL_REF_FUNCTION_P (orig))
8475 info_known = arm_is_segment_info_known (orig, &is_readonly);
8476
8477 if (TARGET_FDPIC
8478 && SYMBOL_REF_P (orig)
8479 && !SYMBOL_REF_FUNCTION_P (orig)
8480 && !info_known)
8481 {
8482 /* We don't know where orig is stored, so we have to be
8483 pessimistic and use a GOT relocation. */
8484 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8485
8486 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8487 }
8488 else if (TARGET_FDPIC
8489 && SYMBOL_REF_P (orig)
8490 && (SYMBOL_REF_FUNCTION_P (orig)
8491 || !is_readonly))
8492 {
8493 /* We use the GOTOFF relocation. */
8494 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8495
8496 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8497 emit_insn (gen_movsi (reg, l1));
8498 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8499 }
8500 else
8501 {
8502 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8503 PC-relative access. */
8504 /* We use an UNSPEC rather than a LABEL_REF because this label
8505 never appears in the code stream. */
8506 labelno = GEN_INT (pic_labelno++);
8507 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8508 l1 = gen_rtx_CONST (VOIDmode, l1);
8509
8510 /* On the ARM the PC register contains 'dot + 8' at the time of the
8511 addition; on the Thumb it is 'dot + 4'. */
8512 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8513 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8514 UNSPEC_SYMBOL_OFFSET);
8515 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8516
8517 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8518 labelno));
8519 }
8520
8521 return insn;
8522 }
8523
8524 /* Return nonzero if X is valid as an ARM state addressing register. */
8525 static int
8526 arm_address_register_rtx_p (rtx x, int strict_p)
8527 {
8528 int regno;
8529
8530 if (!REG_P (x))
8531 return 0;
8532
8533 regno = REGNO (x);
8534
8535 if (strict_p)
8536 return ARM_REGNO_OK_FOR_BASE_P (regno);
8537
8538 return (regno <= LAST_ARM_REGNUM
8539 || regno >= FIRST_PSEUDO_REGISTER
8540 || regno == FRAME_POINTER_REGNUM
8541 || regno == ARG_POINTER_REGNUM);
8542 }
8543
8544 /* Return TRUE if this rtx is the difference of a symbol and a label,
8545 and will reduce to a PC-relative relocation in the object file.
8546 Expressions like this can be left alone when generating PIC, rather
8547 than forced through the GOT. */
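/* For example (an illustrative sketch only), an expression such as
     (minus (symbol_ref ("foo")) (label_ref 23))
   reduces to a PC-relative relocation in the object file and so does not
   need a GOT entry.  */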
8548 static int
8549 pcrel_constant_p (rtx x)
8550 {
8551 if (GET_CODE (x) == MINUS)
8552 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8553
8554 return FALSE;
8555 }
8556
8557 /* Return true if X will surely end up in an index register after the next
8558 splitting pass. */
8559 static bool
8560 will_be_in_index_register (const_rtx x)
8561 {
8562 /* arm.md: calculate_pic_address will split this into a register. */
8563 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8564 }
8565
8566 /* Return nonzero if X is a valid ARM state address operand. */
8567 int
8568 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8569 int strict_p)
8570 {
8571 bool use_ldrd;
8572 enum rtx_code code = GET_CODE (x);
8573
8574 if (arm_address_register_rtx_p (x, strict_p))
8575 return 1;
8576
8577 use_ldrd = (TARGET_LDRD
8578 && (mode == DImode || mode == DFmode));
8579
8580 if (code == POST_INC || code == PRE_DEC
8581 || ((code == PRE_INC || code == POST_DEC)
8582 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8583 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8584
8585 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8586 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8587 && GET_CODE (XEXP (x, 1)) == PLUS
8588 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8589 {
8590 rtx addend = XEXP (XEXP (x, 1), 1);
8591
8592 /* Don't allow ldrd post-increment by register because it's hard
8593 to fix up invalid register choices. */
8594 if (use_ldrd
8595 && GET_CODE (x) == POST_MODIFY
8596 && REG_P (addend))
8597 return 0;
8598
8599 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8600 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8601 }
8602
8603 /* After reload constants split into minipools will have addresses
8604 from a LABEL_REF. */
8605 else if (reload_completed
8606 && (code == LABEL_REF
8607 || (code == CONST
8608 && GET_CODE (XEXP (x, 0)) == PLUS
8609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8610 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8611 return 1;
8612
8613 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8614 return 0;
8615
8616 else if (code == PLUS)
8617 {
8618 rtx xop0 = XEXP (x, 0);
8619 rtx xop1 = XEXP (x, 1);
8620
8621 return ((arm_address_register_rtx_p (xop0, strict_p)
8622 && ((CONST_INT_P (xop1)
8623 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8624 || (!strict_p && will_be_in_index_register (xop1))))
8625 || (arm_address_register_rtx_p (xop1, strict_p)
8626 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8627 }
8628
8629 #if 0
8630 /* Reload currently can't handle MINUS, so disable this for now */
8631 else if (GET_CODE (x) == MINUS)
8632 {
8633 rtx xop0 = XEXP (x, 0);
8634 rtx xop1 = XEXP (x, 1);
8635
8636 return (arm_address_register_rtx_p (xop0, strict_p)
8637 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8638 }
8639 #endif
8640
8641 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8642 && code == SYMBOL_REF
8643 && CONSTANT_POOL_ADDRESS_P (x)
8644 && ! (flag_pic
8645 && symbol_mentioned_p (get_pool_constant (x))
8646 && ! pcrel_constant_p (get_pool_constant (x))))
8647 return 1;
8648
8649 return 0;
8650 }
8651
8652 /* Return true if we can avoid creating a constant pool entry for x. */
8653 static bool
8654 can_avoid_literal_pool_for_label_p (rtx x)
8655 {
8656 /* Normally we can assign constant values to target registers without
8657 the help of the constant pool. But there are cases where we have to use
8658 the constant pool, such as:
8659 1) assigning a label to a register;
8660 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8661
8662 A constant pool access of the form:
8663 (set (reg r0) (mem (symbol_ref (".LC0"))))
8664 will cause the use of the literal pool (later, in function arm_reorg).
8665 So here we mark such a form as invalid; the compiler
8666 will then adjust it into:
8667 (set (reg r0) (symbol_ref (".LC0")))
8668 (set (reg r0) (mem (reg r0))).
8669 No extra register is required, and (mem (reg r0)) won't cause the use
8670 of literal pools. */
8671 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8672 && CONSTANT_POOL_ADDRESS_P (x))
8673 return 1;
8674 return 0;
8675 }
8676
8677
8678 /* Return nonzero if X is a valid Thumb-2 address operand. */
8679 static int
8680 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8681 {
8682 bool use_ldrd;
8683 enum rtx_code code = GET_CODE (x);
8684
8685 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8686 return mve_vector_mem_operand (mode, x, strict_p);
8687
8688 if (arm_address_register_rtx_p (x, strict_p))
8689 return 1;
8690
8691 use_ldrd = (TARGET_LDRD
8692 && (mode == DImode || mode == DFmode));
8693
8694 if (code == POST_INC || code == PRE_DEC
8695 || ((code == PRE_INC || code == POST_DEC)
8696 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8697 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8698
8699 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8700 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8701 && GET_CODE (XEXP (x, 1)) == PLUS
8702 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8703 {
8704 /* Thumb-2 only has autoincrement by constant. */
8705 rtx addend = XEXP (XEXP (x, 1), 1);
8706 HOST_WIDE_INT offset;
8707
8708 if (!CONST_INT_P (addend))
8709 return 0;
8710
8711 offset = INTVAL (addend);
8712 if (GET_MODE_SIZE (mode) <= 4)
8713 return (offset > -256 && offset < 256);
8714
8715 return (use_ldrd && offset > -1024 && offset < 1024
8716 && (offset & 3) == 0);
8717 }
8718
8719 /* After reload constants split into minipools will have addresses
8720 from a LABEL_REF. */
8721 else if (reload_completed
8722 && (code == LABEL_REF
8723 || (code == CONST
8724 && GET_CODE (XEXP (x, 0)) == PLUS
8725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8726 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8727 return 1;
8728
8729 else if (mode == TImode
8730 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8731 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8732 return 0;
8733
8734 else if (code == PLUS)
8735 {
8736 rtx xop0 = XEXP (x, 0);
8737 rtx xop1 = XEXP (x, 1);
8738
8739 return ((arm_address_register_rtx_p (xop0, strict_p)
8740 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8741 || (!strict_p && will_be_in_index_register (xop1))))
8742 || (arm_address_register_rtx_p (xop1, strict_p)
8743 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8744 }
8745
8746 else if (can_avoid_literal_pool_for_label_p (x))
8747 return 0;
8748
8749 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8750 && code == SYMBOL_REF
8751 && CONSTANT_POOL_ADDRESS_P (x)
8752 && ! (flag_pic
8753 && symbol_mentioned_p (get_pool_constant (x))
8754 && ! pcrel_constant_p (get_pool_constant (x))))
8755 return 1;
8756
8757 return 0;
8758 }
8759
8760 /* Return nonzero if INDEX is valid for an address index operand in
8761 ARM state. */
8762 static int
8763 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8764 int strict_p)
8765 {
8766 HOST_WIDE_INT range;
8767 enum rtx_code code = GET_CODE (index);
8768
8769 /* Standard coprocessor addressing modes. */
8770 if (TARGET_HARD_FLOAT
8771 && (mode == SFmode || mode == DFmode))
8772 return (code == CONST_INT && INTVAL (index) < 1024
8773 && INTVAL (index) > -1024
8774 && (INTVAL (index) & 3) == 0);
8775
8776 /* For quad modes, we restrict the constant offset to be slightly less
8777 than what the instruction format permits. We do this because for
8778 quad mode moves, we will actually decompose them into two separate
8779 double-mode reads or writes. INDEX must therefore be a valid
8780 (double-mode) offset and so should INDEX+8. */
8781 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8782 return (code == CONST_INT
8783 && INTVAL (index) < 1016
8784 && INTVAL (index) > -1024
8785 && (INTVAL (index) & 3) == 0);
8786
8787 /* We have no such constraint on double mode offsets, so we permit the
8788 full range of the instruction format. */
8789 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8790 return (code == CONST_INT
8791 && INTVAL (index) < 1024
8792 && INTVAL (index) > -1024
8793 && (INTVAL (index) & 3) == 0);
8794
8795 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8796 return (code == CONST_INT
8797 && INTVAL (index) < 1024
8798 && INTVAL (index) > -1024
8799 && (INTVAL (index) & 3) == 0);
8800
8801 if (arm_address_register_rtx_p (index, strict_p)
8802 && (GET_MODE_SIZE (mode) <= 4))
8803 return 1;
8804
8805 if (mode == DImode || mode == DFmode)
8806 {
8807 if (code == CONST_INT)
8808 {
8809 HOST_WIDE_INT val = INTVAL (index);
8810
8811 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8812 If vldr is selected it uses arm_coproc_mem_operand. */
8813 if (TARGET_LDRD)
8814 return val > -256 && val < 256;
8815 else
8816 return val > -4096 && val < 4092;
8817 }
8818
8819 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8820 }
8821
8822 if (GET_MODE_SIZE (mode) <= 4
8823 && ! (arm_arch4
8824 && (mode == HImode
8825 || mode == HFmode
8826 || (mode == QImode && outer == SIGN_EXTEND))))
8827 {
8828 if (code == MULT)
8829 {
8830 rtx xiop0 = XEXP (index, 0);
8831 rtx xiop1 = XEXP (index, 1);
8832
8833 return ((arm_address_register_rtx_p (xiop0, strict_p)
8834 && power_of_two_operand (xiop1, SImode))
8835 || (arm_address_register_rtx_p (xiop1, strict_p)
8836 && power_of_two_operand (xiop0, SImode)));
8837 }
8838 else if (code == LSHIFTRT || code == ASHIFTRT
8839 || code == ASHIFT || code == ROTATERT)
8840 {
8841 rtx op = XEXP (index, 1);
8842
8843 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8844 && CONST_INT_P (op)
8845 && INTVAL (op) > 0
8846 && INTVAL (op) <= 31);
8847 }
8848 }
8849
8850 /* For ARM v4 we may be doing a sign-extend operation during the
8851 load. */
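/* Illustrative ranges derived from the checks below: an SImode access can
   use a 12-bit immediate (so RANGE is 4096, i.e. offsets -4095..4095),
   while HImode/HFmode and sign-extended QImode accesses on ARMv4+ use the
   LDRH/LDRSH/LDRSB forms with an 8-bit immediate (RANGE 256).  */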
8852 if (arm_arch4)
8853 {
8854 if (mode == HImode
8855 || mode == HFmode
8856 || (outer == SIGN_EXTEND && mode == QImode))
8857 range = 256;
8858 else
8859 range = 4096;
8860 }
8861 else
8862 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8863
8864 return (code == CONST_INT
8865 && INTVAL (index) < range
8866 && INTVAL (index) > -range);
8867 }
8868
8869 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8870 index operand, i.e. 1, 2, 4 or 8. */
8871 static bool
8872 thumb2_index_mul_operand (rtx op)
8873 {
8874 HOST_WIDE_INT val;
8875
8876 if (!CONST_INT_P (op))
8877 return false;
8878
8879 val = INTVAL (op);
8880 return (val == 1 || val == 2 || val == 4 || val == 8);
8881 }
8882
8883 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8884 static int
8885 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8886 {
8887 enum rtx_code code = GET_CODE (index);
8888
8889 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8890 /* Standard coprocessor addressing modes. */
8891 if (TARGET_VFP_BASE
8892 && (mode == SFmode || mode == DFmode))
8893 return (code == CONST_INT && INTVAL (index) < 1024
8894 /* Thumb-2 allows only a > -256 index range for its core register
8895 loads/stores. Since we allow SF/DF in core registers, we have
8896 to use the intersection between -256~4096 (core) and -1024~1024
8897 (coprocessor). */
8898 && INTVAL (index) > -256
8899 && (INTVAL (index) & 3) == 0);
8900
8901 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8902 {
8903 /* For DImode assume values will usually live in core regs
8904 and only allow LDRD addressing modes. */
8905 if (!TARGET_LDRD || mode != DImode)
8906 return (code == CONST_INT
8907 && INTVAL (index) < 1024
8908 && INTVAL (index) > -1024
8909 && (INTVAL (index) & 3) == 0);
8910 }
8911
8912 /* For quad modes, we restrict the constant offset to be slightly less
8913 than what the instruction format permits. We do this because for
8914 quad mode moves, we will actually decompose them into two separate
8915 double-mode reads or writes. INDEX must therefore be a valid
8916 (double-mode) offset and so should INDEX+8. */
8917 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8918 return (code == CONST_INT
8919 && INTVAL (index) < 1016
8920 && INTVAL (index) > -1024
8921 && (INTVAL (index) & 3) == 0);
8922
8923 /* We have no such constraint on double mode offsets, so we permit the
8924 full range of the instruction format. */
8925 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8926 return (code == CONST_INT
8927 && INTVAL (index) < 1024
8928 && INTVAL (index) > -1024
8929 && (INTVAL (index) & 3) == 0);
8930
8931 if (arm_address_register_rtx_p (index, strict_p)
8932 && (GET_MODE_SIZE (mode) <= 4))
8933 return 1;
8934
8935 if (mode == DImode || mode == DFmode)
8936 {
8937 if (code == CONST_INT)
8938 {
8939 HOST_WIDE_INT val = INTVAL (index);
8940 /* Thumb-2 ldrd only has reg+const addressing modes.
8941 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8942 If vldr is selected it uses arm_coproc_mem_operand. */
8943 if (TARGET_LDRD)
8944 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8945 else
8946 return IN_RANGE (val, -255, 4095 - 4);
8947 }
8948 else
8949 return 0;
8950 }
8951
8952 if (code == MULT)
8953 {
8954 rtx xiop0 = XEXP (index, 0);
8955 rtx xiop1 = XEXP (index, 1);
8956
8957 return ((arm_address_register_rtx_p (xiop0, strict_p)
8958 && thumb2_index_mul_operand (xiop1))
8959 || (arm_address_register_rtx_p (xiop1, strict_p)
8960 && thumb2_index_mul_operand (xiop0)));
8961 }
8962 else if (code == ASHIFT)
8963 {
8964 rtx op = XEXP (index, 1);
8965
8966 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8967 && CONST_INT_P (op)
8968 && INTVAL (op) > 0
8969 && INTVAL (op) <= 3);
8970 }
8971
8972 return (code == CONST_INT
8973 && INTVAL (index) < 4096
8974 && INTVAL (index) > -256);
8975 }
8976
8977 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8978 static int
8979 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8980 {
8981 int regno;
8982
8983 if (!REG_P (x))
8984 return 0;
8985
8986 regno = REGNO (x);
8987
8988 if (strict_p)
8989 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8990
8991 return (regno <= LAST_LO_REGNUM
8992 || regno > LAST_VIRTUAL_REGISTER
8993 || regno == FRAME_POINTER_REGNUM
8994 || (GET_MODE_SIZE (mode) >= 4
8995 && (regno == STACK_POINTER_REGNUM
8996 || regno >= FIRST_PSEUDO_REGISTER
8997 || x == hard_frame_pointer_rtx
8998 || x == arg_pointer_rtx)));
8999 }
9000
9001 /* Return nonzero if x is a legitimate index register. This is the case
9002 for any base register that can access a QImode object. */
9003 inline static int
9004 thumb1_index_register_rtx_p (rtx x, int strict_p)
9005 {
9006 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9007 }
9008
9009 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9010
9011 The AP may be eliminated to either the SP or the FP, so we use the
9012 least common denominator, e.g. SImode, and offsets from 0 to 64.
9013
9014 ??? Verify whether the above is the right approach.
9015
9016 ??? Also, the FP may be eliminated to the SP, so perhaps that
9017 needs special handling also.
9018
9019 ??? Look at how the mips16 port solves this problem. It probably uses
9020 better ways to solve some of these problems.
9021
9022 Although it is not incorrect, we don't accept QImode and HImode
9023 addresses based on the frame pointer or arg pointer until the
9024 reload pass starts. This is so that eliminating such addresses
9025 into stack based ones won't produce impossible code. */
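/* Examples (illustrative only, derived from the checks below): for SImode,
   (plus (reg r3) (const_int 28)) is accepted (offset 0..124, word aligned),
   an SP-relative SImode offset may be as large as 1020, and REG+REG is
   accepted for two index registers as long as neither is the frame
   pointer.  */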
9026 int
9027 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9028 {
9029 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9030 return 0;
9031
9032 /* ??? Not clear if this is right. Experiment. */
9033 if (GET_MODE_SIZE (mode) < 4
9034 && !(reload_in_progress || reload_completed)
9035 && (reg_mentioned_p (frame_pointer_rtx, x)
9036 || reg_mentioned_p (arg_pointer_rtx, x)
9037 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9038 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9039 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9040 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9041 return 0;
9042
9043 /* Accept any base register. SP only in SImode or larger. */
9044 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9045 return 1;
9046
9047 /* This is PC relative data before arm_reorg runs. */
9048 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9049 && SYMBOL_REF_P (x)
9050 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9051 && !arm_disable_literal_pool)
9052 return 1;
9053
9054 /* This is PC relative data after arm_reorg runs. */
9055 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9056 && reload_completed
9057 && (LABEL_REF_P (x)
9058 || (GET_CODE (x) == CONST
9059 && GET_CODE (XEXP (x, 0)) == PLUS
9060 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9061 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9062 return 1;
9063
9064 /* Post-inc indexing only supported for SImode and larger. */
9065 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9066 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9067 return 1;
9068
9069 else if (GET_CODE (x) == PLUS)
9070 {
9071 /* REG+REG address can be any two index registers. */
9072 /* We disallow FRAME+REG addressing since we know that FRAME
9073 will be replaced with STACK, and SP relative addressing only
9074 permits SP+OFFSET. */
9075 if (GET_MODE_SIZE (mode) <= 4
9076 && XEXP (x, 0) != frame_pointer_rtx
9077 && XEXP (x, 1) != frame_pointer_rtx
9078 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9079 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9080 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9081 return 1;
9082
9083 /* REG+const has 5-7 bit offset for non-SP registers. */
9084 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9085 || XEXP (x, 0) == arg_pointer_rtx)
9086 && CONST_INT_P (XEXP (x, 1))
9087 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9088 return 1;
9089
9090 /* REG+const has 10-bit offset for SP, but only SImode and
9091 larger is supported. */
9092 /* ??? Should probably check for DI/DFmode overflow here
9093 just like GO_IF_LEGITIMATE_OFFSET does. */
9094 else if (REG_P (XEXP (x, 0))
9095 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9096 && GET_MODE_SIZE (mode) >= 4
9097 && CONST_INT_P (XEXP (x, 1))
9098 && INTVAL (XEXP (x, 1)) >= 0
9099 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9100 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9101 return 1;
9102
9103 else if (REG_P (XEXP (x, 0))
9104 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9105 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9106 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9107 && REGNO (XEXP (x, 0))
9108 <= LAST_VIRTUAL_POINTER_REGISTER))
9109 && GET_MODE_SIZE (mode) >= 4
9110 && CONST_INT_P (XEXP (x, 1))
9111 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9112 return 1;
9113 }
9114
9115 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9116 && GET_MODE_SIZE (mode) == 4
9117 && SYMBOL_REF_P (x)
9118 && CONSTANT_POOL_ADDRESS_P (x)
9119 && !arm_disable_literal_pool
9120 && ! (flag_pic
9121 && symbol_mentioned_p (get_pool_constant (x))
9122 && ! pcrel_constant_p (get_pool_constant (x))))
9123 return 1;
9124
9125 return 0;
9126 }
9127
9128 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9129 instruction of mode MODE. */
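/* Worked limits (illustrative, matching the cases below): byte accesses
   allow offsets 0..31, halfword accesses 0..62 (even), and SImode accesses
   0..124 (offset + mode size must not exceed 128 and the offset must be a
   multiple of 4).  */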
9130 int
9131 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9132 {
9133 switch (GET_MODE_SIZE (mode))
9134 {
9135 case 1:
9136 return val >= 0 && val < 32;
9137
9138 case 2:
9139 return val >= 0 && val < 64 && (val & 1) == 0;
9140
9141 default:
9142 return (val >= 0
9143 && (val + GET_MODE_SIZE (mode)) <= 128
9144 && (val & 3) == 0);
9145 }
9146 }
9147
9148 bool
9149 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9150 {
9151 if (TARGET_ARM)
9152 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9153 else if (TARGET_THUMB2)
9154 return thumb2_legitimate_address_p (mode, x, strict_p);
9155 else /* if (TARGET_THUMB1) */
9156 return thumb1_legitimate_address_p (mode, x, strict_p);
9157 }
9158
9159 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9160
9161 Given an rtx X being reloaded into a reg required to be
9162 in class CLASS, return the class of reg to actually use.
9163 In general this is just CLASS, but for the Thumb core registers and
9164 immediate constants we prefer a LO_REGS class or a subset. */
9165
9166 static reg_class_t
9167 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9168 {
9169 if (TARGET_32BIT)
9170 return rclass;
9171 else
9172 {
9173 if (rclass == GENERAL_REGS)
9174 return LO_REGS;
9175 else
9176 return rclass;
9177 }
9178 }
9179
9180 /* Build the SYMBOL_REF for __tls_get_addr. */
9181
9182 static GTY(()) rtx tls_get_addr_libfunc;
9183
9184 static rtx
9185 get_tls_get_addr (void)
9186 {
9187 if (!tls_get_addr_libfunc)
9188 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9189 return tls_get_addr_libfunc;
9190 }
9191
9192 rtx
9193 arm_load_tp (rtx target)
9194 {
9195 if (!target)
9196 target = gen_reg_rtx (SImode);
9197
9198 if (TARGET_HARD_TP)
9199 {
9200 /* Can return in any reg. */
9201 emit_insn (gen_load_tp_hard (target));
9202 }
9203 else
9204 {
9205 /* Always returned in r0. Immediately copy the result into a pseudo,
9206 otherwise other uses of r0 (e.g. setting up function arguments) may
9207 clobber the value. */
9208
9209 rtx tmp;
9210
9211 if (TARGET_FDPIC)
9212 {
9213 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9214 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9215
9216 emit_insn (gen_load_tp_soft_fdpic ());
9217
9218 /* Restore r9. */
9219 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9220 }
9221 else
9222 emit_insn (gen_load_tp_soft ());
9223
9224 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9225 emit_move_insn (target, tmp);
9226 }
9227 return target;
9228 }
9229
9230 static rtx
9231 load_tls_operand (rtx x, rtx reg)
9232 {
9233 rtx tmp;
9234
9235 if (reg == NULL_RTX)
9236 reg = gen_reg_rtx (SImode);
9237
9238 tmp = gen_rtx_CONST (SImode, x);
9239
9240 emit_move_insn (reg, tmp);
9241
9242 return reg;
9243 }
9244
9245 static rtx_insn *
9246 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9247 {
9248 rtx label, labelno = NULL_RTX, sum;
9249
9250 gcc_assert (reloc != TLS_DESCSEQ);
9251 start_sequence ();
9252
9253 if (TARGET_FDPIC)
9254 {
9255 sum = gen_rtx_UNSPEC (Pmode,
9256 gen_rtvec (2, x, GEN_INT (reloc)),
9257 UNSPEC_TLS);
9258 }
9259 else
9260 {
9261 labelno = GEN_INT (pic_labelno++);
9262 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9263 label = gen_rtx_CONST (VOIDmode, label);
9264
9265 sum = gen_rtx_UNSPEC (Pmode,
9266 gen_rtvec (4, x, GEN_INT (reloc), label,
9267 GEN_INT (TARGET_ARM ? 8 : 4)),
9268 UNSPEC_TLS);
9269 }
9270 reg = load_tls_operand (sum, reg);
9271
9272 if (TARGET_FDPIC)
9273 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9274 else if (TARGET_ARM)
9275 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9276 else
9277 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9278
9279 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9280 LCT_PURE, /* LCT_CONST? */
9281 Pmode, reg, Pmode);
9282
9283 rtx_insn *insns = get_insns ();
9284 end_sequence ();
9285
9286 return insns;
9287 }
9288
9289 static rtx
9290 arm_tls_descseq_addr (rtx x, rtx reg)
9291 {
9292 rtx labelno = GEN_INT (pic_labelno++);
9293 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9294 rtx sum = gen_rtx_UNSPEC (Pmode,
9295 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9296 gen_rtx_CONST (VOIDmode, label),
9297 GEN_INT (!TARGET_ARM)),
9298 UNSPEC_TLS);
9299 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9300
9301 emit_insn (gen_tlscall (x, labelno));
9302 if (!reg)
9303 reg = gen_reg_rtx (SImode);
9304 else
9305 gcc_assert (REGNO (reg) != R0_REGNUM);
9306
9307 emit_move_insn (reg, reg0);
9308
9309 return reg;
9310 }
9311
9312
9313 rtx
9314 legitimize_tls_address (rtx x, rtx reg)
9315 {
9316 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9317 rtx_insn *insns;
9318 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9319
9320 switch (model)
9321 {
9322 case TLS_MODEL_GLOBAL_DYNAMIC:
9323 if (TARGET_GNU2_TLS)
9324 {
9325 gcc_assert (!TARGET_FDPIC);
9326
9327 reg = arm_tls_descseq_addr (x, reg);
9328
9329 tp = arm_load_tp (NULL_RTX);
9330
9331 dest = gen_rtx_PLUS (Pmode, tp, reg);
9332 }
9333 else
9334 {
9335 /* Original scheme */
9336 if (TARGET_FDPIC)
9337 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9338 else
9339 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9340 dest = gen_reg_rtx (Pmode);
9341 emit_libcall_block (insns, dest, ret, x);
9342 }
9343 return dest;
9344
9345 case TLS_MODEL_LOCAL_DYNAMIC:
9346 if (TARGET_GNU2_TLS)
9347 {
9348 gcc_assert (!TARGET_FDPIC);
9349
9350 reg = arm_tls_descseq_addr (x, reg);
9351
9352 tp = arm_load_tp (NULL_RTX);
9353
9354 dest = gen_rtx_PLUS (Pmode, tp, reg);
9355 }
9356 else
9357 {
9358 if (TARGET_FDPIC)
9359 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9360 else
9361 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9362
9363 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9364 share the LDM result with other LD model accesses. */
9365 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9366 UNSPEC_TLS);
9367 dest = gen_reg_rtx (Pmode);
9368 emit_libcall_block (insns, dest, ret, eqv);
9369
9370 /* Load the addend. */
9371 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9372 GEN_INT (TLS_LDO32)),
9373 UNSPEC_TLS);
9374 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9375 dest = gen_rtx_PLUS (Pmode, dest, addend);
9376 }
9377 return dest;
9378
9379 case TLS_MODEL_INITIAL_EXEC:
9380 if (TARGET_FDPIC)
9381 {
9382 sum = gen_rtx_UNSPEC (Pmode,
9383 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9384 UNSPEC_TLS);
9385 reg = load_tls_operand (sum, reg);
9386 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9387 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9388 }
9389 else
9390 {
9391 labelno = GEN_INT (pic_labelno++);
9392 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9393 label = gen_rtx_CONST (VOIDmode, label);
9394 sum = gen_rtx_UNSPEC (Pmode,
9395 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9396 GEN_INT (TARGET_ARM ? 8 : 4)),
9397 UNSPEC_TLS);
9398 reg = load_tls_operand (sum, reg);
9399
9400 if (TARGET_ARM)
9401 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9402 else if (TARGET_THUMB2)
9403 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9404 else
9405 {
9406 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9407 emit_move_insn (reg, gen_const_mem (SImode, reg));
9408 }
9409 }
9410
9411 tp = arm_load_tp (NULL_RTX);
9412
9413 return gen_rtx_PLUS (Pmode, tp, reg);
9414
9415 case TLS_MODEL_LOCAL_EXEC:
9416 tp = arm_load_tp (NULL_RTX);
9417
9418 reg = gen_rtx_UNSPEC (Pmode,
9419 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9420 UNSPEC_TLS);
9421 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9422
9423 return gen_rtx_PLUS (Pmode, tp, reg);
9424
9425 default:
9426 abort ();
9427 }
9428 }
9429
9430 /* Try machine-dependent ways of modifying an illegitimate address
9431 to be legitimate. If we find one, return the new, valid address. */
9432 rtx
9433 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9434 {
9435 if (arm_tls_referenced_p (x))
9436 {
9437 rtx addend = NULL;
9438
9439 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9440 {
9441 addend = XEXP (XEXP (x, 0), 1);
9442 x = XEXP (XEXP (x, 0), 0);
9443 }
9444
9445 if (!SYMBOL_REF_P (x))
9446 return x;
9447
9448 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9449
9450 x = legitimize_tls_address (x, NULL_RTX);
9451
9452 if (addend)
9453 {
9454 x = gen_rtx_PLUS (SImode, x, addend);
9455 orig_x = x;
9456 }
9457 else
9458 return x;
9459 }
9460
9461 if (TARGET_THUMB1)
9462 return thumb_legitimize_address (x, orig_x, mode);
9463
9464 if (GET_CODE (x) == PLUS)
9465 {
9466 rtx xop0 = XEXP (x, 0);
9467 rtx xop1 = XEXP (x, 1);
9468
9469 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9470 xop0 = force_reg (SImode, xop0);
9471
9472 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9473 && !symbol_mentioned_p (xop1))
9474 xop1 = force_reg (SImode, xop1);
9475
9476 if (ARM_BASE_REGISTER_RTX_P (xop0)
9477 && CONST_INT_P (xop1))
9478 {
9479 HOST_WIDE_INT n, low_n;
9480 rtx base_reg, val;
9481 n = INTVAL (xop1);
9482
9483 /* VFP addressing modes actually allow greater offsets, but for
9484 now we just stick with the lowest common denominator. */
9485 if (mode == DImode || mode == DFmode)
9486 {
9487 low_n = n & 0x0f;
9488 n &= ~0x0f;
9489 if (low_n > 4)
9490 {
9491 n += 16;
9492 low_n -= 16;
9493 }
9494 }
9495 else
9496 {
9497 low_n = ((mode) == TImode ? 0
9498 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9499 n -= low_n;
9500 }
9501
9502 base_reg = gen_reg_rtx (SImode);
9503 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9504 emit_move_insn (base_reg, val);
9505 x = plus_constant (Pmode, base_reg, low_n);
9506 }
9507 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9508 x = gen_rtx_PLUS (SImode, xop0, xop1);
9509 }
9510
9511 /* XXX We don't allow MINUS any more -- see comment in
9512 arm_legitimate_address_outer_p (). */
9513 else if (GET_CODE (x) == MINUS)
9514 {
9515 rtx xop0 = XEXP (x, 0);
9516 rtx xop1 = XEXP (x, 1);
9517
9518 if (CONSTANT_P (xop0))
9519 xop0 = force_reg (SImode, xop0);
9520
9521 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9522 xop1 = force_reg (SImode, xop1);
9523
9524 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9525 x = gen_rtx_MINUS (SImode, xop0, xop1);
9526 }
9527
9528 /* Make sure to take full advantage of the pre-indexed addressing mode
9529 with absolute addresses, which often allows the base register to
9530 be factorized for multiple adjacent memory references, and might
9531 even allow the minipool to be avoided entirely. */
9532 else if (CONST_INT_P (x) && optimize > 0)
9533 {
9534 unsigned int bits;
9535 HOST_WIDE_INT mask, base, index;
9536 rtx base_reg;
9537
9538 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9539 only use an 8-bit index. So let's use a 12-bit index for
9540 SImode only and hope that arm_gen_constant will enable LDRB
9541 to use more bits. */
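/* For instance (an illustrative sketch): an SImode load from absolute
   address 0x12345678 uses bits == 12, so base = 0x12345000 is forced into
   a register and the access becomes [base_reg, #0x678].  */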
9542 bits = (mode == SImode) ? 12 : 8;
9543 mask = (1 << bits) - 1;
9544 base = INTVAL (x) & ~mask;
9545 index = INTVAL (x) & mask;
9546 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9547 {
9548 /* It'll most probably be more efficient to generate the
9549 base with more bits set and use a negative index instead.
9550 Don't do this for Thumb as negative offsets are much more
9551 limited. */
9552 base |= mask;
9553 index -= mask;
9554 }
9555 base_reg = force_reg (SImode, GEN_INT (base));
9556 x = plus_constant (Pmode, base_reg, index);
9557 }
9558
9559 if (flag_pic)
9560 {
9561 /* We need to find and carefully transform any SYMBOL and LABEL
9562 references; so go back to the original address expression. */
9563 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9564 false /*compute_now*/);
9565
9566 if (new_x != orig_x)
9567 x = new_x;
9568 }
9569
9570 return x;
9571 }
9572
9573
9574 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9575 to be legitimate. If we find one, return the new, valid address. */
9576 rtx
9577 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9578 {
9579 if (GET_CODE (x) == PLUS
9580 && CONST_INT_P (XEXP (x, 1))
9581 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9582 || INTVAL (XEXP (x, 1)) < 0))
9583 {
9584 rtx xop0 = XEXP (x, 0);
9585 rtx xop1 = XEXP (x, 1);
9586 HOST_WIDE_INT offset = INTVAL (xop1);
9587
9588 /* Try and fold the offset into a biasing of the base register and
9589 then offsetting that. Don't do this when optimizing for space
9590 since it can cause too many CSEs. */
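/* As an illustration (not from the original source): an SImode access at
   base + 260 can be rebased as base' = base + 252 and then accessed as
   [base', #8], keeping the residual offset within the 5-bit scaled range.  */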
9591 if (optimize_size && offset >= 0
9592 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9593 {
9594 HOST_WIDE_INT delta;
9595
9596 if (offset >= 256)
9597 delta = offset - (256 - GET_MODE_SIZE (mode));
9598 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9599 delta = 31 * GET_MODE_SIZE (mode);
9600 else
9601 delta = offset & (~31 * GET_MODE_SIZE (mode));
9602
9603 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9604 NULL_RTX);
9605 x = plus_constant (Pmode, xop0, delta);
9606 }
9607 else if (offset < 0 && offset > -256)
9608 /* Small negative offsets are best done with a subtract before the
9609 dereference, since forcing these into a register normally takes two
9610 instructions. */
9611 x = force_operand (x, NULL_RTX);
9612 else
9613 {
9614 /* For the remaining cases, force the constant into a register. */
9615 xop1 = force_reg (SImode, xop1);
9616 x = gen_rtx_PLUS (SImode, xop0, xop1);
9617 }
9618 }
9619 else if (GET_CODE (x) == PLUS
9620 && s_register_operand (XEXP (x, 1), SImode)
9621 && !s_register_operand (XEXP (x, 0), SImode))
9622 {
9623 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9624
9625 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9626 }
9627
9628 if (flag_pic)
9629 {
9630 /* We need to find and carefully transform any SYMBOL and LABEL
9631 references; so go back to the original address expression. */
9632 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9633 false /*compute_now*/);
9634
9635 if (new_x != orig_x)
9636 x = new_x;
9637 }
9638
9639 return x;
9640 }
9641
9642 /* Return TRUE if X contains any TLS symbol references. */
9643
9644 bool
9645 arm_tls_referenced_p (rtx x)
9646 {
9647 if (! TARGET_HAVE_TLS)
9648 return false;
9649
9650 subrtx_iterator::array_type array;
9651 FOR_EACH_SUBRTX (iter, array, x, ALL)
9652 {
9653 const_rtx x = *iter;
9654 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9655 {
9656 /* ARM currently does not provide relocations to encode TLS variables
9657 into AArch32 instructions, only data, so there is no way to
9658 currently implement these if a literal pool is disabled. */
9659 if (arm_disable_literal_pool)
9660 sorry ("accessing thread-local storage is not currently supported "
9661 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9662
9663 return true;
9664 }
9665
9666 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9667 TLS offsets, not real symbol references. */
9668 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9669 iter.skip_subrtxes ();
9670 }
9671 return false;
9672 }
9673
9674 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9675
9676 On the ARM, allow any integer (invalid ones are removed later by insn
9677 patterns), nice doubles and symbol_refs which refer to the function's
9678 constant pool XXX.
9679
9680 When generating pic allow anything. */
9681
9682 static bool
9683 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9684 {
9685 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9686 return false;
9687
9688 return flag_pic || !label_mentioned_p (x);
9689 }
9690
9691 static bool
9692 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9693 {
9694 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9695 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
9696 for ARMv8-M Baseline or later, the result is valid. */
9697 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9698 x = XEXP (x, 0);
9699
9700 return (CONST_INT_P (x)
9701 || CONST_DOUBLE_P (x)
9702 || CONSTANT_ADDRESS_P (x)
9703 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9704 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9705 we build the symbol address with upper/lower
9706 relocations. */
9707 || (TARGET_THUMB1
9708 && !label_mentioned_p (x)
9709 && arm_valid_symbolic_address_p (x)
9710 && arm_disable_literal_pool)
9711 || flag_pic);
9712 }
9713
9714 static bool
9715 arm_legitimate_constant_p (machine_mode mode, rtx x)
9716 {
9717 return (!arm_cannot_force_const_mem (mode, x)
9718 && (TARGET_32BIT
9719 ? arm_legitimate_constant_p_1 (mode, x)
9720 : thumb_legitimate_constant_p (mode, x)));
9721 }
9722
9723 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9724
9725 static bool
9726 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9727 {
9728 rtx base, offset;
9729 split_const (x, &base, &offset);
9730
9731 if (SYMBOL_REF_P (base))
9732 {
9733 /* Function symbols cannot have an offset due to the Thumb bit. */
9734 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9735 && INTVAL (offset) != 0)
9736 return true;
9737
9738 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9739 && !offset_within_block_p (base, INTVAL (offset)))
9740 return true;
9741 }
9742 return arm_tls_referenced_p (x);
9743 }
9744 \f
9745 #define REG_OR_SUBREG_REG(X) \
9746 (REG_P (X) \
9747 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9748
9749 #define REG_OR_SUBREG_RTX(X) \
9750 (REG_P (X) ? (X) : SUBREG_REG (X))
9751
9752 static inline int
9753 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9754 {
9755 machine_mode mode = GET_MODE (x);
9756 int total, words;
9757
9758 switch (code)
9759 {
9760 case ASHIFT:
9761 case ASHIFTRT:
9762 case LSHIFTRT:
9763 case ROTATERT:
9764 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9765
9766 case PLUS:
9767 case MINUS:
9768 case COMPARE:
9769 case NEG:
9770 case NOT:
9771 return COSTS_N_INSNS (1);
9772
9773 case MULT:
9774 if (arm_arch6m && arm_m_profile_small_mul)
9775 return COSTS_N_INSNS (32);
9776
9777 if (CONST_INT_P (XEXP (x, 1)))
9778 {
9779 int cycles = 0;
9780 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9781
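/* The loop below charges one cycle per 2-bit chunk of the constant; for
   example (illustrative), a multiplier of 100 (7 significant bits) needs
   four iterations, giving COSTS_N_INSNS (2) + 4.  */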
9782 while (i)
9783 {
9784 i >>= 2;
9785 cycles++;
9786 }
9787 return COSTS_N_INSNS (2) + cycles;
9788 }
9789 return COSTS_N_INSNS (1) + 16;
9790
9791 case SET:
9792 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9793 the mode. */
9794 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9795 return (COSTS_N_INSNS (words)
9796 + 4 * ((MEM_P (SET_SRC (x)))
9797 + MEM_P (SET_DEST (x))));
9798
9799 case CONST_INT:
9800 if (outer == SET)
9801 {
9802 if (UINTVAL (x) < 256
9803 /* 16-bit constant. */
9804 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9805 return 0;
9806 if (thumb_shiftable_const (INTVAL (x)))
9807 return COSTS_N_INSNS (2);
9808 return arm_disable_literal_pool
9809 ? COSTS_N_INSNS (8)
9810 : COSTS_N_INSNS (3);
9811 }
9812 else if ((outer == PLUS || outer == COMPARE)
9813 && INTVAL (x) < 256 && INTVAL (x) > -256)
9814 return 0;
9815 else if ((outer == IOR || outer == XOR || outer == AND)
9816 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9817 return COSTS_N_INSNS (1);
9818 else if (outer == AND)
9819 {
9820 int i;
9821 /* This duplicates the tests in the andsi3 expander. */
9822 for (i = 9; i <= 31; i++)
9823 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9824 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9825 return COSTS_N_INSNS (2);
9826 }
9827 else if (outer == ASHIFT || outer == ASHIFTRT
9828 || outer == LSHIFTRT)
9829 return 0;
9830 return COSTS_N_INSNS (2);
9831
9832 case CONST:
9833 case CONST_DOUBLE:
9834 case LABEL_REF:
9835 case SYMBOL_REF:
9836 return COSTS_N_INSNS (3);
9837
9838 case UDIV:
9839 case UMOD:
9840 case DIV:
9841 case MOD:
9842 return 100;
9843
9844 case TRUNCATE:
9845 return 99;
9846
9847 case AND:
9848 case XOR:
9849 case IOR:
9850 /* XXX guess. */
9851 return 8;
9852
9853 case MEM:
9854 /* XXX another guess. */
9855 /* Memory costs quite a lot for the first word, but subsequent words
9856 load at the equivalent of a single insn each. */
9857 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9858 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9859 ? 4 : 0));
9860
9861 case IF_THEN_ELSE:
9862 /* XXX a guess. */
9863 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9864 return 14;
9865 return 2;
9866
9867 case SIGN_EXTEND:
9868 case ZERO_EXTEND:
9869 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9870 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9871
9872 if (mode == SImode)
9873 return total;
9874
9875 if (arm_arch6)
9876 return total + COSTS_N_INSNS (1);
9877
9878 /* Assume a two-shift sequence. Increase the cost slightly so
9879 we prefer actual shifts over an extend operation. */
9880 return total + 1 + COSTS_N_INSNS (2);
9881
9882 default:
9883 return 99;
9884 }
9885 }
9886
9887 /* Estimate the size cost of Thumb-1 instructions.
9888 For now most of the code is copied from thumb1_rtx_costs; we need more
9889 fine-grained tuning when we have more related test cases. */
9890 static inline int
9891 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9892 {
9893 machine_mode mode = GET_MODE (x);
9894 int words, cost;
9895
9896 switch (code)
9897 {
9898 case ASHIFT:
9899 case ASHIFTRT:
9900 case LSHIFTRT:
9901 case ROTATERT:
9902 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9903
9904 case PLUS:
9905 case MINUS:
9906 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9907 patterns generated by RTL expansion, especially for the expansion of
9908 multiplication. */
9909 if ((GET_CODE (XEXP (x, 0)) == MULT
9910 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9911 || (GET_CODE (XEXP (x, 1)) == MULT
9912 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9913 return COSTS_N_INSNS (2);
9914 /* Fall through. */
9915 case COMPARE:
9916 case NEG:
9917 case NOT:
9918 return COSTS_N_INSNS (1);
9919
9920 case MULT:
9921 if (CONST_INT_P (XEXP (x, 1)))
9922 {
9923 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9924 into a register first. */
9925 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9926 /* For targets that have a very small, high-latency multiply
9927 unit, we prefer to synthesize the mult with up to 5 instructions,
9928 giving a good balance between size and performance. */
9929 if (arm_arch6m && arm_m_profile_small_mul)
9930 return COSTS_N_INSNS (5);
9931 else
9932 return COSTS_N_INSNS (1) + const_size;
9933 }
9934 return COSTS_N_INSNS (1);
9935
9936 case SET:
9937 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9938 the mode. */
9939 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9940 cost = COSTS_N_INSNS (words);
9941 if (satisfies_constraint_J (SET_SRC (x))
9942 || satisfies_constraint_K (SET_SRC (x))
9943 /* An immediate too big for a 2-byte mov, so MOVT is used. */
9944 || (CONST_INT_P (SET_SRC (x))
9945 && UINTVAL (SET_SRC (x)) >= 256
9946 && TARGET_HAVE_MOVT
9947 && satisfies_constraint_j (SET_SRC (x)))
9948 /* thumb1_movdi_insn. */
9949 || ((words > 1) && MEM_P (SET_SRC (x))))
9950 cost += COSTS_N_INSNS (1);
9951 return cost;
9952
9953 case CONST_INT:
9954 if (outer == SET)
9955 {
9956 if (UINTVAL (x) < 256)
9957 return COSTS_N_INSNS (1);
9958 /* movw is 4byte long. */
9959 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9960 return COSTS_N_INSNS (2);
9961 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9962 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9963 return COSTS_N_INSNS (2);
9964 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9965 if (thumb_shiftable_const (INTVAL (x)))
9966 return COSTS_N_INSNS (2);
9967 return arm_disable_literal_pool
9968 ? COSTS_N_INSNS (8)
9969 : COSTS_N_INSNS (3);
9970 }
9971 else if ((outer == PLUS || outer == COMPARE)
9972 && INTVAL (x) < 256 && INTVAL (x) > -256)
9973 return 0;
9974 else if ((outer == IOR || outer == XOR || outer == AND)
9975 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9976 return COSTS_N_INSNS (1);
9977 else if (outer == AND)
9978 {
9979 int i;
9980 /* This duplicates the tests in the andsi3 expander. */
9981 for (i = 9; i <= 31; i++)
9982 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9983 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9984 return COSTS_N_INSNS (2);
9985 }
9986 else if (outer == ASHIFT || outer == ASHIFTRT
9987 || outer == LSHIFTRT)
9988 return 0;
9989 return COSTS_N_INSNS (2);
9990
9991 case CONST:
9992 case CONST_DOUBLE:
9993 case LABEL_REF:
9994 case SYMBOL_REF:
9995 return COSTS_N_INSNS (3);
9996
9997 case UDIV:
9998 case UMOD:
9999 case DIV:
10000 case MOD:
10001 return 100;
10002
10003 case TRUNCATE:
10004 return 99;
10005
10006 case AND:
10007 case XOR:
10008 case IOR:
10009 return COSTS_N_INSNS (1);
10010
10011 case MEM:
10012 return (COSTS_N_INSNS (1)
10013 + COSTS_N_INSNS (1)
10014 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10015 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10016 ? COSTS_N_INSNS (1) : 0));
10017
10018 case IF_THEN_ELSE:
10019 /* XXX a guess. */
10020 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10021 return 14;
10022 return 2;
10023
10024 case ZERO_EXTEND:
10025 /* XXX still guessing. */
10026 switch (GET_MODE (XEXP (x, 0)))
10027 {
10028 case E_QImode:
10029 return (1 + (mode == DImode ? 4 : 0)
10030 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10031
10032 case E_HImode:
10033 return (4 + (mode == DImode ? 4 : 0)
10034 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10035
10036 case E_SImode:
10037 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10038
10039 default:
10040 return 99;
10041 }
10042
10043 default:
10044 return 99;
10045 }
10046 }
10047
10048 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10049 PLUS, adds the carry flag, then return the other operand. If
10050 neither is a carry, return OP unchanged. */
10051 static rtx
10052 strip_carry_operation (rtx op)
10053 {
10054 gcc_assert (GET_CODE (op) == PLUS);
10055 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10056 return XEXP (op, 1);
10057 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10058 return XEXP (op, 0);
10059 return op;
10060 }
10061
10062 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10063 operand, then return the operand that is being shifted. If the shift
10064 is not by a constant, then set SHIFT_REG to point to the operand.
10065 Return NULL if OP is not a shifter operand. */
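/* For example (illustrative): given (mult (reg r1) (const_int 4)) this
   returns r1, since a multiply by a power of two is really a left shift
   by a constant; for (ashift (reg r1) (reg r2)) it returns r1 and sets
   *SHIFT_REG to r2.  */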
10066 static rtx
10067 shifter_op_p (rtx op, rtx *shift_reg)
10068 {
10069 enum rtx_code code = GET_CODE (op);
10070
10071 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10072 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10073 return XEXP (op, 0);
10074 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10075 return XEXP (op, 0);
10076 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10077 || code == ASHIFTRT)
10078 {
10079 if (!CONST_INT_P (XEXP (op, 1)))
10080 *shift_reg = XEXP (op, 1);
10081 return XEXP (op, 0);
10082 }
10083
10084 return NULL;
10085 }
10086
10087 static bool
10088 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10089 {
10090 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10091 rtx_code code = GET_CODE (x);
10092 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10093
10094 switch (XINT (x, 1))
10095 {
10096 case UNSPEC_UNALIGNED_LOAD:
10097 /* We can only do unaligned loads into the integer unit, and we can't
10098 use LDM or LDRD. */
10099 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10100 if (speed_p)
10101 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10102 + extra_cost->ldst.load_unaligned);
10103
10104 #ifdef NOT_YET
10105 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10106 ADDR_SPACE_GENERIC, speed_p);
10107 #endif
10108 return true;
10109
10110 case UNSPEC_UNALIGNED_STORE:
10111 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10112 if (speed_p)
10113 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10114 + extra_cost->ldst.store_unaligned);
10115
10116 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10117 #ifdef NOT_YET
10118 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10119 ADDR_SPACE_GENERIC, speed_p);
10120 #endif
10121 return true;
10122
10123 case UNSPEC_VRINTZ:
10124 case UNSPEC_VRINTP:
10125 case UNSPEC_VRINTM:
10126 case UNSPEC_VRINTR:
10127 case UNSPEC_VRINTX:
10128 case UNSPEC_VRINTA:
10129 if (speed_p)
10130 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10131
10132 return true;
10133 default:
10134 *cost = COSTS_N_INSNS (2);
10135 break;
10136 }
10137 return true;
10138 }
10139
10140 /* Cost of a libcall. We assume one insn per argument, an amount for the
10141 call (one insn for -Os) and then one for processing the result. */
10142 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
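/* For example (illustrative): LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */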
10143
10144 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10145 do \
10146 { \
10147 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10148 if (shift_op != NULL \
10149 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10150 { \
10151 if (shift_reg) \
10152 { \
10153 if (speed_p) \
10154 *cost += extra_cost->alu.arith_shift_reg; \
10155 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10156 ASHIFT, 1, speed_p); \
10157 } \
10158 else if (speed_p) \
10159 *cost += extra_cost->alu.arith_shift; \
10160 \
10161 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10162 ASHIFT, 0, speed_p) \
10163 + rtx_cost (XEXP (x, 1 - IDX), \
10164 GET_MODE (shift_op), \
10165 OP, 1, speed_p)); \
10166 return true; \
10167 } \
10168 } \
10169 while (0)
10170
10171 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10172 considering the costs of the addressing mode and memory access
10173 separately. */
10174 static bool
10175 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10176 int *cost, bool speed_p)
10177 {
10178 machine_mode mode = GET_MODE (x);
10179
10180 *cost = COSTS_N_INSNS (1);
10181
10182 if (flag_pic
10183 && GET_CODE (XEXP (x, 0)) == PLUS
10184 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10185 /* This will be split into two instructions. Add the cost of the
10186 additional instruction here. The cost of the memory access is computed
10187 below. See arm.md:calculate_pic_address. */
10188 *cost += COSTS_N_INSNS (1);
10189
10190 /* Calculate cost of the addressing mode. */
10191 if (speed_p)
10192 {
10193 arm_addr_mode_op op_type;
10194 switch (GET_CODE (XEXP (x, 0)))
10195 {
10196 default:
10197 case REG:
10198 op_type = AMO_DEFAULT;
10199 break;
10200 case MINUS:
10201 /* MINUS does not appear in RTL, but the architecture supports it,
10202 so handle this case defensively. */
10203 /* fall through */
10204 case PLUS:
10205 op_type = AMO_NO_WB;
10206 break;
10207 case PRE_INC:
10208 case PRE_DEC:
10209 case POST_INC:
10210 case POST_DEC:
10211 case PRE_MODIFY:
10212 case POST_MODIFY:
10213 op_type = AMO_WB;
10214 break;
10215 }
10216
10217 if (VECTOR_MODE_P (mode))
10218 *cost += current_tune->addr_mode_costs->vector[op_type];
10219 else if (FLOAT_MODE_P (mode))
10220 *cost += current_tune->addr_mode_costs->fp[op_type];
10221 else
10222 *cost += current_tune->addr_mode_costs->integer[op_type];
10223 }
10224
10225 /* Calculate cost of memory access. */
10226 if (speed_p)
10227 {
10228 if (FLOAT_MODE_P (mode))
10229 {
10230 if (GET_MODE_SIZE (mode) == 8)
10231 *cost += extra_cost->ldst.loadd;
10232 else
10233 *cost += extra_cost->ldst.loadf;
10234 }
10235 else if (VECTOR_MODE_P (mode))
10236 *cost += extra_cost->ldst.loadv;
10237 else
10238 {
10239 /* Integer modes */
10240 if (GET_MODE_SIZE (mode) == 8)
10241 *cost += extra_cost->ldst.ldrd;
10242 else
10243 *cost += extra_cost->ldst.load;
10244 }
10245 }
10246
10247 return true;
10248 }
10249
10250 /* Helper for arm_bfi_p. */
10251 static bool
10252 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10253 {
10254 unsigned HOST_WIDE_INT const1;
10255 unsigned HOST_WIDE_INT const2 = 0;
10256
10257 if (!CONST_INT_P (XEXP (op0, 1)))
10258 return false;
10259
10260 const1 = UINTVAL (XEXP (op0, 1));
10261 if (!CONST_INT_P (XEXP (op1, 1))
10262 || ~UINTVAL (XEXP (op1, 1)) != const1)
10263 return false;
10264
10265 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10266 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10267 {
10268 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10269 *sub0 = XEXP (XEXP (op0, 0), 0);
10270 }
10271 else
10272 *sub0 = XEXP (op0, 0);
10273
10274 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10275 return false;
10276
10277 *sub1 = XEXP (op1, 0);
10278 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10279 }
10280
10281 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10282 format looks something like:
10283
10284 (IOR (AND (reg1) (~const1))
10285 (AND (ASHIFT (reg2) (const2))
10286 (const1)))
10287
10288 where const1 is a consecutive sequence of 1-bits with the
10289 least-significant non-zero bit starting at bit position const2. If
10290 const2 is zero, then the shift will not appear at all, due to
10291 canonicalization. The two arms of the IOR expression may be
10292 flipped. */
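/* A concrete instance (illustrative only): with const1 == 0xff00 and
   const2 == 8, const1 + (1 << const2) == 0x10000, a power of two, so the
   mask is a contiguous run of bits starting at bit 8 and the IOR matches
   the BFI idiom recognized here.  */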
10293 static bool
10294 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10295 {
10296 if (GET_CODE (x) != IOR)
10297 return false;
10298 if (GET_CODE (XEXP (x, 0)) != AND
10299 || GET_CODE (XEXP (x, 1)) != AND)
10300 return false;
10301 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10302 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10303 }
10304
10305 /* RTX costs. Make an estimate of the cost of executing the operation
10306 X, which is contained within an operation with code OUTER_CODE.
10307 SPEED_P indicates whether the cost desired is the performance cost,
10308 or the size cost. The estimate is stored in COST and the return
10309 value is TRUE if the cost calculation is final, or FALSE if the
10310 caller should recurse through the operands of X to add additional
10311 costs.
10312
10313 We currently make no attempt to model the size savings of Thumb-2
10314 16-bit instructions. At the normal points in compilation where
10315 this code is called we have no measure of whether the condition
10316 flags are live or not, and thus no realistic way to determine what
10317 the size will eventually be. */
10318 static bool
10319 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10320 const struct cpu_cost_table *extra_cost,
10321 int *cost, bool speed_p)
10322 {
10323 machine_mode mode = GET_MODE (x);
10324
10325 *cost = COSTS_N_INSNS (1);
10326
10327 if (TARGET_THUMB1)
10328 {
10329 if (speed_p)
10330 *cost = thumb1_rtx_costs (x, code, outer_code);
10331 else
10332 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10333 return true;
10334 }
10335
10336 switch (code)
10337 {
10338 case SET:
10339 *cost = 0;
10340 /* SET RTXs don't have a mode so we get it from the destination. */
10341 mode = GET_MODE (SET_DEST (x));
10342
10343 if (REG_P (SET_SRC (x))
10344 && REG_P (SET_DEST (x)))
10345 {
10346 /* Assume that most copies can be done with a single insn,
10347 unless we don't have HW FP, in which case everything
10348 larger than word mode will require two insns. */
10349 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10350 && GET_MODE_SIZE (mode) > 4)
10351 || mode == DImode)
10352 ? 2 : 1);
10353 /* Conditional register moves can be encoded
10354 in 16 bits in Thumb mode. */
10355 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10356 *cost >>= 1;
10357
10358 return true;
10359 }
10360
10361 if (CONST_INT_P (SET_SRC (x)))
10362 {
10363 /* Handle CONST_INT here, since the value doesn't have a mode
10364 and we would otherwise be unable to work out the true cost. */
10365 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10366 0, speed_p);
10367 outer_code = SET;
10368 /* Slightly lower the cost of setting a core reg to a constant.
10369 This helps break up chains and allows for better scheduling. */
10370 if (REG_P (SET_DEST (x))
10371 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10372 *cost -= 1;
10373 x = SET_SRC (x);
10374 /* Immediate moves with an immediate in the range [0, 255] can be
10375 encoded in 16 bits in Thumb mode. */
10376 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10377 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10378 *cost >>= 1;
10379 goto const_int_cost;
10380 }
10381
10382 return false;
10383
10384 case MEM:
10385 return arm_mem_costs (x, extra_cost, cost, speed_p);
10386
10387 case PARALLEL:
10388 {
10389 /* Calculations of LDM costs are complex. We assume an initial cost
10390 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
10391 registers; each additional group of
10392 ldm_regs_per_insn_subsequent registers then costs one more insn. The
10393 formula for N regs is thus:
10394
10395 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10396 + ldm_regs_per_insn_subsequent - 1)
10397 / ldm_regs_per_insn_subsequent).
10398
10399 Additional costs may also be added for addressing. A similar
10400 formula is used for STM. */
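/* Hypothetical worked example of the formula above (the per-CPU
   values are illustrative only): for N = 5 registers with
   ldm_regs_per_insn_1st = 3 and ldm_regs_per_insn_subsequent = 2,

     ldm_1st + COSTS_N_INSNS ((MAX (5 - 3, 0) + 2 - 1) / 2)
       = ldm_1st + COSTS_N_INSNS (1),

   i.e. one insn on top of the initial LDM cost.  */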
10401
10402 bool is_ldm = load_multiple_operation (x, SImode);
10403 bool is_stm = store_multiple_operation (x, SImode);
10404
10405 if (is_ldm || is_stm)
10406 {
10407 if (speed_p)
10408 {
10409 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10410 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10411 ? extra_cost->ldst.ldm_regs_per_insn_1st
10412 : extra_cost->ldst.stm_regs_per_insn_1st;
10413 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10414 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10415 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10416
10417 *cost += regs_per_insn_1st
10418 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10419 + regs_per_insn_sub - 1)
10420 / regs_per_insn_sub);
10421 return true;
10422 }
10423
10424 }
10425 return false;
10426 }
10427 case DIV:
10428 case UDIV:
10429 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10430 && (mode == SFmode || !TARGET_VFP_SINGLE))
10431 *cost += COSTS_N_INSNS (speed_p
10432 ? extra_cost->fp[mode != SFmode].div : 0);
10433 else if (mode == SImode && TARGET_IDIV)
10434 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10435 else
10436 *cost = LIBCALL_COST (2);
10437
10438 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10439 possible, udiv is preferred. */
10440 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10441 return false; /* All arguments must be in registers. */
10442
10443 case MOD:
10444 /* MOD by a power of 2 can be expanded as:
10445 rsbs r1, r0, #0
10446 and r0, r0, #(n - 1)
10447 and r1, r1, #(n - 1)
10448 rsbpl r0, r1, #0. */
10449 if (CONST_INT_P (XEXP (x, 1))
10450 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10451 && mode == SImode)
10452 {
10453 *cost += COSTS_N_INSNS (3);
10454
10455 if (speed_p)
10456 *cost += 2 * extra_cost->alu.logical
10457 + extra_cost->alu.arith;
10458 return true;
10459 }
10460
10461 /* Fall-through. */
10462 case UMOD:
10463 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10464 possible, udiv is preferred. */
10465 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10466 return false; /* All arguments must be in registers. */
10467
10468 case ROTATE:
10469 if (mode == SImode && REG_P (XEXP (x, 1)))
10470 {
10471 *cost += (COSTS_N_INSNS (1)
10472 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10473 if (speed_p)
10474 *cost += extra_cost->alu.shift_reg;
10475 return true;
10476 }
10477 /* Fall through */
10478 case ROTATERT:
10479 case ASHIFT:
10480 case LSHIFTRT:
10481 case ASHIFTRT:
10482 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10483 {
10484 *cost += (COSTS_N_INSNS (2)
10485 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10486 if (speed_p)
10487 *cost += 2 * extra_cost->alu.shift;
10488 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10489 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10490 *cost += 1;
10491 return true;
10492 }
10493 else if (mode == SImode)
10494 {
10495 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10496 /* Slightly disparage register shifts at -Os, but not by much. */
10497 if (!CONST_INT_P (XEXP (x, 1)))
10498 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10499 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10500 return true;
10501 }
10502 else if (GET_MODE_CLASS (mode) == MODE_INT
10503 && GET_MODE_SIZE (mode) < 4)
10504 {
10505 if (code == ASHIFT)
10506 {
10507 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10508 /* Slightly disparage register shifts at -Os, but not by
10509 much. */
10510 if (!CONST_INT_P (XEXP (x, 1)))
10511 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10512 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10513 }
10514 else if (code == LSHIFTRT || code == ASHIFTRT)
10515 {
10516 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10517 {
10518 /* Can use SBFX/UBFX. */
10519 if (speed_p)
10520 *cost += extra_cost->alu.bfx;
10521 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10522 }
10523 else
10524 {
10525 *cost += COSTS_N_INSNS (1);
10526 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10527 if (speed_p)
10528 {
10529 if (CONST_INT_P (XEXP (x, 1)))
10530 *cost += 2 * extra_cost->alu.shift;
10531 else
10532 *cost += (extra_cost->alu.shift
10533 + extra_cost->alu.shift_reg);
10534 }
10535 else
10536 /* Slightly disparage register shifts. */
10537 *cost += !CONST_INT_P (XEXP (x, 1));
10538 }
10539 }
10540 else /* Rotates. */
10541 {
10542 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10543 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10544 if (speed_p)
10545 {
10546 if (CONST_INT_P (XEXP (x, 1)))
10547 *cost += (2 * extra_cost->alu.shift
10548 + extra_cost->alu.log_shift);
10549 else
10550 *cost += (extra_cost->alu.shift
10551 + extra_cost->alu.shift_reg
10552 + extra_cost->alu.log_shift_reg);
10553 }
10554 }
10555 return true;
10556 }
10557
10558 *cost = LIBCALL_COST (2);
10559 return false;
10560
10561 case BSWAP:
10562 if (arm_arch6)
10563 {
10564 if (mode == SImode)
10565 {
10566 if (speed_p)
10567 *cost += extra_cost->alu.rev;
10568
10569 return false;
10570 }
10571 }
10572 else
10573 {
10574 /* No rev instruction available. Look at arm_legacy_rev
10575 and thumb_legacy_rev for the form of RTL used then. */
10576 if (TARGET_THUMB)
10577 {
10578 *cost += COSTS_N_INSNS (9);
10579
10580 if (speed_p)
10581 {
10582 *cost += 6 * extra_cost->alu.shift;
10583 *cost += 3 * extra_cost->alu.logical;
10584 }
10585 }
10586 else
10587 {
10588 *cost += COSTS_N_INSNS (4);
10589
10590 if (speed_p)
10591 {
10592 *cost += 2 * extra_cost->alu.shift;
10593 *cost += extra_cost->alu.arith_shift;
10594 *cost += 2 * extra_cost->alu.logical;
10595 }
10596 }
10597 return true;
10598 }
10599 return false;
10600
10601 case MINUS:
10602 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10603 && (mode == SFmode || !TARGET_VFP_SINGLE))
10604 {
10605 if (GET_CODE (XEXP (x, 0)) == MULT
10606 || GET_CODE (XEXP (x, 1)) == MULT)
10607 {
10608 rtx mul_op0, mul_op1, sub_op;
10609
10610 if (speed_p)
10611 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10612
10613 if (GET_CODE (XEXP (x, 0)) == MULT)
10614 {
10615 mul_op0 = XEXP (XEXP (x, 0), 0);
10616 mul_op1 = XEXP (XEXP (x, 0), 1);
10617 sub_op = XEXP (x, 1);
10618 }
10619 else
10620 {
10621 mul_op0 = XEXP (XEXP (x, 1), 0);
10622 mul_op1 = XEXP (XEXP (x, 1), 1);
10623 sub_op = XEXP (x, 0);
10624 }
10625
10626 /* The first operand of the multiply may be optionally
10627 negated. */
10628 if (GET_CODE (mul_op0) == NEG)
10629 mul_op0 = XEXP (mul_op0, 0);
10630
10631 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10632 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10633 + rtx_cost (sub_op, mode, code, 0, speed_p));
10634
10635 return true;
10636 }
10637
10638 if (speed_p)
10639 *cost += extra_cost->fp[mode != SFmode].addsub;
10640 return false;
10641 }
10642
10643 if (mode == SImode)
10644 {
10645 rtx shift_by_reg = NULL;
10646 rtx shift_op;
10647 rtx non_shift_op;
10648 rtx op0 = XEXP (x, 0);
10649 rtx op1 = XEXP (x, 1);
10650
10651 /* Factor out any borrow operation. There's more than one way
10652 of expressing this; try to recognize them all. */
10653 if (GET_CODE (op0) == MINUS)
10654 {
10655 if (arm_borrow_operation (op1, SImode))
10656 {
10657 op1 = XEXP (op0, 1);
10658 op0 = XEXP (op0, 0);
10659 }
10660 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10661 op0 = XEXP (op0, 0);
10662 }
10663 else if (GET_CODE (op1) == PLUS
10664 && arm_borrow_operation (XEXP (op1, 0), SImode))
10665 op1 = XEXP (op1, 0);
10666 else if (GET_CODE (op0) == NEG
10667 && arm_borrow_operation (op1, SImode))
10668 {
10669 /* Negate with carry-in. For Thumb2 this is done with
10670 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10671 RSC instruction that exists in Arm mode. */
10672 if (speed_p)
10673 *cost += (TARGET_THUMB2
10674 ? extra_cost->alu.arith_shift
10675 : extra_cost->alu.arith);
10676 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10677 return true;
10678 }
10679 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10680 Note we do mean ~borrow here. */
10681 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10682 {
10683 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10684 return true;
10685 }
10686
10687 shift_op = shifter_op_p (op0, &shift_by_reg);
10688 if (shift_op == NULL)
10689 {
10690 shift_op = shifter_op_p (op1, &shift_by_reg);
10691 non_shift_op = op0;
10692 }
10693 else
10694 non_shift_op = op1;
10695
10696 if (shift_op != NULL)
10697 {
10698 if (shift_by_reg != NULL)
10699 {
10700 if (speed_p)
10701 *cost += extra_cost->alu.arith_shift_reg;
10702 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10703 }
10704 else if (speed_p)
10705 *cost += extra_cost->alu.arith_shift;
10706
10707 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10708 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10709 return true;
10710 }
10711
10712 if (arm_arch_thumb2
10713 && GET_CODE (XEXP (x, 1)) == MULT)
10714 {
10715 /* MLS. */
10716 if (speed_p)
10717 *cost += extra_cost->mult[0].add;
10718 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10719 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10720 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10721 return true;
10722 }
10723
10724 if (CONST_INT_P (op0))
10725 {
10726 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10727 INTVAL (op0), NULL_RTX,
10728 NULL_RTX, 1, 0);
10729 *cost = COSTS_N_INSNS (insns);
10730 if (speed_p)
10731 *cost += insns * extra_cost->alu.arith;
10732 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10733 return true;
10734 }
10735 else if (speed_p)
10736 *cost += extra_cost->alu.arith;
10737
10738 /* Don't recurse as we don't want to cost any borrow that
10739 we've stripped. */
10740 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10741 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10742 return true;
10743 }
10744
10745 if (GET_MODE_CLASS (mode) == MODE_INT
10746 && GET_MODE_SIZE (mode) < 4)
10747 {
10748 rtx shift_op, shift_reg;
10749 shift_reg = NULL;
10750
10751 /* We check both sides of the MINUS for shifter operands since,
10752 unlike PLUS, it's not commutative. */
10753
10754 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10755 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10756
10757 /* Slightly disparage, as we might need to widen the result. */
10758 *cost += 1;
10759 if (speed_p)
10760 *cost += extra_cost->alu.arith;
10761
10762 if (CONST_INT_P (XEXP (x, 0)))
10763 {
10764 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10765 return true;
10766 }
10767
10768 return false;
10769 }
10770
10771 if (mode == DImode)
10772 {
10773 *cost += COSTS_N_INSNS (1);
10774
10775 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10776 {
10777 rtx op1 = XEXP (x, 1);
10778
10779 if (speed_p)
10780 *cost += 2 * extra_cost->alu.arith;
10781
10782 if (GET_CODE (op1) == ZERO_EXTEND)
10783 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10784 0, speed_p);
10785 else
10786 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10787 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10788 0, speed_p);
10789 return true;
10790 }
10791 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10792 {
10793 if (speed_p)
10794 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10795 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10796 0, speed_p)
10797 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10798 return true;
10799 }
10800 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10801 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10802 {
10803 if (speed_p)
10804 *cost += (extra_cost->alu.arith
10805 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10806 ? extra_cost->alu.arith
10807 : extra_cost->alu.arith_shift));
10808 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10809 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10810 GET_CODE (XEXP (x, 1)), 0, speed_p));
10811 return true;
10812 }
10813
10814 if (speed_p)
10815 *cost += 2 * extra_cost->alu.arith;
10816 return false;
10817 }
10818
10819 /* Vector mode? */
10820
10821 *cost = LIBCALL_COST (2);
10822 return false;
10823
10824 case PLUS:
10825 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10826 && (mode == SFmode || !TARGET_VFP_SINGLE))
10827 {
10828 if (GET_CODE (XEXP (x, 0)) == MULT)
10829 {
10830 rtx mul_op0, mul_op1, add_op;
10831
10832 if (speed_p)
10833 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10834
10835 mul_op0 = XEXP (XEXP (x, 0), 0);
10836 mul_op1 = XEXP (XEXP (x, 0), 1);
10837 add_op = XEXP (x, 1);
10838
10839 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10840 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10841 + rtx_cost (add_op, mode, code, 0, speed_p));
10842
10843 return true;
10844 }
10845
10846 if (speed_p)
10847 *cost += extra_cost->fp[mode != SFmode].addsub;
10848 return false;
10849 }
10850 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10851 {
10852 *cost = LIBCALL_COST (2);
10853 return false;
10854 }
10855
10856 /* Narrow modes can be synthesized in SImode, but the range
10857 of useful sub-operations is limited. Check for shift operations
10858 on one of the operands. Only left shifts can be used in the
10859 narrow modes. */
10860 if (GET_MODE_CLASS (mode) == MODE_INT
10861 && GET_MODE_SIZE (mode) < 4)
10862 {
10863 rtx shift_op, shift_reg;
10864 shift_reg = NULL;
10865
10866 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10867
10868 if (CONST_INT_P (XEXP (x, 1)))
10869 {
10870 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10871 INTVAL (XEXP (x, 1)), NULL_RTX,
10872 NULL_RTX, 1, 0);
10873 *cost = COSTS_N_INSNS (insns);
10874 if (speed_p)
10875 *cost += insns * extra_cost->alu.arith;
10876 /* Slightly penalize a narrow operation as the result may
10877 need widening. */
10878 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10879 return true;
10880 }
10881
10882 /* Slightly penalize a narrow operation as the result may
10883 need widening. */
10884 *cost += 1;
10885 if (speed_p)
10886 *cost += extra_cost->alu.arith;
10887
10888 return false;
10889 }
10890
10891 if (mode == SImode)
10892 {
10893 rtx shift_op, shift_reg;
10894
10895 if (TARGET_INT_SIMD
10896 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10897 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10898 {
10899 /* UXTA[BH] or SXTA[BH]. */
10900 if (speed_p)
10901 *cost += extra_cost->alu.extend_arith;
10902 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10903 0, speed_p)
10904 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10905 return true;
10906 }
10907
10908 rtx op0 = XEXP (x, 0);
10909 rtx op1 = XEXP (x, 1);
10910
10911 /* Handle a side effect of adding in the carry to an addition. */
10912 if (GET_CODE (op0) == PLUS
10913 && arm_carry_operation (op1, mode))
10914 {
10915 op1 = XEXP (op0, 1);
10916 op0 = XEXP (op0, 0);
10917 }
10918 else if (GET_CODE (op1) == PLUS
10919 && arm_carry_operation (op0, mode))
10920 {
10921 op0 = XEXP (op1, 0);
10922 op1 = XEXP (op1, 1);
10923 }
10924 else if (GET_CODE (op0) == PLUS)
10925 {
10926 op0 = strip_carry_operation (op0);
10927 if (swap_commutative_operands_p (op0, op1))
10928 std::swap (op0, op1);
10929 }
10930
10931 if (arm_carry_operation (op0, mode))
10932 {
10933 /* Adding the carry to a register is a canonicalization of
10934 adding 0 to the register plus the carry. */
10935 if (speed_p)
10936 *cost += extra_cost->alu.arith;
10937 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10938 return true;
10939 }
10940
10941 shift_reg = NULL;
10942 shift_op = shifter_op_p (op0, &shift_reg);
10943 if (shift_op != NULL)
10944 {
10945 if (shift_reg)
10946 {
10947 if (speed_p)
10948 *cost += extra_cost->alu.arith_shift_reg;
10949 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10950 }
10951 else if (speed_p)
10952 *cost += extra_cost->alu.arith_shift;
10953
10954 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10955 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10956 return true;
10957 }
10958
10959 if (GET_CODE (op0) == MULT)
10960 {
10961 rtx mul_op = op0;
10962
10963 if (TARGET_DSP_MULTIPLY
10964 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10965 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10966 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10967 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10968 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10969 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10970 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10971 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10972 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10973 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10974 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10975 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10976 == 16))))))
10977 {
10978 /* SMLA[BT][BT]. */
10979 if (speed_p)
10980 *cost += extra_cost->mult[0].extend_add;
10981 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10982 SIGN_EXTEND, 0, speed_p)
10983 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10984 SIGN_EXTEND, 0, speed_p)
10985 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10986 return true;
10987 }
10988
10989 if (speed_p)
10990 *cost += extra_cost->mult[0].add;
10991 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10992 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10993 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10994 return true;
10995 }
10996
10997 if (CONST_INT_P (op1))
10998 {
10999 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11000 INTVAL (op1), NULL_RTX,
11001 NULL_RTX, 1, 0);
11002 *cost = COSTS_N_INSNS (insns);
11003 if (speed_p)
11004 *cost += insns * extra_cost->alu.arith;
11005 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11006 return true;
11007 }
11008
11009 if (speed_p)
11010 *cost += extra_cost->alu.arith;
11011
11012 /* Don't recurse here because we want to test the operands
11013 without any carry operation. */
11014 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11015 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11016 return true;
11017 }
11018
11019 if (mode == DImode)
11020 {
11021 if (GET_CODE (XEXP (x, 0)) == MULT
11022 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11023 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11024 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11025 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11026 {
11027 if (speed_p)
11028 *cost += extra_cost->mult[1].extend_add;
11029 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11030 ZERO_EXTEND, 0, speed_p)
11031 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11032 ZERO_EXTEND, 0, speed_p)
11033 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11034 return true;
11035 }
11036
11037 *cost += COSTS_N_INSNS (1);
11038
11039 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11040 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11041 {
11042 if (speed_p)
11043 *cost += (extra_cost->alu.arith
11044 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11045 ? extra_cost->alu.arith
11046 : extra_cost->alu.arith_shift));
11047
11048 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11049 0, speed_p)
11050 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11051 return true;
11052 }
11053
11054 if (speed_p)
11055 *cost += 2 * extra_cost->alu.arith;
11056 return false;
11057 }
11058
11059 /* Vector mode? */
11060 *cost = LIBCALL_COST (2);
11061 return false;
11062 case IOR:
11063 {
11064 rtx sub0, sub1;
11065 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11066 {
11067 if (speed_p)
11068 *cost += extra_cost->alu.rev;
11069
11070 return true;
11071 }
11072 else if (mode == SImode && arm_arch_thumb2
11073 && arm_bfi_p (x, &sub0, &sub1))
11074 {
11075 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11076 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11077 if (speed_p)
11078 *cost += extra_cost->alu.bfi;
11079
11080 return true;
11081 }
11082 }
11083
11084 /* Fall through. */
11085 case AND: case XOR:
11086 if (mode == SImode)
11087 {
11088 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11089 rtx op0 = XEXP (x, 0);
11090 rtx shift_op, shift_reg;
11091
11092 if (subcode == NOT
11093 && (code == AND
11094 || (code == IOR && TARGET_THUMB2)))
11095 op0 = XEXP (op0, 0);
11096
11097 shift_reg = NULL;
11098 shift_op = shifter_op_p (op0, &shift_reg);
11099 if (shift_op != NULL)
11100 {
11101 if (shift_reg)
11102 {
11103 if (speed_p)
11104 *cost += extra_cost->alu.log_shift_reg;
11105 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11106 }
11107 else if (speed_p)
11108 *cost += extra_cost->alu.log_shift;
11109
11110 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11111 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11112 return true;
11113 }
11114
11115 if (CONST_INT_P (XEXP (x, 1)))
11116 {
11117 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11118 INTVAL (XEXP (x, 1)), NULL_RTX,
11119 NULL_RTX, 1, 0);
11120
11121 *cost = COSTS_N_INSNS (insns);
11122 if (speed_p)
11123 *cost += insns * extra_cost->alu.logical;
11124 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11125 return true;
11126 }
11127
11128 if (speed_p)
11129 *cost += extra_cost->alu.logical;
11130 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11131 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11132 return true;
11133 }
11134
11135 if (mode == DImode)
11136 {
11137 rtx op0 = XEXP (x, 0);
11138 enum rtx_code subcode = GET_CODE (op0);
11139
11140 *cost += COSTS_N_INSNS (1);
11141
11142 if (subcode == NOT
11143 && (code == AND
11144 || (code == IOR && TARGET_THUMB2)))
11145 op0 = XEXP (op0, 0);
11146
11147 if (GET_CODE (op0) == ZERO_EXTEND)
11148 {
11149 if (speed_p)
11150 *cost += 2 * extra_cost->alu.logical;
11151
11152 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11153 0, speed_p)
11154 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11155 return true;
11156 }
11157 else if (GET_CODE (op0) == SIGN_EXTEND)
11158 {
11159 if (speed_p)
11160 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11161
11162 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11163 0, speed_p)
11164 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11165 return true;
11166 }
11167
11168 if (speed_p)
11169 *cost += 2 * extra_cost->alu.logical;
11170
11171 return true;
11172 }
11173 /* Vector mode? */
11174
11175 *cost = LIBCALL_COST (2);
11176 return false;
11177
11178 case MULT:
11179 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11180 && (mode == SFmode || !TARGET_VFP_SINGLE))
11181 {
11182 rtx op0 = XEXP (x, 0);
11183
11184 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11185 op0 = XEXP (op0, 0);
11186
11187 if (speed_p)
11188 *cost += extra_cost->fp[mode != SFmode].mult;
11189
11190 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11191 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11192 return true;
11193 }
11194 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11195 {
11196 *cost = LIBCALL_COST (2);
11197 return false;
11198 }
11199
11200 if (mode == SImode)
11201 {
11202 if (TARGET_DSP_MULTIPLY
11203 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11204 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11205 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11206 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11207 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11208 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11209 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11210 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11211 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11212 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11213 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11214 && (INTVAL (XEXP (XEXP (x, 1), 1))
11215 == 16))))))
11216 {
11217 /* SMUL[TB][TB]. */
11218 if (speed_p)
11219 *cost += extra_cost->mult[0].extend;
11220 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11221 SIGN_EXTEND, 0, speed_p);
11222 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11223 SIGN_EXTEND, 1, speed_p);
11224 return true;
11225 }
11226 if (speed_p)
11227 *cost += extra_cost->mult[0].simple;
11228 return false;
11229 }
11230
11231 if (mode == DImode)
11232 {
11233 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11234 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11235 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11236 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11237 {
11238 if (speed_p)
11239 *cost += extra_cost->mult[1].extend;
11240 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11241 ZERO_EXTEND, 0, speed_p)
11242 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11243 ZERO_EXTEND, 0, speed_p));
11244 return true;
11245 }
11246
11247 *cost = LIBCALL_COST (2);
11248 return false;
11249 }
11250
11251 /* Vector mode? */
11252 *cost = LIBCALL_COST (2);
11253 return false;
11254
11255 case NEG:
11256 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11257 && (mode == SFmode || !TARGET_VFP_SINGLE))
11258 {
11259 if (GET_CODE (XEXP (x, 0)) == MULT)
11260 {
11261 /* VNMUL. */
11262 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11263 return true;
11264 }
11265
11266 if (speed_p)
11267 *cost += extra_cost->fp[mode != SFmode].neg;
11268
11269 return false;
11270 }
11271 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11272 {
11273 *cost = LIBCALL_COST (1);
11274 return false;
11275 }
11276
11277 if (mode == SImode)
11278 {
11279 if (GET_CODE (XEXP (x, 0)) == ABS)
11280 {
11281 *cost += COSTS_N_INSNS (1);
11282 /* Assume the non-flag-changing variant. */
11283 if (speed_p)
11284 *cost += (extra_cost->alu.log_shift
11285 + extra_cost->alu.arith_shift);
11286 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11287 return true;
11288 }
11289
11290 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11291 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11292 {
11293 *cost += COSTS_N_INSNS (1);
11294 /* No extra cost for MOV imm and MVN imm. */
11295 /* If the comparison op is using the flags, there's no further
11296 cost, otherwise we need to add the cost of the comparison. */
11297 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11298 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11299 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11300 {
11301 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11302 *cost += (COSTS_N_INSNS (1)
11303 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11304 0, speed_p)
11305 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11306 1, speed_p));
11307 if (speed_p)
11308 *cost += extra_cost->alu.arith;
11309 }
11310 return true;
11311 }
11312
11313 if (speed_p)
11314 *cost += extra_cost->alu.arith;
11315 return false;
11316 }
11317
11318 if (GET_MODE_CLASS (mode) == MODE_INT
11319 && GET_MODE_SIZE (mode) < 4)
11320 {
11321 /* Slightly disparage, as we might need an extend operation. */
11322 *cost += 1;
11323 if (speed_p)
11324 *cost += extra_cost->alu.arith;
11325 return false;
11326 }
11327
11328 if (mode == DImode)
11329 {
11330 *cost += COSTS_N_INSNS (1);
11331 if (speed_p)
11332 *cost += 2 * extra_cost->alu.arith;
11333 return false;
11334 }
11335
11336 /* Vector mode? */
11337 *cost = LIBCALL_COST (1);
11338 return false;
11339
11340 case NOT:
11341 if (mode == SImode)
11342 {
11343 rtx shift_op;
11344 rtx shift_reg = NULL;
11345
11346 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11347
11348 if (shift_op)
11349 {
11350 if (shift_reg != NULL)
11351 {
11352 if (speed_p)
11353 *cost += extra_cost->alu.log_shift_reg;
11354 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11355 }
11356 else if (speed_p)
11357 *cost += extra_cost->alu.log_shift;
11358 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11359 return true;
11360 }
11361
11362 if (speed_p)
11363 *cost += extra_cost->alu.logical;
11364 return false;
11365 }
11366 if (mode == DImode)
11367 {
11368 *cost += COSTS_N_INSNS (1);
11369 return false;
11370 }
11371
11372 /* Vector mode? */
11373
11374 *cost += LIBCALL_COST (1);
11375 return false;
11376
11377 case IF_THEN_ELSE:
11378 {
11379 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11380 {
11381 *cost += COSTS_N_INSNS (3);
11382 return true;
11383 }
11384 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11385 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11386
11387 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11388 /* Assume that if one arm of the if_then_else is a register,
11389 it will be tied with the result, eliminating the
11390 conditional insn. */
11391 if (REG_P (XEXP (x, 1)))
11392 *cost += op2cost;
11393 else if (REG_P (XEXP (x, 2)))
11394 *cost += op1cost;
11395 else
11396 {
11397 if (speed_p)
11398 {
11399 if (extra_cost->alu.non_exec_costs_exec)
11400 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11401 else
11402 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11403 }
11404 else
11405 *cost += op1cost + op2cost;
11406 }
11407 }
11408 return true;
11409
11410 case COMPARE:
11411 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11412 *cost = 0;
11413 else
11414 {
11415 machine_mode op0mode;
11416 /* We'll mostly assume that the cost of a compare is the cost of the
11417 LHS. However, there are some notable exceptions. */
11418
11419 /* Floating point compares are never done as side-effects. */
11420 op0mode = GET_MODE (XEXP (x, 0));
11421 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11422 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11423 {
11424 if (speed_p)
11425 *cost += extra_cost->fp[op0mode != SFmode].compare;
11426
11427 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11428 {
11429 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11430 return true;
11431 }
11432
11433 return false;
11434 }
11435 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11436 {
11437 *cost = LIBCALL_COST (2);
11438 return false;
11439 }
11440
11441 /* DImode compares normally take two insns. */
11442 if (op0mode == DImode)
11443 {
11444 *cost += COSTS_N_INSNS (1);
11445 if (speed_p)
11446 *cost += 2 * extra_cost->alu.arith;
11447 return false;
11448 }
11449
11450 if (op0mode == SImode)
11451 {
11452 rtx shift_op;
11453 rtx shift_reg;
11454
11455 if (XEXP (x, 1) == const0_rtx
11456 && !(REG_P (XEXP (x, 0))
11457 || (GET_CODE (XEXP (x, 0)) == SUBREG
11458 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11459 {
11460 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11461
11462 /* Multiply operations that set the flags are often
11463 significantly more expensive. */
11464 if (speed_p
11465 && GET_CODE (XEXP (x, 0)) == MULT
11466 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11467 *cost += extra_cost->mult[0].flag_setting;
11468
11469 if (speed_p
11470 && GET_CODE (XEXP (x, 0)) == PLUS
11471 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11472 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11473 0), 1), mode))
11474 *cost += extra_cost->mult[0].flag_setting;
11475 return true;
11476 }
11477
11478 shift_reg = NULL;
11479 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11480 if (shift_op != NULL)
11481 {
11482 if (shift_reg != NULL)
11483 {
11484 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11485 1, speed_p);
11486 if (speed_p)
11487 *cost += extra_cost->alu.arith_shift_reg;
11488 }
11489 else if (speed_p)
11490 *cost += extra_cost->alu.arith_shift;
11491 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11492 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11493 return true;
11494 }
11495
11496 if (speed_p)
11497 *cost += extra_cost->alu.arith;
11498 if (CONST_INT_P (XEXP (x, 1))
11499 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11500 {
11501 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11502 return true;
11503 }
11504 return false;
11505 }
11506
11507 /* Vector mode? */
11508
11509 *cost = LIBCALL_COST (2);
11510 return false;
11511 }
11512 return true;
11513
11514 case EQ:
11515 case GE:
11516 case GT:
11517 case LE:
11518 case LT:
11519 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11520 vcle and vclt). */
11521 if (TARGET_NEON
11522 && TARGET_HARD_FLOAT
11523 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11524 && (XEXP (x, 1) == CONST0_RTX (mode)))
11525 {
11526 *cost = 0;
11527 return true;
11528 }
11529
11530 /* Fall through. */
11531 case NE:
11532 case LTU:
11533 case LEU:
11534 case GEU:
11535 case GTU:
11536 case ORDERED:
11537 case UNORDERED:
11538 case UNEQ:
11539 case UNLE:
11540 case UNLT:
11541 case UNGE:
11542 case UNGT:
11543 case LTGT:
11544 if (outer_code == SET)
11545 {
11546 /* Is it a store-flag operation? */
11547 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11548 && XEXP (x, 1) == const0_rtx)
11549 {
11550 /* Thumb also needs an IT insn. */
11551 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11552 return true;
11553 }
11554 if (XEXP (x, 1) == const0_rtx)
11555 {
11556 switch (code)
11557 {
11558 case LT:
11559 /* LSR Rd, Rn, #31. */
11560 if (speed_p)
11561 *cost += extra_cost->alu.shift;
11562 break;
11563
11564 case EQ:
11565 /* RSBS T1, Rn, #0
11566 ADC Rd, Rn, T1. */
11567
11568 case NE:
11569 /* SUBS T1, Rn, #1
11570 SBC Rd, Rn, T1. */
11571 *cost += COSTS_N_INSNS (1);
11572 break;
11573
11574 case LE:
11575 /* RSBS T1, Rn, Rn, LSR #31
11576 ADC Rd, Rn, T1. */
11577 *cost += COSTS_N_INSNS (1);
11578 if (speed_p)
11579 *cost += extra_cost->alu.arith_shift;
11580 break;
11581
11582 case GT:
11583 /* RSB Rd, Rn, Rn, ASR #1
11584 LSR Rd, Rd, #31. */
11585 *cost += COSTS_N_INSNS (1);
11586 if (speed_p)
11587 *cost += (extra_cost->alu.arith_shift
11588 + extra_cost->alu.shift);
11589 break;
11590
11591 case GE:
11592 /* ASR Rd, Rn, #31
11593 ADD Rd, Rn, #1. */
11594 *cost += COSTS_N_INSNS (1);
11595 if (speed_p)
11596 *cost += extra_cost->alu.shift;
11597 break;
11598
11599 default:
11600 /* Remaining cases are either meaningless or would take
11601 three insns anyway. */
11602 *cost = COSTS_N_INSNS (3);
11603 break;
11604 }
11605 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11606 return true;
11607 }
11608 else
11609 {
11610 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11611 if (CONST_INT_P (XEXP (x, 1))
11612 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11613 {
11614 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11615 return true;
11616 }
11617
11618 return false;
11619 }
11620 }
11621 /* Not directly inside a set. If it involves the condition code
11622 register it must be the condition for a branch, cond_exec or
11623 I_T_E operation. Since the comparison is performed elsewhere
11624 this is just the control part which has no additional
11625 cost. */
11626 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11627 && XEXP (x, 1) == const0_rtx)
11628 {
11629 *cost = 0;
11630 return true;
11631 }
11632 return false;
11633
11634 case ABS:
11635 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11636 && (mode == SFmode || !TARGET_VFP_SINGLE))
11637 {
11638 if (speed_p)
11639 *cost += extra_cost->fp[mode != SFmode].neg;
11640
11641 return false;
11642 }
11643 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11644 {
11645 *cost = LIBCALL_COST (1);
11646 return false;
11647 }
11648
11649 if (mode == SImode)
11650 {
11651 if (speed_p)
11652 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11653 return false;
11654 }
11655 /* Vector mode? */
11656 *cost = LIBCALL_COST (1);
11657 return false;
11658
11659 case SIGN_EXTEND:
11660 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11661 && MEM_P (XEXP (x, 0)))
11662 {
11663 if (mode == DImode)
11664 *cost += COSTS_N_INSNS (1);
11665
11666 if (!speed_p)
11667 return true;
11668
11669 if (GET_MODE (XEXP (x, 0)) == SImode)
11670 *cost += extra_cost->ldst.load;
11671 else
11672 *cost += extra_cost->ldst.load_sign_extend;
11673
11674 if (mode == DImode)
11675 *cost += extra_cost->alu.shift;
11676
11677 return true;
11678 }
11679
11680 /* Widening from less than 32 bits requires an extend operation. */
11681 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11682 {
11683 /* We have SXTB/SXTH. */
11684 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11685 if (speed_p)
11686 *cost += extra_cost->alu.extend;
11687 }
11688 else if (GET_MODE (XEXP (x, 0)) != SImode)
11689 {
11690 /* Needs two shifts. */
11691 *cost += COSTS_N_INSNS (1);
11692 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11693 if (speed_p)
11694 *cost += 2 * extra_cost->alu.shift;
11695 }
11696
11697 /* Widening beyond 32 bits requires one more insn. */
11698 if (mode == DImode)
11699 {
11700 *cost += COSTS_N_INSNS (1);
11701 if (speed_p)
11702 *cost += extra_cost->alu.shift;
11703 }
11704
11705 return true;
11706
11707 case ZERO_EXTEND:
11708 if ((arm_arch4
11709 || GET_MODE (XEXP (x, 0)) == SImode
11710 || GET_MODE (XEXP (x, 0)) == QImode)
11711 && MEM_P (XEXP (x, 0)))
11712 {
11713 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11714
11715 if (mode == DImode)
11716 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11717
11718 return true;
11719 }
11720
11721 /* Widening from less than 32 bits requires an extend operation. */
11722 if (GET_MODE (XEXP (x, 0)) == QImode)
11723 {
11724 /* UXTB can be a shorter instruction in Thumb2, but it might
11725 be slower than the AND Rd, Rn, #255 alternative. When
11726 optimizing for speed it should never be slower to use
11727 AND, and we don't really model 16-bit vs 32-bit insns
11728 here. */
11729 if (speed_p)
11730 *cost += extra_cost->alu.logical;
11731 }
11732 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11733 {
11734 /* We have UXTB/UXTH. */
11735 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11736 if (speed_p)
11737 *cost += extra_cost->alu.extend;
11738 }
11739 else if (GET_MODE (XEXP (x, 0)) != SImode)
11740 {
11741 /* Needs two shifts. It's marginally preferable to use
11742 shifts rather than two BIC instructions as the second
11743 shift may merge with a subsequent insn as a shifter
11744 op. */
11745 *cost = COSTS_N_INSNS (2);
11746 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11747 if (speed_p)
11748 *cost += 2 * extra_cost->alu.shift;
11749 }
11750
11751 /* Widening beyond 32 bits requires one more insn. */
11752 if (mode == DImode)
11753 {
11754 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11755 }
11756
11757 return true;
11758
11759 case CONST_INT:
11760 *cost = 0;
11761 /* CONST_INT has no mode, so we cannot tell for sure how many
11762 insns are really going to be needed. The best we can do is
11763 look at the value passed. If it fits in SImode, then assume
11764 that's the mode it will be used for. Otherwise assume it
11765 will be used in DImode. */
11766 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11767 mode = SImode;
11768 else
11769 mode = DImode;
11770
11771 /* Avoid blowing up in arm_gen_constant (). */
11772 if (!(outer_code == PLUS
11773 || outer_code == AND
11774 || outer_code == IOR
11775 || outer_code == XOR
11776 || outer_code == MINUS))
11777 outer_code = SET;
11778
11779 const_int_cost:
11780 if (mode == SImode)
11781 {
11782 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11783 INTVAL (x), NULL, NULL,
11784 0, 0));
11785 /* Extra costs? */
11786 }
11787 else
11788 {
11789 *cost += COSTS_N_INSNS (arm_gen_constant
11790 (outer_code, SImode, NULL,
11791 trunc_int_for_mode (INTVAL (x), SImode),
11792 NULL, NULL, 0, 0)
11793 + arm_gen_constant (outer_code, SImode, NULL,
11794 INTVAL (x) >> 32, NULL,
11795 NULL, 0, 0));
11796 /* Extra costs? */
11797 }
11798
11799 return true;
11800
11801 case CONST:
11802 case LABEL_REF:
11803 case SYMBOL_REF:
11804 if (speed_p)
11805 {
11806 if (arm_arch_thumb2 && !flag_pic)
11807 *cost += COSTS_N_INSNS (1);
11808 else
11809 *cost += extra_cost->ldst.load;
11810 }
11811 else
11812 *cost += COSTS_N_INSNS (1);
11813
11814 if (flag_pic)
11815 {
11816 *cost += COSTS_N_INSNS (1);
11817 if (speed_p)
11818 *cost += extra_cost->alu.arith;
11819 }
11820
11821 return true;
11822
11823 case CONST_FIXED:
11824 *cost = COSTS_N_INSNS (4);
11825 /* Fixme. */
11826 return true;
11827
11828 case CONST_DOUBLE:
11829 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11830 && (mode == SFmode || !TARGET_VFP_SINGLE))
11831 {
11832 if (vfp3_const_double_rtx (x))
11833 {
11834 if (speed_p)
11835 *cost += extra_cost->fp[mode == DFmode].fpconst;
11836 return true;
11837 }
11838
11839 if (speed_p)
11840 {
11841 if (mode == DFmode)
11842 *cost += extra_cost->ldst.loadd;
11843 else
11844 *cost += extra_cost->ldst.loadf;
11845 }
11846 else
11847 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11848
11849 return true;
11850 }
11851 *cost = COSTS_N_INSNS (4);
11852 return true;
11853
11854 case CONST_VECTOR:
11855 /* Fixme. */
11856 if (((TARGET_NEON && TARGET_HARD_FLOAT
11857 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11858 || TARGET_HAVE_MVE)
11859 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11860 *cost = COSTS_N_INSNS (1);
11861 else
11862 *cost = COSTS_N_INSNS (4);
11863 return true;
11864
11865 case HIGH:
11866 case LO_SUM:
11867 /* When optimizing for size, we prefer constant pool entries to
11868 MOVW/MOVT pairs, so bump the cost of these slightly. */
11869 if (!speed_p)
11870 *cost += 1;
11871 return true;
11872
11873 case CLZ:
11874 if (speed_p)
11875 *cost += extra_cost->alu.clz;
11876 return false;
11877
11878 case SMIN:
11879 if (XEXP (x, 1) == const0_rtx)
11880 {
11881 if (speed_p)
11882 *cost += extra_cost->alu.log_shift;
11883 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11884 return true;
11885 }
11886 /* Fall through. */
11887 case SMAX:
11888 case UMIN:
11889 case UMAX:
11890 *cost += COSTS_N_INSNS (1);
11891 return false;
11892
11893 case TRUNCATE:
11894 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11895 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11896 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11897 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11898 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11899 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11900 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11901 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11902 == ZERO_EXTEND))))
11903 {
11904 if (speed_p)
11905 *cost += extra_cost->mult[1].extend;
11906 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11907 ZERO_EXTEND, 0, speed_p)
11908 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11909 ZERO_EXTEND, 0, speed_p));
11910 return true;
11911 }
11912 *cost = LIBCALL_COST (1);
11913 return false;
11914
11915 case UNSPEC_VOLATILE:
11916 case UNSPEC:
11917 return arm_unspec_cost (x, outer_code, speed_p, cost);
11918
11919 case PC:
11920 /* Reading the PC is like reading any other register. Writing it
11921 is more expensive, but we take that into account elsewhere. */
11922 *cost = 0;
11923 return true;
11924
11925 case ZERO_EXTRACT:
11926 /* TODO: Simple zero_extract of bottom bits using AND. */
11927 /* Fall through. */
11928 case SIGN_EXTRACT:
11929 if (arm_arch6
11930 && mode == SImode
11931 && CONST_INT_P (XEXP (x, 1))
11932 && CONST_INT_P (XEXP (x, 2)))
11933 {
11934 if (speed_p)
11935 *cost += extra_cost->alu.bfx;
11936 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11937 return true;
11938 }
11939 /* Without UBFX/SBFX, need to resort to shift operations. */
11940 *cost += COSTS_N_INSNS (1);
11941 if (speed_p)
11942 *cost += 2 * extra_cost->alu.shift;
11943 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11944 return true;
11945
11946 case FLOAT_EXTEND:
11947 if (TARGET_HARD_FLOAT)
11948 {
11949 if (speed_p)
11950 *cost += extra_cost->fp[mode == DFmode].widen;
11951 if (!TARGET_VFP5
11952 && GET_MODE (XEXP (x, 0)) == HFmode)
11953 {
11954 /* Pre v8, widening HF->DF is a two-step process, first
11955 widening to SFmode. */
11956 *cost += COSTS_N_INSNS (1);
11957 if (speed_p)
11958 *cost += extra_cost->fp[0].widen;
11959 }
11960 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11961 return true;
11962 }
11963
11964 *cost = LIBCALL_COST (1);
11965 return false;
11966
11967 case FLOAT_TRUNCATE:
11968 if (TARGET_HARD_FLOAT)
11969 {
11970 if (speed_p)
11971 *cost += extra_cost->fp[mode == DFmode].narrow;
11972 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11973 return true;
11974 /* Vector modes? */
11975 }
11976 *cost = LIBCALL_COST (1);
11977 return false;
11978
11979 case FMA:
11980 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11981 {
11982 rtx op0 = XEXP (x, 0);
11983 rtx op1 = XEXP (x, 1);
11984 rtx op2 = XEXP (x, 2);
11985
11986
11987 /* vfms or vfnma. */
11988 if (GET_CODE (op0) == NEG)
11989 op0 = XEXP (op0, 0);
11990
11991 /* vfnms or vfnma. */
11992 if (GET_CODE (op2) == NEG)
11993 op2 = XEXP (op2, 0);
11994
11995 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11996 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11997 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11998
11999 if (speed_p)
12000 *cost += extra_cost->fp[mode == DFmode].fma;
12001
12002 return true;
12003 }
12004
12005 *cost = LIBCALL_COST (3);
12006 return false;
12007
12008 case FIX:
12009 case UNSIGNED_FIX:
12010 if (TARGET_HARD_FLOAT)
12011 {
12012 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12013 a vcvt fixed-point conversion. */
12014 if (code == FIX && mode == SImode
12015 && GET_CODE (XEXP (x, 0)) == FIX
12016 && GET_MODE (XEXP (x, 0)) == SFmode
12017 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12018 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12019 > 0)
12020 {
12021 if (speed_p)
12022 *cost += extra_cost->fp[0].toint;
12023
12024 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12025 code, 0, speed_p);
12026 return true;
12027 }
12028
12029 if (GET_MODE_CLASS (mode) == MODE_INT)
12030 {
12031 mode = GET_MODE (XEXP (x, 0));
12032 if (speed_p)
12033 *cost += extra_cost->fp[mode == DFmode].toint;
12034 /* Strip off the 'cost' of rounding towards zero. */
12035 if (GET_CODE (XEXP (x, 0)) == FIX)
12036 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12037 0, speed_p);
12038 else
12039 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12040 /* ??? Increase the cost to deal with transferring from
12041 FP -> CORE registers? */
12042 return true;
12043 }
12044 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12045 && TARGET_VFP5)
12046 {
12047 if (speed_p)
12048 *cost += extra_cost->fp[mode == DFmode].roundint;
12049 return false;
12050 }
12051 /* Vector costs? */
12052 }
12053 *cost = LIBCALL_COST (1);
12054 return false;
12055
12056 case FLOAT:
12057 case UNSIGNED_FLOAT:
12058 if (TARGET_HARD_FLOAT)
12059 {
12060 /* ??? Increase the cost to deal with transferring from CORE
12061 -> FP registers? */
12062 if (speed_p)
12063 *cost += extra_cost->fp[mode == DFmode].fromint;
12064 return false;
12065 }
12066 *cost = LIBCALL_COST (1);
12067 return false;
12068
12069 case CALL:
12070 return true;
12071
12072 case ASM_OPERANDS:
12073 {
12074 /* Just a guess: cost the number of instructions in the asm
12075 plus one insn per input, but always a minimum of COSTS_N_INSNS (1)
12076 (see PR60663). */
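/* E.g. an asm template containing two instructions with three
   input operands is costed as COSTS_N_INSNS (2 + 3).  */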
12077 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12078 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12079
12080 *cost = COSTS_N_INSNS (asm_length + num_operands);
12081 return true;
12082 }
12083 default:
12084 if (mode != VOIDmode)
12085 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12086 else
12087 *cost = COSTS_N_INSNS (4); /* Who knows? */
12088 return false;
12089 }
12090 }
12091
12092 #undef HANDLE_NARROW_SHIFT_ARITH
12093
12094 /* RTX costs entry point. */
12095
12096 static bool
12097 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12098 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12099 {
12100 bool result;
12101 int code = GET_CODE (x);
12102 gcc_assert (current_tune->insn_extra_cost);
12103
12104 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12105 (enum rtx_code) outer_code,
12106 current_tune->insn_extra_cost,
12107 total, speed);
12108
12109 if (dump_file && arm_verbose_cost)
12110 {
12111 print_rtl_single (dump_file, x);
12112 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12113 *total, result ? "final" : "partial");
12114 }
12115 return result;
12116 }
12117
12118 static int
12119 arm_insn_cost (rtx_insn *insn, bool speed)
12120 {
12121 int cost;
12122
12123 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12124 will likely disappear during register allocation. */
12125 if (!reload_completed
12126 && GET_CODE (PATTERN (insn)) == SET
12127 && REG_P (SET_DEST (PATTERN (insn)))
12128 && REG_P (SET_SRC (PATTERN (insn))))
12129 return 2;
12130 cost = pattern_cost (PATTERN (insn), speed);
12131 /* If the cost is zero, then it's likely a complex insn. We don't want the
12132 cost of these to be less than something we know about. */
12133 return cost ? cost : COSTS_N_INSNS (2);
12134 }
12135
12136 /* All address computations that can be done are free, but rtx cost returns
12137 the same for practically all of them. So we weight the different types
12138 of address here in the order (most pref first):
12139 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
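/* A sketch of the weights the code below actually returns:
   pre/post increment/decrement -> 0, reg + const_int -> 2,
   reg + arithmetic term (e.g. a shifted reg) -> 3, other PLUS
   forms -> 4, plain REG -> 6, MEM/LABEL_REF/SYMBOL_REF -> 10.  */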
12140 static inline int
12141 arm_arm_address_cost (rtx x)
12142 {
12143 enum rtx_code c = GET_CODE (x);
12144
12145 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12146 return 0;
12147 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12148 return 10;
12149
12150 if (c == PLUS)
12151 {
12152 if (CONST_INT_P (XEXP (x, 1)))
12153 return 2;
12154
12155 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12156 return 3;
12157
12158 return 4;
12159 }
12160
12161 return 6;
12162 }
12163
12164 static inline int
12165 arm_thumb_address_cost (rtx x)
12166 {
12167 enum rtx_code c = GET_CODE (x);
12168
12169 if (c == REG)
12170 return 1;
12171 if (c == PLUS
12172 && REG_P (XEXP (x, 0))
12173 && CONST_INT_P (XEXP (x, 1)))
12174 return 1;
12175
12176 return 2;
12177 }
12178
12179 static int
12180 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12181 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12182 {
12183 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12184 }
12185
12186 /* Adjust cost hook for XScale. */
12187 static bool
12188 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12189 int * cost)
12190 {
12191 /* Some true dependencies can have a higher cost depending
12192 on precisely how certain input operands are used. */
12193 if (dep_type == 0
12194 && recog_memoized (insn) >= 0
12195 && recog_memoized (dep) >= 0)
12196 {
12197 int shift_opnum = get_attr_shift (insn);
12198 enum attr_type attr_type = get_attr_type (dep);
12199
12200 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12201 operand for INSN. If we have a shifted input operand and the
12202 instruction we depend on is another ALU instruction, then we may
12203 have to account for an additional stall. */
12204 if (shift_opnum != 0
12205 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12206 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12207 || attr_type == TYPE_ALUS_SHIFT_IMM
12208 || attr_type == TYPE_LOGIC_SHIFT_IMM
12209 || attr_type == TYPE_LOGICS_SHIFT_IMM
12210 || attr_type == TYPE_ALU_SHIFT_REG
12211 || attr_type == TYPE_ALUS_SHIFT_REG
12212 || attr_type == TYPE_LOGIC_SHIFT_REG
12213 || attr_type == TYPE_LOGICS_SHIFT_REG
12214 || attr_type == TYPE_MOV_SHIFT
12215 || attr_type == TYPE_MVN_SHIFT
12216 || attr_type == TYPE_MOV_SHIFT_REG
12217 || attr_type == TYPE_MVN_SHIFT_REG))
12218 {
12219 rtx shifted_operand;
12220 int opno;
12221
12222 /* Get the shifted operand. */
12223 extract_insn (insn);
12224 shifted_operand = recog_data.operand[shift_opnum];
12225
12226 /* Iterate over all the operands in DEP. If we write an operand
12227 that overlaps with SHIFTED_OPERAND, then we have to increase the
12228 cost of this dependency. */
12229 extract_insn (dep);
12230 preprocess_constraints (dep);
12231 for (opno = 0; opno < recog_data.n_operands; opno++)
12232 {
12233 /* We can ignore strict inputs. */
12234 if (recog_data.operand_type[opno] == OP_IN)
12235 continue;
12236
12237 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12238 shifted_operand))
12239 {
12240 *cost = 2;
12241 return false;
12242 }
12243 }
12244 }
12245 }
12246 return true;
12247 }
12248
12249 /* Adjust cost hook for Cortex A9. */
12250 static bool
12251 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12252 int * cost)
12253 {
12254 switch (dep_type)
12255 {
12256 case REG_DEP_ANTI:
12257 *cost = 0;
12258 return false;
12259
12260 case REG_DEP_TRUE:
12261 case REG_DEP_OUTPUT:
12262 if (recog_memoized (insn) >= 0
12263 && recog_memoized (dep) >= 0)
12264 {
12265 if (GET_CODE (PATTERN (insn)) == SET)
12266 {
12267 if (GET_MODE_CLASS
12268 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12269 || GET_MODE_CLASS
12270 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12271 {
12272 enum attr_type attr_type_insn = get_attr_type (insn);
12273 enum attr_type attr_type_dep = get_attr_type (dep);
12274
12275 /* By default all dependencies of the form
12276 s0 = s0 <op> s1
12277 s0 = s0 <op> s2
12278 have an extra latency of 1 cycle because
12279 of the input and output dependency in this
12280 case. However, this gets modeled as a true
12281 dependency, hence all these checks. */
12282 if (REG_P (SET_DEST (PATTERN (insn)))
12283 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12284 {
12285 /* FMACS is a special case where the dependent
12286 instruction can be issued 3 cycles before
12287 the normal latency in case of an output
12288 dependency. */
12289 if ((attr_type_insn == TYPE_FMACS
12290 || attr_type_insn == TYPE_FMACD)
12291 && (attr_type_dep == TYPE_FMACS
12292 || attr_type_dep == TYPE_FMACD))
12293 {
12294 if (dep_type == REG_DEP_OUTPUT)
12295 *cost = insn_default_latency (dep) - 3;
12296 else
12297 *cost = insn_default_latency (dep);
12298 return false;
12299 }
12300 else
12301 {
12302 if (dep_type == REG_DEP_OUTPUT)
12303 *cost = insn_default_latency (dep) + 1;
12304 else
12305 *cost = insn_default_latency (dep);
12306 }
12307 return false;
12308 }
12309 }
12310 }
12311 }
12312 break;
12313
12314 default:
12315 gcc_unreachable ();
12316 }
12317
12318 return true;
12319 }
12320
12321 /* Adjust cost hook for FA726TE. */
12322 static bool
12323 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12324 int * cost)
12325 {
12326 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12327 has a penalty of 3. */
12328 if (dep_type == REG_DEP_TRUE
12329 && recog_memoized (insn) >= 0
12330 && recog_memoized (dep) >= 0
12331 && get_attr_conds (dep) == CONDS_SET)
12332 {
12333 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12334 if (get_attr_conds (insn) == CONDS_USE
12335 && get_attr_type (insn) != TYPE_BRANCH)
12336 {
12337 *cost = 3;
12338 return false;
12339 }
12340
12341 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12342 || get_attr_conds (insn) == CONDS_USE)
12343 {
12344 *cost = 0;
12345 return false;
12346 }
12347 }
12348
12349 return true;
12350 }
12351
12352 /* Implement TARGET_REGISTER_MOVE_COST.
12353
12354 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12355 typically more expensive than a single memory access. We set
12356 the cost to less than two memory accesses so that floating
12357 point to integer conversion does not go through memory. */
12358
12359 int
12360 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12361 reg_class_t from, reg_class_t to)
12362 {
12363 if (TARGET_32BIT)
12364 {
12365 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12366 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12367 return 15;
12368 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12369 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12370 return 4;
12371 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12372 return 20;
12373 else
12374 return 2;
12375 }
12376 else
12377 {
12378 if (from == HI_REGS || to == HI_REGS)
12379 return 4;
12380 else
12381 return 2;
12382 }
12383 }
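/* Illustrative reading of the numbers above for TARGET_32BIT: a
   VFP<->core move is costed at 15, deliberately below the cost of
   two memory accesses (2 * 10, see arm_memory_move_cost below), so
   a float-to-integer conversion stays in registers rather than
   being bounced through memory.  */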
12384
12385 /* Implement TARGET_MEMORY_MOVE_COST. */
12386
12387 int
12388 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12389 bool in ATTRIBUTE_UNUSED)
12390 {
12391 if (TARGET_32BIT)
12392 return 10;
12393 else
12394 {
12395 if (GET_MODE_SIZE (mode) < 4)
12396 return 8;
12397 else
12398 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12399 }
12400 }
12401
12402 /* Vectorizer cost model implementation. */
12403
12404 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12405 static int
12406 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12407 tree vectype,
12408 int misalign ATTRIBUTE_UNUSED)
12409 {
12410 unsigned elements;
12411
12412 switch (type_of_cost)
12413 {
12414 case scalar_stmt:
12415 return current_tune->vec_costs->scalar_stmt_cost;
12416
12417 case scalar_load:
12418 return current_tune->vec_costs->scalar_load_cost;
12419
12420 case scalar_store:
12421 return current_tune->vec_costs->scalar_store_cost;
12422
12423 case vector_stmt:
12424 return current_tune->vec_costs->vec_stmt_cost;
12425
12426 case vector_load:
12427 return current_tune->vec_costs->vec_align_load_cost;
12428
12429 case vector_store:
12430 return current_tune->vec_costs->vec_store_cost;
12431
12432 case vec_to_scalar:
12433 return current_tune->vec_costs->vec_to_scalar_cost;
12434
12435 case scalar_to_vec:
12436 return current_tune->vec_costs->scalar_to_vec_cost;
12437
12438 case unaligned_load:
12439 case vector_gather_load:
12440 return current_tune->vec_costs->vec_unalign_load_cost;
12441
12442 case unaligned_store:
12443 case vector_scatter_store:
12444 return current_tune->vec_costs->vec_unalign_store_cost;
12445
12446 case cond_branch_taken:
12447 return current_tune->vec_costs->cond_taken_branch_cost;
12448
12449 case cond_branch_not_taken:
12450 return current_tune->vec_costs->cond_not_taken_branch_cost;
12451
12452 case vec_perm:
12453 case vec_promote_demote:
12454 return current_tune->vec_costs->vec_stmt_cost;
12455
12456 case vec_construct:
12457 elements = TYPE_VECTOR_SUBPARTS (vectype);
12458 return elements / 2 + 1;
12459
12460 default:
12461 gcc_unreachable ();
12462 }
12463 }
12464
12465 /* Return true if and only if this insn can dual-issue only as older. */
12466 static bool
12467 cortexa7_older_only (rtx_insn *insn)
12468 {
12469 if (recog_memoized (insn) < 0)
12470 return false;
12471
12472 switch (get_attr_type (insn))
12473 {
12474 case TYPE_ALU_DSP_REG:
12475 case TYPE_ALU_SREG:
12476 case TYPE_ALUS_SREG:
12477 case TYPE_LOGIC_REG:
12478 case TYPE_LOGICS_REG:
12479 case TYPE_ADC_REG:
12480 case TYPE_ADCS_REG:
12481 case TYPE_ADR:
12482 case TYPE_BFM:
12483 case TYPE_REV:
12484 case TYPE_MVN_REG:
12485 case TYPE_SHIFT_IMM:
12486 case TYPE_SHIFT_REG:
12487 case TYPE_LOAD_BYTE:
12488 case TYPE_LOAD_4:
12489 case TYPE_STORE_4:
12490 case TYPE_FFARITHS:
12491 case TYPE_FADDS:
12492 case TYPE_FFARITHD:
12493 case TYPE_FADDD:
12494 case TYPE_FMOV:
12495 case TYPE_F_CVT:
12496 case TYPE_FCMPS:
12497 case TYPE_FCMPD:
12498 case TYPE_FCONSTS:
12499 case TYPE_FCONSTD:
12500 case TYPE_FMULS:
12501 case TYPE_FMACS:
12502 case TYPE_FMULD:
12503 case TYPE_FMACD:
12504 case TYPE_FDIVS:
12505 case TYPE_FDIVD:
12506 case TYPE_F_MRC:
12507 case TYPE_F_MRRC:
12508 case TYPE_F_FLAG:
12509 case TYPE_F_LOADS:
12510 case TYPE_F_STORES:
12511 return true;
12512 default:
12513 return false;
12514 }
12515 }
12516
12517 /* Return true if and only if this insn can dual-issue as younger. */
12518 static bool
12519 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12520 {
12521 if (recog_memoized (insn) < 0)
12522 {
12523 if (verbose > 5)
12524 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12525 return false;
12526 }
12527
12528 switch (get_attr_type (insn))
12529 {
12530 case TYPE_ALU_IMM:
12531 case TYPE_ALUS_IMM:
12532 case TYPE_LOGIC_IMM:
12533 case TYPE_LOGICS_IMM:
12534 case TYPE_EXTEND:
12535 case TYPE_MVN_IMM:
12536 case TYPE_MOV_IMM:
12537 case TYPE_MOV_REG:
12538 case TYPE_MOV_SHIFT:
12539 case TYPE_MOV_SHIFT_REG:
12540 case TYPE_BRANCH:
12541 case TYPE_CALL:
12542 return true;
12543 default:
12544 return false;
12545 }
12546 }
12547
12548
12549 /* Look for an instruction that can dual issue only as an older
12550 instruction, and move it in front of any instructions that can
12551 dual-issue as younger, while preserving the relative order of all
12552 other instructions in the ready list. This is a heuristic to help
12553 dual-issue in later cycles, by postponing issue of more flexible
12554 instructions. This heuristic may affect dual issue opportunities
12555 in the current cycle. */
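/* For example (illustrative): if the ready list has a MOV-immediate (which
   can issue as younger) ahead of an ADD-register (older-only), the ADD is
   moved in front so that the more flexible MOV remains available to pair as
   the younger half of a dual-issue pair in a later cycle. */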
12556 static void
12557 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12558 int *n_readyp, int clock)
12559 {
12560 int i;
12561 int first_older_only = -1, first_younger = -1;
12562
12563 if (verbose > 5)
12564 fprintf (file,
12565 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12566 clock,
12567 *n_readyp);
12568
12569 /* Traverse the ready list from the head (the instruction to issue
12570 first), looking for the first instruction that can issue as
12571 younger and the first instruction that can dual-issue only as
12572 older. */
12573 for (i = *n_readyp - 1; i >= 0; i--)
12574 {
12575 rtx_insn *insn = ready[i];
12576 if (cortexa7_older_only (insn))
12577 {
12578 first_older_only = i;
12579 if (verbose > 5)
12580 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12581 break;
12582 }
12583 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12584 first_younger = i;
12585 }
12586
12587 /* Nothing to reorder because either no younger insn was found, or an insn
12588 that can dual-issue only as older appears before any insn that
12589 can dual-issue as younger. */
12590 if (first_younger == -1)
12591 {
12592 if (verbose > 5)
12593 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12594 return;
12595 }
12596
12597 /* Nothing to reorder because no older-only insn in the ready list. */
12598 if (first_older_only == -1)
12599 {
12600 if (verbose > 5)
12601 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12602 return;
12603 }
12604
12605 /* Move first_older_only insn before first_younger. */
12606 if (verbose > 5)
12607 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12608 INSN_UID(ready [first_older_only]),
12609 INSN_UID(ready [first_younger]));
12610 rtx_insn *first_older_only_insn = ready [first_older_only];
12611 for (i = first_older_only; i < first_younger; i++)
12612 {
12613 ready[i] = ready[i+1];
12614 }
12615
12616 ready[i] = first_older_only_insn;
12617 return;
12618 }
12619
12620 /* Implement TARGET_SCHED_REORDER. */
12621 static int
12622 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12623 int clock)
12624 {
12625 switch (arm_tune)
12626 {
12627 case TARGET_CPU_cortexa7:
12628 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12629 break;
12630 default:
12631 /* Do nothing for other cores. */
12632 break;
12633 }
12634
12635 return arm_issue_rate ();
12636 }
12637
12638 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
12639 It corrects the value of COST based on the relationship between
12640 INSN and DEP, as described by the dependence type DEP_TYPE. It returns the new
12641 value. There is a per-core adjust_cost hook to adjust scheduler costs
12642 and the per-core hook can choose to completely override the generic
12643 adjust_cost function. Only put bits of code into arm_adjust_cost that
12644 are common across all cores. */
12645 static int
12646 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12647 unsigned int)
12648 {
12649 rtx i_pat, d_pat;
12650
12651 /* When generating Thumb-1 code, we want to place flag-setting operations
12652 close to a conditional branch which depends on them, so that we can
12653 omit the comparison. */
12654 if (TARGET_THUMB1
12655 && dep_type == 0
12656 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12657 && recog_memoized (dep) >= 0
12658 && get_attr_conds (dep) == CONDS_SET)
12659 return 0;
12660
12661 if (current_tune->sched_adjust_cost != NULL)
12662 {
12663 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12664 return cost;
12665 }
12666
12667 /* XXX Is this strictly true? */
12668 if (dep_type == REG_DEP_ANTI
12669 || dep_type == REG_DEP_OUTPUT)
12670 return 0;
12671
12672 /* Call insns don't incur a stall, even if they follow a load. */
12673 if (dep_type == 0
12674 && CALL_P (insn))
12675 return 1;
12676
12677 if ((i_pat = single_set (insn)) != NULL
12678 && MEM_P (SET_SRC (i_pat))
12679 && (d_pat = single_set (dep)) != NULL
12680 && MEM_P (SET_DEST (d_pat)))
12681 {
12682 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12683 /* This is a load after a store; there is no conflict if the load reads
12684 from a cached area. Assume that loads from the stack, and from the
12685 constant pool are cached, and that others will miss. This is a
12686 hack. */
12687
12688 if ((SYMBOL_REF_P (src_mem)
12689 && CONSTANT_POOL_ADDRESS_P (src_mem))
12690 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12691 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12692 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12693 return 1;
12694 }
12695
12696 return cost;
12697 }
12698
12699 int
12700 arm_max_conditional_execute (void)
12701 {
12702 return max_insns_skipped;
12703 }
12704
12705 static int
12706 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12707 {
12708 if (TARGET_32BIT)
12709 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12710 else
12711 return (optimize > 0) ? 2 : 0;
12712 }
12713
12714 static int
12715 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12716 {
12717 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12718 }
12719
12720 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12721 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12722 sequences of non-executed instructions in IT blocks probably take the same
12723 amount of time as executed instructions (and the IT instruction itself takes
12724 space in icache). This function was experimentally determined to give good
12725 results on a popular embedded benchmark. */
12726
12727 static int
12728 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12729 {
12730 return (TARGET_32BIT && speed_p) ? 1
12731 : arm_default_branch_cost (speed_p, predictable_p);
12732 }
12733
12734 static int
12735 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12736 {
12737 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12738 }
12739
12740 static bool fp_consts_inited = false;
12741
12742 static REAL_VALUE_TYPE value_fp0;
12743
12744 static void
12745 init_fp_table (void)
12746 {
12747 REAL_VALUE_TYPE r;
12748
12749 r = REAL_VALUE_ATOF ("0", DFmode);
12750 value_fp0 = r;
12751 fp_consts_inited = true;
12752 }
12753
12754 /* Return TRUE if rtx X is a valid immediate FP constant. */
12755 int
12756 arm_const_double_rtx (rtx x)
12757 {
12758 const REAL_VALUE_TYPE *r;
12759
12760 if (!fp_consts_inited)
12761 init_fp_table ();
12762
12763 r = CONST_DOUBLE_REAL_VALUE (x);
12764 if (REAL_VALUE_MINUS_ZERO (*r))
12765 return 0;
12766
12767 if (real_equal (r, &value_fp0))
12768 return 1;
12769
12770 return 0;
12771 }
12772
12773 /* VFPv3 has a fairly wide range of representable immediates, formed from
12774 "quarter-precision" floating-point values. These can be evaluated using this
12775 formula (with ^ for exponentiation):
12776
12777 -1^s * n * 2^-r
12778
12779 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12780 16 <= n <= 31 and 0 <= r <= 7.
12781
12782 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12783
12784 - A (most-significant) is the sign bit.
12785 - BCD are the exponent (encoded as r XOR 3).
12786 - EFGH are the mantissa (encoded as n - 16).
12787 */
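/* Worked example (illustrative): 1.0 can be written as 16 * 2^-4, i.e.
   s = 0, n = 16, r = 4. The encoding is therefore A = 0, BCD = 4 XOR 3
   = 0b111, EFGH = 16 - 16 = 0b0000, giving the 8-bit value 0b01110000
   (0x70). */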
12788
12789 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12790 fconst[sd] instruction, or -1 if X isn't suitable. */
12791 static int
12792 vfp3_const_double_index (rtx x)
12793 {
12794 REAL_VALUE_TYPE r, m;
12795 int sign, exponent;
12796 unsigned HOST_WIDE_INT mantissa, mant_hi;
12797 unsigned HOST_WIDE_INT mask;
12798 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12799 bool fail;
12800
12801 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12802 return -1;
12803
12804 r = *CONST_DOUBLE_REAL_VALUE (x);
12805
12806 /* We can't represent these things, so detect them first. */
12807 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12808 return -1;
12809
12810 /* Extract sign, exponent and mantissa. */
12811 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12812 r = real_value_abs (&r);
12813 exponent = REAL_EXP (&r);
12814 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12815 highest (sign) bit, with a fixed binary point at bit point_pos.
12816 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12817 bits for the mantissa, this may fail (low bits would be lost). */
12818 real_ldexp (&m, &r, point_pos - exponent);
12819 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12820 mantissa = w.elt (0);
12821 mant_hi = w.elt (1);
12822
12823 /* If there are bits set in the low part of the mantissa, we can't
12824 represent this value. */
12825 if (mantissa != 0)
12826 return -1;
12827
12828 /* Now make it so that mantissa contains the most-significant bits, and move
12829 the point_pos to indicate that the least-significant bits have been
12830 discarded. */
12831 point_pos -= HOST_BITS_PER_WIDE_INT;
12832 mantissa = mant_hi;
12833
12834 /* We can permit four significant bits of mantissa only, plus a high bit
12835 which is always 1. */
12836 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12837 if ((mantissa & mask) != 0)
12838 return -1;
12839
12840 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12841 mantissa >>= point_pos - 5;
12842
12843 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12844 floating-point immediate zero with Neon using an integer-zero load, but
12845 that case is handled elsewhere.) */
12846 if (mantissa == 0)
12847 return -1;
12848
12849 gcc_assert (mantissa >= 16 && mantissa <= 31);
12850
12851 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12852 normalized significands are in the range [1, 2). (Our mantissa is shifted
12853 left 4 places at this point relative to normalized IEEE754 values). GCC
12854 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12855 REAL_EXP must be altered. */
12856 exponent = 5 - exponent;
12857
12858 if (exponent < 0 || exponent > 7)
12859 return -1;
12860
12861 /* Sign, mantissa and exponent are now in the correct form to plug into the
12862 formula described in the comment above. */
12863 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12864 }
12865
12866 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12867 int
12868 vfp3_const_double_rtx (rtx x)
12869 {
12870 if (!TARGET_VFP3)
12871 return 0;
12872
12873 return vfp3_const_double_index (x) != -1;
12874 }
12875
12876 /* Recognize immediates which can be used in various Neon and MVE instructions.
12877 Legal immediates are described by the following table (for VMVN variants, the
12878 bitwise inverse of the constant shown is recognized. In either case, VMOV
12879 is output and the correct instruction to use for a given constant is chosen
12880 by the assembler). The constant shown is replicated across all elements of
12881 the destination vector.
12882
12883 insn elems variant constant (binary)
12884 ---- ----- ------- -----------------
12885 vmov i32 0 00000000 00000000 00000000 abcdefgh
12886 vmov i32 1 00000000 00000000 abcdefgh 00000000
12887 vmov i32 2 00000000 abcdefgh 00000000 00000000
12888 vmov i32 3 abcdefgh 00000000 00000000 00000000
12889 vmov i16 4 00000000 abcdefgh
12890 vmov i16 5 abcdefgh 00000000
12891 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12892 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12893 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12894 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12895 vmvn i16 10 00000000 abcdefgh
12896 vmvn i16 11 abcdefgh 00000000
12897 vmov i32 12 00000000 00000000 abcdefgh 11111111
12898 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12899 vmov i32 14 00000000 abcdefgh 11111111 11111111
12900 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12901 vmov i8 16 abcdefgh
12902 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12903 eeeeeeee ffffffff gggggggg hhhhhhhh
12904 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12905 vmov f32 19 00000000 00000000 00000000 00000000
12906
12907 For case 18, B = !b. Representable values are exactly those accepted by
12908 vfp3_const_double_index, but are output as floating-point numbers rather
12909 than indices.
12910
12911 For case 19, we will change it to vmov.i32 when assembling.
12912
12913 Variants 0-5 (inclusive) may also be used as immediates for the second
12914 operand of VORR/VBIC instructions.
12915
12916 The INVERSE argument causes the bitwise inverse of the given operand to be
12917 recognized instead (used for recognizing legal immediates for the VAND/VORN
12918 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12919 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12920 output, rather than the real insns vbic/vorr).
12921
12922 INVERSE makes no difference to the recognition of float vectors.
12923
12924 The return value is the variant of immediate as shown in the above table, or
12925 -1 if the given value doesn't match any of the listed patterns.
12926 */
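/* Illustrative example (little-endian): a V4SImode CONST_VECTOR whose four
   elements are all 0x0000ab00 matches variant 1 above, so the return value
   is 1, *ELEMENTWIDTH is set to 32 and *MODCONST to (const_int 0xab00). */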
12927 static int
12928 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12929 rtx *modconst, int *elementwidth)
12930 {
12931 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12932 matches = 1; \
12933 for (i = 0; i < idx; i += (STRIDE)) \
12934 if (!(TEST)) \
12935 matches = 0; \
12936 if (matches) \
12937 { \
12938 immtype = (CLASS); \
12939 elsize = (ELSIZE); \
12940 break; \
12941 }
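/* Each CHECK below tests one variant from the table above: it evaluates TEST
   at every STRIDE-th byte position, and the first variant whose TEST holds
   throughout records its class in IMMTYPE and its element size in ELSIZE,
   then breaks out of the do { ... } while (0) block further down. */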
12942
12943 unsigned int i, elsize = 0, idx = 0, n_elts;
12944 unsigned int innersize;
12945 unsigned char bytes[16] = {};
12946 int immtype = -1, matches;
12947 unsigned int invmask = inverse ? 0xff : 0;
12948 bool vector = GET_CODE (op) == CONST_VECTOR;
12949
12950 if (vector)
12951 n_elts = CONST_VECTOR_NUNITS (op);
12952 else
12953 {
12954 n_elts = 1;
12955 gcc_assert (mode != VOIDmode);
12956 }
12957
12958 innersize = GET_MODE_UNIT_SIZE (mode);
12959
12960 /* Only support 128-bit vectors for MVE. */
12961 if (TARGET_HAVE_MVE
12962 && (!vector
12963 || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12964 || n_elts * innersize != 16))
12965 return -1;
12966
12967 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12968 return -1;
12969
12970 /* Vectors of float constants. */
12971 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12972 {
12973 rtx el0 = CONST_VECTOR_ELT (op, 0);
12974
12975 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12976 return -1;
12977
12978 /* FP16 vectors cannot be represented. */
12979 if (GET_MODE_INNER (mode) == HFmode)
12980 return -1;
12981
12982 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12983 are distinct in this context. */
12984 if (!const_vec_duplicate_p (op))
12985 return -1;
12986
12987 if (modconst)
12988 *modconst = CONST_VECTOR_ELT (op, 0);
12989
12990 if (elementwidth)
12991 *elementwidth = 0;
12992
12993 if (el0 == CONST0_RTX (GET_MODE (el0)))
12994 return 19;
12995 else
12996 return 18;
12997 }
12998
12999 /* The tricks done in the code below apply for little-endian vector layout.
13000 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13001 FIXME: Implement logic for big-endian vectors. */
13002 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13003 return -1;
13004
13005 /* Splat vector constant out into a byte vector. */
13006 for (i = 0; i < n_elts; i++)
13007 {
13008 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13009 unsigned HOST_WIDE_INT elpart;
13010
13011 gcc_assert (CONST_INT_P (el));
13012 elpart = INTVAL (el);
13013
13014 for (unsigned int byte = 0; byte < innersize; byte++)
13015 {
13016 bytes[idx++] = (elpart & 0xff) ^ invmask;
13017 elpart >>= BITS_PER_UNIT;
13018 }
13019 }
13020
13021 /* Sanity check. */
13022 gcc_assert (idx == GET_MODE_SIZE (mode));
13023
13024 do
13025 {
13026 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13027 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13028
13029 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13030 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13031
13032 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13033 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13034
13035 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13036 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13037
13038 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13039
13040 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13041
13042 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13043 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13044
13045 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13046 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13047
13048 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13049 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13050
13051 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13052 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13053
13054 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13055
13056 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13057
13058 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13059 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13060
13061 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13062 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13063
13064 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13065 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13066
13067 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13068 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13069
13070 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13071
13072 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13073 && bytes[i] == bytes[(i + 8) % idx]);
13074 }
13075 while (0);
13076
13077 if (immtype == -1)
13078 return -1;
13079
13080 if (elementwidth)
13081 *elementwidth = elsize;
13082
13083 if (modconst)
13084 {
13085 unsigned HOST_WIDE_INT imm = 0;
13086
13087 /* Un-invert bytes of recognized vector, if necessary. */
13088 if (invmask != 0)
13089 for (i = 0; i < idx; i++)
13090 bytes[i] ^= invmask;
13091
13092 if (immtype == 17)
13093 {
13094 /* FIXME: Broken on 32-bit H_W_I hosts. */
13095 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13096
13097 for (i = 0; i < 8; i++)
13098 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13099 << (i * BITS_PER_UNIT);
13100
13101 *modconst = GEN_INT (imm);
13102 }
13103 else
13104 {
13105 unsigned HOST_WIDE_INT imm = 0;
13106
13107 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13108 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13109
13110 *modconst = GEN_INT (imm);
13111 }
13112 }
13113
13114 return immtype;
13115 #undef CHECK
13116 }
13117
13118 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13119 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13120 (or zero for float elements), and a modified constant (whatever should be
13121 output for a VMOV) in *MODCONST. The function was renamed from
13122 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13123 it is used by both Neon and MVE. */
13124 int
13125 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13126 rtx *modconst, int *elementwidth)
13127 {
13128 rtx tmpconst;
13129 int tmpwidth;
13130 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13131
13132 if (retval == -1)
13133 return 0;
13134
13135 if (modconst)
13136 *modconst = tmpconst;
13137
13138 if (elementwidth)
13139 *elementwidth = tmpwidth;
13140
13141 return 1;
13142 }
13143
13144 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13145 the immediate is valid, write a constant suitable for using as an operand
13146 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13147 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13148
13149 int
13150 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13151 rtx *modconst, int *elementwidth)
13152 {
13153 rtx tmpconst;
13154 int tmpwidth;
13155 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13156
13157 if (retval < 0 || retval > 5)
13158 return 0;
13159
13160 if (modconst)
13161 *modconst = tmpconst;
13162
13163 if (elementwidth)
13164 *elementwidth = tmpwidth;
13165
13166 return 1;
13167 }
13168
13169 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13170 the immediate is valid, write a constant suitable for using as an operand
13171 to VSHR/VSHL to *MODCONST and the corresponding element width to
13172 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
13173 which have different immediate ranges. */
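/* For example, for a V8HImode operand (16-bit elements) a right-shift
   immediate must lie in [1, 16], while a left-shift immediate must lie in
   [0, 15]. */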
13174
13175 int
13176 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13177 rtx *modconst, int *elementwidth,
13178 bool isleftshift)
13179 {
13180 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13181 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13182 unsigned HOST_WIDE_INT last_elt = 0;
13183 unsigned HOST_WIDE_INT maxshift;
13184
13185 /* Split vector constant out into a byte vector. */
13186 for (i = 0; i < n_elts; i++)
13187 {
13188 rtx el = CONST_VECTOR_ELT (op, i);
13189 unsigned HOST_WIDE_INT elpart;
13190
13191 if (CONST_INT_P (el))
13192 elpart = INTVAL (el);
13193 else if (CONST_DOUBLE_P (el))
13194 return 0;
13195 else
13196 gcc_unreachable ();
13197
13198 if (i != 0 && elpart != last_elt)
13199 return 0;
13200
13201 last_elt = elpart;
13202 }
13203
13204 /* Shift less than element size. */
13205 maxshift = innersize * 8;
13206
13207 if (isleftshift)
13208 {
13209 /* Left shift immediate value can be from 0 to <size>-1. */
13210 if (last_elt >= maxshift)
13211 return 0;
13212 }
13213 else
13214 {
13215 /* Right shift immediate value can be from 1 to <size>. */
13216 if (last_elt == 0 || last_elt > maxshift)
13217 return 0;
13218 }
13219
13220 if (elementwidth)
13221 *elementwidth = innersize * 8;
13222
13223 if (modconst)
13224 *modconst = CONST_VECTOR_ELT (op, 0);
13225
13226 return 1;
13227 }
13228
13229 /* Return a string suitable for output of Neon immediate logic operation
13230 MNEM. */
13231
13232 char *
13233 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13234 int inverse, int quad)
13235 {
13236 int width, is_valid;
13237 static char templ[40];
13238
13239 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13240
13241 gcc_assert (is_valid != 0);
13242
13243 if (quad)
13244 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13245 else
13246 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13247
13248 return templ;
13249 }
13250
13251 /* Return a string suitable for output of Neon immediate shift operation
13252 (VSHR or VSHL) MNEM. */
13253
13254 char *
13255 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13256 machine_mode mode, int quad,
13257 bool isleftshift)
13258 {
13259 int width, is_valid;
13260 static char templ[40];
13261
13262 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13263 gcc_assert (is_valid != 0);
13264
13265 if (quad)
13266 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13267 else
13268 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13269
13270 return templ;
13271 }
13272
13273 /* Output a sequence of pairwise operations to implement a reduction.
13274 NOTE: We do "too much work" here, because pairwise operations work on two
13275 registers-worth of operands in one go. Unfortunately we can't exploit those
13276 extra calculations to do the full operation in fewer steps, I don't think.
13277 Although all vector elements of the result but the first are ignored, we
13278 actually calculate the same result in each of the elements. An alternative
13279 such as initially loading a vector with zero to use as each of the second
13280 operands would use up an additional register and take an extra instruction,
13281 for no particular gain. */
13282
13283 void
13284 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13285 rtx (*reduc) (rtx, rtx, rtx))
13286 {
13287 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13288 rtx tmpsum = op1;
13289
13290 for (i = parts / 2; i >= 1; i /= 2)
13291 {
13292 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13293 emit_insn (reduc (dest, tmpsum, tmpsum));
13294 tmpsum = dest;
13295 }
13296 }
13297
13298 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13299 loaded into a register using VDUP.
13300
13301 If this is the case, and GENERATE is set, we also generate
13302 instructions to do this and return an RTX to assign to the register. */
13303
13304 static rtx
13305 neon_vdup_constant (rtx vals, bool generate)
13306 {
13307 machine_mode mode = GET_MODE (vals);
13308 machine_mode inner_mode = GET_MODE_INNER (mode);
13309 rtx x;
13310
13311 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13312 return NULL_RTX;
13313
13314 if (!const_vec_duplicate_p (vals, &x))
13315 /* The elements are not all the same. We could handle repeating
13316 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13317 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13318 vdup.i16). */
13319 return NULL_RTX;
13320
13321 if (!generate)
13322 return x;
13323
13324 /* We can load this constant by using VDUP and a constant in a
13325 single ARM register. This will be cheaper than a vector
13326 load. */
13327
13328 x = copy_to_mode_reg (inner_mode, x);
13329 return gen_vec_duplicate (mode, x);
13330 }
13331
13332 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13333 rtx
13334 mve_bool_vec_to_const (rtx const_vec)
13335 {
13336 int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
13337 int repeat = 16 / n_elts;
13338 int i;
13339 int hi_val = 0;
13340
13341 for (i = 0; i < n_elts; i++)
13342 {
13343 rtx el = CONST_VECTOR_ELT (const_vec, i);
13344 unsigned HOST_WIDE_INT elpart;
13345
13346 gcc_assert (CONST_INT_P (el));
13347 elpart = INTVAL (el);
13348
13349 for (int j = 0; j < repeat; j++)
13350 hi_val |= elpart << (i * repeat + j);
13351 }
13352 return gen_int_mode (hi_val, HImode);
13353 }
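/* Illustrative example (assuming a 4-element predicate constant with 0/1
   elements, e.g. V4BImode): for {1, 0, 1, 1}, n_elts is 4 and repeat is 4,
   so each element is replicated into a 4-bit group and the result is
   (const_int 0xff0f). */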
13354
13355 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13356 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13357 into a register.
13358
13359 If this is the case, and GENERATE is set, we also generate code to do
13360 this and return an RTX to copy into the register. */
13361
13362 rtx
13363 neon_make_constant (rtx vals, bool generate)
13364 {
13365 machine_mode mode = GET_MODE (vals);
13366 rtx target;
13367 rtx const_vec = NULL_RTX;
13368 int n_elts = GET_MODE_NUNITS (mode);
13369 int n_const = 0;
13370 int i;
13371
13372 if (GET_CODE (vals) == CONST_VECTOR)
13373 const_vec = vals;
13374 else if (GET_CODE (vals) == PARALLEL)
13375 {
13376 /* A CONST_VECTOR must contain only CONST_INTs and
13377 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13378 Only store valid constants in a CONST_VECTOR. */
13379 for (i = 0; i < n_elts; ++i)
13380 {
13381 rtx x = XVECEXP (vals, 0, i);
13382 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13383 n_const++;
13384 }
13385 if (n_const == n_elts)
13386 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13387 }
13388 else
13389 gcc_unreachable ();
13390
13391 if (const_vec != NULL
13392 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13393 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13394 return const_vec;
13395 else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
13396 return mve_bool_vec_to_const (const_vec);
13397 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13398 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13399 pipeline cycle; creating the constant takes one or two ARM
13400 pipeline cycles. */
13401 return target;
13402 else if (const_vec != NULL_RTX)
13403 /* Load from constant pool. On Cortex-A8 this takes two cycles
13404 (for either double or quad vectors). We cannot take advantage
13405 of single-cycle VLD1 because we need a PC-relative addressing
13406 mode. */
13407 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13408 else
13409 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13410 We cannot construct an initializer. */
13411 return NULL_RTX;
13412 }
13413
13414 /* Initialize vector TARGET to VALS. */
13415
13416 void
13417 neon_expand_vector_init (rtx target, rtx vals)
13418 {
13419 machine_mode mode = GET_MODE (target);
13420 machine_mode inner_mode = GET_MODE_INNER (mode);
13421 int n_elts = GET_MODE_NUNITS (mode);
13422 int n_var = 0, one_var = -1;
13423 bool all_same = true;
13424 rtx x, mem;
13425 int i;
13426
13427 for (i = 0; i < n_elts; ++i)
13428 {
13429 x = XVECEXP (vals, 0, i);
13430 if (!CONSTANT_P (x))
13431 ++n_var, one_var = i;
13432
13433 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13434 all_same = false;
13435 }
13436
13437 if (n_var == 0)
13438 {
13439 rtx constant = neon_make_constant (vals);
13440 if (constant != NULL_RTX)
13441 {
13442 emit_move_insn (target, constant);
13443 return;
13444 }
13445 }
13446
13447 /* Splat a single non-constant element if we can. */
13448 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13449 {
13450 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13451 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13452 return;
13453 }
13454
13455 /* One field is non-constant. Load constant then overwrite varying
13456 field. This is more efficient than using the stack. */
13457 if (n_var == 1)
13458 {
13459 rtx copy = copy_rtx (vals);
13460 rtx merge_mask = GEN_INT (1 << one_var);
13461
13462 /* Load constant part of vector, substitute neighboring value for
13463 varying element. */
13464 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13465 neon_expand_vector_init (target, copy);
13466
13467 /* Insert variable. */
13468 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13469 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13470 return;
13471 }
13472
13473 /* Construct the vector in memory one field at a time
13474 and load the whole vector. */
13475 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13476 for (i = 0; i < n_elts; i++)
13477 emit_move_insn (adjust_address_nv (mem, inner_mode,
13478 i * GET_MODE_SIZE (inner_mode)),
13479 XVECEXP (vals, 0, i));
13480 emit_move_insn (target, mem);
13481 }
13482
13483 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13484 an error (described by DESC) if it doesn't. EXP indicates the source location, which includes the
13485 inlining history for intrinsics. */
13486
13487 static void
13488 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13489 const_tree exp, const char *desc)
13490 {
13491 HOST_WIDE_INT lane;
13492
13493 gcc_assert (CONST_INT_P (operand));
13494
13495 lane = INTVAL (operand);
13496
13497 if (lane < low || lane >= high)
13498 {
13499 if (exp)
13500 error_at (EXPR_LOCATION (exp),
13501 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13502 else
13503 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13504 }
13505 }
13506
13507 /* Bounds-check lanes. */
13508
13509 void
13510 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13511 const_tree exp)
13512 {
13513 bounds_check (operand, low, high, exp, "lane");
13514 }
13515
13516 /* Bounds-check constants. */
13517
13518 void
13519 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13520 {
13521 bounds_check (operand, low, high, NULL_TREE, "constant");
13522 }
13523
13524 HOST_WIDE_INT
13525 neon_element_bits (machine_mode mode)
13526 {
13527 return GET_MODE_UNIT_BITSIZE (mode);
13528 }
13529
13530 \f
13531 /* Predicates for `match_operand' and `match_operator'. */
13532
13533 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13534 WB level is 2 if full writeback address modes are allowed, 1
13535 if limited writeback address modes (POST_INC and PRE_DEC) are
13536 allowed and 0 if no writeback at all is supported. */
13537
13538 int
13539 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13540 {
13541 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13542 rtx ind;
13543
13544 /* Reject eliminable registers. */
13545 if (! (reload_in_progress || reload_completed || lra_in_progress)
13546 && ( reg_mentioned_p (frame_pointer_rtx, op)
13547 || reg_mentioned_p (arg_pointer_rtx, op)
13548 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13549 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13550 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13551 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13552 return FALSE;
13553
13554 /* Constants are converted into offsets from labels. */
13555 if (!MEM_P (op))
13556 return FALSE;
13557
13558 ind = XEXP (op, 0);
13559
13560 if (reload_completed
13561 && (LABEL_REF_P (ind)
13562 || (GET_CODE (ind) == CONST
13563 && GET_CODE (XEXP (ind, 0)) == PLUS
13564 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13565 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13566 return TRUE;
13567
13568 /* Match: (mem (reg)). */
13569 if (REG_P (ind))
13570 return arm_address_register_rtx_p (ind, 0);
13571
13572 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13573 acceptable whenever any writeback is allowed (subject to verification
13574 by arm_address_register_rtx_p). PRE_INC and POST_DEC additionally
13575 require full writeback, i.e. they are only accepted when WB_LEVEL
13576 is greater than 1. */
13577 if (wb_level > 0
13578 && (GET_CODE (ind) == POST_INC
13579 || GET_CODE (ind) == PRE_DEC
13580 || (wb_level > 1
13581 && (GET_CODE (ind) == PRE_INC
13582 || GET_CODE (ind) == POST_DEC))))
13583 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13584
13585 if (wb_level > 1
13586 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13587 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13588 && GET_CODE (XEXP (ind, 1)) == PLUS
13589 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13590 ind = XEXP (ind, 1);
13591
13592 /* Match:
13593 (plus (reg)
13594 (const))
13595
13596 The encoded immediate for 16-bit modes is multiplied by 2,
13597 while the encoded immediate for 32-bit and 64-bit modes is
13598 multiplied by 4. */
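/* For example, for SImode the factor below is 4, so legal offsets are the
   multiples of 4 in the range [-1020, 1020]. */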
13599 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13600 if (GET_CODE (ind) == PLUS
13601 && REG_P (XEXP (ind, 0))
13602 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13603 && CONST_INT_P (XEXP (ind, 1))
13604 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13605 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13606 return TRUE;
13607
13608 return FALSE;
13609 }
13610
13611 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13612 WB is true if full writeback address modes are allowed and is false
13613 if limited writeback address modes (POST_INC and PRE_DEC) are
13614 allowed. */
13615
13616 int arm_coproc_mem_operand (rtx op, bool wb)
13617 {
13618 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13619 }
13620
13621 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13622 context in which no writeback address modes are allowed. */
13623
13624 int
13625 arm_coproc_mem_operand_no_writeback (rtx op)
13626 {
13627 return arm_coproc_mem_operand_wb (op, 0);
13628 }
13629
13630 /* This function returns TRUE on matching mode and op.
13631 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13632 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13633 int
13634 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13635 {
13636 enum rtx_code code;
13637 int val, reg_no;
13638
13639 /* Match: (mem (reg)). */
13640 if (REG_P (op))
13641 {
13642 int reg_no = REGNO (op);
13643 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13644 ? reg_no <= LAST_LO_REGNUM
13645 : reg_no < LAST_ARM_REGNUM)
13646 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13647 }
13648 code = GET_CODE (op);
13649
13650 if (code == POST_INC || code == PRE_DEC
13651 || code == PRE_INC || code == POST_DEC)
13652 {
13653 reg_no = REGNO (XEXP (op, 0));
13654 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13655 ? reg_no <= LAST_LO_REGNUM
13656 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13657 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13658 }
13659 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13660 && GET_CODE (XEXP (op, 1)) == PLUS
13661 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13662 && REG_P (XEXP (op, 0))
13663 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13664 /* Make sure to only accept PLUS after reload_completed, otherwise
13665 this will interfere with auto_inc's pattern detection. */
13666 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13667 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13668 {
13669 reg_no = REGNO (XEXP (op, 0));
13670 if (code == PLUS)
13671 val = INTVAL (XEXP (op, 1));
13672 else
13673 val = INTVAL (XEXP(XEXP (op, 1), 1));
13674
13675 switch (mode)
13676 {
13677 case E_V16QImode:
13678 case E_V8QImode:
13679 case E_V4QImode:
13680 if (abs (val) > 127)
13681 return FALSE;
13682 break;
13683 case E_V8HImode:
13684 case E_V8HFmode:
13685 case E_V4HImode:
13686 case E_V4HFmode:
13687 if (val % 2 != 0 || abs (val) > 254)
13688 return FALSE;
13689 break;
13690 case E_V4SImode:
13691 case E_V4SFmode:
13692 if (val % 4 != 0 || abs (val) > 508)
13693 return FALSE;
13694 break;
13695 default:
13696 return FALSE;
13697 }
13698 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13699 || (MVE_STN_LDW_MODE (mode)
13700 ? reg_no <= LAST_LO_REGNUM
13701 : (reg_no < LAST_ARM_REGNUM
13702 && (code == PLUS || reg_no != SP_REGNUM))));
13703 }
13704 return FALSE;
13705 }
13706
13707 /* Return TRUE if OP is a memory operand which we can load or store a vector
13708 to/from. TYPE is one of the following values:
13709 0 - Vector load/store (vldr)
13710 1 - Core registers (ldm)
13711 2 - Element/structure loads (vld1)
13712 */
13713 int
13714 neon_vector_mem_operand (rtx op, int type, bool strict)
13715 {
13716 rtx ind;
13717
13718 /* Reject eliminable registers. */
13719 if (strict && ! (reload_in_progress || reload_completed)
13720 && (reg_mentioned_p (frame_pointer_rtx, op)
13721 || reg_mentioned_p (arg_pointer_rtx, op)
13722 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13723 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13724 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13725 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13726 return FALSE;
13727
13728 /* Constants are converted into offsets from labels. */
13729 if (!MEM_P (op))
13730 return FALSE;
13731
13732 ind = XEXP (op, 0);
13733
13734 if (reload_completed
13735 && (LABEL_REF_P (ind)
13736 || (GET_CODE (ind) == CONST
13737 && GET_CODE (XEXP (ind, 0)) == PLUS
13738 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13739 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13740 return TRUE;
13741
13742 /* Match: (mem (reg)). */
13743 if (REG_P (ind))
13744 return arm_address_register_rtx_p (ind, 0);
13745
13746 /* Allow post-increment with Neon registers. */
13747 if ((type != 1 && GET_CODE (ind) == POST_INC)
13748 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13749 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13750
13751 /* Allow post-increment by register for VLDn. */
13752 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13753 && GET_CODE (XEXP (ind, 1)) == PLUS
13754 && REG_P (XEXP (XEXP (ind, 1), 1))
13755 && REG_P (XEXP (ind, 0))
13756 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13757 return true;
13758
13759 /* Match:
13760 (plus (reg)
13761 (const)). */
13762 if (type == 0
13763 && GET_CODE (ind) == PLUS
13764 && REG_P (XEXP (ind, 0))
13765 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13766 && CONST_INT_P (XEXP (ind, 1))
13767 && INTVAL (XEXP (ind, 1)) > -1024
13768 /* For quad modes, we restrict the constant offset to be slightly less
13769 than what the instruction format permits. We have no such constraint
13770 on double mode offsets. (This must match arm_legitimate_index_p.) */
13771 && (INTVAL (XEXP (ind, 1))
13772 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13773 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13774 return TRUE;
13775
13776 return FALSE;
13777 }
13778
13779 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13780 type. */
13781 int
13782 mve_struct_mem_operand (rtx op)
13783 {
13784 rtx ind = XEXP (op, 0);
13785
13786 /* Match: (mem (reg)). */
13787 if (REG_P (ind))
13788 return arm_address_register_rtx_p (ind, 0);
13789
13790 /* Allow only post-increment by the mode size. */
13791 if (GET_CODE (ind) == POST_INC)
13792 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13793
13794 return FALSE;
13795 }
13796
13797 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13798 type. */
13799 int
13800 neon_struct_mem_operand (rtx op)
13801 {
13802 rtx ind;
13803
13804 /* Reject eliminable registers. */
13805 if (! (reload_in_progress || reload_completed)
13806 && ( reg_mentioned_p (frame_pointer_rtx, op)
13807 || reg_mentioned_p (arg_pointer_rtx, op)
13808 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13809 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13810 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13811 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13812 return FALSE;
13813
13814 /* Constants are converted into offsets from labels. */
13815 if (!MEM_P (op))
13816 return FALSE;
13817
13818 ind = XEXP (op, 0);
13819
13820 if (reload_completed
13821 && (LABEL_REF_P (ind)
13822 || (GET_CODE (ind) == CONST
13823 && GET_CODE (XEXP (ind, 0)) == PLUS
13824 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13825 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13826 return TRUE;
13827
13828 /* Match: (mem (reg)). */
13829 if (REG_P (ind))
13830 return arm_address_register_rtx_p (ind, 0);
13831
13832 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13833 if (GET_CODE (ind) == POST_INC
13834 || GET_CODE (ind) == PRE_DEC)
13835 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13836
13837 return FALSE;
13838 }
13839
13840 /* Prepares the operands for the VCMLA by lane instruction such that the right
13841 register number is selected. This instruction is special in that it always
13842 requires a D register, however there is a choice to be made between Dn[0],
13843 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13844
13845 The VCMLA by lane function always selects two values. For instance given D0
13846 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13847 used by the instruction. However given V4SF then index 0 and 1 are valid as
13848 D0[0] or D1[0] are both valid.
13849
13850 This function centralizes that information based on OPERANDS: OPERANDS[3]
13851 is changed from a REG into a CONST_INT RTX and OPERANDS[4] is updated to
13852 contain the right index. */
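/* Illustrative example (assuming OPERANDS[3] lives in q0, i.e. d0/d1): for a
   V4SFmode operand with lane index 1, OPERANDS[3] becomes the D-register
   index 1 and OPERANDS[4] becomes 0, selecting D1[0]. */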
13853
13854 rtx *
13855 neon_vcmla_lane_prepare_operands (rtx *operands)
13856 {
13857 int lane = INTVAL (operands[4]);
13858 machine_mode constmode = SImode;
13859 machine_mode mode = GET_MODE (operands[3]);
13860 int regno = REGNO (operands[3]);
13861 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13862 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13863 {
13864 operands[3] = gen_int_mode (regno + 1, constmode);
13865 operands[4]
13866 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13867 }
13868 else
13869 {
13870 operands[3] = gen_int_mode (regno, constmode);
13871 operands[4] = gen_int_mode (lane, constmode);
13872 }
13873 return operands;
13874 }
13875
13876
13877 /* Return true if X is a register that will be eliminated later on. */
13878 int
13879 arm_eliminable_register (rtx x)
13880 {
13881 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13882 || REGNO (x) == ARG_POINTER_REGNUM
13883 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13884 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13885 }
13886
13887 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13888 coprocessor registers. Otherwise return NO_REGS. */
13889
13890 enum reg_class
13891 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13892 {
13893 if (mode == HFmode)
13894 {
13895 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13896 return GENERAL_REGS;
13897 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13898 return NO_REGS;
13899 return GENERAL_REGS;
13900 }
13901
13902 /* The neon move patterns handle all legitimate vector and struct
13903 addresses. */
13904 if (TARGET_NEON
13905 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13906 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13907 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13908 || VALID_NEON_STRUCT_MODE (mode)))
13909 return NO_REGS;
13910
13911 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13912 return NO_REGS;
13913
13914 return GENERAL_REGS;
13915 }
13916
13917 /* Values which must be returned in the most-significant end of the return
13918 register. */
13919
13920 static bool
13921 arm_return_in_msb (const_tree valtype)
13922 {
13923 return (TARGET_AAPCS_BASED
13924 && BYTES_BIG_ENDIAN
13925 && (AGGREGATE_TYPE_P (valtype)
13926 || TREE_CODE (valtype) == COMPLEX_TYPE
13927 || FIXED_POINT_TYPE_P (valtype)));
13928 }
13929
13930 /* Return TRUE if X references a SYMBOL_REF. */
13931 int
13932 symbol_mentioned_p (rtx x)
13933 {
13934 const char * fmt;
13935 int i;
13936
13937 if (SYMBOL_REF_P (x))
13938 return 1;
13939
13940 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13941 are constant offsets, not symbols. */
13942 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13943 return 0;
13944
13945 fmt = GET_RTX_FORMAT (GET_CODE (x));
13946
13947 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13948 {
13949 if (fmt[i] == 'E')
13950 {
13951 int j;
13952
13953 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13954 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13955 return 1;
13956 }
13957 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13958 return 1;
13959 }
13960
13961 return 0;
13962 }
13963
13964 /* Return TRUE if X references a LABEL_REF. */
13965 int
13966 label_mentioned_p (rtx x)
13967 {
13968 const char * fmt;
13969 int i;
13970
13971 if (LABEL_REF_P (x))
13972 return 1;
13973
13974 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13975 instruction, but they are constant offsets, not symbols. */
13976 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13977 return 0;
13978
13979 fmt = GET_RTX_FORMAT (GET_CODE (x));
13980 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13981 {
13982 if (fmt[i] == 'E')
13983 {
13984 int j;
13985
13986 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13987 if (label_mentioned_p (XVECEXP (x, i, j)))
13988 return 1;
13989 }
13990 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13991 return 1;
13992 }
13993
13994 return 0;
13995 }
13996
13997 int
13998 tls_mentioned_p (rtx x)
13999 {
14000 switch (GET_CODE (x))
14001 {
14002 case CONST:
14003 return tls_mentioned_p (XEXP (x, 0));
14004
14005 case UNSPEC:
14006 if (XINT (x, 1) == UNSPEC_TLS)
14007 return 1;
14008
14009 /* Fall through. */
14010 default:
14011 return 0;
14012 }
14013 }
14014
14015 /* Must not copy any rtx that uses a pc-relative address.
14016 Also, disallow copying of load-exclusive instructions that
14017 may appear after splitting of compare-and-swap-style operations
14018 so as to prevent those loops from being transformed away from their
14019 canonical forms (see PR 69904). */
14020
14021 static bool
14022 arm_cannot_copy_insn_p (rtx_insn *insn)
14023 {
14024 /* The tls call insn cannot be copied, as it is paired with a data
14025 word. */
14026 if (recog_memoized (insn) == CODE_FOR_tlscall)
14027 return true;
14028
14029 subrtx_iterator::array_type array;
14030 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14031 {
14032 const_rtx x = *iter;
14033 if (GET_CODE (x) == UNSPEC
14034 && (XINT (x, 1) == UNSPEC_PIC_BASE
14035 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14036 return true;
14037 }
14038
14039 rtx set = single_set (insn);
14040 if (set)
14041 {
14042 rtx src = SET_SRC (set);
14043 if (GET_CODE (src) == ZERO_EXTEND)
14044 src = XEXP (src, 0);
14045
14046 /* Catch the load-exclusive and load-acquire operations. */
14047 if (GET_CODE (src) == UNSPEC_VOLATILE
14048 && (XINT (src, 1) == VUNSPEC_LL
14049 || XINT (src, 1) == VUNSPEC_LAX))
14050 return true;
14051 }
14052 return false;
14053 }
14054
14055 enum rtx_code
14056 minmax_code (rtx x)
14057 {
14058 enum rtx_code code = GET_CODE (x);
14059
14060 switch (code)
14061 {
14062 case SMAX:
14063 return GE;
14064 case SMIN:
14065 return LE;
14066 case UMIN:
14067 return LEU;
14068 case UMAX:
14069 return GEU;
14070 default:
14071 gcc_unreachable ();
14072 }
14073 }
14074
14075 /* Match pair of min/max operators that can be implemented via usat/ssat. */
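/* For example (illustrative): bounds 0 and 255 match usat with *MASK == 8,
   and bounds -128 and 127 match ssat with *MASK == 8 (usat #8 clamps to
   [0, 255], ssat #8 to [-128, 127]). */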
14076
14077 bool
14078 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14079 int *mask, bool *signed_sat)
14080 {
14081 /* The high bound must be a power of two minus one. */
14082 int log = exact_log2 (INTVAL (hi_bound) + 1);
14083 if (log == -1)
14084 return false;
14085
14086 /* The low bound is either zero (for usat) or one less than the
14087 negation of the high bound (for ssat). */
14088 if (INTVAL (lo_bound) == 0)
14089 {
14090 if (mask)
14091 *mask = log;
14092 if (signed_sat)
14093 *signed_sat = false;
14094
14095 return true;
14096 }
14097
14098 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14099 {
14100 if (mask)
14101 *mask = log + 1;
14102 if (signed_sat)
14103 *signed_sat = true;
14104
14105 return true;
14106 }
14107
14108 return false;
14109 }
14110
14111 /* Return 1 if memory locations are adjacent. */
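/* For example (on a target without load delay slots), [r0, #4] and [r0, #8]
   are adjacent, while [r0, #4] and [r1, #8] are not. */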
14112 int
14113 adjacent_mem_locations (rtx a, rtx b)
14114 {
14115 /* We don't guarantee to preserve the order of these memory refs. */
14116 if (volatile_refs_p (a) || volatile_refs_p (b))
14117 return 0;
14118
14119 if ((REG_P (XEXP (a, 0))
14120 || (GET_CODE (XEXP (a, 0)) == PLUS
14121 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14122 && (REG_P (XEXP (b, 0))
14123 || (GET_CODE (XEXP (b, 0)) == PLUS
14124 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14125 {
14126 HOST_WIDE_INT val0 = 0, val1 = 0;
14127 rtx reg0, reg1;
14128 int val_diff;
14129
14130 if (GET_CODE (XEXP (a, 0)) == PLUS)
14131 {
14132 reg0 = XEXP (XEXP (a, 0), 0);
14133 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14134 }
14135 else
14136 reg0 = XEXP (a, 0);
14137
14138 if (GET_CODE (XEXP (b, 0)) == PLUS)
14139 {
14140 reg1 = XEXP (XEXP (b, 0), 0);
14141 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14142 }
14143 else
14144 reg1 = XEXP (b, 0);
14145
14146 /* Don't accept any offset that will require multiple
14147 instructions to handle, since this would cause the
14148 arith_adjacentmem pattern to output an overlong sequence. */
14149 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14150 return 0;
14151
14152 /* Don't allow an eliminable register: register elimination can make
14153 the offset too large. */
14154 if (arm_eliminable_register (reg0))
14155 return 0;
14156
14157 val_diff = val1 - val0;
14158
14159 if (arm_ld_sched)
14160 {
14161 /* If the target has load delay slots, then there's no benefit
14162 to using an ldm instruction unless the offset is zero and
14163 we are optimizing for size. */
14164 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14165 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14166 && (val_diff == 4 || val_diff == -4));
14167 }
14168
14169 return ((REGNO (reg0) == REGNO (reg1))
14170 && (val_diff == 4 || val_diff == -4));
14171 }
14172
14173 return 0;
14174 }
14175
14176 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14177 for load operations, false for store operations. CONSECUTIVE is true
14178 if the register numbers in the operation must be consecutive in the register
14179 bank. RETURN_PC is true if the value is to be loaded into PC.
14180 The pattern we are trying to match for load is:
14181 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14182 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14183 :
14184 :
14185 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14186 ]
14187 where
14188 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14189 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14190 3. If consecutive is TRUE, then for the kth register being loaded,
14191 REGNO (R_dk) = REGNO (R_d0) + k.
14192 The pattern for store is similar. */
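/* As an illustrative sketch (register numbers chosen arbitrarily): the
   Thumb-2 pop "ldmia sp!, {r4, r5}" would be represented as

     (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
                (set (reg:SI r4) (mem:SI (reg:SI sp)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))])

   which this function accepts with LOAD true and MODE SImode: the
   write-back element adds (count - 1) * 4 == 8 to the base register and
   the loaded registers are strictly ascending.  */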
14193 bool
14194 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14195 bool consecutive, bool return_pc)
14196 {
14197 HOST_WIDE_INT count = XVECLEN (op, 0);
14198 rtx reg, mem, addr;
14199 unsigned regno;
14200 unsigned first_regno;
14201 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14202 rtx elt;
14203 bool addr_reg_in_reglist = false;
14204 bool update = false;
14205 int reg_increment;
14206 int offset_adj;
14207 int regs_per_val;
14208
14209 /* If not in SImode, then registers must be consecutive
14210 (e.g., VLDM instructions for DFmode). */
14211 gcc_assert ((mode == SImode) || consecutive);
14212 /* Setting return_pc for stores is illegal. */
14213 gcc_assert (!return_pc || load);
14214
14215 /* Set up the increments and the regs per val based on the mode. */
14216 reg_increment = GET_MODE_SIZE (mode);
14217 regs_per_val = reg_increment / 4;
14218 offset_adj = return_pc ? 1 : 0;
14219
14220 if (count <= 1
14221 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14222 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14223 return false;
14224
14225 /* Check if this is a write-back. */
14226 elt = XVECEXP (op, 0, offset_adj);
14227 if (GET_CODE (SET_SRC (elt)) == PLUS)
14228 {
14229 i++;
14230 base = 1;
14231 update = true;
14232
14233 /* The offset adjustment must be the number of registers being
14234 popped times the size of a single register. */
14235 if (!REG_P (SET_DEST (elt))
14236 || !REG_P (XEXP (SET_SRC (elt), 0))
14237 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14238 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14239 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14240 ((count - 1 - offset_adj) * reg_increment))
14241 return false;
14242 }
14243
14244 i = i + offset_adj;
14245 base = base + offset_adj;
14246 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14247 success depends on the type: VLDM can do just one reg,
14248 LDM must do at least two. */
14249 if ((count <= i) && (mode == SImode))
14250 return false;
14251
14252 elt = XVECEXP (op, 0, i - 1);
14253 if (GET_CODE (elt) != SET)
14254 return false;
14255
14256 if (load)
14257 {
14258 reg = SET_DEST (elt);
14259 mem = SET_SRC (elt);
14260 }
14261 else
14262 {
14263 reg = SET_SRC (elt);
14264 mem = SET_DEST (elt);
14265 }
14266
14267 if (!REG_P (reg) || !MEM_P (mem))
14268 return false;
14269
14270 regno = REGNO (reg);
14271 first_regno = regno;
14272 addr = XEXP (mem, 0);
14273 if (GET_CODE (addr) == PLUS)
14274 {
14275 if (!CONST_INT_P (XEXP (addr, 1)))
14276 return false;
14277
14278 offset = INTVAL (XEXP (addr, 1));
14279 addr = XEXP (addr, 0);
14280 }
14281
14282 if (!REG_P (addr))
14283 return false;
14284
14285 /* Don't allow SP to be loaded unless it is also the base register. It
14286 guarantees that SP is reset correctly when an LDM instruction
14287 is interrupted. Otherwise, we might end up with a corrupt stack. */
14288 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14289 return false;
14290
14291 if (regno == REGNO (addr))
14292 addr_reg_in_reglist = true;
14293
14294 for (; i < count; i++)
14295 {
14296 elt = XVECEXP (op, 0, i);
14297 if (GET_CODE (elt) != SET)
14298 return false;
14299
14300 if (load)
14301 {
14302 reg = SET_DEST (elt);
14303 mem = SET_SRC (elt);
14304 }
14305 else
14306 {
14307 reg = SET_SRC (elt);
14308 mem = SET_DEST (elt);
14309 }
14310
14311 if (!REG_P (reg)
14312 || GET_MODE (reg) != mode
14313 || REGNO (reg) <= regno
14314 || (consecutive
14315 && (REGNO (reg) !=
14316 (unsigned int) (first_regno + regs_per_val * (i - base))))
14317 /* Don't allow SP to be loaded unless it is also the base register. It
14318 guarantees that SP is reset correctly when an LDM instruction
14319 is interrupted. Otherwise, we might end up with a corrupt stack. */
14320 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14321 || !MEM_P (mem)
14322 || GET_MODE (mem) != mode
14323 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14324 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14325 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14326 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14327 offset + (i - base) * reg_increment))
14328 && (!REG_P (XEXP (mem, 0))
14329 || offset + (i - base) * reg_increment != 0)))
14330 return false;
14331
14332 regno = REGNO (reg);
14333 if (regno == REGNO (addr))
14334 addr_reg_in_reglist = true;
14335 }
14336
14337 if (load)
14338 {
14339 if (update && addr_reg_in_reglist)
14340 return false;
14341
14342 /* For Thumb-1, the address register is always modified, either by write-back
14343 or by an explicit load. If the pattern does not describe an update,
14344 then the address register must be in the list of loaded registers. */
14345 if (TARGET_THUMB1)
14346 return update || addr_reg_in_reglist;
14347 }
14348
14349 return true;
14350 }
14351
14352 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14353 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14354 following form:
14355
14356 [(set (reg:SI <N>) (const_int 0))
14357 (set (reg:SI <M>) (const_int 0))
14358 ...
14359 (unspec_volatile [(const_int 0)]
14360 VUNSPEC_CLRM_APSR)
14361 (clobber (reg:CC CC_REGNUM))
14362 ]
14363
14364 Any number (including 0) of set expressions is valid, and the volatile unspec
14365 is optional. All registers except SP and PC are allowed, and the registers
14366 must be in strictly increasing order.
14367
14368 To be a valid VSCCLRM pattern, OP must have the following form:
14369
14370 [(unspec_volatile [(const_int 0)]
14371 VUNSPEC_VSCCLRM_VPR)
14372 (set (reg:SF <N>) (const_int 0))
14373 (set (reg:SF <M>) (const_int 0))
14374 ...
14375 ]
14376
14377 As with CLRM, any number (including 0) of set expressions is valid; however,
14378 the volatile unspec is mandatory here. Any VFP single-precision register is
14379 accepted, but all registers must be consecutive and in increasing order. */
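/* For example (an illustrative sketch), "clrm {r1, r2, APSR}" would be
   represented as

     (parallel [(set (reg:SI r1) (const_int 0))
                (set (reg:SI r2) (const_int 0))
                (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
                (clobber (reg:CC CC_REGNUM))])

   which passes the checks below: the unspec is the second-to-last element,
   the registers are strictly increasing, and neither is SP or PC.  */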
14380
14381 bool
14382 clear_operation_p (rtx op, bool vfp)
14383 {
14384 unsigned regno;
14385 unsigned last_regno = INVALID_REGNUM;
14386 rtx elt, reg, zero;
14387 int count = XVECLEN (op, 0);
14388 int first_set = vfp ? 1 : 0;
14389 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14390
14391 for (int i = first_set; i < count; i++)
14392 {
14393 elt = XVECEXP (op, 0, i);
14394
14395 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14396 {
14397 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14398 || XVECLEN (elt, 0) != 1
14399 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14400 || i != count - 2)
14401 return false;
14402
14403 continue;
14404 }
14405
14406 if (GET_CODE (elt) == CLOBBER)
14407 continue;
14408
14409 if (GET_CODE (elt) != SET)
14410 return false;
14411
14412 reg = SET_DEST (elt);
14413 zero = SET_SRC (elt);
14414
14415 if (!REG_P (reg)
14416 || GET_MODE (reg) != expected_mode
14417 || zero != CONST0_RTX (SImode))
14418 return false;
14419
14420 regno = REGNO (reg);
14421
14422 if (vfp)
14423 {
14424 if (i != first_set && regno != last_regno + 1)
14425 return false;
14426 }
14427 else
14428 {
14429 if (regno == SP_REGNUM || regno == PC_REGNUM)
14430 return false;
14431 if (i != first_set && regno <= last_regno)
14432 return false;
14433 }
14434
14435 last_regno = regno;
14436 }
14437
14438 return true;
14439 }
14440
14441 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14442 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14443 instruction. ADD_OFFSET is nonzero if the base address register needs
14444 to be modified with an add instruction before we can use it. */
14445
14446 static bool
14447 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14448 int nops, HOST_WIDE_INT add_offset)
14449 {
14450 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
14451 if the offset isn't small enough. The reason 2 ldrs are faster
14452 is because these ARMs are able to do more than one cache access
14453 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14454 whilst the ARM8 has a double bandwidth cache. This means that
14455 these cores can do both an instruction fetch and a data fetch in
14456 a single cycle, so the trick of calculating the address into a
14457 scratch register (one of the result regs) and then doing a load
14458 multiple actually becomes slower (and no smaller in code size).
14459 That is the transformation
14460
14461 ldr rd1, [rbase + offset]
14462 ldr rd2, [rbase + offset + 4]
14463
14464 to
14465
14466 add rd1, rbase, offset
14467 ldmia rd1, {rd1, rd2}
14468
14469 produces worse code -- '3 cycles + any stalls on rd2' instead of
14470 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14471 access per cycle, the first sequence could never complete in less
14472 than 6 cycles, whereas the ldm sequence would only take 5 and
14473 would make better use of sequential accesses if not hitting the
14474 cache.
14475
14476 We cheat here and test 'arm_ld_sched' which we currently know to
14477 only be true for the ARM8, ARM9 and StrongARM. If this ever
14478 changes, then the test below needs to be reworked. */
14479 if (nops == 2 && arm_ld_sched && add_offset != 0)
14480 return false;
14481
14482 /* XScale has load-store double instructions, but they have stricter
14483 alignment requirements than load-store multiple, so we cannot
14484 use them.
14485
14486 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14487 the pipeline until completion.
14488
14489 NREGS CYCLES
14490 1 3
14491 2 4
14492 3 5
14493 4 6
14494
14495 An ldr instruction takes 1-3 cycles, but does not block the
14496 pipeline.
14497
14498 NREGS CYCLES
14499 1 1-3
14500 2 2-6
14501 3 3-9
14502 4 4-12
14503
14504 Best case ldr will always win. However, the more ldr instructions
14505 we issue, the less likely we are to be able to schedule them well.
14506 Using ldr instructions also increases code size.
14507
14508 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14509 for counts of 3 or 4 regs. */
14510 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14511 return false;
14512 return true;
14513 }
14514
14515 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14516 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14517 an array ORDER which describes the sequence to use when accessing the
14518 offsets that produces an ascending order. In this sequence, each
14519 offset must be larger by exactly 4 than the previous one. ORDER[0]
14520 must have been filled in with the lowest offset by the caller.
14521 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14522 we use to verify that ORDER produces an ascending order of registers.
14523 Return true if it was possible to construct such an order, false if
14524 not. */
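/* A small worked example (illustrative): with NOPS == 4,
   UNSORTED_OFFSETS == {8, 4, 0, 12} and ORDER[0] == 2 (the index of the
   lowest offset), the loop below produces ORDER == {2, 1, 0, 3}, i.e. the
   accesses are visited at offsets 0, 4, 8, 12.  A gap (e.g. offsets
   {0, 4, 12, 16}) or a duplicated offset causes the function to return
   false.  */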
14525
14526 static bool
14527 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14528 int *unsorted_regs)
14529 {
14530 int i;
14531 for (i = 1; i < nops; i++)
14532 {
14533 int j;
14534
14535 order[i] = order[i - 1];
14536 for (j = 0; j < nops; j++)
14537 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14538 {
14539 /* We must find exactly one offset that is higher than the
14540 previous one by 4. */
14541 if (order[i] != order[i - 1])
14542 return false;
14543 order[i] = j;
14544 }
14545 if (order[i] == order[i - 1])
14546 return false;
14547 /* The register numbers must be ascending. */
14548 if (unsorted_regs != NULL
14549 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14550 return false;
14551 }
14552 return true;
14553 }
14554
14555 /* Used to determine in a peephole whether a sequence of load
14556 instructions can be changed into a load-multiple instruction.
14557 NOPS is the number of separate load instructions we are examining. The
14558 first NOPS entries in OPERANDS are the destination registers, the
14559 next NOPS entries are memory operands. If this function is
14560 successful, *BASE is set to the common base register of the memory
14561 accesses; *LOAD_OFFSET is set to the first memory location's offset
14562 from that base register.
14563 REGS is an array filled in with the destination register numbers.
14564 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14565 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14566 the sequence of registers in REGS matches the loads from ascending memory
14567 locations, and the function verifies that the register numbers are
14568 themselves ascending. If CHECK_REGS is false, the register numbers
14569 are stored in the order they are found in the operands. */
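/* For instance (an illustrative sketch, register numbers arbitrary), the
   pair

     ldr r4, [r2, #4]
     ldr r5, [r2, #8]

   gives *BASE == 2, *LOAD_OFFSET == 4 and REGS == {4, 5}, and on ARM,
   provided the transformation is deemed profitable for the tuned core,
   the function returns 2 (the ldmib case) so that the peephole can emit
   "ldmib r2, {r4, r5}".  */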
14570 static int
14571 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14572 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14573 {
14574 int unsorted_regs[MAX_LDM_STM_OPS];
14575 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14576 int order[MAX_LDM_STM_OPS];
14577 int base_reg = -1;
14578 int i, ldm_case;
14579
14580 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14581 easily extended if required. */
14582 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14583
14584 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14585
14586 /* Loop over the operands and check that the memory references are
14587 suitable (i.e. immediate offsets from the same base register). At
14588 the same time, extract the target registers and the memory
14589 offsets. */
14590 for (i = 0; i < nops; i++)
14591 {
14592 rtx reg;
14593 rtx offset;
14594
14595 /* Convert a subreg of a mem into the mem itself. */
14596 if (GET_CODE (operands[nops + i]) == SUBREG)
14597 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14598
14599 gcc_assert (MEM_P (operands[nops + i]));
14600
14601 /* Don't reorder volatile memory references; it doesn't seem worth
14602 looking for the case where the order is ok anyway. */
14603 if (MEM_VOLATILE_P (operands[nops + i]))
14604 return 0;
14605
14606 offset = const0_rtx;
14607
14608 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14609 || (SUBREG_P (reg)
14610 && REG_P (reg = SUBREG_REG (reg))))
14611 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14612 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14613 || (SUBREG_P (reg)
14614 && REG_P (reg = SUBREG_REG (reg))))
14615 && (CONST_INT_P (offset
14616 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14617 {
14618 if (i == 0)
14619 {
14620 base_reg = REGNO (reg);
14621 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14622 return 0;
14623 }
14624 else if (base_reg != (int) REGNO (reg))
14625 /* Not addressed from the same base register. */
14626 return 0;
14627
14628 unsorted_regs[i] = (REG_P (operands[i])
14629 ? REGNO (operands[i])
14630 : REGNO (SUBREG_REG (operands[i])));
14631
14632 /* If it isn't an integer register, or if it overwrites the
14633 base register but isn't the last insn in the list, then
14634 we can't do this. */
14635 if (unsorted_regs[i] < 0
14636 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14637 || unsorted_regs[i] > 14
14638 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14639 return 0;
14640
14641 /* Don't allow SP to be loaded unless it is also the base
14642 register. It guarantees that SP is reset correctly when
14643 an LDM instruction is interrupted. Otherwise, we might
14644 end up with a corrupt stack. */
14645 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14646 return 0;
14647
14648 unsorted_offsets[i] = INTVAL (offset);
14649 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14650 order[0] = i;
14651 }
14652 else
14653 /* Not a suitable memory address. */
14654 return 0;
14655 }
14656
14657 /* All the useful information has now been extracted from the
14658 operands into unsorted_regs and unsorted_offsets; additionally,
14659 order[0] has been set to the lowest offset in the list. Sort
14660 the offsets into order, verifying that they are adjacent, and
14661 check that the register numbers are ascending. */
14662 if (!compute_offset_order (nops, unsorted_offsets, order,
14663 check_regs ? unsorted_regs : NULL))
14664 return 0;
14665
14666 if (saved_order)
14667 memcpy (saved_order, order, sizeof order);
14668
14669 if (base)
14670 {
14671 *base = base_reg;
14672
14673 for (i = 0; i < nops; i++)
14674 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14675
14676 *load_offset = unsorted_offsets[order[0]];
14677 }
14678
14679 if (unsorted_offsets[order[0]] == 0)
14680 ldm_case = 1; /* ldmia */
14681 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14682 ldm_case = 2; /* ldmib */
14683 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14684 ldm_case = 3; /* ldmda */
14685 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14686 ldm_case = 4; /* ldmdb */
14687 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14688 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14689 ldm_case = 5;
14690 else
14691 return 0;
14692
14693 if (!multiple_operation_profitable_p (false, nops,
14694 ldm_case == 5
14695 ? unsorted_offsets[order[0]] : 0))
14696 return 0;
14697
14698 return ldm_case;
14699 }
14700
14701 /* Used to determine in a peephole whether a sequence of store instructions can
14702 be changed into a store-multiple instruction.
14703 NOPS is the number of separate store instructions we are examining.
14704 NOPS_TOTAL is the total number of instructions recognized by the peephole
14705 pattern.
14706 The first NOPS entries in OPERANDS are the source registers, the next
14707 NOPS entries are memory operands. If this function is successful, *BASE is
14708 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14709 to the first memory location's offset from that base register. REGS is an
14710 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14711 likewise filled with the corresponding rtx's.
14712 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14713 numbers to an ascending order of stores.
14714 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14715 from ascending memory locations, and the function verifies that the register
14716 numbers are themselves ascending. If CHECK_REGS is false, the register
14717 numbers are stored in the order they are found in the operands. */
14718 static int
14719 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14720 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14721 HOST_WIDE_INT *load_offset, bool check_regs)
14722 {
14723 int unsorted_regs[MAX_LDM_STM_OPS];
14724 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14725 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14726 int order[MAX_LDM_STM_OPS];
14727 int base_reg = -1;
14728 rtx base_reg_rtx = NULL;
14729 int i, stm_case;
14730
14731 /* Write back of base register is currently only supported for Thumb 1. */
14732 int base_writeback = TARGET_THUMB1;
14733
14734 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14735 easily extended if required. */
14736 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14737
14738 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14739
14740 /* Loop over the operands and check that the memory references are
14741 suitable (i.e. immediate offsets from the same base register). At
14742 the same time, extract the source registers and the memory
14743 offsets. */
14744 for (i = 0; i < nops; i++)
14745 {
14746 rtx reg;
14747 rtx offset;
14748
14749 /* Convert a subreg of a mem into the mem itself. */
14750 if (GET_CODE (operands[nops + i]) == SUBREG)
14751 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14752
14753 gcc_assert (MEM_P (operands[nops + i]));
14754
14755 /* Don't reorder volatile memory references; it doesn't seem worth
14756 looking for the case where the order is ok anyway. */
14757 if (MEM_VOLATILE_P (operands[nops + i]))
14758 return 0;
14759
14760 offset = const0_rtx;
14761
14762 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14763 || (SUBREG_P (reg)
14764 && REG_P (reg = SUBREG_REG (reg))))
14765 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14766 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14767 || (SUBREG_P (reg)
14768 && REG_P (reg = SUBREG_REG (reg))))
14769 && (CONST_INT_P (offset
14770 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14771 {
14772 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14773 ? operands[i] : SUBREG_REG (operands[i]));
14774 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14775
14776 if (i == 0)
14777 {
14778 base_reg = REGNO (reg);
14779 base_reg_rtx = reg;
14780 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14781 return 0;
14782 }
14783 else if (base_reg != (int) REGNO (reg))
14784 /* Not addressed from the same base register. */
14785 return 0;
14786
14787 /* If it isn't an integer register, then we can't do this. */
14788 if (unsorted_regs[i] < 0
14789 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14790 /* The effects are unpredictable if the base register is
14791 both updated and stored. */
14792 || (base_writeback && unsorted_regs[i] == base_reg)
14793 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14794 || unsorted_regs[i] > 14)
14795 return 0;
14796
14797 unsorted_offsets[i] = INTVAL (offset);
14798 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14799 order[0] = i;
14800 }
14801 else
14802 /* Not a suitable memory address. */
14803 return 0;
14804 }
14805
14806 /* All the useful information has now been extracted from the
14807 operands into unsorted_regs and unsorted_offsets; additionally,
14808 order[0] has been set to the lowest offset in the list. Sort
14809 the offsets into order, verifying that they are adjacent, and
14810 check that the register numbers are ascending. */
14811 if (!compute_offset_order (nops, unsorted_offsets, order,
14812 check_regs ? unsorted_regs : NULL))
14813 return 0;
14814
14815 if (saved_order)
14816 memcpy (saved_order, order, sizeof order);
14817
14818 if (base)
14819 {
14820 *base = base_reg;
14821
14822 for (i = 0; i < nops; i++)
14823 {
14824 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14825 if (reg_rtxs)
14826 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14827 }
14828
14829 *load_offset = unsorted_offsets[order[0]];
14830 }
14831
14832 if (TARGET_THUMB1
14833 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14834 return 0;
14835
14836 if (unsorted_offsets[order[0]] == 0)
14837 stm_case = 1; /* stmia */
14838 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14839 stm_case = 2; /* stmib */
14840 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14841 stm_case = 3; /* stmda */
14842 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14843 stm_case = 4; /* stmdb */
14844 else
14845 return 0;
14846
14847 if (!multiple_operation_profitable_p (false, nops, 0))
14848 return 0;
14849
14850 return stm_case;
14851 }
14852 \f
14853 /* Routines for use in generating RTL. */
14854
14855 /* Generate a load-multiple instruction. COUNT is the number of loads in
14856 the instruction; REGS and MEMS are arrays containing the operands.
14857 BASEREG is the base register to be used in addressing the memory operands.
14858 WBACK_OFFSET is nonzero if the instruction should update the base
14859 register. */
14860
14861 static rtx
14862 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14863 HOST_WIDE_INT wback_offset)
14864 {
14865 int i = 0, j;
14866 rtx result;
14867
14868 if (!multiple_operation_profitable_p (false, count, 0))
14869 {
14870 rtx seq;
14871
14872 start_sequence ();
14873
14874 for (i = 0; i < count; i++)
14875 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14876
14877 if (wback_offset != 0)
14878 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14879
14880 seq = get_insns ();
14881 end_sequence ();
14882
14883 return seq;
14884 }
14885
14886 result = gen_rtx_PARALLEL (VOIDmode,
14887 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14888 if (wback_offset != 0)
14889 {
14890 XVECEXP (result, 0, 0)
14891 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14892 i = 1;
14893 count++;
14894 }
14895
14896 for (j = 0; i < count; i++, j++)
14897 XVECEXP (result, 0, i)
14898 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14899
14900 return result;
14901 }
14902
14903 /* Generate a store-multiple instruction. COUNT is the number of stores in
14904 the instruction; REGS and MEMS are arrays containing the operands.
14905 BASEREG is the base register to be used in addressing the memory operands.
14906 WBACK_OFFSET is nonzero if the instruction should update the base
14907 register. */
14908
14909 static rtx
14910 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14911 HOST_WIDE_INT wback_offset)
14912 {
14913 int i = 0, j;
14914 rtx result;
14915
14916 if (GET_CODE (basereg) == PLUS)
14917 basereg = XEXP (basereg, 0);
14918
14919 if (!multiple_operation_profitable_p (false, count, 0))
14920 {
14921 rtx seq;
14922
14923 start_sequence ();
14924
14925 for (i = 0; i < count; i++)
14926 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14927
14928 if (wback_offset != 0)
14929 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14930
14931 seq = get_insns ();
14932 end_sequence ();
14933
14934 return seq;
14935 }
14936
14937 result = gen_rtx_PARALLEL (VOIDmode,
14938 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14939 if (wback_offset != 0)
14940 {
14941 XVECEXP (result, 0, 0)
14942 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14943 i = 1;
14944 count++;
14945 }
14946
14947 for (j = 0; i < count; i++, j++)
14948 XVECEXP (result, 0, i)
14949 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14950
14951 return result;
14952 }
14953
14954 /* Generate either a load-multiple or a store-multiple instruction. This
14955 function can be used in situations where we can start with a single MEM
14956 rtx and adjust its address upwards.
14957 COUNT is the number of operations in the instruction, not counting a
14958 possible update of the base register. REGS is an array containing the
14959 register operands.
14960 BASEREG is the base register to be used in addressing the memory operands,
14961 which are constructed from BASEMEM.
14962 WRITE_BACK specifies whether the generated instruction should include an
14963 update of the base register.
14964 OFFSETP is used to pass an offset to and from this function; this offset
14965 is not used when constructing the address (instead BASEMEM should have an
14966 appropriate offset in its address); it is used only for setting
14967 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14968
14969 static rtx
14970 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14971 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14972 {
14973 rtx mems[MAX_LDM_STM_OPS];
14974 HOST_WIDE_INT offset = *offsetp;
14975 int i;
14976
14977 gcc_assert (count <= MAX_LDM_STM_OPS);
14978
14979 if (GET_CODE (basereg) == PLUS)
14980 basereg = XEXP (basereg, 0);
14981
14982 for (i = 0; i < count; i++)
14983 {
14984 rtx addr = plus_constant (Pmode, basereg, i * 4);
14985 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14986 offset += 4;
14987 }
14988
14989 if (write_back)
14990 *offsetp = offset;
14991
14992 if (is_load)
14993 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14994 write_back ? 4 * count : 0);
14995 else
14996 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14997 write_back ? 4 * count : 0);
14998 }
14999
15000 rtx
15001 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15002 rtx basemem, HOST_WIDE_INT *offsetp)
15003 {
15004 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15005 offsetp);
15006 }
15007
15008 rtx
15009 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15010 rtx basemem, HOST_WIDE_INT *offsetp)
15011 {
15012 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15013 offsetp);
15014 }
15015
15016 /* Called from a peephole2 expander to turn a sequence of loads into an
15017 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15018 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15019 is true if we can reorder the registers because they are subsequently used
15020 commutatively.
15021 Returns true iff we could generate a new instruction. */
15022
15023 bool
15024 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15025 {
15026 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15027 rtx mems[MAX_LDM_STM_OPS];
15028 int i, j, base_reg;
15029 rtx base_reg_rtx;
15030 HOST_WIDE_INT offset;
15031 int write_back = FALSE;
15032 int ldm_case;
15033 rtx addr;
15034
15035 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15036 &base_reg, &offset, !sort_regs);
15037
15038 if (ldm_case == 0)
15039 return false;
15040
15041 if (sort_regs)
15042 for (i = 0; i < nops - 1; i++)
15043 for (j = i + 1; j < nops; j++)
15044 if (regs[i] > regs[j])
15045 {
15046 int t = regs[i];
15047 regs[i] = regs[j];
15048 regs[j] = t;
15049 }
15050 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15051
15052 if (TARGET_THUMB1)
15053 {
15054 gcc_assert (ldm_case == 1 || ldm_case == 5);
15055
15056 /* Thumb-1 ldm uses writeback unless the base register is also loaded. */
15057 write_back = true;
15058 for (i = 0; i < nops; i++)
15059 if (base_reg == regs[i])
15060 write_back = false;
15061
15062 /* Ensure the base is dead if it is updated. */
15063 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15064 return false;
15065 }
15066
15067 if (ldm_case == 5)
15068 {
15069 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15070 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15071 offset = 0;
15072 base_reg_rtx = newbase;
15073 }
15074
15075 for (i = 0; i < nops; i++)
15076 {
15077 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15078 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15079 SImode, addr, 0);
15080 }
15081 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15082 write_back ? offset + i * 4 : 0));
15083 return true;
15084 }
15085
15086 /* Called from a peephole2 expander to turn a sequence of stores into an
15087 STM instruction. OPERANDS are the operands found by the peephole matcher;
15088 NOPS indicates how many separate stores we are trying to combine.
15089 Returns true iff we could generate a new instruction. */
15090
15091 bool
15092 gen_stm_seq (rtx *operands, int nops)
15093 {
15094 int i;
15095 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15096 rtx mems[MAX_LDM_STM_OPS];
15097 int base_reg;
15098 rtx base_reg_rtx;
15099 HOST_WIDE_INT offset;
15100 int write_back = FALSE;
15101 int stm_case;
15102 rtx addr;
15103 bool base_reg_dies;
15104
15105 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15106 mem_order, &base_reg, &offset, true);
15107
15108 if (stm_case == 0)
15109 return false;
15110
15111 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15112
15113 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15114 if (TARGET_THUMB1)
15115 {
15116 gcc_assert (base_reg_dies);
15117 write_back = TRUE;
15118 }
15119
15120 if (stm_case == 5)
15121 {
15122 gcc_assert (base_reg_dies);
15123 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15124 offset = 0;
15125 }
15126
15127 addr = plus_constant (Pmode, base_reg_rtx, offset);
15128
15129 for (i = 0; i < nops; i++)
15130 {
15131 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15132 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15133 SImode, addr, 0);
15134 }
15135 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15136 write_back ? offset + i * 4 : 0));
15137 return true;
15138 }
15139
15140 /* Called from a peephole2 expander to turn a sequence of stores that are
15141 preceded by constant loads into an STM instruction. OPERANDS are the
15142 operands found by the peephole matcher; NOPS indicates how many
15143 separate stores we are trying to combine; there are 2 * NOPS
15144 instructions in the peephole.
15145 Returns true iff we could generate a new instruction. */
15146
15147 bool
15148 gen_const_stm_seq (rtx *operands, int nops)
15149 {
15150 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15151 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15152 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15153 rtx mems[MAX_LDM_STM_OPS];
15154 int base_reg;
15155 rtx base_reg_rtx;
15156 HOST_WIDE_INT offset;
15157 int write_back = FALSE;
15158 int stm_case;
15159 rtx addr;
15160 bool base_reg_dies;
15161 int i, j;
15162 HARD_REG_SET allocated;
15163
15164 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15165 mem_order, &base_reg, &offset, false);
15166
15167 if (stm_case == 0)
15168 return false;
15169
15170 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15171
15172 /* If the same register is used more than once, try to find a free
15173 register. */
15174 CLEAR_HARD_REG_SET (allocated);
15175 for (i = 0; i < nops; i++)
15176 {
15177 for (j = i + 1; j < nops; j++)
15178 if (regs[i] == regs[j])
15179 {
15180 rtx t = peep2_find_free_register (0, nops * 2,
15181 TARGET_THUMB1 ? "l" : "r",
15182 SImode, &allocated);
15183 if (t == NULL_RTX)
15184 return false;
15185 reg_rtxs[i] = t;
15186 regs[i] = REGNO (t);
15187 }
15188 }
15189
15190 /* Compute an ordering that maps the register numbers to an ascending
15191 sequence. */
15192 reg_order[0] = 0;
15193 for (i = 0; i < nops; i++)
15194 if (regs[i] < regs[reg_order[0]])
15195 reg_order[0] = i;
15196
15197 for (i = 1; i < nops; i++)
15198 {
15199 int this_order = reg_order[i - 1];
15200 for (j = 0; j < nops; j++)
15201 if (regs[j] > regs[reg_order[i - 1]]
15202 && (this_order == reg_order[i - 1]
15203 || regs[j] < regs[this_order]))
15204 this_order = j;
15205 reg_order[i] = this_order;
15206 }
15207
15208 /* Ensure that registers that must be live after the instruction end
15209 up with the correct value. */
15210 for (i = 0; i < nops; i++)
15211 {
15212 int this_order = reg_order[i];
15213 if ((this_order != mem_order[i]
15214 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15215 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15216 return false;
15217 }
15218
15219 /* Load the constants. */
15220 for (i = 0; i < nops; i++)
15221 {
15222 rtx op = operands[2 * nops + mem_order[i]];
15223 sorted_regs[i] = regs[reg_order[i]];
15224 emit_move_insn (reg_rtxs[reg_order[i]], op);
15225 }
15226
15227 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15228
15229 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15230 if (TARGET_THUMB1)
15231 {
15232 gcc_assert (base_reg_dies);
15233 write_back = TRUE;
15234 }
15235
15236 if (stm_case == 5)
15237 {
15238 gcc_assert (base_reg_dies);
15239 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15240 offset = 0;
15241 }
15242
15243 addr = plus_constant (Pmode, base_reg_rtx, offset);
15244
15245 for (i = 0; i < nops; i++)
15246 {
15247 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15248 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15249 SImode, addr, 0);
15250 }
15251 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15252 write_back ? offset + i * 4 : 0));
15253 return true;
15254 }
15255
15256 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15257 unaligned copies on processors which support unaligned semantics for those
15258 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15259 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15260 An interleave factor of 1 (the minimum) will perform no interleaving.
15261 Load/store multiple are used for aligned addresses where possible. */
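/* As a worked example (illustrative): copying 17 bytes with
   INTERLEAVE_FACTOR == 2 moves two 8-byte blocks in the main loop (using
   a two-register load/store multiple on whichever side is word-aligned),
   leaves no whole words over, and finishes with a single byte copy.  */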
15262
15263 static void
15264 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15265 HOST_WIDE_INT length,
15266 unsigned int interleave_factor)
15267 {
15268 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15269 int *regnos = XALLOCAVEC (int, interleave_factor);
15270 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15271 HOST_WIDE_INT i, j;
15272 HOST_WIDE_INT remaining = length, words;
15273 rtx halfword_tmp = NULL, byte_tmp = NULL;
15274 rtx dst, src;
15275 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15276 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15277 HOST_WIDE_INT srcoffset, dstoffset;
15278 HOST_WIDE_INT src_autoinc, dst_autoinc;
15279 rtx mem, addr;
15280
15281 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15282
15283 /* Use hard registers if we have aligned source or destination so we can use
15284 load/store multiple with contiguous registers. */
15285 if (dst_aligned || src_aligned)
15286 for (i = 0; i < interleave_factor; i++)
15287 regs[i] = gen_rtx_REG (SImode, i);
15288 else
15289 for (i = 0; i < interleave_factor; i++)
15290 regs[i] = gen_reg_rtx (SImode);
15291
15292 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15293 src = copy_addr_to_reg (XEXP (srcbase, 0));
15294
15295 srcoffset = dstoffset = 0;
15296
15297 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15298 For copying the last bytes we want to subtract this offset again. */
15299 src_autoinc = dst_autoinc = 0;
15300
15301 for (i = 0; i < interleave_factor; i++)
15302 regnos[i] = i;
15303
15304 /* Copy BLOCK_SIZE_BYTES chunks. */
15305
15306 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15307 {
15308 /* Load words. */
15309 if (src_aligned && interleave_factor > 1)
15310 {
15311 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15312 TRUE, srcbase, &srcoffset));
15313 src_autoinc += UNITS_PER_WORD * interleave_factor;
15314 }
15315 else
15316 {
15317 for (j = 0; j < interleave_factor; j++)
15318 {
15319 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15320 - src_autoinc));
15321 mem = adjust_automodify_address (srcbase, SImode, addr,
15322 srcoffset + j * UNITS_PER_WORD);
15323 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15324 }
15325 srcoffset += block_size_bytes;
15326 }
15327
15328 /* Store words. */
15329 if (dst_aligned && interleave_factor > 1)
15330 {
15331 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15332 TRUE, dstbase, &dstoffset));
15333 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15334 }
15335 else
15336 {
15337 for (j = 0; j < interleave_factor; j++)
15338 {
15339 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15340 - dst_autoinc));
15341 mem = adjust_automodify_address (dstbase, SImode, addr,
15342 dstoffset + j * UNITS_PER_WORD);
15343 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15344 }
15345 dstoffset += block_size_bytes;
15346 }
15347
15348 remaining -= block_size_bytes;
15349 }
15350
15351 /* Copy any whole words left (note these aren't interleaved with any
15352 subsequent halfword/byte load/stores in the interests of simplicity). */
15353
15354 words = remaining / UNITS_PER_WORD;
15355
15356 gcc_assert (words < interleave_factor);
15357
15358 if (src_aligned && words > 1)
15359 {
15360 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15361 &srcoffset));
15362 src_autoinc += UNITS_PER_WORD * words;
15363 }
15364 else
15365 {
15366 for (j = 0; j < words; j++)
15367 {
15368 addr = plus_constant (Pmode, src,
15369 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15370 mem = adjust_automodify_address (srcbase, SImode, addr,
15371 srcoffset + j * UNITS_PER_WORD);
15372 if (src_aligned)
15373 emit_move_insn (regs[j], mem);
15374 else
15375 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15376 }
15377 srcoffset += words * UNITS_PER_WORD;
15378 }
15379
15380 if (dst_aligned && words > 1)
15381 {
15382 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15383 &dstoffset));
15384 dst_autoinc += words * UNITS_PER_WORD;
15385 }
15386 else
15387 {
15388 for (j = 0; j < words; j++)
15389 {
15390 addr = plus_constant (Pmode, dst,
15391 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15392 mem = adjust_automodify_address (dstbase, SImode, addr,
15393 dstoffset + j * UNITS_PER_WORD);
15394 if (dst_aligned)
15395 emit_move_insn (mem, regs[j]);
15396 else
15397 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15398 }
15399 dstoffset += words * UNITS_PER_WORD;
15400 }
15401
15402 remaining -= words * UNITS_PER_WORD;
15403
15404 gcc_assert (remaining < 4);
15405
15406 /* Copy a halfword if necessary. */
15407
15408 if (remaining >= 2)
15409 {
15410 halfword_tmp = gen_reg_rtx (SImode);
15411
15412 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15413 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15414 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15415
15416 /* Either write out immediately, or delay until we've loaded the last
15417 byte, depending on interleave factor. */
15418 if (interleave_factor == 1)
15419 {
15420 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15421 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15422 emit_insn (gen_unaligned_storehi (mem,
15423 gen_lowpart (HImode, halfword_tmp)));
15424 halfword_tmp = NULL;
15425 dstoffset += 2;
15426 }
15427
15428 remaining -= 2;
15429 srcoffset += 2;
15430 }
15431
15432 gcc_assert (remaining < 2);
15433
15434 /* Copy last byte. */
15435
15436 if ((remaining & 1) != 0)
15437 {
15438 byte_tmp = gen_reg_rtx (SImode);
15439
15440 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15441 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15442 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15443
15444 if (interleave_factor == 1)
15445 {
15446 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15447 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15448 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15449 byte_tmp = NULL;
15450 dstoffset++;
15451 }
15452
15453 remaining--;
15454 srcoffset++;
15455 }
15456
15457 /* Store last halfword if we haven't done so already. */
15458
15459 if (halfword_tmp)
15460 {
15461 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15462 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15463 emit_insn (gen_unaligned_storehi (mem,
15464 gen_lowpart (HImode, halfword_tmp)));
15465 dstoffset += 2;
15466 }
15467
15468 /* Likewise for last byte. */
15469
15470 if (byte_tmp)
15471 {
15472 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15473 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15474 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15475 dstoffset++;
15476 }
15477
15478 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15479 }
15480
15481 /* From mips_adjust_block_mem:
15482
15483 Helper function for doing a loop-based block operation on memory
15484 reference MEM. Each iteration of the loop will operate on LENGTH
15485 bytes of MEM.
15486
15487 Create a new base register for use within the loop and point it to
15488 the start of MEM. Create a new memory reference that uses this
15489 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15490
15491 static void
15492 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15493 rtx *loop_mem)
15494 {
15495 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15496
15497 /* Although the new mem does not refer to a known location,
15498 it does keep up to LENGTH bytes of alignment. */
15499 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15500 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15501 }
15502
15503 /* From mips_block_move_loop:
15504
15505 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15506 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15507 the memory regions do not overlap. */
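/* For example (illustrative): with LENGTH == 100 and BYTES_PER_ITER == 16,
   LEFTOVER is 4, the loop below copies 96 bytes in six iterations, and the
   final call to arm_block_move_unaligned_straight mops up the remaining
   4 bytes.  */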
15508
15509 static void
15510 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15511 unsigned int interleave_factor,
15512 HOST_WIDE_INT bytes_per_iter)
15513 {
15514 rtx src_reg, dest_reg, final_src, test;
15515 HOST_WIDE_INT leftover;
15516
15517 leftover = length % bytes_per_iter;
15518 length -= leftover;
15519
15520 /* Create registers and memory references for use within the loop. */
15521 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15522 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15523
15524 /* Calculate the value that SRC_REG should have after the last iteration of
15525 the loop. */
15526 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15527 0, 0, OPTAB_WIDEN);
15528
15529 /* Emit the start of the loop. */
15530 rtx_code_label *label = gen_label_rtx ();
15531 emit_label (label);
15532
15533 /* Emit the loop body. */
15534 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15535 interleave_factor);
15536
15537 /* Move on to the next block. */
15538 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15539 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15540
15541 /* Emit the loop condition. */
15542 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15543 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15544
15545 /* Mop up any left-over bytes. */
15546 if (leftover)
15547 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15548 }
15549
15550 /* Emit a block move when either the source or destination is unaligned (not
15551 aligned to a four-byte boundary). This may need further tuning depending on
15552 core type, optimize_size setting, etc. */
15553
15554 static int
15555 arm_cpymemqi_unaligned (rtx *operands)
15556 {
15557 HOST_WIDE_INT length = INTVAL (operands[2]);
15558
15559 if (optimize_size)
15560 {
15561 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15562 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15563 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15564 the size of the code if optimizing for size. We'll use ldm/stm if src_aligned
15565 or dst_aligned though: allow more interleaving in those cases since the
15566 resulting code can be smaller. */
15567 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15568 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15569
15570 if (length > 12)
15571 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15572 interleave_factor, bytes_per_iter);
15573 else
15574 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15575 interleave_factor);
15576 }
15577 else
15578 {
15579 /* Note that the loop created by arm_block_move_unaligned_loop may be
15580 subject to loop unrolling, which makes tuning this condition a little
15581 redundant. */
15582 if (length > 32)
15583 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15584 else
15585 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15586 }
15587
15588 return 1;
15589 }
15590
15591 int
15592 arm_gen_cpymemqi (rtx *operands)
15593 {
15594 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15595 HOST_WIDE_INT srcoffset, dstoffset;
15596 rtx src, dst, srcbase, dstbase;
15597 rtx part_bytes_reg = NULL;
15598 rtx mem;
15599
15600 if (!CONST_INT_P (operands[2])
15601 || !CONST_INT_P (operands[3])
15602 || INTVAL (operands[2]) > 64)
15603 return 0;
15604
15605 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15606 return arm_cpymemqi_unaligned (operands);
15607
15608 if (INTVAL (operands[3]) & 3)
15609 return 0;
15610
15611 dstbase = operands[0];
15612 srcbase = operands[1];
15613
15614 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15615 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15616
15617 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15618 out_words_to_go = INTVAL (operands[2]) / 4;
15619 last_bytes = INTVAL (operands[2]) & 3;
15620 dstoffset = srcoffset = 0;
15621
15622 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15623 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15624
15625 while (in_words_to_go >= 2)
15626 {
15627 if (in_words_to_go > 4)
15628 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15629 TRUE, srcbase, &srcoffset));
15630 else
15631 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15632 src, FALSE, srcbase,
15633 &srcoffset));
15634
15635 if (out_words_to_go)
15636 {
15637 if (out_words_to_go > 4)
15638 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15639 TRUE, dstbase, &dstoffset));
15640 else if (out_words_to_go != 1)
15641 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15642 out_words_to_go, dst,
15643 (last_bytes == 0
15644 ? FALSE : TRUE),
15645 dstbase, &dstoffset));
15646 else
15647 {
15648 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15649 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15650 if (last_bytes != 0)
15651 {
15652 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15653 dstoffset += 4;
15654 }
15655 }
15656 }
15657
15658 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15659 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15660 }
15661
15662 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15663 if (out_words_to_go)
15664 {
15665 rtx sreg;
15666
15667 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15668 sreg = copy_to_reg (mem);
15669
15670 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15671 emit_move_insn (mem, sreg);
15672 in_words_to_go--;
15673
15674 gcc_assert (!in_words_to_go); /* Sanity check. */
15675 }
15676
15677 if (in_words_to_go)
15678 {
15679 gcc_assert (in_words_to_go > 0);
15680
15681 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15682 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15683 }
15684
15685 gcc_assert (!last_bytes || part_bytes_reg);
15686
15687 if (BYTES_BIG_ENDIAN && last_bytes)
15688 {
15689 rtx tmp = gen_reg_rtx (SImode);
15690
15691 /* The bytes we want are in the top end of the word. */
15692 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15693 GEN_INT (8 * (4 - last_bytes))));
15694 part_bytes_reg = tmp;
15695
15696 while (last_bytes)
15697 {
15698 mem = adjust_automodify_address (dstbase, QImode,
15699 plus_constant (Pmode, dst,
15700 last_bytes - 1),
15701 dstoffset + last_bytes - 1);
15702 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15703
15704 if (--last_bytes)
15705 {
15706 tmp = gen_reg_rtx (SImode);
15707 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15708 part_bytes_reg = tmp;
15709 }
15710 }
15711
15712 }
15713 else
15714 {
15715 if (last_bytes > 1)
15716 {
15717 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15718 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15719 last_bytes -= 2;
15720 if (last_bytes)
15721 {
15722 rtx tmp = gen_reg_rtx (SImode);
15723 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15724 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15725 part_bytes_reg = tmp;
15726 dstoffset += 2;
15727 }
15728 }
15729
15730 if (last_bytes)
15731 {
15732 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15733 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15734 }
15735 }
15736
15737 return 1;
15738 }
15739
15740 /* Helper for gen_cpymem_ldrd_strd. Increase the address of the memory rtx
15741 MEM by its mode size. */
15742 inline static rtx
15743 next_consecutive_mem (rtx mem)
15744 {
15745 machine_mode mode = GET_MODE (mem);
15746 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15747 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15748
15749 return adjust_automodify_address (mem, mode, addr, offset);
15750 }
15751
15752 /* Copy using LDRD/STRD instructions whenever possible.
15753 Returns true upon success. */
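/* As an illustrative sketch: for a 14-byte copy with both buffers
   word-aligned, the code below emits one doubleword move, then a word
   move, then a halfword move (8 + 4 + 2 bytes), and returns true.  */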
15754 bool
15755 gen_cpymem_ldrd_strd (rtx *operands)
15756 {
15757 unsigned HOST_WIDE_INT len;
15758 HOST_WIDE_INT align;
15759 rtx src, dst, base;
15760 rtx reg0;
15761 bool src_aligned, dst_aligned;
15762 bool src_volatile, dst_volatile;
15763
15764 gcc_assert (CONST_INT_P (operands[2]));
15765 gcc_assert (CONST_INT_P (operands[3]));
15766
15767 len = UINTVAL (operands[2]);
15768 if (len > 64)
15769 return false;
15770
15771 /* Maximum alignment we can assume for both src and dst buffers. */
15772 align = INTVAL (operands[3]);
15773
15774 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15775 return false;
15776
15777 /* Place src and dst addresses in registers
15778 and update the corresponding mem rtx. */
15779 dst = operands[0];
15780 dst_volatile = MEM_VOLATILE_P (dst);
15781 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15782 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15783 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15784
15785 src = operands[1];
15786 src_volatile = MEM_VOLATILE_P (src);
15787 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15788 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15789 src = adjust_automodify_address (src, VOIDmode, base, 0);
15790
15791 if (!unaligned_access && !(src_aligned && dst_aligned))
15792 return false;
15793
15794 if (src_volatile || dst_volatile)
15795 return false;
15796
15797 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15798 if (!(dst_aligned || src_aligned))
15799 return arm_gen_cpymemqi (operands);
15800
15801 /* If either src or dst is unaligned, we'll be accessing it as pairs
15802 of unaligned SImode accesses. Otherwise we can generate DImode
15803 ldrd/strd instructions. */
15804 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15805 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15806
15807 while (len >= 8)
15808 {
15809 len -= 8;
15810 reg0 = gen_reg_rtx (DImode);
15811 rtx first_reg = NULL_RTX;
15812 rtx second_reg = NULL_RTX;
15813
15814 if (!src_aligned || !dst_aligned)
15815 {
15816 if (BYTES_BIG_ENDIAN)
15817 {
15818 second_reg = gen_lowpart (SImode, reg0);
15819 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15820 }
15821 else
15822 {
15823 first_reg = gen_lowpart (SImode, reg0);
15824 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15825 }
15826 }
15827 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15828 emit_move_insn (reg0, src);
15829 else if (src_aligned)
15830 emit_insn (gen_unaligned_loaddi (reg0, src));
15831 else
15832 {
15833 emit_insn (gen_unaligned_loadsi (first_reg, src));
15834 src = next_consecutive_mem (src);
15835 emit_insn (gen_unaligned_loadsi (second_reg, src));
15836 }
15837
15838 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15839 emit_move_insn (dst, reg0);
15840 else if (dst_aligned)
15841 emit_insn (gen_unaligned_storedi (dst, reg0));
15842 else
15843 {
15844 emit_insn (gen_unaligned_storesi (dst, first_reg));
15845 dst = next_consecutive_mem (dst);
15846 emit_insn (gen_unaligned_storesi (dst, second_reg));
15847 }
15848
15849 src = next_consecutive_mem (src);
15850 dst = next_consecutive_mem (dst);
15851 }
15852
15853 gcc_assert (len < 8);
15854 if (len >= 4)
15855 {
15856 /* More than a word but less than a double-word to copy. Copy a word. */
15857 reg0 = gen_reg_rtx (SImode);
15858 src = adjust_address (src, SImode, 0);
15859 dst = adjust_address (dst, SImode, 0);
15860 if (src_aligned)
15861 emit_move_insn (reg0, src);
15862 else
15863 emit_insn (gen_unaligned_loadsi (reg0, src));
15864
15865 if (dst_aligned)
15866 emit_move_insn (dst, reg0);
15867 else
15868 emit_insn (gen_unaligned_storesi (dst, reg0));
15869
15870 src = next_consecutive_mem (src);
15871 dst = next_consecutive_mem (dst);
15872 len -= 4;
15873 }
15874
15875 if (len == 0)
15876 return true;
15877
15878 /* Copy the remaining bytes. */
15879 if (len >= 2)
15880 {
15881 dst = adjust_address (dst, HImode, 0);
15882 src = adjust_address (src, HImode, 0);
15883 reg0 = gen_reg_rtx (SImode);
15884 if (src_aligned)
15885 emit_insn (gen_zero_extendhisi2 (reg0, src));
15886 else
15887 emit_insn (gen_unaligned_loadhiu (reg0, src));
15888
15889 if (dst_aligned)
15890 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15891 else
15892 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15893
15894 src = next_consecutive_mem (src);
15895 dst = next_consecutive_mem (dst);
15896 if (len == 2)
15897 return true;
15898 }
15899
15900 dst = adjust_address (dst, QImode, 0);
15901 src = adjust_address (src, QImode, 0);
15902 reg0 = gen_reg_rtx (QImode);
15903 emit_move_insn (reg0, src);
15904 emit_move_insn (dst, reg0);
15905 return true;
15906 }
15907
15908 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15909 into its component 32-bit subregs. OP2 may be an immediate
15910 constant and we want to simplify it in that case. */
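/* For instance, if OP2 is the DImode constant 0x100000002 the two halves
   come back as *LO_OP2 == (const_int 2) and *HI_OP2 == (const_int 1),
   while the register operand OP1 is simply split with gen_lowpart and
   gen_highpart.  */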
15911 void
15912 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15913 rtx *lo_op2, rtx *hi_op2)
15914 {
15915 *lo_op1 = gen_lowpart (SImode, op1);
15916 *hi_op1 = gen_highpart (SImode, op1);
15917 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15918 subreg_lowpart_offset (SImode, DImode));
15919 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15920 subreg_highpart_offset (SImode, DImode));
15921 }
15922
15923 /* Select a dominance comparison mode if possible for a test of the general
15924 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15925 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15926 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15927 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15928 In all cases OP will be either EQ or NE, but we don't need to know which
15929 here. If we are unable to support a dominance comparison we return
15930 CC mode. This will then fail to match for the RTL expressions that
15931 generate this call. */
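/* Worked example (illustrative): for a test of the form
   (ne (ior (eq r0 r1) (le r2 r3)) (const_int 0)) with COND_OR ==
   DOM_CC_X_OR_Y, cond1 == EQ dominates cond2 == LE, so CC_DLEmode is
   returned and the combined test can be emitted as a conditional-compare
   sequence such as
	cmp	r0, r1
	cmpne	r2, r3
   followed by a branch on "le".  */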
15932 machine_mode
15933 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15934 {
15935 enum rtx_code cond1, cond2;
15936 int swapped = 0;
15937
15938 /* Currently we will probably get the wrong result if the individual
15939 comparisons are not simple. This also ensures that it is safe to
15940 reverse a comparison if necessary. */
15941 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15942 != CCmode)
15943 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15944 != CCmode))
15945 return CCmode;
15946
15947 /* The if_then_else variant of this tests the second condition if the
15948 first passes, but is true if the first fails. Reverse the first
15949 condition to get a true "inclusive-or" expression. */
15950 if (cond_or == DOM_CC_NX_OR_Y)
15951 cond1 = reverse_condition (cond1);
15952
15953 /* If the comparisons are not equal, and one doesn't dominate the other,
15954 then we can't do this. */
15955 if (cond1 != cond2
15956 && !comparison_dominates_p (cond1, cond2)
15957 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15958 return CCmode;
15959
15960 if (swapped)
15961 std::swap (cond1, cond2);
15962
15963 switch (cond1)
15964 {
15965 case EQ:
15966 if (cond_or == DOM_CC_X_AND_Y)
15967 return CC_DEQmode;
15968
15969 switch (cond2)
15970 {
15971 case EQ: return CC_DEQmode;
15972 case LE: return CC_DLEmode;
15973 case LEU: return CC_DLEUmode;
15974 case GE: return CC_DGEmode;
15975 case GEU: return CC_DGEUmode;
15976 default: gcc_unreachable ();
15977 }
15978
15979 case LT:
15980 if (cond_or == DOM_CC_X_AND_Y)
15981 return CC_DLTmode;
15982
15983 switch (cond2)
15984 {
15985 case LT:
15986 return CC_DLTmode;
15987 case LE:
15988 return CC_DLEmode;
15989 case NE:
15990 return CC_DNEmode;
15991 default:
15992 gcc_unreachable ();
15993 }
15994
15995 case GT:
15996 if (cond_or == DOM_CC_X_AND_Y)
15997 return CC_DGTmode;
15998
15999 switch (cond2)
16000 {
16001 case GT:
16002 return CC_DGTmode;
16003 case GE:
16004 return CC_DGEmode;
16005 case NE:
16006 return CC_DNEmode;
16007 default:
16008 gcc_unreachable ();
16009 }
16010
16011 case LTU:
16012 if (cond_or == DOM_CC_X_AND_Y)
16013 return CC_DLTUmode;
16014
16015 switch (cond2)
16016 {
16017 case LTU:
16018 return CC_DLTUmode;
16019 case LEU:
16020 return CC_DLEUmode;
16021 case NE:
16022 return CC_DNEmode;
16023 default:
16024 gcc_unreachable ();
16025 }
16026
16027 case GTU:
16028 if (cond_or == DOM_CC_X_AND_Y)
16029 return CC_DGTUmode;
16030
16031 switch (cond2)
16032 {
16033 case GTU:
16034 return CC_DGTUmode;
16035 case GEU:
16036 return CC_DGEUmode;
16037 case NE:
16038 return CC_DNEmode;
16039 default:
16040 gcc_unreachable ();
16041 }
16042
16043 /* The remaining cases only occur when both comparisons are the
16044 same. */
16045 case NE:
16046 gcc_assert (cond1 == cond2);
16047 return CC_DNEmode;
16048
16049 case LE:
16050 gcc_assert (cond1 == cond2);
16051 return CC_DLEmode;
16052
16053 case GE:
16054 gcc_assert (cond1 == cond2);
16055 return CC_DGEmode;
16056
16057 case LEU:
16058 gcc_assert (cond1 == cond2);
16059 return CC_DLEUmode;
16060
16061 case GEU:
16062 gcc_assert (cond1 == cond2);
16063 return CC_DGEUmode;
16064
16065 default:
16066 gcc_unreachable ();
16067 }
16068 }
16069
16070 machine_mode
16071 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16072 {
16073 /* All floating point compares return CCFP if it is an equality
16074 comparison, and CCFPE otherwise. */
16075 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16076 {
16077 switch (op)
16078 {
16079 case EQ:
16080 case NE:
16081 case UNORDERED:
16082 case ORDERED:
16083 case UNLT:
16084 case UNLE:
16085 case UNGT:
16086 case UNGE:
16087 case UNEQ:
16088 case LTGT:
16089 return CCFPmode;
16090
16091 case LT:
16092 case LE:
16093 case GT:
16094 case GE:
16095 return CCFPEmode;
16096
16097 default:
16098 gcc_unreachable ();
16099 }
16100 }
16101
16102 /* A compare with a shifted operand. Because of canonicalization, the
16103 comparison will have to be swapped when we emit the assembler. */
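  /* For example (a sketch of the common case): the RTL
     (compare (ashift (reg r1) (const_int 2)) (reg r0)) can only be output
     as "cmp r0, r1, lsl #2", which performs the comparison the other way
     round, hence the swapped mode returned below.  */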
16104 if (GET_MODE (y) == SImode
16105 && (REG_P (y) || (SUBREG_P (y)))
16106 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16107 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16108 || GET_CODE (x) == ROTATERT))
16109 return CC_SWPmode;
16110
16111 /* A widened compare of the sum of a value plus a carry against a
16112 constant. This is a representation of RSC. We want to swap the
16113 result of the comparison at output. Not valid if the Z bit is
16114 needed. */
16115 if (GET_MODE (x) == DImode
16116 && GET_CODE (x) == PLUS
16117 && arm_borrow_operation (XEXP (x, 1), DImode)
16118 && CONST_INT_P (y)
16119 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16120 && (op == LE || op == GT))
16121 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16122 && (op == LEU || op == GTU))))
16123 return CC_SWPmode;
16124
16125 /* If X is a constant we want to use CC_RSBmode. This is
16126 non-canonical, but arm_gen_compare_reg uses this to generate the
16127 correct canonical form. */
16128 if (GET_MODE (y) == SImode
16129 && (REG_P (y) || SUBREG_P (y))
16130 && CONST_INT_P (x))
16131 return CC_RSBmode;
16132
16133 /* This operation is performed swapped, but since we only rely on the Z
16134 flag we don't need an additional mode. */
16135 if (GET_MODE (y) == SImode
16136 && (REG_P (y) || (SUBREG_P (y)))
16137 && GET_CODE (x) == NEG
16138 && (op == EQ || op == NE))
16139 return CC_Zmode;
16140
16141 /* This is a special case that is used by combine to allow a
16142 comparison of a shifted byte load to be split into a zero-extend
16143 followed by a comparison of the shifted integer (only valid for
16144 equalities and unsigned inequalities). */
16145 if (GET_MODE (x) == SImode
16146 && GET_CODE (x) == ASHIFT
16147 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16148 && GET_CODE (XEXP (x, 0)) == SUBREG
16149 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16150 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16151 && (op == EQ || op == NE
16152 || op == GEU || op == GTU || op == LTU || op == LEU)
16153 && CONST_INT_P (y))
16154 return CC_Zmode;
16155
16156 /* A construct for a conditional compare, if the false arm contains
16157 0, then both conditions must be true, otherwise either condition
16158 must be true. Not all conditions are possible, so CCmode is
16159 returned if it can't be done. */
16160 if (GET_CODE (x) == IF_THEN_ELSE
16161 && (XEXP (x, 2) == const0_rtx
16162 || XEXP (x, 2) == const1_rtx)
16163 && COMPARISON_P (XEXP (x, 0))
16164 && COMPARISON_P (XEXP (x, 1)))
16165 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16166 INTVAL (XEXP (x, 2)));
16167
16168 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16169 if (GET_CODE (x) == AND
16170 && (op == EQ || op == NE)
16171 && COMPARISON_P (XEXP (x, 0))
16172 && COMPARISON_P (XEXP (x, 1)))
16173 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16174 DOM_CC_X_AND_Y);
16175
16176 if (GET_CODE (x) == IOR
16177 && (op == EQ || op == NE)
16178 && COMPARISON_P (XEXP (x, 0))
16179 && COMPARISON_P (XEXP (x, 1)))
16180 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16181 DOM_CC_X_OR_Y);
16182
16183 /* An operation (on Thumb) where we want to test for a single bit.
16184 This is done by shifting that bit up into the top bit of a
16185 scratch register; we can then branch on the sign bit. */
16186 if (TARGET_THUMB1
16187 && GET_MODE (x) == SImode
16188 && (op == EQ || op == NE)
16189 && GET_CODE (x) == ZERO_EXTRACT
16190 && XEXP (x, 1) == const1_rtx)
16191 return CC_Nmode;
16192
16193 /* For an operation that sets the condition codes as a side-effect, the
16194 V flag is not set correctly, so we can only use comparisons where
16195 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16196 instead.) */
16197 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16198 if (GET_MODE (x) == SImode
16199 && y == const0_rtx
16200 && (op == EQ || op == NE || op == LT || op == GE)
16201 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16202 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16203 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16204 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16205 || GET_CODE (x) == LSHIFTRT
16206 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16207 || GET_CODE (x) == ROTATERT
16208 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16209 return CC_NZmode;
16210
16211 /* A comparison of ~reg with a const is really a special
16212 canonicalization of compare (~const, reg), which is a reverse
16213 subtract operation. We may not get here if CONST is 0, but that
16214 doesn't matter because ~0 isn't a valid immediate for RSB. */
16215 if (GET_MODE (x) == SImode
16216 && GET_CODE (x) == NOT
16217 && CONST_INT_P (y))
16218 return CC_RSBmode;
16219
16220 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16221 return CC_Zmode;
16222
16223 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16224 && GET_CODE (x) == PLUS
16225 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16226 return CC_Cmode;
16227
16228 if (GET_MODE (x) == DImode
16229 && GET_CODE (x) == PLUS
16230 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16231 && CONST_INT_P (y)
16232 && UINTVAL (y) == 0x800000000
16233 && (op == GEU || op == LTU))
16234 return CC_ADCmode;
16235
16236 if (GET_MODE (x) == DImode
16237 && (op == GE || op == LT)
16238 && GET_CODE (x) == SIGN_EXTEND
16239 && ((GET_CODE (y) == PLUS
16240 && arm_borrow_operation (XEXP (y, 0), DImode))
16241 || arm_borrow_operation (y, DImode)))
16242 return CC_NVmode;
16243
16244 if (GET_MODE (x) == DImode
16245 && (op == GEU || op == LTU)
16246 && GET_CODE (x) == ZERO_EXTEND
16247 && ((GET_CODE (y) == PLUS
16248 && arm_borrow_operation (XEXP (y, 0), DImode))
16249 || arm_borrow_operation (y, DImode)))
16250 return CC_Bmode;
16251
16252 if (GET_MODE (x) == DImode
16253 && (op == EQ || op == NE)
16254 && (GET_CODE (x) == PLUS
16255 || GET_CODE (x) == MINUS)
16256 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16257 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16258 && GET_CODE (y) == SIGN_EXTEND
16259 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16260 return CC_Vmode;
16261
16262 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16263 return GET_MODE (x);
16264
16265 return CCmode;
16266 }
16267
16268 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16269 the sequence of instructions needed to generate a suitable condition
16270 code register. Return the CC register result. */
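/* As an illustration: an EQ or NE test of a DImode value against zero is
   built below as a single flag-setting IOR of the two 32-bit halves (with
   a scratch clobber), which can be emitted as something like
   "orrs rtmp, r_lo, r_hi" rather than a full 64-bit subtraction.
   (Register names here are purely illustrative.)  */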
16271 static rtx
16272 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16273 {
16274 machine_mode mode;
16275 rtx cc_reg;
16276
16277 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16278 gcc_assert (TARGET_32BIT);
16279 gcc_assert (!CONST_INT_P (x));
16280
16281 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16282 subreg_lowpart_offset (SImode, DImode));
16283 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16284 subreg_highpart_offset (SImode, DImode));
16285 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16286 subreg_lowpart_offset (SImode, DImode));
16287 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16288 subreg_highpart_offset (SImode, DImode));
16289 switch (code)
16290 {
16291 case EQ:
16292 case NE:
16293 {
16294 if (y_lo == const0_rtx || y_hi == const0_rtx)
16295 {
16296 if (y_lo != const0_rtx)
16297 {
16298 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16299
16300 gcc_assert (y_hi == const0_rtx);
16301 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16302 if (!arm_add_operand (y_lo, SImode))
16303 y_lo = force_reg (SImode, y_lo);
16304 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16305 x_lo = scratch2;
16306 }
16307 else if (y_hi != const0_rtx)
16308 {
16309 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16310
16311 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16312 if (!arm_add_operand (y_hi, SImode))
16313 y_hi = force_reg (SImode, y_hi);
16314 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16315 x_hi = scratch2;
16316 }
16317
16318 if (!scratch)
16319 {
16320 gcc_assert (!reload_completed);
16321 scratch = gen_rtx_SCRATCH (SImode);
16322 }
16323
16324 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16325 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16326
16327 rtx set
16328 = gen_rtx_SET (cc_reg,
16329 gen_rtx_COMPARE (CC_NZmode,
16330 gen_rtx_IOR (SImode, x_lo, x_hi),
16331 const0_rtx));
16332 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16333 clobber)));
16334 return cc_reg;
16335 }
16336
16337 if (!arm_add_operand (y_lo, SImode))
16338 y_lo = force_reg (SImode, y_lo);
16339
16340 if (!arm_add_operand (y_hi, SImode))
16341 y_hi = force_reg (SImode, y_hi);
16342
16343 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16344 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16345 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16346 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16347 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16348
16349 emit_insn (gen_rtx_SET (cc_reg,
16350 gen_rtx_COMPARE (mode, conjunction,
16351 const0_rtx)));
16352 return cc_reg;
16353 }
16354
16355 case LT:
16356 case GE:
16357 {
16358 if (y_lo == const0_rtx)
16359 {
16360 /* If the low word of y is 0, then this is simply a normal
16361 compare of the upper words. */
16362 if (!arm_add_operand (y_hi, SImode))
16363 y_hi = force_reg (SImode, y_hi);
16364
16365 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16366 }
16367
16368 if (!arm_add_operand (y_lo, SImode))
16369 y_lo = force_reg (SImode, y_lo);
16370
16371 rtx cmp1
16372 = gen_rtx_LTU (DImode,
16373 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16374 const0_rtx);
16375
16376 if (!scratch)
16377 scratch = gen_rtx_SCRATCH (SImode);
16378
16379 if (!arm_not_operand (y_hi, SImode))
16380 y_hi = force_reg (SImode, y_hi);
16381
16382 rtx_insn *insn;
16383 if (y_hi == const0_rtx)
16384 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16385 cmp1));
16386 else if (CONST_INT_P (y_hi))
16387 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16388 y_hi, cmp1));
16389 else
16390 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16391 cmp1));
16392 return SET_DEST (single_set (insn));
16393 }
16394
16395 case LE:
16396 case GT:
16397 {
16398 /* During expansion, we only expect to get here if y is a
16399 constant that we want to handle, otherwise we should have
16400 swapped the operands already. */
16401 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16402
16403 if (!const_ok_for_arm (INTVAL (y_lo)))
16404 y_lo = force_reg (SImode, y_lo);
16405
16406 /* Perform a reverse subtract and compare. */
16407 rtx cmp1
16408 = gen_rtx_LTU (DImode,
16409 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16410 const0_rtx);
16411 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16412 x_hi, cmp1));
16413 return SET_DEST (single_set (insn));
16414 }
16415
16416 case LTU:
16417 case GEU:
16418 {
16419 if (y_lo == const0_rtx)
16420 {
16421 /* If the low word of y is 0, then this is simply a normal
16422 compare of the upper words. */
16423 if (!arm_add_operand (y_hi, SImode))
16424 y_hi = force_reg (SImode, y_hi);
16425
16426 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16427 }
16428
16429 if (!arm_add_operand (y_lo, SImode))
16430 y_lo = force_reg (SImode, y_lo);
16431
16432 rtx cmp1
16433 = gen_rtx_LTU (DImode,
16434 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16435 const0_rtx);
16436
16437 if (!scratch)
16438 scratch = gen_rtx_SCRATCH (SImode);
16439 if (!arm_not_operand (y_hi, SImode))
16440 y_hi = force_reg (SImode, y_hi);
16441
16442 rtx_insn *insn;
16443 if (y_hi == const0_rtx)
16444 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16445 cmp1));
16446 else if (CONST_INT_P (y_hi))
16447 {
16448 /* Constant is viewed as unsigned when zero-extended. */
16449 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16450 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16451 y_hi, cmp1));
16452 }
16453 else
16454 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16455 cmp1));
16456 return SET_DEST (single_set (insn));
16457 }
16458
16459 case LEU:
16460 case GTU:
16461 {
16462 /* During expansion, we only expect to get here if y is a
16463 constant that we want to handle, otherwise we should have
16464 swapped the operands already. */
16465 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16466
16467 if (!const_ok_for_arm (INTVAL (y_lo)))
16468 y_lo = force_reg (SImode, y_lo);
16469
16470 /* Perform a reverse subtract and compare. */
16471 rtx cmp1
16472 = gen_rtx_LTU (DImode,
16473 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16474 const0_rtx);
16475 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16476 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16477 x_hi, cmp1));
16478 return SET_DEST (single_set (insn));
16479 }
16480
16481 default:
16482 gcc_unreachable ();
16483 }
16484 }
16485
16486 /* X and Y are two things to compare using CODE. Emit the compare insn and
16487 return the rtx for the CC register in the proper mode. */
16488 rtx
16489 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16490 {
16491 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16492 return arm_gen_dicompare_reg (code, x, y, scratch);
16493
16494 machine_mode mode = SELECT_CC_MODE (code, x, y);
16495 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16496 if (mode == CC_RSBmode)
16497 {
16498 if (!scratch)
16499 scratch = gen_rtx_SCRATCH (SImode);
16500 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16501 GEN_INT (~UINTVAL (x)), y));
16502 }
16503 else
16504 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16505
16506 return cc_reg;
16507 }
16508
16509 /* Generate a sequence of insns that will generate the correct return
16510 address mask depending on the physical architecture that the program
16511 is running on. */
16512 rtx
16513 arm_gen_return_addr_mask (void)
16514 {
16515 rtx reg = gen_reg_rtx (Pmode);
16516
16517 emit_insn (gen_return_addr_mask (reg));
16518 return reg;
16519 }
16520
16521 void
16522 arm_reload_in_hi (rtx *operands)
16523 {
16524 rtx ref = operands[1];
16525 rtx base, scratch;
16526 HOST_WIDE_INT offset = 0;
16527
16528 if (SUBREG_P (ref))
16529 {
16530 offset = SUBREG_BYTE (ref);
16531 ref = SUBREG_REG (ref);
16532 }
16533
16534 if (REG_P (ref))
16535 {
16536 /* We have a pseudo which has been spilt onto the stack; there
16537 are two cases here: the first where there is a simple
16538 stack-slot replacement and a second where the stack-slot is
16539 out of range, or is used as a subreg. */
16540 if (reg_equiv_mem (REGNO (ref)))
16541 {
16542 ref = reg_equiv_mem (REGNO (ref));
16543 base = find_replacement (&XEXP (ref, 0));
16544 }
16545 else
16546 /* The slot is out of range, or was dressed up in a SUBREG. */
16547 base = reg_equiv_address (REGNO (ref));
16548
16549 /* PR 62554: If there is no equivalent memory location then just move
16550 the value as an SImode register move. This happens when the target
16551 architecture variant does not have an HImode register move. */
16552 if (base == NULL)
16553 {
16554 gcc_assert (REG_P (operands[0]));
16555 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16556 gen_rtx_SUBREG (SImode, ref, 0)));
16557 return;
16558 }
16559 }
16560 else
16561 base = find_replacement (&XEXP (ref, 0));
16562
16563 /* Handle the case where the address is too complex to be offset by 1. */
16564 if (GET_CODE (base) == MINUS
16565 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16566 {
16567 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16568
16569 emit_set_insn (base_plus, base);
16570 base = base_plus;
16571 }
16572 else if (GET_CODE (base) == PLUS)
16573 {
16574 /* The addend must be CONST_INT, or we would have dealt with it above. */
16575 HOST_WIDE_INT hi, lo;
16576
16577 offset += INTVAL (XEXP (base, 1));
16578 base = XEXP (base, 0);
16579
16580 /* Rework the address into a legal sequence of insns. */
16581 /* Valid range for lo is -4095 -> 4095 */
16582 lo = (offset >= 0
16583 ? (offset & 0xfff)
16584 : -((-offset) & 0xfff));
16585
16586 /* Corner case, if lo is the max offset then we would be out of range
16587 once we have added the additional 1 below, so bump the msb into the
16588 pre-loading insn(s). */
16589 if (lo == 4095)
16590 lo &= 0x7ff;
16591
16592 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16593 ^ (HOST_WIDE_INT) 0x80000000)
16594 - (HOST_WIDE_INT) 0x80000000);
16595
16596 gcc_assert (hi + lo == offset);
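      /* Worked example (illustrative): offset == 4095 gives lo == 2047 and
	 hi == 2048, so once HI has been added to the base both byte loads,
	 at LO and LO + 1, stay within the 4095-byte ldrb offset range.  */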
16597
16598 if (hi != 0)
16599 {
16600 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16601
16602 /* Get the base address; addsi3 knows how to handle constants
16603 that require more than one insn. */
16604 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16605 base = base_plus;
16606 offset = lo;
16607 }
16608 }
16609
16610 /* Operands[2] may overlap operands[0] (though it won't overlap
16611 operands[1]), that's why we asked for a DImode reg -- so we can
16612 use the bit that does not overlap. */
16613 if (REGNO (operands[2]) == REGNO (operands[0]))
16614 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16615 else
16616 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16617
16618 emit_insn (gen_zero_extendqisi2 (scratch,
16619 gen_rtx_MEM (QImode,
16620 plus_constant (Pmode, base,
16621 offset))));
16622 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16623 gen_rtx_MEM (QImode,
16624 plus_constant (Pmode, base,
16625 offset + 1))));
16626 if (!BYTES_BIG_ENDIAN)
16627 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16628 gen_rtx_IOR (SImode,
16629 gen_rtx_ASHIFT
16630 (SImode,
16631 gen_rtx_SUBREG (SImode, operands[0], 0),
16632 GEN_INT (8)),
16633 scratch));
16634 else
16635 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16636 gen_rtx_IOR (SImode,
16637 gen_rtx_ASHIFT (SImode, scratch,
16638 GEN_INT (8)),
16639 gen_rtx_SUBREG (SImode, operands[0], 0)));
16640 }
16641
16642 /* Handle storing a half-word to memory during reload by synthesizing it as two
16643 byte stores. Take care not to clobber the input values until after we
16644 have moved them somewhere safe. This code assumes that if the DImode
16645 scratch in operands[2] overlaps either the input value or output address
16646 in some way, then that value must die in this insn (we absolutely need
16647 two scratch registers for some corner cases). */
16648 void
16649 arm_reload_out_hi (rtx *operands)
16650 {
16651 rtx ref = operands[0];
16652 rtx outval = operands[1];
16653 rtx base, scratch;
16654 HOST_WIDE_INT offset = 0;
16655
16656 if (SUBREG_P (ref))
16657 {
16658 offset = SUBREG_BYTE (ref);
16659 ref = SUBREG_REG (ref);
16660 }
16661
16662 if (REG_P (ref))
16663 {
16664 /* We have a pseudo which has been spilt onto the stack; there
16665 are two cases here: the first where there is a simple
16666 stack-slot replacement and a second where the stack-slot is
16667 out of range, or is used as a subreg. */
16668 if (reg_equiv_mem (REGNO (ref)))
16669 {
16670 ref = reg_equiv_mem (REGNO (ref));
16671 base = find_replacement (&XEXP (ref, 0));
16672 }
16673 else
16674 /* The slot is out of range, or was dressed up in a SUBREG. */
16675 base = reg_equiv_address (REGNO (ref));
16676
16677 /* PR 62254: If there is no equivalent memory location then just move
16678 the value as an SImode register move. This happens when the target
16679 architecture variant does not have an HImode register move. */
16680 if (base == NULL)
16681 {
16682 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16683
16684 if (REG_P (outval))
16685 {
16686 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16687 gen_rtx_SUBREG (SImode, outval, 0)));
16688 }
16689 else /* SUBREG_P (outval) */
16690 {
16691 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16692 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16693 SUBREG_REG (outval)));
16694 else
16695 /* FIXME: Handle other cases ? */
16696 gcc_unreachable ();
16697 }
16698 return;
16699 }
16700 }
16701 else
16702 base = find_replacement (&XEXP (ref, 0));
16703
16704 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16705
16706 /* Handle the case where the address is too complex to be offset by 1. */
16707 if (GET_CODE (base) == MINUS
16708 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16709 {
16710 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16711
16712 /* Be careful not to destroy OUTVAL. */
16713 if (reg_overlap_mentioned_p (base_plus, outval))
16714 {
16715 /* Updating base_plus might destroy outval, see if we can
16716 swap the scratch and base_plus. */
16717 if (!reg_overlap_mentioned_p (scratch, outval))
16718 std::swap (scratch, base_plus);
16719 else
16720 {
16721 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16722
16723 /* Be conservative and copy OUTVAL into the scratch now,
16724 this should only be necessary if outval is a subreg
16725 of something larger than a word. */
16726 /* XXX Might this clobber base? I can't see how it can,
16727 since scratch is known to overlap with OUTVAL, and
16728 must be wider than a word. */
16729 emit_insn (gen_movhi (scratch_hi, outval));
16730 outval = scratch_hi;
16731 }
16732 }
16733
16734 emit_set_insn (base_plus, base);
16735 base = base_plus;
16736 }
16737 else if (GET_CODE (base) == PLUS)
16738 {
16739 /* The addend must be CONST_INT, or we would have dealt with it above. */
16740 HOST_WIDE_INT hi, lo;
16741
16742 offset += INTVAL (XEXP (base, 1));
16743 base = XEXP (base, 0);
16744
16745 /* Rework the address into a legal sequence of insns. */
16746 /* Valid range for lo is -4095 -> 4095 */
16747 lo = (offset >= 0
16748 ? (offset & 0xfff)
16749 : -((-offset) & 0xfff));
16750
16751 /* Corner case, if lo is the max offset then we would be out of range
16752 once we have added the additional 1 below, so bump the msb into the
16753 pre-loading insn(s). */
16754 if (lo == 4095)
16755 lo &= 0x7ff;
16756
16757 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16758 ^ (HOST_WIDE_INT) 0x80000000)
16759 - (HOST_WIDE_INT) 0x80000000);
16760
16761 gcc_assert (hi + lo == offset);
16762
16763 if (hi != 0)
16764 {
16765 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16766
16767 /* Be careful not to destroy OUTVAL. */
16768 if (reg_overlap_mentioned_p (base_plus, outval))
16769 {
16770 /* Updating base_plus might destroy outval, see if we
16771 can swap the scratch and base_plus. */
16772 if (!reg_overlap_mentioned_p (scratch, outval))
16773 std::swap (scratch, base_plus);
16774 else
16775 {
16776 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16777
16778 /* Be conservative and copy outval into scratch now,
16779 this should only be necessary if outval is a
16780 subreg of something larger than a word. */
16781 /* XXX Might this clobber base? I can't see how it
16782 can, since scratch is known to overlap with
16783 outval. */
16784 emit_insn (gen_movhi (scratch_hi, outval));
16785 outval = scratch_hi;
16786 }
16787 }
16788
16789 /* Get the base address; addsi3 knows how to handle constants
16790 that require more than one insn. */
16791 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16792 base = base_plus;
16793 offset = lo;
16794 }
16795 }
16796
16797 if (BYTES_BIG_ENDIAN)
16798 {
16799 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16800 plus_constant (Pmode, base,
16801 offset + 1)),
16802 gen_lowpart (QImode, outval)));
16803 emit_insn (gen_lshrsi3 (scratch,
16804 gen_rtx_SUBREG (SImode, outval, 0),
16805 GEN_INT (8)));
16806 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16807 offset)),
16808 gen_lowpart (QImode, scratch)));
16809 }
16810 else
16811 {
16812 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16813 offset)),
16814 gen_lowpart (QImode, outval)));
16815 emit_insn (gen_lshrsi3 (scratch,
16816 gen_rtx_SUBREG (SImode, outval, 0),
16817 GEN_INT (8)));
16818 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16819 plus_constant (Pmode, base,
16820 offset + 1)),
16821 gen_lowpart (QImode, scratch)));
16822 }
16823 }
16824
16825 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16826 (padded to the size of a word) should be passed in a register. */
16827
16828 static bool
16829 arm_must_pass_in_stack (const function_arg_info &arg)
16830 {
16831 if (TARGET_AAPCS_BASED)
16832 return must_pass_in_stack_var_size (arg);
16833 else
16834 return must_pass_in_stack_var_size_or_pad (arg);
16835 }
16836
16837
16838 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16839 byte of a stack argument has useful data. For legacy APCS ABIs we use
16840 the default. For AAPCS based ABIs small aggregate types are placed
16841 in the lowest memory address. */
16842
16843 static pad_direction
16844 arm_function_arg_padding (machine_mode mode, const_tree type)
16845 {
16846 if (!TARGET_AAPCS_BASED)
16847 return default_function_arg_padding (mode, type);
16848
16849 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16850 return PAD_DOWNWARD;
16851
16852 return PAD_UPWARD;
16853 }
16854
16855
16856 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16857 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16858 register has useful data, and return the opposite if the most
16859 significant byte does. */
16860
16861 bool
16862 arm_pad_reg_upward (machine_mode mode,
16863 tree type, int first ATTRIBUTE_UNUSED)
16864 {
16865 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16866 {
16867 /* For AAPCS, small aggregates, small fixed-point types,
16868 and small complex types are always padded upwards. */
16869 if (type)
16870 {
16871 if ((AGGREGATE_TYPE_P (type)
16872 || TREE_CODE (type) == COMPLEX_TYPE
16873 || FIXED_POINT_TYPE_P (type))
16874 && int_size_in_bytes (type) <= 4)
16875 return true;
16876 }
16877 else
16878 {
16879 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16880 && GET_MODE_SIZE (mode) <= 4)
16881 return true;
16882 }
16883 }
16884
16885 /* Otherwise, use default padding. */
16886 return !BYTES_BIG_ENDIAN;
16887 }
16888
16889 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16890 assuming that the address in the base register is word aligned. */
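/* Illustrative cases: in Thumb-2 state an offset of 1020 is accepted, 1022
   is rejected (not a multiple of 4) and 1024 is rejected (out of range);
   in ARM state the limit is +/-255, so an offset of 256 is rejected.  */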
16891 bool
16892 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16893 {
16894 HOST_WIDE_INT max_offset;
16895
16896 /* Offset must be a multiple of 4 in Thumb mode. */
16897 if (TARGET_THUMB2 && ((offset & 3) != 0))
16898 return false;
16899
16900 if (TARGET_THUMB2)
16901 max_offset = 1020;
16902 else if (TARGET_ARM)
16903 max_offset = 255;
16904 else
16905 return false;
16906
16907 return ((offset <= max_offset) && (offset >= -max_offset));
16908 }
16909
16910 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16911 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16912 Assumes that the address in the base register RN is word aligned. Pattern
16913 guarantees that both memory accesses use the same base register,
16914 the offsets are constants within the range, and the gap between the offsets is 4.
16915 If reload is complete, check that the registers are legal. WBACK indicates whether
16916 address is updated. LOAD indicates whether memory access is load or store. */
16917 bool
16918 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16919 bool wback, bool load)
16920 {
16921 unsigned int t, t2, n;
16922
16923 if (!reload_completed)
16924 return true;
16925
16926 if (!offset_ok_for_ldrd_strd (offset))
16927 return false;
16928
16929 t = REGNO (rt);
16930 t2 = REGNO (rt2);
16931 n = REGNO (rn);
16932
16933 if ((TARGET_THUMB2)
16934 && ((wback && (n == t || n == t2))
16935 || (t == SP_REGNUM)
16936 || (t == PC_REGNUM)
16937 || (t2 == SP_REGNUM)
16938 || (t2 == PC_REGNUM)
16939 || (!load && (n == PC_REGNUM))
16940 || (load && (t == t2))
16941 /* Triggers Cortex-M3 LDRD errata. */
16942 || (!wback && load && fix_cm3_ldrd && (n == t))))
16943 return false;
16944
16945 if ((TARGET_ARM)
16946 && ((wback && (n == t || n == t2))
16947 || (t2 == PC_REGNUM)
16948 || (t % 2 != 0) /* First destination register is not even. */
16949 || (t2 != t + 1)
16950 /* PC can be used as base register (for offset addressing only),
16951 but it is deprecated. */
16952 || (n == PC_REGNUM)))
16953 return false;
16954
16955 return true;
16956 }
16957
16958 /* Return true if a 64-bit access with alignment ALIGN and with a
16959 constant offset OFFSET from the base pointer is permitted on this
16960 architecture. */
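/* E.g. when unaligned_access is enabled, a word-aligned (32-bit) access at
   offset 4 is permitted, whereas when it is disabled the base must be
   doubleword-aligned and the offset a multiple of 8.  */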
16961 static bool
16962 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16963 {
16964 return (unaligned_access
16965 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16966 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16967 }
16968
16969 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16970 operand MEM's address contains an immediate offset from the base
16971 register and has no side effects, in which case it sets BASE,
16972 OFFSET and ALIGN accordingly. */
16973 static bool
16974 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16975 {
16976 rtx addr;
16977
16978 gcc_assert (base != NULL && offset != NULL);
16979
16980 /* TODO: Handle more general memory operand patterns, such as
16981 PRE_DEC and PRE_INC. */
16982
16983 if (side_effects_p (mem))
16984 return false;
16985
16986 /* Can't deal with subregs. */
16987 if (SUBREG_P (mem))
16988 return false;
16989
16990 gcc_assert (MEM_P (mem));
16991
16992 *offset = const0_rtx;
16993 *align = MEM_ALIGN (mem);
16994
16995 addr = XEXP (mem, 0);
16996
16997 /* If addr isn't valid for DImode, then we can't handle it. */
16998 if (!arm_legitimate_address_p (DImode, addr,
16999 reload_in_progress || reload_completed))
17000 return false;
17001
17002 if (REG_P (addr))
17003 {
17004 *base = addr;
17005 return true;
17006 }
17007 else if (GET_CODE (addr) == PLUS)
17008 {
17009 *base = XEXP (addr, 0);
17010 *offset = XEXP (addr, 1);
17011 return (REG_P (*base) && CONST_INT_P (*offset));
17012 }
17013
17014 return false;
17015 }
17016
17017 /* Called from a peephole2 to replace two word-size accesses with a
17018 single LDRD/STRD instruction. Returns true iff we can generate a
17019 new instruction sequence. That is, both accesses use the same base
17020 register and the gap between constant offsets is 4. This function
17021 may reorder its operands to match ldrd/strd RTL templates.
17022 OPERANDS are the operands found by the peephole matcher;
17023 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17024 corresponding memory operands. LOAD indicates whether the access
17025 is load or store. CONST_STORE indicates a store of constant
17026 integer values held in OPERANDS[4,5] and assumes that the pattern
17027 is of length 4 insn, for the purpose of checking dead registers.
17028 COMMUTE indicates that register operands may be reordered. */
17029 bool
17030 gen_operands_ldrd_strd (rtx *operands, bool load,
17031 bool const_store, bool commute)
17032 {
17033 int nops = 2;
17034 HOST_WIDE_INT offsets[2], offset, align[2];
17035 rtx base = NULL_RTX;
17036 rtx cur_base, cur_offset, tmp;
17037 int i, gap;
17038 HARD_REG_SET regset;
17039
17040 gcc_assert (!const_store || !load);
17041 /* Check that the memory references are immediate offsets from the
17042 same base register. Extract the base register, the destination
17043 registers, and the corresponding memory offsets. */
17044 for (i = 0; i < nops; i++)
17045 {
17046 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17047 &align[i]))
17048 return false;
17049
17050 if (i == 0)
17051 base = cur_base;
17052 else if (REGNO (base) != REGNO (cur_base))
17053 return false;
17054
17055 offsets[i] = INTVAL (cur_offset);
17056 if (GET_CODE (operands[i]) == SUBREG)
17057 {
17058 tmp = SUBREG_REG (operands[i]);
17059 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17060 operands[i] = tmp;
17061 }
17062 }
17063
17064 /* Make sure there is no dependency between the individual loads. */
17065 if (load && REGNO (operands[0]) == REGNO (base))
17066 return false; /* RAW */
17067
17068 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17069 return false; /* WAW */
17070
17071 /* If the same input register is used in both stores
17072 when storing different constants, try to find a free register.
17073 For example, the code
17074 mov r0, 0
17075 str r0, [r2]
17076 mov r0, 1
17077 str r0, [r2, #4]
17078 can be transformed into
17079 mov r1, 0
17080 mov r0, 1
17081 strd r1, r0, [r2]
17082 in Thumb mode assuming that r1 is free.
17083 For ARM mode do the same but only if the starting register
17084 can be made to be even. */
17085 if (const_store
17086 && REGNO (operands[0]) == REGNO (operands[1])
17087 && INTVAL (operands[4]) != INTVAL (operands[5]))
17088 {
17089 if (TARGET_THUMB2)
17090 {
17091 CLEAR_HARD_REG_SET (regset);
17092 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17093 if (tmp == NULL_RTX)
17094 return false;
17095
17096 /* Use the new register in the first load to ensure that
17097 if the original input register is not dead after the peephole,
17098 then it will have the correct constant value. */
17099 operands[0] = tmp;
17100 }
17101 else if (TARGET_ARM)
17102 {
17103 int regno = REGNO (operands[0]);
17104 if (!peep2_reg_dead_p (4, operands[0]))
17105 {
17106 /* When the input register is even and is not dead after the
17107 pattern, it has to hold the second constant but we cannot
17108 form a legal STRD in ARM mode with this register as the second
17109 register. */
17110 if (regno % 2 == 0)
17111 return false;
17112
17113 /* Is regno-1 free? */
17114 SET_HARD_REG_SET (regset);
17115 CLEAR_HARD_REG_BIT(regset, regno - 1);
17116 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17117 if (tmp == NULL_RTX)
17118 return false;
17119
17120 operands[0] = tmp;
17121 }
17122 else
17123 {
17124 /* Find a DImode register. */
17125 CLEAR_HARD_REG_SET (regset);
17126 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17127 if (tmp != NULL_RTX)
17128 {
17129 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17130 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17131 }
17132 else
17133 {
17134 /* Can we use the input register to form a DI register? */
17135 SET_HARD_REG_SET (regset);
17136 CLEAR_HARD_REG_BIT(regset,
17137 regno % 2 == 0 ? regno + 1 : regno - 1);
17138 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17139 if (tmp == NULL_RTX)
17140 return false;
17141 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17142 }
17143 }
17144
17145 gcc_assert (operands[0] != NULL_RTX);
17146 gcc_assert (operands[1] != NULL_RTX);
17147 gcc_assert (REGNO (operands[0]) % 2 == 0);
17148 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17149 }
17150 }
17151
17152 /* Make sure the instructions are ordered with lower memory access first. */
17153 if (offsets[0] > offsets[1])
17154 {
17155 gap = offsets[0] - offsets[1];
17156 offset = offsets[1];
17157
17158 /* Swap the instructions such that lower memory is accessed first. */
17159 std::swap (operands[0], operands[1]);
17160 std::swap (operands[2], operands[3]);
17161 std::swap (align[0], align[1]);
17162 if (const_store)
17163 std::swap (operands[4], operands[5]);
17164 }
17165 else
17166 {
17167 gap = offsets[1] - offsets[0];
17168 offset = offsets[0];
17169 }
17170
17171 /* Make sure accesses are to consecutive memory locations. */
17172 if (gap != GET_MODE_SIZE (SImode))
17173 return false;
17174
17175 if (!align_ok_ldrd_strd (align[0], offset))
17176 return false;
17177
17178 /* Make sure we generate legal instructions. */
17179 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17180 false, load))
17181 return true;
17182
17183 /* In Thumb state, where registers are almost unconstrained, there
17184 is little hope of fixing it. */
17185 if (TARGET_THUMB2)
17186 return false;
17187
17188 if (load && commute)
17189 {
17190 /* Try reordering registers. */
17191 std::swap (operands[0], operands[1]);
17192 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17193 false, load))
17194 return true;
17195 }
17196
17197 if (const_store)
17198 {
17199 /* If input registers are dead after this pattern, they can be
17200 reordered or replaced by other registers that are free in the
17201 current pattern. */
17202 if (!peep2_reg_dead_p (4, operands[0])
17203 || !peep2_reg_dead_p (4, operands[1]))
17204 return false;
17205
17206 /* Try to reorder the input registers. */
17207 /* For example, the code
17208 mov r0, 0
17209 mov r1, 1
17210 str r1, [r2]
17211 str r0, [r2, #4]
17212 can be transformed into
17213 mov r1, 0
17214 mov r0, 1
17215 strd r0, r1, [r2]
17216 */
17217 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17218 false, false))
17219 {
17220 std::swap (operands[0], operands[1]);
17221 return true;
17222 }
17223
17224 /* Try to find a free DI register. */
17225 CLEAR_HARD_REG_SET (regset);
17226 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17227 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17228 while (true)
17229 {
17230 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17231 if (tmp == NULL_RTX)
17232 return false;
17233
17234 /* DREG must be an even-numbered register in DImode.
17235 Split it into SI registers. */
17236 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17237 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17238 gcc_assert (operands[0] != NULL_RTX);
17239 gcc_assert (operands[1] != NULL_RTX);
17240 gcc_assert (REGNO (operands[0]) % 2 == 0);
17241 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17242
17243 return (operands_ok_ldrd_strd (operands[0], operands[1],
17244 base, offset,
17245 false, load));
17246 }
17247 }
17248
17249 return false;
17250 }
17251
17252
17253 /* Return true if parallel execution of the two word-size accesses provided
17254 could be satisfied with a single LDRD/STRD instruction. Two word-size
17255 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17256 register operands and OPERANDS[2,3] are the corresponding memory operands.
17257 */
17258 bool
17259 valid_operands_ldrd_strd (rtx *operands, bool load)
17260 {
17261 int nops = 2;
17262 HOST_WIDE_INT offsets[2], offset, align[2];
17263 rtx base = NULL_RTX;
17264 rtx cur_base, cur_offset;
17265 int i, gap;
17266
17267 /* Check that the memory references are immediate offsets from the
17268 same base register. Extract the base register, the destination
17269 registers, and the corresponding memory offsets. */
17270 for (i = 0; i < nops; i++)
17271 {
17272 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17273 &align[i]))
17274 return false;
17275
17276 if (i == 0)
17277 base = cur_base;
17278 else if (REGNO (base) != REGNO (cur_base))
17279 return false;
17280
17281 offsets[i] = INTVAL (cur_offset);
17282 if (GET_CODE (operands[i]) == SUBREG)
17283 return false;
17284 }
17285
17286 if (offsets[0] > offsets[1])
17287 return false;
17288
17289 gap = offsets[1] - offsets[0];
17290 offset = offsets[0];
17291
17292 /* Make sure accesses are to consecutive memory locations. */
17293 if (gap != GET_MODE_SIZE (SImode))
17294 return false;
17295
17296 if (!align_ok_ldrd_strd (align[0], offset))
17297 return false;
17298
17299 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17300 false, load);
17301 }
17302
17303 \f
17304 /* Print a symbolic form of X to the debug file, F. */
17305 static void
17306 arm_print_value (FILE *f, rtx x)
17307 {
17308 switch (GET_CODE (x))
17309 {
17310 case CONST_INT:
17311 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17312 return;
17313
17314 case CONST_DOUBLE:
17315 {
17316 char fpstr[20];
17317 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17318 sizeof (fpstr), 0, 1);
17319 fputs (fpstr, f);
17320 }
17321 return;
17322
17323 case CONST_VECTOR:
17324 {
17325 int i;
17326
17327 fprintf (f, "<");
17328 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17329 {
17330 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17331 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17332 fputc (',', f);
17333 }
17334 fprintf (f, ">");
17335 }
17336 return;
17337
17338 case CONST_STRING:
17339 fprintf (f, "\"%s\"", XSTR (x, 0));
17340 return;
17341
17342 case SYMBOL_REF:
17343 fprintf (f, "`%s'", XSTR (x, 0));
17344 return;
17345
17346 case LABEL_REF:
17347 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17348 return;
17349
17350 case CONST:
17351 arm_print_value (f, XEXP (x, 0));
17352 return;
17353
17354 case PLUS:
17355 arm_print_value (f, XEXP (x, 0));
17356 fprintf (f, "+");
17357 arm_print_value (f, XEXP (x, 1));
17358 return;
17359
17360 case PC:
17361 fprintf (f, "pc");
17362 return;
17363
17364 default:
17365 fprintf (f, "????");
17366 return;
17367 }
17368 }
17369 \f
17370 /* Routines for manipulation of the constant pool. */
17371
17372 /* Arm instructions cannot load a large constant directly into a
17373 register; they have to come from a pc relative load. The constant
17374 must therefore be placed in the addressable range of the pc
17375 relative load. Depending on the precise pc relative load
17376 instruction the range is somewhere between 256 bytes and 4k. This
17377 means that we often have to dump a constant inside a function, and
17378 generate code to branch around it.
17379
17380 It is important to minimize this, since the branches will slow
17381 things down and make the code larger.
17382
17383 Normally we can hide the table after an existing unconditional
17384 branch so that there is no interruption of the flow, but in the
17385 worst case the code looks like this:
17386
17387 ldr rn, L1
17388 ...
17389 b L2
17390 align
17391 L1: .long value
17392 L2:
17393 ...
17394
17395 ldr rn, L3
17396 ...
17397 b L4
17398 align
17399 L3: .long value
17400 L4:
17401 ...
17402
17403 We fix this by performing a scan after scheduling, which notices
17404 which instructions need to have their operands fetched from the
17405 constant table and builds the table.
17406
17407 The algorithm starts by building a table of all the constants that
17408 need fixing up and all the natural barriers in the function (places
17409 where a constant table can be dropped without breaking the flow).
17410 For each fixup we note how far the pc-relative replacement will be
17411 able to reach and the offset of the instruction into the function.
17412
17413 Having built the table we then group the fixes together to form
17414 tables that are as large as possible (subject to addressing
17415 constraints) and emit each table of constants after the last
17416 barrier that is within range of all the instructions in the group.
17417 If a group does not contain a barrier, then we forcibly create one
17418 by inserting a jump instruction into the flow. Once the table has
17419 been inserted, the insns are then modified to reference the
17420 relevant entry in the pool.
17421
17422 Possible enhancements to the algorithm (not implemented) are:
17423
17424 1) For some processors and object formats, there may be benefit in
17425 aligning the pools to the start of cache lines; this alignment
17426 would need to be taken into account when calculating addressability
17427 of a pool. */
17428
17429 /* These typedefs are located at the start of this file, so that
17430 they can be used in the prototypes there. This comment is to
17431 remind readers of that fact so that the following structures
17432 can be understood more easily.
17433
17434 typedef struct minipool_node Mnode;
17435 typedef struct minipool_fixup Mfix; */
17436
17437 struct minipool_node
17438 {
17439 /* Doubly linked chain of entries. */
17440 Mnode * next;
17441 Mnode * prev;
17442 /* The maximum offset into the code at which this entry can be placed. While
17443 pushing fixes for forward references, all entries are sorted in order
17444 of increasing max_address. */
17445 HOST_WIDE_INT max_address;
17446 /* Similarly for an entry inserted for a backwards ref. */
17447 HOST_WIDE_INT min_address;
17448 /* The number of fixes referencing this entry. This can become zero
17449 if we "unpush" an entry. In this case we ignore the entry when we
17450 come to emit the code. */
17451 int refcount;
17452 /* The offset from the start of the minipool. */
17453 HOST_WIDE_INT offset;
17454 /* The value in the table. */
17455 rtx value;
17456 /* The mode of value. */
17457 machine_mode mode;
17458 /* The size of the value. With iWMMXt enabled
17459 sizes > 4 also imply an alignment of 8 bytes. */
17460 int fix_size;
17461 };
17462
17463 struct minipool_fixup
17464 {
17465 Mfix * next;
17466 rtx_insn * insn;
17467 HOST_WIDE_INT address;
17468 rtx * loc;
17469 machine_mode mode;
17470 int fix_size;
17471 rtx value;
17472 Mnode * minipool;
17473 HOST_WIDE_INT forwards;
17474 HOST_WIDE_INT backwards;
17475 };
17476
17477 /* Fixes less than a word need padding out to a word boundary. */
17478 #define MINIPOOL_FIX_SIZE(mode) \
17479 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
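/* For instance, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DImode) is 8.  */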
17480
17481 static Mnode * minipool_vector_head;
17482 static Mnode * minipool_vector_tail;
17483 static rtx_code_label *minipool_vector_label;
17484 static int minipool_pad;
17485
17486 /* The linked list of all minipool fixes required for this function. */
17487 Mfix * minipool_fix_head;
17488 Mfix * minipool_fix_tail;
17489 /* The fix entry for the current minipool, once it has been placed. */
17490 Mfix * minipool_barrier;
17491
17492 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17493 #define JUMP_TABLES_IN_TEXT_SECTION 0
17494 #endif
17495
17496 static HOST_WIDE_INT
17497 get_jump_table_size (rtx_jump_table_data *insn)
17498 {
17499 /* ADDR_VECs only take room if read-only data goes into the text
17500 section. */
17501 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17502 {
17503 rtx body = PATTERN (insn);
17504 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17505 HOST_WIDE_INT size;
17506 HOST_WIDE_INT modesize;
17507
17508 modesize = GET_MODE_SIZE (GET_MODE (body));
17509 size = modesize * XVECLEN (body, elt);
17510 switch (modesize)
17511 {
17512 case 1:
17513 /* Round up size of TBB table to a halfword boundary. */
17514 size = (size + 1) & ~HOST_WIDE_INT_1;
17515 break;
17516 case 2:
17517 /* No padding necessary for TBH. */
17518 break;
17519 case 4:
17520 /* Add two bytes for alignment on Thumb. */
17521 if (TARGET_THUMB)
17522 size += 2;
17523 break;
17524 default:
17525 gcc_unreachable ();
17526 }
17527 return size;
17528 }
17529
17530 return 0;
17531 }
17532
17533 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17534 function descriptor) into a register and the GOT address into the
17535 FDPIC register, returning an rtx for the register holding the
17536 function address. */
17537
17538 rtx
17539 arm_load_function_descriptor (rtx funcdesc)
17540 {
17541 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17542 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17543 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17544 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17545
17546 emit_move_insn (fnaddr_reg, fnaddr);
17547
17548 /* The ABI requires the entry point address to be loaded first, but
17549 since we cannot support lazy binding for lack of atomic load of
17550 two 32-bit values, we do not need to bother to prevent the
17551 previous load from being moved after that of the GOT address. */
17552 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17553
17554 return fnaddr_reg;
17555 }
17556
17557 /* Return the maximum amount of padding that will be inserted before
17558 label LABEL. */
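/* For instance, a label aligned to 8 bytes in Thumb state (minimum insn
   size of 2 bytes) may be preceded by up to 6 bytes of padding.  */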
17559 static HOST_WIDE_INT
17560 get_label_padding (rtx label)
17561 {
17562 HOST_WIDE_INT align, min_insn_size;
17563
17564 align = 1 << label_to_alignment (label).levels[0].log;
17565 min_insn_size = TARGET_THUMB ? 2 : 4;
17566 return align > min_insn_size ? align - min_insn_size : 0;
17567 }
17568
17569 /* Move a minipool fix MP from its current location to before MAX_MP.
17570 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17571 constraints may need updating. */
17572 static Mnode *
17573 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17574 HOST_WIDE_INT max_address)
17575 {
17576 /* The code below assumes these are different. */
17577 gcc_assert (mp != max_mp);
17578
17579 if (max_mp == NULL)
17580 {
17581 if (max_address < mp->max_address)
17582 mp->max_address = max_address;
17583 }
17584 else
17585 {
17586 if (max_address > max_mp->max_address - mp->fix_size)
17587 mp->max_address = max_mp->max_address - mp->fix_size;
17588 else
17589 mp->max_address = max_address;
17590
17591 /* Unlink MP from its current position. Since max_mp is non-null,
17592 mp->prev must be non-null. */
17593 mp->prev->next = mp->next;
17594 if (mp->next != NULL)
17595 mp->next->prev = mp->prev;
17596 else
17597 minipool_vector_tail = mp->prev;
17598
17599 /* Re-insert it before MAX_MP. */
17600 mp->next = max_mp;
17601 mp->prev = max_mp->prev;
17602 max_mp->prev = mp;
17603
17604 if (mp->prev != NULL)
17605 mp->prev->next = mp;
17606 else
17607 minipool_vector_head = mp;
17608 }
17609
17610 /* Save the new entry. */
17611 max_mp = mp;
17612
17613 /* Scan over the preceding entries and adjust their addresses as
17614 required. */
17615 while (mp->prev != NULL
17616 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17617 {
17618 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17619 mp = mp->prev;
17620 }
17621
17622 return max_mp;
17623 }
17624
17625 /* Add a constant to the minipool for a forward reference. Returns the
17626 node added or NULL if the constant will not fit in this pool. */
17627 static Mnode *
17628 add_minipool_forward_ref (Mfix *fix)
17629 {
17630 /* If set, max_mp is the first pool_entry that has a lower
17631 constraint than the one we are trying to add. */
17632 Mnode * max_mp = NULL;
17633 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17634 Mnode * mp;
17635
17636 /* If the minipool starts before the end of FIX->INSN then this FIX
17637 cannot be placed into the current pool. Furthermore, adding the
17638 new constant pool entry may cause the pool to start FIX_SIZE bytes
17639 earlier. */
17640 if (minipool_vector_head &&
17641 (fix->address + get_attr_length (fix->insn)
17642 >= minipool_vector_head->max_address - fix->fix_size))
17643 return NULL;
17644
17645 /* Scan the pool to see if a constant with the same value has
17646 already been added. While we are doing this, also note the
17647 location where we must insert the constant if it doesn't already
17648 exist. */
17649 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17650 {
17651 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17652 && fix->mode == mp->mode
17653 && (!LABEL_P (fix->value)
17654 || (CODE_LABEL_NUMBER (fix->value)
17655 == CODE_LABEL_NUMBER (mp->value)))
17656 && rtx_equal_p (fix->value, mp->value))
17657 {
17658 /* More than one fix references this entry. */
17659 mp->refcount++;
17660 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17661 }
17662
17663 /* Note the insertion point if necessary. */
17664 if (max_mp == NULL
17665 && mp->max_address > max_address)
17666 max_mp = mp;
17667
17668 /* If we are inserting an 8-byte aligned quantity and
17669 we have not already found an insertion point, then
17670 make sure that all such 8-byte aligned quantities are
17671 placed at the start of the pool. */
17672 if (ARM_DOUBLEWORD_ALIGN
17673 && max_mp == NULL
17674 && fix->fix_size >= 8
17675 && mp->fix_size < 8)
17676 {
17677 max_mp = mp;
17678 max_address = mp->max_address;
17679 }
17680 }
17681
17682 /* The value is not currently in the minipool, so we need to create
17683 a new entry for it. If MAX_MP is NULL, the entry will be put on
17684 the end of the list since the placement is less constrained than
17685 any existing entry. Otherwise, we insert the new fix before
17686 MAX_MP and, if necessary, adjust the constraints on the other
17687 entries. */
17688 mp = XNEW (Mnode);
17689 mp->fix_size = fix->fix_size;
17690 mp->mode = fix->mode;
17691 mp->value = fix->value;
17692 mp->refcount = 1;
17693 /* Not yet required for a backwards ref. */
17694 mp->min_address = -65536;
17695
17696 if (max_mp == NULL)
17697 {
17698 mp->max_address = max_address;
17699 mp->next = NULL;
17700 mp->prev = minipool_vector_tail;
17701
17702 if (mp->prev == NULL)
17703 {
17704 minipool_vector_head = mp;
17705 minipool_vector_label = gen_label_rtx ();
17706 }
17707 else
17708 mp->prev->next = mp;
17709
17710 minipool_vector_tail = mp;
17711 }
17712 else
17713 {
17714 if (max_address > max_mp->max_address - mp->fix_size)
17715 mp->max_address = max_mp->max_address - mp->fix_size;
17716 else
17717 mp->max_address = max_address;
17718
17719 mp->next = max_mp;
17720 mp->prev = max_mp->prev;
17721 max_mp->prev = mp;
17722 if (mp->prev != NULL)
17723 mp->prev->next = mp;
17724 else
17725 minipool_vector_head = mp;
17726 }
17727
17728 /* Save the new entry. */
17729 max_mp = mp;
17730
17731 /* Scan over the preceding entries and adjust their addresses as
17732 required. */
17733 while (mp->prev != NULL
17734 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17735 {
17736 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17737 mp = mp->prev;
17738 }
17739
17740 return max_mp;
17741 }
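/* A worked example with made-up numbers: for a fix at address 0x1000 whose
   insn has a forward pool range of 4095 bytes, and with minipool_pad == 4,
   the code above computes max_address = 0x1000 + 4095 - 4 = 0x1ffb, which
   bounds how far forward the new entry may be placed.  */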
17742
17743 static Mnode *
17744 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17745 HOST_WIDE_INT min_address)
17746 {
17747 HOST_WIDE_INT offset;
17748
17749 /* The code below assumes these are different. */
17750 gcc_assert (mp != min_mp);
17751
17752 if (min_mp == NULL)
17753 {
17754 if (min_address > mp->min_address)
17755 mp->min_address = min_address;
17756 }
17757 else
17758 {
17759 /* We will adjust this below if it is too loose. */
17760 mp->min_address = min_address;
17761
17762 /* Unlink MP from its current position. Since min_mp is non-null,
17763 mp->next must be non-null. */
17764 mp->next->prev = mp->prev;
17765 if (mp->prev != NULL)
17766 mp->prev->next = mp->next;
17767 else
17768 minipool_vector_head = mp->next;
17769
17770 /* Reinsert it after MIN_MP. */
17771 mp->prev = min_mp;
17772 mp->next = min_mp->next;
17773 min_mp->next = mp;
17774 if (mp->next != NULL)
17775 mp->next->prev = mp;
17776 else
17777 minipool_vector_tail = mp;
17778 }
17779
17780 min_mp = mp;
17781
17782 offset = 0;
17783 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17784 {
17785 mp->offset = offset;
17786 if (mp->refcount > 0)
17787 offset += mp->fix_size;
17788
17789 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17790 mp->next->min_address = mp->min_address + mp->fix_size;
17791 }
17792
17793 return min_mp;
17794 }
17795
17796 /* Add a constant to the minipool for a backward reference. Returns the
17797 node added or NULL if the constant will not fit in this pool.
17798
17799 Note that the code for insertion for a backwards reference can be
17800 somewhat confusing because the calculated offsets for each fix do
17801 not take into account the size of the pool (which is still under
17802 construction). */
17803 static Mnode *
17804 add_minipool_backward_ref (Mfix *fix)
17805 {
17806 /* If set, min_mp is the last pool_entry that has a lower constraint
17807 than the one we are trying to add. */
17808 Mnode *min_mp = NULL;
17809 /* This can be negative, since it is only a constraint. */
17810 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17811 Mnode *mp;
17812
17813 /* If we can't reach the current pool from this insn, or if we can't
17814 insert this entry at the end of the pool without pushing other
17815 fixes out of range, then we don't try. This ensures that we
17816 can't fail later on. */
17817 if (min_address >= minipool_barrier->address
17818 || (minipool_vector_tail->min_address + fix->fix_size
17819 >= minipool_barrier->address))
17820 return NULL;
17821
17822 /* Scan the pool to see if a constant with the same value has
17823 already been added. While we are doing this, also note the
17824 location where we must insert the constant if it doesn't already
17825 exist. */
17826 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17827 {
17828 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17829 && fix->mode == mp->mode
17830 && (!LABEL_P (fix->value)
17831 || (CODE_LABEL_NUMBER (fix->value)
17832 == CODE_LABEL_NUMBER (mp->value)))
17833 && rtx_equal_p (fix->value, mp->value)
17834 /* Check that there is enough slack to move this entry to the
17835 end of the table (this is conservative). */
17836 && (mp->max_address
17837 > (minipool_barrier->address
17838 + minipool_vector_tail->offset
17839 + minipool_vector_tail->fix_size)))
17840 {
17841 mp->refcount++;
17842 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17843 }
17844
17845 if (min_mp != NULL)
17846 mp->min_address += fix->fix_size;
17847 else
17848 {
17849 /* Note the insertion point if necessary. */
17850 if (mp->min_address < min_address)
17851 {
17852 /* For now, we do not allow nodes that require 8-byte alignment
17853 to be inserted anywhere but at the start of the pool. */
17854 if (ARM_DOUBLEWORD_ALIGN
17855 && fix->fix_size >= 8 && mp->fix_size < 8)
17856 return NULL;
17857 else
17858 min_mp = mp;
17859 }
17860 else if (mp->max_address
17861 < minipool_barrier->address + mp->offset + fix->fix_size)
17862 {
17863 /* Inserting before this entry would push the fix beyond
17864 its maximum address (which can happen if we have
17865 re-located a forwards fix); force the new fix to come
17866 after it. */
17867 if (ARM_DOUBLEWORD_ALIGN
17868 && fix->fix_size >= 8 && mp->fix_size < 8)
17869 return NULL;
17870 else
17871 {
17872 min_mp = mp;
17873 min_address = mp->min_address + fix->fix_size;
17874 }
17875 }
17876 /* Do not insert a non-8-byte aligned quantity before 8-byte
17877 aligned quantities. */
17878 else if (ARM_DOUBLEWORD_ALIGN
17879 && fix->fix_size < 8
17880 && mp->fix_size >= 8)
17881 {
17882 min_mp = mp;
17883 min_address = mp->min_address + fix->fix_size;
17884 }
17885 }
17886 }
17887
17888 /* We need to create a new entry. */
17889 mp = XNEW (Mnode);
17890 mp->fix_size = fix->fix_size;
17891 mp->mode = fix->mode;
17892 mp->value = fix->value;
17893 mp->refcount = 1;
17894 mp->max_address = minipool_barrier->address + 65536;
17895
17896 mp->min_address = min_address;
17897
17898 if (min_mp == NULL)
17899 {
17900 mp->prev = NULL;
17901 mp->next = minipool_vector_head;
17902
17903 if (mp->next == NULL)
17904 {
17905 minipool_vector_tail = mp;
17906 minipool_vector_label = gen_label_rtx ();
17907 }
17908 else
17909 mp->next->prev = mp;
17910
17911 minipool_vector_head = mp;
17912 }
17913 else
17914 {
17915 mp->next = min_mp->next;
17916 mp->prev = min_mp;
17917 min_mp->next = mp;
17918
17919 if (mp->next != NULL)
17920 mp->next->prev = mp;
17921 else
17922 minipool_vector_tail = mp;
17923 }
17924
17925 /* Save the new entry. */
17926 min_mp = mp;
17927
17928 if (mp->prev)
17929 mp = mp->prev;
17930 else
17931 mp->offset = 0;
17932
17933 /* Scan over the following entries and adjust their offsets. */
17934 while (mp->next != NULL)
17935 {
17936 if (mp->next->min_address < mp->min_address + mp->fix_size)
17937 mp->next->min_address = mp->min_address + mp->fix_size;
17938
17939 if (mp->refcount)
17940 mp->next->offset = mp->offset + mp->fix_size;
17941 else
17942 mp->next->offset = mp->offset;
17943
17944 mp = mp->next;
17945 }
17946
17947 return min_mp;
17948 }
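/* Similarly for the backwards case, with made-up numbers: a fix at address
   0x2000 whose insn has a negative pool range of 4095 bytes gives
   min_address = 0x2000 - 4095 = 0x1001, bounding how far back the entry may
   be placed; as noted above this can legitimately be negative early in a
   function, where it is only a constraint.  */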
17949
17950 static void
17951 assign_minipool_offsets (Mfix *barrier)
17952 {
17953 HOST_WIDE_INT offset = 0;
17954 Mnode *mp;
17955
17956 minipool_barrier = barrier;
17957
17958 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17959 {
17960 mp->offset = offset;
17961
17962 if (mp->refcount > 0)
17963 offset += mp->fix_size;
17964 }
17965 }
17966
17967 /* Output the literal table. */
17968 static void
17969 dump_minipool (rtx_insn *scan)
17970 {
17971 Mnode * mp;
17972 Mnode * nmp;
17973 int align64 = 0;
17974
17975 if (ARM_DOUBLEWORD_ALIGN)
17976 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17977 if (mp->refcount > 0 && mp->fix_size >= 8)
17978 {
17979 align64 = 1;
17980 break;
17981 }
17982
17983 if (dump_file)
17984 fprintf (dump_file,
17985 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17986 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17987
17988 scan = emit_label_after (gen_label_rtx (), scan);
17989 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17990 scan = emit_label_after (minipool_vector_label, scan);
17991
17992 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17993 {
17994 if (mp->refcount > 0)
17995 {
17996 if (dump_file)
17997 {
17998 fprintf (dump_file,
17999 ";; Offset %u, min %ld, max %ld ",
18000 (unsigned) mp->offset, (unsigned long) mp->min_address,
18001 (unsigned long) mp->max_address);
18002 arm_print_value (dump_file, mp->value);
18003 fputc ('\n', dump_file);
18004 }
18005
18006 rtx val = copy_rtx (mp->value);
18007
18008 switch (GET_MODE_SIZE (mp->mode))
18009 {
18010 #ifdef HAVE_consttable_1
18011 case 1:
18012 scan = emit_insn_after (gen_consttable_1 (val), scan);
18013 break;
18014
18015 #endif
18016 #ifdef HAVE_consttable_2
18017 case 2:
18018 scan = emit_insn_after (gen_consttable_2 (val), scan);
18019 break;
18020
18021 #endif
18022 #ifdef HAVE_consttable_4
18023 case 4:
18024 scan = emit_insn_after (gen_consttable_4 (val), scan);
18025 break;
18026
18027 #endif
18028 #ifdef HAVE_consttable_8
18029 case 8:
18030 scan = emit_insn_after (gen_consttable_8 (val), scan);
18031 break;
18032
18033 #endif
18034 #ifdef HAVE_consttable_16
18035 case 16:
18036 scan = emit_insn_after (gen_consttable_16 (val), scan);
18037 break;
18038
18039 #endif
18040 default:
18041 gcc_unreachable ();
18042 }
18043 }
18044
18045 nmp = mp->next;
18046 free (mp);
18047 }
18048
18049 minipool_vector_head = minipool_vector_tail = NULL;
18050 scan = emit_insn_after (gen_consttable_end (), scan);
18051 scan = emit_barrier_after (scan);
18052 }
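/* The assembly produced here looks roughly like this (illustrative values;
   the exact directives come from the align_* and consttable_* patterns in
   the machine description):

	.align	2		@ or 3 when align64 is set
   .Lpool:			@ minipool_vector_label
	.word	0x12345678	@ a 4-byte entry
	.word	0xdeadbeef	@ another 4-byte entry

   Entries whose refcount is zero are not emitted, but every node is freed
   and the pool is reset for the next barrier.  */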
18053
18054 /* Return the cost of forcibly inserting a barrier after INSN. */
18055 static int
18056 arm_barrier_cost (rtx_insn *insn)
18057 {
18058 /* Basing the location of the pool on the loop depth is preferable,
18059 but at the moment, the basic block information seems to be
18060 corrupted by this stage of the compilation. */
18061 int base_cost = 50;
18062 rtx_insn *next = next_nonnote_insn (insn);
18063
18064 if (next != NULL && LABEL_P (next))
18065 base_cost -= 20;
18066
18067 switch (GET_CODE (insn))
18068 {
18069 case CODE_LABEL:
18070 /* It will always be better to place the table before the label, rather
18071 than after it. */
18072 return 50;
18073
18074 case INSN:
18075 case CALL_INSN:
18076 return base_cost;
18077
18078 case JUMP_INSN:
18079 return base_cost - 10;
18080
18081 default:
18082 return base_cost + 10;
18083 }
18084 }
18085
18086 /* Find the best place in the insn stream in the range
18087 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18088 Create the barrier by inserting a jump and add a new fix entry for
18089 it. */
18090 static Mfix *
18091 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18092 {
18093 HOST_WIDE_INT count = 0;
18094 rtx_barrier *barrier;
18095 rtx_insn *from = fix->insn;
18096 /* The instruction after which we will insert the jump. */
18097 rtx_insn *selected = NULL;
18098 int selected_cost;
18099 /* The address at which the jump instruction will be placed. */
18100 HOST_WIDE_INT selected_address;
18101 Mfix * new_fix;
18102 HOST_WIDE_INT max_count = max_address - fix->address;
18103 rtx_code_label *label = gen_label_rtx ();
18104
18105 selected_cost = arm_barrier_cost (from);
18106 selected_address = fix->address;
18107
18108 while (from && count < max_count)
18109 {
18110 rtx_jump_table_data *tmp;
18111 int new_cost;
18112
18113 /* This code shouldn't have been called if there was a natural barrier
18114 within range. */
18115 gcc_assert (!BARRIER_P (from));
18116
18117 /* Count the length of this insn. This must stay in sync with the
18118 code that pushes minipool fixes. */
18119 if (LABEL_P (from))
18120 count += get_label_padding (from);
18121 else
18122 count += get_attr_length (from);
18123
18124 /* If there is a jump table, add its length. */
18125 if (tablejump_p (from, NULL, &tmp))
18126 {
18127 count += get_jump_table_size (tmp);
18128
18129 /* Jump tables aren't in a basic block, so base the cost on
18130 the dispatch insn. If we select this location, we will
18131 still put the pool after the table. */
18132 new_cost = arm_barrier_cost (from);
18133
18134 if (count < max_count
18135 && (!selected || new_cost <= selected_cost))
18136 {
18137 selected = tmp;
18138 selected_cost = new_cost;
18139 selected_address = fix->address + count;
18140 }
18141
18142 /* Continue after the dispatch table. */
18143 from = NEXT_INSN (tmp);
18144 continue;
18145 }
18146
18147 new_cost = arm_barrier_cost (from);
18148
18149 if (count < max_count
18150 && (!selected || new_cost <= selected_cost))
18151 {
18152 selected = from;
18153 selected_cost = new_cost;
18154 selected_address = fix->address + count;
18155 }
18156
18157 from = NEXT_INSN (from);
18158 }
18159
18160 /* Make sure that we found a place to insert the jump. */
18161 gcc_assert (selected);
18162
18163 /* Create a new JUMP_INSN that branches around a barrier. */
18164 from = emit_jump_insn_after (gen_jump (label), selected);
18165 JUMP_LABEL (from) = label;
18166 barrier = emit_barrier_after (from);
18167 emit_label_after (label, barrier);
18168
18169 /* Create a minipool barrier entry for the new barrier. */
18170 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18171 new_fix->insn = barrier;
18172 new_fix->address = selected_address;
18173 new_fix->next = fix->next;
18174 fix->next = new_fix;
18175
18176 return new_fix;
18177 }
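/* Schematically the insns emitted above are (illustrative label name):

	b	.Lskip		@ new jump around the future pool
	<barrier>		@ the minipool will be dumped after this barrier
   .Lskip:

   and the Mfix created for the barrier is linked into the fix list directly
   after FIX so that the main fixup loop in arm_reorg will see it.  */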
18178
18179 /* Record that there is a natural barrier in the insn stream at
18180 ADDRESS. */
18181 static void
18182 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18183 {
18184 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18185
18186 fix->insn = insn;
18187 fix->address = address;
18188
18189 fix->next = NULL;
18190 if (minipool_fix_head != NULL)
18191 minipool_fix_tail->next = fix;
18192 else
18193 minipool_fix_head = fix;
18194
18195 minipool_fix_tail = fix;
18196 }
18197
18198 /* Record INSN, which will need fixing up to load a value from the
18199 minipool. ADDRESS is the offset of the insn from the start of the
18200 function; LOC is a pointer to the part of the insn which requires
18201 fixing; VALUE is the constant that must be loaded, which is of type
18202 MODE. */
18203 static void
18204 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18205 machine_mode mode, rtx value)
18206 {
18207 gcc_assert (!arm_disable_literal_pool);
18208 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18209
18210 fix->insn = insn;
18211 fix->address = address;
18212 fix->loc = loc;
18213 fix->mode = mode;
18214 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18215 fix->value = value;
18216 fix->forwards = get_attr_pool_range (insn);
18217 fix->backwards = get_attr_neg_pool_range (insn);
18218 fix->minipool = NULL;
18219
18220 /* If an insn doesn't have a range defined for it, then it isn't
18221 expecting to be reworked by this code. Better to stop now than
18222 to generate duff assembly code. */
18223 gcc_assert (fix->forwards || fix->backwards);
18224
18225 /* If an entry requires 8-byte alignment then assume all constant pools
18226 require 4 bytes of padding. Trying to do this later on a per-pool
18227 basis is awkward because existing pool entries have to be modified. */
18228 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18229 minipool_pad = 4;
18230
18231 if (dump_file)
18232 {
18233 fprintf (dump_file,
18234 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18235 GET_MODE_NAME (mode),
18236 INSN_UID (insn), (unsigned long) address,
18237 -1 * (long)fix->backwards, (long)fix->forwards);
18238 arm_print_value (dump_file, fix->value);
18239 fprintf (dump_file, "\n");
18240 }
18241
18242 /* Add it to the chain of fixes. */
18243 fix->next = NULL;
18244
18245 if (minipool_fix_head != NULL)
18246 minipool_fix_tail->next = fix;
18247 else
18248 minipool_fix_head = fix;
18249
18250 minipool_fix_tail = fix;
18251 }
18252
18253 /* Return the maximum allowed cost of synthesizing a 64-bit constant
18254 inline, i.e. the largest number of insns we are prepared to use
18255 before falling back to the literal pool. */
18256 int
18257 arm_max_const_double_inline_cost ()
18258 {
18259 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18260 }
18261
18262 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18263 Returns the number of insns needed, or 99 if we don't know how to
18264 do it. */
18265 int
18266 arm_const_double_inline_cost (rtx val)
18267 {
18268 rtx lowpart, highpart;
18269 machine_mode mode;
18270
18271 mode = GET_MODE (val);
18272
18273 if (mode == VOIDmode)
18274 mode = DImode;
18275
18276 gcc_assert (GET_MODE_SIZE (mode) == 8);
18277
18278 lowpart = gen_lowpart (SImode, val);
18279 highpart = gen_highpart_mode (SImode, mode, val);
18280
18281 gcc_assert (CONST_INT_P (lowpart));
18282 gcc_assert (CONST_INT_P (highpart));
18283
18284 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18285 NULL_RTX, NULL_RTX, 0, 0)
18286 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18287 NULL_RTX, NULL_RTX, 0, 0));
18288 }
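/* A worked example (hypothetical constant): for the DImode value
   0x0000000100000500 the low word is 0x500 and the high word is 0x1, both
   valid ARM immediates, so the cost computed above is 1 + 1 = 2.  That is
   no more than arm_max_const_double_inline_cost (), so the move patterns
   will synthesize the constant inline instead of using the literal pool.  */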
18289
18290 /* Cost of loading a SImode constant. */
18291 static inline int
18292 arm_const_inline_cost (enum rtx_code code, rtx val)
18293 {
18294 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18295 NULL_RTX, NULL_RTX, 1, 0);
18296 }
18297
18298 /* Return true if it is worthwhile to split a 64-bit constant into two
18299 32-bit operations. This is the case if optimizing for size, or
18300 if we have load delay slots, or if one 32-bit part can be done with
18301 a single data operation. */
18302 bool
18303 arm_const_double_by_parts (rtx val)
18304 {
18305 machine_mode mode = GET_MODE (val);
18306 rtx part;
18307
18308 if (optimize_size || arm_ld_sched)
18309 return true;
18310
18311 if (mode == VOIDmode)
18312 mode = DImode;
18313
18314 part = gen_highpart_mode (SImode, mode, val);
18315
18316 gcc_assert (CONST_INT_P (part));
18317
18318 if (const_ok_for_arm (INTVAL (part))
18319 || const_ok_for_arm (~INTVAL (part)))
18320 return true;
18321
18322 part = gen_lowpart (SImode, val);
18323
18324 gcc_assert (CONST_INT_P (part));
18325
18326 if (const_ok_for_arm (INTVAL (part))
18327 || const_ok_for_arm (~INTVAL (part)))
18328 return true;
18329
18330 return false;
18331 }
18332
18333 /* Return true if it is possible to inline both the high and low parts
18334 of a 64-bit constant into 32-bit data processing instructions. */
18335 bool
18336 arm_const_double_by_immediates (rtx val)
18337 {
18338 machine_mode mode = GET_MODE (val);
18339 rtx part;
18340
18341 if (mode == VOIDmode)
18342 mode = DImode;
18343
18344 part = gen_highpart_mode (SImode, mode, val);
18345
18346 gcc_assert (CONST_INT_P (part));
18347
18348 if (!const_ok_for_arm (INTVAL (part)))
18349 return false;
18350
18351 part = gen_lowpart (SImode, val);
18352
18353 gcc_assert (CONST_INT_P (part));
18354
18355 if (!const_ok_for_arm (INTVAL (part)))
18356 return false;
18357
18358 return true;
18359 }
18360
18361 /* Scan INSN and note any of its operands that need fixing.
18362 If DO_PUSHES is false we do not actually push any of the fixups
18363 needed. */
18364 static void
18365 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18366 {
18367 int opno;
18368
18369 extract_constrain_insn (insn);
18370
18371 if (recog_data.n_alternatives == 0)
18372 return;
18373
18374 /* Fill in recog_op_alt with information about the constraints of
18375 this insn. */
18376 preprocess_constraints (insn);
18377
18378 const operand_alternative *op_alt = which_op_alt ();
18379 for (opno = 0; opno < recog_data.n_operands; opno++)
18380 {
18381 /* Things we need to fix can only occur in inputs. */
18382 if (recog_data.operand_type[opno] != OP_IN)
18383 continue;
18384
18385 /* If this alternative is a memory reference, then any mention
18386 of constants in this alternative is really to fool reload
18387 into allowing us to accept one there. We need to fix them up
18388 now so that we output the right code. */
18389 if (op_alt[opno].memory_ok)
18390 {
18391 rtx op = recog_data.operand[opno];
18392
18393 if (CONSTANT_P (op))
18394 {
18395 if (do_pushes)
18396 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18397 recog_data.operand_mode[opno], op);
18398 }
18399 else if (MEM_P (op)
18400 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18401 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18402 {
18403 if (do_pushes)
18404 {
18405 rtx cop = avoid_constant_pool_reference (op);
18406
18407 /* Casting the address of something to a mode narrower
18408 than a word can cause avoid_constant_pool_reference()
18409 to return the pool reference itself. That's no good to
18410 us here. Lets just hope that we can use the
18411 constant pool value directly. */
18412 if (op == cop)
18413 cop = get_pool_constant (XEXP (op, 0));
18414
18415 push_minipool_fix (insn, address,
18416 recog_data.operand_loc[opno],
18417 recog_data.operand_mode[opno], cop);
18418 }
18419
18420 }
18421 }
18422 }
18423
18424 return;
18425 }
18426
18427 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18428 and unions in the context of ARMv8-M Security Extensions. It is used as a
18429 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18430 functions. The PADDING_BITS_TO_CLEAR pointer can point to either one
18431 or four masks, depending on whether it is being computed for a
18432 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18433 respectively. The tree for the type of the argument or a field within an
18434 argument is passed in ARG_TYPE, the current register this argument or field
18435 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18436 argument or field starts at is passed in STARTING_BIT and the last used bit
18437 is kept in LAST_USED_BIT which is also updated accordingly. */
18438
18439 static unsigned HOST_WIDE_INT
18440 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18441 uint32_t * padding_bits_to_clear,
18442 unsigned starting_bit, int * last_used_bit)
18443
18444 {
18445 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18446
18447 if (TREE_CODE (arg_type) == RECORD_TYPE)
18448 {
18449 unsigned current_bit = starting_bit;
18450 tree field;
18451 long int offset, size;
18452
18453
18454 field = TYPE_FIELDS (arg_type);
18455 while (field)
18456 {
18457 /* The offset within a structure is always an offset from
18458 the start of that structure. Make sure we take that into account in
18459 the calculation of the register-based offset that we use here. */
18460 offset = starting_bit;
18461 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18462 offset %= 32;
18463
18464 /* This is the actual size of the field, for bitfields this is the
18465 bitfield width and not the container size. */
18466 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18467
18468 if (*last_used_bit != offset)
18469 {
18470 if (offset < *last_used_bit)
18471 {
18472 /* This field's offset is before the 'last_used_bit', which
18473 means this field goes in the next register. So we need to
18474 pad the rest of the current register and increase the
18475 register number. */
18476 uint32_t mask;
18477 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18478 mask++;
18479
18480 padding_bits_to_clear[*regno] |= mask;
18481 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18482 (*regno)++;
18483 }
18484 else
18485 {
18486 /* Otherwise we pad the bits between the last field's end and
18487 the start of the new field. */
18488 uint32_t mask;
18489
18490 mask = ((uint32_t)-1) >> (32 - offset);
18491 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18492 padding_bits_to_clear[*regno] |= mask;
18493 }
18494 current_bit = offset;
18495 }
18496
18497 /* Calculate further padding bits for inner structs/unions too. */
18498 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18499 {
18500 *last_used_bit = current_bit;
18501 not_to_clear_reg_mask
18502 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18503 padding_bits_to_clear, offset,
18504 last_used_bit);
18505 }
18506 else
18507 {
18508 /* Update 'current_bit' with this field's size. If the
18509 'current_bit' lies in a subsequent register, update 'regno' and
18510 reset 'current_bit' to point to the current bit in that new
18511 register. */
18512 current_bit += size;
18513 while (current_bit >= 32)
18514 {
18515 current_bit -= 32;
18516 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18517 (*regno)++;
18518 }
18519 *last_used_bit = current_bit;
18520 }
18521
18522 field = TREE_CHAIN (field);
18523 }
18524 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18525 }
18526 else if (TREE_CODE (arg_type) == UNION_TYPE)
18527 {
18528 tree field, field_t;
18529 int i, regno_t, field_size;
18530 int max_reg = -1;
18531 int max_bit = -1;
18532 uint32_t mask;
18533 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18534 = {-1, -1, -1, -1};
18535
18536 /* To compute the padding bits in a union we only consider bits as
18537 padding bits if they are always either a padding bit or fall outside a
18538 field's size for all fields in the union. */
18539 field = TYPE_FIELDS (arg_type);
18540 while (field)
18541 {
18542 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18543 = {0U, 0U, 0U, 0U};
18544 int last_used_bit_t = *last_used_bit;
18545 regno_t = *regno;
18546 field_t = TREE_TYPE (field);
18547
18548 /* If the field's type is either a record or a union make sure to
18549 compute their padding bits too. */
18550 if (RECORD_OR_UNION_TYPE_P (field_t))
18551 not_to_clear_reg_mask
18552 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18553 &padding_bits_to_clear_t[0],
18554 starting_bit, &last_used_bit_t);
18555 else
18556 {
18557 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18558 regno_t = (field_size / 32) + *regno;
18559 last_used_bit_t = (starting_bit + field_size) % 32;
18560 }
18561
18562 for (i = *regno; i < regno_t; i++)
18563 {
18564 /* For all but the last register used by this field only keep the
18565 padding bits that were padding bits in this field. */
18566 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18567 }
18568
18569 /* For the last register, keep all padding bits that were padding
18570 bits in this field and any padding bits that are still valid
18571 as padding bits but fall outside of this field's size. */
18572 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18573 padding_bits_to_clear_res[regno_t]
18574 &= padding_bits_to_clear_t[regno_t] | mask;
18575
18576 /* Update the maximum size of the fields in terms of registers used
18577 ('max_reg') and the 'last_used_bit' in said register. */
18578 if (max_reg < regno_t)
18579 {
18580 max_reg = regno_t;
18581 max_bit = last_used_bit_t;
18582 }
18583 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18584 max_bit = last_used_bit_t;
18585
18586 field = TREE_CHAIN (field);
18587 }
18588
18589 /* Update the current padding_bits_to_clear using the intersection of the
18590 padding bits of all the fields. */
18591 for (i = *regno; i < max_reg; i++)
18592 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18593
18594 /* Do not keep trailing padding bits; we do not know yet whether this
18595 is the end of the argument. */
18596 mask = ((uint32_t) 1 << max_bit) - 1;
18597 padding_bits_to_clear[max_reg]
18598 |= padding_bits_to_clear_res[max_reg] & mask;
18599
18600 *regno = max_reg;
18601 *last_used_bit = max_bit;
18602 }
18603 else
18604 /* This function should only be used for structs and unions. */
18605 gcc_unreachable ();
18606
18607 return not_to_clear_reg_mask;
18608 }
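/* A worked example (hypothetical argument type):

	struct s { char a; char b; int c; };

   passed in r0-r1: fields a and b occupy bits 0-15 of r0 while c, needing
   32-bit alignment, starts in r1, so bits 16-31 of r0 are padding.  The
   code above records padding_bits_to_clear[0] |= 0xffff0000 and returns a
   mask with the bits for both r0 and r1 set, since both registers carry
   data that must not be cleared wholesale.  */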
18609
18610 /* In the context of ARMv8-M Security Extensions, this function is used for both
18611 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18612 registers are used when returning or passing arguments, which is then
18613 returned as a mask. It will also compute a mask to indicate padding/unused
18614 bits for each of these registers, and passes this through the
18615 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18616 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18617 the starting register used to pass this argument or return value is passed
18618 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18619 for struct and union types. */
18620
18621 static unsigned HOST_WIDE_INT
18622 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18623 uint32_t * padding_bits_to_clear)
18624
18625 {
18626 int last_used_bit = 0;
18627 unsigned HOST_WIDE_INT not_to_clear_mask;
18628
18629 if (RECORD_OR_UNION_TYPE_P (arg_type))
18630 {
18631 not_to_clear_mask
18632 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18633 padding_bits_to_clear, 0,
18634 &last_used_bit);
18635
18636
18637 /* If the 'last_used_bit' is not zero, that means we are still using a
18638 part of the last 'regno'. In such cases we must clear the trailing
18639 bits. Otherwise we are not using regno and we should mark it as to
18640 clear. */
18641 if (last_used_bit != 0)
18642 padding_bits_to_clear[regno]
18643 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18644 else
18645 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18646 }
18647 else
18648 {
18649 not_to_clear_mask = 0;
18650 /* We are not dealing with structs or unions, so these arguments may be
18651 passed in floating point registers too. In some cases a BLKmode is
18652 used when returning or passing arguments in multiple VFP registers. */
18653 if (GET_MODE (arg_rtx) == BLKmode)
18654 {
18655 int i, arg_regs;
18656 rtx reg;
18657
18658 /* This should really only occur when dealing with the hard-float
18659 ABI. */
18660 gcc_assert (TARGET_HARD_FLOAT_ABI);
18661
18662 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18663 {
18664 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18665 gcc_assert (REG_P (reg));
18666
18667 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18668
18669 /* If we are dealing with DF mode, make sure we don't
18670 clear either of the registers it addresses. */
18671 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18672 if (arg_regs > 1)
18673 {
18674 unsigned HOST_WIDE_INT mask;
18675 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18676 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18677 not_to_clear_mask |= mask;
18678 }
18679 }
18680 }
18681 else
18682 {
18683 /* Otherwise we can rely on the MODE to determine how many registers
18684 are being used by this argument. */
18685 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18686 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18687 if (arg_regs > 1)
18688 {
18689 unsigned HOST_WIDE_INT
18690 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18691 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18692 not_to_clear_mask |= mask;
18693 }
18694 }
18695 }
18696
18697 return not_to_clear_mask;
18698 }
18699
18700 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call or
18701 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18702 are to be fully cleared, using the value in register CLEARING_REG if more
18703 efficient. The PADDING_BITS_TO_CLEAR array, of PADDING_BITS_LEN entries, gives
18704 the bits that need to be cleared in caller-saved core registers, with
18705 SCRATCH_REG used as a scratch register for that clearing.
18706
18707 NOTE: one of three following assertions must hold:
18708 - SCRATCH_REG is a low register
18709 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18710 in TO_CLEAR_BITMAP)
18711 - CLEARING_REG is a low register. */
18712
18713 static void
18714 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18715 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18716 {
18717 bool saved_clearing = false;
18718 rtx saved_clearing_reg = NULL_RTX;
18719 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18720
18721 gcc_assert (arm_arch_cmse);
18722
18723 if (!bitmap_empty_p (to_clear_bitmap))
18724 {
18725 minregno = bitmap_first_set_bit (to_clear_bitmap);
18726 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18727 }
18728 clearing_regno = REGNO (clearing_reg);
18729
18730 /* Clear padding bits. */
18731 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18732 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18733 {
18734 uint64_t mask;
18735 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18736
18737 if (padding_bits_to_clear[i] == 0)
18738 continue;
18739
18740 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18741 CLEARING_REG as scratch. */
18742 if (TARGET_THUMB1
18743 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18744 {
18745 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18746 such that we can use clearing_reg to clear the unused bits in the
18747 arguments. */
18748 if ((clearing_regno > maxregno
18749 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18750 && !saved_clearing)
18751 {
18752 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18753 emit_move_insn (scratch_reg, clearing_reg);
18754 saved_clearing = true;
18755 saved_clearing_reg = scratch_reg;
18756 }
18757 scratch_reg = clearing_reg;
18758 }
18759
18760 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18761 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18762 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18763
18764 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18765 mask = (~padding_bits_to_clear[i]) >> 16;
18766 rtx16 = gen_int_mode (16, SImode);
18767 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18768 if (mask)
18769 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18770
18771 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18772 }
18773 if (saved_clearing)
18774 emit_move_insn (clearing_reg, saved_clearing_reg);
18775
18776
18777 /* Clear full registers. */
18778
18779 if (TARGET_HAVE_FPCXT_CMSE)
18780 {
18781 rtvec vunspec_vec;
18782 int i, j, k, nb_regs;
18783 rtx use_seq, par, reg, set, vunspec;
18784 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18785 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18786 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18787
18788 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18789 {
18790 /* Find next register to clear and exit if none. */
18791 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18792 if (i > maxregno)
18793 break;
18794
18795 /* Compute number of consecutive registers to clear. */
18796 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18797 j++);
18798 nb_regs = j - i;
18799
18800 /* Create VSCCLRM RTX pattern. */
18801 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18802 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18803 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18804 VUNSPEC_VSCCLRM_VPR);
18805 XVECEXP (par, 0, 0) = vunspec;
18806
18807 /* Insert VFP register clearing RTX in the pattern. */
18808 start_sequence ();
18809 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18810 {
18811 if (!bitmap_bit_p (to_clear_bitmap, j))
18812 continue;
18813
18814 reg = gen_rtx_REG (SFmode, j);
18815 set = gen_rtx_SET (reg, const0_rtx);
18816 XVECEXP (par, 0, k++) = set;
18817 emit_use (reg);
18818 }
18819 use_seq = get_insns ();
18820 end_sequence ();
18821
18822 emit_insn_after (use_seq, emit_insn (par));
18823 }
18824
18825 /* Get set of core registers to clear. */
18826 bitmap_clear (core_regs_bitmap);
18827 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18828 IP_REGNUM - R0_REGNUM + 1);
18829 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18830 core_regs_bitmap);
18831 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18832
18833 if (bitmap_empty_p (to_clear_core_bitmap))
18834 return;
18835
18836 /* Create clrm RTX pattern. */
18837 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18838 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18839
18840 /* Insert core register clearing RTX in the pattern. */
18841 start_sequence ();
18842 for (j = 0, i = minregno; j < nb_regs; i++)
18843 {
18844 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18845 continue;
18846
18847 reg = gen_rtx_REG (SImode, i);
18848 set = gen_rtx_SET (reg, const0_rtx);
18849 XVECEXP (par, 0, j++) = set;
18850 emit_use (reg);
18851 }
18852
18853 /* Insert APSR register clearing RTX in the pattern
18854 along with clobbering CC. */
18855 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18856 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18857 VUNSPEC_CLRM_APSR);
18858
18859 XVECEXP (par, 0, j++) = vunspec;
18860
18861 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18862 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18863 XVECEXP (par, 0, j) = clobber;
18864
18865 use_seq = get_insns ();
18866 end_sequence ();
18867
18868 emit_insn_after (use_seq, emit_insn (par));
18869 }
18870 else
18871 {
18872 /* If not marked for clearing, clearing_reg already does not contain
18873 any secret. */
18874 if (clearing_regno <= maxregno
18875 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18876 {
18877 emit_move_insn (clearing_reg, const0_rtx);
18878 emit_use (clearing_reg);
18879 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18880 }
18881
18882 for (regno = minregno; regno <= maxregno; regno++)
18883 {
18884 if (!bitmap_bit_p (to_clear_bitmap, regno))
18885 continue;
18886
18887 if (IS_VFP_REGNUM (regno))
18888 {
18889 /* If regno is an even vfp register and its successor is also to
18890 be cleared, use vmov. */
18891 if (TARGET_VFP_DOUBLE
18892 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18893 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18894 {
18895 emit_move_insn (gen_rtx_REG (DFmode, regno),
18896 CONST1_RTX (DFmode));
18897 emit_use (gen_rtx_REG (DFmode, regno));
18898 regno++;
18899 }
18900 else
18901 {
18902 emit_move_insn (gen_rtx_REG (SFmode, regno),
18903 CONST1_RTX (SFmode));
18904 emit_use (gen_rtx_REG (SFmode, regno));
18905 }
18906 }
18907 else
18908 {
18909 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18910 emit_use (gen_rtx_REG (SImode, regno));
18911 }
18912 }
18913 }
18914 }
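/* Schematically, on a target with the FPCXT extension the sequence built
   above is (illustrative register ranges):

	vscclrm	{s0-s15, vpr}	@ one block per run of consecutive VFP regs
	clrm	{r0-r3, ip, apsr}

   whereas on other Armv8-M targets each register is cleared individually:
   core registers by copying CLEARING_REG (zeroed first if it is itself in
   TO_CLEAR_BITMAP) and VFP registers with single or double vmov moves.  */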
18915
18916 /* Clear core and caller-saved VFP registers not used to pass arguments before
18917 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18918 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18919 libgcc/config/arm/cmse_nonsecure_call.S. */
18920
18921 static void
18922 cmse_nonsecure_call_inline_register_clear (void)
18923 {
18924 basic_block bb;
18925
18926 FOR_EACH_BB_FN (bb, cfun)
18927 {
18928 rtx_insn *insn;
18929
18930 FOR_BB_INSNS (bb, insn)
18931 {
18932 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18933 /* frame = VFP regs + FPSCR + VPR. */
18934 unsigned lazy_store_stack_frame_size
18935 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18936 unsigned long callee_saved_mask
18937 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18938 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18939 unsigned address_regnum, regno;
18940 unsigned max_int_regno
18941 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18942 unsigned max_fp_regno
18943 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18944 unsigned maxregno
18945 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18946 auto_sbitmap to_clear_bitmap (maxregno + 1);
18947 rtx_insn *seq;
18948 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18949 rtx address;
18950 CUMULATIVE_ARGS args_so_far_v;
18951 cumulative_args_t args_so_far;
18952 tree arg_type, fntype;
18953 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18954 function_args_iterator args_iter;
18955 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18956
18957 if (!NONDEBUG_INSN_P (insn))
18958 continue;
18959
18960 if (!CALL_P (insn))
18961 continue;
18962
18963 pat = PATTERN (insn);
18964 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18965 call = XVECEXP (pat, 0, 0);
18966
18967 /* Get the real call RTX if the insn sets a value, ie. returns. */
18968 if (GET_CODE (call) == SET)
18969 call = SET_SRC (call);
18970
18971 /* Check if it is a cmse_nonsecure_call. */
18972 unspec = XEXP (call, 0);
18973 if (GET_CODE (unspec) != UNSPEC
18974 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18975 continue;
18976
18977 /* Mark registers that need to be cleared. Those that hold a
18978 parameter are removed from the set further below. */
18979 bitmap_clear (to_clear_bitmap);
18980 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18981 max_int_regno - R0_REGNUM + 1);
18982
18983 /* Only look at the caller-saved floating point registers in case of
18984 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18985 lazy store and loads which clear both caller- and callee-saved
18986 registers. */
18987 if (!lazy_fpclear)
18988 {
18989 auto_sbitmap float_bitmap (maxregno + 1);
18990
18991 bitmap_clear (float_bitmap);
18992 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18993 max_fp_regno - FIRST_VFP_REGNUM + 1);
18994 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18995 }
18996
18997 /* Make sure the register used to hold the function address is not
18998 cleared. */
18999 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19000 gcc_assert (MEM_P (address));
19001 gcc_assert (REG_P (XEXP (address, 0)));
19002 address_regnum = REGNO (XEXP (address, 0));
19003 if (address_regnum <= max_int_regno)
19004 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19005
19006 /* Set basic block of call insn so that df rescan is performed on
19007 insns inserted here. */
19008 set_block_for_insn (insn, bb);
19009 df_set_flags (DF_DEFER_INSN_RESCAN);
19010 start_sequence ();
19011
19012 /* Make sure the scheduler doesn't schedule other insns beyond
19013 here. */
19014 emit_insn (gen_blockage ());
19015
19016 /* Walk through all arguments and clear registers appropriately.
19017 */
19018 fntype = TREE_TYPE (MEM_EXPR (address));
19019 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19020 NULL_TREE);
19021 args_so_far = pack_cumulative_args (&args_so_far_v);
19022 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19023 {
19024 rtx arg_rtx;
19025 uint64_t to_clear_args_mask;
19026
19027 if (VOID_TYPE_P (arg_type))
19028 continue;
19029
19030 function_arg_info arg (arg_type, /*named=*/true);
19031 if (!first_param)
19032 /* ??? We should advance after processing the argument and pass
19033 the argument we're advancing past. */
19034 arm_function_arg_advance (args_so_far, arg);
19035
19036 arg_rtx = arm_function_arg (args_so_far, arg);
19037 gcc_assert (REG_P (arg_rtx));
19038 to_clear_args_mask
19039 = compute_not_to_clear_mask (arg_type, arg_rtx,
19040 REGNO (arg_rtx),
19041 &padding_bits_to_clear[0]);
19042 if (to_clear_args_mask)
19043 {
19044 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19045 {
19046 if (to_clear_args_mask & (1ULL << regno))
19047 bitmap_clear_bit (to_clear_bitmap, regno);
19048 }
19049 }
19050
19051 first_param = false;
19052 }
19053
19054 /* We use right shift and left shift to clear the LSB of the address
19055 we jump to instead of using bic, to avoid having to use an extra
19056 register on Thumb-1. */
19057 clearing_reg = XEXP (address, 0);
19058 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19059 emit_insn (gen_rtx_SET (clearing_reg, shift));
19060 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19061 emit_insn (gen_rtx_SET (clearing_reg, shift));
19062
19063 if (clear_callee_saved)
19064 {
19065 rtx push_insn =
19066 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19067 /* Disable frame debug info in push because it needs to be
19068 disabled for pop (see below). */
19069 RTX_FRAME_RELATED_P (push_insn) = 0;
19070
19071 /* Lazy store multiple. */
19072 if (lazy_fpclear)
19073 {
19074 rtx imm;
19075 rtx_insn *add_insn;
19076
19077 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19078 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19079 stack_pointer_rtx, imm));
19080 /* If we have the frame pointer, then it will be the
19081 CFA reg. Otherwise, the stack pointer is the CFA
19082 reg, so we need to emit a CFA adjust. */
19083 if (!frame_pointer_needed)
19084 arm_add_cfa_adjust_cfa_note (add_insn,
19085 - lazy_store_stack_frame_size,
19086 stack_pointer_rtx,
19087 stack_pointer_rtx);
19088 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19089 }
19090 /* Save VFP callee-saved registers. */
19091 else
19092 {
19093 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19094 (max_fp_regno - D7_VFP_REGNUM) / 2);
19095 /* Disable frame debug info in push because it needs to be
19096 disabled for vpop (see below). */
19097 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19098 }
19099 }
19100
19101 /* Clear caller-saved registers that might leak secrets before doing a
19102 non-secure call. */
19103 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19104 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19105 NUM_ARG_REGS, ip_reg, clearing_reg);
19106
19107 seq = get_insns ();
19108 end_sequence ();
19109 emit_insn_before (seq, insn);
19110
19111 if (TARGET_HAVE_FPCXT_CMSE)
19112 {
19113 rtx_insn *last, *pop_insn, *after = insn;
19114
19115 start_sequence ();
19116
19117 /* Lazy load multiple done as part of libcall in Armv8-M. */
19118 if (lazy_fpclear)
19119 {
19120 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19121 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19122 rtx_insn *add_insn =
19123 emit_insn (gen_addsi3 (stack_pointer_rtx,
19124 stack_pointer_rtx, imm));
19125 if (!frame_pointer_needed)
19126 arm_add_cfa_adjust_cfa_note (add_insn,
19127 lazy_store_stack_frame_size,
19128 stack_pointer_rtx,
19129 stack_pointer_rtx);
19130 }
19131 /* Restore VFP callee-saved registers. */
19132 else
19133 {
19134 int nb_callee_saved_vfp_regs =
19135 (max_fp_regno - D7_VFP_REGNUM) / 2;
19136 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19137 nb_callee_saved_vfp_regs,
19138 stack_pointer_rtx);
19139 /* Disable frame debug info in vpop because the SP adjustment
19140 is made using a CFA adjustment note while the CFA used is
19141 sometimes R7. This then causes an assert failure in the
19142 CFI note creation code. */
19143 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19144 }
19145
19146 arm_emit_multi_reg_pop (callee_saved_mask);
19147 pop_insn = get_last_insn ();
19148
19149 /* Disable frame debug info in pop because the notes reset the state
19150 of popped registers to what it was at the beginning of the
19151 function, before the prologue. This leads to incorrect state
19152 when doing the pop after the nonsecure call for registers that
19153 are pushed both in prologue and before the nonsecure call.
19154
19155 It also occasionally triggers an assert failure in CFI note
19156 creation code when there are two codepaths to the epilogue,
19157 one of which does not go through the nonsecure call.
19158 Obviously this means that debugging between the push and pop is
19159 not reliable. */
19160 RTX_FRAME_RELATED_P (pop_insn) = 0;
19161
19162 seq = get_insns ();
19163 last = get_last_insn ();
19164 end_sequence ();
19165
19166 emit_insn_after (seq, after);
19167
19168 /* Skip the pop we have just inserted after the nonsecure call; we know
19169 it does not contain a nonsecure call. */
19170 insn = last;
19171 }
19172 }
19173 }
19174 }
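/* Putting it together, the code emitted around a cmse_nonsecure_call on an
   Armv8.1-M softfp target looks roughly like this (an illustrative sketch;
   rN stands for whichever register holds the function address):

	lsrs	rN, rN, #1	@ clear the address LSB without needing BIC
	lsls	rN, rN, #1
	push	{r4-r11}	@ callee_saved_mask, frame notes suppressed
	sub	sp, sp, #frame	@ frame = lazy_store_stack_frame_size
	vlstm	sp		@ lazy FP context save
	...			@ clear padding bits and unused registers
	...			@ the nonsecure call itself
	vlldm	sp		@ lazy FP context restore
	add	sp, sp, #frame
	pop	{r4-r11}	@ again with frame notes suppressed  */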
19175
19176 /* Rewrite move insn into subtract of 0 if the condition codes will
19177 be useful in next conditional jump insn. */
19178
19179 static void
19180 thumb1_reorg (void)
19181 {
19182 basic_block bb;
19183
19184 FOR_EACH_BB_FN (bb, cfun)
19185 {
19186 rtx dest, src;
19187 rtx cmp, op0, op1, set = NULL;
19188 rtx_insn *prev, *insn = BB_END (bb);
19189 bool insn_clobbered = false;
19190
19191 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19192 insn = PREV_INSN (insn);
19193
19194 /* Find the last cbranchsi4_insn in basic block BB. */
19195 if (insn == BB_HEAD (bb)
19196 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19197 continue;
19198
19199 /* Get the register with which we are comparing. */
19200 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19201 op0 = XEXP (cmp, 0);
19202 op1 = XEXP (cmp, 1);
19203
19204 /* Check that comparison is against ZERO. */
19205 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19206 continue;
19207
19208 /* Find the first flag setting insn before INSN in basic block BB. */
19209 gcc_assert (insn != BB_HEAD (bb));
19210 for (prev = PREV_INSN (insn);
19211 (!insn_clobbered
19212 && prev != BB_HEAD (bb)
19213 && (NOTE_P (prev)
19214 || DEBUG_INSN_P (prev)
19215 || ((set = single_set (prev)) != NULL
19216 && get_attr_conds (prev) == CONDS_NOCOND)));
19217 prev = PREV_INSN (prev))
19218 {
19219 if (reg_set_p (op0, prev))
19220 insn_clobbered = true;
19221 }
19222
19223 /* Skip if op0 is clobbered by an insn other than prev. */
19224 if (insn_clobbered)
19225 continue;
19226
19227 if (!set)
19228 continue;
19229
19230 dest = SET_DEST (set);
19231 src = SET_SRC (set);
19232 if (!low_register_operand (dest, SImode)
19233 || !low_register_operand (src, SImode))
19234 continue;
19235
19236 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19237 in INSN. Both src and dest of the move insn are checked. */
19238 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19239 {
19240 dest = copy_rtx (dest);
19241 src = copy_rtx (src);
19242 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19243 PATTERN (prev) = gen_rtx_SET (dest, src);
19244 INSN_CODE (prev) = -1;
19245 /* Set test register in INSN to dest. */
19246 XEXP (cmp, 0) = copy_rtx (dest);
19247 INSN_CODE (insn) = -1;
19248 }
19249 }
19250 }
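/* For example (illustrative register numbers), when a basic block contains

	movs	r3, r2
	...
	cmp	r2, #0		@ part of the final cbranchsi4_insn
	beq	.L1

   the move is rewritten as "subs r3, r2, #0" and the branch is changed to
   test r3, so that the Thumb-1 output machinery can omit the now redundant
   comparison, the flags having already been set by the subtract.  */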
19251
19252 /* Convert instructions to their cc-clobbering variant if possible, since
19253 that allows us to use smaller encodings. */
19254
19255 static void
19256 thumb2_reorg (void)
19257 {
19258 basic_block bb;
19259 regset_head live;
19260
19261 INIT_REG_SET (&live);
19262
19263 /* We are freeing block_for_insn in the toplev to keep compatibility
19264 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19265 compute_bb_for_insn ();
19266 df_analyze ();
19267
19268 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19269
19270 FOR_EACH_BB_FN (bb, cfun)
19271 {
19272 if ((current_tune->disparage_flag_setting_t16_encodings
19273 == tune_params::DISPARAGE_FLAGS_ALL)
19274 && optimize_bb_for_speed_p (bb))
19275 continue;
19276
19277 rtx_insn *insn;
19278 Convert_Action action = SKIP;
19279 Convert_Action action_for_partial_flag_setting
19280 = ((current_tune->disparage_flag_setting_t16_encodings
19281 != tune_params::DISPARAGE_FLAGS_NEITHER)
19282 && optimize_bb_for_speed_p (bb))
19283 ? SKIP : CONV;
19284
19285 COPY_REG_SET (&live, DF_LR_OUT (bb));
19286 df_simulate_initialize_backwards (bb, &live);
19287 FOR_BB_INSNS_REVERSE (bb, insn)
19288 {
19289 if (NONJUMP_INSN_P (insn)
19290 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19291 && GET_CODE (PATTERN (insn)) == SET)
19292 {
19293 action = SKIP;
19294 rtx pat = PATTERN (insn);
19295 rtx dst = XEXP (pat, 0);
19296 rtx src = XEXP (pat, 1);
19297 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19298
19299 if (UNARY_P (src) || BINARY_P (src))
19300 op0 = XEXP (src, 0);
19301
19302 if (BINARY_P (src))
19303 op1 = XEXP (src, 1);
19304
19305 if (low_register_operand (dst, SImode))
19306 {
19307 switch (GET_CODE (src))
19308 {
19309 case PLUS:
19310 /* Adding two registers and storing the result
19311 in the first source is already a 16-bit
19312 operation. */
19313 if (rtx_equal_p (dst, op0)
19314 && register_operand (op1, SImode))
19315 break;
19316
19317 if (low_register_operand (op0, SImode))
19318 {
19319 /* ADDS <Rd>,<Rn>,<Rm> */
19320 if (low_register_operand (op1, SImode))
19321 action = CONV;
19322 /* ADDS <Rdn>,#<imm8> */
19323 /* SUBS <Rdn>,#<imm8> */
19324 else if (rtx_equal_p (dst, op0)
19325 && CONST_INT_P (op1)
19326 && IN_RANGE (INTVAL (op1), -255, 255))
19327 action = CONV;
19328 /* ADDS <Rd>,<Rn>,#<imm3> */
19329 /* SUBS <Rd>,<Rn>,#<imm3> */
19330 else if (CONST_INT_P (op1)
19331 && IN_RANGE (INTVAL (op1), -7, 7))
19332 action = CONV;
19333 }
19334 /* ADCS <Rd>, <Rn> */
19335 else if (GET_CODE (XEXP (src, 0)) == PLUS
19336 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19337 && low_register_operand (XEXP (XEXP (src, 0), 1),
19338 SImode)
19339 && COMPARISON_P (op1)
19340 && cc_register (XEXP (op1, 0), VOIDmode)
19341 && maybe_get_arm_condition_code (op1) == ARM_CS
19342 && XEXP (op1, 1) == const0_rtx)
19343 action = CONV;
19344 break;
19345
19346 case MINUS:
19347 /* RSBS <Rd>,<Rn>,#0
19348 Not handled here: see NEG below. */
19349 /* SUBS <Rd>,<Rn>,#<imm3>
19350 SUBS <Rdn>,#<imm8>
19351 Not handled here: see PLUS above. */
19352 /* SUBS <Rd>,<Rn>,<Rm> */
19353 if (low_register_operand (op0, SImode)
19354 && low_register_operand (op1, SImode))
19355 action = CONV;
19356 break;
19357
19358 case MULT:
19359 /* MULS <Rdm>,<Rn>,<Rdm>
19360 As an exception to the rule, this is only used
19361 when optimizing for size since MULS is slow on all
19362 known implementations. We do not even want to use
19363 MULS in cold code, if optimizing for speed, so we
19364 test the global flag here. */
19365 if (!optimize_size)
19366 break;
19367 /* Fall through. */
19368 case AND:
19369 case IOR:
19370 case XOR:
19371 /* ANDS <Rdn>,<Rm> */
19372 if (rtx_equal_p (dst, op0)
19373 && low_register_operand (op1, SImode))
19374 action = action_for_partial_flag_setting;
19375 else if (rtx_equal_p (dst, op1)
19376 && low_register_operand (op0, SImode))
19377 action = action_for_partial_flag_setting == SKIP
19378 ? SKIP : SWAP_CONV;
19379 break;
19380
19381 case ASHIFTRT:
19382 case ASHIFT:
19383 case LSHIFTRT:
19384 /* ASRS <Rdn>,<Rm> */
19385 /* LSRS <Rdn>,<Rm> */
19386 /* LSLS <Rdn>,<Rm> */
19387 if (rtx_equal_p (dst, op0)
19388 && low_register_operand (op1, SImode))
19389 action = action_for_partial_flag_setting;
19390 /* ASRS <Rd>,<Rm>,#<imm5> */
19391 /* LSRS <Rd>,<Rm>,#<imm5> */
19392 /* LSLS <Rd>,<Rm>,#<imm5> */
19393 else if (low_register_operand (op0, SImode)
19394 && CONST_INT_P (op1)
19395 && IN_RANGE (INTVAL (op1), 0, 31))
19396 action = action_for_partial_flag_setting;
19397 break;
19398
19399 case ROTATERT:
19400 /* RORS <Rdn>,<Rm> */
19401 if (rtx_equal_p (dst, op0)
19402 && low_register_operand (op1, SImode))
19403 action = action_for_partial_flag_setting;
19404 break;
19405
19406 case NOT:
19407 /* MVNS <Rd>,<Rm> */
19408 if (low_register_operand (op0, SImode))
19409 action = action_for_partial_flag_setting;
19410 break;
19411
19412 case NEG:
19413 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19414 if (low_register_operand (op0, SImode))
19415 action = CONV;
19416 break;
19417
19418 case CONST_INT:
19419 /* MOVS <Rd>,#<imm8> */
19420 if (CONST_INT_P (src)
19421 && IN_RANGE (INTVAL (src), 0, 255))
19422 action = action_for_partial_flag_setting;
19423 break;
19424
19425 case REG:
19426 /* MOVS and MOV<c> with registers have different
19427 encodings, so are not relevant here. */
19428 break;
19429
19430 default:
19431 break;
19432 }
19433 }
19434
19435 if (action != SKIP)
19436 {
19437 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19438 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19439 rtvec vec;
19440
19441 if (action == SWAP_CONV)
19442 {
19443 src = copy_rtx (src);
19444 XEXP (src, 0) = op1;
19445 XEXP (src, 1) = op0;
19446 pat = gen_rtx_SET (dst, src);
19447 vec = gen_rtvec (2, pat, clobber);
19448 }
19449 else /* action == CONV */
19450 vec = gen_rtvec (2, pat, clobber);
19451
19452 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19453 INSN_CODE (insn) = -1;
19454 }
19455 }
19456
19457 if (NONDEBUG_INSN_P (insn))
19458 df_simulate_one_insn_backwards (bb, insn, &live);
19459 }
19460 }
19461
19462 CLEAR_REG_SET (&live);
19463 }
19464
19465 /* GCC puts the pool in the wrong place for ARM, since we can only
19466 load addresses a limited distance around the PC. We do some
19467 special munging to move the constant pool values to the correct
19468 point in the code. */
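/* A rough illustration only (labels and values invented): a constant that
   cannot be encoded as an immediate is loaded PC-relative from a minipool
   dumped after a barrier, e.g.

	ldr	r3, .LCP0	@ PC-relative load of the pooled constant
	...
	b	.Lskip		@ barrier so execution never reaches the pool
   .LCP0:
	.word	0x12345678	@ minipool entry
   .Lskip:
	...  */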
19469 static void
19470 arm_reorg (void)
19471 {
19472 rtx_insn *insn;
19473 HOST_WIDE_INT address = 0;
19474 Mfix * fix;
19475
19476 if (use_cmse)
19477 cmse_nonsecure_call_inline_register_clear ();
19478
19479 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19480 if (cfun->is_thunk)
19481 ;
19482 else if (TARGET_THUMB1)
19483 thumb1_reorg ();
19484 else if (TARGET_THUMB2)
19485 thumb2_reorg ();
19486
19487 /* Ensure all insns that must be split have been split at this point.
19488 Otherwise, the pool placement code below may compute incorrect
19489 insn lengths. Note that when optimizing, all insns have already
19490 been split at this point. */
19491 if (!optimize)
19492 split_all_insns_noflow ();
19493
19494 /* When literal pools are disabled it should no longer be necessary to
19495 create any, so make sure we do not even attempt to. */
19496 if (arm_disable_literal_pool)
19497 return;
19498
19499 minipool_fix_head = minipool_fix_tail = NULL;
19500
19501 /* The first insn must always be a note, or the code below won't
19502 scan it properly. */
19503 insn = get_insns ();
19504 gcc_assert (NOTE_P (insn));
19505 minipool_pad = 0;
19506
19507 /* Scan all the insns and record the operands that will need fixing. */
19508 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19509 {
19510 if (BARRIER_P (insn))
19511 push_minipool_barrier (insn, address);
19512 else if (INSN_P (insn))
19513 {
19514 rtx_jump_table_data *table;
19515
19516 note_invalid_constants (insn, address, true);
19517 address += get_attr_length (insn);
19518
19519 /* If the insn is a vector jump, add the size of the table
19520 and skip the table. */
19521 if (tablejump_p (insn, NULL, &table))
19522 {
19523 address += get_jump_table_size (table);
19524 insn = table;
19525 }
19526 }
19527 else if (LABEL_P (insn))
19528 /* Add the worst-case padding due to alignment. We don't add
19529 the _current_ padding because the minipool insertions
19530 themselves might change it. */
19531 address += get_label_padding (insn);
19532 }
19533
19534 fix = minipool_fix_head;
19535
19536 /* Now scan the fixups and perform the required changes. */
19537 while (fix)
19538 {
19539 Mfix * ftmp;
19540 Mfix * fdel;
19541 Mfix * last_added_fix;
19542 Mfix * last_barrier = NULL;
19543 Mfix * this_fix;
19544
19545 /* Skip any further barriers before the next fix. */
19546 while (fix && BARRIER_P (fix->insn))
19547 fix = fix->next;
19548
19549 /* No more fixes. */
19550 if (fix == NULL)
19551 break;
19552
19553 last_added_fix = NULL;
19554
19555 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19556 {
19557 if (BARRIER_P (ftmp->insn))
19558 {
19559 if (ftmp->address >= minipool_vector_head->max_address)
19560 break;
19561
19562 last_barrier = ftmp;
19563 }
19564 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19565 break;
19566
19567 last_added_fix = ftmp; /* Keep track of the last fix added. */
19568 }
19569
19570 /* If we found a barrier, drop back to that; any fixes that we
19571 could have reached but come after the barrier will now go in
19572 the next mini-pool. */
19573 if (last_barrier != NULL)
19574 {
19575 /* Reduce the refcount for those fixes that won't go into this
19576 pool after all. */
19577 for (fdel = last_barrier->next;
19578 fdel && fdel != ftmp;
19579 fdel = fdel->next)
19580 {
19581 fdel->minipool->refcount--;
19582 fdel->minipool = NULL;
19583 }
19584
19585 ftmp = last_barrier;
19586 }
19587 else
19588 {
19589 /* ftmp is the first fix that we can't fit into this pool and
19590 there are no natural barriers that we could use. Insert a
19591 new barrier in the code somewhere between the previous
19592 fix and this one, and arrange to jump around it. */
19593 HOST_WIDE_INT max_address;
19594
19595 /* The last item on the list of fixes must be a barrier, so
19596 we can never run off the end of the list of fixes without
19597 last_barrier being set. */
19598 gcc_assert (ftmp);
19599
19600 max_address = minipool_vector_head->max_address;
19601 /* Check that there isn't another fix that is in range that
19602 we couldn't fit into this pool because the pool was
19603 already too large: we need to put the pool before such an
19604 instruction. The pool itself may come just after the
19605 fix because create_fix_barrier also allows space for a
19606 jump instruction. */
19607 if (ftmp->address < max_address)
19608 max_address = ftmp->address + 1;
19609
19610 last_barrier = create_fix_barrier (last_added_fix, max_address);
19611 }
19612
19613 assign_minipool_offsets (last_barrier);
19614
19615 while (ftmp)
19616 {
19617 if (!BARRIER_P (ftmp->insn)
19618 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19619 == NULL))
19620 break;
19621
19622 ftmp = ftmp->next;
19623 }
19624
19625 /* Scan over the fixes we have identified for this pool, fixing them
19626 up and adding the constants to the pool itself. */
19627 for (this_fix = fix; this_fix && ftmp != this_fix;
19628 this_fix = this_fix->next)
19629 if (!BARRIER_P (this_fix->insn))
19630 {
19631 rtx addr
19632 = plus_constant (Pmode,
19633 gen_rtx_LABEL_REF (VOIDmode,
19634 minipool_vector_label),
19635 this_fix->minipool->offset);
19636 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19637 }
19638
19639 dump_minipool (last_barrier->insn);
19640 fix = ftmp;
19641 }
19642
19643 /* From now on we must synthesize any constants that we can't handle
19644 directly. This can happen if the RTL gets split during final
19645 instruction generation. */
19646 cfun->machine->after_arm_reorg = 1;
19647
19648 /* Free the minipool memory. */
19649 obstack_free (&minipool_obstack, minipool_startobj);
19650 }
19651 \f
19652 /* Routines to output assembly language. */
19653
19654 /* Return the string representation of the real value passed in. */
19655 static const char *
19656 fp_const_from_val (REAL_VALUE_TYPE *r)
19657 {
19658 if (!fp_consts_inited)
19659 init_fp_table ();
19660
19661 gcc_assert (real_equal (r, &value_fp0));
19662 return "0";
19663 }
19664
19665 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19666 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19667 insn is in the list, and UPDATE is true iff the list contains an
19668 explicit update of the base register. */
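/* For illustration (register numbers invented): with SP as the base register
   and an explicit update this emits e.g. "pop {r4, r5, pc}"; otherwise it
   falls back to "ldmfd sp, {...}", "ldmia r7!, {...}" or "ldm r7, {...}"
   depending on the base register and whether it is written back, with a "^"
   suffix appended when returning from an interrupt. */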
19669 void
19670 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19671 bool update)
19672 {
19673 int i;
19674 char pattern[100];
19675 int offset;
19676 const char *conditional;
19677 int num_saves = XVECLEN (operands[0], 0);
19678 unsigned int regno;
19679 unsigned int regno_base = REGNO (operands[1]);
19680 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19681
19682 offset = 0;
19683 offset += update ? 1 : 0;
19684 offset += return_pc ? 1 : 0;
19685
19686 /* Is the base register in the list? */
19687 for (i = offset; i < num_saves; i++)
19688 {
19689 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19690 /* If SP is in the list, then the base register must be SP. */
19691 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19692 /* If base register is in the list, there must be no explicit update. */
19693 if (regno == regno_base)
19694 gcc_assert (!update);
19695 }
19696
19697 conditional = reverse ? "%?%D0" : "%?%d0";
19698 /* Can't use POP if returning from an interrupt. */
19699 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19700 sprintf (pattern, "pop%s\t{", conditional);
19701 else
19702 {
19703 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19704 It's just a convention; their semantics are identical. */
19705 if (regno_base == SP_REGNUM)
19706 sprintf (pattern, "ldmfd%s\t", conditional);
19707 else if (update)
19708 sprintf (pattern, "ldmia%s\t", conditional);
19709 else
19710 sprintf (pattern, "ldm%s\t", conditional);
19711
19712 strcat (pattern, reg_names[regno_base]);
19713 if (update)
19714 strcat (pattern, "!, {");
19715 else
19716 strcat (pattern, ", {");
19717 }
19718
19719 /* Output the first destination register. */
19720 strcat (pattern,
19721 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19722
19723 /* Output the rest of the destination registers. */
19724 for (i = offset + 1; i < num_saves; i++)
19725 {
19726 strcat (pattern, ", ");
19727 strcat (pattern,
19728 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19729 }
19730
19731 strcat (pattern, "}");
19732
19733 if (interrupt_p && return_pc)
19734 strcat (pattern, "^");
19735
19736 output_asm_insn (pattern, &cond);
19737 }
19738
19739
19740 /* Output the assembly for a store multiple. */
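/* For example (operands invented): a push to the stack comes out as
   "vpush.64 {d8, d9, ...}", while a store through another base register
   comes out as "vstmdb.64 r3!, {d8, d9, ...}". */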
19741
19742 const char *
19743 vfp_output_vstmd (rtx * operands)
19744 {
19745 char pattern[100];
19746 int p;
19747 int base;
19748 int i;
19749 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19750 ? XEXP (operands[0], 0)
19751 : XEXP (XEXP (operands[0], 0), 0);
19752 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19753
19754 if (push_p)
19755 strcpy (pattern, "vpush%?.64\t{%P1");
19756 else
19757 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19758
19759 p = strlen (pattern);
19760
19761 gcc_assert (REG_P (operands[1]));
19762
19763 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19764 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19765 {
19766 p += sprintf (&pattern[p], ", d%d", base + i);
19767 }
19768 strcpy (&pattern[p], "}");
19769
19770 output_asm_insn (pattern, operands);
19771 return "";
19772 }
19773
19774
19775 /* Emit RTL to save a block of VFP register pairs to the stack. Return the
19776 number of bytes pushed. */
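/* Sketch of the generated RTL (simplified): a single PARALLEL containing a
   PRE_MODIFY store of the whole block plus USEs of the remaining D
   registers, with a REG_FRAME_RELATED_EXPR note attached that spells out the
   equivalent SP adjustment and the individual register stores for the
   unwinder. */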
19777
19778 static int
19779 vfp_emit_fstmd (int base_reg, int count)
19780 {
19781 rtx par;
19782 rtx dwarf;
19783 rtx tmp, reg;
19784 int i;
19785
19786 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19787 register pairs are stored by a store multiple insn. We avoid this
19788 by pushing an extra pair. */
19789 if (count == 2 && !arm_arch6)
19790 {
19791 if (base_reg == LAST_VFP_REGNUM - 3)
19792 base_reg -= 2;
19793 count++;
19794 }
19795
19796 /* FSTMD may not store more than 16 doubleword registers at once. Split
19797 larger stores into multiple parts (up to a maximum of two, in
19798 practice). */
19799 if (count > 16)
19800 {
19801 int saved;
19802 /* NOTE: base_reg is an internal register number, so each D register
19803 counts as 2. */
19804 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19805 saved += vfp_emit_fstmd (base_reg, 16);
19806 return saved;
19807 }
19808
19809 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19810 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19811
19812 reg = gen_rtx_REG (DFmode, base_reg);
19813 base_reg += 2;
19814
19815 XVECEXP (par, 0, 0)
19816 = gen_rtx_SET (gen_frame_mem
19817 (BLKmode,
19818 gen_rtx_PRE_MODIFY (Pmode,
19819 stack_pointer_rtx,
19820 plus_constant
19821 (Pmode, stack_pointer_rtx,
19822 - (count * 8)))
19823 ),
19824 gen_rtx_UNSPEC (BLKmode,
19825 gen_rtvec (1, reg),
19826 UNSPEC_PUSH_MULT));
19827
19828 tmp = gen_rtx_SET (stack_pointer_rtx,
19829 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19830 RTX_FRAME_RELATED_P (tmp) = 1;
19831 XVECEXP (dwarf, 0, 0) = tmp;
19832
19833 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19834 RTX_FRAME_RELATED_P (tmp) = 1;
19835 XVECEXP (dwarf, 0, 1) = tmp;
19836
19837 for (i = 1; i < count; i++)
19838 {
19839 reg = gen_rtx_REG (DFmode, base_reg);
19840 base_reg += 2;
19841 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19842
19843 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19844 plus_constant (Pmode,
19845 stack_pointer_rtx,
19846 i * 8)),
19847 reg);
19848 RTX_FRAME_RELATED_P (tmp) = 1;
19849 XVECEXP (dwarf, 0, i + 1) = tmp;
19850 }
19851
19852 par = emit_insn (par);
19853 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19854 RTX_FRAME_RELATED_P (par) = 1;
19855
19856 return count * 8;
19857 }
19858
19859 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19860 has the cmse_nonsecure_call attribute; return false otherwise. */
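/* For instance (an illustrative declaration), calls made through a pointer
   of a type such as
     typedef void __attribute__ ((cmse_nonsecure_call)) nonsecure_fn (void);
   are detected here when compiling with -mcmse, since the attribute is
   looked up on the type of ADDR. */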
19861
19862 bool
19863 detect_cmse_nonsecure_call (tree addr)
19864 {
19865 if (!addr)
19866 return false;
19867
19868 tree fntype = TREE_TYPE (addr);
19869 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19870 TYPE_ATTRIBUTES (fntype)))
19871 return true;
19872 return false;
19873 }
19874
19875
19876 /* Emit a call instruction with pattern PAT. ADDR is the address of
19877 the call target. */
19878
19879 void
19880 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19881 {
19882 rtx insn;
19883
19884 insn = emit_call_insn (pat);
19885
19886 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19887 If the call might use such an entry, add a use of the PIC register
19888 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19889 if (TARGET_VXWORKS_RTP
19890 && flag_pic
19891 && !sibcall
19892 && SYMBOL_REF_P (addr)
19893 && (SYMBOL_REF_DECL (addr)
19894 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19895 : !SYMBOL_REF_LOCAL_P (addr)))
19896 {
19897 require_pic_register (NULL_RTX, false /*compute_now*/);
19898 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19899 }
19900
19901 if (TARGET_FDPIC)
19902 {
19903 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19904 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19905 }
19906
19907 if (TARGET_AAPCS_BASED)
19908 {
19909 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19910 linker. We need to add an IP clobber to allow setting
19911 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19912 is not needed since it's a fixed register. */
19913 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19914 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19915 }
19916 }
19917
19918 /* Output a 'call' insn. */
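/* A sketch of the output (register invented): the return address is set up
   by hand with "mov lr, pc" and the branch is then either "bx r2" (when
   interworking or ARMv4T bx is available) or "mov pc, r2"; a call through lr
   is first copied into ip. */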
19919 const char *
19920 output_call (rtx *operands)
19921 {
19922 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19923
19924 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
19925 if (REGNO (operands[0]) == LR_REGNUM)
19926 {
19927 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19928 output_asm_insn ("mov%?\t%0, %|lr", operands);
19929 }
19930
19931 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19932
19933 if (TARGET_INTERWORK || arm_arch4t)
19934 output_asm_insn ("bx%?\t%0", operands);
19935 else
19936 output_asm_insn ("mov%?\t%|pc, %0", operands);
19937
19938 return "";
19939 }
19940
19941 /* Output a move of a long double from ARM registers to ARM registers.
19942 OPERANDS[0] is the destination.
19943 OPERANDS[1] is the source. */
19944 const char *
19945 output_mov_long_double_arm_from_arm (rtx *operands)
19946 {
19947 /* We have to be careful here because the two might overlap. */
19948 int dest_start = REGNO (operands[0]);
19949 int src_start = REGNO (operands[1]);
19950 rtx ops[2];
19951 int i;
19952
19953 if (dest_start < src_start)
19954 {
19955 for (i = 0; i < 3; i++)
19956 {
19957 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19958 ops[1] = gen_rtx_REG (SImode, src_start + i);
19959 output_asm_insn ("mov%?\t%0, %1", ops);
19960 }
19961 }
19962 else
19963 {
19964 for (i = 2; i >= 0; i--)
19965 {
19966 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19967 ops[1] = gen_rtx_REG (SImode, src_start + i);
19968 output_asm_insn ("mov%?\t%0, %1", ops);
19969 }
19970 }
19971
19972 return "";
19973 }
19974
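/* Emit a constant or symbolic SRC into DEST as a pair of half-word sets.
   For a CONST_INT the low 16 bits are set first and the upper 16 bits, when
   nonzero, are inserted via a ZERO_EXTRACT (the movw/movt idiom); for other
   operands a HIGH/LO_SUM pair is used. A REG_EQUAL note records the full
   value. */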
19975 void
19976 arm_emit_movpair (rtx dest, rtx src)
19977 {
19978 /* If the src is an immediate, simplify it. */
19979 if (CONST_INT_P (src))
19980 {
19981 HOST_WIDE_INT val = INTVAL (src);
19982 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19983 if ((val >> 16) & 0x0000ffff)
19984 {
19985 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19986 GEN_INT (16)),
19987 GEN_INT ((val >> 16) & 0x0000ffff));
19988 rtx_insn *insn = get_last_insn ();
19989 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19990 }
19991 return;
19992 }
19993 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19994 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19995 rtx_insn *insn = get_last_insn ();
19996 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19997 }
19998
19999 /* Output a move between double words. It must be REG<-MEM
20000 or MEM<-REG. */
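/* For illustration (operands invented): a simple register-addressed load
   typically comes out as "ldrd r0, [r2]" when LDRD is usable for the
   register pair, and as "ldmia r2, {r0, r1}" otherwise; the cases below
   additionally handle auto-increment addresses, overlapping registers and
   out-of-range offsets. */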
20001 const char *
20002 output_move_double (rtx *operands, bool emit, int *count)
20003 {
20004 enum rtx_code code0 = GET_CODE (operands[0]);
20005 enum rtx_code code1 = GET_CODE (operands[1]);
20006 rtx otherops[3];
20007 if (count)
20008 *count = 1;
20009
20010 /* The only case when this might happen is when
20011 you are looking at the length of a DImode instruction
20012 that has an invalid constant in it. */
20013 if (code0 == REG && code1 != MEM)
20014 {
20015 gcc_assert (!emit);
20016 *count = 2;
20017 return "";
20018 }
20019
20020 if (code0 == REG)
20021 {
20022 unsigned int reg0 = REGNO (operands[0]);
20023 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20024
20025 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20026
20027 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20028
20029 switch (GET_CODE (XEXP (operands[1], 0)))
20030 {
20031 case REG:
20032
20033 if (emit)
20034 {
20035 if (can_ldrd
20036 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20037 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20038 else
20039 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20040 }
20041 break;
20042
20043 case PRE_INC:
20044 gcc_assert (can_ldrd);
20045 if (emit)
20046 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20047 break;
20048
20049 case PRE_DEC:
20050 if (emit)
20051 {
20052 if (can_ldrd)
20053 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20054 else
20055 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20056 }
20057 break;
20058
20059 case POST_INC:
20060 if (emit)
20061 {
20062 if (can_ldrd)
20063 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20064 else
20065 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20066 }
20067 break;
20068
20069 case POST_DEC:
20070 gcc_assert (can_ldrd);
20071 if (emit)
20072 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20073 break;
20074
20075 case PRE_MODIFY:
20076 case POST_MODIFY:
20077 /* Autoincrement addressing modes should never have overlapping
20078 base and destination registers, and overlapping index registers
20079 are already prohibited, so this doesn't need to worry about
20080 fix_cm3_ldrd. */
20081 otherops[0] = operands[0];
20082 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20083 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20084
20085 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20086 {
20087 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20088 {
20089 /* Registers overlap so split out the increment. */
20090 if (emit)
20091 {
20092 gcc_assert (can_ldrd);
20093 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20094 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20095 }
20096 if (count)
20097 *count = 2;
20098 }
20099 else
20100 {
20101 /* Use a single insn if we can.
20102 FIXME: IWMMXT allows offsets larger than ldrd can
20103 handle, fix these up with a pair of ldr. */
20104 if (can_ldrd
20105 && (TARGET_THUMB2
20106 || !CONST_INT_P (otherops[2])
20107 || (INTVAL (otherops[2]) > -256
20108 && INTVAL (otherops[2]) < 256)))
20109 {
20110 if (emit)
20111 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20112 }
20113 else
20114 {
20115 if (emit)
20116 {
20117 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20118 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20119 }
20120 if (count)
20121 *count = 2;
20122
20123 }
20124 }
20125 }
20126 else
20127 {
20128 /* Use a single insn if we can.
20129 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20130 fix these up with a pair of ldr. */
20131 if (can_ldrd
20132 && (TARGET_THUMB2
20133 || !CONST_INT_P (otherops[2])
20134 || (INTVAL (otherops[2]) > -256
20135 && INTVAL (otherops[2]) < 256)))
20136 {
20137 if (emit)
20138 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20139 }
20140 else
20141 {
20142 if (emit)
20143 {
20144 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20145 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20146 }
20147 if (count)
20148 *count = 2;
20149 }
20150 }
20151 break;
20152
20153 case LABEL_REF:
20154 case CONST:
20155 /* We might be able to use ldrd %0, %1 here. However, the range is
20156 different from that of ldr/adr, and it is broken on some ARMv7-M
20157 implementations. */
20158 /* Use the second register of the pair to avoid problematic
20159 overlap. */
20160 otherops[1] = operands[1];
20161 if (emit)
20162 output_asm_insn ("adr%?\t%0, %1", otherops);
20163 operands[1] = otherops[0];
20164 if (emit)
20165 {
20166 if (can_ldrd)
20167 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20168 else
20169 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20170 }
20171
20172 if (count)
20173 *count = 2;
20174 break;
20175
20176 /* ??? This needs checking for thumb2. */
20177 default:
20178 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20179 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20180 {
20181 otherops[0] = operands[0];
20182 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20183 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20184
20185 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20186 {
20187 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20188 {
20189 switch ((int) INTVAL (otherops[2]))
20190 {
20191 case -8:
20192 if (emit)
20193 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20194 return "";
20195 case -4:
20196 if (TARGET_THUMB2)
20197 break;
20198 if (emit)
20199 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20200 return "";
20201 case 4:
20202 if (TARGET_THUMB2)
20203 break;
20204 if (emit)
20205 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20206 return "";
20207 }
20208 }
20209 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
20210 operands[1] = otherops[0];
20211 if (can_ldrd
20212 && (REG_P (otherops[2])
20213 || TARGET_THUMB2
20214 || (CONST_INT_P (otherops[2])
20215 && INTVAL (otherops[2]) > -256
20216 && INTVAL (otherops[2]) < 256)))
20217 {
20218 if (reg_overlap_mentioned_p (operands[0],
20219 otherops[2]))
20220 {
20221 /* Swap base and index registers over to
20222 avoid a conflict. */
20223 std::swap (otherops[1], otherops[2]);
20224 }
20225 /* If both registers conflict, it will usually
20226 have been fixed by a splitter. */
20227 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20228 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20229 {
20230 if (emit)
20231 {
20232 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20233 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20234 }
20235 if (count)
20236 *count = 2;
20237 }
20238 else
20239 {
20240 otherops[0] = operands[0];
20241 if (emit)
20242 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20243 }
20244 return "";
20245 }
20246
20247 if (CONST_INT_P (otherops[2]))
20248 {
20249 if (emit)
20250 {
20251 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20252 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20253 else
20254 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20255 }
20256 }
20257 else
20258 {
20259 if (emit)
20260 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20261 }
20262 }
20263 else
20264 {
20265 if (emit)
20266 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20267 }
20268
20269 if (count)
20270 *count = 2;
20271
20272 if (can_ldrd)
20273 return "ldrd%?\t%0, [%1]";
20274
20275 return "ldmia%?\t%1, %M0";
20276 }
20277 else
20278 {
20279 otherops[1] = adjust_address (operands[1], SImode, 4);
20280 /* Take care of overlapping base/data reg. */
20281 if (reg_mentioned_p (operands[0], operands[1]))
20282 {
20283 if (emit)
20284 {
20285 output_asm_insn ("ldr%?\t%0, %1", otherops);
20286 output_asm_insn ("ldr%?\t%0, %1", operands);
20287 }
20288 if (count)
20289 *count = 2;
20290
20291 }
20292 else
20293 {
20294 if (emit)
20295 {
20296 output_asm_insn ("ldr%?\t%0, %1", operands);
20297 output_asm_insn ("ldr%?\t%0, %1", otherops);
20298 }
20299 if (count)
20300 *count = 2;
20301 }
20302 }
20303 }
20304 }
20305 else
20306 {
20307 /* Constraints should ensure this. */
20308 gcc_assert (code0 == MEM && code1 == REG);
20309 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20310 || (TARGET_ARM && TARGET_LDRD));
20311
20312 /* For TARGET_ARM the first source register of an STRD
20313 must be even. This is usually the case for double-word
20314 values but user assembly constraints can force an odd
20315 starting register. */
20316 bool allow_strd = TARGET_LDRD
20317 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20318 switch (GET_CODE (XEXP (operands[0], 0)))
20319 {
20320 case REG:
20321 if (emit)
20322 {
20323 if (allow_strd)
20324 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20325 else
20326 output_asm_insn ("stm%?\t%m0, %M1", operands);
20327 }
20328 break;
20329
20330 case PRE_INC:
20331 gcc_assert (allow_strd);
20332 if (emit)
20333 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20334 break;
20335
20336 case PRE_DEC:
20337 if (emit)
20338 {
20339 if (allow_strd)
20340 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20341 else
20342 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20343 }
20344 break;
20345
20346 case POST_INC:
20347 if (emit)
20348 {
20349 if (allow_strd)
20350 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20351 else
20352 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20353 }
20354 break;
20355
20356 case POST_DEC:
20357 gcc_assert (allow_strd);
20358 if (emit)
20359 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20360 break;
20361
20362 case PRE_MODIFY:
20363 case POST_MODIFY:
20364 otherops[0] = operands[1];
20365 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20366 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20367
20368 /* IWMMXT allows offsets larger than strd can handle,
20369 fix these up with a pair of str. */
20370 if (!TARGET_THUMB2
20371 && CONST_INT_P (otherops[2])
20372 && (INTVAL(otherops[2]) <= -256
20373 || INTVAL(otherops[2]) >= 256))
20374 {
20375 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20376 {
20377 if (emit)
20378 {
20379 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20380 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20381 }
20382 if (count)
20383 *count = 2;
20384 }
20385 else
20386 {
20387 if (emit)
20388 {
20389 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20390 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20391 }
20392 if (count)
20393 *count = 2;
20394 }
20395 }
20396 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20397 {
20398 if (emit)
20399 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20400 }
20401 else
20402 {
20403 if (emit)
20404 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20405 }
20406 break;
20407
20408 case PLUS:
20409 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20410 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20411 {
20412 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20413 {
20414 case -8:
20415 if (emit)
20416 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20417 return "";
20418
20419 case -4:
20420 if (TARGET_THUMB2)
20421 break;
20422 if (emit)
20423 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20424 return "";
20425
20426 case 4:
20427 if (TARGET_THUMB2)
20428 break;
20429 if (emit)
20430 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20431 return "";
20432 }
20433 }
20434 if (allow_strd
20435 && (REG_P (otherops[2])
20436 || TARGET_THUMB2
20437 || (CONST_INT_P (otherops[2])
20438 && INTVAL (otherops[2]) > -256
20439 && INTVAL (otherops[2]) < 256)))
20440 {
20441 otherops[0] = operands[1];
20442 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20443 if (emit)
20444 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20445 return "";
20446 }
20447 /* Fall through */
20448
20449 default:
20450 otherops[0] = adjust_address (operands[0], SImode, 4);
20451 otherops[1] = operands[1];
20452 if (emit)
20453 {
20454 output_asm_insn ("str%?\t%1, %0", operands);
20455 output_asm_insn ("str%?\t%H1, %0", otherops);
20456 }
20457 if (count)
20458 *count = 2;
20459 }
20460 }
20461
20462 return "";
20463 }
20464
20465 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20466 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20467
20468 const char *
20469 output_move_quad (rtx *operands)
20470 {
20471 if (REG_P (operands[0]))
20472 {
20473 /* Load, or reg->reg move. */
20474
20475 if (MEM_P (operands[1]))
20476 {
20477 switch (GET_CODE (XEXP (operands[1], 0)))
20478 {
20479 case REG:
20480 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20481 break;
20482
20483 case LABEL_REF:
20484 case CONST:
20485 output_asm_insn ("adr%?\t%0, %1", operands);
20486 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20487 break;
20488
20489 default:
20490 gcc_unreachable ();
20491 }
20492 }
20493 else
20494 {
20495 rtx ops[2];
20496 int dest, src, i;
20497
20498 gcc_assert (REG_P (operands[1]));
20499
20500 dest = REGNO (operands[0]);
20501 src = REGNO (operands[1]);
20502
20503 /* This seems pretty dumb, but hopefully GCC won't try to do it
20504 very often. */
20505 if (dest < src)
20506 for (i = 0; i < 4; i++)
20507 {
20508 ops[0] = gen_rtx_REG (SImode, dest + i);
20509 ops[1] = gen_rtx_REG (SImode, src + i);
20510 output_asm_insn ("mov%?\t%0, %1", ops);
20511 }
20512 else
20513 for (i = 3; i >= 0; i--)
20514 {
20515 ops[0] = gen_rtx_REG (SImode, dest + i);
20516 ops[1] = gen_rtx_REG (SImode, src + i);
20517 output_asm_insn ("mov%?\t%0, %1", ops);
20518 }
20519 }
20520 }
20521 else
20522 {
20523 gcc_assert (MEM_P (operands[0]));
20524 gcc_assert (REG_P (operands[1]));
20525 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20526
20527 switch (GET_CODE (XEXP (operands[0], 0)))
20528 {
20529 case REG:
20530 output_asm_insn ("stm%?\t%m0, %M1", operands);
20531 break;
20532
20533 default:
20534 gcc_unreachable ();
20535 }
20536 }
20537
20538 return "";
20539 }
20540
20541 /* Output a VFP load or store instruction. */
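/* Typical outputs (operands invented): "vldr.64 d8, [sp]" or
   "vstr.32 s0, [r3]" for plain addresses, and forms such as
   "vldmia.64 r0!, {d8}" / "vstmdb.64 r0!, {d8}" for post-increment and
   pre-decrement addresses. */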
20542
20543 const char *
20544 output_move_vfp (rtx *operands)
20545 {
20546 rtx reg, mem, addr, ops[2];
20547 int load = REG_P (operands[0]);
20548 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20549 int sp = (!TARGET_VFP_FP16INST
20550 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20551 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20552 const char *templ;
20553 char buff[50];
20554 machine_mode mode;
20555
20556 reg = operands[!load];
20557 mem = operands[load];
20558
20559 mode = GET_MODE (reg);
20560
20561 gcc_assert (REG_P (reg));
20562 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20563 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20564 || mode == SFmode
20565 || mode == DFmode
20566 || mode == HImode
20567 || mode == SImode
20568 || mode == DImode
20569 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20570 gcc_assert (MEM_P (mem));
20571
20572 addr = XEXP (mem, 0);
20573
20574 switch (GET_CODE (addr))
20575 {
20576 case PRE_DEC:
20577 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20578 ops[0] = XEXP (addr, 0);
20579 ops[1] = reg;
20580 break;
20581
20582 case POST_INC:
20583 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20584 ops[0] = XEXP (addr, 0);
20585 ops[1] = reg;
20586 break;
20587
20588 default:
20589 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20590 ops[0] = reg;
20591 ops[1] = mem;
20592 break;
20593 }
20594
20595 sprintf (buff, templ,
20596 load ? "ld" : "st",
20597 dp ? "64" : sp ? "32" : "16",
20598 dp ? "P" : "",
20599 integer_p ? "\t%@ int" : "");
20600 output_asm_insn (buff, ops);
20601
20602 return "";
20603 }
20604
20605 /* Output a Neon double-word or quad-word load or store, or a load
20606 or store for larger structure modes.
20607
20608 WARNING: The ordering of elements is unusual in big-endian mode,
20609 because the EABI requires that vectors stored in memory appear
20610 as though they were stored by a VSTM.
20611 GCC RTL defines element ordering based on in-memory order.
20612 This can be different from the architectural ordering of elements
20613 within a NEON register. The intrinsics defined in arm_neon.h use the
20614 NEON register element ordering, not the GCC RTL element ordering.
20615
20616 For example, the in-memory ordering of a big-endian quadword
20617 vector with 16-bit elements when stored from register pair {d0,d1}
20618 will be (lowest address first, d0[N] is NEON register element N):
20619
20620 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20621
20622 When necessary, quadword registers (dN, dN+1) are moved to ARM
20623 registers from rN in the order:
20624
20625 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20626
20627 So that STM/LDM can be used on vectors in ARM registers, and the
20628 same memory layout will result as if VSTM/VLDM were used.
20629
20630 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20631 possible, which allows use of appropriate alignment tags.
20632 Note that the choice of "64" is independent of the actual vector
20633 element size; this size simply ensures that the behavior is
20634 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20635
20636 Due to limitations of those instructions, use of VST1.64/VLD1.64
20637 is not possible if:
20638 - the address contains PRE_DEC, or
20639 - the mode refers to more than 4 double-word registers
20640
20641 In those cases, it would be possible to replace VSTM/VLDM by a
20642 sequence of instructions; this is not currently implemented since
20643 this is not certain to actually improve performance. */
20644
20645 const char *
20646 output_move_neon (rtx *operands)
20647 {
20648 rtx reg, mem, addr, ops[2];
20649 int regno, nregs, load = REG_P (operands[0]);
20650 const char *templ;
20651 char buff[50];
20652 machine_mode mode;
20653
20654 reg = operands[!load];
20655 mem = operands[load];
20656
20657 mode = GET_MODE (reg);
20658
20659 gcc_assert (REG_P (reg));
20660 regno = REGNO (reg);
20661 nregs = REG_NREGS (reg) / 2;
20662 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20663 || NEON_REGNO_OK_FOR_QUAD (regno));
20664 gcc_assert (VALID_NEON_DREG_MODE (mode)
20665 || VALID_NEON_QREG_MODE (mode)
20666 || VALID_NEON_STRUCT_MODE (mode));
20667 gcc_assert (MEM_P (mem));
20668
20669 addr = XEXP (mem, 0);
20670
20671 /* Strip off const from addresses like (const (plus (...))). */
20672 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20673 addr = XEXP (addr, 0);
20674
20675 switch (GET_CODE (addr))
20676 {
20677 case POST_INC:
20678 /* We have to use vldm / vstm for too-large modes. */
20679 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20680 {
20681 templ = "v%smia%%?\t%%0!, %%h1";
20682 ops[0] = XEXP (addr, 0);
20683 }
20684 else
20685 {
20686 templ = "v%s1.64\t%%h1, %%A0";
20687 ops[0] = mem;
20688 }
20689 ops[1] = reg;
20690 break;
20691
20692 case PRE_DEC:
20693 /* We have to use vldm / vstm in this case, since there is no
20694 pre-decrement form of the vld1 / vst1 instructions. */
20695 templ = "v%smdb%%?\t%%0!, %%h1";
20696 ops[0] = XEXP (addr, 0);
20697 ops[1] = reg;
20698 break;
20699
20700 case POST_MODIFY:
20701 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20702 gcc_unreachable ();
20703
20704 case REG:
20705 /* We have to use vldm / vstm for too-large modes. */
20706 if (nregs > 1)
20707 {
20708 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20709 templ = "v%smia%%?\t%%m0, %%h1";
20710 else
20711 templ = "v%s1.64\t%%h1, %%A0";
20712
20713 ops[0] = mem;
20714 ops[1] = reg;
20715 break;
20716 }
20717 /* Fall through. */
20718 case PLUS:
20719 if (GET_CODE (addr) == PLUS)
20720 addr = XEXP (addr, 0);
20721 /* Fall through. */
20722 case LABEL_REF:
20723 {
20724 int i;
20725 int overlap = -1;
20726 for (i = 0; i < nregs; i++)
20727 {
20728 /* We're only using DImode here because it's a convenient
20729 size. */
20730 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20731 ops[1] = adjust_address (mem, DImode, 8 * i);
20732 if (reg_overlap_mentioned_p (ops[0], mem))
20733 {
20734 gcc_assert (overlap == -1);
20735 overlap = i;
20736 }
20737 else
20738 {
20739 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20740 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20741 else
20742 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20743 output_asm_insn (buff, ops);
20744 }
20745 }
20746 if (overlap != -1)
20747 {
20748 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20749 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20750 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20751 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20752 else
20753 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20754 output_asm_insn (buff, ops);
20755 }
20756
20757 return "";
20758 }
20759
20760 default:
20761 gcc_unreachable ();
20762 }
20763
20764 sprintf (buff, templ, load ? "ld" : "st");
20765 output_asm_insn (buff, ops);
20766
20767 return "";
20768 }
20769
20770 /* Compute and return the length of neon_mov<mode>, where <mode> is
20771 one of VSTRUCT modes: EI, OI, CI or XI. */
20772 int
20773 arm_attr_length_move_neon (rtx_insn *insn)
20774 {
20775 rtx reg, mem, addr;
20776 int load;
20777 machine_mode mode;
20778
20779 extract_insn_cached (insn);
20780
20781 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20782 {
20783 mode = GET_MODE (recog_data.operand[0]);
20784 switch (mode)
20785 {
20786 case E_EImode:
20787 case E_OImode:
20788 return 8;
20789 case E_CImode:
20790 return 12;
20791 case E_XImode:
20792 return 16;
20793 default:
20794 gcc_unreachable ();
20795 }
20796 }
20797
20798 load = REG_P (recog_data.operand[0]);
20799 reg = recog_data.operand[!load];
20800 mem = recog_data.operand[load];
20801
20802 gcc_assert (MEM_P (mem));
20803
20804 addr = XEXP (mem, 0);
20805
20806 /* Strip off const from addresses like (const (plus (...))). */
20807 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20808 addr = XEXP (addr, 0);
20809
20810 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20811 {
20812 int insns = REG_NREGS (reg) / 2;
20813 return insns * 4;
20814 }
20815 else
20816 return 4;
20817 }
20818
20819 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20820 return zero. */
20821
20822 int
20823 arm_address_offset_is_imm (rtx_insn *insn)
20824 {
20825 rtx mem, addr;
20826
20827 extract_insn_cached (insn);
20828
20829 if (REG_P (recog_data.operand[0]))
20830 return 0;
20831
20832 mem = recog_data.operand[0];
20833
20834 gcc_assert (MEM_P (mem));
20835
20836 addr = XEXP (mem, 0);
20837
20838 if (REG_P (addr)
20839 || (GET_CODE (addr) == PLUS
20840 && REG_P (XEXP (addr, 0))
20841 && CONST_INT_P (XEXP (addr, 1))))
20842 return 1;
20843 else
20844 return 0;
20845 }
20846
20847 /* Output an ADD r, s, #n where n may be too big for one instruction.
20848 If N is zero and the destination is the same as the source, output nothing. */
20849 const char *
20850 output_add_immediate (rtx *operands)
20851 {
20852 HOST_WIDE_INT n = INTVAL (operands[2]);
20853
20854 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20855 {
20856 if (n < 0)
20857 output_multi_immediate (operands,
20858 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20859 -n);
20860 else
20861 output_multi_immediate (operands,
20862 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20863 n);
20864 }
20865
20866 return "";
20867 }
20868
20869 /* Output a multiple immediate operation.
20870 OPERANDS is the vector of operands referred to in the output patterns.
20871 INSTR1 is the output pattern to use for the first constant.
20872 INSTR2 is the output pattern to use for subsequent constants.
20873 IMMED_OP is the index of the constant slot in OPERANDS.
20874 N is the constant value. */
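/* Worked example (values invented): for N = 0x4005 with the add patterns
   this emits "add rd, rn, #5" followed by "add rd, rd, #16384"; the constant
   is peeled off eight bits at a time, each chunk being an 8-bit value at an
   even bit position and hence a valid ARM immediate. */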
20875 static const char *
20876 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20877 int immed_op, HOST_WIDE_INT n)
20878 {
20879 #if HOST_BITS_PER_WIDE_INT > 32
20880 n &= 0xffffffff;
20881 #endif
20882
20883 if (n == 0)
20884 {
20885 /* Quick and easy output. */
20886 operands[immed_op] = const0_rtx;
20887 output_asm_insn (instr1, operands);
20888 }
20889 else
20890 {
20891 int i;
20892 const char * instr = instr1;
20893
20894 /* Note that n is never zero here (which would give no output). */
20895 for (i = 0; i < 32; i += 2)
20896 {
20897 if (n & (3 << i))
20898 {
20899 operands[immed_op] = GEN_INT (n & (255 << i));
20900 output_asm_insn (instr, operands);
20901 instr = instr2;
20902 i += 6;
20903 }
20904 }
20905 }
20906
20907 return "";
20908 }
20909
20910 /* Return the name of a shifter operation. */
20911 static const char *
20912 arm_shift_nmem (enum rtx_code code)
20913 {
20914 switch (code)
20915 {
20916 case ASHIFT:
20917 return ARM_LSL_NAME;
20918
20919 case ASHIFTRT:
20920 return "asr";
20921
20922 case LSHIFTRT:
20923 return "lsr";
20924
20925 case ROTATERT:
20926 return "ror";
20927
20928 default:
20929 abort();
20930 }
20931 }
20932
20933 /* Return the appropriate ARM instruction for the operation code.
20934 The returned result should not be overwritten. OP is the rtx of the
20935 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20936 was shifted. */
20937 const char *
20938 arithmetic_instr (rtx op, int shift_first_arg)
20939 {
20940 switch (GET_CODE (op))
20941 {
20942 case PLUS:
20943 return "add";
20944
20945 case MINUS:
20946 return shift_first_arg ? "rsb" : "sub";
20947
20948 case IOR:
20949 return "orr";
20950
20951 case XOR:
20952 return "eor";
20953
20954 case AND:
20955 return "and";
20956
20957 case ASHIFT:
20958 case ASHIFTRT:
20959 case LSHIFTRT:
20960 case ROTATERT:
20961 return arm_shift_nmem (GET_CODE (op));
20962
20963 default:
20964 gcc_unreachable ();
20965 }
20966 }
20967
20968 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20969 for the operation code. The returned result should not be overwritten.
20970 OP is the rtx of the shift.
20971 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20972 constant shift amount otherwise. */
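/* Examples (RTL invented for illustration): (ashiftrt (reg) (const_int 3))
   yields "asr" with *AMOUNTP = 3; (mult (reg) (const_int 8)) is treated as a
   left shift by 3; a shift by a register returns the mnemonic with *AMOUNTP
   set to -1. */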
20973 static const char *
20974 shift_op (rtx op, HOST_WIDE_INT *amountp)
20975 {
20976 const char * mnem;
20977 enum rtx_code code = GET_CODE (op);
20978
20979 switch (code)
20980 {
20981 case ROTATE:
20982 if (!CONST_INT_P (XEXP (op, 1)))
20983 {
20984 output_operand_lossage ("invalid shift operand");
20985 return NULL;
20986 }
20987
20988 code = ROTATERT;
20989 *amountp = 32 - INTVAL (XEXP (op, 1));
20990 mnem = "ror";
20991 break;
20992
20993 case ASHIFT:
20994 case ASHIFTRT:
20995 case LSHIFTRT:
20996 case ROTATERT:
20997 mnem = arm_shift_nmem (code);
20998 if (CONST_INT_P (XEXP (op, 1)))
20999 {
21000 *amountp = INTVAL (XEXP (op, 1));
21001 }
21002 else if (REG_P (XEXP (op, 1)))
21003 {
21004 *amountp = -1;
21005 return mnem;
21006 }
21007 else
21008 {
21009 output_operand_lossage ("invalid shift operand");
21010 return NULL;
21011 }
21012 break;
21013
21014 case MULT:
21015 /* We never have to worry about the amount being other than a
21016 power of 2, since this case can never be reloaded from a reg. */
21017 if (!CONST_INT_P (XEXP (op, 1)))
21018 {
21019 output_operand_lossage ("invalid shift operand");
21020 return NULL;
21021 }
21022
21023 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21024
21025 /* Amount must be a power of two. */
21026 if (*amountp & (*amountp - 1))
21027 {
21028 output_operand_lossage ("invalid shift operand");
21029 return NULL;
21030 }
21031
21032 *amountp = exact_log2 (*amountp);
21033 gcc_assert (IN_RANGE (*amountp, 0, 31));
21034 return ARM_LSL_NAME;
21035
21036 default:
21037 output_operand_lossage ("invalid shift operand");
21038 return NULL;
21039 }
21040
21041 /* This is not 100% correct, but follows from the desire to merge
21042 multiplication by a power of 2 with the recognizer for a
21043 shift. >=32 is not a valid shift for "lsl", so we must try to
21044 output a shift that produces the correct arithmetic result.
21045 Using lsr #32 is identical except for the fact that the carry bit
21046 is not set correctly if we set the flags; but we never use the
21047 carry bit from such an operation, so we can ignore that. */
21048 if (code == ROTATERT)
21049 /* Rotate is just modulo 32. */
21050 *amountp &= 31;
21051 else if (*amountp != (*amountp & 31))
21052 {
21053 if (code == ASHIFT)
21054 mnem = "lsr";
21055 *amountp = 32;
21056 }
21057
21058 /* Shifts of 0 are no-ops. */
21059 if (*amountp == 0)
21060 return NULL;
21061
21062 return mnem;
21063 }
21064
21065 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21066 because /bin/as is horribly restrictive. The judgement about
21067 whether or not each character is 'printable' (and can be output as
21068 is) or not (and must be printed with an octal escape) must be made
21069 with reference to the *host* character set -- the situation is
21070 similar to that discussed in the comments above pp_c_char in
21071 c-pretty-print.cc. */
21072
21073 #define MAX_ASCII_LEN 51
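/* For example (input bytes invented): the bytes 'a', '"', 0x07 come out as
	.ascii	"a\"\007"
   with a fresh .ascii directive started whenever MAX_ASCII_LEN characters
   have already been emitted on the current line. */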
21074
21075 void
21076 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21077 {
21078 int i;
21079 int len_so_far = 0;
21080
21081 fputs ("\t.ascii\t\"", stream);
21082
21083 for (i = 0; i < len; i++)
21084 {
21085 int c = p[i];
21086
21087 if (len_so_far >= MAX_ASCII_LEN)
21088 {
21089 fputs ("\"\n\t.ascii\t\"", stream);
21090 len_so_far = 0;
21091 }
21092
21093 if (ISPRINT (c))
21094 {
21095 if (c == '\\' || c == '\"')
21096 {
21097 putc ('\\', stream);
21098 len_so_far++;
21099 }
21100 putc (c, stream);
21101 len_so_far++;
21102 }
21103 else
21104 {
21105 fprintf (stream, "\\%03o", c);
21106 len_so_far += 4;
21107 }
21108 }
21109
21110 fputs ("\"\n", stream);
21111 }
21112 \f
21113
21114 /* Compute the register save mask for registers 0 through 12
21115 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21116
21117 static unsigned long
21118 arm_compute_save_reg0_reg12_mask (void)
21119 {
21120 unsigned long func_type = arm_current_func_type ();
21121 unsigned long save_reg_mask = 0;
21122 unsigned int reg;
21123
21124 if (IS_INTERRUPT (func_type))
21125 {
21126 unsigned int max_reg;
21127 /* Interrupt functions must not corrupt any registers,
21128 even call clobbered ones. If this is a leaf function
21129 we can just examine the registers used by the RTL, but
21130 otherwise we have to assume that whatever function is
21131 called might clobber anything, and so we have to save
21132 all the call-clobbered registers as well. */
21133 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21134 /* FIQ handlers have registers r8 - r12 banked, so
21135 we only need to check r0 - r7. Normal ISRs only
21136 bank r14 and r15, so we must check up to r12.
21137 r13 is the stack pointer, which is always preserved,
21138 so we do not need to consider it here. */
21139 max_reg = 7;
21140 else
21141 max_reg = 12;
21142
21143 for (reg = 0; reg <= max_reg; reg++)
21144 if (reg_needs_saving_p (reg))
21145 save_reg_mask |= (1 << reg);
21146
21147 /* Also save the pic base register if necessary. */
21148 if (PIC_REGISTER_MAY_NEED_SAVING
21149 && crtl->uses_pic_offset_table)
21150 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21151 }
21152 else if (IS_VOLATILE(func_type))
21153 {
21154 /* For noreturn functions we historically omitted register saves
21155 altogether. However, this really messes up debugging. As a
21156 compromise, save just the frame pointers. Combined with the link
21157 register saved elsewhere, this should be sufficient to get
21158 a backtrace. */
21159 if (frame_pointer_needed)
21160 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21161 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21162 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21163 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21164 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21165 }
21166 else
21167 {
21168 /* In the normal case we only need to save those registers
21169 which are call saved and which are used by this function. */
21170 for (reg = 0; reg <= 11; reg++)
21171 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21172 save_reg_mask |= (1 << reg);
21173
21174 /* Handle the frame pointer as a special case. */
21175 if (frame_pointer_needed)
21176 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21177
21178 /* If we aren't loading the PIC register,
21179 don't stack it even though it may be live. */
21180 if (PIC_REGISTER_MAY_NEED_SAVING
21181 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21182 || crtl->uses_pic_offset_table))
21183 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21184
21185 /* The prologue will copy SP into R0, so save it. */
21186 if (IS_STACKALIGN (func_type))
21187 save_reg_mask |= 1;
21188 }
21189
21190 /* Save registers so the exception handler can modify them. */
21191 if (crtl->calls_eh_return)
21192 {
21193 unsigned int i;
21194
21195 for (i = 0; ; i++)
21196 {
21197 reg = EH_RETURN_DATA_REGNO (i);
21198 if (reg == INVALID_REGNUM)
21199 break;
21200 save_reg_mask |= 1 << reg;
21201 }
21202 }
21203
21204 return save_reg_mask;
21205 }
21206
21207 /* Return true if r3 is live at the start of the function. */
21208
21209 static bool
21210 arm_r3_live_at_start_p (void)
21211 {
21212 /* Just look at cfg info, which is still close enough to correct at this
21213 point. This gives false positives for broken functions that might use
21214 uninitialized data that happens to be allocated in r3, but who cares? */
21215 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21216 }
21217
21218 /* Compute the number of bytes used to store the static chain register on the
21219 stack, above the stack frame. We need to know this accurately to get the
21220 alignment of the rest of the stack frame correct. */
21221
21222 static int
21223 arm_compute_static_chain_stack_bytes (void)
21224 {
21225 /* Once the value is updated from the init value of -1, do not
21226 re-compute. */
21227 if (cfun->machine->static_chain_stack_bytes != -1)
21228 return cfun->machine->static_chain_stack_bytes;
21229
21230 /* See the defining assertion in arm_expand_prologue. */
21231 if (IS_NESTED (arm_current_func_type ())
21232 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21233 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21234 || flag_stack_clash_protection)
21235 && !df_regs_ever_live_p (LR_REGNUM)))
21236 && arm_r3_live_at_start_p ()
21237 && crtl->args.pretend_args_size == 0)
21238 return 4;
21239
21240 return 0;
21241 }
21242
21243 /* Compute a bit mask of which core registers need to be
21244 saved on the stack for the current function.
21245 This is used by arm_compute_frame_layout, which may add extra registers. */
21246
21247 static unsigned long
21248 arm_compute_save_core_reg_mask (void)
21249 {
21250 unsigned int save_reg_mask = 0;
21251 unsigned long func_type = arm_current_func_type ();
21252 unsigned int reg;
21253
21254 if (IS_NAKED (func_type))
21255 /* This should never really happen. */
21256 return 0;
21257
21258 /* If we are creating a stack frame, then we must save the frame pointer,
21259 IP (which will hold the old stack pointer), LR and the PC. */
21260 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21261 save_reg_mask |=
21262 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21263 | (1 << IP_REGNUM)
21264 | (1 << LR_REGNUM)
21265 | (1 << PC_REGNUM);
21266
21267 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21268
21269 if (arm_current_function_pac_enabled_p ())
21270 save_reg_mask |= 1 << IP_REGNUM;
21271
21272 /* Decide if we need to save the link register.
21273 Interrupt routines have their own banked link register,
21274 so they never need to save it.
21275 Otherwise if we do not use the link register we do not need to save
21276 it. If we are pushing other registers onto the stack however, we
21277 can save an instruction in the epilogue by pushing the link register
21278 now and then popping it back into the PC. This incurs extra memory
21279 accesses though, so we only do it when optimizing for size, and only
21280 if we know that we will not need a fancy return sequence. */
21281 if (df_regs_ever_live_p (LR_REGNUM)
21282 || (save_reg_mask
21283 && optimize_size
21284 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21285 && !crtl->tail_call_emit
21286 && !crtl->calls_eh_return))
21287 save_reg_mask |= 1 << LR_REGNUM;
21288
21289 if (cfun->machine->lr_save_eliminated)
21290 save_reg_mask &= ~ (1 << LR_REGNUM);
21291
21292 if (TARGET_REALLY_IWMMXT
21293 && ((bit_count (save_reg_mask)
21294 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21295 arm_compute_static_chain_stack_bytes())
21296 ) % 2) != 0)
21297 {
21298 /* The total number of registers that are going to be pushed
21299 onto the stack is odd. We need to ensure that the stack
21300 is 64-bit aligned before we start to save iWMMXt registers,
21301 and also before we start to create locals. (A local variable
21302 might be a double or long long, which we will load/store using
21303 an iWMMXt instruction.) Therefore we need to push another
21304 ARM register, so that the stack will be 64-bit aligned. We
21305 try to avoid using the arg registers (r0 - r3) as they might be
21306 used to pass values in a tail call. */
21307 for (reg = 4; reg <= 12; reg++)
21308 if ((save_reg_mask & (1 << reg)) == 0)
21309 break;
21310
21311 if (reg <= 12)
21312 save_reg_mask |= (1 << reg);
21313 else
21314 {
21315 cfun->machine->sibcall_blocked = 1;
21316 save_reg_mask |= (1 << 3);
21317 }
21318 }
21319
21320 /* We may need to push an additional register for use initializing the
21321 PIC base register. */
21322 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21323 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21324 {
21325 reg = thumb_find_work_register (1 << 4);
21326 if (!call_used_or_fixed_reg_p (reg))
21327 save_reg_mask |= (1 << reg);
21328 }
21329
21330 return save_reg_mask;
21331 }
21332
21333 /* Compute a bit mask of which core registers need to be
21334 saved on the stack for the current function. */
21335 static unsigned long
21336 thumb1_compute_save_core_reg_mask (void)
21337 {
21338 unsigned long mask;
21339 unsigned reg;
21340
21341 mask = 0;
21342 for (reg = 0; reg < 12; reg ++)
21343 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21344 mask |= 1 << reg;
21345
21346 /* Handle the frame pointer as a special case. */
21347 if (frame_pointer_needed)
21348 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21349
21350 if (flag_pic
21351 && !TARGET_SINGLE_PIC_BASE
21352 && arm_pic_register != INVALID_REGNUM
21353 && crtl->uses_pic_offset_table)
21354 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21355
21356 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21357 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21358 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21359
21360 /* LR will also be pushed if any lo regs are pushed. */
21361 if (mask & 0xff || thumb_force_lr_save ())
21362 mask |= (1 << LR_REGNUM);
21363
21364 bool call_clobbered_scratch
21365 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21366 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21367
21368 /* Make sure we have a low work register if we need one. We will
21369 need one if we are going to push a high register, but we are not
21370 currently intending to push a low register. However if both the
21371 prologue and epilogue have a spare call-clobbered low register,
21372 then we won't need to find an additional work register. It does
21373 not need to be the same register in the prologue and
21374 epilogue. */
21375 if ((mask & 0xff) == 0
21376 && !call_clobbered_scratch
21377 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21378 {
21379 /* Use thumb_find_work_register to choose which register
21380 we will use. If the register is live then we will
21381 have to push it. Use LAST_LO_REGNUM as our fallback
21382 choice for the register to select. */
21383 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21384 /* Make sure the register returned by thumb_find_work_register is
21385 not part of the return value. */
21386 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21387 reg = LAST_LO_REGNUM;
21388
21389 if (callee_saved_reg_p (reg))
21390 mask |= 1 << reg;
21391 }
21392
21393 /* The 504 below is 8 bytes less than 512 because there are two possible
21394 alignment words. We can't tell here if they will be present or not so we
21395 have to play it safe and assume that they are. */
21396 if ((CALLER_INTERWORKING_SLOT_SIZE +
21397 ROUND_UP_WORD (get_frame_size ()) +
21398 crtl->outgoing_args_size) >= 504)
21399 {
21400 /* This is the same as the code in thumb1_expand_prologue() which
21401 determines which register to use for stack decrement. */
21402 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21403 if (mask & (1 << reg))
21404 break;
21405
21406 if (reg > LAST_LO_REGNUM)
21407 {
21408 /* Make sure we have a register available for stack decrement. */
21409 mask |= 1 << LAST_LO_REGNUM;
21410 }
21411 }
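  /* Rough arithmetic sketch (the sizes are hypothetical): 512 minus the two
     possible 4-byte alignment words gives the 504 threshold used above.  A
     500-byte frame plus 8 bytes of outgoing arguments (508 >= 504) would
     trigger it, and if none of r4-r7 is already in the mask, r7
     (LAST_LO_REGNUM) is reserved so a low register is available for the
     stack decrement.  */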
21412
21413 return mask;
21414 }
21415
21416 /* Return the number of bytes required to save VFP registers. */
21417 static int
21418 arm_get_vfp_saved_size (void)
21419 {
21420 unsigned int regno;
21421 int count;
21422 int saved;
21423
21424 saved = 0;
21425 /* Space for saved VFP registers. */
21426 if (TARGET_VFP_BASE)
21427 {
21428 count = 0;
21429 for (regno = FIRST_VFP_REGNUM;
21430 regno < LAST_VFP_REGNUM;
21431 regno += 2)
21432 {
21433 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21434 {
21435 if (count > 0)
21436 {
21437 /* Workaround ARM10 VFPr1 bug. */
21438 if (count == 2 && !arm_arch6)
21439 count++;
21440 saved += count * 8;
21441 }
21442 count = 0;
21443 }
21444 else
21445 count++;
21446 }
21447 if (count > 0)
21448 {
21449 if (count == 2 && !arm_arch6)
21450 count++;
21451 saved += count * 8;
21452 }
21453 }
21454 return saved;
21455 }
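/* As a rough illustration of the computation above (which registers actually
   need saving is target- and function-specific): if d8-d15 all need saving,
   the loop counts one contiguous block of 8 D registers, i.e. 8 * 8 = 64
   bytes.  With the ARM10 VFPr1 workaround (!arm_arch6), a block of exactly
   two D registers is padded to three, so 24 bytes rather than 16.  */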
21456
21457
21458 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21459 everything bar the final return instruction. If simple_return is true,
21460 then do not output the epilogue, because it has already been emitted in RTL.
21461
21462 Note: do not forget to update length attribute of corresponding insn pattern
21463 when changing assembly output (e.g. the length attribute of
21464 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21465 register clearing sequences). */
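/* As a rough illustration only (the register sets are hypothetical and the
   exact mnemonics depend on the architecture and function type handled
   below): a normal function that saved {r4, r5, lr} typically returns with a
   single "pop {r4, r5, pc}", whereas an interrupt handler uses the
   exception-return form, e.g. "ldmfd sp!, {r4, r5, pc}^", so that the CPSR
   is restored as well.  */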
21466 const char *
21467 output_return_instruction (rtx operand, bool really_return, bool reverse,
21468 bool simple_return)
21469 {
21470 char conditional[10];
21471 char instr[100];
21472 unsigned reg;
21473 unsigned long live_regs_mask;
21474 unsigned long func_type;
21475 arm_stack_offsets *offsets;
21476
21477 func_type = arm_current_func_type ();
21478
21479 if (IS_NAKED (func_type))
21480 return "";
21481
21482 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21483 {
21484 /* If this function was declared non-returning, and we have
21485 found a tail call, then we have to trust that the called
21486 function won't return. */
21487 if (really_return)
21488 {
21489 rtx ops[2];
21490
21491 /* Otherwise, trap an attempted return by aborting. */
21492 ops[0] = operand;
21493 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21494 : "abort");
21495 assemble_external_libcall (ops[1]);
21496 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21497 }
21498
21499 return "";
21500 }
21501
21502 gcc_assert (!cfun->calls_alloca || really_return);
21503
21504 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21505
21506 cfun->machine->return_used_this_function = 1;
21507
21508 offsets = arm_get_frame_offsets ();
21509 live_regs_mask = offsets->saved_regs_mask;
21510
21511 if (!simple_return && live_regs_mask)
21512 {
21513 const char * return_reg;
21514
21515 /* If we do not have any special requirements for function exit
21516 (e.g. interworking) then we can load the return address
21517 directly into the PC. Otherwise we must load it into LR. */
21518 if (really_return
21519 && !IS_CMSE_ENTRY (func_type)
21520 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21521 return_reg = reg_names[PC_REGNUM];
21522 else
21523 return_reg = reg_names[LR_REGNUM];
21524
21525 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21526 {
21527 /* There are three possible reasons for the IP register
21528 being saved: 1) a stack frame was created, in which case
21529 IP contains the old stack pointer, or 2) an ISR routine
21530 corrupted it, or 3) it was saved to align the stack on
21531 iWMMXt. In case 1, restore IP into SP, otherwise just
21532 restore IP. */
21533 if (frame_pointer_needed)
21534 {
21535 live_regs_mask &= ~ (1 << IP_REGNUM);
21536 live_regs_mask |= (1 << SP_REGNUM);
21537 }
21538 else
21539 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21540 }
21541
21542 /* On some ARM architectures it is faster to use LDR rather than
21543 LDM to load a single register. On other architectures, the
21544 cost is the same. In 26 bit mode, or for exception handlers,
21545 we have to use LDM to load the PC so that the CPSR is also
21546 restored. */
21547 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21548 if (live_regs_mask == (1U << reg))
21549 break;
21550
21551 if (reg <= LAST_ARM_REGNUM
21552 && (reg != LR_REGNUM
21553 || ! really_return
21554 || ! IS_INTERRUPT (func_type)))
21555 {
21556 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21557 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21558 }
21559 else
21560 {
21561 char *p;
21562 int first = 1;
21563
21564 /* Generate the load multiple instruction to restore the
21565 registers. Note we can get here, even if
21566 frame_pointer_needed is true, but only if sp already
21567 points to the base of the saved core registers. */
21568 if (live_regs_mask & (1 << SP_REGNUM))
21569 {
21570 unsigned HOST_WIDE_INT stack_adjust;
21571
21572 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21573 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21574
21575 if (stack_adjust && arm_arch5t && TARGET_ARM)
21576 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21577 else
21578 {
21579 /* If we can't use ldmib (SA110 bug),
21580 then try to pop r3 instead. */
21581 if (stack_adjust)
21582 live_regs_mask |= 1 << 3;
21583
21584 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21585 }
21586 }
21587 /* For interrupt returns we have to use an LDM rather than
21588 a POP so that we can use the exception return variant. */
21589 else if (IS_INTERRUPT (func_type))
21590 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21591 else
21592 sprintf (instr, "pop%s\t{", conditional);
21593
21594 p = instr + strlen (instr);
21595
21596 for (reg = 0; reg <= SP_REGNUM; reg++)
21597 if (live_regs_mask & (1 << reg))
21598 {
21599 int l = strlen (reg_names[reg]);
21600
21601 if (first)
21602 first = 0;
21603 else
21604 {
21605 memcpy (p, ", ", 2);
21606 p += 2;
21607 }
21608
21609 memcpy (p, "%|", 2);
21610 memcpy (p + 2, reg_names[reg], l);
21611 p += l + 2;
21612 }
21613
21614 if (live_regs_mask & (1 << LR_REGNUM))
21615 {
21616 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21617 /* If returning from an interrupt, restore the CPSR. */
21618 if (IS_INTERRUPT (func_type))
21619 strcat (p, "^");
21620 }
21621 else
21622 strcpy (p, "}");
21623 }
21624
21625 output_asm_insn (instr, & operand);
21626
21627 /* See if we need to generate an extra instruction to
21628 perform the actual function return. */
21629 if (really_return
21630 && func_type != ARM_FT_INTERWORKED
21631 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21632 {
21633 /* The return has already been handled
21634 by loading the LR into the PC. */
21635 return "";
21636 }
21637 }
21638
21639 if (really_return)
21640 {
21641 switch ((int) ARM_FUNC_TYPE (func_type))
21642 {
21643 case ARM_FT_ISR:
21644 case ARM_FT_FIQ:
21645 /* ??? This is wrong for unified assembly syntax. */
21646 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21647 break;
21648
21649 case ARM_FT_INTERWORKED:
21650 gcc_assert (arm_arch5t || arm_arch4t);
21651 sprintf (instr, "bx%s\t%%|lr", conditional);
21652 break;
21653
21654 case ARM_FT_EXCEPTION:
21655 /* ??? This is wrong for unified assembly syntax. */
21656 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21657 break;
21658
21659 default:
21660 if (IS_CMSE_ENTRY (func_type))
21661 {
21662 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21663 emitted by cmse_nonsecure_entry_clear_before_return () and the
21664 VSTR/VLDR instructions in the prologue and epilogue. */
21665 if (!TARGET_HAVE_FPCXT_CMSE)
21666 {
21667 /* Check if we have to clear the 'GE bits', which are only used if
21668 parallel addition and subtraction instructions are available. */
21669 if (TARGET_INT_SIMD)
21670 snprintf (instr, sizeof (instr),
21671 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21672 else
21673 snprintf (instr, sizeof (instr),
21674 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21675
21676 output_asm_insn (instr, & operand);
21677 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21678 care of it. */
21679 if (TARGET_HARD_FLOAT)
21680 {
21681 /* Clear the cumulative exception-status bits (0-4,7) and
21682 the condition code bits (28-31) of the FPSCR. We need
21683 to remember to clear the first scratch register used
21684 (IP) and save and restore the second (r4).
21685
21686 Important note: the length of the
21687 thumb2_cmse_entry_return insn pattern must account for
21688 the size of the below instructions. */
21689 output_asm_insn ("push\t{%|r4}", & operand);
21690 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21691 output_asm_insn ("movw\t%|r4, #65376", & operand);
21692 output_asm_insn ("movt\t%|r4, #4095", & operand);
21693 output_asm_insn ("and\t%|ip, %|r4", & operand);
21694 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21695 output_asm_insn ("pop\t{%|r4}", & operand);
21696 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21697 }
21698 }
21699 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21700 }
21701 /* Use bx if it's available. */
21702 else if (arm_arch5t || arm_arch4t)
21703 sprintf (instr, "bx%s\t%%|lr", conditional);
21704 else
21705 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21706 break;
21707 }
21708
21709 output_asm_insn (instr, & operand);
21710 }
21711
21712 return "";
21713 }
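/* Purely as an illustration of the CMSE-entry path above (hard-float,
   without TARGET_HAVE_FPCXT_CMSE; the register choices mirror the code, not
   any particular compiled function), the emitted return sequence is roughly:

        msr     APSR_nzcvq, lr          @ APSR_nzcvqg if TARGET_INT_SIMD
        push    {r4}
        vmrs    ip, fpscr
        movw    r4, #65376
        movt    r4, #4095
        and     ip, r4
        vmsr    fpscr, ip
        pop     {r4}
        mov     ip, lr
        bxns    lr

   i.e. flags and FPSCR status bits are scrubbed before branching back to
   non-secure state.  */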
21714
21715 /* Output in FILE asm statements needed to declare the NAME of the function
21716 defined by its DECL node. */
21717
21718 void
21719 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21720 {
21721 size_t cmse_name_len;
21722 char *cmse_name = 0;
21723 char cmse_prefix[] = "__acle_se_";
21724
21725 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21726 extra function label for each function with the 'cmse_nonsecure_entry'
21727 attribute. This extra function label should be prepended with
21728 '__acle_se_', telling the linker that it needs to create secure gateway
21729 veneers for this function. */
21730 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21731 DECL_ATTRIBUTES (decl)))
21732 {
21733 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21734 cmse_name = XALLOCAVEC (char, cmse_name_len);
21735 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21736 targetm.asm_out.globalize_label (file, cmse_name);
21737
21738 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21739 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21740 }
21741
21742 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21743 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21744 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21745 ASM_OUTPUT_LABEL (file, name);
21746
21747 if (cmse_name)
21748 ASM_OUTPUT_LABEL (file, cmse_name);
21749
21750 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21751 }
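/* As a sketch (the name "foo" is hypothetical and the usual unwinding and
   mode directives are omitted), a cmse_nonsecure_entry function results in
   output along the lines of:

        .global __acle_se_foo
        .type   __acle_se_foo, %function
        .type   foo, %function
   foo:
   __acle_se_foo:

   giving the linker the extra label it needs to build the secure gateway
   veneer.  */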
21752
21753 /* Write the function name into the code section, directly preceding
21754 the function prologue.
21755
21756 Code will be output similar to this:
21757 t0
21758 .ascii "arm_poke_function_name", 0
21759 .align
21760 t1
21761 .word 0xff000000 + (t1 - t0)
21762 arm_poke_function_name
21763 mov ip, sp
21764 stmfd sp!, {fp, ip, lr, pc}
21765 sub fp, ip, #4
21766
21767 When performing a stack backtrace, code can inspect the value
21768 of 'pc' stored at 'fp' + 0. If the trace function then looks
21769 at location pc - 12 and the top 8 bits are set, then we know
21770 that there is a function name embedded immediately preceding this
21771 location, and that its length is ((pc[-3]) & ~0xff000000).
21772
21773 We assume that pc is declared as a pointer to an unsigned long.
21774
21775 It is of no benefit to output the function name if we are assembling
21776 a leaf function. These function types will not contain a stack
21777 backtrace structure, therefore it is not possible to determine the
21778 function name. */
21779 void
21780 arm_poke_function_name (FILE *stream, const char *name)
21781 {
21782 unsigned long alignlength;
21783 unsigned long length;
21784 rtx x;
21785
21786 length = strlen (name) + 1;
21787 alignlength = ROUND_UP_WORD (length);
21788
21789 ASM_OUTPUT_ASCII (stream, name, length);
21790 ASM_OUTPUT_ALIGN (stream, 2);
21791 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21792 assemble_aligned_integer (UNITS_PER_WORD, x);
21793 }
21794
21795 /* Place some comments into the assembler stream
21796 describing the current function. */
21797 static void
21798 arm_output_function_prologue (FILE *f)
21799 {
21800 unsigned long func_type;
21801
21802 /* Sanity check. */
21803 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21804
21805 func_type = arm_current_func_type ();
21806
21807 switch ((int) ARM_FUNC_TYPE (func_type))
21808 {
21809 default:
21810 case ARM_FT_NORMAL:
21811 break;
21812 case ARM_FT_INTERWORKED:
21813 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21814 break;
21815 case ARM_FT_ISR:
21816 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21817 break;
21818 case ARM_FT_FIQ:
21819 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21820 break;
21821 case ARM_FT_EXCEPTION:
21822 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21823 break;
21824 }
21825
21826 if (IS_NAKED (func_type))
21827 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21828
21829 if (IS_VOLATILE (func_type))
21830 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21831
21832 if (IS_NESTED (func_type))
21833 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21834 if (IS_STACKALIGN (func_type))
21835 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21836 if (IS_CMSE_ENTRY (func_type))
21837 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21838
21839 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21840 (HOST_WIDE_INT) crtl->args.size,
21841 crtl->args.pretend_args_size,
21842 (HOST_WIDE_INT) get_frame_size ());
21843
21844 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21845 frame_pointer_needed,
21846 cfun->machine->uses_anonymous_args);
21847
21848 if (cfun->machine->lr_save_eliminated)
21849 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21850
21851 if (crtl->calls_eh_return)
21852 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21853
21854 }
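/* For illustration only (the numbers are hypothetical), the comment block
   produced above for a small leaf function might read:

        @ args = 0, pretend = 0, frame = 8
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
*/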
21855
21856 static void
21857 arm_output_function_epilogue (FILE *)
21858 {
21859 arm_stack_offsets *offsets;
21860
21861 if (TARGET_THUMB1)
21862 {
21863 int regno;
21864
21865 /* Emit any call-via-reg trampolines that are needed for v4t support
21866 of call_reg and call_value_reg type insns. */
21867 for (regno = 0; regno < LR_REGNUM; regno++)
21868 {
21869 rtx label = cfun->machine->call_via[regno];
21870
21871 if (label != NULL)
21872 {
21873 switch_to_section (function_section (current_function_decl));
21874 targetm.asm_out.internal_label (asm_out_file, "L",
21875 CODE_LABEL_NUMBER (label));
21876 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21877 }
21878 }
21879
21880 /* ??? Probably not safe to set this here, since it assumes that a
21881 function will be emitted as assembly immediately after we generate
21882 RTL for it. This does not happen for inline functions. */
21883 cfun->machine->return_used_this_function = 0;
21884 }
21885 else /* TARGET_32BIT */
21886 {
21887 /* We need to take into account any stack-frame rounding. */
21888 offsets = arm_get_frame_offsets ();
21889
21890 gcc_assert (!use_return_insn (FALSE, NULL)
21891 || (cfun->machine->return_used_this_function != 0)
21892 || offsets->saved_regs == offsets->outgoing_args
21893 || frame_pointer_needed);
21894 }
21895 }
21896
21897 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21898 STR and STRD. If an even number of registers are being pushed, one
21899 or more STRD patterns are created for each register pair. If an
21900 odd number of registers are pushed, emit an initial STR followed by
21901 as many STRD instructions as are needed. This works best when the
21902 stack is initially 64-bit aligned (the normal case), since it
21903 ensures that each STRD is also 64-bit aligned. */
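/* A rough sketch of the strategy just described (the register sets are
   hypothetical): pushing {r4, r5, r6, r7} would come out as approximately

        strd    r4, r5, [sp, #-16]!
        strd    r6, r7, [sp, #8]

   while the odd-sized set {r4, r5, r6} becomes approximately

        str     r4, [sp, #-12]!
        strd    r5, r6, [sp, #4]

   so every STRD stays doubleword aligned.  */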
21904 static void
21905 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21906 {
21907 int num_regs = 0;
21908 int i;
21909 int regno;
21910 rtx par = NULL_RTX;
21911 rtx dwarf = NULL_RTX;
21912 rtx tmp;
21913 bool first = true;
21914
21915 num_regs = bit_count (saved_regs_mask);
21916
21917 /* Must be at least one register to save, and can't save SP or PC. */
21918 gcc_assert (num_regs > 0 && num_regs <= 14);
21919 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21920 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21921
21922 /* Create sequence for DWARF info. All the frame-related data for
21923 debugging is held in this wrapper. */
21924 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21925
21926 /* Describe the stack adjustment. */
21927 tmp = gen_rtx_SET (stack_pointer_rtx,
21928 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21929 RTX_FRAME_RELATED_P (tmp) = 1;
21930 XVECEXP (dwarf, 0, 0) = tmp;
21931
21932 /* Find the first register. */
21933 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21934 ;
21935
21936 i = 0;
21937
21938 /* If there's an odd number of registers to push, start off by
21939 pushing a single register. This ensures that subsequent strd
21940 operations are dword aligned (assuming that SP was originally
21941 64-bit aligned). */
21942 if ((num_regs & 1) != 0)
21943 {
21944 rtx reg, mem, insn;
21945
21946 reg = gen_rtx_REG (SImode, regno);
21947 if (num_regs == 1)
21948 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21949 stack_pointer_rtx));
21950 else
21951 mem = gen_frame_mem (Pmode,
21952 gen_rtx_PRE_MODIFY
21953 (Pmode, stack_pointer_rtx,
21954 plus_constant (Pmode, stack_pointer_rtx,
21955 -4 * num_regs)));
21956
21957 tmp = gen_rtx_SET (mem, reg);
21958 RTX_FRAME_RELATED_P (tmp) = 1;
21959 insn = emit_insn (tmp);
21960 RTX_FRAME_RELATED_P (insn) = 1;
21961 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21962 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21963 RTX_FRAME_RELATED_P (tmp) = 1;
21964 i++;
21965 regno++;
21966 XVECEXP (dwarf, 0, i) = tmp;
21967 first = false;
21968 }
21969
21970 while (i < num_regs)
21971 if (saved_regs_mask & (1 << regno))
21972 {
21973 rtx reg1, reg2, mem1, mem2;
21974 rtx tmp0, tmp1, tmp2;
21975 int regno2;
21976
21977 /* Find the register to pair with this one. */
21978 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21979 regno2++)
21980 ;
21981
21982 reg1 = gen_rtx_REG (SImode, regno);
21983 reg2 = gen_rtx_REG (SImode, regno2);
21984
21985 if (first)
21986 {
21987 rtx insn;
21988
21989 first = false;
21990 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21991 stack_pointer_rtx,
21992 -4 * num_regs));
21993 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21994 stack_pointer_rtx,
21995 -4 * (num_regs - 1)));
21996 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21997 plus_constant (Pmode, stack_pointer_rtx,
21998 -4 * (num_regs)));
21999 tmp1 = gen_rtx_SET (mem1, reg1);
22000 tmp2 = gen_rtx_SET (mem2, reg2);
22001 RTX_FRAME_RELATED_P (tmp0) = 1;
22002 RTX_FRAME_RELATED_P (tmp1) = 1;
22003 RTX_FRAME_RELATED_P (tmp2) = 1;
22004 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22005 XVECEXP (par, 0, 0) = tmp0;
22006 XVECEXP (par, 0, 1) = tmp1;
22007 XVECEXP (par, 0, 2) = tmp2;
22008 insn = emit_insn (par);
22009 RTX_FRAME_RELATED_P (insn) = 1;
22010 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22011 }
22012 else
22013 {
22014 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22015 stack_pointer_rtx,
22016 4 * i));
22017 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22018 stack_pointer_rtx,
22019 4 * (i + 1)));
22020 tmp1 = gen_rtx_SET (mem1, reg1);
22021 tmp2 = gen_rtx_SET (mem2, reg2);
22022 RTX_FRAME_RELATED_P (tmp1) = 1;
22023 RTX_FRAME_RELATED_P (tmp2) = 1;
22024 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22025 XVECEXP (par, 0, 0) = tmp1;
22026 XVECEXP (par, 0, 1) = tmp2;
22027 emit_insn (par);
22028 }
22029
22030 /* Create unwind information. This is an approximation. */
22031 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22032 plus_constant (Pmode,
22033 stack_pointer_rtx,
22034 4 * i)),
22035 reg1);
22036 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22037 plus_constant (Pmode,
22038 stack_pointer_rtx,
22039 4 * (i + 1))),
22040 reg2);
22041
22042 RTX_FRAME_RELATED_P (tmp1) = 1;
22043 RTX_FRAME_RELATED_P (tmp2) = 1;
22044 XVECEXP (dwarf, 0, i + 1) = tmp1;
22045 XVECEXP (dwarf, 0, i + 2) = tmp2;
22046 i += 2;
22047 regno = regno2 + 1;
22048 }
22049 else
22050 regno++;
22051
22052 return;
22053 }
22054
22055 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22056 whenever possible, otherwise it emits single-word stores. The first store
22057 also allocates stack space for all saved registers, using pre-indexed
22058 addressing with writeback. All other stores use offset addressing. If no STRD
22059 can be emitted, this function emits a sequence of single-word stores,
22060 and not an STM as before, because single-word stores provide more
22061 scheduling freedom and can be turned into an STM by peephole optimizations. */
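/* A rough sketch of the scheme just described (the register set is
   hypothetical): for {r4, r5, r7}, where only r4/r5 form a consecutive
   even/odd pair, the stores come out as approximately

        strd    r4, r5, [sp, #-12]!
        str     r7, [sp, #8]

   with the first store performing the whole stack allocation.  */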
22062 static void
22063 arm_emit_strd_push (unsigned long saved_regs_mask)
22064 {
22065 int num_regs = 0;
22066 int i, j, dwarf_index = 0;
22067 int offset = 0;
22068 rtx dwarf = NULL_RTX;
22069 rtx insn = NULL_RTX;
22070 rtx tmp, mem;
22071
22072 /* TODO: More efficient code could be emitted by changing the
22073 layout, e.g., by first pushing all pairs that can use STRD to keep the
22074 stack aligned, and then pushing all other registers. */
22075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22076 if (saved_regs_mask & (1 << i))
22077 num_regs++;
22078
22079 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22080 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22081 gcc_assert (num_regs > 0);
22082
22083 /* Create sequence for DWARF info. */
22084 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22085
22086 /* For dwarf info, we generate explicit stack update. */
22087 tmp = gen_rtx_SET (stack_pointer_rtx,
22088 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22089 RTX_FRAME_RELATED_P (tmp) = 1;
22090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22091
22092 /* Save registers. */
22093 offset = - 4 * num_regs;
22094 j = 0;
22095 while (j <= LAST_ARM_REGNUM)
22096 if (saved_regs_mask & (1 << j))
22097 {
22098 if ((j % 2 == 0)
22099 && (saved_regs_mask & (1 << (j + 1))))
22100 {
22101 /* Current register and previous register form register pair for
22102 which STRD can be generated. */
22103 if (offset < 0)
22104 {
22105 /* Allocate stack space for all saved registers. */
22106 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22107 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22108 mem = gen_frame_mem (DImode, tmp);
22109 offset = 0;
22110 }
22111 else if (offset > 0)
22112 mem = gen_frame_mem (DImode,
22113 plus_constant (Pmode,
22114 stack_pointer_rtx,
22115 offset));
22116 else
22117 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22118
22119 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22120 RTX_FRAME_RELATED_P (tmp) = 1;
22121 tmp = emit_insn (tmp);
22122
22123 /* Record the first store insn. */
22124 if (dwarf_index == 1)
22125 insn = tmp;
22126
22127 /* Generate dwarf info. */
22128 mem = gen_frame_mem (SImode,
22129 plus_constant (Pmode,
22130 stack_pointer_rtx,
22131 offset));
22132 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22133 RTX_FRAME_RELATED_P (tmp) = 1;
22134 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22135
22136 mem = gen_frame_mem (SImode,
22137 plus_constant (Pmode,
22138 stack_pointer_rtx,
22139 offset + 4));
22140 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22141 RTX_FRAME_RELATED_P (tmp) = 1;
22142 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22143
22144 offset += 8;
22145 j += 2;
22146 }
22147 else
22148 {
22149 /* Emit a single word store. */
22150 if (offset < 0)
22151 {
22152 /* Allocate stack space for all saved registers. */
22153 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22154 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22155 mem = gen_frame_mem (SImode, tmp);
22156 offset = 0;
22157 }
22158 else if (offset > 0)
22159 mem = gen_frame_mem (SImode,
22160 plus_constant (Pmode,
22161 stack_pointer_rtx,
22162 offset));
22163 else
22164 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22165
22166 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22167 RTX_FRAME_RELATED_P (tmp) = 1;
22168 tmp = emit_insn (tmp);
22169
22170 /* Record the first store insn. */
22171 if (dwarf_index == 1)
22172 insn = tmp;
22173
22174 /* Generate dwarf info. */
22175 mem = gen_frame_mem (SImode,
22176 plus_constant(Pmode,
22177 stack_pointer_rtx,
22178 offset));
22179 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22180 RTX_FRAME_RELATED_P (tmp) = 1;
22181 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22182
22183 offset += 4;
22184 j += 1;
22185 }
22186 }
22187 else
22188 j++;
22189
22190 /* Attach dwarf info to the first insn we generate. */
22191 gcc_assert (insn != NULL_RTX);
22192 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22193 RTX_FRAME_RELATED_P (insn) = 1;
22194 }
22195
22196 /* Generate and emit an insn that we will recognize as a push_multi.
22197 Unfortunately, since this insn does not reflect very well the actual
22198 semantics of the operation, we need to annotate the insn for the benefit
22199 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22200 MASK for registers that should be annotated for DWARF2 frame unwind
22201 information. */
22202 static rtx
22203 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22204 {
22205 int num_regs = 0;
22206 int num_dwarf_regs = 0;
22207 int i, j;
22208 rtx par;
22209 rtx dwarf;
22210 int dwarf_par_index;
22211 rtx tmp, reg;
22212
22213 /* We don't record the PC in the dwarf frame information. */
22214 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22215
22216 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22217 {
22218 if (mask & (1 << i))
22219 num_regs++;
22220 if (dwarf_regs_mask & (1 << i))
22221 num_dwarf_regs++;
22222 }
22223
22224 gcc_assert (num_regs && num_regs <= 16);
22225 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22226
22227 /* For the body of the insn we are going to generate an UNSPEC in
22228 parallel with several USEs. This allows the insn to be recognized
22229 by the push_multi pattern in the arm.md file.
22230
22231 The body of the insn looks something like this:
22232
22233 (parallel [
22234 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22235 (const_int:SI <num>)))
22236 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22237 (use (reg:SI XX))
22238 (use (reg:SI YY))
22239 ...
22240 ])
22241
22242 For the frame note however, we try to be more explicit and actually
22243 show each register being stored into the stack frame, plus a (single)
22244 decrement of the stack pointer. We do it this way in order to be
22245 friendly to the stack unwinding code, which only wants to see a single
22246 stack decrement per instruction. The RTL we generate for the note looks
22247 something like this:
22248
22249 (sequence [
22250 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22251 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22252 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22253 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22254 ...
22255 ])
22256
22257 FIXME: In an ideal world the PRE_MODIFY would not exist and
22258 instead we'd have a parallel expression detailing all
22259 the stores to the various memory addresses so that debug
22260 information is more up-to-date. Remember however while writing
22261 this to take care of the constraints with the push instruction.
22262
22263 Note also that this has to be taken care of for the VFP registers.
22264
22265 For more see PR43399. */
22266
22267 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22268 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22269 dwarf_par_index = 1;
22270
22271 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22272 {
22273 if (mask & (1 << i))
22274 {
22275 reg = gen_rtx_REG (SImode, i);
22276
22277 XVECEXP (par, 0, 0)
22278 = gen_rtx_SET (gen_frame_mem
22279 (BLKmode,
22280 gen_rtx_PRE_MODIFY (Pmode,
22281 stack_pointer_rtx,
22282 plus_constant
22283 (Pmode, stack_pointer_rtx,
22284 -4 * num_regs))
22285 ),
22286 gen_rtx_UNSPEC (BLKmode,
22287 gen_rtvec (1, reg),
22288 UNSPEC_PUSH_MULT));
22289
22290 if (dwarf_regs_mask & (1 << i))
22291 {
22292 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22293 reg);
22294 RTX_FRAME_RELATED_P (tmp) = 1;
22295 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22296 }
22297
22298 break;
22299 }
22300 }
22301
22302 for (j = 1, i++; j < num_regs; i++)
22303 {
22304 if (mask & (1 << i))
22305 {
22306 reg = gen_rtx_REG (SImode, i);
22307
22308 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22309
22310 if (dwarf_regs_mask & (1 << i))
22311 {
22312 tmp
22313 = gen_rtx_SET (gen_frame_mem
22314 (SImode,
22315 plus_constant (Pmode, stack_pointer_rtx,
22316 4 * j)),
22317 reg);
22318 RTX_FRAME_RELATED_P (tmp) = 1;
22319 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22320 }
22321
22322 j++;
22323 }
22324 }
22325
22326 par = emit_insn (par);
22327
22328 tmp = gen_rtx_SET (stack_pointer_rtx,
22329 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22330 RTX_FRAME_RELATED_P (tmp) = 1;
22331 XVECEXP (dwarf, 0, 0) = tmp;
22332
22333 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22334
22335 return par;
22336 }
22337
22338 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22339 SIZE is the offset to be adjusted.
22340 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22341 static void
22342 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22343 {
22344 rtx dwarf;
22345
22346 RTX_FRAME_RELATED_P (insn) = 1;
22347 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22348 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22349 }
22350
22351 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22352 SAVED_REGS_MASK shows which registers need to be restored.
22353
22354 Unfortunately, since this insn does not reflect very well the actual
22355 semantics of the operation, we need to annotate the insn for the benefit
22356 of DWARF2 frame unwind information. */
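/* For illustration only (hypothetical register sets): popping {r4, r5, r6}
   without PC in the mask assembles to roughly "pop {r4, r5, r6}" plus a
   REG_CFA_ADJUST_CFA note for the 12-byte adjustment, while a mask that
   includes PC is emitted as a jump insn, roughly "pop {r4, r5, pc}", which
   also performs the return.  */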
22357 static void
22358 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22359 {
22360 int num_regs = 0;
22361 int i, j;
22362 rtx par;
22363 rtx dwarf = NULL_RTX;
22364 rtx tmp, reg;
22365 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22366 int offset_adj;
22367 int emit_update;
22368
22369 offset_adj = return_in_pc ? 1 : 0;
22370 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22371 if (saved_regs_mask & (1 << i))
22372 num_regs++;
22373
22374 gcc_assert (num_regs && num_regs <= 16);
22375
22376 /* If SP is in the register list, then we don't emit the SP update insn. */
22377 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22378
22379 /* The parallel needs to hold num_regs SETs
22380 and one SET for the stack update. */
22381 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22382
22383 if (return_in_pc)
22384 XVECEXP (par, 0, 0) = ret_rtx;
22385
22386 if (emit_update)
22387 {
22388 /* Increment the stack pointer, based on there being
22389 num_regs 4-byte registers to restore. */
22390 tmp = gen_rtx_SET (stack_pointer_rtx,
22391 plus_constant (Pmode,
22392 stack_pointer_rtx,
22393 4 * num_regs));
22394 RTX_FRAME_RELATED_P (tmp) = 1;
22395 XVECEXP (par, 0, offset_adj) = tmp;
22396 }
22397
22398 /* Now restore every reg, which may include PC. */
22399 for (j = 0, i = 0; j < num_regs; i++)
22400 if (saved_regs_mask & (1 << i))
22401 {
22402 reg = gen_rtx_REG (SImode, i);
22403 if ((num_regs == 1) && emit_update && !return_in_pc)
22404 {
22405 /* Emit single load with writeback. */
22406 tmp = gen_frame_mem (SImode,
22407 gen_rtx_POST_INC (Pmode,
22408 stack_pointer_rtx));
22409 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22410 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22411 return;
22412 }
22413
22414 tmp = gen_rtx_SET (reg,
22415 gen_frame_mem
22416 (SImode,
22417 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22418 RTX_FRAME_RELATED_P (tmp) = 1;
22419 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22420
22421 /* We need to maintain a sequence for DWARF info too. As the dwarf
22422 info should not include the PC, skip the PC. */
22423 if (i != PC_REGNUM)
22424 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22425
22426 j++;
22427 }
22428
22429 if (return_in_pc)
22430 par = emit_jump_insn (par);
22431 else
22432 par = emit_insn (par);
22433
22434 REG_NOTES (par) = dwarf;
22435 if (!return_in_pc)
22436 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22437 stack_pointer_rtx, stack_pointer_rtx);
22438 }
22439
22440 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22441 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22442
22443 Unfortunately, since this insn does not reflect very well the actual
22444 semantics of the operation, we need to annotate the insn for the benefit
22445 of DWARF2 frame unwind information. */
22446 static void
22447 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22448 {
22449 int i, j;
22450 rtx par;
22451 rtx dwarf = NULL_RTX;
22452 rtx tmp, reg;
22453
22454 gcc_assert (num_regs && num_regs <= 32);
22455
22456 /* Workaround ARM10 VFPr1 bug. */
22457 if (num_regs == 2 && !arm_arch6)
22458 {
22459 if (first_reg == 15)
22460 first_reg--;
22461
22462 num_regs++;
22463 }
22464
22465 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22466 there could be up to 32 D-registers to restore.
22467 If there are more than 16 D-registers, make two recursive calls,
22468 each of which emits one pop_multi instruction. */
22469 if (num_regs > 16)
22470 {
22471 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22472 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22473 return;
22474 }
22475
22476 /* The parallel needs to hold num_regs SETs
22477 and one SET for the stack update. */
22478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22479
22480 /* Increment the stack pointer, based on there being
22481 num_regs 8-byte registers to restore. */
22482 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22483 RTX_FRAME_RELATED_P (tmp) = 1;
22484 XVECEXP (par, 0, 0) = tmp;
22485
22486 /* Now show every reg that will be restored, using a SET for each. */
22487 for (j = 0, i=first_reg; j < num_regs; i += 2)
22488 {
22489 reg = gen_rtx_REG (DFmode, i);
22490
22491 tmp = gen_rtx_SET (reg,
22492 gen_frame_mem
22493 (DFmode,
22494 plus_constant (Pmode, base_reg, 8 * j)));
22495 RTX_FRAME_RELATED_P (tmp) = 1;
22496 XVECEXP (par, 0, j + 1) = tmp;
22497
22498 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22499
22500 j++;
22501 }
22502
22503 par = emit_insn (par);
22504 REG_NOTES (par) = dwarf;
22505
22506 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
22507 if (REGNO (base_reg) == IP_REGNUM)
22508 {
22509 RTX_FRAME_RELATED_P (par) = 1;
22510 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22511 }
22512 else
22513 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22514 base_reg, base_reg);
22515 }
22516
22517 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22518 even number of registers is being popped, multiple LDRD patterns are created for
22519 all register pairs. If an odd number of registers is popped, the last register
22520 is loaded using an LDR pattern. */
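/* A rough sketch (the register set is hypothetical): popping {r4, r5, r6}
   comes out as approximately

        ldrd    r4, r5, [sp]
        add     sp, sp, #8
        ldr     r6, [sp], #4

   i.e. pairs via LDRD, one explicit stack update, then a single
   post-incrementing LDR for the leftover register.  */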
22521 static void
22522 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22523 {
22524 int num_regs = 0;
22525 int i, j;
22526 rtx par = NULL_RTX;
22527 rtx dwarf = NULL_RTX;
22528 rtx tmp, reg, tmp1;
22529 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22530
22531 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22532 if (saved_regs_mask & (1 << i))
22533 num_regs++;
22534
22535 gcc_assert (num_regs && num_regs <= 16);
22536
22537 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22538 to be popped. So, if num_regs was even, it now becomes odd and we
22539 can generate a pop with PC; if num_regs was odd, it is now even and
22540 an ldr with return can be generated for PC. */
22541 if (return_in_pc)
22542 num_regs--;
22543
22544 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22545
22546 /* Var j iterates over all the registers in saved_regs_mask, while
22547 var i gives the index of each saved register in the stack frame.
22548 A PARALLEL RTX of a register pair is created here so that the LDRD
22549 pattern can be matched. As PC is always the last register to be
22550 popped, and we have already decremented num_regs if PC is present,
22551 we don't have to worry about PC in this loop. */
22552 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22553 if (saved_regs_mask & (1 << j))
22554 {
22555 /* Create RTX for memory load. */
22556 reg = gen_rtx_REG (SImode, j);
22557 tmp = gen_rtx_SET (reg,
22558 gen_frame_mem (SImode,
22559 plus_constant (Pmode,
22560 stack_pointer_rtx, 4 * i)));
22561 RTX_FRAME_RELATED_P (tmp) = 1;
22562
22563 if (i % 2 == 0)
22564 {
22565 /* When saved-register index (i) is even, the RTX to be emitted is
22566 yet to be created. Hence create it first. The LDRD pattern we
22567 are generating is :
22568 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22569 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22570 where target registers need not be consecutive. */
22571 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22572 dwarf = NULL_RTX;
22573 }
22574
22575 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22576 added as 0th element and if i is odd, reg_i is added as 1st element
22577 of LDRD pattern shown above. */
22578 XVECEXP (par, 0, (i % 2)) = tmp;
22579 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22580
22581 if ((i % 2) == 1)
22582 {
22583 /* When saved-register index (i) is odd, RTXs for both the registers
22584 to be loaded are generated in above given LDRD pattern, and the
22585 pattern can be emitted now. */
22586 par = emit_insn (par);
22587 REG_NOTES (par) = dwarf;
22588 RTX_FRAME_RELATED_P (par) = 1;
22589 }
22590
22591 i++;
22592 }
22593
22594 /* If the number of registers popped is odd and return_in_pc is false, or
22595 the number of registers is even and return_in_pc is true, the last
22596 register is popped using LDR (it can be PC as well). Hence, adjust the
22597 stack first and then use LDR with post increment. */
22598
22599 /* Increment the stack pointer, based on there being
22600 num_regs 4-byte registers to restore. */
22601 tmp = gen_rtx_SET (stack_pointer_rtx,
22602 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22603 RTX_FRAME_RELATED_P (tmp) = 1;
22604 tmp = emit_insn (tmp);
22605 if (!return_in_pc)
22606 {
22607 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22608 stack_pointer_rtx, stack_pointer_rtx);
22609 }
22610
22611 dwarf = NULL_RTX;
22612
22613 if (((num_regs % 2) == 1 && !return_in_pc)
22614 || ((num_regs % 2) == 0 && return_in_pc))
22615 {
22616 /* Scan for the single register to be popped. Skip until the saved
22617 register is found. */
22618 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22619
22620 /* Gen LDR with post increment here. */
22621 tmp1 = gen_rtx_MEM (SImode,
22622 gen_rtx_POST_INC (SImode,
22623 stack_pointer_rtx));
22624 set_mem_alias_set (tmp1, get_frame_alias_set ());
22625
22626 reg = gen_rtx_REG (SImode, j);
22627 tmp = gen_rtx_SET (reg, tmp1);
22628 RTX_FRAME_RELATED_P (tmp) = 1;
22629 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22630
22631 if (return_in_pc)
22632 {
22633 /* If return_in_pc, j must be PC_REGNUM. */
22634 gcc_assert (j == PC_REGNUM);
22635 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22636 XVECEXP (par, 0, 0) = ret_rtx;
22637 XVECEXP (par, 0, 1) = tmp;
22638 par = emit_jump_insn (par);
22639 }
22640 else
22641 {
22642 par = emit_insn (tmp);
22643 REG_NOTES (par) = dwarf;
22644 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22645 stack_pointer_rtx, stack_pointer_rtx);
22646 }
22647
22648 }
22649 else if ((num_regs % 2) == 1 && return_in_pc)
22650 {
22651 /* There are 2 registers to be popped. So, generate the pattern
22652 pop_multiple_with_stack_update_and_return to pop them and return via PC. */
22653 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22654 }
22655
22656 return;
22657 }
22658
22659 /* LDRD in ARM mode needs consecutive registers as operands. This function
22660 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22661 offset addressing and then generates one separate stack update. This provides
22662 more scheduling freedom, compared to writeback on every load. However,
22663 if the function returns using load into PC directly
22664 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22665 before the last load. TODO: Add a peephole optimization to recognize
22666 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22667 peephole optimization to merge the load at stack-offset zero
22668 with the stack update instruction using load with writeback
22669 in post-index addressing mode. */
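/* A rough sketch (the register set is hypothetical): popping {r4, r5, r6, pc}
   comes out as approximately

        ldrd    r4, r5, [sp]
        ldr     r6, [sp, #8]
        add     sp, sp, #12
        ldr     pc, [sp], #4

   i.e. offset-addressed loads, one stack update, and a final load into PC
   that performs the return.  */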
22670 static void
22671 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22672 {
22673 int j = 0;
22674 int offset = 0;
22675 rtx par = NULL_RTX;
22676 rtx dwarf = NULL_RTX;
22677 rtx tmp, mem;
22678
22679 /* Restore saved registers. */
22680 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22681 j = 0;
22682 while (j <= LAST_ARM_REGNUM)
22683 if (saved_regs_mask & (1 << j))
22684 {
22685 if ((j % 2) == 0
22686 && (saved_regs_mask & (1 << (j + 1)))
22687 && (j + 1) != PC_REGNUM)
22688 {
22689 /* Current register and next register form register pair for which
22690 LDRD can be generated. PC is always the last register popped, and
22691 we handle it separately. */
22692 if (offset > 0)
22693 mem = gen_frame_mem (DImode,
22694 plus_constant (Pmode,
22695 stack_pointer_rtx,
22696 offset));
22697 else
22698 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22699
22700 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22701 tmp = emit_insn (tmp);
22702 RTX_FRAME_RELATED_P (tmp) = 1;
22703
22704 /* Generate dwarf info. */
22705
22706 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22707 gen_rtx_REG (SImode, j),
22708 NULL_RTX);
22709 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22710 gen_rtx_REG (SImode, j + 1),
22711 dwarf);
22712
22713 REG_NOTES (tmp) = dwarf;
22714
22715 offset += 8;
22716 j += 2;
22717 }
22718 else if (j != PC_REGNUM)
22719 {
22720 /* Emit a single word load. */
22721 if (offset > 0)
22722 mem = gen_frame_mem (SImode,
22723 plus_constant (Pmode,
22724 stack_pointer_rtx,
22725 offset));
22726 else
22727 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22728
22729 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22730 tmp = emit_insn (tmp);
22731 RTX_FRAME_RELATED_P (tmp) = 1;
22732
22733 /* Generate dwarf info. */
22734 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22735 gen_rtx_REG (SImode, j),
22736 NULL_RTX);
22737
22738 offset += 4;
22739 j += 1;
22740 }
22741 else /* j == PC_REGNUM */
22742 j++;
22743 }
22744 else
22745 j++;
22746
22747 /* Update the stack. */
22748 if (offset > 0)
22749 {
22750 tmp = gen_rtx_SET (stack_pointer_rtx,
22751 plus_constant (Pmode,
22752 stack_pointer_rtx,
22753 offset));
22754 tmp = emit_insn (tmp);
22755 arm_add_cfa_adjust_cfa_note (tmp, offset,
22756 stack_pointer_rtx, stack_pointer_rtx);
22757 offset = 0;
22758 }
22759
22760 if (saved_regs_mask & (1 << PC_REGNUM))
22761 {
22762 /* Only PC is to be popped. */
22763 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22764 XVECEXP (par, 0, 0) = ret_rtx;
22765 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22766 gen_frame_mem (SImode,
22767 gen_rtx_POST_INC (SImode,
22768 stack_pointer_rtx)));
22769 RTX_FRAME_RELATED_P (tmp) = 1;
22770 XVECEXP (par, 0, 1) = tmp;
22771 par = emit_jump_insn (par);
22772
22773 /* Generate dwarf info. */
22774 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22775 gen_rtx_REG (SImode, PC_REGNUM),
22776 NULL_RTX);
22777 REG_NOTES (par) = dwarf;
22778 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22779 stack_pointer_rtx, stack_pointer_rtx);
22780 }
22781 }
22782
22783 /* Calculate the size of the return value that is passed in registers. */
22784 static unsigned
22785 arm_size_return_regs (void)
22786 {
22787 machine_mode mode;
22788
22789 if (crtl->return_rtx != 0)
22790 mode = GET_MODE (crtl->return_rtx);
22791 else
22792 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22793
22794 return GET_MODE_SIZE (mode);
22795 }
22796
22797 /* Return true if the current function needs to save/restore LR. */
22798 static bool
22799 thumb_force_lr_save (void)
22800 {
22801 return !cfun->machine->lr_save_eliminated
22802 && (!crtl->is_leaf
22803 || thumb_far_jump_used_p ()
22804 || df_regs_ever_live_p (LR_REGNUM));
22805 }
22806
22807 /* We do not know if r3 will be available, because we have an
22808 indirect tail call happening in this
22809 particular case. */
22810 static bool
22811 is_indirect_tailcall_p (rtx call)
22812 {
22813 rtx pat = PATTERN (call);
22814
22815 /* Indirect tail call. */
22816 pat = XVECEXP (pat, 0, 0);
22817 if (GET_CODE (pat) == SET)
22818 pat = SET_SRC (pat);
22819
22820 pat = XEXP (XEXP (pat, 0), 0);
22821 return REG_P (pat);
22822 }
22823
22824 /* Return true if r3 is used by any of the tail call insns in the
22825 current function. */
22826 static bool
22827 any_sibcall_could_use_r3 (void)
22828 {
22829 edge_iterator ei;
22830 edge e;
22831
22832 if (!crtl->tail_call_emit)
22833 return false;
22834 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22835 if (e->flags & EDGE_SIBCALL)
22836 {
22837 rtx_insn *call = BB_END (e->src);
22838 if (!CALL_P (call))
22839 call = prev_nonnote_nondebug_insn (call);
22840 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22841 if (find_regno_fusage (call, USE, 3)
22842 || is_indirect_tailcall_p (call))
22843 return true;
22844 }
22845 return false;
22846 }
22847
22848
22849 /* Compute the distance from register FROM to register TO.
22850 These can be the arg pointer (26), the soft frame pointer (25),
22851 the stack pointer (13) or the hard frame pointer (11).
22852 In thumb mode r7 is used as the soft frame pointer, if needed.
22853 Typical stack layout looks like this:
22854
22855        old stack pointer -> |    |
22856                              ----
22857                             |    | \
22858                             |    |   saved arguments for
22859                             |    |   vararg functions
22860                             |    | /
22861                              --
22862    hard FP & arg pointer -> |    | \
22863                             |    |   stack
22864                             |    |   frame
22865                             |    | /
22866                              --
22867                             |    | \
22868                             |    |   call saved
22869                             |    |   registers
22870       soft frame pointer -> |    | /
22871                              --
22872                             |    | \
22873                             |    |   local
22874                             |    |   variables
22875      locals base pointer -> |    | /
22876                              --
22877                             |    | \
22878                             |    |   outgoing
22879                             |    |   arguments
22880    current stack pointer -> |    | /
22881                              --
22882
22883 For a given function some or all of these stack components
22884 may not be needed, giving rise to the possibility of
22885 eliminating some of the registers.
22886
22887 The values returned by this function must reflect the behavior
22888 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22889
22890 The sign of the number returned reflects the direction of stack
22891 growth, so the values are positive for all eliminations except
22892 from the soft frame pointer to the hard frame pointer.
22893
22894 SFP may point just inside the local variables block to ensure correct
22895 alignment. */
22896
22897
22898 /* Return cached stack offsets. */
22899
22900 static arm_stack_offsets *
22901 arm_get_frame_offsets (void)
22902 {
22903 struct arm_stack_offsets *offsets;
22904
22905 offsets = &cfun->machine->stack_offsets;
22906
22907 return offsets;
22908 }
22909
22910
22911 /* Calculate stack offsets. These are used to calculate register elimination
22912 offsets and in prologue/epilogue code. Also calculates which registers
22913 should be saved. */
22914
22915 static void
22916 arm_compute_frame_layout (void)
22917 {
22918 struct arm_stack_offsets *offsets;
22919 unsigned long func_type;
22920 int saved;
22921 int core_saved;
22922 HOST_WIDE_INT frame_size;
22923 int i;
22924
22925 offsets = &cfun->machine->stack_offsets;
22926
22927 /* Initially this is the size of the local variables. It will be translated
22928 into an offset once we have determined the size of preceding data. */
22929 frame_size = ROUND_UP_WORD (get_frame_size ());
22930
22931 /* Space for variadic functions. */
22932 offsets->saved_args = crtl->args.pretend_args_size;
22933
22934 /* In Thumb mode this is incorrect, but never used. */
22935 offsets->frame
22936 = (offsets->saved_args
22937 + arm_compute_static_chain_stack_bytes ()
22938 + (frame_pointer_needed ? 4 : 0));
22939
22940 if (TARGET_32BIT)
22941 {
22942 unsigned int regno;
22943
22944 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22945 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22946 saved = core_saved;
22947
22948 /* We know that SP will be doubleword aligned on entry, and we must
22949 preserve that condition at any subroutine call. We also require the
22950 soft frame pointer to be doubleword aligned. */
22951
22952 if (TARGET_REALLY_IWMMXT)
22953 {
22954 /* Check for the call-saved iWMMXt registers. */
22955 for (regno = FIRST_IWMMXT_REGNUM;
22956 regno <= LAST_IWMMXT_REGNUM;
22957 regno++)
22958 if (reg_needs_saving_p (regno))
22959 saved += 8;
22960 }
22961
22962 func_type = arm_current_func_type ();
22963 /* Space for saved VFP registers. */
22964 if (! IS_VOLATILE (func_type)
22965 && TARGET_VFP_BASE)
22966 saved += arm_get_vfp_saved_size ();
22967
22968 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22969 nonsecure entry functions with VSTR/VLDR. */
22970 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22971 saved += 4;
22972 }
22973 else /* TARGET_THUMB1 */
22974 {
22975 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22976 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22977 saved = core_saved;
22978 if (TARGET_BACKTRACE)
22979 saved += 16;
22980 }
22981
22982 /* Saved registers include the stack frame. */
22983 offsets->saved_regs
22984 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22985 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22986
22987 /* A leaf function does not need any stack alignment if it has nothing
22988 on the stack. */
22989 if (crtl->is_leaf && frame_size == 0
22990 /* However if it calls alloca(), we have a dynamically allocated
22991 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22992 && ! cfun->calls_alloca)
22993 {
22994 offsets->outgoing_args = offsets->soft_frame;
22995 offsets->locals_base = offsets->soft_frame;
22996 return;
22997 }
22998
22999 /* Ensure SFP has the correct alignment. */
23000 if (ARM_DOUBLEWORD_ALIGN
23001 && (offsets->soft_frame & 7))
23002 {
23003 offsets->soft_frame += 4;
23004 /* Try to align stack by pushing an extra reg. Don't bother doing this
23005 when there is a stack frame as the alignment will be rolled into
23006 the normal stack adjustment. */
23007 if (frame_size + crtl->outgoing_args_size == 0)
23008 {
23009 int reg = -1;
23010
23011 /* Register r3 is caller-saved. Normally it does not need to be
23012 saved on entry by the prologue. However if we choose to save
23013 it for padding then we may confuse the compiler into thinking
23014 a prologue sequence is required when in fact it is not. This
23015 will occur when shrink-wrapping if r3 is used as a scratch
23016 register and there are no other callee-saved writes.
23017
23018 This situation can be avoided, when other callee-saved registers are
23019 available and r3 is not strictly needed, by choosing a callee-saved
23020 register for the padding instead. */
23021 bool prefer_callee_reg_p = false;
23022
23023 /* If it is safe to use r3, then do so. This sometimes
23024 generates better code on Thumb-2 by avoiding the need to
23025 use 32-bit push/pop instructions. */
23026 if (! any_sibcall_could_use_r3 ()
23027 && arm_size_return_regs () <= 12
23028 && (offsets->saved_regs_mask & (1 << 3)) == 0
23029 && (TARGET_THUMB2
23030 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23031 {
23032 reg = 3;
23033 if (!TARGET_THUMB2)
23034 prefer_callee_reg_p = true;
23035 }
23036 if (reg == -1
23037 || prefer_callee_reg_p)
23038 {
23039 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23040 {
23041 /* Avoid fixed registers; they may be changed at
23042 arbitrary times so it's unsafe to restore them
23043 during the epilogue. */
23044 if (!fixed_regs[i]
23045 && (offsets->saved_regs_mask & (1 << i)) == 0)
23046 {
23047 reg = i;
23048 break;
23049 }
23050 }
23051 }
23052
23053 if (reg != -1)
23054 {
23055 offsets->saved_regs += 4;
23056 offsets->saved_regs_mask |= (1 << reg);
23057 }
23058 }
23059 }
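  /* Purely illustrative (the sizes and registers are hypothetical): a
     function saving only {r4, r5, lr} (12 bytes) with no locals and no
     outgoing arguments leaves the soft frame misaligned, so one more
     register is added here; on Thumb-2 that is normally r3 (giving
     push {r3, r4, r5, lr}), falling back to the first free callee-saved
     register, e.g. r6, when a sibling call might need r3.  */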
23060
23061 offsets->locals_base = offsets->soft_frame + frame_size;
23062 offsets->outgoing_args = (offsets->locals_base
23063 + crtl->outgoing_args_size);
23064
23065 if (ARM_DOUBLEWORD_ALIGN)
23066 {
23067 /* Ensure SP remains doubleword aligned. */
23068 if (offsets->outgoing_args & 7)
23069 offsets->outgoing_args += 4;
23070 gcc_assert (!(offsets->outgoing_args & 7));
23071 }
23072 }
23073
23074
23075 /* Calculate the relative offsets for the different stack pointers. Positive
23076 offsets are in the direction of stack growth. */
23077
23078 HOST_WIDE_INT
23079 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23080 {
23081 arm_stack_offsets *offsets;
23082
23083 offsets = arm_get_frame_offsets ();
23084
23085 /* OK, now we have enough information to compute the distances.
23086 There must be an entry in these switch tables for each pair
23087 of registers in ELIMINABLE_REGS, even if some of the entries
23088 seem to be redundant or useless. */
23089 switch (from)
23090 {
23091 case ARG_POINTER_REGNUM:
23092 switch (to)
23093 {
23094 case THUMB_HARD_FRAME_POINTER_REGNUM:
23095 return 0;
23096
23097 case FRAME_POINTER_REGNUM:
23098 /* This is the reverse of the soft frame pointer
23099 to hard frame pointer elimination below. */
23100 return offsets->soft_frame - offsets->saved_args;
23101
23102 case ARM_HARD_FRAME_POINTER_REGNUM:
23103 /* This is only non-zero in the case where the static chain register
23104 is stored above the frame. */
23105 return offsets->frame - offsets->saved_args - 4;
23106
23107 case STACK_POINTER_REGNUM:
23108 /* If nothing has been pushed on the stack at all
23109 then this will return -4. This *is* correct! */
23110 return offsets->outgoing_args - (offsets->saved_args + 4);
23111
23112 default:
23113 gcc_unreachable ();
23114 }
23115 gcc_unreachable ();
23116
23117 case FRAME_POINTER_REGNUM:
23118 switch (to)
23119 {
23120 case THUMB_HARD_FRAME_POINTER_REGNUM:
23121 return 0;
23122
23123 case ARM_HARD_FRAME_POINTER_REGNUM:
23124 /* The hard frame pointer points to the top entry in the
23125 stack frame. The soft frame pointer to the bottom entry
23126 in the stack frame. If there is no stack frame at all,
23127 then they are identical. */
23128
23129 return offsets->frame - offsets->soft_frame;
23130
23131 case STACK_POINTER_REGNUM:
23132 return offsets->outgoing_args - offsets->soft_frame;
23133
23134 default:
23135 gcc_unreachable ();
23136 }
23137 gcc_unreachable ();
23138
23139 default:
23140 /* You cannot eliminate from the stack pointer.
23141 In theory you could eliminate from the hard frame
23142 pointer to the stack pointer, but this will never
23143 happen, since if a stack frame is not needed the
23144 hard frame pointer will never be used. */
23145 gcc_unreachable ();
23146 }
23147 }
23148
23149 /* Given FROM and TO register numbers, say whether this elimination is
23150 allowed. Frame pointer elimination is automatically handled.
23151
23152 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23153 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23154 pointer, we must eliminate FRAME_POINTER_REGNUM into
23155 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23156 ARG_POINTER_REGNUM. */
23157
23158 bool
23159 arm_can_eliminate (const int from, const int to)
23160 {
23161 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23162 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23163 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23164 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23165 true);
23166 }
23167
23168 /* Emit RTL to save coprocessor registers on function entry. Returns the
23169 number of bytes pushed. */
23170
23171 static int
23172 arm_save_coproc_regs(void)
23173 {
23174 int saved_size = 0;
23175 unsigned reg;
23176 unsigned start_reg;
23177 rtx insn;
23178
23179 if (TARGET_REALLY_IWMMXT)
23180 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23181 if (reg_needs_saving_p (reg))
23182 {
23183 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23184 insn = gen_rtx_MEM (V2SImode, insn);
23185 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23186 RTX_FRAME_RELATED_P (insn) = 1;
23187 saved_size += 8;
23188 }
23189
23190 if (TARGET_VFP_BASE)
23191 {
23192 start_reg = FIRST_VFP_REGNUM;
23193
23194 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23195 {
23196 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23197 {
23198 if (start_reg != reg)
23199 saved_size += vfp_emit_fstmd (start_reg,
23200 (reg - start_reg) / 2);
23201 start_reg = reg + 2;
23202 }
23203 }
23204 if (start_reg != reg)
23205 saved_size += vfp_emit_fstmd (start_reg,
23206 (reg - start_reg) / 2);
23207 }
23208 return saved_size;
23209 }
23210
23211
23212 /* Set the Thumb frame pointer from the stack pointer. */
23213
23214 static void
23215 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23216 {
23217 HOST_WIDE_INT amount;
23218 rtx insn, dwarf;
23219
23220 amount = offsets->outgoing_args - offsets->locals_base;
23221 if (amount < 1024)
23222 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23223 stack_pointer_rtx, GEN_INT (amount)));
23224 else
23225 {
23226 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23227 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23228 expects the first two operands to be the same. */
23229 if (TARGET_THUMB2)
23230 {
23231 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23232 stack_pointer_rtx,
23233 hard_frame_pointer_rtx));
23234 }
23235 else
23236 {
23237 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23238 hard_frame_pointer_rtx,
23239 stack_pointer_rtx));
23240 }
23241 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23242 plus_constant (Pmode, stack_pointer_rtx, amount));
23243 RTX_FRAME_RELATED_P (dwarf) = 1;
23244 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23245 }
23246
23247 RTX_FRAME_RELATED_P (insn) = 1;
23248 }
23249
23250 struct scratch_reg {
23251 rtx reg;
23252 bool saved;
23253 };
23254
23255 /* Return a short-lived scratch register for use as a 2nd scratch register on
23256 function entry after the registers are saved in the prologue. This register
23257 must be released by means of release_scratch_register_on_entry. IP is not
23258 considered since it is always used as the 1st scratch register if available.
23259
23260 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23261 mask of live registers. */
23262
23263 static void
23264 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23265 unsigned long live_regs)
23266 {
23267 int regno = -1;
23268
23269 sr->saved = false;
23270
23271 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23272 regno = LR_REGNUM;
23273 else
23274 {
23275 unsigned int i;
23276
23277 for (i = 4; i < 11; i++)
23278 if (regno1 != i && (live_regs & (1 << i)) != 0)
23279 {
23280 regno = i;
23281 break;
23282 }
23283
23284 if (regno < 0)
23285 {
23286 /* If IP is used as the 1st scratch register for a nested function,
23287 then either r3 wasn't available or is used to preserve IP. */
23288 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23289 regno1 = 3;
23290 regno = (regno1 == 3 ? 2 : 3);
23291 sr->saved
23292 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23293 regno);
23294 }
23295 }
23296
23297 sr->reg = gen_rtx_REG (SImode, regno);
23298 if (sr->saved)
23299 {
23300 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23301 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23302 rtx x = gen_rtx_SET (stack_pointer_rtx,
23303 plus_constant (Pmode, stack_pointer_rtx, -4));
23304 RTX_FRAME_RELATED_P (insn) = 1;
23305 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23306 }
23307 }
23308
23309 /* Release a scratch register obtained from the preceding function. */
23310
23311 static void
23312 release_scratch_register_on_entry (struct scratch_reg *sr)
23313 {
23314 if (sr->saved)
23315 {
23316 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23317 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23318 rtx x = gen_rtx_SET (stack_pointer_rtx,
23319 plus_constant (Pmode, stack_pointer_rtx, 4));
23320 RTX_FRAME_RELATED_P (insn) = 1;
23321 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23322 }
23323 }
23324
23325 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23326
23327 #if PROBE_INTERVAL > 4096
23328 #error Cannot use indexed addressing mode for stack probing
23329 #endif
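/* Illustrative note (not in the original sources): with the default
   STACK_CHECK_PROBE_INTERVAL_EXP of 12, PROBE_INTERVAL is 4096 bytes,
   which keeps every probe offset within the immediate range of the
   "str" instruction used for probing below.  */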
23330
23331 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23332 inclusive. These are offsets from the current stack pointer. REGNO1
23333 is the index number of the 1st scratch register and LIVE_REGS is the
23334 mask of live registers. */
23335
23336 static void
23337 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23338 unsigned int regno1, unsigned long live_regs)
23339 {
23340 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23341
23342 /* See if we have a constant small number of probes to generate. If so,
23343 that's the easy case. */
23344 if (size <= PROBE_INTERVAL)
23345 {
23346 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23347 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23348 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23349 }
23350
23351 /* The run-time loop is made up of 10 insns in the generic case while the
23352 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23353 else if (size <= 5 * PROBE_INTERVAL)
23354 {
23355 HOST_WIDE_INT i, rem;
23356
23357 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23358 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23359 emit_stack_probe (reg1);
23360
23361 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23362 it exceeds SIZE. If only two probes are needed, this will not
23363 generate any code. Then probe at FIRST + SIZE. */
23364 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23365 {
23366 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23367 emit_stack_probe (reg1);
23368 }
23369
23370 rem = size - (i - PROBE_INTERVAL);
23371 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23372 {
23373 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23374 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23375 }
23376 else
23377 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23378 }
23379
23380 /* Otherwise, do the same as above, but in a loop. Note that we must be
23381 extra careful with variables wrapping around because we might be at
23382 the very top (or the very bottom) of the address space and we have
23383 to be able to handle this case properly; in particular, we use an
23384 equality test for the loop condition. */
23385 else
23386 {
23387 HOST_WIDE_INT rounded_size;
23388 struct scratch_reg sr;
23389
23390 get_scratch_register_on_entry (&sr, regno1, live_regs);
23391
23392 emit_move_insn (reg1, GEN_INT (first));
23393
23394
23395 /* Step 1: round SIZE to the previous multiple of the interval. */
23396
23397 rounded_size = size & -PROBE_INTERVAL;
23398 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23399
23400
23401 /* Step 2: compute initial and final value of the loop counter. */
23402
23403 /* TEST_ADDR = SP + FIRST. */
23404 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23405
23406 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23407 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23408
23409
23410 /* Step 3: the loop
23411
23412 do
23413 {
23414 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23415 probe at TEST_ADDR
23416 }
23417 while (TEST_ADDR != LAST_ADDR)
23418
23419 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23420 until it is equal to ROUNDED_SIZE. */
23421
23422 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23423
23424
23425 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23426 that SIZE is equal to ROUNDED_SIZE. */
23427
23428 if (size != rounded_size)
23429 {
23430 HOST_WIDE_INT rem = size - rounded_size;
23431
23432 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23433 {
23434 emit_set_insn (sr.reg,
23435 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23436 emit_stack_probe (plus_constant (Pmode, sr.reg,
23437 PROBE_INTERVAL - rem));
23438 }
23439 else
23440 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23441 }
23442
23443 release_scratch_register_on_entry (&sr);
23444 }
23445
23446 /* Make sure nothing is scheduled before we are done. */
23447 emit_insn (gen_blockage ());
23448 }
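/* Worked example (illustrative only, assuming PROBE_INTERVAL == 4096):
   for FIRST == 4096 and SIZE == 3 * 4096 + 100 the second branch above
   emits probes at SP - 8192, SP - 12288 and SP - 16384, followed by a
   final probe at SP - 16484, i.e. at FIRST + SIZE below the incoming
   stack pointer.  */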
23449
23450 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23451 absolute addresses. */
23452
23453 const char *
23454 output_probe_stack_range (rtx reg1, rtx reg2)
23455 {
23456 static int labelno = 0;
23457 char loop_lab[32];
23458 rtx xops[2];
23459
23460 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23461
23462 /* Loop. */
23463 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23464
23465 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23466 xops[0] = reg1;
23467 xops[1] = GEN_INT (PROBE_INTERVAL);
23468 output_asm_insn ("sub\t%0, %0, %1", xops);
23469
23470 /* Probe at TEST_ADDR. */
23471 output_asm_insn ("str\tr0, [%0, #0]", xops);
23472
23473 /* Test if TEST_ADDR == LAST_ADDR. */
23474 xops[1] = reg2;
23475 output_asm_insn ("cmp\t%0, %1", xops);
23476
23477 /* Branch. */
23478 fputs ("\tbne\t", asm_out_file);
23479 assemble_name_raw (asm_out_file, loop_lab);
23480 fputc ('\n', asm_out_file);
23481
23482 return "";
23483 }
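/* For reference (illustrative, assuming PROBE_INTERVAL == 4096 and
   r4/r5 as the two scratch registers), the loop emitted above is:

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
*/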
23484
23485 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23486 function. */
23487 void
23488 arm_expand_prologue (void)
23489 {
23490 rtx amount;
23491 rtx insn;
23492 rtx ip_rtx;
23493 unsigned long live_regs_mask;
23494 unsigned long func_type;
23495 int fp_offset = 0;
23496 int saved_pretend_args = 0;
23497 int saved_regs = 0;
23498 unsigned HOST_WIDE_INT args_to_push;
23499 HOST_WIDE_INT size;
23500 arm_stack_offsets *offsets;
23501 bool clobber_ip;
23502
23503 func_type = arm_current_func_type ();
23504
23505 /* Naked functions don't have prologues. */
23506 if (IS_NAKED (func_type))
23507 {
23508 if (flag_stack_usage_info)
23509 current_function_static_stack_size = 0;
23510 return;
23511 }
23512
23513 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23514 args_to_push = crtl->args.pretend_args_size;
23515
23516 /* Compute which registers we will have to save onto the stack. */
23517 offsets = arm_get_frame_offsets ();
23518 live_regs_mask = offsets->saved_regs_mask;
23519
23520 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23521
23522 if (IS_STACKALIGN (func_type))
23523 {
23524 rtx r0, r1;
23525
23526 /* Handle a word-aligned stack pointer. We generate the following:
23527
23528 mov r0, sp
23529 bic r1, r0, #7
23530 mov sp, r1
23531 <save and restore r0 in normal prologue/epilogue>
23532 mov sp, r0
23533 bx lr
23534
23535 The unwinder doesn't need to know about the stack realignment.
23536 Just tell it we saved SP in r0. */
23537 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23538
23539 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23540 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23541
23542 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23543 RTX_FRAME_RELATED_P (insn) = 1;
23544 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23545
23546 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23547
23548 /* ??? The CFA changes here, which may cause GDB to conclude that it
23549 has entered a different function. That said, the unwind info is
23550 correct, individually, before and after this instruction because
23551 we've described the save of SP, which will override the default
23552 handling of SP as restoring from the CFA. */
23553 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23554 }
23555
23556 /* Let's compute the static_chain_stack_bytes required and store it. Right
23557 now the value must be -1 as stored by arm_init_machine_status (). */
23558 cfun->machine->static_chain_stack_bytes
23559 = arm_compute_static_chain_stack_bytes ();
23560
23561 /* The static chain register is the same as the IP register. If it is
23562 clobbered when creating the frame, we need to save and restore it. */
23563 clobber_ip = (IS_NESTED (func_type)
23564 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23565 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23566 || flag_stack_clash_protection)
23567 && !df_regs_ever_live_p (LR_REGNUM)
23568 && arm_r3_live_at_start_p ()))
23569 || arm_current_function_pac_enabled_p ()));
23570
23571 /* Find somewhere to store IP whilst the frame is being created.
23572 We try the following places in order:
23573
23574 1. The last argument register r3 if it is available.
23575 2. A slot on the stack above the frame if there are no
23576 arguments to push onto the stack.
23577 3. Register r3 again, after pushing the argument registers
23578 onto the stack, if this is a varargs function.
23579 4. The last slot on the stack created for the arguments to
23580 push, if this isn't a varargs function.
23581
23582 Note - we only need to tell the dwarf2 backend about the SP
23583 adjustment in the second variant; the static chain register
23584 doesn't need to be unwound, as it doesn't contain a value
23585 inherited from the caller. */
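/* Illustrative example (not part of the original comment): a nested
   APCS-frame function with r3 live at entry and no pretend args falls
   into case 2 above: IP is saved with a pre-decrement store, fp_offset
   becomes 4, and IP is reloaded from [hard-fp, #4] by the "Recover the
   static chain register" code further down.  */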
23586 if (clobber_ip)
23587 {
23588 if (!arm_r3_live_at_start_p ())
23589 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23590 else if (args_to_push == 0)
23591 {
23592 rtx addr, dwarf;
23593
23594 saved_regs += 4;
23595
23596 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23597 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23598 fp_offset = 4;
23599
23600 /* Just tell the dwarf backend that we adjusted SP. */
23601 dwarf = gen_rtx_SET (stack_pointer_rtx,
23602 plus_constant (Pmode, stack_pointer_rtx,
23603 -fp_offset));
23604 RTX_FRAME_RELATED_P (insn) = 1;
23605 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23606 }
23607 else
23608 {
23609 /* Store the args on the stack. */
23610 if (cfun->machine->uses_anonymous_args)
23611 {
23612 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23613 (0xf0 >> (args_to_push / 4)) & 0xf);
23614 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23615 saved_pretend_args = 1;
23616 }
23617 else
23618 {
23619 rtx addr, dwarf;
23620
23621 if (args_to_push == 4)
23622 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23623 else
23624 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23625 plus_constant (Pmode,
23626 stack_pointer_rtx,
23627 -args_to_push));
23628
23629 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23630
23631 /* Just tell the dwarf backend that we adjusted SP. */
23632 dwarf = gen_rtx_SET (stack_pointer_rtx,
23633 plus_constant (Pmode, stack_pointer_rtx,
23634 -args_to_push));
23635 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23636 }
23637
23638 RTX_FRAME_RELATED_P (insn) = 1;
23639 fp_offset = args_to_push;
23640 args_to_push = 0;
23641 }
23642 }
23643
23644 if (arm_current_function_pac_enabled_p ())
23645 {
23646 /* If IP was clobbered we only emit a PAC instruction as the BTI
23647 one will be added before the push of the clobbered IP (if
23648 necessary) by the bti pass. */
23649 if (aarch_bti_enabled () && !clobber_ip)
23650 emit_insn (gen_pacbti_nop ());
23651 else
23652 emit_insn (gen_pac_nop ());
23653 }
23654
23655 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23656 {
23657 if (IS_INTERRUPT (func_type))
23658 {
23659 /* Interrupt functions must not corrupt any registers.
23660 Creating a frame pointer, however, corrupts the IP
23661 register, so we must push it first. */
23662 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23663
23664 /* Do not set RTX_FRAME_RELATED_P on this insn.
23665 The dwarf stack unwinding code only wants to see one
23666 stack decrement per function, and this is not it. If
23667 this instruction is labeled as being part of the frame
23668 creation sequence then dwarf2out_frame_debug_expr will
23669 die when it encounters the assignment of IP to FP
23670 later on, since the use of SP here establishes SP as
23671 the CFA register and not IP.
23672
23673 Anyway this instruction is not really part of the stack
23674 frame creation although it is part of the prologue. */
23675 }
23676
23677 insn = emit_set_insn (ip_rtx,
23678 plus_constant (Pmode, stack_pointer_rtx,
23679 fp_offset));
23680 RTX_FRAME_RELATED_P (insn) = 1;
23681 }
23682
23683 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23684 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23685 {
23686 saved_regs += 4;
23687 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23688 GEN_INT (FPCXTNS_ENUM)));
23689 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23690 plus_constant (Pmode, stack_pointer_rtx, -4));
23691 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23692 RTX_FRAME_RELATED_P (insn) = 1;
23693 }
23694
23695 if (args_to_push)
23696 {
23697 /* Push the argument registers, or reserve space for them. */
23698 if (cfun->machine->uses_anonymous_args)
23699 insn = emit_multi_reg_push
23700 ((0xf0 >> (args_to_push / 4)) & 0xf,
23701 (0xf0 >> (args_to_push / 4)) & 0xf);
23702 else
23703 insn = emit_insn
23704 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23705 GEN_INT (- args_to_push)));
23706 RTX_FRAME_RELATED_P (insn) = 1;
23707 }
23708
23709 /* If this is an interrupt service routine, and the link register
23710 is going to be pushed, and we're not generating the extra
23711 push of IP (needed when a frame pointer is needed and the APCS
23712 frame layout is used), then subtracting four from LR now means that the function return
23713 can be done with a single instruction. */
23714 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23715 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23716 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23717 && TARGET_ARM)
23718 {
23719 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23720
23721 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23722 }
23723
23724 if (live_regs_mask)
23725 {
23726 unsigned long dwarf_regs_mask = live_regs_mask;
23727
23728 saved_regs += bit_count (live_regs_mask) * 4;
23729 if (optimize_size && !frame_pointer_needed
23730 && saved_regs == offsets->saved_regs - offsets->saved_args)
23731 {
23732 /* If no coprocessor registers are being pushed and we don't have
23733 to worry about a frame pointer then push extra registers to
23734 create the stack frame. This is done in a way that does not
23735 alter the frame layout, so is independent of the epilogue. */
23736 int n;
23737 int frame;
23738 n = 0;
23739 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23740 n++;
23741 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23742 if (frame && n * 4 >= frame)
23743 {
23744 n = frame / 4;
23745 live_regs_mask |= (1 << n) - 1;
23746 saved_regs += frame;
23747 }
23748 }
23749
23750 if (TARGET_LDRD
23751 && current_tune->prefer_ldrd_strd
23752 && !optimize_function_for_size_p (cfun))
23753 {
23754 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23755 if (TARGET_THUMB2)
23756 thumb2_emit_strd_push (live_regs_mask);
23757 else if (TARGET_ARM
23758 && !TARGET_APCS_FRAME
23759 && !IS_INTERRUPT (func_type))
23760 arm_emit_strd_push (live_regs_mask);
23761 else
23762 {
23763 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23764 RTX_FRAME_RELATED_P (insn) = 1;
23765 }
23766 }
23767 else
23768 {
23769 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23770 RTX_FRAME_RELATED_P (insn) = 1;
23771 }
23772 }
23773
23774 if (! IS_VOLATILE (func_type))
23775 saved_regs += arm_save_coproc_regs ();
23776
23777 if (frame_pointer_needed && TARGET_ARM)
23778 {
23779 /* Create the new frame pointer. */
23780 if (TARGET_APCS_FRAME)
23781 {
23782 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23783 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23784 RTX_FRAME_RELATED_P (insn) = 1;
23785 }
23786 else
23787 {
23788 insn = GEN_INT (saved_regs - (4 + fp_offset));
23789 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23790 stack_pointer_rtx, insn));
23791 RTX_FRAME_RELATED_P (insn) = 1;
23792 }
23793 }
23794
23795 size = offsets->outgoing_args - offsets->saved_args;
23796 if (flag_stack_usage_info)
23797 current_function_static_stack_size = size;
23798
23799 /* If this isn't an interrupt service routine and we have a frame, then do
23800 stack checking. We use IP as the first scratch register, except for the
23801 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23802 if (!IS_INTERRUPT (func_type)
23803 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23804 || flag_stack_clash_protection))
23805 {
23806 unsigned int regno;
23807
23808 if (!IS_NESTED (func_type) || clobber_ip)
23809 regno = IP_REGNUM;
23810 else if (df_regs_ever_live_p (LR_REGNUM))
23811 regno = LR_REGNUM;
23812 else
23813 regno = 3;
23814
23815 if (crtl->is_leaf && !cfun->calls_alloca)
23816 {
23817 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23818 arm_emit_probe_stack_range (get_stack_check_protect (),
23819 size - get_stack_check_protect (),
23820 regno, live_regs_mask);
23821 }
23822 else if (size > 0)
23823 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23824 regno, live_regs_mask);
23825 }
23826
23827 /* Recover the static chain register. */
23828 if (clobber_ip)
23829 {
23830 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23831 insn = gen_rtx_REG (SImode, 3);
23832 else
23833 {
23834 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23835 insn = gen_frame_mem (SImode, insn);
23836 }
23837 emit_set_insn (ip_rtx, insn);
23838 emit_insn (gen_force_register_use (ip_rtx));
23839 }
23840
23841 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23842 {
23843 /* This add can produce multiple insns for a large constant, so we
23844 need to get tricky. */
23845 rtx_insn *last = get_last_insn ();
23846
23847 amount = GEN_INT (offsets->saved_args + saved_regs
23848 - offsets->outgoing_args);
23849
23850 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23851 amount));
23852 do
23853 {
23854 last = last ? NEXT_INSN (last) : get_insns ();
23855 RTX_FRAME_RELATED_P (last) = 1;
23856 }
23857 while (last != insn);
23858
23859 /* If the frame pointer is needed, emit a special barrier that
23860 will prevent the scheduler from moving stores to the frame
23861 before the stack adjustment. */
23862 if (frame_pointer_needed)
23863 emit_insn (gen_stack_tie (stack_pointer_rtx,
23864 hard_frame_pointer_rtx));
23865 }
23866
23867
23868 if (frame_pointer_needed && TARGET_THUMB2)
23869 thumb_set_frame_pointer (offsets);
23870
23871 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23872 {
23873 unsigned long mask;
23874
23875 mask = live_regs_mask;
23876 mask &= THUMB2_WORK_REGS;
23877 if (!IS_NESTED (func_type))
23878 mask |= (1 << IP_REGNUM);
23879 arm_load_pic_register (mask, NULL_RTX);
23880 }
23881
23882 /* If we are profiling, make sure no instructions are scheduled before
23883 the call to mcount. Similarly if the user has requested no
23884 scheduling in the prolog. Similarly if we want non-call exceptions
23885 using the EABI unwinder, to prevent faulting instructions from being
23886 swapped with a stack adjustment. */
23887 if (crtl->profile || !TARGET_SCHED_PROLOG
23888 || (arm_except_unwind_info (&global_options) == UI_TARGET
23889 && cfun->can_throw_non_call_exceptions))
23890 emit_insn (gen_blockage ());
23891
23892 /* If the link register is being kept alive, with the return address in it,
23893 then make sure that it does not get reused by the ce2 pass. */
23894 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23895 cfun->machine->lr_save_eliminated = 1;
23896 }
23897 \f
23898 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23899 static void
23900 arm_print_condition (FILE *stream)
23901 {
23902 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23903 {
23904 /* Branch conversion is not implemented for Thumb-2. */
23905 if (TARGET_THUMB)
23906 {
23907 output_operand_lossage ("predicated Thumb instruction");
23908 return;
23909 }
23910 if (current_insn_predicate != NULL)
23911 {
23912 output_operand_lossage
23913 ("predicated instruction in conditional sequence");
23914 return;
23915 }
23916
23917 fputs (arm_condition_codes[arm_current_cc], stream);
23918 }
23919 else if (current_insn_predicate)
23920 {
23921 enum arm_cond_code code;
23922
23923 if (TARGET_THUMB1)
23924 {
23925 output_operand_lossage ("predicated Thumb instruction");
23926 return;
23927 }
23928
23929 code = get_arm_condition_code (current_insn_predicate);
23930 fputs (arm_condition_codes[code], stream);
23931 }
23932 }
23933
23934
23935 /* Globally reserved letters: acln
23936 Punctuation letters currently used: @_|?().!#
23937 Lower case letters currently used: bcdefhimpqtvwxyz
23938 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23939 Letters previously used, but now deprecated/obsolete: sWXYZ.
23940
23941 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23942
23943 If CODE is 'd', then the X is a condition operand and the instruction
23944 should only be executed if the condition is true.
23945 If CODE is 'D', then the X is a condition operand and the instruction
23946 should only be executed if the condition is false: however, if the mode
23947 of the comparison is CCFPEmode, then always execute the instruction -- we
23948 do this because in these circumstances !GE does not necessarily imply LT;
23949 in these cases the instruction pattern will take care to make sure that
23950 an instruction containing %d will follow, thereby undoing the effects of
23951 doing this instruction unconditionally.
23952 If CODE is 'N' then X is a floating point operand that must be negated
23953 before output.
23954 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23955 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23956 If CODE is 'V', then the operand must be a CONST_INT representing
23957 the bits to preserve in the modified register (Rd) of a BFI or BFC
23958 instruction: print out both the width and lsb (shift) fields. */
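/* Two illustrative examples (not part of the original comment): for the
   CONST_INT 5, "%B" prints -6 (the sign-extended bitwise inverse), and
   for the CONST_INT 8, "%b" prints #3 (its log2).  */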
23959 static void
23960 arm_print_operand (FILE *stream, rtx x, int code)
23961 {
23962 switch (code)
23963 {
23964 case '@':
23965 fputs (ASM_COMMENT_START, stream);
23966 return;
23967
23968 case '_':
23969 fputs (user_label_prefix, stream);
23970 return;
23971
23972 case '|':
23973 fputs (REGISTER_PREFIX, stream);
23974 return;
23975
23976 case '?':
23977 arm_print_condition (stream);
23978 return;
23979
23980 case '.':
23981 /* The current condition code for a condition code setting instruction.
23982 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23983 fputc('s', stream);
23984 arm_print_condition (stream);
23985 return;
23986
23987 case '!':
23988 /* If the instruction is conditionally executed then print
23989 the current condition code, otherwise print 's'. */
23990 gcc_assert (TARGET_THUMB2);
23991 if (current_insn_predicate)
23992 arm_print_condition (stream);
23993 else
23994 fputc('s', stream);
23995 break;
23996
23997 /* %# is a "break" sequence. It doesn't output anything, but is used to
23998 separate e.g. operand numbers from following text, if that text consists
23999 of further digits which we don't want to be part of the operand
24000 number. */
24001 case '#':
24002 return;
24003
24004 case 'N':
24005 {
24006 REAL_VALUE_TYPE r;
24007 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24008 fprintf (stream, "%s", fp_const_from_val (&r));
24009 }
24010 return;
24011
24012 /* An integer or symbol address without a preceding # sign. */
24013 case 'c':
24014 switch (GET_CODE (x))
24015 {
24016 case CONST_INT:
24017 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24018 break;
24019
24020 case SYMBOL_REF:
24021 output_addr_const (stream, x);
24022 break;
24023
24024 case CONST:
24025 if (GET_CODE (XEXP (x, 0)) == PLUS
24026 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24027 {
24028 output_addr_const (stream, x);
24029 break;
24030 }
24031 /* Fall through. */
24032
24033 default:
24034 output_operand_lossage ("Unsupported operand for code '%c'", code);
24035 }
24036 return;
24037
24038 /* An integer that we want to print in HEX. */
24039 case 'x':
24040 switch (GET_CODE (x))
24041 {
24042 case CONST_INT:
24043 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24044 break;
24045
24046 default:
24047 output_operand_lossage ("Unsupported operand for code '%c'", code);
24048 }
24049 return;
24050
24051 case 'B':
24052 if (CONST_INT_P (x))
24053 {
24054 HOST_WIDE_INT val;
24055 val = ARM_SIGN_EXTEND (~INTVAL (x));
24056 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24057 }
24058 else
24059 {
24060 putc ('~', stream);
24061 output_addr_const (stream, x);
24062 }
24063 return;
24064
24065 case 'b':
24066 /* Print the log2 of a CONST_INT. */
24067 {
24068 HOST_WIDE_INT val;
24069
24070 if (!CONST_INT_P (x)
24071 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24072 output_operand_lossage ("Unsupported operand for code '%c'", code);
24073 else
24074 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24075 }
24076 return;
24077
24078 case 'L':
24079 /* The low 16 bits of an immediate constant. */
24080 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24081 return;
24082
24083 case 'i':
24084 fprintf (stream, "%s", arithmetic_instr (x, 1));
24085 return;
24086
24087 case 'I':
24088 fprintf (stream, "%s", arithmetic_instr (x, 0));
24089 return;
24090
24091 case 'S':
24092 {
24093 HOST_WIDE_INT val;
24094 const char *shift;
24095
24096 shift = shift_op (x, &val);
24097
24098 if (shift)
24099 {
24100 fprintf (stream, ", %s ", shift);
24101 if (val == -1)
24102 arm_print_operand (stream, XEXP (x, 1), 0);
24103 else
24104 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24105 }
24106 }
24107 return;
24108
24109 /* An explanation of the 'Q', 'R' and 'H' register operands:
24110
24111 In a pair of registers containing a DI or DF value the 'Q'
24112 operand returns the register number of the register containing
24113 the least significant part of the value. The 'R' operand returns
24114 the register number of the register containing the most
24115 significant part of the value.
24116
24117 The 'H' operand returns the higher of the two register numbers.
24118 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24119 same as the 'Q' operand, since the most significant part of the
24120 value is held in the lower number register. The reverse is true
24121 on systems where WORDS_BIG_ENDIAN is false.
24122
24123 The purpose of these operands is to distinguish between cases
24124 where the endian-ness of the values is important (for example
24125 when they are added together), and cases where the endian-ness
24126 is irrelevant, but the order of register operations is important.
24127 For example when loading a value from memory into a register
24128 pair, the endian-ness does not matter. Provided that the value
24129 from the lower memory address is put into the lower numbered
24130 register, and the value from the higher address is put into the
24131 higher numbered register, the load will work regardless of whether
24132 the value being loaded is big-wordian or little-wordian. The
24133 order of the two register loads can matter however, if the address
24134 of the memory location is actually held in one of the registers
24135 being overwritten by the load.
24136
24137 The 'Q' and 'R' constraints are also available for 64-bit
24138 constants. */
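/* For example (illustrative): with a DImode value held in the register
   pair {r4, r5}, %Q is r4, %R is r5 and %H is r5 when WORDS_BIG_ENDIAN
   is false; when it is true, %Q is r5, %R is r4 and %H is still r5.  */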
24139 case 'Q':
24140 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24141 {
24142 rtx part = gen_lowpart (SImode, x);
24143 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24144 return;
24145 }
24146
24147 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24148 {
24149 output_operand_lossage ("invalid operand for code '%c'", code);
24150 return;
24151 }
24152
24153 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24154 return;
24155
24156 case 'R':
24157 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24158 {
24159 machine_mode mode = GET_MODE (x);
24160 rtx part;
24161
24162 if (mode == VOIDmode)
24163 mode = DImode;
24164 part = gen_highpart_mode (SImode, mode, x);
24165 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24166 return;
24167 }
24168
24169 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24170 {
24171 output_operand_lossage ("invalid operand for code '%c'", code);
24172 return;
24173 }
24174
24175 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24176 return;
24177
24178 case 'H':
24179 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24180 {
24181 output_operand_lossage ("invalid operand for code '%c'", code);
24182 return;
24183 }
24184
24185 asm_fprintf (stream, "%r", REGNO (x) + 1);
24186 return;
24187
24188 case 'J':
24189 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24190 {
24191 output_operand_lossage ("invalid operand for code '%c'", code);
24192 return;
24193 }
24194
24195 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24196 return;
24197
24198 case 'K':
24199 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24200 {
24201 output_operand_lossage ("invalid operand for code '%c'", code);
24202 return;
24203 }
24204
24205 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24206 return;
24207
24208 case 'm':
24209 asm_fprintf (stream, "%r",
24210 REG_P (XEXP (x, 0))
24211 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24212 return;
24213
24214 case 'M':
24215 asm_fprintf (stream, "{%r-%r}",
24216 REGNO (x),
24217 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24218 return;
24219
24220 /* Like 'M', but writing doubleword vector registers, for use by Neon
24221 insns. */
24222 case 'h':
24223 {
24224 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24225 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24226 if (numregs == 1)
24227 asm_fprintf (stream, "{d%d}", regno);
24228 else
24229 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24230 }
24231 return;
24232
24233 case 'd':
24234 /* CONST_TRUE_RTX means always -- that's the default. */
24235 if (x == const_true_rtx)
24236 return;
24237
24238 if (!COMPARISON_P (x))
24239 {
24240 output_operand_lossage ("invalid operand for code '%c'", code);
24241 return;
24242 }
24243
24244 fputs (arm_condition_codes[get_arm_condition_code (x)],
24245 stream);
24246 return;
24247
24248 case 'D':
24249 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24250 want to do that. */
24251 if (x == const_true_rtx)
24252 {
24253 output_operand_lossage ("instruction never executed");
24254 return;
24255 }
24256 if (!COMPARISON_P (x))
24257 {
24258 output_operand_lossage ("invalid operand for code '%c'", code);
24259 return;
24260 }
24261
24262 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24263 (get_arm_condition_code (x))],
24264 stream);
24265 return;
24266
24267 case 'V':
24268 {
24269 /* Output the LSB (shift) and width for a bitmask instruction
24270 based on a literal mask. The LSB is printed first,
24271 followed by the width.
24272
24273 Eg. For 0b1...1110001, the result is #1, #3. */
24274 if (!CONST_INT_P (x))
24275 {
24276 output_operand_lossage ("invalid operand for code '%c'", code);
24277 return;
24278 }
24279
24280 unsigned HOST_WIDE_INT val
24281 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24282 int lsb = exact_log2 (val & -val);
24283 asm_fprintf (stream, "#%d, #%d", lsb,
24284 (exact_log2 (val + (val & -val)) - lsb));
24285 }
24286 return;
24287
24288 case 's':
24289 case 'W':
24290 case 'X':
24291 case 'Y':
24292 case 'Z':
24293 /* Former Maverick support, removed after GCC-4.7. */
24294 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24295 return;
24296
24297 case 'U':
24298 if (!REG_P (x)
24299 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24300 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24301 /* Bad value for wCG register number. */
24302 {
24303 output_operand_lossage ("invalid operand for code '%c'", code);
24304 return;
24305 }
24306
24307 else
24308 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24309 return;
24310
24311 /* Print an iWMMXt control register name. */
24312 case 'w':
24313 if (!CONST_INT_P (x)
24314 || INTVAL (x) < 0
24315 || INTVAL (x) >= 16)
24316 /* Bad value for wC register number. */
24317 {
24318 output_operand_lossage ("invalid operand for code '%c'", code);
24319 return;
24320 }
24321
24322 else
24323 {
24324 static const char * wc_reg_names [16] =
24325 {
24326 "wCID", "wCon", "wCSSF", "wCASF",
24327 "wC4", "wC5", "wC6", "wC7",
24328 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24329 "wC12", "wC13", "wC14", "wC15"
24330 };
24331
24332 fputs (wc_reg_names [INTVAL (x)], stream);
24333 }
24334 return;
24335
24336 /* Print the high single-precision register of a VFP double-precision
24337 register. */
24338 case 'p':
24339 {
24340 machine_mode mode = GET_MODE (x);
24341 int regno;
24342
24343 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24344 {
24345 output_operand_lossage ("invalid operand for code '%c'", code);
24346 return;
24347 }
24348
24349 regno = REGNO (x);
24350 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24351 {
24352 output_operand_lossage ("invalid operand for code '%c'", code);
24353 return;
24354 }
24355
24356 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24357 }
24358 return;
24359
24360 /* Print a VFP/Neon double precision or quad precision register name. */
24361 case 'P':
24362 case 'q':
24363 {
24364 machine_mode mode = GET_MODE (x);
24365 int is_quad = (code == 'q');
24366 int regno;
24367
24368 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24369 {
24370 output_operand_lossage ("invalid operand for code '%c'", code);
24371 return;
24372 }
24373
24374 if (!REG_P (x)
24375 || !IS_VFP_REGNUM (REGNO (x)))
24376 {
24377 output_operand_lossage ("invalid operand for code '%c'", code);
24378 return;
24379 }
24380
24381 regno = REGNO (x);
24382 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24383 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24384 {
24385 output_operand_lossage ("invalid operand for code '%c'", code);
24386 return;
24387 }
24388
24389 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24390 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24391 }
24392 return;
24393
24394 /* These two codes print the low/high doubleword register of a Neon quad
24395 register, respectively. For pair-structure types, can also print
24396 low/high quadword registers. */
24397 case 'e':
24398 case 'f':
24399 {
24400 machine_mode mode = GET_MODE (x);
24401 int regno;
24402
24403 if ((GET_MODE_SIZE (mode) != 16
24404 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24405 {
24406 output_operand_lossage ("invalid operand for code '%c'", code);
24407 return;
24408 }
24409
24410 regno = REGNO (x);
24411 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24412 {
24413 output_operand_lossage ("invalid operand for code '%c'", code);
24414 return;
24415 }
24416
24417 if (GET_MODE_SIZE (mode) == 16)
24418 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24419 + (code == 'f' ? 1 : 0));
24420 else
24421 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24422 + (code == 'f' ? 1 : 0));
24423 }
24424 return;
24425
24426 /* Print a VFPv3 floating-point constant, represented as an integer
24427 index. */
24428 case 'G':
24429 {
24430 int index = vfp3_const_double_index (x);
24431 gcc_assert (index != -1);
24432 fprintf (stream, "%d", index);
24433 }
24434 return;
24435
24436 /* Print bits representing opcode features for Neon.
24437
24438 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24439 and polynomials as unsigned.
24440
24441 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24442
24443 Bit 2 is 1 for rounding functions, 0 otherwise. */
24444
24445 /* Identify the type as 's', 'u', 'p' or 'f'. */
24446 case 'T':
24447 {
24448 HOST_WIDE_INT bits = INTVAL (x);
24449 fputc ("uspf"[bits & 3], stream);
24450 }
24451 return;
24452
24453 /* Likewise, but signed and unsigned integers are both 'i'. */
24454 case 'F':
24455 {
24456 HOST_WIDE_INT bits = INTVAL (x);
24457 fputc ("iipf"[bits & 3], stream);
24458 }
24459 return;
24460
24461 /* As for 'T', but emit 'u' instead of 'p'. */
24462 case 't':
24463 {
24464 HOST_WIDE_INT bits = INTVAL (x);
24465 fputc ("usuf"[bits & 3], stream);
24466 }
24467 return;
24468
24469 /* Bit 2: rounding (vs none). */
24470 case 'O':
24471 {
24472 HOST_WIDE_INT bits = INTVAL (x);
24473 fputs ((bits & 4) != 0 ? "r" : "", stream);
24474 }
24475 return;
24476
24477 /* Memory operand for vld1/vst1 instruction. */
24478 case 'A':
24479 {
24480 rtx addr;
24481 bool postinc = FALSE;
24482 rtx postinc_reg = NULL;
24483 unsigned align, memsize, align_bits;
24484
24485 gcc_assert (MEM_P (x));
24486 addr = XEXP (x, 0);
24487 if (GET_CODE (addr) == POST_INC)
24488 {
24489 postinc = 1;
24490 addr = XEXP (addr, 0);
24491 }
24492 if (GET_CODE (addr) == POST_MODIFY)
24493 {
24494 postinc_reg = XEXP (XEXP (addr, 1), 1);
24495 addr = XEXP (addr, 0);
24496 }
24497 asm_fprintf (stream, "[%r", REGNO (addr));
24498
24499 /* We know the alignment of this access, so we can emit a hint in the
24500 instruction (for some alignments) as an aid to the memory subsystem
24501 of the target. */
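/* Illustrative example (not part of the original comment): a 16-byte
   access known to be 16-byte aligned gets the ":128" hint, so with r0
   as the base register the operand prints as "[r0:128]".  */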
24502 align = MEM_ALIGN (x) >> 3;
24503 memsize = MEM_SIZE (x);
24504
24505 /* Only certain alignment specifiers are supported by the hardware. */
24506 if (memsize == 32 && (align % 32) == 0)
24507 align_bits = 256;
24508 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24509 align_bits = 128;
24510 else if (memsize >= 8 && (align % 8) == 0)
24511 align_bits = 64;
24512 else
24513 align_bits = 0;
24514
24515 if (align_bits != 0)
24516 asm_fprintf (stream, ":%d", align_bits);
24517
24518 asm_fprintf (stream, "]");
24519
24520 if (postinc)
24521 fputs("!", stream);
24522 if (postinc_reg)
24523 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24524 }
24525 return;
24526
24527 /* To print the memory operand with the "Ux" or "Uj" constraint. Based on the
24528 rtx_code, the memory operand's output looks like one of the following:
24529 1. [Rn], #+/-<imm>
24530 2. [Rn, #+/-<imm>]!
24531 3. [Rn, #+/-<imm>]
24532 4. [Rn]. */
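/* E.g. (illustrative): an SImode POST_INC access through r2 prints as
   "[r2], #4", while a plain REG address prints as "[r2]".  */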
24533 case 'E':
24534 {
24535 rtx addr;
24536 rtx postinc_reg = NULL;
24537 unsigned inc_val = 0;
24538 enum rtx_code code;
24539
24540 gcc_assert (MEM_P (x));
24541 addr = XEXP (x, 0);
24542 code = GET_CODE (addr);
24543 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24544 || code == PRE_DEC)
24545 {
24546 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24547 inc_val = GET_MODE_SIZE (GET_MODE (x));
24548 if (code == POST_INC || code == POST_DEC)
24549 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24550 ? "": "-", inc_val);
24551 else
24552 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24553 ? "": "-", inc_val);
24554 }
24555 else if (code == POST_MODIFY || code == PRE_MODIFY)
24556 {
24557 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24558 postinc_reg = XEXP (XEXP (addr, 1), 1);
24559 if (postinc_reg && CONST_INT_P (postinc_reg))
24560 {
24561 if (code == POST_MODIFY)
24562 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24563 else
24564 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24565 }
24566 }
24567 else if (code == PLUS)
24568 {
24569 rtx base = XEXP (addr, 0);
24570 rtx index = XEXP (addr, 1);
24571
24572 gcc_assert (REG_P (base) && CONST_INT_P (index));
24573
24574 HOST_WIDE_INT offset = INTVAL (index);
24575 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24576 }
24577 else
24578 {
24579 gcc_assert (REG_P (addr));
24580 asm_fprintf (stream, "[%r]",REGNO (addr));
24581 }
24582 }
24583 return;
24584
24585 case 'C':
24586 {
24587 rtx addr;
24588
24589 gcc_assert (MEM_P (x));
24590 addr = XEXP (x, 0);
24591 gcc_assert (REG_P (addr));
24592 asm_fprintf (stream, "[%r]", REGNO (addr));
24593 }
24594 return;
24595
24596 /* Translate an S register number into a D register number and element index. */
24597 case 'y':
24598 {
24599 machine_mode mode = GET_MODE (x);
24600 int regno;
24601
24602 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24603 {
24604 output_operand_lossage ("invalid operand for code '%c'", code);
24605 return;
24606 }
24607
24608 regno = REGNO (x);
24609 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24610 {
24611 output_operand_lossage ("invalid operand for code '%c'", code);
24612 return;
24613 }
24614
24615 regno = regno - FIRST_VFP_REGNUM;
24616 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24617 }
24618 return;
24619
24620 case 'v':
24621 gcc_assert (CONST_DOUBLE_P (x));
24622 int result;
24623 result = vfp3_const_double_for_fract_bits (x);
24624 if (result == 0)
24625 result = vfp3_const_double_for_bits (x);
24626 fprintf (stream, "#%d", result);
24627 return;
24628
24629 /* Register specifier for vld1.16/vst1.16. Translate the S register
24630 number into a D register number and element index. */
24631 case 'z':
24632 {
24633 machine_mode mode = GET_MODE (x);
24634 int regno;
24635
24636 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24637 {
24638 output_operand_lossage ("invalid operand for code '%c'", code);
24639 return;
24640 }
24641
24642 regno = REGNO (x);
24643 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24644 {
24645 output_operand_lossage ("invalid operand for code '%c'", code);
24646 return;
24647 }
24648
24649 regno = regno - FIRST_VFP_REGNUM;
24650 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24651 }
24652 return;
24653
24654 default:
24655 if (x == 0)
24656 {
24657 output_operand_lossage ("missing operand");
24658 return;
24659 }
24660
24661 switch (GET_CODE (x))
24662 {
24663 case REG:
24664 asm_fprintf (stream, "%r", REGNO (x));
24665 break;
24666
24667 case MEM:
24668 output_address (GET_MODE (x), XEXP (x, 0));
24669 break;
24670
24671 case CONST_DOUBLE:
24672 {
24673 char fpstr[20];
24674 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24675 sizeof (fpstr), 0, 1);
24676 fprintf (stream, "#%s", fpstr);
24677 }
24678 break;
24679
24680 default:
24681 gcc_assert (GET_CODE (x) != NEG);
24682 fputc ('#', stream);
24683 if (GET_CODE (x) == HIGH)
24684 {
24685 fputs (":lower16:", stream);
24686 x = XEXP (x, 0);
24687 }
24688
24689 output_addr_const (stream, x);
24690 break;
24691 }
24692 }
24693 }
24694 \f
24695 /* Target hook for printing a memory address. */
24696 static void
24697 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24698 {
24699 if (TARGET_32BIT)
24700 {
24701 int is_minus = GET_CODE (x) == MINUS;
24702
24703 if (REG_P (x))
24704 asm_fprintf (stream, "[%r]", REGNO (x));
24705 else if (GET_CODE (x) == PLUS || is_minus)
24706 {
24707 rtx base = XEXP (x, 0);
24708 rtx index = XEXP (x, 1);
24709 HOST_WIDE_INT offset = 0;
24710 if (!REG_P (base)
24711 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24712 {
24713 /* Ensure that BASE is a register. */
24714 /* (one of them must be). */
24715 /* Also ensure that SP is not used as an index register. */
24716 std::swap (base, index);
24717 }
24718 switch (GET_CODE (index))
24719 {
24720 case CONST_INT:
24721 offset = INTVAL (index);
24722 if (is_minus)
24723 offset = -offset;
24724 asm_fprintf (stream, "[%r, #%wd]",
24725 REGNO (base), offset);
24726 break;
24727
24728 case REG:
24729 asm_fprintf (stream, "[%r, %s%r]",
24730 REGNO (base), is_minus ? "-" : "",
24731 REGNO (index));
24732 break;
24733
24734 case MULT:
24735 case ASHIFTRT:
24736 case LSHIFTRT:
24737 case ASHIFT:
24738 case ROTATERT:
24739 {
24740 asm_fprintf (stream, "[%r, %s%r",
24741 REGNO (base), is_minus ? "-" : "",
24742 REGNO (XEXP (index, 0)));
24743 arm_print_operand (stream, index, 'S');
24744 fputs ("]", stream);
24745 break;
24746 }
24747
24748 default:
24749 gcc_unreachable ();
24750 }
24751 }
24752 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24753 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24754 {
24755 gcc_assert (REG_P (XEXP (x, 0)));
24756
24757 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24758 asm_fprintf (stream, "[%r, #%s%d]!",
24759 REGNO (XEXP (x, 0)),
24760 GET_CODE (x) == PRE_DEC ? "-" : "",
24761 GET_MODE_SIZE (mode));
24762 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24763 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24764 else
24765 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24766 GET_CODE (x) == POST_DEC ? "-" : "",
24767 GET_MODE_SIZE (mode));
24768 }
24769 else if (GET_CODE (x) == PRE_MODIFY)
24770 {
24771 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24772 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24773 asm_fprintf (stream, "#%wd]!",
24774 INTVAL (XEXP (XEXP (x, 1), 1)));
24775 else
24776 asm_fprintf (stream, "%r]!",
24777 REGNO (XEXP (XEXP (x, 1), 1)));
24778 }
24779 else if (GET_CODE (x) == POST_MODIFY)
24780 {
24781 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24782 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24783 asm_fprintf (stream, "#%wd",
24784 INTVAL (XEXP (XEXP (x, 1), 1)));
24785 else
24786 asm_fprintf (stream, "%r",
24787 REGNO (XEXP (XEXP (x, 1), 1)));
24788 }
24789 else output_addr_const (stream, x);
24790 }
24791 else
24792 {
24793 if (REG_P (x))
24794 asm_fprintf (stream, "[%r]", REGNO (x));
24795 else if (GET_CODE (x) == POST_INC)
24796 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24797 else if (GET_CODE (x) == PLUS)
24798 {
24799 gcc_assert (REG_P (XEXP (x, 0)));
24800 if (CONST_INT_P (XEXP (x, 1)))
24801 asm_fprintf (stream, "[%r, #%wd]",
24802 REGNO (XEXP (x, 0)),
24803 INTVAL (XEXP (x, 1)));
24804 else
24805 asm_fprintf (stream, "[%r, %r]",
24806 REGNO (XEXP (x, 0)),
24807 REGNO (XEXP (x, 1)));
24808 }
24809 else
24810 output_addr_const (stream, x);
24811 }
24812 }
24813 \f
24814 /* Target hook for indicating whether a punctuation character for
24815 TARGET_PRINT_OPERAND is valid. */
24816 static bool
24817 arm_print_operand_punct_valid_p (unsigned char code)
24818 {
24819 return (code == '@' || code == '|' || code == '.'
24820 || code == '(' || code == ')' || code == '#'
24821 || (TARGET_32BIT && (code == '?'))
24822 || (TARGET_THUMB2 && (code == '!'))
24823 || (TARGET_THUMB && (code == '_')));
24824 }
24825 \f
24826 /* Target hook for assembling integer objects. The ARM version needs to
24827 handle word-sized values specially. */
24828 static bool
24829 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24830 {
24831 machine_mode mode;
24832
24833 if (size == UNITS_PER_WORD && aligned_p)
24834 {
24835 fputs ("\t.word\t", asm_out_file);
24836 output_addr_const (asm_out_file, x);
24837
24838 /* Mark symbols as position independent. We only do this in the
24839 .text segment, not in the .data segment. */
24840 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24841 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24842 {
24843 /* See legitimize_pic_address for an explanation of the
24844 TARGET_VXWORKS_RTP check. */
24845 /* References to weak symbols cannot be resolved locally:
24846 they may be overridden by a non-weak definition at link
24847 time. */
24848 if (!arm_pic_data_is_text_relative
24849 || (SYMBOL_REF_P (x)
24850 && (!SYMBOL_REF_LOCAL_P (x)
24851 || (SYMBOL_REF_DECL (x)
24852 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24853 || (SYMBOL_REF_FUNCTION_P (x)
24854 && !arm_fdpic_local_funcdesc_p (x)))))
24855 {
24856 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24857 fputs ("(GOTFUNCDESC)", asm_out_file);
24858 else
24859 fputs ("(GOT)", asm_out_file);
24860 }
24861 else
24862 {
24863 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24864 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24865 else
24866 {
24867 bool is_readonly;
24868
24869 if (!TARGET_FDPIC
24870 || arm_is_segment_info_known (x, &is_readonly))
24871 fputs ("(GOTOFF)", asm_out_file);
24872 else
24873 fputs ("(GOT)", asm_out_file);
24874 }
24875 }
24876 }
24877
24878 /* For FDPIC we also have to mark symbol for .data section. */
24879 if (TARGET_FDPIC
24880 && !making_const_table
24881 && SYMBOL_REF_P (x)
24882 && SYMBOL_REF_FUNCTION_P (x))
24883 fputs ("(FUNCDESC)", asm_out_file);
24884
24885 fputc ('\n', asm_out_file);
24886 return true;
24887 }
24888
24889 mode = GET_MODE (x);
24890
24891 if (arm_vector_mode_supported_p (mode))
24892 {
24893 int i, units;
24894
24895 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24896
24897 units = CONST_VECTOR_NUNITS (x);
24898 size = GET_MODE_UNIT_SIZE (mode);
24899
24900 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24901 for (i = 0; i < units; i++)
24902 {
24903 rtx elt = CONST_VECTOR_ELT (x, i);
24904 assemble_integer
24905 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24906 }
24907 else
24908 for (i = 0; i < units; i++)
24909 {
24910 rtx elt = CONST_VECTOR_ELT (x, i);
24911 assemble_real
24912 (*CONST_DOUBLE_REAL_VALUE (elt),
24913 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24914 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24915 }
24916
24917 return true;
24918 }
24919
24920 return default_assemble_integer (x, size, aligned_p);
24921 }
24922
24923 static void
24924 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24925 {
24926 section *s;
24927
24928 if (!TARGET_AAPCS_BASED)
24929 {
24930 (is_ctor ?
24931 default_named_section_asm_out_constructor
24932 : default_named_section_asm_out_destructor) (symbol, priority);
24933 return;
24934 }
24935
24936 /* Put these in the .init_array section, using a special relocation. */
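/* E.g. (illustrative): a constructor with priority 101 ends up in the
   section ".init_array.00101".  */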
24937 if (priority != DEFAULT_INIT_PRIORITY)
24938 {
24939 char buf[18];
24940 sprintf (buf, "%s.%.5u",
24941 is_ctor ? ".init_array" : ".fini_array",
24942 priority);
24943 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24944 }
24945 else if (is_ctor)
24946 s = ctors_section;
24947 else
24948 s = dtors_section;
24949
24950 switch_to_section (s);
24951 assemble_align (POINTER_SIZE);
24952 fputs ("\t.word\t", asm_out_file);
24953 output_addr_const (asm_out_file, symbol);
24954 fputs ("(target1)\n", asm_out_file);
24955 }
24956
24957 /* Add a function to the list of static constructors. */
24958
24959 static void
24960 arm_elf_asm_constructor (rtx symbol, int priority)
24961 {
24962 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24963 }
24964
24965 /* Add a function to the list of static destructors. */
24966
24967 static void
24968 arm_elf_asm_destructor (rtx symbol, int priority)
24969 {
24970 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24971 }
24972 \f
24973 /* A finite state machine takes care of noticing whether or not instructions
24974 can be conditionally executed, and thus decrease execution time and code
24975 size by deleting branch instructions. The fsm is controlled by
24976 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24977
24978 /* The states of the fsm controlling condition codes are:
24979 0: normal, do nothing special
24980 1: make ASM_OUTPUT_OPCODE not output this instruction
24981 2: make ASM_OUTPUT_OPCODE not output this instruction
24982 3: make instructions conditional
24983 4: make instructions conditional
24984
24985 State transitions (state->state by whom under condition):
24986 0 -> 1 final_prescan_insn if the `target' is a label
24987 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24988 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24989 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24990 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24991 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24992 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24993 (the target insn is arm_target_insn).
24994
24995 If the jump clobbers the conditions then we use states 2 and 4.
24996
24997 A similar thing can be done with conditional return insns.
24998
24999 XXX In case the `target' is an unconditional branch, this conditionalising
25000 of the instructions always reduces code size, but not always execution
25001 time. But then, I want to reduce the code size to somewhere near what
25002 /bin/cc produces. */
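/* Purely for illustration (not taken from any particular test case), the
   effect of this machinery in ARM state is to turn a short forward branch
   such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	mov	r2, #0
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1
	movne	r2, #0

   i.e. the branch is not output and the skipped instructions are given the
   inverse condition.  */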
25003
25004 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25005 instructions. When a COND_EXEC instruction is seen the subsequent
25006 instructions are scanned so that multiple conditional instructions can be
25007 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25008 specify the length and true/false mask for the IT block. These will be
25009 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
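/* A rough example of the encoding, assuming an "eq" predicate: three
   consecutive COND_EXEC insns, the first two on "eq" and the third on the
   inverse "ne", leave arm_condexec_masklen == 3 and arm_condexec_mask ==
   0b011, which thumb2_asm_output_opcode prints as "itte eq".  */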
25010
25011 /* Returns the index of the ARM condition code string in
25012 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25013 COMPARISON should be an rtx like `(eq (...) (...))'. */
25014
25015 enum arm_cond_code
25016 maybe_get_arm_condition_code (rtx comparison)
25017 {
25018 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25019 enum arm_cond_code code;
25020 enum rtx_code comp_code = GET_CODE (comparison);
25021
25022 if (GET_MODE_CLASS (mode) != MODE_CC)
25023 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25024 XEXP (comparison, 1));
25025
25026 switch (mode)
25027 {
25028 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25029 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25030 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25031 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25032 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25033 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25034 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25035 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25036 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25037 case E_CC_DLTUmode: code = ARM_CC;
25038
25039 dominance:
25040 if (comp_code == EQ)
25041 return ARM_INVERSE_CONDITION_CODE (code);
25042 if (comp_code == NE)
25043 return code;
25044 return ARM_NV;
25045
25046 case E_CC_NZmode:
25047 switch (comp_code)
25048 {
25049 case NE: return ARM_NE;
25050 case EQ: return ARM_EQ;
25051 case GE: return ARM_PL;
25052 case LT: return ARM_MI;
25053 default: return ARM_NV;
25054 }
25055
25056 case E_CC_Zmode:
25057 switch (comp_code)
25058 {
25059 case NE: return ARM_NE;
25060 case EQ: return ARM_EQ;
25061 default: return ARM_NV;
25062 }
25063
25064 case E_CC_Nmode:
25065 switch (comp_code)
25066 {
25067 case NE: return ARM_MI;
25068 case EQ: return ARM_PL;
25069 default: return ARM_NV;
25070 }
25071
25072 case E_CCFPEmode:
25073 case E_CCFPmode:
25074 /* We can handle all cases except UNEQ and LTGT. */
25075 switch (comp_code)
25076 {
25077 case GE: return ARM_GE;
25078 case GT: return ARM_GT;
25079 case LE: return ARM_LS;
25080 case LT: return ARM_MI;
25081 case NE: return ARM_NE;
25082 case EQ: return ARM_EQ;
25083 case ORDERED: return ARM_VC;
25084 case UNORDERED: return ARM_VS;
25085 case UNLT: return ARM_LT;
25086 case UNLE: return ARM_LE;
25087 case UNGT: return ARM_HI;
25088 case UNGE: return ARM_PL;
25089 /* UNEQ and LTGT do not have a representation. */
25090 case UNEQ: /* Fall through. */
25091 case LTGT: /* Fall through. */
25092 default: return ARM_NV;
25093 }
25094
25095 case E_CC_SWPmode:
25096 switch (comp_code)
25097 {
25098 case NE: return ARM_NE;
25099 case EQ: return ARM_EQ;
25100 case GE: return ARM_LE;
25101 case GT: return ARM_LT;
25102 case LE: return ARM_GE;
25103 case LT: return ARM_GT;
25104 case GEU: return ARM_LS;
25105 case GTU: return ARM_CC;
25106 case LEU: return ARM_CS;
25107 case LTU: return ARM_HI;
25108 default: return ARM_NV;
25109 }
25110
25111 case E_CC_Cmode:
25112 switch (comp_code)
25113 {
25114 case LTU: return ARM_CS;
25115 case GEU: return ARM_CC;
25116 default: return ARM_NV;
25117 }
25118
25119 case E_CC_NVmode:
25120 switch (comp_code)
25121 {
25122 case GE: return ARM_GE;
25123 case LT: return ARM_LT;
25124 default: return ARM_NV;
25125 }
25126
25127 case E_CC_Bmode:
25128 switch (comp_code)
25129 {
25130 case GEU: return ARM_CS;
25131 case LTU: return ARM_CC;
25132 default: return ARM_NV;
25133 }
25134
25135 case E_CC_Vmode:
25136 switch (comp_code)
25137 {
25138 case NE: return ARM_VS;
25139 case EQ: return ARM_VC;
25140 default: return ARM_NV;
25141 }
25142
25143 case E_CC_ADCmode:
25144 switch (comp_code)
25145 {
25146 case GEU: return ARM_CS;
25147 case LTU: return ARM_CC;
25148 default: return ARM_NV;
25149 }
25150
25151 case E_CCmode:
25152 case E_CC_RSBmode:
25153 switch (comp_code)
25154 {
25155 case NE: return ARM_NE;
25156 case EQ: return ARM_EQ;
25157 case GE: return ARM_GE;
25158 case GT: return ARM_GT;
25159 case LE: return ARM_LE;
25160 case LT: return ARM_LT;
25161 case GEU: return ARM_CS;
25162 case GTU: return ARM_HI;
25163 case LEU: return ARM_LS;
25164 case LTU: return ARM_CC;
25165 default: return ARM_NV;
25166 }
25167
25168 default: gcc_unreachable ();
25169 }
25170 }
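/* For example (illustrative only), a comparison such as
   (ge (reg:CC CC_REGNUM) (const_int 0)) in E_CCmode maps to ARM_GE, which
   selects the "ge" suffix from arm_condition_codes.  */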
25171
25172 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25173 static enum arm_cond_code
25174 get_arm_condition_code (rtx comparison)
25175 {
25176 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25177 gcc_assert (code != ARM_NV);
25178 return code;
25179 }
25180
25181 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25182 code registers when not targeting Thumb1. The VFP condition register
25183 only exists when generating hard-float code. */
25184 static bool
25185 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25186 {
25187 if (!TARGET_32BIT)
25188 return false;
25189
25190 *p1 = CC_REGNUM;
25191 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25192 return true;
25193 }
25194
25195 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25196 instructions. */
25197 void
25198 thumb2_final_prescan_insn (rtx_insn *insn)
25199 {
25200 rtx_insn *first_insn = insn;
25201 rtx body = PATTERN (insn);
25202 rtx predicate;
25203 enum arm_cond_code code;
25204 int n;
25205 int mask;
25206 int max;
25207
25208 /* max_insns_skipped in the tune was already taken into account in the
25209 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
25210 just emit the IT blocks as large as we can; it does not make sense to split
25211 the IT blocks. */
25212 max = MAX_INSN_PER_IT_BLOCK;
25213
25214 /* Remove the previous insn from the count of insns to be output. */
25215 if (arm_condexec_count)
25216 arm_condexec_count--;
25217
25218 /* Nothing to do if we are already inside a conditional block. */
25219 if (arm_condexec_count)
25220 return;
25221
25222 if (GET_CODE (body) != COND_EXEC)
25223 return;
25224
25225 /* Conditional jumps are implemented directly. */
25226 if (JUMP_P (insn))
25227 return;
25228
25229 predicate = COND_EXEC_TEST (body);
25230 arm_current_cc = get_arm_condition_code (predicate);
25231
25232 n = get_attr_ce_count (insn);
25233 arm_condexec_count = 1;
25234 arm_condexec_mask = (1 << n) - 1;
25235 arm_condexec_masklen = n;
25236 /* See if subsequent instructions can be combined into the same block. */
25237 for (;;)
25238 {
25239 insn = next_nonnote_insn (insn);
25240
25241 /* Jumping into the middle of an IT block is illegal, so a label or
25242 barrier terminates the block. */
25243 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25244 break;
25245
25246 body = PATTERN (insn);
25247 /* USE and CLOBBER aren't really insns, so just skip them. */
25248 if (GET_CODE (body) == USE
25249 || GET_CODE (body) == CLOBBER)
25250 continue;
25251
25252 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25253 if (GET_CODE (body) != COND_EXEC)
25254 break;
25255 /* Stop if this insn would push the IT block past its maximum length. */
25256 n = get_attr_ce_count (insn);
25257 if (arm_condexec_masklen + n > max)
25258 break;
25259
25260 predicate = COND_EXEC_TEST (body);
25261 code = get_arm_condition_code (predicate);
25262 mask = (1 << n) - 1;
25263 if (arm_current_cc == code)
25264 arm_condexec_mask |= (mask << arm_condexec_masklen);
25265 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25266 break;
25267
25268 arm_condexec_count++;
25269 arm_condexec_masklen += n;
25270
25271 /* A jump must be the last instruction in a conditional block. */
25272 if (JUMP_P (insn))
25273 break;
25274 }
25275 /* Restore recog_data (getting the attributes of other insns can
25276 destroy this array, but final.cc assumes that it remains intact
25277 across this call). */
25278 extract_constrain_insn_cached (first_insn);
25279 }
25280
25281 void
25282 arm_final_prescan_insn (rtx_insn *insn)
25283 {
25284 /* BODY will hold the body of INSN. */
25285 rtx body = PATTERN (insn);
25286
25287 /* This will be 1 if trying to repeat the trick, and things need to be
25288 reversed if it appears to fail. */
25289 int reverse = 0;
25290
25291 /* If we start with a return insn, we only succeed if we find another one. */
25292 int seeking_return = 0;
25293 enum rtx_code return_code = UNKNOWN;
25294
25295 /* START_INSN will hold the insn from where we start looking. This is the
25296 first insn after the following code_label if REVERSE is true. */
25297 rtx_insn *start_insn = insn;
25298
25299 /* If in state 4, check if the target branch is reached, in order to
25300 change back to state 0. */
25301 if (arm_ccfsm_state == 4)
25302 {
25303 if (insn == arm_target_insn)
25304 {
25305 arm_target_insn = NULL;
25306 arm_ccfsm_state = 0;
25307 }
25308 return;
25309 }
25310
25311 /* If in state 3, it is possible to repeat the trick, if this insn is an
25312 unconditional branch to a label, and immediately following this branch
25313 is the previous target label which is only used once, and the label this
25314 branch jumps to is not too far off. */
25315 if (arm_ccfsm_state == 3)
25316 {
25317 if (simplejump_p (insn))
25318 {
25319 start_insn = next_nonnote_insn (start_insn);
25320 if (BARRIER_P (start_insn))
25321 {
25322 /* XXX Isn't this always a barrier? */
25323 start_insn = next_nonnote_insn (start_insn);
25324 }
25325 if (LABEL_P (start_insn)
25326 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25327 && LABEL_NUSES (start_insn) == 1)
25328 reverse = TRUE;
25329 else
25330 return;
25331 }
25332 else if (ANY_RETURN_P (body))
25333 {
25334 start_insn = next_nonnote_insn (start_insn);
25335 if (BARRIER_P (start_insn))
25336 start_insn = next_nonnote_insn (start_insn);
25337 if (LABEL_P (start_insn)
25338 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25339 && LABEL_NUSES (start_insn) == 1)
25340 {
25341 reverse = TRUE;
25342 seeking_return = 1;
25343 return_code = GET_CODE (body);
25344 }
25345 else
25346 return;
25347 }
25348 else
25349 return;
25350 }
25351
25352 gcc_assert (!arm_ccfsm_state || reverse);
25353 if (!JUMP_P (insn))
25354 return;
25355
25356 /* This jump might be paralleled with a clobber of the condition codes;
25357 the jump should always come first. */
25358 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25359 body = XVECEXP (body, 0, 0);
25360
25361 if (reverse
25362 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25363 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25364 {
25365 int insns_skipped;
25366 int fail = FALSE, succeed = FALSE;
25367 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25368 int then_not_else = TRUE;
25369 rtx_insn *this_insn = start_insn;
25370 rtx label = 0;
25371
25372 /* Register the insn jumped to. */
25373 if (reverse)
25374 {
25375 if (!seeking_return)
25376 label = XEXP (SET_SRC (body), 0);
25377 }
25378 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25379 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25380 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25381 {
25382 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25383 then_not_else = FALSE;
25384 }
25385 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25386 {
25387 seeking_return = 1;
25388 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25389 }
25390 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25391 {
25392 seeking_return = 1;
25393 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25394 then_not_else = FALSE;
25395 }
25396 else
25397 gcc_unreachable ();
25398
25399 /* See how many insns this branch skips, and what kind of insns. If all
25400 insns are okay, and the label or unconditional branch to the same
25401 label is not too far away, succeed. */
25402 for (insns_skipped = 0;
25403 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25404 {
25405 rtx scanbody;
25406
25407 this_insn = next_nonnote_insn (this_insn);
25408 if (!this_insn)
25409 break;
25410
25411 switch (GET_CODE (this_insn))
25412 {
25413 case CODE_LABEL:
25414 /* Succeed if it is the target label, otherwise fail since
25415 control falls in from somewhere else. */
25416 if (this_insn == label)
25417 {
25418 arm_ccfsm_state = 1;
25419 succeed = TRUE;
25420 }
25421 else
25422 fail = TRUE;
25423 break;
25424
25425 case BARRIER:
25426 /* Succeed if the following insn is the target label.
25427 Otherwise fail.
25428 If return insns are used then the last insn in a function
25429 will be a barrier. */
25430 this_insn = next_nonnote_insn (this_insn);
25431 if (this_insn && this_insn == label)
25432 {
25433 arm_ccfsm_state = 1;
25434 succeed = TRUE;
25435 }
25436 else
25437 fail = TRUE;
25438 break;
25439
25440 case CALL_INSN:
25441 /* The AAPCS says that conditional calls should not be
25442 used since they make interworking inefficient (the
25443 linker can't transform BL<cond> into BLX). That's
25444 only a problem if the machine has BLX. */
25445 if (arm_arch5t)
25446 {
25447 fail = TRUE;
25448 break;
25449 }
25450
25451 /* Succeed if the following insn is the target label, or
25452 if the following two insns are a barrier and the
25453 target label. */
25454 this_insn = next_nonnote_insn (this_insn);
25455 if (this_insn && BARRIER_P (this_insn))
25456 this_insn = next_nonnote_insn (this_insn);
25457
25458 if (this_insn && this_insn == label
25459 && insns_skipped < max_insns_skipped)
25460 {
25461 arm_ccfsm_state = 1;
25462 succeed = TRUE;
25463 }
25464 else
25465 fail = TRUE;
25466 break;
25467
25468 case JUMP_INSN:
25469 /* If this is an unconditional branch to the same label, succeed.
25470 If it is to another label, do nothing. If it is conditional,
25471 fail. */
25472 /* XXX Probably, the tests for SET and the PC are
25473 unnecessary. */
25474
25475 scanbody = PATTERN (this_insn);
25476 if (GET_CODE (scanbody) == SET
25477 && GET_CODE (SET_DEST (scanbody)) == PC)
25478 {
25479 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25480 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25481 {
25482 arm_ccfsm_state = 2;
25483 succeed = TRUE;
25484 }
25485 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25486 fail = TRUE;
25487 }
25488 /* Fail if a conditional return is undesirable (e.g. on a
25489 StrongARM), but still allow this if optimizing for size. */
25490 else if (GET_CODE (scanbody) == return_code
25491 && !use_return_insn (TRUE, NULL)
25492 && !optimize_size)
25493 fail = TRUE;
25494 else if (GET_CODE (scanbody) == return_code)
25495 {
25496 arm_ccfsm_state = 2;
25497 succeed = TRUE;
25498 }
25499 else if (GET_CODE (scanbody) == PARALLEL)
25500 {
25501 switch (get_attr_conds (this_insn))
25502 {
25503 case CONDS_NOCOND:
25504 break;
25505 default:
25506 fail = TRUE;
25507 break;
25508 }
25509 }
25510 else
25511 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25512
25513 break;
25514
25515 case INSN:
25516 /* Instructions using or affecting the condition codes make it
25517 fail. */
25518 scanbody = PATTERN (this_insn);
25519 if (!(GET_CODE (scanbody) == SET
25520 || GET_CODE (scanbody) == PARALLEL)
25521 || get_attr_conds (this_insn) != CONDS_NOCOND)
25522 fail = TRUE;
25523 break;
25524
25525 default:
25526 break;
25527 }
25528 }
25529 if (succeed)
25530 {
25531 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25532 arm_target_label = CODE_LABEL_NUMBER (label);
25533 else
25534 {
25535 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25536
25537 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25538 {
25539 this_insn = next_nonnote_insn (this_insn);
25540 gcc_assert (!this_insn
25541 || (!BARRIER_P (this_insn)
25542 && !LABEL_P (this_insn)));
25543 }
25544 if (!this_insn)
25545 {
25546 /* Oh, dear! We ran off the end; give up. */
25547 extract_constrain_insn_cached (insn);
25548 arm_ccfsm_state = 0;
25549 arm_target_insn = NULL;
25550 return;
25551 }
25552 arm_target_insn = this_insn;
25553 }
25554
25555 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25556 what it was. */
25557 if (!reverse)
25558 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25559
25560 if (reverse || then_not_else)
25561 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25562 }
25563
25564 /* Restore recog_data (getting the attributes of other insns can
25565 destroy this array, but final.cc assumes that it remains intact
25566 across this call). */
25567 extract_constrain_insn_cached (insn);
25568 }
25569 }
25570
25571 /* Output IT instructions. */
25572 void
25573 thumb2_asm_output_opcode (FILE * stream)
25574 {
25575 char buff[5];
25576 int n;
25577
25578 if (arm_condexec_mask)
25579 {
25580 for (n = 0; n < arm_condexec_masklen; n++)
25581 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25582 buff[n] = 0;
25583 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25584 arm_condition_codes[arm_current_cc]);
25585 arm_condexec_mask = 0;
25586 }
25587 }
25588
25589 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM, core regs are
25590 UNITS_PER_WORD bytes wide. */
25591 static unsigned int
25592 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25593 {
25594 if (IS_VPR_REGNUM (regno))
25595 return CEIL (GET_MODE_SIZE (mode), 2);
25596
25597 if (TARGET_32BIT
25598 && regno > PC_REGNUM
25599 && regno != FRAME_POINTER_REGNUM
25600 && regno != ARG_POINTER_REGNUM
25601 && !IS_VFP_REGNUM (regno))
25602 return 1;
25603
25604 return ARM_NUM_REGS (mode);
25605 }
25606
25607 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25608 static bool
25609 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25610 {
25611 if (GET_MODE_CLASS (mode) == MODE_CC)
25612 return (regno == CC_REGNUM
25613 || (TARGET_VFP_BASE
25614 && regno == VFPCC_REGNUM));
25615
25616 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25617 return false;
25618
25619 if (IS_VPR_REGNUM (regno))
25620 return mode == HImode
25621 || mode == V16BImode
25622 || mode == V8BImode
25623 || mode == V4BImode;
25624
25625 if (TARGET_THUMB1)
25626 /* For the Thumb we only allow values bigger than SImode in
25627 registers 0 - 6, so that there is always a second low
25628 register available to hold the upper part of the value.
25629 We probably ought to ensure that the register is the
25630 start of an even numbered register pair. */
25631 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25632
25633 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25634 {
25635 if (mode == DFmode || mode == DImode)
25636 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25637
25638 if (mode == HFmode || mode == BFmode || mode == HImode
25639 || mode == SFmode || mode == SImode)
25640 return VFP_REGNO_OK_FOR_SINGLE (regno);
25641
25642 if (TARGET_NEON)
25643 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25644 || (VALID_NEON_QREG_MODE (mode)
25645 && NEON_REGNO_OK_FOR_QUAD (regno))
25646 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25647 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25648 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25649 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25650 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25651 if (TARGET_HAVE_MVE)
25652 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25653 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25654 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25655
25656 return false;
25657 }
25658
25659 if (TARGET_REALLY_IWMMXT)
25660 {
25661 if (IS_IWMMXT_GR_REGNUM (regno))
25662 return mode == SImode;
25663
25664 if (IS_IWMMXT_REGNUM (regno))
25665 return VALID_IWMMXT_REG_MODE (mode);
25666 }
25667
25668 /* We allow almost any value to be stored in the general registers.
25669 Restrict doubleword quantities to even register pairs in ARM state
25670 so that we can use ldrd. The same restriction applies for MVE
25671 in order to support Armv8.1-M Mainline instructions.
25672 Do not allow very large Neon structure opaque modes in general
25673 registers; they would use too many. */
25674 if (regno <= LAST_ARM_REGNUM)
25675 {
25676 if (ARM_NUM_REGS (mode) > 4)
25677 return false;
25678
25679 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25680 return true;
25681
25682 return !((TARGET_LDRD || TARGET_CDE)
25683 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25684 }
25685
25686 if (regno == FRAME_POINTER_REGNUM
25687 || regno == ARG_POINTER_REGNUM)
25688 /* We only allow integers in the fake hard registers. */
25689 return GET_MODE_CLASS (mode) == MODE_INT;
25690
25691 return false;
25692 }
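/* A couple of illustrative consequences of the rules above: in ARM state
   with LDRD available, DImode is allowed in the even/odd pair r0/r1 but not
   starting at the odd register r1; and DFmode in a VFP register must
   satisfy VFP_REGNO_OK_FOR_DOUBLE.  */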
25693
25694 /* Implement TARGET_MODES_TIEABLE_P. */
25695
25696 static bool
25697 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25698 {
25699 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25700 return true;
25701
25702 /* We specifically want to allow elements of "structure" modes to
25703 be tieable to the structure. This more general condition allows
25704 other rarer situations too. */
25705 if ((TARGET_NEON
25706 && (VALID_NEON_DREG_MODE (mode1)
25707 || VALID_NEON_QREG_MODE (mode1)
25708 || VALID_NEON_STRUCT_MODE (mode1))
25709 && (VALID_NEON_DREG_MODE (mode2)
25710 || VALID_NEON_QREG_MODE (mode2)
25711 || VALID_NEON_STRUCT_MODE (mode2)))
25712 || (TARGET_HAVE_MVE
25713 && (VALID_MVE_MODE (mode1)
25714 || VALID_MVE_STRUCT_MODE (mode1))
25715 && (VALID_MVE_MODE (mode2)
25716 || VALID_MVE_STRUCT_MODE (mode2))))
25717 return true;
25718
25719 return false;
25720 }
25721
25722 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25723 not used in ARM mode. */
25724
25725 enum reg_class
25726 arm_regno_class (int regno)
25727 {
25728 if (regno == PC_REGNUM)
25729 return NO_REGS;
25730
25731 if (IS_VPR_REGNUM (regno))
25732 return VPR_REG;
25733
25734 if (TARGET_THUMB1)
25735 {
25736 if (regno == STACK_POINTER_REGNUM)
25737 return STACK_REG;
25738 if (regno == CC_REGNUM)
25739 return CC_REG;
25740 if (regno < 8)
25741 return LO_REGS;
25742 return HI_REGS;
25743 }
25744
25745 if (TARGET_THUMB2 && regno < 8)
25746 return LO_REGS;
25747
25748 if ( regno <= LAST_ARM_REGNUM
25749 || regno == FRAME_POINTER_REGNUM
25750 || regno == ARG_POINTER_REGNUM)
25751 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25752
25753 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25754 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25755
25756 if (IS_VFP_REGNUM (regno))
25757 {
25758 if (regno <= D7_VFP_REGNUM)
25759 return VFP_D0_D7_REGS;
25760 else if (regno <= LAST_LO_VFP_REGNUM)
25761 return VFP_LO_REGS;
25762 else
25763 return VFP_HI_REGS;
25764 }
25765
25766 if (IS_IWMMXT_REGNUM (regno))
25767 return IWMMXT_REGS;
25768
25769 if (IS_IWMMXT_GR_REGNUM (regno))
25770 return IWMMXT_GR_REGS;
25771
25772 return NO_REGS;
25773 }
25774
25775 /* Handle a special case when computing the offset
25776 of an argument from the frame pointer. */
25777 int
25778 arm_debugger_arg_offset (int value, rtx addr)
25779 {
25780 rtx_insn *insn;
25781
25782 /* We are only interested if dbxout_parms() failed to compute the offset. */
25783 if (value != 0)
25784 return 0;
25785
25786 /* We can only cope with the case where the address is held in a register. */
25787 if (!REG_P (addr))
25788 return 0;
25789
25790 /* If we are using the frame pointer to point at the argument, then
25791 an offset of 0 is correct. */
25792 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25793 return 0;
25794
25795 /* If we are using the stack pointer to point at the
25796 argument, then an offset of 0 is correct. */
25797 /* ??? Check this is consistent with thumb2 frame layout. */
25798 if ((TARGET_THUMB || !frame_pointer_needed)
25799 && REGNO (addr) == SP_REGNUM)
25800 return 0;
25801
25802 /* Oh dear. The argument is pointed to by a register rather
25803 than being held in a register, or being stored at a known
25804 offset from the frame pointer. Since GDB only understands
25805 those two kinds of argument we must translate the address
25806 held in the register into an offset from the frame pointer.
25807 We do this by searching through the insns for the function
25808 looking to see where this register gets its value. If the
25809 register is initialized from the frame pointer plus an offset
25810 then we are in luck and we can continue, otherwise we give up.
25811
25812 This code is exercised by producing debugging information
25813 for a function with arguments like this:
25814
25815 double func (double a, double b, int c, double d) {return d;}
25816
25817 Without this code the stab for parameter 'd' will be set to
25818 an offset of 0 from the frame pointer, rather than 8. */
25819
25820 /* The if() statement says:
25821
25822 If the insn is a normal instruction
25823 and if the insn is setting the value in a register
25824 and if the register being set is the register holding the address of the argument
25825 and if the address is computed by an addition
25826 that involves adding to a register
25827 which is the frame pointer
25828 a constant integer
25829
25830 then... */
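/* Illustratively, the kind of insn the loop below is looking for is
   (set (reg Rn) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int 8)))
   in which case VALUE becomes 8.  */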
25831
25832 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25833 {
25834 if ( NONJUMP_INSN_P (insn)
25835 && GET_CODE (PATTERN (insn)) == SET
25836 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25837 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25838 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25839 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25840 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25841 )
25842 {
25843 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25844
25845 break;
25846 }
25847 }
25848
25849 if (value == 0)
25850 {
25851 debug_rtx (addr);
25852 warning (0, "unable to compute real location of stacked parameter");
25853 value = 8; /* XXX magic hack */
25854 }
25855
25856 return value;
25857 }
25858 \f
25859 /* Implement TARGET_PROMOTED_TYPE. */
25860
25861 static tree
25862 arm_promoted_type (const_tree t)
25863 {
25864 if (SCALAR_FLOAT_TYPE_P (t)
25865 && TYPE_PRECISION (t) == 16
25866 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25867 return float_type_node;
25868 return NULL_TREE;
25869 }
25870
25871 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25872 This simply adds HFmode as a supported mode; even though we don't
25873 implement arithmetic on this type directly, it's supported by
25874 optabs conversions, much the way the double-word arithmetic is
25875 special-cased in the default hook. */
25876
25877 static bool
25878 arm_scalar_mode_supported_p (scalar_mode mode)
25879 {
25880 if (mode == HFmode)
25881 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25882 else if (ALL_FIXED_POINT_MODE_P (mode))
25883 return true;
25884 else
25885 return default_scalar_mode_supported_p (mode);
25886 }
25887
25888 /* Set the value of FLT_EVAL_METHOD.
25889 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25890
25891 0: evaluate all operations and constants, whose semantic type has at
25892 most the range and precision of type float, to the range and
25893 precision of float; evaluate all other operations and constants to
25894 the range and precision of the semantic type;
25895
25896 N, where _FloatN is a supported interchange floating type:
25897 evaluate all operations and constants, whose semantic type has at
25898 most the range and precision of _FloatN type, to the range and
25899 precision of the _FloatN type; evaluate all other operations and
25900 constants to the range and precision of the semantic type;
25901
25902 If we have the ARMv8.2-A extensions then we support _Float16 in native
25903 precision, so we should set this to 16. Otherwise, we support the type,
25904 but want to evaluate expressions in float precision, so set this to
25905 0. */
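/* For instance, with two _Float16 values a and b, the expression a + b is
   evaluated to the range and precision of float under method 0, but
   directly in _Float16 under method 16 (i.e. when TARGET_VFP_FP16INST
   provides native half-precision arithmetic).  */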
25906
25907 static enum flt_eval_method
25908 arm_excess_precision (enum excess_precision_type type)
25909 {
25910 switch (type)
25911 {
25912 case EXCESS_PRECISION_TYPE_FAST:
25913 case EXCESS_PRECISION_TYPE_STANDARD:
25914 /* We can calculate either in 16-bit range and precision or
25915 32-bit range and precision. Make that decision based on whether
25916 we have native support for the ARMv8.2-A 16-bit floating-point
25917 instructions or not. */
25918 return (TARGET_VFP_FP16INST
25919 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25920 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25921 case EXCESS_PRECISION_TYPE_IMPLICIT:
25922 case EXCESS_PRECISION_TYPE_FLOAT16:
25923 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25924 default:
25925 gcc_unreachable ();
25926 }
25927 return FLT_EVAL_METHOD_UNPREDICTABLE;
25928 }
25929
25930
25931 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25932 _Float16 if we are using anything other than ieee format for 16-bit
25933 floating point. Otherwise, punt to the default implementation. */
25934 static opt_scalar_float_mode
25935 arm_floatn_mode (int n, bool extended)
25936 {
25937 if (!extended && n == 16)
25938 {
25939 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25940 return HFmode;
25941 return opt_scalar_float_mode ();
25942 }
25943
25944 return default_floatn_mode (n, extended);
25945 }
25946
25947
25948 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25949 not to early-clobber SRC registers in the process.
25950
25951 We assume that the operands described by SRC and DEST represent a
25952 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25953 number of components into which the copy has been decomposed. */
25954 void
25955 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25956 {
25957 unsigned int i;
25958
25959 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25960 || REGNO (operands[0]) < REGNO (operands[1]))
25961 {
25962 for (i = 0; i < count; i++)
25963 {
25964 operands[2 * i] = dest[i];
25965 operands[2 * i + 1] = src[i];
25966 }
25967 }
25968 else
25969 {
25970 for (i = 0; i < count; i++)
25971 {
25972 operands[2 * i] = dest[count - i - 1];
25973 operands[2 * i + 1] = src[count - i - 1];
25974 }
25975 }
25976 }
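/* Hypothetical example: copying a two-register value from {d0, d1} into the
   overlapping destination {d1, d2} must be emitted in reverse order
   (d2 <- d1, then d1 <- d0) so that d1 is read before it is overwritten,
   which is what the higher-destination branch above arranges.  */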
25977
25978 /* Split operands into moves from op[1] + op[2] into op[0]. */
25979
25980 void
25981 neon_split_vcombine (rtx operands[3])
25982 {
25983 unsigned int dest = REGNO (operands[0]);
25984 unsigned int src1 = REGNO (operands[1]);
25985 unsigned int src2 = REGNO (operands[2]);
25986 machine_mode halfmode = GET_MODE (operands[1]);
25987 unsigned int halfregs = REG_NREGS (operands[1]);
25988 rtx destlo, desthi;
25989
25990 if (src1 == dest && src2 == dest + halfregs)
25991 {
25992 /* No-op move. Can't split to nothing; emit something. */
25993 emit_note (NOTE_INSN_DELETED);
25994 return;
25995 }
25996
25997 /* Preserve register attributes for variable tracking. */
25998 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25999 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26000 GET_MODE_SIZE (halfmode));
26001
26002 /* Special case of reversed high/low parts. Use VSWP. */
26003 if (src2 == dest && src1 == dest + halfregs)
26004 {
26005 rtx x = gen_rtx_SET (destlo, operands[1]);
26006 rtx y = gen_rtx_SET (desthi, operands[2]);
26007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26008 return;
26009 }
26010
26011 if (!reg_overlap_mentioned_p (operands[2], destlo))
26012 {
26013 /* Try to avoid unnecessary moves if part of the result
26014 is in the right place already. */
26015 if (src1 != dest)
26016 emit_move_insn (destlo, operands[1]);
26017 if (src2 != dest + halfregs)
26018 emit_move_insn (desthi, operands[2]);
26019 }
26020 else
26021 {
26022 if (src2 != dest + halfregs)
26023 emit_move_insn (desthi, operands[2]);
26024 if (src1 != dest)
26025 emit_move_insn (destlo, operands[1]);
26026 }
26027 }
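/* A hypothetical instance of the reversed case above: for q0 = vcombine of
   d1 and d0, DESTLO is d0 and DESTHI is d1, and the emitted parallel is
   matched as a single "vswp d0, d1" instead of a move sequence through a
   scratch register.  */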
26028 \f
26029 /* Return the number (counting from 0) of
26030 the least significant set bit in MASK. */
26031
26032 inline static int
26033 number_of_first_bit_set (unsigned mask)
26034 {
26035 return ctz_hwi (mask);
26036 }
26037
26038 /* Like emit_multi_reg_push, but allowing for a different set of
26039 registers to be described as saved. MASK is the set of registers
26040 to be saved; REAL_REGS is the set of registers to be described as
26041 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26042
26043 static rtx_insn *
26044 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26045 {
26046 unsigned long regno;
26047 rtx par[10], tmp, reg;
26048 rtx_insn *insn;
26049 int i, j;
26050
26051 /* Build the parallel of the registers actually being stored. */
26052 for (i = 0; mask; ++i, mask &= mask - 1)
26053 {
26054 regno = ctz_hwi (mask);
26055 reg = gen_rtx_REG (SImode, regno);
26056
26057 if (i == 0)
26058 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26059 else
26060 tmp = gen_rtx_USE (VOIDmode, reg);
26061
26062 par[i] = tmp;
26063 }
26064
26065 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26066 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26067 tmp = gen_frame_mem (BLKmode, tmp);
26068 tmp = gen_rtx_SET (tmp, par[0]);
26069 par[0] = tmp;
26070
26071 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26072 insn = emit_insn (tmp);
26073
26074 /* Always build the stack adjustment note for unwind info. */
26075 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26076 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26077 par[0] = tmp;
26078
26079 /* Build the parallel of the registers recorded as saved for unwind. */
26080 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26081 {
26082 regno = ctz_hwi (real_regs);
26083 reg = gen_rtx_REG (SImode, regno);
26084
26085 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26086 tmp = gen_frame_mem (SImode, tmp);
26087 tmp = gen_rtx_SET (tmp, reg);
26088 RTX_FRAME_RELATED_P (tmp) = 1;
26089 par[j + 1] = tmp;
26090 }
26091
26092 if (j == 0)
26093 tmp = par[0];
26094 else
26095 {
26096 RTX_FRAME_RELATED_P (par[0]) = 1;
26097 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26098 }
26099
26100 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26101
26102 return insn;
26103 }
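/* Sketch of the output, under the assumption that MASK covers r4, r5 and lr:
   the PARALLEL above becomes a single push of those three registers, and the
   attached REG_FRAME_RELATED_EXPR note records the 12-byte stack adjustment
   plus one store per register named in REAL_REGS for the unwinder.  */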
26104
26105 /* Emit code to pop registers from the stack. F is the
26106 assembly file. MASK is the registers to pop. */
26107 static void
26108 thumb_pop (FILE *f, unsigned long mask)
26109 {
26110 int regno;
26111 int lo_mask = mask & 0xFF;
26112
26113 gcc_assert (mask);
26114
26115 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26116 {
26117 /* Special case. Do not generate a POP PC statement here, do it in
26118 thumb_exit (). */
26119 thumb_exit (f, -1);
26120 return;
26121 }
26122
26123 fprintf (f, "\tpop\t{");
26124
26125 /* Look at the low registers first. */
26126 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26127 {
26128 if (lo_mask & 1)
26129 {
26130 asm_fprintf (f, "%r", regno);
26131
26132 if ((lo_mask & ~1) != 0)
26133 fprintf (f, ", ");
26134 }
26135 }
26136
26137 if (mask & (1 << PC_REGNUM))
26138 {
26139 /* Catch popping the PC. */
26140 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26141 || IS_CMSE_ENTRY (arm_current_func_type ()))
26142 {
26143 /* The PC is never popped directly; instead
26144 it is popped into r3 and then BX is used. */
26145 fprintf (f, "}\n");
26146
26147 thumb_exit (f, -1);
26148
26149 return;
26150 }
26151 else
26152 {
26153 if (mask & 0xFF)
26154 fprintf (f, ", ");
26155
26156 asm_fprintf (f, "%r", PC_REGNUM);
26157 }
26158 }
26159
26160 fprintf (f, "}\n");
26161 }
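/* For example, a MASK of r4, r5, r6 and the PC, in a function with no
   interworking, backtrace, EH-return or CMSE-entry restrictions, is printed
   as "pop {r4, r5, r6, pc}"; when those restrictions apply the PC is
   handled via thumb_exit instead.  */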
26162
26163 /* Generate code to return from a thumb function.
26164 If 'reg_containing_return_addr' is -1, then the return address is
26165 actually on the stack, at the stack pointer.
26166
26167 Note: do not forget to update the length attribute of the corresponding insn pattern
26168 when changing assembly output (e.g. the length attribute of epilogue_insns when
26169 updating Armv8-M Baseline Security Extensions register clearing
26170 sequences). */
26171 static void
26172 thumb_exit (FILE *f, int reg_containing_return_addr)
26173 {
26174 unsigned regs_available_for_popping;
26175 unsigned regs_to_pop;
26176 int pops_needed;
26177 unsigned available;
26178 unsigned required;
26179 machine_mode mode;
26180 int size;
26181 int restore_a4 = FALSE;
26182
26183 /* Compute the registers we need to pop. */
26184 regs_to_pop = 0;
26185 pops_needed = 0;
26186
26187 if (reg_containing_return_addr == -1)
26188 {
26189 regs_to_pop |= 1 << LR_REGNUM;
26190 ++pops_needed;
26191 }
26192
26193 if (TARGET_BACKTRACE)
26194 {
26195 /* Restore the (ARM) frame pointer and stack pointer. */
26196 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26197 pops_needed += 2;
26198 }
26199
26200 /* If there is nothing to pop then just emit the BX instruction and
26201 return. */
26202 if (pops_needed == 0)
26203 {
26204 if (crtl->calls_eh_return)
26205 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26206
26207 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26208 {
26209 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26210 emitted by cmse_nonsecure_entry_clear_before_return (). */
26211 if (!TARGET_HAVE_FPCXT_CMSE)
26212 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26213 reg_containing_return_addr);
26214 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26215 }
26216 else
26217 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26218 return;
26219 }
26220 /* Otherwise if we are not supporting interworking and we have not created
26221 a backtrace structure and the function was not entered in ARM mode then
26222 just pop the return address straight into the PC. */
26223 else if (!TARGET_INTERWORK
26224 && !TARGET_BACKTRACE
26225 && !is_called_in_ARM_mode (current_function_decl)
26226 && !crtl->calls_eh_return
26227 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26228 {
26229 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26230 return;
26231 }
26232
26233 /* Find out how many of the (return) argument registers we can corrupt. */
26234 regs_available_for_popping = 0;
26235
26236 /* If returning via __builtin_eh_return, the bottom three registers
26237 all contain information needed for the return. */
26238 if (crtl->calls_eh_return)
26239 size = 12;
26240 else
26241 {
26242 /* Deduce the registers used from the function's
26243 return value. This is more reliable than examining
26244 df_regs_ever_live_p () because that will be set if the register is
26245 ever used in the function, not just if the register is used
26246 to hold a return value. */
26247
26248 if (crtl->return_rtx != 0)
26249 mode = GET_MODE (crtl->return_rtx);
26250 else
26251 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26252
26253 size = GET_MODE_SIZE (mode);
26254
26255 if (size == 0)
26256 {
26257 /* In a void function we can use any argument register.
26258 In a function that returns a structure on the stack
26259 we can use the second and third argument registers. */
26260 if (mode == VOIDmode)
26261 regs_available_for_popping =
26262 (1 << ARG_REGISTER (1))
26263 | (1 << ARG_REGISTER (2))
26264 | (1 << ARG_REGISTER (3));
26265 else
26266 regs_available_for_popping =
26267 (1 << ARG_REGISTER (2))
26268 | (1 << ARG_REGISTER (3));
26269 }
26270 else if (size <= 4)
26271 regs_available_for_popping =
26272 (1 << ARG_REGISTER (2))
26273 | (1 << ARG_REGISTER (3));
26274 else if (size <= 8)
26275 regs_available_for_popping =
26276 (1 << ARG_REGISTER (3));
26277 }
26278
26279 /* Match registers to be popped with registers into which we pop them. */
26280 for (available = regs_available_for_popping,
26281 required = regs_to_pop;
26282 required != 0 && available != 0;
26283 available &= ~(available & - available),
26284 required &= ~(required & - required))
26285 -- pops_needed;
26286
26287 /* If we have any popping registers left over, remove them. */
26288 if (available > 0)
26289 regs_available_for_popping &= ~available;
26290
26291 /* Otherwise if we need another popping register we can use
26292 the fourth argument register. */
26293 else if (pops_needed)
26294 {
26295 /* If we have not found any free argument registers and
26296 reg a4 contains the return address, we must move it. */
26297 if (regs_available_for_popping == 0
26298 && reg_containing_return_addr == LAST_ARG_REGNUM)
26299 {
26300 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26301 reg_containing_return_addr = LR_REGNUM;
26302 }
26303 else if (size > 12)
26304 {
26305 /* Register a4 is being used to hold part of the return value,
26306 but we have dire need of a free, low register. */
26307 restore_a4 = TRUE;
26308
26309 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26310 }
26311
26312 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26313 {
26314 /* The fourth argument register is available. */
26315 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26316
26317 --pops_needed;
26318 }
26319 }
26320
26321 /* Pop as many registers as we can. */
26322 thumb_pop (f, regs_available_for_popping);
26323
26324 /* Process the registers we popped. */
26325 if (reg_containing_return_addr == -1)
26326 {
26327 /* The return address was popped into the lowest numbered register. */
26328 regs_to_pop &= ~(1 << LR_REGNUM);
26329
26330 reg_containing_return_addr =
26331 number_of_first_bit_set (regs_available_for_popping);
26332
26333 /* Remove this register from the mask of available registers, so that
26334 the return address will not be corrupted by further pops. */
26335 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26336 }
26337
26338 /* If we popped other registers then handle them here. */
26339 if (regs_available_for_popping)
26340 {
26341 int frame_pointer;
26342
26343 /* Work out which register currently contains the frame pointer. */
26344 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26345
26346 /* Move it into the correct place. */
26347 asm_fprintf (f, "\tmov\t%r, %r\n",
26348 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26349
26350 /* (Temporarily) remove it from the mask of popped registers. */
26351 regs_available_for_popping &= ~(1 << frame_pointer);
26352 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26353
26354 if (regs_available_for_popping)
26355 {
26356 int stack_pointer;
26357
26358 /* We popped the stack pointer as well,
26359 find the register that contains it. */
26360 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26361
26362 /* Move it into the stack register. */
26363 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26364
26365 /* At this point we have popped all necessary registers, so
26366 do not worry about restoring regs_available_for_popping
26367 to its correct value:
26368
26369 assert (pops_needed == 0)
26370 assert (regs_available_for_popping == (1 << frame_pointer))
26371 assert (regs_to_pop == (1 << STACK_POINTER)) */
26372 }
26373 else
26374 {
26375 /* Since we have just moved the popped value into the frame
26376 pointer, the popping register is available for reuse, and
26377 we know that we still have the stack pointer left to pop. */
26378 regs_available_for_popping |= (1 << frame_pointer);
26379 }
26380 }
26381
26382 /* If we still have registers left on the stack, but we no longer have
26383 any registers into which we can pop them, then we must move the return
26384 address into the link register and make available the register that
26385 contained it. */
26386 if (regs_available_for_popping == 0 && pops_needed > 0)
26387 {
26388 regs_available_for_popping |= 1 << reg_containing_return_addr;
26389
26390 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26391 reg_containing_return_addr);
26392
26393 reg_containing_return_addr = LR_REGNUM;
26394 }
26395
26396 /* If we have registers left on the stack then pop some more.
26397 We know that at most we will want to pop FP and SP. */
26398 if (pops_needed > 0)
26399 {
26400 int popped_into;
26401 int move_to;
26402
26403 thumb_pop (f, regs_available_for_popping);
26404
26405 /* We have popped either FP or SP.
26406 Move whichever one it is into the correct register. */
26407 popped_into = number_of_first_bit_set (regs_available_for_popping);
26408 move_to = number_of_first_bit_set (regs_to_pop);
26409
26410 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26411 --pops_needed;
26412 }
26413
26414 /* If we still have not popped everything then we must have only
26415 had one register available to us and we are now popping the SP. */
26416 if (pops_needed > 0)
26417 {
26418 int popped_into;
26419
26420 thumb_pop (f, regs_available_for_popping);
26421
26422 popped_into = number_of_first_bit_set (regs_available_for_popping);
26423
26424 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26425 /*
26426 assert (regs_to_pop == (1 << STACK_POINTER))
26427 assert (pops_needed == 1)
26428 */
26429 }
26430
26431 /* If necessary restore the a4 register. */
26432 if (restore_a4)
26433 {
26434 if (reg_containing_return_addr != LR_REGNUM)
26435 {
26436 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26437 reg_containing_return_addr = LR_REGNUM;
26438 }
26439
26440 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26441 }
26442
26443 if (crtl->calls_eh_return)
26444 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26445
26446 /* Return to caller. */
26447 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26448 {
26449 /* This is for the cases where LR is not being used to contain the return
26450 address. It may therefore contain information that we might not want
26451 to leak, hence it must be cleared. The value in R0 will never be a
26452 secret at this point, so it is safe to use it, see the clearing code
26453 in cmse_nonsecure_entry_clear_before_return (). */
26454 if (reg_containing_return_addr != LR_REGNUM)
26455 asm_fprintf (f, "\tmov\tlr, r0\n");
26456
26457 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26458 by cmse_nonsecure_entry_clear_before_return (). */
26459 if (!TARGET_HAVE_FPCXT_CMSE)
26460 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26461 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26462 }
26463 else
26464 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26465 }
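/* As a rough worked example: when only the return address needs popping and
   interworking forces a BX return, a void function typically ends with
   something like "pop {r0}" followed by "bx r0", r0 being one of the
   argument registers that are free to corrupt at this point.  */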
26466 \f
26467 /* Scan INSN just before assembler is output for it.
26468 For Thumb-1, we track the status of the condition codes; this
26469 information is used in the cbranchsi4_insn pattern. */
26470 void
26471 thumb1_final_prescan_insn (rtx_insn *insn)
26472 {
26473 if (flag_print_asm_name)
26474 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26475 INSN_ADDRESSES (INSN_UID (insn)));
26476 /* Don't overwrite the previous setter when we get to a cbranch. */
26477 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26478 {
26479 enum attr_conds conds;
26480
26481 if (cfun->machine->thumb1_cc_insn)
26482 {
26483 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26484 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26485 CC_STATUS_INIT;
26486 }
26487 conds = get_attr_conds (insn);
26488 if (conds == CONDS_SET)
26489 {
26490 rtx set = single_set (insn);
26491 cfun->machine->thumb1_cc_insn = insn;
26492 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26493 cfun->machine->thumb1_cc_op1 = const0_rtx;
26494 cfun->machine->thumb1_cc_mode = CC_NZmode;
26495 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26496 {
26497 rtx src1 = XEXP (SET_SRC (set), 1);
26498 if (src1 == const0_rtx)
26499 cfun->machine->thumb1_cc_mode = CCmode;
26500 }
26501 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26502 {
26503 /* Record the src register operand instead of dest because
26504 cprop_hardreg pass propagates src. */
26505 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26506 }
26507 }
26508 else if (conds != CONDS_NOCOND)
26509 cfun->machine->thumb1_cc_insn = NULL_RTX;
26510 }
26511
26512 /* Check whether an unexpected far jump is used. */
26513 if (cfun->machine->lr_save_eliminated
26514 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26515 internal_error("Unexpected thumb1 far jump");
26516 }
26517
26518 int
26519 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26520 {
26521 unsigned HOST_WIDE_INT mask = 0xff;
26522 int i;
26523
26524 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26525 if (val == 0) /* XXX */
26526 return 0;
26527
26528 for (i = 0; i < 25; i++)
26529 if ((val & (mask << i)) == val)
26530 return 1;
26531
26532 return 0;
26533 }
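/* For example, 0x00ff0000 is accepted (0xff shifted left by 16), whereas
   0x00000101 is rejected because no single contiguous 8-bit window covers
   both set bits.  */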
26534
26535 /* Returns nonzero if the current function contains,
26536 or might contain a far jump. */
26537 static int
26538 thumb_far_jump_used_p (void)
26539 {
26540 rtx_insn *insn;
26541 bool far_jump = false;
26542 unsigned int func_size = 0;
26543
26544 /* If we have already decided that far jumps may be used,
26545 do not bother checking again, and always return true even if
26546 it turns out that they are not being used. Once we have made
26547 the decision that far jumps are present (and that hence the link
26548 register will be pushed onto the stack) we cannot go back on it. */
26549 if (cfun->machine->far_jump_used)
26550 return 1;
26551
26552 /* If this function is not being called from the prologue/epilogue
26553 generation code then it must be being called from the
26554 INITIAL_ELIMINATION_OFFSET macro. */
26555 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26556 {
26557 /* In this case we know that we are being asked about the elimination
26558 of the arg pointer register. If that register is not being used,
26559 then there are no arguments on the stack, and we do not have to
26560 worry that a far jump might force the prologue to push the link
26561 register, changing the stack offsets. In this case we can just
26562 return false, since the presence of far jumps in the function will
26563 not affect stack offsets.
26564
26565 If the arg pointer is live (or if it was live, but has now been
26566 eliminated and so set to dead) then we do have to test to see if
26567 the function might contain a far jump. This test can lead to some
26568 false positives, since before reload is completed the length of
26569 branch instructions is not known, so gcc defaults to returning their
26570 longest length, which in turn sets the far jump attribute to true.
26571
26572 A false positive will not result in bad code being generated, but it
26573 will result in a needless push and pop of the link register. We
26574 hope that this does not occur too often.
26575
26576 If we need doubleword stack alignment this could affect the other
26577 elimination offsets so we can't risk getting it wrong. */
26578 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26579 cfun->machine->arg_pointer_live = 1;
26580 else if (!cfun->machine->arg_pointer_live)
26581 return 0;
26582 }
26583
26584 /* We should not change far_jump_used during or after reload, as there is
26585 no chance to change stack frame layout. */
26586 if (reload_in_progress || reload_completed)
26587 return 0;
26588
26589 /* Check to see if the function contains a branch
26590 insn with the far jump attribute set. */
26591 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26592 {
26593 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26594 {
26595 far_jump = true;
26596 }
26597 func_size += get_attr_length (insn);
26598 }
26599
26600 /* The far_jump attribute will always be true for thumb1 before the
26601 shorten_branch pass, so checking the far_jump attribute before
26602 shorten_branch isn't very useful.
26603
26604 The following heuristic tries to estimate more accurately whether a far jump
26605 may finally be used. The heuristic is very conservative, as there is
26606 no chance to roll back the decision not to use a far jump.
26607
26608 The Thumb1 long branch offset range is -2048 to 2046. In the worst case each
26609 2-byte insn is associated with a 4-byte constant pool entry. Using a
26610 function size of 2048/3 as the threshold is conservative enough. */
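/* As a rough worked example of the threshold: an insn total of 683 bytes
   gives 683 * 3 == 2049 >= 2048, so far jumps are assumed; at 682 bytes
   (2046 < 2048) they are not.  */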
26611 if (far_jump)
26612 {
26613 if ((func_size * 3) >= 2048)
26614 {
26615 /* Record the fact that we have decided that
26616 the function does use far jumps. */
26617 cfun->machine->far_jump_used = 1;
26618 return 1;
26619 }
26620 }
26621
26622 return 0;
26623 }
26624
26625 /* Return nonzero if FUNC must be entered in ARM mode. */
26626 static bool
26627 is_called_in_ARM_mode (tree func)
26628 {
26629 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26630
26631 /* Ignore the problem about functions whose address is taken. */
26632 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26633 return true;
26634
26635 #ifdef ARM_PE
26636 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26637 #else
26638 return false;
26639 #endif
26640 }
26641
26642 /* Given the stack offsets and register mask in OFFSETS, decide how
26643 many additional registers to push instead of subtracting a constant
26644 from SP. For epilogues the principle is the same except we use pop.
26645 FOR_PROLOGUE indicates which we're generating. */
26646 static int
26647 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26648 {
26649 HOST_WIDE_INT amount;
26650 unsigned long live_regs_mask = offsets->saved_regs_mask;
26651 /* Extract a mask of the ones we can give to the Thumb's push/pop
26652 instruction. */
26653 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26654 /* Then count how many other high registers will need to be pushed. */
26655 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26656 int n_free, reg_base, size;
26657
26658 if (!for_prologue && frame_pointer_needed)
26659 amount = offsets->locals_base - offsets->saved_regs;
26660 else
26661 amount = offsets->outgoing_args - offsets->saved_regs;
26662
26663 /* If the stack frame size is 512 exactly, we can save one load
26664 instruction, which should make this a win even when optimizing
26665 for speed. */
26666 if (!optimize_size && amount != 512)
26667 return 0;
26668
26669 /* Can't do this if there are high registers to push. */
26670 if (high_regs_pushed != 0)
26671 return 0;
26672
26673 /* Shouldn't do it in the prologue if no registers would normally
26674 be pushed at all. In the epilogue, also allow it if we'll have
26675 a pop insn for the PC. */
26676 if (l_mask == 0
26677 && (for_prologue
26678 || TARGET_BACKTRACE
26679 || (live_regs_mask & 1 << LR_REGNUM) == 0
26680 || TARGET_INTERWORK
26681 || crtl->args.pretend_args_size != 0))
26682 return 0;
26683
26684 /* Don't do this if thumb_expand_prologue wants to emit instructions
26685 between the push and the stack frame allocation. */
26686 if (for_prologue
26687 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26688 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26689 return 0;
26690
26691 reg_base = 0;
26692 n_free = 0;
26693 if (!for_prologue)
26694 {
26695 size = arm_size_return_regs ();
26696 reg_base = ARM_NUM_INTS (size);
26697 live_regs_mask >>= reg_base;
26698 }
26699
26700 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26701 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26702 {
26703 live_regs_mask >>= 1;
26704 n_free++;
26705 }
26706
26707 if (n_free == 0)
26708 return 0;
26709 gcc_assert (amount / 4 * 4 == amount);
26710
26711 if (amount >= 512 && (amount - n_free * 4) < 512)
26712 return (amount - 508) / 4;
26713 if (amount <= n_free * 4)
26714 return amount / 4;
26715 return 0;
26716 }
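/* Worked example (illustrative): with AMOUNT == 516 and two usable free
   low registers, the function above returns (516 - 508) / 4 == 2, so two
   extra registers are pushed/popped and the remaining SP adjustment
   becomes 508, which fits in a single Thumb-1 SP add/sub.  */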
26717
26718 /* The bits which aren't usefully expanded as rtl. */
26719 const char *
26720 thumb1_unexpanded_epilogue (void)
26721 {
26722 arm_stack_offsets *offsets;
26723 int regno;
26724 unsigned long live_regs_mask = 0;
26725 int high_regs_pushed = 0;
26726 int extra_pop;
26727 int had_to_push_lr;
26728 int size;
26729
26730 if (cfun->machine->return_used_this_function != 0)
26731 return "";
26732
26733 if (IS_NAKED (arm_current_func_type ()))
26734 return "";
26735
26736 offsets = arm_get_frame_offsets ();
26737 live_regs_mask = offsets->saved_regs_mask;
26738 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26739
26740 /* See if we can deduce the registers used from the function's return
26741 value.  This is more reliable than examining df_regs_ever_live_p ()
26742 because that will be set if the register is ever used in the function,
26743 not just if the register is used to hold a return value. */
26744 size = arm_size_return_regs ();
26745
26746 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26747 if (extra_pop > 0)
26748 {
26749 unsigned long extra_mask = (1 << extra_pop) - 1;
26750 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26751 }
26752
26753 /* The prolog may have pushed some high registers to use as
26754 work registers. e.g. the testsuite file:
26755 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26756 compiles to produce:
26757 push {r4, r5, r6, r7, lr}
26758 mov r7, r9
26759 mov r6, r8
26760 push {r6, r7}
26761 as part of the prolog. We have to undo that pushing here. */
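  /* For that example the code below would emit, roughly:
	pop	{r6, r7}
	mov	r9, r7
	mov	r8, r6
     (illustrative only; the low registers actually used depend on which
     ones are free at this point).  */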
26762
26763 if (high_regs_pushed)
26764 {
26765 unsigned long mask = live_regs_mask & 0xff;
26766 int next_hi_reg;
26767
26768 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26769
26770 if (mask == 0)
26771 /* Oh dear! We have no low registers into which we can pop
26772 high registers! */
26773 internal_error
26774 ("no low registers available for popping high registers");
26775
26776 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26777 if (live_regs_mask & (1 << next_hi_reg))
26778 break;
26779
26780 while (high_regs_pushed)
26781 {
26782 /* Find lo register(s) into which the high register(s) can
26783 be popped. */
26784 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26785 {
26786 if (mask & (1 << regno))
26787 high_regs_pushed--;
26788 if (high_regs_pushed == 0)
26789 break;
26790 }
26791
26792 if (high_regs_pushed == 0 && regno >= 0)
26793 mask &= ~((1 << regno) - 1);
26794
26795 /* Pop the values into the low register(s). */
26796 thumb_pop (asm_out_file, mask);
26797
26798 /* Move the value(s) into the high registers. */
26799 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26800 {
26801 if (mask & (1 << regno))
26802 {
26803 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26804 regno);
26805
26806 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26807 next_hi_reg--)
26808 if (live_regs_mask & (1 << next_hi_reg))
26809 break;
26810 }
26811 }
26812 }
26813 live_regs_mask &= ~0x0f00;
26814 }
26815
26816 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26817 live_regs_mask &= 0xff;
26818
26819 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26820 {
26821 /* Pop the return address into the PC. */
26822 if (had_to_push_lr)
26823 live_regs_mask |= 1 << PC_REGNUM;
26824
26825 /* Either no argument registers were pushed or a backtrace
26826 structure was created which includes an adjusted stack
26827 pointer, so just pop everything. */
26828 if (live_regs_mask)
26829 thumb_pop (asm_out_file, live_regs_mask);
26830
26831 /* We have either just popped the return address into the
26832 PC or it was kept in LR for the entire function.
26833 Note that thumb_pop has already called thumb_exit if the
26834 PC was in the list. */
26835 if (!had_to_push_lr)
26836 thumb_exit (asm_out_file, LR_REGNUM);
26837 }
26838 else
26839 {
26840 /* Pop everything but the return address. */
26841 if (live_regs_mask)
26842 thumb_pop (asm_out_file, live_regs_mask);
26843
26844 if (had_to_push_lr)
26845 {
26846 if (size > 12)
26847 {
26848 /* We have no free low regs, so save one. */
26849 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26850 LAST_ARG_REGNUM);
26851 }
26852
26853 /* Get the return address into a temporary register. */
26854 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26855
26856 if (size > 12)
26857 {
26858 /* Move the return address to lr. */
26859 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26860 LAST_ARG_REGNUM);
26861 /* Restore the low register. */
26862 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26863 IP_REGNUM);
26864 regno = LR_REGNUM;
26865 }
26866 else
26867 regno = LAST_ARG_REGNUM;
26868 }
26869 else
26870 regno = LR_REGNUM;
26871
26872 /* Remove the argument registers that were pushed onto the stack. */
26873 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26874 SP_REGNUM, SP_REGNUM,
26875 crtl->args.pretend_args_size);
26876
26877 thumb_exit (asm_out_file, regno);
26878 }
26879
26880 return "";
26881 }
26882
26883 /* Functions to save and restore machine-specific function data. */
26884 static struct machine_function *
26885 arm_init_machine_status (void)
26886 {
26887 struct machine_function *machine;
26888 machine = ggc_cleared_alloc<machine_function> ();
26889
26890 #if ARM_FT_UNKNOWN != 0
26891 machine->func_type = ARM_FT_UNKNOWN;
26892 #endif
26893 machine->static_chain_stack_bytes = -1;
26894 return machine;
26895 }
26896
26897 /* Return an RTX indicating where the return address to the
26898 calling function can be found. */
26899 rtx
26900 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26901 {
26902 if (count != 0)
26903 return NULL_RTX;
26904
26905 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26906 }
26907
26908 /* Do anything needed before RTL is emitted for each function. */
26909 void
26910 arm_init_expanders (void)
26911 {
26912 /* Arrange to initialize and mark the machine per-function status. */
26913 init_machine_status = arm_init_machine_status;
26914
26915 /* This is to stop the combine pass optimizing away the alignment
26916 adjustment of va_arg. */
26917 /* ??? It is claimed that this should not be necessary. */
26918 if (cfun)
26919 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26920 }
26921
26922 /* Return true if FUNC is compiled in a different mode (ARM vs. Thumb) from the current one. */
26923
26924 bool
26925 arm_change_mode_p (tree func)
26926 {
26927 if (TREE_CODE (func) != FUNCTION_DECL)
26928 return false;
26929
26930 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26931
26932 if (!callee_tree)
26933 callee_tree = target_option_default_node;
26934
26935 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26936 int flags = callee_opts->x_target_flags;
26937
26938 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26939 }
26940
26941 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26942 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26943 to point at the base of the local variables after static stack
26944 space for a function has been allocated. */
26945
26946 HOST_WIDE_INT
26947 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26948 {
26949 arm_stack_offsets *offsets;
26950
26951 offsets = arm_get_frame_offsets ();
26952
26953 switch (from)
26954 {
26955 case ARG_POINTER_REGNUM:
26956 switch (to)
26957 {
26958 case STACK_POINTER_REGNUM:
26959 return offsets->outgoing_args - offsets->saved_args;
26960
26961 case FRAME_POINTER_REGNUM:
26962 return offsets->soft_frame - offsets->saved_args;
26963
26964 case ARM_HARD_FRAME_POINTER_REGNUM:
26965 return offsets->saved_regs - offsets->saved_args;
26966
26967 case THUMB_HARD_FRAME_POINTER_REGNUM:
26968 return offsets->locals_base - offsets->saved_args;
26969
26970 default:
26971 gcc_unreachable ();
26972 }
26973 break;
26974
26975 case FRAME_POINTER_REGNUM:
26976 switch (to)
26977 {
26978 case STACK_POINTER_REGNUM:
26979 return offsets->outgoing_args - offsets->soft_frame;
26980
26981 case ARM_HARD_FRAME_POINTER_REGNUM:
26982 return offsets->saved_regs - offsets->soft_frame;
26983
26984 case THUMB_HARD_FRAME_POINTER_REGNUM:
26985 return offsets->locals_base - offsets->soft_frame;
26986
26987 default:
26988 gcc_unreachable ();
26989 }
26990 break;
26991
26992 default:
26993 gcc_unreachable ();
26994 }
26995 }
26996
26997 /* Generate the function's prologue. */
26998
26999 void
27000 thumb1_expand_prologue (void)
27001 {
27002 rtx_insn *insn;
27003
27004 HOST_WIDE_INT amount;
27005 HOST_WIDE_INT size;
27006 arm_stack_offsets *offsets;
27007 unsigned long func_type;
27008 int regno;
27009 unsigned long live_regs_mask;
27010 unsigned long l_mask;
27011 unsigned high_regs_pushed = 0;
27012 bool lr_needs_saving;
27013
27014 func_type = arm_current_func_type ();
27015
27016 /* Naked functions don't have prologues. */
27017 if (IS_NAKED (func_type))
27018 {
27019 if (flag_stack_usage_info)
27020 current_function_static_stack_size = 0;
27021 return;
27022 }
27023
27024 if (IS_INTERRUPT (func_type))
27025 {
27026 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27027 return;
27028 }
27029
27030 if (is_called_in_ARM_mode (current_function_decl))
27031 emit_insn (gen_prologue_thumb1_interwork ());
27032
27033 offsets = arm_get_frame_offsets ();
27034 live_regs_mask = offsets->saved_regs_mask;
27035 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27036
27037 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27038 l_mask = live_regs_mask & 0x40ff;
27039 /* Then count how many other high registers will need to be pushed. */
27040 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27041
27042 if (crtl->args.pretend_args_size)
27043 {
27044 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27045
27046 if (cfun->machine->uses_anonymous_args)
27047 {
27048 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27049 unsigned long mask;
27050
27051 mask = 1ul << (LAST_ARG_REGNUM + 1);
27052 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27053
27054 insn = thumb1_emit_multi_reg_push (mask, 0);
27055 }
27056 else
27057 {
27058 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27059 stack_pointer_rtx, x));
27060 }
27061 RTX_FRAME_RELATED_P (insn) = 1;
27062 }
27063
27064 if (TARGET_BACKTRACE)
27065 {
27066 HOST_WIDE_INT offset = 0;
27067 unsigned work_register;
27068 rtx work_reg, x, arm_hfp_rtx;
27069
27070 /* We have been asked to create a stack backtrace structure.
27071 The code looks like this:
27072
27073 0 .align 2
27074 0 func:
27075 0 sub SP, #16 Reserve space for 4 registers.
27076 2 push {R7} Push low registers.
27077 4 add R7, SP, #20 Get the stack pointer before the push.
27078 6 str R7, [SP, #8] Store the stack pointer
27079 (before reserving the space).
27080 8 mov R7, PC Get hold of the start of this code + 12.
27081 10 str R7, [SP, #16] Store it.
27082 12 mov R7, FP Get hold of the current frame pointer.
27083 14 str R7, [SP, #4] Store it.
27084 16 mov R7, LR Get hold of the current return address.
27085 18 str R7, [SP, #12] Store it.
27086 20 add R7, SP, #16 Point at the start of the
27087 backtrace structure.
27088 22 mov FP, R7 Put this value into the frame pointer. */
27089
27090 work_register = thumb_find_work_register (live_regs_mask);
27091 work_reg = gen_rtx_REG (SImode, work_register);
27092 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27093
27094 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27095 stack_pointer_rtx, GEN_INT (-16)));
27096 RTX_FRAME_RELATED_P (insn) = 1;
27097
27098 if (l_mask)
27099 {
27100 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27101 RTX_FRAME_RELATED_P (insn) = 1;
27102 lr_needs_saving = false;
27103
27104 offset = bit_count (l_mask) * UNITS_PER_WORD;
27105 }
27106
27107 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27108 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27109
27110 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27111 x = gen_frame_mem (SImode, x);
27112 emit_move_insn (x, work_reg);
27113
27114 /* Make sure that the instruction fetching the PC is in the right place
27115 to calculate "start of backtrace creation code + 12". */
27116 /* ??? The stores using the common WORK_REG ought to be enough to
27117 prevent the scheduler from doing anything weird. Failing that
27118 we could always move all of the following into an UNSPEC_VOLATILE. */
27119 if (l_mask)
27120 {
27121 x = gen_rtx_REG (SImode, PC_REGNUM);
27122 emit_move_insn (work_reg, x);
27123
27124 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27125 x = gen_frame_mem (SImode, x);
27126 emit_move_insn (x, work_reg);
27127
27128 emit_move_insn (work_reg, arm_hfp_rtx);
27129
27130 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27131 x = gen_frame_mem (SImode, x);
27132 emit_move_insn (x, work_reg);
27133 }
27134 else
27135 {
27136 emit_move_insn (work_reg, arm_hfp_rtx);
27137
27138 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27139 x = gen_frame_mem (SImode, x);
27140 emit_move_insn (x, work_reg);
27141
27142 x = gen_rtx_REG (SImode, PC_REGNUM);
27143 emit_move_insn (work_reg, x);
27144
27145 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27146 x = gen_frame_mem (SImode, x);
27147 emit_move_insn (x, work_reg);
27148 }
27149
27150 x = gen_rtx_REG (SImode, LR_REGNUM);
27151 emit_move_insn (work_reg, x);
27152
27153 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27154 x = gen_frame_mem (SImode, x);
27155 emit_move_insn (x, work_reg);
27156
27157 x = GEN_INT (offset + 12);
27158 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27159
27160 emit_move_insn (arm_hfp_rtx, work_reg);
27161 }
27162 /* Optimization: If we are not pushing any low registers but we are going
27163 to push some high registers then delay our first push. This will just
27164 be a push of LR and we can combine it with the push of the first high
27165 register. */
27166 else if ((l_mask & 0xff) != 0
27167 || (high_regs_pushed == 0 && lr_needs_saving))
27168 {
27169 unsigned long mask = l_mask;
27170 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27171 insn = thumb1_emit_multi_reg_push (mask, mask);
27172 RTX_FRAME_RELATED_P (insn) = 1;
27173 lr_needs_saving = false;
27174 }
27175
27176 if (high_regs_pushed)
27177 {
27178 unsigned pushable_regs;
27179 unsigned next_hi_reg;
27180 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27181 : crtl->args.info.nregs;
27182 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27183
27184 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27185 if (live_regs_mask & (1 << next_hi_reg))
27186 break;
27187
27188 /* Here we need to mask out registers used for passing arguments,
27189 even if they could otherwise be pushed: using them to stash the
27190 high registers would clobber the argument values they still
27191 hold. */
27192 pushable_regs = l_mask & (~arg_regs_mask);
27193 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27194
27195 /* Normally, LR can be used as a scratch register once it has been
27196 saved; but if the function examines its own return address then
27197 the value is still live and we need to avoid using it. */
27198 bool return_addr_live
27199 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27200 LR_REGNUM);
27201
27202 if (lr_needs_saving || return_addr_live)
27203 pushable_regs &= ~(1 << LR_REGNUM);
27204
27205 if (pushable_regs == 0)
27206 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27207
27208 while (high_regs_pushed > 0)
27209 {
27210 unsigned long real_regs_mask = 0;
27211 unsigned long push_mask = 0;
27212
27213 for (regno = LR_REGNUM; regno >= 0; regno --)
27214 {
27215 if (pushable_regs & (1 << regno))
27216 {
27217 emit_move_insn (gen_rtx_REG (SImode, regno),
27218 gen_rtx_REG (SImode, next_hi_reg));
27219
27220 high_regs_pushed --;
27221 real_regs_mask |= (1 << next_hi_reg);
27222 push_mask |= (1 << regno);
27223
27224 if (high_regs_pushed)
27225 {
27226 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27227 next_hi_reg --)
27228 if (live_regs_mask & (1 << next_hi_reg))
27229 break;
27230 }
27231 else
27232 break;
27233 }
27234 }
27235
27236 /* If we had to find a work register and we have not yet
27237 saved the LR then add it to the list of regs to push. */
27238 if (lr_needs_saving)
27239 {
27240 push_mask |= 1 << LR_REGNUM;
27241 real_regs_mask |= 1 << LR_REGNUM;
27242 lr_needs_saving = false;
27243 /* If the return address is not live at this point, we
27244 can add LR to the list of registers that we can use
27245 for pushes. */
27246 if (!return_addr_live)
27247 pushable_regs |= 1 << LR_REGNUM;
27248 }
27249
27250 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27251 RTX_FRAME_RELATED_P (insn) = 1;
27252 }
27253 }
27254
27255 /* Load the pic register before setting the frame pointer,
27256 so we can use r7 as a temporary work register. */
27257 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27258 arm_load_pic_register (live_regs_mask, NULL_RTX);
27259
27260 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27261 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27262 stack_pointer_rtx);
27263
27264 size = offsets->outgoing_args - offsets->saved_args;
27265 if (flag_stack_usage_info)
27266 current_function_static_stack_size = size;
27267
27268 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27269 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27270 || flag_stack_clash_protection)
27271 && size)
27272 sorry ("%<-fstack-check=specific%> for Thumb-1");
27273
27274 amount = offsets->outgoing_args - offsets->saved_regs;
27275 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27276 if (amount)
27277 {
27278 if (amount < 512)
27279 {
27280 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27281 GEN_INT (- amount)));
27282 RTX_FRAME_RELATED_P (insn) = 1;
27283 }
27284 else
27285 {
27286 rtx reg, dwarf;
27287
27288 /* The stack decrement is too big for an immediate value in a single
27289 insn. In theory we could issue multiple subtracts, but after
27290 three of them it becomes more space efficient to place the full
27291 value in the constant pool and load into a register. (Also the
27292 ARM debugger really likes to see only one stack decrement per
27293 function). So instead we look for a scratch register into which
27294 we can load the decrement, and then we subtract this from the
27295 stack pointer. Unfortunately on the thumb the only available
27296 scratch registers are the argument registers, and we cannot use
27297 these as they may hold arguments to the function. Instead we
27298 attempt to locate a call preserved register which is used by this
27299 function. If we can find one, then we know that it will have
27300 been pushed at the start of the prologue and so we can corrupt
27301 it now. */
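      /* Illustrative example (assuming a 4 KB local frame and r4 being a
	 call-saved register this function already pushes): the code below
	 would emit roughly
		ldr	r4, .Lc		@ .Lc holds the constant -4096
		add	sp, r4
	 */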
27302 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27303 if (live_regs_mask & (1 << regno))
27304 break;
27305
27306 gcc_assert (regno <= LAST_LO_REGNUM);
27307
27308 reg = gen_rtx_REG (SImode, regno);
27309
27310 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27311
27312 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27313 stack_pointer_rtx, reg));
27314
27315 dwarf = gen_rtx_SET (stack_pointer_rtx,
27316 plus_constant (Pmode, stack_pointer_rtx,
27317 -amount));
27318 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27319 RTX_FRAME_RELATED_P (insn) = 1;
27320 }
27321 }
27322
27323 if (frame_pointer_needed)
27324 thumb_set_frame_pointer (offsets);
27325
27326 /* If we are profiling, make sure no instructions are scheduled before
27327 the call to mcount. Similarly if the user has requested no
27328 scheduling in the prolog. Similarly if we want non-call exceptions
27329 using the EABI unwinder, to prevent faulting instructions from being
27330 swapped with a stack adjustment. */
27331 if (crtl->profile || !TARGET_SCHED_PROLOG
27332 || (arm_except_unwind_info (&global_options) == UI_TARGET
27333 && cfun->can_throw_non_call_exceptions))
27334 emit_insn (gen_blockage ());
27335
27336 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27337 if (live_regs_mask & 0xff)
27338 cfun->machine->lr_save_eliminated = 0;
27339 }
27340
27341 /* Clear caller saved registers not used to pass return values and leaked
27342 condition flags before exiting a cmse_nonsecure_entry function. */
27343
27344 void
27345 cmse_nonsecure_entry_clear_before_return (void)
27346 {
27347 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27348 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27349 uint32_t padding_bits_to_clear = 0;
27350 auto_sbitmap to_clear_bitmap (maxregno + 1);
27351 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27352 tree result_type;
27353
27354 bitmap_clear (to_clear_bitmap);
27355 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27356 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27357
27358 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27359 registers. */
27360 if (clear_vfpregs)
27361 {
27362 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27363
27364 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27365
27366 if (!TARGET_HAVE_FPCXT_CMSE)
27367 {
27368 /* Make sure we don't clear the two scratch registers used to clear
27369 the relevant FPSCR bits in output_return_instruction. */
27370 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27371 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27372 emit_use (gen_rtx_REG (SImode, 4));
27373 bitmap_clear_bit (to_clear_bitmap, 4);
27374 }
27375 }
27376
27377 /* If the user has defined registers to be caller saved, these are no longer
27378 restored by the function before returning and must thus be cleared for
27379 security purposes. */
27380 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27381 {
27382 /* We do not touch registers that can be used to pass arguments as per
27383 the AAPCS, since these should never be made callee-saved by user
27384 options. */
27385 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27386 continue;
27387 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27388 continue;
27389 if (!callee_saved_reg_p (regno)
27390 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27391 || TARGET_HARD_FLOAT))
27392 bitmap_set_bit (to_clear_bitmap, regno);
27393 }
27394
27395 /* Make sure we do not clear the registers used to return the result in. */
27396 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27397 if (!VOID_TYPE_P (result_type))
27398 {
27399 uint64_t to_clear_return_mask;
27400 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27401
27402 /* No need to check that we return in registers, because we don't
27403 support returning on stack yet. */
27404 gcc_assert (REG_P (result_rtl));
27405 to_clear_return_mask
27406 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27407 &padding_bits_to_clear);
27408 if (to_clear_return_mask)
27409 {
27410 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27411 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27412 {
27413 if (to_clear_return_mask & (1ULL << regno))
27414 bitmap_clear_bit (to_clear_bitmap, regno);
27415 }
27416 }
27417 }
27418
27419 if (padding_bits_to_clear != 0)
27420 {
27421 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27422 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27423
27424 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27425 returning a composite type, which only uses r0. Let's make sure that
27426 r1-r3 are cleared too. */
27427 bitmap_clear (to_clear_arg_regs_bitmap);
27428 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27429 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27430 }
27431
27432 /* Clear full registers that leak before returning. */
27433 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27434 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27435 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27436 clearing_reg);
27437 }
27438
27439 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27440 POP instruction can be generated. LR should be replaced by PC. All
27441 the checks required are already done by USE_RETURN_INSN (). Hence,
27442 all we really need to check here is whether a single register or
27443 multiple registers are to be popped. */
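/* For instance, when LR is the only register saved, the PARALLEL built
   below is intended to collapse to a single pop-and-return of the form
   "ldr pc, [sp], #4" (illustrative; the exact syntax depends on the
   assembler output patterns).  */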
27444 void
27445 thumb2_expand_return (bool simple_return)
27446 {
27447 int i, num_regs;
27448 unsigned long saved_regs_mask;
27449 arm_stack_offsets *offsets;
27450
27451 offsets = arm_get_frame_offsets ();
27452 saved_regs_mask = offsets->saved_regs_mask;
27453
27454 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27455 if (saved_regs_mask & (1 << i))
27456 num_regs++;
27457
27458 if (!simple_return && saved_regs_mask)
27459 {
27460 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27461 functions or adapt code to handle according to ACLE. This path should
27462 not be reachable for cmse_nonsecure_entry functions though we prefer
27463 to assert it for now to ensure that future code changes do not silently
27464 change this behavior. */
27465 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27466 if (arm_current_function_pac_enabled_p ())
27467 {
27468 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27469 arm_emit_multi_reg_pop (saved_regs_mask);
27470 emit_insn (gen_aut_nop ());
27471 emit_jump_insn (simple_return_rtx);
27472 }
27473 else if (num_regs == 1)
27474 {
27475 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27476 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27477 rtx addr = gen_rtx_MEM (SImode,
27478 gen_rtx_POST_INC (SImode,
27479 stack_pointer_rtx));
27480 set_mem_alias_set (addr, get_frame_alias_set ());
27481 XVECEXP (par, 0, 0) = ret_rtx;
27482 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27483 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27484 emit_jump_insn (par);
27485 }
27486 else
27487 {
27488 saved_regs_mask &= ~ (1 << LR_REGNUM);
27489 saved_regs_mask |= (1 << PC_REGNUM);
27490 arm_emit_multi_reg_pop (saved_regs_mask);
27491 }
27492 }
27493 else
27494 {
27495 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27496 cmse_nonsecure_entry_clear_before_return ();
27497 emit_jump_insn (simple_return_rtx);
27498 }
27499 }
27500
27501 void
27502 thumb1_expand_epilogue (void)
27503 {
27504 HOST_WIDE_INT amount;
27505 arm_stack_offsets *offsets;
27506 int regno;
27507
27508 /* Naked functions don't have epilogues. */
27509 if (IS_NAKED (arm_current_func_type ()))
27510 return;
27511
27512 offsets = arm_get_frame_offsets ();
27513 amount = offsets->outgoing_args - offsets->saved_regs;
27514
27515 if (frame_pointer_needed)
27516 {
27517 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27518 amount = offsets->locals_base - offsets->saved_regs;
27519 }
27520 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27521
27522 gcc_assert (amount >= 0);
27523 if (amount)
27524 {
27525 emit_insn (gen_blockage ());
27526
27527 if (amount < 512)
27528 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27529 GEN_INT (amount)));
27530 else
27531 {
27532 /* r3 is always free in the epilogue. */
27533 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27534
27535 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27536 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27537 }
27538 }
27539
27540 /* Emit a USE (stack_pointer_rtx), so that
27541 the stack adjustment will not be deleted. */
27542 emit_insn (gen_force_register_use (stack_pointer_rtx));
27543
27544 if (crtl->profile || !TARGET_SCHED_PROLOG)
27545 emit_insn (gen_blockage ());
27546
27547 /* Emit a clobber for each insn that will be restored in the epilogue,
27548 so that flow2 will get register lifetimes correct. */
27549 for (regno = 0; regno < 13; regno++)
27550 if (reg_needs_saving_p (regno))
27551 emit_clobber (gen_rtx_REG (SImode, regno));
27552
27553 if (! df_regs_ever_live_p (LR_REGNUM))
27554 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27555
27556 /* Clear all caller-saved regs that are not used to return. */
27557 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27558 cmse_nonsecure_entry_clear_before_return ();
27559 }
27560
27561 /* Epilogue code for APCS frame. */
27562 static void
27563 arm_expand_epilogue_apcs_frame (bool really_return)
27564 {
27565 unsigned long func_type;
27566 unsigned long saved_regs_mask;
27567 int num_regs = 0;
27568 int i;
27569 int floats_from_frame = 0;
27570 arm_stack_offsets *offsets;
27571
27572 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27573 func_type = arm_current_func_type ();
27574
27575 /* Get frame offsets for ARM. */
27576 offsets = arm_get_frame_offsets ();
27577 saved_regs_mask = offsets->saved_regs_mask;
27578
27579 /* Find the offset of the floating-point save area in the frame. */
27580 floats_from_frame
27581 = (offsets->saved_args
27582 + arm_compute_static_chain_stack_bytes ()
27583 - offsets->frame);
27584
27585 /* Compute how many core registers saved and how far away the floats are. */
27586 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27587 if (saved_regs_mask & (1 << i))
27588 {
27589 num_regs++;
27590 floats_from_frame += 4;
27591 }
27592
27593 if (TARGET_VFP_BASE)
27594 {
27595 int start_reg;
27596 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27597
27598 /* The offset is from IP_REGNUM. */
27599 int saved_size = arm_get_vfp_saved_size ();
27600 if (saved_size > 0)
27601 {
27602 rtx_insn *insn;
27603 floats_from_frame += saved_size;
27604 insn = emit_insn (gen_addsi3 (ip_rtx,
27605 hard_frame_pointer_rtx,
27606 GEN_INT (-floats_from_frame)));
27607 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27608 ip_rtx, hard_frame_pointer_rtx);
27609 }
27610
27611 /* Generate VFP register multi-pop. */
27612 start_reg = FIRST_VFP_REGNUM;
27613
27614 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27615 /* Look for a case where a reg does not need restoring. */
27616 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27617 {
27618 if (start_reg != i)
27619 arm_emit_vfp_multi_reg_pop (start_reg,
27620 (i - start_reg) / 2,
27621 gen_rtx_REG (SImode,
27622 IP_REGNUM));
27623 start_reg = i + 2;
27624 }
27625
27626 /* Restore the remaining regs that we have discovered (or possibly
27627 even all of them, if the conditional in the for loop never
27628 fired). */
27629 if (start_reg != i)
27630 arm_emit_vfp_multi_reg_pop (start_reg,
27631 (i - start_reg) / 2,
27632 gen_rtx_REG (SImode, IP_REGNUM));
27633 }
27634
27635 if (TARGET_IWMMXT)
27636 {
27637 /* The frame pointer is guaranteed to be non-double-word aligned, as
27638 it is set to double-word-aligned old_stack_pointer - 4. */
27639 rtx_insn *insn;
27640 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27641
27642 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27643 if (reg_needs_saving_p (i))
27644 {
27645 rtx addr = gen_frame_mem (V2SImode,
27646 plus_constant (Pmode, hard_frame_pointer_rtx,
27647 - lrm_count * 4));
27648 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27649 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27650 gen_rtx_REG (V2SImode, i),
27651 NULL_RTX);
27652 lrm_count += 2;
27653 }
27654 }
27655
27656 /* saved_regs_mask should contain IP, which holds the old stack pointer
27657 saved when the frame was created. Since SP and IP are adjacent registers,
27658 we can restore that value directly into SP. */
27659 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27660 saved_regs_mask &= ~(1 << IP_REGNUM);
27661 saved_regs_mask |= (1 << SP_REGNUM);
27662
27663 /* There are two registers left in saved_regs_mask - LR and PC. We
27664 only need to restore LR (the return address), but to
27665 save time we can load it directly into PC, unless we need a
27666 special function exit sequence, or we are not really returning. */
27667 if (really_return
27668 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27669 && !crtl->calls_eh_return)
27670 /* Delete LR from the register mask, so that LR on
27671 the stack is loaded into the PC in the register mask. */
27672 saved_regs_mask &= ~(1 << LR_REGNUM);
27673 else
27674 saved_regs_mask &= ~(1 << PC_REGNUM);
27675
27676 num_regs = bit_count (saved_regs_mask);
27677 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27678 {
27679 rtx_insn *insn;
27680 emit_insn (gen_blockage ());
27681 /* Unwind the stack to just below the saved registers. */
27682 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27683 hard_frame_pointer_rtx,
27684 GEN_INT (- 4 * num_regs)));
27685
27686 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27687 stack_pointer_rtx, hard_frame_pointer_rtx);
27688 }
27689
27690 arm_emit_multi_reg_pop (saved_regs_mask);
27691
27692 if (IS_INTERRUPT (func_type))
27693 {
27694 /* Interrupt handlers will have pushed the
27695 IP onto the stack, so restore it now. */
27696 rtx_insn *insn;
27697 rtx addr = gen_rtx_MEM (SImode,
27698 gen_rtx_POST_INC (SImode,
27699 stack_pointer_rtx));
27700 set_mem_alias_set (addr, get_frame_alias_set ());
27701 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27702 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27703 gen_rtx_REG (SImode, IP_REGNUM),
27704 NULL_RTX);
27705 }
27706
27707 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27708 return;
27709
27710 if (crtl->calls_eh_return)
27711 emit_insn (gen_addsi3 (stack_pointer_rtx,
27712 stack_pointer_rtx,
27713 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27714
27715 if (IS_STACKALIGN (func_type))
27716 /* Restore the original stack pointer. Before prologue, the stack was
27717 realigned and the original stack pointer saved in r0. For details,
27718 see comment in arm_expand_prologue. */
27719 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27720
27721 emit_jump_insn (simple_return_rtx);
27722 }
27723
27724 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27725 function is not a sibcall. */
27726 void
27727 arm_expand_epilogue (bool really_return)
27728 {
27729 unsigned long func_type;
27730 unsigned long saved_regs_mask;
27731 int num_regs = 0;
27732 int i;
27733 int amount;
27734 arm_stack_offsets *offsets;
27735
27736 func_type = arm_current_func_type ();
27737
27738 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27739 let output_return_instruction take care of any instruction emission. */
27740 if (IS_NAKED (func_type)
27741 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27742 {
27743 if (really_return)
27744 emit_jump_insn (simple_return_rtx);
27745 return;
27746 }
27747
27748 /* If we are throwing an exception, then we really must be doing a
27749 return, so we can't tail-call. */
27750 gcc_assert (!crtl->calls_eh_return || really_return);
27751
27752 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27753 {
27754 arm_expand_epilogue_apcs_frame (really_return);
27755 return;
27756 }
27757
27758 /* Get frame offsets for ARM. */
27759 offsets = arm_get_frame_offsets ();
27760 saved_regs_mask = offsets->saved_regs_mask;
27761 num_regs = bit_count (saved_regs_mask);
27762
27763 if (frame_pointer_needed)
27764 {
27765 rtx_insn *insn;
27766 /* Restore stack pointer if necessary. */
27767 if (TARGET_ARM)
27768 {
27769 /* In ARM mode, frame pointer points to first saved register.
27770 Restore stack pointer to last saved register. */
27771 amount = offsets->frame - offsets->saved_regs;
27772
27773 /* Force out any pending memory operations that reference stacked data
27774 before stack de-allocation occurs. */
27775 emit_insn (gen_blockage ());
27776 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27777 hard_frame_pointer_rtx,
27778 GEN_INT (amount)));
27779 arm_add_cfa_adjust_cfa_note (insn, amount,
27780 stack_pointer_rtx,
27781 hard_frame_pointer_rtx);
27782
27783 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27784 deleted. */
27785 emit_insn (gen_force_register_use (stack_pointer_rtx));
27786 }
27787 else
27788 {
27789 /* In Thumb-2 mode, the frame pointer points to the last saved
27790 register. */
27791 amount = offsets->locals_base - offsets->saved_regs;
27792 if (amount)
27793 {
27794 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27795 hard_frame_pointer_rtx,
27796 GEN_INT (amount)));
27797 arm_add_cfa_adjust_cfa_note (insn, amount,
27798 hard_frame_pointer_rtx,
27799 hard_frame_pointer_rtx);
27800 }
27801
27802 /* Force out any pending memory operations that reference stacked data
27803 before stack de-allocation occurs. */
27804 emit_insn (gen_blockage ());
27805 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27806 hard_frame_pointer_rtx));
27807 arm_add_cfa_adjust_cfa_note (insn, 0,
27808 stack_pointer_rtx,
27809 hard_frame_pointer_rtx);
27810 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27811 deleted. */
27812 emit_insn (gen_force_register_use (stack_pointer_rtx));
27813 }
27814 }
27815 else
27816 {
27817 /* Pop off outgoing args and local frame to adjust stack pointer to
27818 last saved register. */
27819 amount = offsets->outgoing_args - offsets->saved_regs;
27820 if (amount)
27821 {
27822 rtx_insn *tmp;
27823 /* Force out any pending memory operations that reference stacked data
27824 before stack de-allocation occurs. */
27825 emit_insn (gen_blockage ());
27826 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27827 stack_pointer_rtx,
27828 GEN_INT (amount)));
27829 arm_add_cfa_adjust_cfa_note (tmp, amount,
27830 stack_pointer_rtx, stack_pointer_rtx);
27831 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27832 not deleted. */
27833 emit_insn (gen_force_register_use (stack_pointer_rtx));
27834 }
27835 }
27836
27837 if (TARGET_VFP_BASE)
27838 {
27839 /* Generate VFP register multi-pop. */
27840 int end_reg = LAST_VFP_REGNUM + 1;
27841
27842 /* Scan the registers in reverse order. We need to match
27843 any groupings made in the prologue and generate matching
27844 vldm operations. The need to match groups is because,
27845 unlike pop, vldm can only do consecutive regs. */
27846 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27847 /* Look for a case where a reg does not need restoring. */
27848 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27849 {
27850 /* Restore the regs discovered so far (from reg+2 to
27851 end_reg). */
27852 if (end_reg > i + 2)
27853 arm_emit_vfp_multi_reg_pop (i + 2,
27854 (end_reg - (i + 2)) / 2,
27855 stack_pointer_rtx);
27856 end_reg = i;
27857 }
27858
27859 /* Restore the remaining regs that we have discovered (or possibly
27860 even all of them, if the conditional in the for loop never
27861 fired). */
27862 if (end_reg > i + 2)
27863 arm_emit_vfp_multi_reg_pop (i + 2,
27864 (end_reg - (i + 2)) / 2,
27865 stack_pointer_rtx);
27866 }
27867
27868 if (TARGET_IWMMXT)
27869 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27870 if (reg_needs_saving_p (i))
27871 {
27872 rtx_insn *insn;
27873 rtx addr = gen_rtx_MEM (V2SImode,
27874 gen_rtx_POST_INC (SImode,
27875 stack_pointer_rtx));
27876 set_mem_alias_set (addr, get_frame_alias_set ());
27877 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27878 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27879 gen_rtx_REG (V2SImode, i),
27880 NULL_RTX);
27881 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27882 stack_pointer_rtx, stack_pointer_rtx);
27883 }
27884
27885 if (saved_regs_mask)
27886 {
27887 rtx insn;
27888 bool return_in_pc = false;
27889
27890 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27891 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27892 && !IS_CMSE_ENTRY (func_type)
27893 && !IS_STACKALIGN (func_type)
27894 && really_return
27895 && crtl->args.pretend_args_size == 0
27896 && saved_regs_mask & (1 << LR_REGNUM)
27897 && !crtl->calls_eh_return
27898 && !arm_current_function_pac_enabled_p ())
27899 {
27900 saved_regs_mask &= ~(1 << LR_REGNUM);
27901 saved_regs_mask |= (1 << PC_REGNUM);
27902 return_in_pc = true;
27903 }
27904
27905 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27906 {
27907 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27908 if (saved_regs_mask & (1 << i))
27909 {
27910 rtx addr = gen_rtx_MEM (SImode,
27911 gen_rtx_POST_INC (SImode,
27912 stack_pointer_rtx));
27913 set_mem_alias_set (addr, get_frame_alias_set ());
27914
27915 if (i == PC_REGNUM)
27916 {
27917 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27918 XVECEXP (insn, 0, 0) = ret_rtx;
27919 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27920 addr);
27921 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27922 insn = emit_jump_insn (insn);
27923 }
27924 else
27925 {
27926 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27927 addr));
27928 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27929 gen_rtx_REG (SImode, i),
27930 NULL_RTX);
27931 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27932 stack_pointer_rtx,
27933 stack_pointer_rtx);
27934 }
27935 }
27936 }
27937 else
27938 {
27939 if (TARGET_LDRD
27940 && current_tune->prefer_ldrd_strd
27941 && !optimize_function_for_size_p (cfun))
27942 {
27943 if (TARGET_THUMB2)
27944 thumb2_emit_ldrd_pop (saved_regs_mask);
27945 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27946 arm_emit_ldrd_pop (saved_regs_mask);
27947 else
27948 arm_emit_multi_reg_pop (saved_regs_mask);
27949 }
27950 else
27951 arm_emit_multi_reg_pop (saved_regs_mask);
27952 }
27953
27954 if (return_in_pc)
27955 return;
27956 }
27957
27958 amount
27959 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27960 if (amount)
27961 {
27962 int i, j;
27963 rtx dwarf = NULL_RTX;
27964 rtx_insn *tmp =
27965 emit_insn (gen_addsi3 (stack_pointer_rtx,
27966 stack_pointer_rtx,
27967 GEN_INT (amount)));
27968
27969 RTX_FRAME_RELATED_P (tmp) = 1;
27970
27971 if (cfun->machine->uses_anonymous_args)
27972 {
27973 /* Restore pretend args. See arm_expand_prologue for how pretend_args
27974 are saved on the stack. */
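	  /* E.g. with 8 bytes of pretend args, num_regs below is 2 and the
	     mask becomes (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, the
	     argument registers the prologue pushed (illustrative).  */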
27975 int num_regs = crtl->args.pretend_args_size / 4;
27976 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27977 for (j = 0, i = 0; j < num_regs; i++)
27978 if (saved_regs_mask & (1 << i))
27979 {
27980 rtx reg = gen_rtx_REG (SImode, i);
27981 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27982 j++;
27983 }
27984 REG_NOTES (tmp) = dwarf;
27985 }
27986 arm_add_cfa_adjust_cfa_note (tmp, amount,
27987 stack_pointer_rtx, stack_pointer_rtx);
27988 }
27989
27990 if (IS_CMSE_ENTRY (func_type))
27991 {
27992 /* CMSE_ENTRY always returns. */
27993 gcc_assert (really_return);
27994 /* Clear all caller-saved regs that are not used to return. */
27995 cmse_nonsecure_entry_clear_before_return ();
27996
27997 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27998 VLDR. */
27999 if (TARGET_HAVE_FPCXT_CMSE)
28000 {
28001 rtx_insn *insn;
28002
28003 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28004 GEN_INT (FPCXTNS_ENUM)));
28005 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28006 plus_constant (Pmode, stack_pointer_rtx, 4));
28007 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28008 RTX_FRAME_RELATED_P (insn) = 1;
28009 }
28010 }
28011
28012 if (arm_current_function_pac_enabled_p ())
28013 emit_insn (gen_aut_nop ());
28014
28015 if (!really_return)
28016 return;
28017
28018 if (crtl->calls_eh_return)
28019 emit_insn (gen_addsi3 (stack_pointer_rtx,
28020 stack_pointer_rtx,
28021 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28022
28023 if (IS_STACKALIGN (func_type))
28024 /* Restore the original stack pointer. Before prologue, the stack was
28025 realigned and the original stack pointer saved in r0. For details,
28026 see comment in arm_expand_prologue. */
28027 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28028
28029 emit_jump_insn (simple_return_rtx);
28030 }
28031
28032 /* Implementation of insn prologue_thumb1_interwork. This is the first
28033 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28034
28035 const char *
28036 thumb1_output_interwork (void)
28037 {
28038 const char * name;
28039 FILE *f = asm_out_file;
28040
28041 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28042 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28043 == SYMBOL_REF);
28044 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28045
28046 /* Generate code sequence to switch us into Thumb mode. */
28047 /* The .code 32 directive has already been emitted by
28048 ASM_DECLARE_FUNCTION_NAME. */
28049 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28050 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28051
28052 /* Generate a label, so that the debugger will notice the
28053 change in instruction sets. This label is also used by
28054 the assembler to bypass the ARM code when this function
28055 is called from a Thumb encoded function elsewhere in the
28056 same file. Hence the definition of STUB_NAME here must
28057 agree with the definition in gas/config/tc-arm.c. */
28058
28059 #define STUB_NAME ".real_start_of"
28060
28061 fprintf (f, "\t.code\t16\n");
28062 #ifdef ARM_PE
28063 if (arm_dllexport_name_p (name))
28064 name = arm_strip_name_encoding (name);
28065 #endif
28066 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28067 fprintf (f, "\t.thumb_func\n");
28068 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28069
28070 return "";
28071 }
28072
28073 /* Handle the case of a double word load into a low register from
28074 a computed memory address. The computed address may involve a
28075 register which is overwritten by the load. */
28076 const char *
28077 thumb_load_double_from_address (rtx *operands)
28078 {
28079 rtx addr;
28080 rtx base;
28081 rtx offset;
28082 rtx arg1;
28083 rtx arg2;
28084
28085 gcc_assert (REG_P (operands[0]));
28086 gcc_assert (MEM_P (operands[1]));
28087
28088 /* Get the memory address. */
28089 addr = XEXP (operands[1], 0);
28090
28091 /* Work out how the memory address is computed. */
28092 switch (GET_CODE (addr))
28093 {
28094 case REG:
28095 operands[2] = adjust_address (operands[1], SImode, 4);
28096
28097 if (REGNO (operands[0]) == REGNO (addr))
28098 {
28099 output_asm_insn ("ldr\t%H0, %2", operands);
28100 output_asm_insn ("ldr\t%0, %1", operands);
28101 }
28102 else
28103 {
28104 output_asm_insn ("ldr\t%0, %1", operands);
28105 output_asm_insn ("ldr\t%H0, %2", operands);
28106 }
28107 break;
28108
28109 case CONST:
28110 /* Compute <address> + 4 for the high order load. */
28111 operands[2] = adjust_address (operands[1], SImode, 4);
28112
28113 output_asm_insn ("ldr\t%0, %1", operands);
28114 output_asm_insn ("ldr\t%H0, %2", operands);
28115 break;
28116
28117 case PLUS:
28118 arg1 = XEXP (addr, 0);
28119 arg2 = XEXP (addr, 1);
28120
28121 if (CONSTANT_P (arg1))
28122 base = arg2, offset = arg1;
28123 else
28124 base = arg1, offset = arg2;
28125
28126 gcc_assert (REG_P (base));
28127
28128 /* Catch the case of <address> = <reg> + <reg> */
28129 if (REG_P (offset))
28130 {
28131 int reg_offset = REGNO (offset);
28132 int reg_base = REGNO (base);
28133 int reg_dest = REGNO (operands[0]);
28134
28135 /* Add the base and offset registers together into the
28136 higher destination register. */
28137 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28138 reg_dest + 1, reg_base, reg_offset);
28139
28140 /* Load the lower destination register from the address in
28141 the higher destination register. */
28142 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28143 reg_dest, reg_dest + 1);
28144
28145 /* Load the higher destination register from its own address
28146 plus 4. */
28147 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28148 reg_dest + 1, reg_dest + 1);
28149 }
28150 else
28151 {
28152 /* Compute <address> + 4 for the high order load. */
28153 operands[2] = adjust_address (operands[1], SImode, 4);
28154
28155 /* If the computed address is held in the low order register
28156 then load the high order register first, otherwise always
28157 load the low order register first. */
28158 if (REGNO (operands[0]) == REGNO (base))
28159 {
28160 output_asm_insn ("ldr\t%H0, %2", operands);
28161 output_asm_insn ("ldr\t%0, %1", operands);
28162 }
28163 else
28164 {
28165 output_asm_insn ("ldr\t%0, %1", operands);
28166 output_asm_insn ("ldr\t%H0, %2", operands);
28167 }
28168 }
28169 break;
28170
28171 case LABEL_REF:
28172 /* With no registers to worry about we can just load the value
28173 directly. */
28174 operands[2] = adjust_address (operands[1], SImode, 4);
28175
28176 output_asm_insn ("ldr\t%H0, %2", operands);
28177 output_asm_insn ("ldr\t%0, %1", operands);
28178 break;
28179
28180 default:
28181 gcc_unreachable ();
28182 }
28183
28184 return "";
28185 }
28186
28187 const char *
28188 thumb_output_move_mem_multiple (int n, rtx *operands)
28189 {
28190 switch (n)
28191 {
28192 case 2:
28193 if (REGNO (operands[4]) > REGNO (operands[5]))
28194 std::swap (operands[4], operands[5]);
28195
28196 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28197 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28198 break;
28199
28200 case 3:
28201 if (REGNO (operands[4]) > REGNO (operands[5]))
28202 std::swap (operands[4], operands[5]);
28203 if (REGNO (operands[5]) > REGNO (operands[6]))
28204 std::swap (operands[5], operands[6]);
28205 if (REGNO (operands[4]) > REGNO (operands[5]))
28206 std::swap (operands[4], operands[5]);
28207
28208 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28209 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28210 break;
28211
28212 default:
28213 gcc_unreachable ();
28214 }
28215
28216 return "";
28217 }
28218
28219 /* Output a call-via instruction for thumb state. */
28220 const char *
28221 thumb_call_via_reg (rtx reg)
28222 {
28223 int regno = REGNO (reg);
28224 rtx *labelp;
28225
28226 gcc_assert (regno < LR_REGNUM);
28227
28228 /* If we are in the normal text section we can use a single instance
28229 per compilation unit. If we are doing function sections, then we need
28230 an entry per section, since we can't rely on reachability. */
28231 if (in_section == text_section)
28232 {
28233 thumb_call_reg_needed = 1;
28234
28235 if (thumb_call_via_label[regno] == NULL)
28236 thumb_call_via_label[regno] = gen_label_rtx ();
28237 labelp = thumb_call_via_label + regno;
28238 }
28239 else
28240 {
28241 if (cfun->machine->call_via[regno] == NULL)
28242 cfun->machine->call_via[regno] = gen_label_rtx ();
28243 labelp = cfun->machine->call_via + regno;
28244 }
28245
28246 output_asm_insn ("bl\t%a0", labelp);
28247 return "";
28248 }
28249
28250 /* Routines for generating rtl. */
28251 void
28252 thumb_expand_cpymemqi (rtx *operands)
28253 {
28254 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28255 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28256 HOST_WIDE_INT len = INTVAL (operands[2]);
28257 HOST_WIDE_INT offset = 0;
28258
28259 while (len >= 12)
28260 {
28261 emit_insn (gen_cpymem12b (out, in, out, in));
28262 len -= 12;
28263 }
28264
28265 if (len >= 8)
28266 {
28267 emit_insn (gen_cpymem8b (out, in, out, in));
28268 len -= 8;
28269 }
28270
28271 if (len >= 4)
28272 {
28273 rtx reg = gen_reg_rtx (SImode);
28274 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28275 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28276 len -= 4;
28277 offset += 4;
28278 }
28279
28280 if (len >= 2)
28281 {
28282 rtx reg = gen_reg_rtx (HImode);
28283 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28284 plus_constant (Pmode, in,
28285 offset))));
28286 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28287 offset)),
28288 reg));
28289 len -= 2;
28290 offset += 2;
28291 }
28292
28293 if (len)
28294 {
28295 rtx reg = gen_reg_rtx (QImode);
28296 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28297 plus_constant (Pmode, in,
28298 offset))));
28299 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28300 offset)),
28301 reg));
28302 }
28303 }
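/* Illustrative decomposition: a 23-byte copy is expanded as one 12-byte
   block move, one 8-byte block move, then a halfword and a byte copy
   (12 + 8 + 2 + 1).  */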
28304
28305 void
28306 thumb_reload_out_hi (rtx *operands)
28307 {
28308 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28309 }
28310
28311 /* Return the length of a function name prefix
28312 that starts with the character 'c'. */
28313 static int
28314 arm_get_strip_length (int c)
28315 {
28316 switch (c)
28317 {
28318 ARM_NAME_ENCODING_LENGTHS
28319 default: return 0;
28320 }
28321 }
28322
28323 /* Return a pointer to a function's name with any
28324 and all prefix encodings stripped from it. */
28325 const char *
28326 arm_strip_name_encoding (const char *name)
28327 {
28328 int skip;
28329
28330 while ((skip = arm_get_strip_length (* name)))
28331 name += skip;
28332
28333 return name;
28334 }
28335
28336 /* If there is a '*' anywhere in the name's prefix, then
28337 emit the stripped name verbatim, otherwise prepend an
28338 underscore if leading underscores are being used. */
28339 void
28340 arm_asm_output_labelref (FILE *stream, const char *name)
28341 {
28342 int skip;
28343 int verbatim = 0;
28344
28345 while ((skip = arm_get_strip_length (* name)))
28346 {
28347 verbatim |= (*name == '*');
28348 name += skip;
28349 }
28350
28351 if (verbatim)
28352 fputs (name, stream);
28353 else
28354 asm_fprintf (stream, "%U%s", name);
28355 }
28356
28357 /* This function is used to emit an EABI tag and its associated value.
28358 We emit the numerical value of the tag in case the assembler does not
28359 support textual tags (e.g. gas prior to 2.20). If requested we include
28360 the tag name in a comment so that anyone reading the assembler output
28361 will know which tag is being set.
28362
28363 This function is not static because arm-c.cc needs it too. */
28364
28365 void
28366 arm_emit_eabi_attribute (const char *name, int num, int val)
28367 {
28368 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28369 if (flag_verbose_asm || flag_debug_asm)
28370 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28371 asm_fprintf (asm_out_file, "\n");
28372 }
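/* For example (illustrative; tag number taken from the ARM EABI attribute
   list): arm_emit_eabi_attribute ("Tag_ABI_PCS_wchar_t", 18, 4) emits
   ".eabi_attribute 18, 4" followed, under -fverbose-asm, by an
   "@ Tag_ABI_PCS_wchar_t" comment.  */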
28373
28374 /* This function is used to print CPU tuning information as comment
28375 in assembler file. Pointers are not printed for now. */
28376
28377 void
28378 arm_print_tune_info (void)
28379 {
28380 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28381 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28382 current_tune->constant_limit);
28383 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28384 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28385 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28386 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28387 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28388 "prefetch.l1_cache_size:\t%d\n",
28389 current_tune->prefetch.l1_cache_size);
28390 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28391 "prefetch.l1_cache_line_size:\t%d\n",
28392 current_tune->prefetch.l1_cache_line_size);
28393 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28394 "prefer_constant_pool:\t%d\n",
28395 (int) current_tune->prefer_constant_pool);
28396 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28397 "branch_cost:\t(s:speed, p:predictable)\n");
28398 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28399 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28400 current_tune->branch_cost (false, false));
28401 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28402 current_tune->branch_cost (false, true));
28403 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28404 current_tune->branch_cost (true, false));
28405 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28406 current_tune->branch_cost (true, true));
28407 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28408 "prefer_ldrd_strd:\t%d\n",
28409 (int) current_tune->prefer_ldrd_strd);
28410 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28411 "logical_op_non_short_circuit:\t[%d,%d]\n",
28412 (int) current_tune->logical_op_non_short_circuit_thumb,
28413 (int) current_tune->logical_op_non_short_circuit_arm);
28414 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28415 "disparage_flag_setting_t16_encodings:\t%d\n",
28416 (int) current_tune->disparage_flag_setting_t16_encodings);
28417 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28418 "string_ops_prefer_neon:\t%d\n",
28419 (int) current_tune->string_ops_prefer_neon);
28420 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28421 "max_insns_inline_memset:\t%d\n",
28422 current_tune->max_insns_inline_memset);
28423 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28424 current_tune->fusible_ops);
28425 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28426 (int) current_tune->sched_autopref);
28427 }
28428
28429 /* The last set of target options used to emit .arch directives, etc. This
28430 could be a function-local static if it were not required to expose it as a
28431 root to the garbage collector. */
28432 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28433
28434 /* Print .arch and .arch_extension directives corresponding to the
28435 current architecture configuration. */
28436 static void
28437 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28438 {
28439 arm_build_target build_target;
28440 /* If the target options haven't changed since the last time we were called
28441 there is nothing to do. This should be sufficient to suppress the
28442 majority of redundant work. */
28443 if (last_asm_targ_options == targ_options)
28444 return;
28445
28446 last_asm_targ_options = targ_options;
28447
28448 build_target.isa = sbitmap_alloc (isa_num_bits);
28449 arm_configure_build_target (&build_target, targ_options, false);
28450
28451 if (build_target.core_name
28452 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28453 {
28454 const char* truncated_name
28455 = arm_rewrite_selected_cpu (build_target.core_name);
28456 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28457 }
28458
28459 const arch_option *arch
28460 = arm_parse_arch_option_name (all_architectures, "-march",
28461 build_target.arch_name);
28462 auto_sbitmap opt_bits (isa_num_bits);
28463
28464 gcc_assert (arch);
28465
28466 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28467 {
28468 /* Keep backward compatibility for assemblers which don't support
28469 armv7ve. Fortunately, none of the following extensions are reset
28470 by a .fpu directive. */
28471 asm_fprintf (stream, "\t.arch armv7-a\n");
28472 asm_fprintf (stream, "\t.arch_extension virt\n");
28473 asm_fprintf (stream, "\t.arch_extension idiv\n");
28474 asm_fprintf (stream, "\t.arch_extension sec\n");
28475 asm_fprintf (stream, "\t.arch_extension mp\n");
28476 }
28477 else
28478 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28479
28480 /* The .fpu directive will reset any architecture extensions from the
28481 assembler that relate to the fp/vector extensions. So put this out before
28482 any .arch_extension directives. */
28483 const char *fpu_name = (TARGET_SOFT_FLOAT
28484 ? "softvfp"
28485 : arm_identify_fpu_from_isa (build_target.isa));
28486 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28487
28488 if (!arch->common.extensions)
28489 return;
28490
28491 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28492 opt->name != NULL;
28493 opt++)
28494 {
28495 if (!opt->remove)
28496 {
28497 arm_initialize_isa (opt_bits, opt->isa_bits);
28498
28499 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28500 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28501 floating point instructions are disabled. So the following check
28502 restricts the printing of ".arch_extension mve" and
28503 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28504 this special behaviour because the feature bits "mve" and
28505 "mve_float" are not part of the "fpu bits", so they are not cleared
28506 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28507 TARGET_HAVE_MVE_FLOAT are disabled. */
28508 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28509 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28510 && !TARGET_HAVE_MVE_FLOAT))
28511 continue;
28512
28513 /* If every feature bit of this option is set in the target ISA
28514 specification, print out the option name. However, don't print
28515 anything if all the bits are part of the FPU specification. */
28516 if (bitmap_subset_p (opt_bits, build_target.isa)
28517 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28518 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28519 }
28520 }
28521 }
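/* Editorial sketch of the output produced above (illustrative, not verified
   assembler output): for -march=armv7ve the directives fall back to the
   spelling accepted by older assemblers, e.g.

     .arch armv7-a
     .arch_extension virt
     .arch_extension idiv
     .arch_extension sec
     .arch_extension mp
     .fpu vfpv4

   while other architectures get a plain ".arch <name>", followed by the
   .fpu directive and any applicable .arch_extension lines.  */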
28522
28523 static void
28524 arm_file_start (void)
28525 {
28526 int val;
28527 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28528 bool bti = (aarch_enable_bti == 1);
28529
28530 arm_print_asm_arch_directives
28531 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28532
28533 if (TARGET_BPABI)
28534 {
28535 /* If we have a named cpu, but the assembler does not support that
28536 name via .cpu, put out a cpu name attribute; but don't do this if the
28537 name starts with the fictitious prefix, 'generic'. */
28538 if (arm_active_target.core_name
28539 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28540 && !startswith (arm_active_target.core_name, "generic"))
28541 {
28542 const char* truncated_name
28543 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28544 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28545 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28546 truncated_name);
28547 }
28548
28549 if (print_tune_info)
28550 arm_print_tune_info ();
28551
28552 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28553 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28554
28555 if (TARGET_HARD_FLOAT_ABI)
28556 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28557
28558 /* Some of these attributes only apply when the corresponding features
28559 are used. However we don't have any easy way of figuring this out.
28560 Conservatively record the setting that would have been used. */
28561
28562 if (flag_rounding_math)
28563 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28564
28565 if (!flag_unsafe_math_optimizations)
28566 {
28567 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28568 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28569 }
28570 if (flag_signaling_nans)
28571 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28572
28573 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28574 flag_finite_math_only ? 1 : 3);
28575
28576 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28577 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28578 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28579 flag_short_enums ? 1 : 2);
28580
28581 /* Tag_ABI_optimization_goals. */
28582 if (optimize_size)
28583 val = 4;
28584 else if (optimize >= 2)
28585 val = 2;
28586 else if (optimize)
28587 val = 1;
28588 else
28589 val = 6;
28590 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28591
28592 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28593 unaligned_access);
28594
28595 if (arm_fp16_format)
28596 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28597 (int) arm_fp16_format);
28598
28599 if (TARGET_HAVE_PACBTI)
28600 {
28601 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28602 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28603 }
28604 else if (pac || bti)
28605 {
28606 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28607 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28608 }
28609
28610 if (bti)
28611 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28612 if (pac)
28613 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28614
28615 if (arm_lang_output_object_attributes_hook)
28616 arm_lang_output_object_attributes_hook();
28617 }
28618
28619 default_file_start ();
28620 }
28621
28622 static void
28623 arm_file_end (void)
28624 {
28625 int regno;
28626
28627 /* Just in case the last function output in the assembler had non-default
28628 architecture directives, we force the assembler state back to the default
28629 set, so that any 'calculated' build attributes are based on the default
28630 options rather than the special options for that function. */
28631 arm_print_asm_arch_directives
28632 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28633
28634 if (NEED_INDICATE_EXEC_STACK)
28635 /* Add .note.GNU-stack. */
28636 file_end_indicate_exec_stack ();
28637
28638 if (! thumb_call_reg_needed)
28639 return;
28640
28641 switch_to_section (text_section);
28642 asm_fprintf (asm_out_file, "\t.code 16\n");
28643 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28644
28645 for (regno = 0; regno < LR_REGNUM; regno++)
28646 {
28647 rtx label = thumb_call_via_label[regno];
28648
28649 if (label != 0)
28650 {
28651 targetm.asm_out.internal_label (asm_out_file, "L",
28652 CODE_LABEL_NUMBER (label));
28653 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28654 }
28655 }
28656 }
28657
28658 #ifndef ARM_PE
28659 /* Symbols in the text segment can be accessed without indirecting via the
28660 constant pool; it may take an extra binary operation, but this is still
28661 faster than indirecting via memory. Don't do this when not optimizing,
28662 since we won't be calculating all of the offsets necessary to do this
28663 simplification. */
28664
28665 static void
28666 arm_encode_section_info (tree decl, rtx rtl, int first)
28667 {
28668 if (optimize > 0 && TREE_CONSTANT (decl))
28669 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28670
28671 default_encode_section_info (decl, rtl, first);
28672 }
28673 #endif /* !ARM_PE */
28674
28675 static void
28676 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28677 {
28678 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28679 && !strcmp (prefix, "L"))
28680 {
28681 arm_ccfsm_state = 0;
28682 arm_target_insn = NULL;
28683 }
28684 default_internal_label (stream, prefix, labelno);
28685 }
28686
28687 /* Define classes to generate code as RTL or output asm to a file.
28688 Using templates then allows us to use the same code to output code
28689 sequences in the two formats. */
28690 class thumb1_const_rtl
28691 {
28692 public:
28693 thumb1_const_rtl (rtx dst) : dst (dst) {}
28694
28695 void mov (HOST_WIDE_INT val)
28696 {
28697 emit_set_insn (dst, GEN_INT (val));
28698 }
28699
28700 void add (HOST_WIDE_INT val)
28701 {
28702 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28703 }
28704
28705 void ashift (HOST_WIDE_INT shift)
28706 {
28707 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28708 }
28709
28710 void neg ()
28711 {
28712 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28713 }
28714
28715 private:
28716 rtx dst;
28717 };
28718
28719 class thumb1_const_print
28720 {
28721 public:
28722 thumb1_const_print (FILE *f, int regno)
28723 {
28724 t_file = f;
28725 dst_regname = reg_names[regno];
28726 }
28727
28728 void mov (HOST_WIDE_INT val)
28729 {
28730 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28731 dst_regname, val);
28732 }
28733
28734 void add (HOST_WIDE_INT val)
28735 {
28736 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28737 dst_regname, val);
28738 }
28739
28740 void ashift (HOST_WIDE_INT shift)
28741 {
28742 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28743 dst_regname, shift);
28744 }
28745
28746 void neg ()
28747 {
28748 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28749 }
28750
28751 private:
28752 FILE *t_file;
28753 const char *dst_regname;
28754 };
28755
28756 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28757 Avoid generating useless code when one of the bytes is zero. */
28758 template <class T>
28759 void
28760 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28761 {
28762 bool mov_done_p = false;
28763 unsigned HOST_WIDE_INT val = op1;
28764 int shift = 0;
28765 int i;
28766
28767 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28768
28769 if (val <= 255)
28770 {
28771 dst.mov (val);
28772 return;
28773 }
28774
28775 /* For negative numbers with the first nine bits set, build the
28776 opposite of OP1, then negate it: this is generally shorter, and
28777 not longer. */
28778 if ((val & 0xFF800000) == 0xFF800000)
28779 {
28780 thumb1_gen_const_int_1 (dst, -op1);
28781 dst.neg ();
28782 return;
28783 }
28784
28785 /* In the general case, we need 7 instructions to build
28786 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28787 do better if VAL is small enough, or
28788 right-shiftable by a suitable amount. If the
28789 right shift lets us encode at least one byte fewer,
28790 it's worth it: we save an adds and an lsls at the
28791 expense of a final lsls. */
28792 int final_shift = number_of_first_bit_set (val);
28793
28794 int leading_zeroes = clz_hwi (val);
28795 int number_of_bytes_needed
28796 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28797 / BITS_PER_UNIT) + 1;
28798 int number_of_bytes_needed2
28799 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28800 / BITS_PER_UNIT) + 1;
28801
28802 if (number_of_bytes_needed2 < number_of_bytes_needed)
28803 val >>= final_shift;
28804 else
28805 final_shift = 0;
28806
28807 /* If we are in a very small range, we can use either a single movs
28808 or movs+adds. */
28809 if (val <= 510)
28810 {
28811 if (val > 255)
28812 {
28813 unsigned HOST_WIDE_INT high = val - 255;
28814
28815 dst.mov (high);
28816 dst.add (255);
28817 }
28818 else
28819 dst.mov (val);
28820
28821 if (final_shift > 0)
28822 dst.ashift (final_shift);
28823 }
28824 else
28825 {
28826 /* General case, emit upper 3 bytes as needed. */
28827 for (i = 0; i < 3; i++)
28828 {
28829 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28830
28831 if (byte)
28832 {
28833 /* We are about to emit new bits, stop accumulating a
28834 shift amount, and left-shift only if we have already
28835 emitted some upper bits. */
28836 if (mov_done_p)
28837 {
28838 dst.ashift (shift);
28839 dst.add (byte);
28840 }
28841 else
28842 dst.mov (byte);
28843
28844 /* Stop accumulating shift amount since we've just
28845 emitted some bits. */
28846 shift = 0;
28847
28848 mov_done_p = true;
28849 }
28850
28851 if (mov_done_p)
28852 shift += 8;
28853 }
28854
28855 /* Emit lower byte. */
28856 if (!mov_done_p)
28857 dst.mov (val & 0xff);
28858 else
28859 {
28860 dst.ashift (shift);
28861 if (val & 0xff)
28862 dst.add (val & 0xff);
28863 }
28864
28865 if (final_shift > 0)
28866 dst.ashift (final_shift);
28867 }
28868 }
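/* Editorial worked example (a sketch, assuming the print variant writes to
   r3): for OP1 = 0x12345 the algorithm above skips the zero high byte and
   emits

     movs  r3, #1
     lsls  r3, #8
     adds  r3, #35
     lsls  r3, #8
     adds  r3, #69

   i.e. a movs for the most significant non-zero byte followed by lsls/adds
   pairs for the remaining bytes.  */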
28869
28870 /* Proxies for thumb1.md, since the thumb1_const_print and
28871 thumb1_const_rtl classes are not exported. */
28872 void
28873 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28874 {
28875 thumb1_const_rtl t (dst);
28876 thumb1_gen_const_int_1 (t, op1);
28877 }
28878
28879 void
28880 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28881 {
28882 thumb1_const_print t (asm_out_file, REGNO (dst));
28883 thumb1_gen_const_int_1 (t, op1);
28884 }
28885
28886 /* Output code to add DELTA to the first argument, and then jump
28887 to FUNCTION. Used for C++ multiple inheritance. */
28888
28889 static void
28890 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28891 HOST_WIDE_INT, tree function)
28892 {
28893 static int thunk_label = 0;
28894 char label[256];
28895 char labelpc[256];
28896 int mi_delta = delta;
28897 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28898 int shift = 0;
28899 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28900 ? 1 : 0);
28901 if (mi_delta < 0)
28902 mi_delta = - mi_delta;
28903
28904 final_start_function (emit_barrier (), file, 1);
28905
28906 if (TARGET_THUMB1)
28907 {
28908 int labelno = thunk_label++;
28909 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28910 /* Thunks are entered in arm mode when available. */
28911 if (TARGET_THUMB1_ONLY)
28912 {
28913 /* push r3 so we can use it as a temporary. */
28914 /* TODO: Omit this save if r3 is not used. */
28915 fputs ("\tpush {r3}\n", file);
28916
28917 /* With -mpure-code, we cannot load the address from the
28918 constant pool: we build it explicitly. */
28919 if (target_pure_code)
28920 {
28921 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28922 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28923 fputc ('\n', file);
28924 fputs ("\tlsls r3, #8\n", file);
28925 fputs ("\tadds\tr3, #:upper0_7:#", file);
28926 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28927 fputc ('\n', file);
28928 fputs ("\tlsls r3, #8\n", file);
28929 fputs ("\tadds\tr3, #:lower8_15:#", file);
28930 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28931 fputc ('\n', file);
28932 fputs ("\tlsls r3, #8\n", file);
28933 fputs ("\tadds\tr3, #:lower0_7:#", file);
28934 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28935 fputc ('\n', file);
28936 }
28937 else
28938 fputs ("\tldr\tr3, ", file);
28939 }
28940 else
28941 {
28942 fputs ("\tldr\tr12, ", file);
28943 }
28944
28945 if (!target_pure_code)
28946 {
28947 assemble_name (file, label);
28948 fputc ('\n', file);
28949 }
28950
28951 if (flag_pic)
28952 {
28953 /* If we are generating PIC, the ldr instruction below loads
28954 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28955 the address of the add + 8, so we have:
28956
28957 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28958 = target + 1.
28959
28960 Note that we have "+ 1" because some versions of GNU ld
28961 don't set the low bit of the result for R_ARM_REL32
28962 relocations against thumb function symbols.
28963 On ARMv6M this is +4, not +8. */
28964 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28965 assemble_name (file, labelpc);
28966 fputs (":\n", file);
28967 if (TARGET_THUMB1_ONLY)
28968 {
28969 /* This is 2 insns after the start of the thunk, so we know it
28970 is 4-byte aligned. */
28971 fputs ("\tadd\tr3, pc, r3\n", file);
28972 fputs ("\tmov r12, r3\n", file);
28973 }
28974 else
28975 fputs ("\tadd\tr12, pc, r12\n", file);
28976 }
28977 else if (TARGET_THUMB1_ONLY)
28978 fputs ("\tmov r12, r3\n", file);
28979 }
28980 if (TARGET_THUMB1_ONLY)
28981 {
28982 if (mi_delta > 255)
28983 {
28984 /* With -mpure-code, we cannot load MI_DELTA from the
28985 constant pool: we build it explicitly. */
28986 if (target_pure_code)
28987 {
28988 thumb1_const_print r3 (file, 3);
28989 thumb1_gen_const_int_1 (r3, mi_delta);
28990 }
28991 else
28992 {
28993 fputs ("\tldr\tr3, ", file);
28994 assemble_name (file, label);
28995 fputs ("+4\n", file);
28996 }
28997 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28998 mi_op, this_regno, this_regno);
28999 }
29000 else if (mi_delta != 0)
29001 {
29002 /* Thumb1 unified syntax requires s suffix in instruction name when
29003 one of the operands is immediate. */
29004 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29005 mi_op, this_regno, this_regno,
29006 mi_delta);
29007 }
29008 }
29009 else
29010 {
29011 /* TODO: Use movw/movt for large constants when available. */
29012 while (mi_delta != 0)
29013 {
29014 if ((mi_delta & (3 << shift)) == 0)
29015 shift += 2;
29016 else
29017 {
29018 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29019 mi_op, this_regno, this_regno,
29020 mi_delta & (0xff << shift));
29021 mi_delta &= ~(0xff << shift);
29022 shift += 8;
29023 }
29024 }
29025 }
29026 if (TARGET_THUMB1)
29027 {
29028 if (TARGET_THUMB1_ONLY)
29029 fputs ("\tpop\t{r3}\n", file);
29030
29031 fprintf (file, "\tbx\tr12\n");
29032
29033 /* With -mpure-code, we don't need to emit literals for the
29034 function address and delta since we emitted code to build
29035 them. */
29036 if (!target_pure_code)
29037 {
29038 ASM_OUTPUT_ALIGN (file, 2);
29039 assemble_name (file, label);
29040 fputs (":\n", file);
29041 if (flag_pic)
29042 {
29043 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29044 rtx tem = XEXP (DECL_RTL (function), 0);
29045 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29046 pipeline offset is four rather than eight. Adjust the offset
29047 accordingly. */
29048 tem = plus_constant (GET_MODE (tem), tem,
29049 TARGET_THUMB1_ONLY ? -3 : -7);
29050 tem = gen_rtx_MINUS (GET_MODE (tem),
29051 tem,
29052 gen_rtx_SYMBOL_REF (Pmode,
29053 ggc_strdup (labelpc)));
29054 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29055 }
29056 else
29057 /* Output ".word .LTHUNKn". */
29058 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29059
29060 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29061 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29062 }
29063 }
29064 else
29065 {
29066 fputs ("\tb\t", file);
29067 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29068 if (NEED_PLT_RELOC)
29069 fputs ("(PLT)", file);
29070 fputc ('\n', file);
29071 }
29072
29073 final_end_function ();
29074 }
29075
29076 /* MI thunk handling for TARGET_32BIT. */
29077
29078 static void
29079 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29080 HOST_WIDE_INT vcall_offset, tree function)
29081 {
29082 const bool long_call_p = arm_is_long_call_p (function);
29083
29084 /* On ARM, this_regno is R0 or R1 depending on whether the function
29085 returns an aggregate or not. */
29086
29087 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29088 function)
29089 ? R1_REGNUM : R0_REGNUM);
29090
29091 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29092 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29093 reload_completed = 1;
29094 emit_note (NOTE_INSN_PROLOGUE_END);
29095
29096 /* Add DELTA to THIS_RTX. */
29097 if (delta != 0)
29098 arm_split_constant (PLUS, Pmode, NULL_RTX,
29099 delta, this_rtx, this_rtx, false);
29100
29101 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29102 if (vcall_offset != 0)
29103 {
29104 /* Load *THIS_RTX. */
29105 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29106 /* Compute *THIS_RTX + VCALL_OFFSET. */
29107 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29108 false);
29109 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29110 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29111 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29112 }
29113
29114 /* Generate a tail call to the target function. */
29115 if (!TREE_USED (function))
29116 {
29117 assemble_external (function);
29118 TREE_USED (function) = 1;
29119 }
29120 rtx funexp = XEXP (DECL_RTL (function), 0);
29121 if (long_call_p)
29122 {
29123 emit_move_insn (temp, funexp);
29124 funexp = temp;
29125 }
29126 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29127 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29128 SIBLING_CALL_P (insn) = 1;
29129 emit_barrier ();
29130
29131 /* Indirect calls require a bit of fixup in PIC mode. */
29132 if (long_call_p)
29133 {
29134 split_all_insns_noflow ();
29135 arm_reorg ();
29136 }
29137
29138 insn = get_insns ();
29139 shorten_branches (insn);
29140 final_start_function (insn, file, 1);
29141 final (insn, file, 1);
29142 final_end_function ();
29143
29144 /* Stop pretending this is a post-reload pass. */
29145 reload_completed = 0;
29146 }
29147
29148 /* Output code to add DELTA to the first argument, and then jump
29149 to FUNCTION. Used for C++ multiple inheritance. */
29150
29151 static void
29152 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29153 HOST_WIDE_INT vcall_offset, tree function)
29154 {
29155 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29156
29157 assemble_start_function (thunk, fnname);
29158 if (TARGET_32BIT)
29159 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29160 else
29161 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29162 assemble_end_function (thunk, fnname);
29163 }
29164
29165 int
29166 arm_emit_vector_const (FILE *file, rtx x)
29167 {
29168 int i;
29169 const char * pattern;
29170
29171 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29172
29173 switch (GET_MODE (x))
29174 {
29175 case E_V2SImode: pattern = "%08x"; break;
29176 case E_V4HImode: pattern = "%04x"; break;
29177 case E_V8QImode: pattern = "%02x"; break;
29178 default: gcc_unreachable ();
29179 }
29180
29181 fprintf (file, "0x");
29182 for (i = CONST_VECTOR_NUNITS (x); i--;)
29183 {
29184 rtx element;
29185
29186 element = CONST_VECTOR_ELT (x, i);
29187 fprintf (file, pattern, INTVAL (element));
29188 }
29189
29190 return 1;
29191 }
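/* Editorial example: for a V4HImode CONST_VECTOR with elements {1, 2, 3, 4}
   the routine above prints the lanes from the highest index downwards,
   producing "0x0004000300020001".  */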
29192
29193 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29194 HFmode constant pool entries are actually loaded with ldr. */
29195 void
29196 arm_emit_fp16_const (rtx c)
29197 {
29198 long bits;
29199
29200 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29201 if (WORDS_BIG_ENDIAN)
29202 assemble_zeros (2);
29203 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29204 if (!WORDS_BIG_ENDIAN)
29205 assemble_zeros (2);
29206 }
29207
29208 const char *
29209 arm_output_load_gr (rtx *operands)
29210 {
29211 rtx reg;
29212 rtx offset;
29213 rtx wcgr;
29214 rtx sum;
29215
29216 if (!MEM_P (operands [1])
29217 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29218 || !REG_P (reg = XEXP (sum, 0))
29219 || !CONST_INT_P (offset = XEXP (sum, 1))
29220 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29221 return "wldrw%?\t%0, %1";
29222
29223 /* Fix up an out-of-range load of a GR register. */
29224 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29225 wcgr = operands[0];
29226 operands[0] = reg;
29227 output_asm_insn ("ldr%?\t%0, %1", operands);
29228
29229 operands[0] = wcgr;
29230 operands[1] = reg;
29231 output_asm_insn ("tmcr%?\t%0, %1", operands);
29232 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29233
29234 return "";
29235 }
29236
29237 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29238
29239 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29240 named arg and all anonymous args onto the stack.
29241 XXX I know the prologue shouldn't be pushing registers, but it is faster
29242 that way. */
29243
29244 static void
29245 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29246 const function_arg_info &arg,
29247 int *pretend_size,
29248 int second_time ATTRIBUTE_UNUSED)
29249 {
29250 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29251 int nregs;
29252
29253 cfun->machine->uses_anonymous_args = 1;
29254 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29255 {
29256 nregs = pcum->aapcs_ncrn;
29257 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29258 && (nregs & 1))
29259 {
29260 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29261 if (res < 0 && warn_psabi)
29262 inform (input_location, "parameter passing for argument of "
29263 "type %qT changed in GCC 7.1", arg.type);
29264 else if (res > 0)
29265 {
29266 nregs++;
29267 if (res > 1 && warn_psabi)
29268 inform (input_location,
29269 "parameter passing for argument of type "
29270 "%qT changed in GCC 9.1", arg.type);
29271 }
29272 }
29273 }
29274 else
29275 nregs = pcum->nregs;
29276
29277 if (nregs < NUM_ARG_REGS)
29278 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29279 }
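/* Editorial example (assuming the AAPCS variant): for a variadic function
   such as f (int x, ...) the named argument consumes r0, so NREGS is 1 and
   *PRETEND_SIZE becomes (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12, making
   the prologue push r1-r3 where va_arg expects to find them.  */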
29280
29281 /* We can't rely on the caller doing the proper promotion when
29282 using APCS or ATPCS. */
29283
29284 static bool
29285 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29286 {
29287 return !TARGET_AAPCS_BASED;
29288 }
29289
29290 static machine_mode
29291 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29292 machine_mode mode,
29293 int *punsignedp ATTRIBUTE_UNUSED,
29294 const_tree fntype ATTRIBUTE_UNUSED,
29295 int for_return ATTRIBUTE_UNUSED)
29296 {
29297 if (GET_MODE_CLASS (mode) == MODE_INT
29298 && GET_MODE_SIZE (mode) < 4)
29299 return SImode;
29300
29301 return mode;
29302 }
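/* Editorial note: the net effect of the hook above is that sub-word integer
   values (QImode and HImode) are widened to SImode, so e.g. a 'short'
   argument or return value occupies a full 32-bit register.  */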
29303
29304
29305 static bool
29306 arm_default_short_enums (void)
29307 {
29308 return ARM_DEFAULT_SHORT_ENUMS;
29309 }
29310
29311
29312 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29313
29314 static bool
29315 arm_align_anon_bitfield (void)
29316 {
29317 return TARGET_AAPCS_BASED;
29318 }
29319
29320
29321 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29322
29323 static tree
29324 arm_cxx_guard_type (void)
29325 {
29326 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29327 }
29328
29329
29330 /* The EABI says test the least significant bit of a guard variable. */
29331
29332 static bool
29333 arm_cxx_guard_mask_bit (void)
29334 {
29335 return TARGET_AAPCS_BASED;
29336 }
29337
29338
29339 /* The EABI specifies that all array cookies are 8 bytes long. */
29340
29341 static tree
29342 arm_get_cookie_size (tree type)
29343 {
29344 tree size;
29345
29346 if (!TARGET_AAPCS_BASED)
29347 return default_cxx_get_cookie_size (type);
29348
29349 size = build_int_cst (sizetype, 8);
29350 return size;
29351 }
29352
29353
29354 /* The EABI says that array cookies should also contain the element size. */
29355
29356 static bool
29357 arm_cookie_has_size (void)
29358 {
29359 return TARGET_AAPCS_BASED;
29360 }
29361
29362
29363 /* The EABI says constructors and destructors should return a pointer to
29364 the object constructed/destroyed. */
29365
29366 static bool
29367 arm_cxx_cdtor_returns_this (void)
29368 {
29369 return TARGET_AAPCS_BASED;
29370 }
29371
29372 /* The EABI says that an inline function may never be the key
29373 method. */
29374
29375 static bool
29376 arm_cxx_key_method_may_be_inline (void)
29377 {
29378 return !TARGET_AAPCS_BASED;
29379 }
29380
29381 static void
29382 arm_cxx_determine_class_data_visibility (tree decl)
29383 {
29384 if (!TARGET_AAPCS_BASED
29385 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29386 return;
29387
29388 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29389 is exported. However, on systems without dynamic vague linkage,
29390 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29391 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29392 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29393 else
29394 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29395 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29396 }
29397
29398 static bool
29399 arm_cxx_class_data_always_comdat (void)
29400 {
29401 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29402 vague linkage if the class has no key function. */
29403 return !TARGET_AAPCS_BASED;
29404 }
29405
29406
29407 /* The EABI says __aeabi_atexit should be used to register static
29408 destructors. */
29409
29410 static bool
29411 arm_cxx_use_aeabi_atexit (void)
29412 {
29413 return TARGET_AAPCS_BASED;
29414 }
29415
29416
29417 void
29418 arm_set_return_address (rtx source, rtx scratch)
29419 {
29420 arm_stack_offsets *offsets;
29421 HOST_WIDE_INT delta;
29422 rtx addr, mem;
29423 unsigned long saved_regs;
29424
29425 offsets = arm_get_frame_offsets ();
29426 saved_regs = offsets->saved_regs_mask;
29427
29428 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29429 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29430 else
29431 {
29432 if (frame_pointer_needed)
29433 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29434 else
29435 {
29436 /* LR will be the first saved register. */
29437 delta = offsets->outgoing_args - (offsets->frame + 4);
29438
29439
29440 if (delta >= 4096)
29441 {
29442 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29443 GEN_INT (delta & ~4095)));
29444 addr = scratch;
29445 delta &= 4095;
29446 }
29447 else
29448 addr = stack_pointer_rtx;
29449
29450 addr = plus_constant (Pmode, addr, delta);
29451 }
29452
29453 /* The store needs to be marked to prevent DSE from deleting
29454 it as dead if it is based on fp. */
29455 mem = gen_frame_mem (Pmode, addr);
29456 MEM_VOLATILE_P (mem) = true;
29457 emit_move_insn (mem, source);
29458 }
29459 }
29460
29461
29462 void
29463 thumb_set_return_address (rtx source, rtx scratch)
29464 {
29465 arm_stack_offsets *offsets;
29466 HOST_WIDE_INT delta;
29467 HOST_WIDE_INT limit;
29468 int reg;
29469 rtx addr, mem;
29470 unsigned long mask;
29471
29472 emit_use (source);
29473
29474 offsets = arm_get_frame_offsets ();
29475 mask = offsets->saved_regs_mask;
29476 if (mask & (1 << LR_REGNUM))
29477 {
29478 limit = 1024;
29479 /* Find the saved regs. */
29480 if (frame_pointer_needed)
29481 {
29482 delta = offsets->soft_frame - offsets->saved_args;
29483 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29484 if (TARGET_THUMB1)
29485 limit = 128;
29486 }
29487 else
29488 {
29489 delta = offsets->outgoing_args - offsets->saved_args;
29490 reg = SP_REGNUM;
29491 }
29492 /* Allow for the stack frame. */
29493 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29494 delta -= 16;
29495 /* The link register is always the first saved register. */
29496 delta -= 4;
29497
29498 /* Construct the address. */
29499 addr = gen_rtx_REG (SImode, reg);
29500 if (delta > limit)
29501 {
29502 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29503 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29504 addr = scratch;
29505 }
29506 else
29507 addr = plus_constant (Pmode, addr, delta);
29508
29509 /* The store needs to be marked to prevent DSE from deleting
29510 it as dead if it is based on fp. */
29511 mem = gen_frame_mem (Pmode, addr);
29512 MEM_VOLATILE_P (mem) = true;
29513 emit_move_insn (mem, source);
29514 }
29515 else
29516 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29517 }
29518
29519 /* Implements target hook vector_mode_supported_p. */
29520 bool
29521 arm_vector_mode_supported_p (machine_mode mode)
29522 {
29523 /* Neon also supports V2SImode, etc. listed in the clause below. */
29524 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29525 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29526 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29527 || mode == V8BFmode))
29528 return true;
29529
29530 if ((TARGET_NEON || TARGET_IWMMXT)
29531 && ((mode == V2SImode)
29532 || (mode == V4HImode)
29533 || (mode == V8QImode)))
29534 return true;
29535
29536 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29537 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29538 || mode == V2HAmode))
29539 return true;
29540
29541 if (TARGET_HAVE_MVE
29542 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29543 || mode == V16QImode
29544 || mode == V16BImode || mode == V8BImode || mode == V4BImode))
29545 return true;
29546
29547 if (TARGET_HAVE_MVE_FLOAT
29548 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29549 return true;
29550
29551 return false;
29552 }
29553
29554 /* Implements target hook array_mode_supported_p. */
29555
29556 static bool
29557 arm_array_mode_supported_p (machine_mode mode,
29558 unsigned HOST_WIDE_INT nelems)
29559 {
29560 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29561 for now, as the lane-swapping logic needs to be extended in the expanders.
29562 See PR target/82518. */
29563 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29564 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29565 && (nelems >= 2 && nelems <= 4))
29566 return true;
29567
29568 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29569 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29570 return true;
29571
29572 return false;
29573 }
29574
29575 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29576 registers when autovectorizing for Neon, at least until multiple vector
29577 widths are supported properly by the middle-end. */
29578
29579 static machine_mode
29580 arm_preferred_simd_mode (scalar_mode mode)
29581 {
29582 if (TARGET_NEON)
29583 switch (mode)
29584 {
29585 case E_HFmode:
29586 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29587 case E_SFmode:
29588 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29589 case E_SImode:
29590 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29591 case E_HImode:
29592 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29593 case E_QImode:
29594 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29595 case E_DImode:
29596 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29597 return V2DImode;
29598 break;
29599
29600 default:;
29601 }
29602
29603 if (TARGET_REALLY_IWMMXT)
29604 switch (mode)
29605 {
29606 case E_SImode:
29607 return V2SImode;
29608 case E_HImode:
29609 return V4HImode;
29610 case E_QImode:
29611 return V8QImode;
29612
29613 default:;
29614 }
29615
29616 if (TARGET_HAVE_MVE)
29617 switch (mode)
29618 {
29619 case E_QImode:
29620 return V16QImode;
29621 case E_HImode:
29622 return V8HImode;
29623 case E_SImode:
29624 return V4SImode;
29625
29626 default:;
29627 }
29628
29629 if (TARGET_HAVE_MVE_FLOAT)
29630 switch (mode)
29631 {
29632 case E_HFmode:
29633 return V8HFmode;
29634 case E_SFmode:
29635 return V4SFmode;
29636
29637 default:;
29638 }
29639
29640 return word_mode;
29641 }
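/* Editorial examples of the choices above: with plain Neon, SFmode
   autovectorizes as V4SFmode (quadword), and as V2SFmode only under
   -mvectorize-with-neon-double; MVE always picks the 128-bit forms such as
   V16QImode and V4SImode.  */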
29642
29643 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29644
29645 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29646 using r0-r4 for function arguments, r7 for the stack frame and not have
29647 enough left over to do doubleword arithmetic. For Thumb-2 all the
29648 potentially problematic instructions accept high registers so this is not
29649 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29650 that require many low registers. */
29651 static bool
29652 arm_class_likely_spilled_p (reg_class_t rclass)
29653 {
29654 if ((TARGET_THUMB1 && rclass == LO_REGS)
29655 || rclass == CC_REG)
29656 return true;
29657
29658 return default_class_likely_spilled_p (rclass);
29659 }
29660
29661 /* Implements target hook small_register_classes_for_mode_p. */
29662 bool
29663 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29664 {
29665 return TARGET_THUMB1;
29666 }
29667
29668 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29669 ARM insns and therefore guarantee that the shift count is modulo 256.
29670 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29671 guarantee no particular behavior for out-of-range counts. */
29672
29673 static unsigned HOST_WIDE_INT
29674 arm_shift_truncation_mask (machine_mode mode)
29675 {
29676 return mode == SImode ? 255 : 0;
29677 }
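/* Editorial example: with the mask of 255 returned above, the compiler may
   assume that the hardware truncates an SImode shift count, so a count
   register holding 257 behaves as a shift by 257 & 255 == 1; no equivalent
   guarantee is made for DImode shifts.  */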
29678
29679
29680 /* Map internal gcc register numbers to DWARF2 register numbers. */
29681
29682 unsigned int
29683 arm_debugger_regno (unsigned int regno)
29684 {
29685 if (regno < 16)
29686 return regno;
29687
29688 if (IS_VFP_REGNUM (regno))
29689 {
29690 /* See comment in arm_dwarf_register_span. */
29691 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29692 return 64 + regno - FIRST_VFP_REGNUM;
29693 else
29694 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29695 }
29696
29697 if (IS_IWMMXT_GR_REGNUM (regno))
29698 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29699
29700 if (IS_IWMMXT_REGNUM (regno))
29701 return 112 + regno - FIRST_IWMMXT_REGNUM;
29702
29703 return DWARF_FRAME_REGISTERS;
29704 }
29705
29706 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29707 GCC models them as 64 32-bit registers, so we need to describe this to
29708 the DWARF generation code. Other registers can use the default. */
29709 static rtx
29710 arm_dwarf_register_span (rtx rtl)
29711 {
29712 machine_mode mode;
29713 unsigned regno;
29714 rtx parts[16];
29715 int nregs;
29716 int i;
29717
29718 regno = REGNO (rtl);
29719 if (!IS_VFP_REGNUM (regno))
29720 return NULL_RTX;
29721
29722 /* XXX FIXME: The EABI defines two VFP register ranges:
29723 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29724 256-287: D0-D31
29725 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29726 corresponding D register. Until GDB supports this, we shall use the
29727 legacy encodings. We also use these encodings for D0-D15 for
29728 compatibility with older debuggers. */
29729 mode = GET_MODE (rtl);
29730 if (GET_MODE_SIZE (mode) < 8)
29731 return NULL_RTX;
29732
29733 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29734 {
29735 nregs = GET_MODE_SIZE (mode) / 4;
29736 for (i = 0; i < nregs; i += 2)
29737 if (TARGET_BIG_END)
29738 {
29739 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29740 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29741 }
29742 else
29743 {
29744 parts[i] = gen_rtx_REG (SImode, regno + i);
29745 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29746 }
29747 }
29748 else
29749 {
29750 nregs = GET_MODE_SIZE (mode) / 8;
29751 for (i = 0; i < nregs; i++)
29752 parts[i] = gen_rtx_REG (DImode, regno + i);
29753 }
29754
29755 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29756 }
29757
29758 #if ARM_UNWIND_INFO
29759 /* Emit unwind directives for a store-multiple instruction or stack pointer
29760 push during alignment.
29761 These should only ever be generated by the function prologue code, so
29762 expect them to have a particular form.
29763 The store-multiple instruction sometimes pushes pc as the last register,
29764 although it should not be tracked into unwind information, or for -Os
29765 sometimes pushes some dummy registers before first register that needs
29766 to be tracked in unwind information; such dummy registers are there just
29767 to avoid separate stack adjustment, and will not be restored in the
29768 epilogue. */
29769
29770 static void
29771 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29772 {
29773 int i;
29774 HOST_WIDE_INT offset;
29775 HOST_WIDE_INT nregs;
29776 int reg_size;
29777 unsigned reg;
29778 unsigned lastreg;
29779 unsigned padfirst = 0, padlast = 0;
29780 rtx e;
29781
29782 e = XVECEXP (p, 0, 0);
29783 gcc_assert (GET_CODE (e) == SET);
29784
29785 /* First insn will adjust the stack pointer. */
29786 gcc_assert (GET_CODE (e) == SET
29787 && REG_P (SET_DEST (e))
29788 && REGNO (SET_DEST (e)) == SP_REGNUM
29789 && GET_CODE (SET_SRC (e)) == PLUS);
29790
29791 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29792 nregs = XVECLEN (p, 0) - 1;
29793 gcc_assert (nregs);
29794
29795 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29796 if (reg < 16)
29797 {
29798 /* For -Os dummy registers can be pushed at the beginning to
29799 avoid separate stack pointer adjustment. */
29800 e = XVECEXP (p, 0, 1);
29801 e = XEXP (SET_DEST (e), 0);
29802 if (GET_CODE (e) == PLUS)
29803 padfirst = INTVAL (XEXP (e, 1));
29804 gcc_assert (padfirst == 0 || optimize_size);
29805 /* The function prologue may also push pc, but not annotate it as it is
29806 never restored. We turn this into a stack pointer adjustment. */
29807 e = XVECEXP (p, 0, nregs);
29808 e = XEXP (SET_DEST (e), 0);
29809 if (GET_CODE (e) == PLUS)
29810 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29811 else
29812 padlast = offset - 4;
29813 gcc_assert (padlast == 0 || padlast == 4);
29814 if (padlast == 4)
29815 fprintf (out_file, "\t.pad #4\n");
29816 reg_size = 4;
29817 fprintf (out_file, "\t.save {");
29818 }
29819 else if (IS_VFP_REGNUM (reg))
29820 {
29821 reg_size = 8;
29822 fprintf (out_file, "\t.vsave {");
29823 }
29824 else
29825 /* Unknown register type. */
29826 gcc_unreachable ();
29827
29828 /* If the stack increment doesn't match the size of the saved registers,
29829 something has gone horribly wrong. */
29830 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29831
29832 offset = padfirst;
29833 lastreg = 0;
29834 /* The remaining insns will describe the stores. */
29835 for (i = 1; i <= nregs; i++)
29836 {
29837 /* Expect (set (mem <addr>) (reg)).
29838 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29839 e = XVECEXP (p, 0, i);
29840 gcc_assert (GET_CODE (e) == SET
29841 && MEM_P (SET_DEST (e))
29842 && REG_P (SET_SRC (e)));
29843
29844 reg = REGNO (SET_SRC (e));
29845 gcc_assert (reg >= lastreg);
29846
29847 if (i != 1)
29848 fprintf (out_file, ", ");
29849 /* We can't use %r for vfp because we need to use the
29850 double precision register names. */
29851 if (IS_VFP_REGNUM (reg))
29852 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29853 else
29854 asm_fprintf (out_file, "%r", reg);
29855
29856 if (flag_checking)
29857 {
29858 /* Check that the addresses are consecutive. */
29859 e = XEXP (SET_DEST (e), 0);
29860 if (GET_CODE (e) == PLUS)
29861 gcc_assert (REG_P (XEXP (e, 0))
29862 && REGNO (XEXP (e, 0)) == SP_REGNUM
29863 && CONST_INT_P (XEXP (e, 1))
29864 && offset == INTVAL (XEXP (e, 1)));
29865 else
29866 gcc_assert (i == 1
29867 && REG_P (e)
29868 && REGNO (e) == SP_REGNUM);
29869 offset += reg_size;
29870 }
29871 }
29872 fprintf (out_file, "}\n");
29873 if (padfirst)
29874 fprintf (out_file, "\t.pad #%d\n", padfirst);
29875 }
29876
29877 /* Emit unwind directives for a SET. */
29878
29879 static void
29880 arm_unwind_emit_set (FILE * out_file, rtx p)
29881 {
29882 rtx e0;
29883 rtx e1;
29884 unsigned reg;
29885
29886 e0 = XEXP (p, 0);
29887 e1 = XEXP (p, 1);
29888 switch (GET_CODE (e0))
29889 {
29890 case MEM:
29891 /* Pushing a single register. */
29892 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29893 || !REG_P (XEXP (XEXP (e0, 0), 0))
29894 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29895 abort ();
29896
29897 asm_fprintf (out_file, "\t.save ");
29898 if (IS_VFP_REGNUM (REGNO (e1)))
29899 asm_fprintf(out_file, "{d%d}\n",
29900 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29901 else
29902 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29903 break;
29904
29905 case REG:
29906 if (REGNO (e0) == SP_REGNUM)
29907 {
29908 /* A stack increment. */
29909 if (GET_CODE (e1) != PLUS
29910 || !REG_P (XEXP (e1, 0))
29911 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29912 || !CONST_INT_P (XEXP (e1, 1)))
29913 abort ();
29914
29915 asm_fprintf (out_file, "\t.pad #%wd\n",
29916 -INTVAL (XEXP (e1, 1)));
29917 }
29918 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29919 {
29920 HOST_WIDE_INT offset;
29921
29922 if (GET_CODE (e1) == PLUS)
29923 {
29924 if (!REG_P (XEXP (e1, 0))
29925 || !CONST_INT_P (XEXP (e1, 1)))
29926 abort ();
29927 reg = REGNO (XEXP (e1, 0));
29928 offset = INTVAL (XEXP (e1, 1));
29929 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29930 HARD_FRAME_POINTER_REGNUM, reg,
29931 offset);
29932 }
29933 else if (REG_P (e1))
29934 {
29935 reg = REGNO (e1);
29936 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29937 HARD_FRAME_POINTER_REGNUM, reg);
29938 }
29939 else
29940 abort ();
29941 }
29942 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29943 {
29944 /* Move from sp to reg. */
29945 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
29946 }
29947 else if (GET_CODE (e1) == PLUS
29948 && REG_P (XEXP (e1, 0))
29949 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29950 && CONST_INT_P (XEXP (e1, 1)))
29951 {
29952 /* Set reg to offset from sp. */
29953 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
29954 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29955 }
29956 else
29957 abort ();
29958 break;
29959
29960 default:
29961 abort ();
29962 }
29963 }
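/* Editorial examples (a sketch) of directives this function can emit:

     .save {r4}          @ single-register push
     .pad #16            @ sp adjusted down by 16 bytes
     .setfp fp, sp, #8   @ frame pointer established at sp + 8
     .movsp r4           @ sp copied into r4

   the exact register names and offsets depend on the pattern matched.  */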
29964
29965
29966 /* Emit unwind directives for the given insn. */
29967
29968 static void
29969 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
29970 {
29971 rtx note, pat;
29972 bool handled_one = false;
29973
29974 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29975 return;
29976
29977 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29978 && (TREE_NOTHROW (current_function_decl)
29979 || crtl->all_throwers_are_sibcalls))
29980 return;
29981
29982 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29983 return;
29984
29985 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29986 {
29987 switch (REG_NOTE_KIND (note))
29988 {
29989 case REG_FRAME_RELATED_EXPR:
29990 pat = XEXP (note, 0);
29991 goto found;
29992
29993 case REG_CFA_REGISTER:
29994 pat = XEXP (note, 0);
29995 if (pat == NULL)
29996 {
29997 pat = PATTERN (insn);
29998 if (GET_CODE (pat) == PARALLEL)
29999 pat = XVECEXP (pat, 0, 0);
30000 }
30001
30002 /* Only emitted for IS_STACKALIGN re-alignment. */
30003 {
30004 rtx dest, src;
30005 unsigned reg;
30006
30007 src = SET_SRC (pat);
30008 dest = SET_DEST (pat);
30009
30010 gcc_assert (src == stack_pointer_rtx);
30011 reg = REGNO (dest);
30012 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30013 reg + 0x90, reg);
30014 }
30015 handled_one = true;
30016 break;
30017
30018 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30019 to get correct dwarf information for shrink-wrap. We should not
30020 emit unwind information for it because these notes are used either for
30021 pretend arguments or to adjust sp and restore registers from the
30022 stack. */
30023 case REG_CFA_DEF_CFA:
30024 case REG_CFA_ADJUST_CFA:
30025 case REG_CFA_RESTORE:
30026 return;
30027
30028 case REG_CFA_EXPRESSION:
30029 case REG_CFA_OFFSET:
30030 /* ??? Only handling here what we actually emit. */
30031 gcc_unreachable ();
30032
30033 default:
30034 break;
30035 }
30036 }
30037 if (handled_one)
30038 return;
30039 pat = PATTERN (insn);
30040 found:
30041
30042 switch (GET_CODE (pat))
30043 {
30044 case SET:
30045 arm_unwind_emit_set (out_file, pat);
30046 break;
30047
30048 case SEQUENCE:
30049 /* Store multiple. */
30050 arm_unwind_emit_sequence (out_file, pat);
30051 break;
30052
30053 default:
30054 abort();
30055 }
30056 }
30057
30058
30059 /* Output a reference from a function exception table to the type_info
30060 object X. The EABI specifies that the symbol should be relocated by
30061 an R_ARM_TARGET2 relocation. */
30062
30063 static bool
30064 arm_output_ttype (rtx x)
30065 {
30066 fputs ("\t.word\t", asm_out_file);
30067 output_addr_const (asm_out_file, x);
30068 /* Use special relocations for symbol references. */
30069 if (!CONST_INT_P (x))
30070 fputs ("(TARGET2)", asm_out_file);
30071 fputc ('\n', asm_out_file);
30072
30073 return TRUE;
30074 }
30075
30076 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30077
30078 static void
30079 arm_asm_emit_except_personality (rtx personality)
30080 {
30081 fputs ("\t.personality\t", asm_out_file);
30082 output_addr_const (asm_out_file, personality);
30083 fputc ('\n', asm_out_file);
30084 }
30085 #endif /* ARM_UNWIND_INFO */
30086
30087 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30088
30089 static void
30090 arm_asm_init_sections (void)
30091 {
30092 #if ARM_UNWIND_INFO
30093 exception_section = get_unnamed_section (0, output_section_asm_op,
30094 "\t.handlerdata");
30095 #endif /* ARM_UNWIND_INFO */
30096
30097 #ifdef OBJECT_FORMAT_ELF
30098 if (target_pure_code)
30099 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30100 #endif
30101 }
30102
30103 /* Output unwind directives for the start/end of a function. */
30104
30105 void
30106 arm_output_fn_unwind (FILE * f, bool prologue)
30107 {
30108 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30109 return;
30110
30111 if (prologue)
30112 fputs ("\t.fnstart\n", f);
30113 else
30114 {
30115 /* If this function will never be unwound, then mark it as such.
30116 The same condition is used in arm_unwind_emit to suppress
30117 the frame annotations. */
30118 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30119 && (TREE_NOTHROW (current_function_decl)
30120 || crtl->all_throwers_are_sibcalls))
30121 fputs("\t.cantunwind\n", f);
30122
30123 fputs ("\t.fnend\n", f);
30124 }
30125 }
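/* Editorial example: with EABI unwind info a function body is bracketed by

     .fnstart
       ...function code and unwind annotations...
     .fnend

   and additionally receives .cantunwind just before .fnend when no unwind
   tables were requested and the function can never throw.  */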
30126
30127 static bool
30128 arm_emit_tls_decoration (FILE *fp, rtx x)
30129 {
30130 enum tls_reloc reloc;
30131 rtx val;
30132
30133 val = XVECEXP (x, 0, 0);
30134 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30135
30136 output_addr_const (fp, val);
30137
30138 switch (reloc)
30139 {
30140 case TLS_GD32:
30141 fputs ("(tlsgd)", fp);
30142 break;
30143 case TLS_GD32_FDPIC:
30144 fputs ("(tlsgd_fdpic)", fp);
30145 break;
30146 case TLS_LDM32:
30147 fputs ("(tlsldm)", fp);
30148 break;
30149 case TLS_LDM32_FDPIC:
30150 fputs ("(tlsldm_fdpic)", fp);
30151 break;
30152 case TLS_LDO32:
30153 fputs ("(tlsldo)", fp);
30154 break;
30155 case TLS_IE32:
30156 fputs ("(gottpoff)", fp);
30157 break;
30158 case TLS_IE32_FDPIC:
30159 fputs ("(gottpoff_fdpic)", fp);
30160 break;
30161 case TLS_LE32:
30162 fputs ("(tpoff)", fp);
30163 break;
30164 case TLS_DESCSEQ:
30165 fputs ("(tlsdesc)", fp);
30166 break;
30167 default:
30168 gcc_unreachable ();
30169 }
30170
30171 switch (reloc)
30172 {
30173 case TLS_GD32:
30174 case TLS_LDM32:
30175 case TLS_IE32:
30176 case TLS_DESCSEQ:
30177 fputs (" + (. - ", fp);
30178 output_addr_const (fp, XVECEXP (x, 0, 2));
30179 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30180 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30181 output_addr_const (fp, XVECEXP (x, 0, 3));
30182 fputc (')', fp);
30183 break;
30184 default:
30185 break;
30186 }
30187
30188 return TRUE;
30189 }
30190
30191 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30192
30193 static void
30194 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30195 {
30196 gcc_assert (size == 4);
30197 fputs ("\t.word\t", file);
30198 output_addr_const (file, x);
30199 fputs ("(tlsldo)", file);
30200 }
30201
30202 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30203
30204 static bool
30205 arm_output_addr_const_extra (FILE *fp, rtx x)
30206 {
30207 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30208 return arm_emit_tls_decoration (fp, x);
30209 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30210 {
30211 char label[256];
30212 int labelno = INTVAL (XVECEXP (x, 0, 0));
30213
30214 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30215 assemble_name_raw (fp, label);
30216
30217 return TRUE;
30218 }
30219 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30220 {
30221 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30222 if (GOT_PCREL)
30223 fputs ("+.", fp);
30224 fputs ("-(", fp);
30225 output_addr_const (fp, XVECEXP (x, 0, 0));
30226 fputc (')', fp);
30227 return TRUE;
30228 }
30229 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30230 {
30231 output_addr_const (fp, XVECEXP (x, 0, 0));
30232 if (GOT_PCREL)
30233 fputs ("+.", fp);
30234 fputs ("-(", fp);
30235 output_addr_const (fp, XVECEXP (x, 0, 1));
30236 fputc (')', fp);
30237 return TRUE;
30238 }
30239 else if (GET_CODE (x) == CONST_VECTOR)
30240 return arm_emit_vector_const (fp, x);
30241
30242 return FALSE;
30243 }
30244
30245 /* Output assembly for a shift instruction.
30246 SET_FLAGS determines how the instruction modifies the condition codes.
30247 0 - Do not set condition codes.
30248 1 - Set condition codes.
30249 2 - Use smallest instruction. */
30250 const char *
30251 arm_output_shift(rtx * operands, int set_flags)
30252 {
30253 char pattern[100];
30254 static const char flag_chars[3] = {'?', '.', '!'};
30255 const char *shift;
30256 HOST_WIDE_INT val;
30257 char c;
30258
30259 c = flag_chars[set_flags];
30260 shift = shift_op(operands[3], &val);
30261 if (shift)
30262 {
30263 if (val != -1)
30264 operands[2] = GEN_INT(val);
30265 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30266 }
30267 else
30268 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30269
30270 output_asm_insn (pattern, operands);
30271 return "";
30272 }
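/* Editorial sketch: for a left shift by a constant with SET_FLAGS == 0 the
   routine above builds a template such as "lsl%?\t%0, %1, %2", which after
   operand substitution prints e.g.

     lsl     r0, r1, #2

   With SET_FLAGS == 1 the '.' flag character is used instead, selecting the
   flag-setting form of the instruction.  */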
30273
30274 /* Output assembly for a WMMX immediate shift instruction. */
30275 const char *
30276 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30277 {
30278 int shift = INTVAL (operands[2]);
30279 char templ[50];
30280 machine_mode opmode = GET_MODE (operands[0]);
30281
30282 gcc_assert (shift >= 0);
30283
30284 /* Handle the case where the shift value is > 63 (for the D qualifier),
30285 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
30286 if (((opmode == V4HImode) && (shift > 15))
30287 || ((opmode == V2SImode) && (shift > 31))
30288 || ((opmode == DImode) && (shift > 63)))
30289 {
30290 if (wror_or_wsra)
30291 {
30292 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30293 output_asm_insn (templ, operands);
30294 if (opmode == DImode)
30295 {
30296 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30297 output_asm_insn (templ, operands);
30298 }
30299 }
30300 else
30301 {
30302 /* The destination register will contain all zeros. */
30303 sprintf (templ, "wzero\t%%0");
30304 output_asm_insn (templ, operands);
30305 }
30306 return "";
30307 }
30308
30309 if ((opmode == DImode) && (shift > 32))
30310 {
30311 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30312 output_asm_insn (templ, operands);
30313 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30314 output_asm_insn (templ, operands);
30315 }
30316 else
30317 {
30318 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30319 output_asm_insn (templ, operands);
30320 }
30321 return "";
30322 }
30323
30324 /* Output assembly for a WMMX tinsr instruction. */
30325 const char *
30326 arm_output_iwmmxt_tinsr (rtx *operands)
30327 {
30328 int mask = INTVAL (operands[3]);
30329 int i;
30330 char templ[50];
30331 int units = mode_nunits[GET_MODE (operands[0])];
30332 gcc_assert ((mask & (mask - 1)) == 0);
30333 for (i = 0; i < units; ++i)
30334 {
30335 if ((mask & 0x01) == 1)
30336 {
30337 break;
30338 }
30339 mask >>= 1;
30340 }
30341 gcc_assert (i < units);
30342 {
30343 switch (GET_MODE (operands[0]))
30344 {
30345 case E_V8QImode:
30346 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30347 break;
30348 case E_V4HImode:
30349 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30350 break;
30351 case E_V2SImode:
30352 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30353 break;
30354 default:
30355 gcc_unreachable ();
30356 break;
30357 }
30358 output_asm_insn (templ, operands);
30359 }
30360 return "";
30361 }
30362
30363 /* Output a Thumb-1 casesi dispatch sequence. */
30364 const char *
30365 thumb1_output_casesi (rtx *operands)
30366 {
30367 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30368
30369 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30370
30371 switch (GET_MODE(diff_vec))
30372 {
30373 case E_QImode:
30374 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30375 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30376 case E_HImode:
30377 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30378 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30379 case E_SImode:
30380 return "bl\t%___gnu_thumb1_case_si";
30381 default:
30382 gcc_unreachable ();
30383 }
30384 }
30385
30386 /* Output a Thumb-2 casesi instruction. */
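/* For a QImode (byte-offset) dispatch table the emitted sequence is roughly:
	cmp	rINDEX, rMAX
	bhi	.Ldefault
	tbb	[pc, rINDEX]
   where the register and label names are illustrative only.  */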
30387 const char *
30388 thumb2_output_casesi (rtx *operands)
30389 {
30390 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30391
30392 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30393
30394 output_asm_insn ("cmp\t%0, %1", operands);
30395 output_asm_insn ("bhi\t%l3", operands);
30396 switch (GET_MODE(diff_vec))
30397 {
30398 case E_QImode:
30399 return "tbb\t[%|pc, %0]";
30400 case E_HImode:
30401 return "tbh\t[%|pc, %0, lsl #1]";
30402 case E_SImode:
30403 if (flag_pic)
30404 {
30405 output_asm_insn ("adr\t%4, %l2", operands);
30406 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30407 output_asm_insn ("add\t%4, %4, %5", operands);
30408 return "bx\t%4";
30409 }
30410 else
30411 {
30412 output_asm_insn ("adr\t%4, %l2", operands);
30413 return "ldr\t%|pc, [%4, %0, lsl #2]";
30414 }
30415 default:
30416 gcc_unreachable ();
30417 }
30418 }
30419
30420 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30421 per-core tuning structs. */
30422 static int
30423 arm_issue_rate (void)
30424 {
30425 return current_tune->issue_rate;
30426 }
30427
30428 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30429 static int
30430 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30431 {
30432 if (DEBUG_INSN_P (insn))
30433 return more;
30434
30435 rtx_code code = GET_CODE (PATTERN (insn));
30436 if (code == USE || code == CLOBBER)
30437 return more;
30438
30439 if (get_attr_type (insn) == TYPE_NO_INSN)
30440 return more;
30441
30442 return more - 1;
30443 }
30444
30445 /* Return how many instructions the scheduler should look ahead to choose
30446 the best one. */
30447 static int
30448 arm_first_cycle_multipass_dfa_lookahead (void)
30449 {
30450 int issue_rate = arm_issue_rate ();
30451
30452 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30453 }
30454
30455 /* Enable modeling of L2 auto-prefetcher. */
30456 static int
30457 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30458 {
30459 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30460 }
30461
30462 const char *
30463 arm_mangle_type (const_tree type)
30464 {
30465 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30466 has to be mangled as if it is in the "std" namespace. */
30467 if (TARGET_AAPCS_BASED
30468 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30469 return "St9__va_list";
30470
30471 /* Half-precision floating point types. */
30472 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30473 {
30474 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30475 return NULL;
30476 if (TYPE_MODE (type) == BFmode)
30477 return "u6__bf16";
30478 else
30479 return "Dh";
30480 }
30481
30482 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30483 built-in type. */
30484 if (TYPE_NAME (type) != NULL)
30485 return arm_mangle_builtin_type (type);
30486
30487 /* Use the default mangling. */
30488 return NULL;
30489 }
30490
30491 /* Order of allocation of core registers for Thumb: this allocation is
30492 written over the corresponding initial entries of the array
30493 initialized with REG_ALLOC_ORDER. We allocate all low registers
30494 first. Saving and restoring a low register is usually cheaper than
30495 using a call-clobbered high register. */
30496
30497 static const int thumb_core_reg_alloc_order[] =
30498 {
30499 3, 2, 1, 0, 4, 5, 6, 7,
30500 12, 14, 8, 9, 10, 11
30501 };
30502
30503 /* Adjust register allocation order when compiling for Thumb. */
30504
30505 void
30506 arm_order_regs_for_local_alloc (void)
30507 {
30508 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30509 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30510 if (TARGET_THUMB)
30511 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30512 sizeof (thumb_core_reg_alloc_order));
30513 }
30514
30515 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30516
30517 bool
30518 arm_frame_pointer_required (void)
30519 {
30520 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30521 return true;
30522
30523 /* If the function receives nonlocal gotos, it needs to save the frame
30524 pointer in the nonlocal_goto_save_area object. */
30525 if (cfun->has_nonlocal_label)
30526 return true;
30527
30528 /* The frame pointer is required for non-leaf APCS frames. */
30529 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30530 return true;
30531
30532 /* If we are probing the stack in the prologue, we will have a faulting
30533 instruction prior to the stack adjustment and this requires a frame
30534 pointer if we want to catch the exception using the EABI unwinder. */
30535 if (!IS_INTERRUPT (arm_current_func_type ())
30536 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30537 || flag_stack_clash_protection)
30538 && arm_except_unwind_info (&global_options) == UI_TARGET
30539 && cfun->can_throw_non_call_exceptions)
30540 {
30541 HOST_WIDE_INT size = get_frame_size ();
30542
30543 /* That's irrelevant if there is no stack adjustment. */
30544 if (size <= 0)
30545 return false;
30546
30547 /* That's relevant only if there is a stack probe. */
30548 if (crtl->is_leaf && !cfun->calls_alloca)
30549 {
30550 /* We don't have the final size of the frame yet, so allow for a worst-case adjustment. */
30551 size += 32 * UNITS_PER_WORD;
30552 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30553 return true;
30554 }
30555 else
30556 return true;
30557 }
30558
30559 return false;
30560 }
30561
30562 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30563 All modes except THUMB1 have conditional execution.
30564 If we have conditional arithmetic, return false before reload to
30565 enable some ifcvt transformations. */
30566 static bool
30567 arm_have_conditional_execution (void)
30568 {
30569 bool has_cond_exec, enable_ifcvt_trans;
30570
30571 /* Only THUMB1 cannot support conditional execution. */
30572 has_cond_exec = !TARGET_THUMB1;
30573
30574 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30575 before reload. */
30576 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30577
30578 return has_cond_exec && !enable_ifcvt_trans;
30579 }
30580
30581 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30582 static HOST_WIDE_INT
30583 arm_vector_alignment (const_tree type)
30584 {
30585 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30586
30587 if (TARGET_AAPCS_BASED)
30588 align = MIN (align, 64);
30589
30590 return align;
30591 }
30592
30593 static unsigned int
30594 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30595 {
30596 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30597 {
30598 modes->safe_push (V16QImode);
30599 modes->safe_push (V8QImode);
30600 }
30601 return 0;
30602 }
30603
30604 static bool
30605 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30606 {
30607 /* Vectors which aren't in packed structures will not be less aligned than
30608 the natural alignment of their element type, so this is safe. */
30609 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30610 return !is_packed;
30611
30612 return default_builtin_vector_alignment_reachable (type, is_packed);
30613 }
30614
30615 static bool
30616 arm_builtin_support_vector_misalignment (machine_mode mode,
30617 const_tree type, int misalignment,
30618 bool is_packed)
30619 {
30620 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30621 {
30622 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30623
30624 if (is_packed)
30625 return align == 1;
30626
30627 /* If the misalignment is unknown, we should be able to handle the access
30628 so long as it is not to a member of a packed data structure. */
30629 if (misalignment == -1)
30630 return true;
30631
30632 /* Return true if the misalignment is a multiple of the natural alignment
30633 of the vector's element type. This is probably always going to be
30634 true in practice, since we've already established that this isn't a
30635 packed access. */
30636 return ((misalignment % align) == 0);
30637 }
30638
30639 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30640 is_packed);
30641 }
30642
30643 static void
30644 arm_conditional_register_usage (void)
30645 {
30646 int regno;
30647
30648 if (TARGET_THUMB1 && optimize_size)
30649 {
30650 /* When optimizing for size on Thumb-1, it's better not
30651 to use the HI regs, because of the overhead of
30652 stacking them. */
30653 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30654 fixed_regs[regno] = call_used_regs[regno] = 1;
30655 }
30656
30657 /* The link register can be clobbered by any branch insn,
30658 but we have no way to track that at present, so mark
30659 it as unavailable. */
30660 if (TARGET_THUMB1)
30661 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30662
30663 if (TARGET_32BIT && TARGET_VFP_BASE)
30664 {
30665 /* VFPv3 registers are disabled when earlier VFP
30666 versions are selected due to the definition of
30667 LAST_VFP_REGNUM. */
30668 for (regno = FIRST_VFP_REGNUM;
30669 regno <= LAST_VFP_REGNUM; ++ regno)
30670 {
30671 fixed_regs[regno] = 0;
30672 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30673 || regno >= FIRST_VFP_REGNUM + 32;
30674 }
30675 if (TARGET_HAVE_MVE)
30676 fixed_regs[VPR_REGNUM] = 0;
30677 }
30678
30679 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30680 {
30681 regno = FIRST_IWMMXT_GR_REGNUM;
30682 /* The 2002/10/09 revision of the XScale ABI has wCG0
30683 and wCG1 as call-preserved registers. The 2002/11/21
30684 revision changed this so that all wCG registers are
30685 scratch registers. */
30686 for (regno = FIRST_IWMMXT_GR_REGNUM;
30687 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30688 fixed_regs[regno] = 0;
30689 /* The XScale ABI has wR0 - wR9 as scratch registers,
30690 the rest as call-preserved registers. */
30691 for (regno = FIRST_IWMMXT_REGNUM;
30692 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30693 {
30694 fixed_regs[regno] = 0;
30695 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30696 }
30697 }
30698
30699 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30700 {
30701 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30702 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30703 }
30704 else if (TARGET_APCS_STACK)
30705 {
30706 fixed_regs[10] = 1;
30707 call_used_regs[10] = 1;
30708 }
30709 /* -mcaller-super-interworking reserves r11 for calls to
30710 _interwork_r11_call_via_rN(). Making the register global
30711 is an easy way of ensuring that it remains valid for all
30712 calls. */
30713 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30714 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30715 {
30716 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30717 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30718 if (TARGET_CALLER_INTERWORKING)
30719 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30720 }
30721
30722 /* The Q and GE bits are only accessed via special ACLE patterns. */
30723 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30724 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30725
30726 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30727 }
30728
30729 static reg_class_t
30730 arm_preferred_rename_class (reg_class_t rclass)
30731 {
30732 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30733 using GENERAL_REGS. During the register rename pass we therefore prefer
30734 LO_REGS, which can reduce code size. */
30735 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30736 return LO_REGS;
30737 else
30738 return NO_REGS;
30739 }
30740
30741 /* Compute the attribute "length" of insn "*push_multi".
30742 So this function MUST be kept in sync with that insn pattern. */
30743 int
30744 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30745 {
30746 int i, regno, hi_reg;
30747 int num_saves = XVECLEN (parallel_op, 0);
30748
30749 /* ARM mode. */
30750 if (TARGET_ARM)
30751 return 4;
30752 /* Thumb1 mode. */
30753 if (TARGET_THUMB1)
30754 return 2;
30755
30756 /* Thumb2 mode. */
30757 regno = REGNO (first_op);
30758 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30759 list fits in 8 bits. Normally this means that all registers in the list must
30760 be LO_REGS, that is R0-R7. If any HI_REGS register is used, we must use the
30761 32-bit encoding. The one exception is PUSH, where LR (a HI_REGS register)
30762 can still be used with the 16-bit encoding. */
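/* For example, "push {r0-r7, lr}" can use the 16-bit encoding, whereas
   "push {r0, r8}" requires the 32-bit encoding.  */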
30763 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30764 for (i = 1; i < num_saves && !hi_reg; i++)
30765 {
30766 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30767 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30768 }
30769
30770 if (!hi_reg)
30771 return 2;
30772 return 4;
30773 }
30774
30775 /* Compute the attribute "length" of insn. Currently, this function is used
30776 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30777 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30778 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30779 true if OPERANDS contains an insn that explicitly updates the base register. */
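/* For example, "pop {r0-r3, pc}" fits the 16-bit encoding, whereas popping
   any other high register, or using LDM with a high base register, forces
   the 32-bit encoding.  */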
30780
30781 int
30782 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30783 {
30784 /* ARM mode. */
30785 if (TARGET_ARM)
30786 return 4;
30787 /* Thumb1 mode. */
30788 if (TARGET_THUMB1)
30789 return 2;
30790
30791 rtx parallel_op = operands[0];
30792 /* Start at the index of the last element of the PARALLEL. */
30793 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30794 /* Initialize REGNO to the base register. */
30795 unsigned regno = REGNO (operands[1]);
30796 /* Skip return and write back pattern.
30797 We only need register pop pattern for later analysis. */
30798 unsigned first_indx = 0;
30799 first_indx += return_pc ? 1 : 0;
30800 first_indx += write_back_p ? 1 : 0;
30801
30802 /* A pop operation can be done through LDM or POP. If the base register is SP
30803 and it is written back, then the LDM is an alias of POP. */
30804 bool pop_p = (regno == SP_REGNUM && write_back_p);
30805 bool ldm_p = !pop_p;
30806
30807 /* Check base register for LDM. */
30808 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30809 return 4;
30810
30811 /* Check each register in the list. */
30812 for (; indx >= first_indx; indx--)
30813 {
30814 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30815 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30816 comment in arm_attr_length_push_multi. */
30817 if (REGNO_REG_CLASS (regno) == HI_REGS
30818 && (regno != PC_REGNUM || ldm_p))
30819 return 4;
30820 }
30821
30822 return 2;
30823 }
30824
30825 /* Compute the number of instructions emitted by output_move_double. */
30826 int
30827 arm_count_output_move_double_insns (rtx *operands)
30828 {
30829 int count;
30830 rtx ops[2];
30831 /* output_move_double may modify the operands array, so call it
30832 here on a copy of the array. */
30833 ops[0] = operands[0];
30834 ops[1] = operands[1];
30835 output_move_double (ops, false, &count);
30836 return count;
30837 }
30838
30839 /* Same as above, but operands are a register/memory pair in SImode.
30840 Assumes operands has the base register in position 0 and memory in position
30841 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30842 int
30843 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30844 {
30845 int count;
30846 rtx ops[2];
30847 int regnum, memnum;
30848 if (load)
30849 regnum = 0, memnum = 1;
30850 else
30851 regnum = 1, memnum = 0;
30852 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30853 ops[memnum] = adjust_address (operands[2], DImode, 0);
30854 output_move_double (ops, false, &count);
30855 return count;
30856 }
30857
30858
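/* If OPERAND is a CONST_DOUBLE whose value is 2^-N for some N in [0, 31],
   return N, otherwise return 0.  This is used for the fractional-bits
   operand of the fixed-point VCVT conversion patterns.  */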
30859 int
30860 vfp3_const_double_for_fract_bits (rtx operand)
30861 {
30862 REAL_VALUE_TYPE r0;
30863
30864 if (!CONST_DOUBLE_P (operand))
30865 return 0;
30866
30867 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30868 if (exact_real_inverse (DFmode, &r0)
30869 && !REAL_VALUE_NEGATIVE (r0))
30870 {
30871 if (exact_real_truncate (DFmode, &r0))
30872 {
30873 HOST_WIDE_INT value = real_to_integer (&r0);
30874 value = value & 0xffffffff;
30875 if ((value != 0) && ( (value & (value - 1)) == 0))
30876 {
30877 int ret = exact_log2 (value);
30878 gcc_assert (IN_RANGE (ret, 0, 31));
30879 return ret;
30880 }
30881 }
30882 }
30883 return 0;
30884 }
30885
30886 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30887 log2 is in [1, 32], return that log2. Otherwise return -1.
30888 This is used in the patterns for vcvt.s32.f32 floating-point to
30889 fixed-point conversions. */
30890
30891 int
30892 vfp3_const_double_for_bits (rtx x)
30893 {
30894 const REAL_VALUE_TYPE *r;
30895
30896 if (!CONST_DOUBLE_P (x))
30897 return -1;
30898
30899 r = CONST_DOUBLE_REAL_VALUE (x);
30900
30901 if (REAL_VALUE_NEGATIVE (*r)
30902 || REAL_VALUE_ISNAN (*r)
30903 || REAL_VALUE_ISINF (*r)
30904 || !real_isinteger (r, SFmode))
30905 return -1;
30906
30907 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30908
30909 /* The exact_log2 above will have returned -1 if this is
30910 not an exact log2. */
30911 if (!IN_RANGE (hwint, 1, 32))
30912 return -1;
30913
30914 return hwint;
30915 }
30916
30917 \f
30918 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30919
30920 static void
30921 arm_pre_atomic_barrier (enum memmodel model)
30922 {
30923 if (need_atomic_barrier_p (model, true))
30924 emit_insn (gen_memory_barrier ());
30925 }
30926
30927 static void
30928 arm_post_atomic_barrier (enum memmodel model)
30929 {
30930 if (need_atomic_barrier_p (model, false))
30931 emit_insn (gen_memory_barrier ());
30932 }
30933
30934 /* Emit the load-exclusive and store-exclusive instructions.
30935 Use acquire and release versions if necessary. */
30936
30937 static void
30938 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30939 {
30940 rtx (*gen) (rtx, rtx);
30941
30942 if (acq)
30943 {
30944 switch (mode)
30945 {
30946 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30947 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30948 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30949 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30950 default:
30951 gcc_unreachable ();
30952 }
30953 }
30954 else
30955 {
30956 switch (mode)
30957 {
30958 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30959 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30960 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30961 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30962 default:
30963 gcc_unreachable ();
30964 }
30965 }
30966
30967 emit_insn (gen (rval, mem));
30968 }
30969
30970 static void
30971 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30972 rtx mem, bool rel)
30973 {
30974 rtx (*gen) (rtx, rtx, rtx);
30975
30976 if (rel)
30977 {
30978 switch (mode)
30979 {
30980 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30981 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30982 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30983 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30984 default:
30985 gcc_unreachable ();
30986 }
30987 }
30988 else
30989 {
30990 switch (mode)
30991 {
30992 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30993 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30994 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30995 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30996 default:
30997 gcc_unreachable ();
30998 }
30999 }
31000
31001 emit_insn (gen (bval, rval, mem));
31002 }
31003
31004 /* Mark the previous jump instruction as unlikely. */
31005
31006 static void
31007 emit_unlikely_jump (rtx insn)
31008 {
31009 rtx_insn *jump = emit_jump_insn (insn);
31010 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31011 }
31012
31013 /* Expand a compare and swap pattern. */
31014
31015 void
31016 arm_expand_compare_and_swap (rtx operands[])
31017 {
31018 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31019 machine_mode mode, cmp_mode;
31020
31021 bval = operands[0];
31022 rval = operands[1];
31023 mem = operands[2];
31024 oldval = operands[3];
31025 newval = operands[4];
31026 is_weak = operands[5];
31027 mod_s = operands[6];
31028 mod_f = operands[7];
31029 mode = GET_MODE (mem);
31030
31031 /* Normally the succ memory model must be stronger than fail, but in the
31032 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31033 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31034
31035 if (TARGET_HAVE_LDACQ
31036 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31037 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31038 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31039
31040 switch (mode)
31041 {
31042 case E_QImode:
31043 case E_HImode:
31044 /* For narrow modes, we're going to perform the comparison in SImode,
31045 so do the zero-extension now. */
31046 rval = gen_reg_rtx (SImode);
31047 oldval = convert_modes (SImode, mode, oldval, true);
31048 /* FALLTHRU */
31049
31050 case E_SImode:
31051 /* Force the value into a register if needed. We waited until after
31052 the zero-extension above to do this properly. */
31053 if (!arm_add_operand (oldval, SImode))
31054 oldval = force_reg (SImode, oldval);
31055 break;
31056
31057 case E_DImode:
31058 if (!cmpdi_operand (oldval, mode))
31059 oldval = force_reg (mode, oldval);
31060 break;
31061
31062 default:
31063 gcc_unreachable ();
31064 }
31065
31066 if (TARGET_THUMB1)
31067 cmp_mode = E_SImode;
31068 else
31069 cmp_mode = CC_Zmode;
31070
31071 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31072 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31073 oldval, newval, is_weak, mod_s, mod_f));
31074
31075 if (mode == QImode || mode == HImode)
31076 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31077
31078 /* In all cases, we arrange for success to be signaled by Z set.
31079 This arrangement allows for the boolean result to be used directly
31080 in a subsequent branch, post optimization. For Thumb-1 targets, the
31081 boolean negation of the result is also stored in bval, because the Thumb-1
31082 backend lacks dependency tracking for the CC flag: flag-setting is not
31083 represented at the RTL level. */
31084 if (TARGET_THUMB1)
31085 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31086 else
31087 {
31088 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31089 emit_insn (gen_rtx_SET (bval, x));
31090 }
31091 }
31092
31093 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31094 another memory store between the load-exclusive and store-exclusive can
31095 reset the monitor from Exclusive to Open state. This means we must wait
31096 until after reload to split the pattern, lest we get a register spill in
31097 the middle of the atomic sequence. Success of the compare and swap is
31098 indicated by the Z flag set for 32bit targets and by neg_bval being zero
31099 for Thumb-1 targets (ie. negation of the boolean value returned by
31100 atomic_compare_and_swapmode standard pattern in operand 0). */
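/* The split sequence for a 32-bit strong compare-and-swap is roughly:

	.Lretry:
	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	.Ldone
	strex	neg_bval, newval, [mem]
	cmp	neg_bval, #0
	bne	.Lretry
	.Ldone:

   with acquire/release variants of LDREX/STREX or explicit barriers added
   as the memory model requires; the label names are illustrative only.  */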
31101
31102 void
31103 arm_split_compare_and_swap (rtx operands[])
31104 {
31105 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31106 machine_mode mode;
31107 enum memmodel mod_s, mod_f;
31108 bool is_weak;
31109 rtx_code_label *label1, *label2;
31110 rtx x, cond;
31111
31112 rval = operands[1];
31113 mem = operands[2];
31114 oldval = operands[3];
31115 newval = operands[4];
31116 is_weak = (operands[5] != const0_rtx);
31117 mod_s_rtx = operands[6];
31118 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31119 mod_f = memmodel_from_int (INTVAL (operands[7]));
31120 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31121 mode = GET_MODE (mem);
31122
31123 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31124
31125 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31126 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31127
31128 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31129 a full barrier is emitted after the store-release. */
31130 if (is_armv8_sync)
31131 use_acquire = false;
31132
31133 /* Checks whether a barrier is needed and emits one accordingly. */
31134 if (!(use_acquire || use_release))
31135 arm_pre_atomic_barrier (mod_s);
31136
31137 label1 = NULL;
31138 if (!is_weak)
31139 {
31140 label1 = gen_label_rtx ();
31141 emit_label (label1);
31142 }
31143 label2 = gen_label_rtx ();
31144
31145 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31146
31147 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31148 as required to communicate with arm_expand_compare_and_swap. */
31149 if (TARGET_32BIT)
31150 {
31151 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31152 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31153 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31154 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31155 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31156 }
31157 else
31158 {
31159 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31160 if (thumb1_cmpneg_operand (oldval, SImode))
31161 {
31162 rtx src = rval;
31163 if (!satisfies_constraint_L (oldval))
31164 {
31165 gcc_assert (satisfies_constraint_J (oldval));
31166
31167 /* For such immediates, ADDS needs the source and destination regs
31168 to be the same.
31169
31170 Normally this would be handled by RA, but this is all happening
31171 after RA. */
31172 emit_move_insn (neg_bval, rval);
31173 src = neg_bval;
31174 }
31175
31176 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31177 label2, cond));
31178 }
31179 else
31180 {
31181 emit_move_insn (neg_bval, const1_rtx);
31182 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31183 }
31184 }
31185
31186 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31187
31188 /* Weak or strong, we want EQ to be true for success, so that we
31189 match the flags that we got from the compare above. */
31190 if (TARGET_32BIT)
31191 {
31192 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31193 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31194 emit_insn (gen_rtx_SET (cond, x));
31195 }
31196
31197 if (!is_weak)
31198 {
31199 /* Z is set to boolean value of !neg_bval, as required to communicate
31200 with arm_expand_compare_and_swap. */
31201 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31202 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31203 }
31204
31205 if (!is_mm_relaxed (mod_f))
31206 emit_label (label2);
31207
31208 /* Checks whether a barrier is needed and emits one accordingly. */
31209 if (is_armv8_sync
31210 || !(use_acquire || use_release))
31211 arm_post_atomic_barrier (mod_s);
31212
31213 if (is_mm_relaxed (mod_f))
31214 emit_label (label2);
31215 }
31216
31217 /* Split an atomic operation pattern. Operation is given by CODE and is one
31218 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31219 operation). Operation is performed on the content at MEM and on VALUE
31220 following the memory model MODEL_RTX. The content at MEM before and after
31221 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31222 success of the operation is returned in COND. Using a scratch register or
31223 an operand register for these determines what result is returned for that
31224 pattern. */
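/* The split sequence is roughly:

	.Lretry:
	ldrex	old_out, [mem]
	<op>	new_out, old_out, value
	strex	cond, new_out, [mem]
	cmp	cond, #0
	bne	.Lretry

   with barriers or acquire/release variants added as MODEL_RTX requires;
   the label name is illustrative only.  */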
31225
31226 void
31227 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31228 rtx value, rtx model_rtx, rtx cond)
31229 {
31230 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31231 machine_mode mode = GET_MODE (mem);
31232 machine_mode wmode = (mode == DImode ? DImode : SImode);
31233 rtx_code_label *label;
31234 bool all_low_regs, bind_old_new;
31235 rtx x;
31236
31237 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31238
31239 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31240 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31241
31242 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31243 a full barrier is emitted after the store-release. */
31244 if (is_armv8_sync)
31245 use_acquire = false;
31246
31247 /* Checks whether a barrier is needed and emits one accordingly. */
31248 if (!(use_acquire || use_release))
31249 arm_pre_atomic_barrier (model);
31250
31251 label = gen_label_rtx ();
31252 emit_label (label);
31253
31254 if (new_out)
31255 new_out = gen_lowpart (wmode, new_out);
31256 if (old_out)
31257 old_out = gen_lowpart (wmode, old_out);
31258 else
31259 old_out = new_out;
31260 value = simplify_gen_subreg (wmode, value, mode, 0);
31261
31262 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31263
31264 /* Does the operation require destination and first operand to use the same
31265 register? This is decided by register constraints of relevant insn
31266 patterns in thumb1.md. */
31267 gcc_assert (!new_out || REG_P (new_out));
31268 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31269 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31270 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31271 bind_old_new =
31272 (TARGET_THUMB1
31273 && code != SET
31274 && code != MINUS
31275 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31276
31277 /* We want to return the old value while putting the result of the operation
31278 in the same register as the old value so copy the old value over to the
31279 destination register and use that register for the operation. */
31280 if (old_out && bind_old_new)
31281 {
31282 emit_move_insn (new_out, old_out);
31283 old_out = new_out;
31284 }
31285
31286 switch (code)
31287 {
31288 case SET:
31289 new_out = value;
31290 break;
31291
31292 case NOT:
31293 x = gen_rtx_AND (wmode, old_out, value);
31294 emit_insn (gen_rtx_SET (new_out, x));
31295 x = gen_rtx_NOT (wmode, new_out);
31296 emit_insn (gen_rtx_SET (new_out, x));
31297 break;
31298
31299 case MINUS:
31300 if (CONST_INT_P (value))
31301 {
31302 value = gen_int_mode (-INTVAL (value), wmode);
31303 code = PLUS;
31304 }
31305 /* FALLTHRU */
31306
31307 case PLUS:
31308 if (mode == DImode)
31309 {
31310 /* DImode plus/minus need to clobber flags. */
31311 /* The adddi3 and subdi3 patterns are incorrectly written so that
31312 they require matching operands, even when we could easily support
31313 three operands. Thankfully, this can be fixed up post-splitting,
31314 as the individual add+adc patterns do accept three operands and
31315 post-reload cprop can make these moves go away. */
31316 emit_move_insn (new_out, old_out);
31317 if (code == PLUS)
31318 x = gen_adddi3 (new_out, new_out, value);
31319 else
31320 x = gen_subdi3 (new_out, new_out, value);
31321 emit_insn (x);
31322 break;
31323 }
31324 /* FALLTHRU */
31325
31326 default:
31327 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31328 emit_insn (gen_rtx_SET (new_out, x));
31329 break;
31330 }
31331
31332 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31333 use_release);
31334
31335 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31336 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31337
31338 /* Checks whether a barrier is needed and emits one accordingly. */
31339 if (is_armv8_sync
31340 || !(use_acquire || use_release))
31341 arm_post_atomic_barrier (model);
31342 }
31343 \f
31344 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31345 opt_machine_mode
31346 arm_mode_to_pred_mode (machine_mode mode)
31347 {
31348 switch (GET_MODE_NUNITS (mode))
31349 {
31350 case 16: return V16BImode;
31351 case 8: return V8BImode;
31352 case 4: return V4BImode;
31353 }
31354 return opt_machine_mode ();
31355 }
31356
31357 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31358 If CAN_INVERT, store either the result or its inverse in TARGET
31359 and return true if TARGET contains the inverse. If !CAN_INVERT,
31360 always store the result in TARGET, never its inverse.
31361
31362 Note that the handling of floating-point comparisons is not
31363 IEEE compliant. */
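/* For example, on Neon an UNLT comparison is emitted as GE, with the result
   then inverted (or the inversion left to the caller when CAN_INVERT).  */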
31364
31365 bool
31366 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31367 bool can_invert)
31368 {
31369 machine_mode cmp_result_mode = GET_MODE (target);
31370 machine_mode cmp_mode = GET_MODE (op0);
31371
31372 bool inverted;
31373
31374 /* MVE supports more comparisons than Neon. */
31375 if (TARGET_HAVE_MVE)
31376 inverted = false;
31377 else
31378 switch (code)
31379 {
31380 /* For these we need to compute the inverse of the requested
31381 comparison. */
31382 case UNORDERED:
31383 case UNLT:
31384 case UNLE:
31385 case UNGT:
31386 case UNGE:
31387 case UNEQ:
31388 case NE:
31389 code = reverse_condition_maybe_unordered (code);
31390 if (!can_invert)
31391 {
31392 /* Recursively emit the inverted comparison into a temporary
31393 and then store its inverse in TARGET. This avoids reusing
31394 TARGET (which for integer NE could be one of the inputs). */
31395 rtx tmp = gen_reg_rtx (cmp_result_mode);
31396 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31397 gcc_unreachable ();
31398 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31399 return false;
31400 }
31401 inverted = true;
31402 break;
31403
31404 default:
31405 inverted = false;
31406 break;
31407 }
31408
31409 switch (code)
31410 {
31411 /* These are natively supported by Neon for zero comparisons, but otherwise
31412 require the operands to be swapped. For MVE, we can only compare
31413 registers. */
31414 case LE:
31415 case LT:
31416 if (!TARGET_HAVE_MVE)
31417 if (op1 != CONST0_RTX (cmp_mode))
31418 {
31419 code = swap_condition (code);
31420 std::swap (op0, op1);
31421 }
31422 /* Fall through. */
31423
31424 /* These are natively supported by Neon for both register and zero
31425 operands. MVE supports registers only. */
31426 case EQ:
31427 case GE:
31428 case GT:
31429 case NE:
31430 if (TARGET_HAVE_MVE)
31431 {
31432 switch (GET_MODE_CLASS (cmp_mode))
31433 {
31434 case MODE_VECTOR_INT:
31435 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31436 op0, force_reg (cmp_mode, op1)));
31437 break;
31438 case MODE_VECTOR_FLOAT:
31439 if (TARGET_HAVE_MVE_FLOAT)
31440 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31441 op0, force_reg (cmp_mode, op1)));
31442 else
31443 gcc_unreachable ();
31444 break;
31445 default:
31446 gcc_unreachable ();
31447 }
31448 }
31449 else
31450 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31451 return inverted;
31452
31453 /* These are natively supported for register operands only.
31454 Comparisons with zero aren't useful and should be folded
31455 or canonicalized by target-independent code. */
31456 case GEU:
31457 case GTU:
31458 if (TARGET_HAVE_MVE)
31459 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31460 op0, force_reg (cmp_mode, op1)));
31461 else
31462 emit_insn (gen_neon_vc (code, cmp_mode, target,
31463 op0, force_reg (cmp_mode, op1)));
31464 return inverted;
31465
31466 /* These require the operands to be swapped and likewise do not
31467 support comparisons with zero. */
31468 case LEU:
31469 case LTU:
31470 if (TARGET_HAVE_MVE)
31471 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31472 force_reg (cmp_mode, op1), op0));
31473 else
31474 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31475 target, force_reg (cmp_mode, op1), op0));
31476 return inverted;
31477
31478 /* These need a combination of two comparisons. */
31479 case LTGT:
31480 case ORDERED:
31481 {
31482 /* Operands are LTGT iff (a > b || a < b).
31483 Operands are ORDERED iff (a > b || a <= b). */
31484 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31485 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31486 rtx_code alt_code = (code == LTGT ? LT : LE);
31487 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31488 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31489 gcc_unreachable ();
31490 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31491 gt_res, alt_res)));
31492 return inverted;
31493 }
31494
31495 default:
31496 gcc_unreachable ();
31497 }
31498 }
31499
31500 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31501 CMP_RESULT_MODE is the mode of the comparison result. */
31502
31503 void
31504 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31505 {
31506 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31507 arm_expand_vector_compare, and another one here. */
31508 rtx mask;
31509
31510 if (TARGET_HAVE_MVE)
31511 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31512 else
31513 mask = gen_reg_rtx (cmp_result_mode);
31514
31515 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31516 operands[4], operands[5], true);
31517 if (inverted)
31518 std::swap (operands[1], operands[2]);
31519 if (TARGET_NEON)
31520 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31521 mask, operands[1], operands[2]));
31522 else
31523 {
31524 machine_mode cmp_mode = GET_MODE (operands[0]);
31525
31526 switch (GET_MODE_CLASS (cmp_mode))
31527 {
31528 case MODE_VECTOR_INT:
31529 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31530 operands[1], operands[2], mask));
31531 break;
31532 case MODE_VECTOR_FLOAT:
31533 if (TARGET_HAVE_MVE_FLOAT)
31534 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31535 operands[1], operands[2], mask));
31536 else
31537 gcc_unreachable ();
31538 break;
31539 default:
31540 gcc_unreachable ();
31541 }
31542 }
31543 }
31544 \f
31545 #define MAX_VECT_LEN 16
31546
31547 struct expand_vec_perm_d
31548 {
31549 rtx target, op0, op1;
31550 vec_perm_indices perm;
31551 machine_mode vmode;
31552 bool one_vector_p;
31553 bool testing_p;
31554 };
31555
31556 /* Generate a variable permutation. */
31557
31558 static void
31559 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31560 {
31561 machine_mode vmode = GET_MODE (target);
31562 bool one_vector_p = rtx_equal_p (op0, op1);
31563
31564 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31565 gcc_checking_assert (GET_MODE (op0) == vmode);
31566 gcc_checking_assert (GET_MODE (op1) == vmode);
31567 gcc_checking_assert (GET_MODE (sel) == vmode);
31568 gcc_checking_assert (TARGET_NEON);
31569
31570 if (one_vector_p)
31571 {
31572 if (vmode == V8QImode)
31573 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31574 else
31575 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31576 }
31577 else
31578 {
31579 rtx pair;
31580
31581 if (vmode == V8QImode)
31582 {
31583 pair = gen_reg_rtx (V16QImode);
31584 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31585 pair = gen_lowpart (TImode, pair);
31586 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31587 }
31588 else
31589 {
31590 pair = gen_reg_rtx (OImode);
31591 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31592 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31593 }
31594 }
31595 }
31596
31597 void
31598 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31599 {
31600 machine_mode vmode = GET_MODE (target);
31601 unsigned int nelt = GET_MODE_NUNITS (vmode);
31602 bool one_vector_p = rtx_equal_p (op0, op1);
31603 rtx mask;
31604
31605 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31606 numbering of elements for big-endian, we must reverse the order. */
31607 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31608
31609 /* The VTBL instruction does not use a modulo index, so we must take care
31610 of that ourselves. */
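/* For example, with a single V8QImode input the selector is ANDed with 7,
   so out-of-range indices wrap as VEC_PERM_EXPR requires instead of
   producing the zeros that VTBL would give.  */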
31611 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31612 mask = gen_const_vec_duplicate (vmode, mask);
31613 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31614
31615 arm_expand_vec_perm_1 (target, op0, op1, sel);
31616 }
31617
31618 /* Map lane ordering between architectural lane order, and GCC lane order,
31619 taking into account ABI. See comment above output_move_neon for details. */
31620
31621 static int
31622 neon_endian_lane_map (machine_mode mode, int lane)
31623 {
31624 if (BYTES_BIG_ENDIAN)
31625 {
31626 int nelems = GET_MODE_NUNITS (mode);
31627 /* Reverse lane order. */
31628 lane = (nelems - 1 - lane);
31629 /* Reverse D register order, to match ABI. */
31630 if (GET_MODE_SIZE (mode) == 16)
31631 lane = lane ^ (nelems / 2);
31632 }
31633 return lane;
31634 }
31635
31636 /* Some permutations index into pairs of vectors, this is a helper function
31637 to map indexes into those pairs of vectors. */
31638
31639 static int
31640 neon_pair_endian_lane_map (machine_mode mode, int lane)
31641 {
31642 int nelem = GET_MODE_NUNITS (mode);
31643 if (BYTES_BIG_ENDIAN)
31644 lane =
31645 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31646 return lane;
31647 }
31648
31649 /* Generate or test for an insn that supports a constant permutation. */
31650
31651 /* Recognize patterns for the VUZP insns. */
31652
31653 static bool
31654 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31655 {
31656 unsigned int i, odd, mask, nelt = d->perm.length ();
31657 rtx out0, out1, in0, in1;
31658 int first_elem;
31659 int swap_nelt;
31660
31661 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31662 return false;
31663
31664 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31665 big endian pattern on 64 bit vectors, so we correct for that. */
31666 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31667 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31668
31669 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31670
31671 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31672 odd = 0;
31673 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31674 odd = 1;
31675 else
31676 return false;
31677 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31678
31679 for (i = 0; i < nelt; i++)
31680 {
31681 unsigned elt =
31682 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31683 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31684 return false;
31685 }
31686
31687 /* Success! */
31688 if (d->testing_p)
31689 return true;
31690
31691 in0 = d->op0;
31692 in1 = d->op1;
31693 if (swap_nelt != 0)
31694 std::swap (in0, in1);
31695
31696 out0 = d->target;
31697 out1 = gen_reg_rtx (d->vmode);
31698 if (odd)
31699 std::swap (out0, out1);
31700
31701 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31702 return true;
31703 }
31704
31705 /* Recognize patterns for the VZIP insns. */
31706
31707 static bool
31708 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31709 {
31710 unsigned int i, high, mask, nelt = d->perm.length ();
31711 rtx out0, out1, in0, in1;
31712 int first_elem;
31713 bool is_swapped;
31714
31715 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31716 return false;
31717
31718 is_swapped = BYTES_BIG_ENDIAN;
31719
31720 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31721
31722 high = nelt / 2;
31723 if (first_elem == neon_endian_lane_map (d->vmode, high))
31724 ;
31725 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31726 high = 0;
31727 else
31728 return false;
31729 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31730
31731 for (i = 0; i < nelt / 2; i++)
31732 {
31733 unsigned elt =
31734 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31735 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31736 != elt)
31737 return false;
31738 elt =
31739 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31740 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31741 != elt)
31742 return false;
31743 }
31744
31745 /* Success! */
31746 if (d->testing_p)
31747 return true;
31748
31749 in0 = d->op0;
31750 in1 = d->op1;
31751 if (is_swapped)
31752 std::swap (in0, in1);
31753
31754 out0 = d->target;
31755 out1 = gen_reg_rtx (d->vmode);
31756 if (high)
31757 std::swap (out0, out1);
31758
31759 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31760 return true;
31761 }
31762
31763 /* Recognize patterns for the VREV insns. */
31764 static bool
31765 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31766 {
31767 unsigned int i, j, diff, nelt = d->perm.length ();
31768 rtx (*gen) (machine_mode, rtx, rtx);
31769
31770 if (!d->one_vector_p)
31771 return false;
31772
31773 diff = d->perm[0];
31774 switch (diff)
31775 {
31776 case 7:
31777 switch (d->vmode)
31778 {
31779 case E_V16QImode:
31780 case E_V8QImode:
31781 gen = gen_neon_vrev64;
31782 break;
31783 default:
31784 return false;
31785 }
31786 break;
31787 case 3:
31788 switch (d->vmode)
31789 {
31790 case E_V16QImode:
31791 case E_V8QImode:
31792 gen = gen_neon_vrev32;
31793 break;
31794 case E_V8HImode:
31795 case E_V4HImode:
31796 case E_V8HFmode:
31797 case E_V4HFmode:
31798 gen = gen_neon_vrev64;
31799 break;
31800 default:
31801 return false;
31802 }
31803 break;
31804 case 1:
31805 switch (d->vmode)
31806 {
31807 case E_V16QImode:
31808 case E_V8QImode:
31809 gen = gen_neon_vrev16;
31810 break;
31811 case E_V8HImode:
31812 case E_V4HImode:
31813 gen = gen_neon_vrev32;
31814 break;
31815 case E_V4SImode:
31816 case E_V2SImode:
31817 case E_V4SFmode:
31818 case E_V2SFmode:
31819 gen = gen_neon_vrev64;
31820 break;
31821 default:
31822 return false;
31823 }
31824 break;
31825 default:
31826 return false;
31827 }
31828
31829 for (i = 0; i < nelt ; i += diff + 1)
31830 for (j = 0; j <= diff; j += 1)
31831 {
31832 /* This is guaranteed to be true as the value of diff
31833 is 7, 3, 1 and we should have enough elements in the
31834 queue to generate this. Getting a vector mask with a
31835 value of diff other than these values implies that
31836 something is wrong by the time we get here. */
31837 gcc_assert (i + j < nelt);
31838 if (d->perm[i + j] != i + diff - j)
31839 return false;
31840 }
31841
31842 /* Success! */
31843 if (d->testing_p)
31844 return true;
31845
31846 emit_insn (gen (d->vmode, d->target, d->op0));
31847 return true;
31848 }
31849
31850 /* Recognize patterns for the VTRN insns. */
31851
31852 static bool
31853 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31854 {
31855 unsigned int i, odd, mask, nelt = d->perm.length ();
31856 rtx out0, out1, in0, in1;
31857
31858 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31859 return false;
31860
31861 /* Note that these are little-endian tests. Adjust for big-endian later. */
31862 if (d->perm[0] == 0)
31863 odd = 0;
31864 else if (d->perm[0] == 1)
31865 odd = 1;
31866 else
31867 return false;
31868 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31869
31870 for (i = 0; i < nelt; i += 2)
31871 {
31872 if (d->perm[i] != i + odd)
31873 return false;
31874 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31875 return false;
31876 }
31877
31878 /* Success! */
31879 if (d->testing_p)
31880 return true;
31881
31882 in0 = d->op0;
31883 in1 = d->op1;
31884 if (BYTES_BIG_ENDIAN)
31885 {
31886 std::swap (in0, in1);
31887 odd = !odd;
31888 }
31889
31890 out0 = d->target;
31891 out1 = gen_reg_rtx (d->vmode);
31892 if (odd)
31893 std::swap (out0, out1);
31894
31895 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31896 return true;
31897 }
31898
31899 /* Recognize patterns for the VEXT insns. */
31900
31901 static bool
31902 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31903 {
31904 unsigned int i, nelt = d->perm.length ();
31905 rtx offset;
31906
31907 unsigned int location;
31908
31909 unsigned int next = d->perm[0] + 1;
31910
31911 /* TODO: Handle GCC's numbering of elements for big-endian. */
31912 if (BYTES_BIG_ENDIAN)
31913 return false;
31914
31915 /* Check if the extracted indexes are increasing by one. */
31916 for (i = 1; i < nelt; next++, i++)
31917 {
31918 /* If we hit the most significant element of the 2nd vector in
31919 the previous iteration, no need to test further. */
31920 if (next == 2 * nelt)
31921 return false;
31922
31923 /* If we are operating on only one vector: it could be a
31924 rotation. If there are only two elements of size < 64, let
31925 arm_evpc_neon_vrev catch it. */
31926 if (d->one_vector_p && (next == nelt))
31927 {
31928 if ((nelt == 2) && (d->vmode != V2DImode))
31929 return false;
31930 else
31931 next = 0;
31932 }
31933
31934 if (d->perm[i] != next)
31935 return false;
31936 }
31937
31938 location = d->perm[0];
31939
31940 /* Success! */
31941 if (d->testing_p)
31942 return true;
31943
31944 offset = GEN_INT (location);
31945
31946 if (d->vmode == E_DImode)
31947 return false;
31948
31949 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31950 return true;
31951 }
31952
31953 /* The NEON VTBL instruction is a fully variable permutation that's even
31954 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31955 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31956 can do slightly better by expanding this as a constant where we don't
31957 have to apply a mask. */
31958
31959 static bool
31960 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31961 {
31962 rtx rperm[MAX_VECT_LEN], sel;
31963 machine_mode vmode = d->vmode;
31964 unsigned int i, nelt = d->perm.length ();
31965
31966 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31967 numbering of elements for big-endian, we must reverse the order. */
31968 if (BYTES_BIG_ENDIAN)
31969 return false;
31970
31971 if (d->testing_p)
31972 return true;
31973
31974 /* Generic code will try constant permutation twice. Once with the
31975 original mode and again with the elements lowered to QImode.
31976 So wait and don't do the selector expansion ourselves. */
31977 if (vmode != V8QImode && vmode != V16QImode)
31978 return false;
31979
31980 for (i = 0; i < nelt; ++i)
31981 rperm[i] = GEN_INT (d->perm[i]);
31982 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31983 sel = force_reg (vmode, sel);
31984
31985 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31986 return true;
31987 }
31988
31989 static bool
31990 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31991 {
31992 /* Check if the input mask matches vext before reordering the
31993 operands. */
31994 if (TARGET_NEON)
31995 if (arm_evpc_neon_vext (d))
31996 return true;
31997
31998 /* The pattern matching functions above are written to look for a small
31999 number to begin the sequence (0, 1, N/2). If we begin with an index
32000 from the second operand, we can swap the operands. */
32001 unsigned int nelt = d->perm.length ();
32002 if (d->perm[0] >= nelt)
32003 {
32004 d->perm.rotate_inputs (1);
32005 std::swap (d->op0, d->op1);
32006 }
32007
32008 if (TARGET_NEON)
32009 {
32010 if (arm_evpc_neon_vuzp (d))
32011 return true;
32012 if (arm_evpc_neon_vzip (d))
32013 return true;
32014 if (arm_evpc_neon_vrev (d))
32015 return true;
32016 if (arm_evpc_neon_vtrn (d))
32017 return true;
32018 return arm_evpc_neon_vtbl (d);
32019 }
32020 return false;
32021 }
32022
32023 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32024
32025 static bool
32026 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32027 rtx target, rtx op0, rtx op1,
32028 const vec_perm_indices &sel)
32029 {
32030 if (vmode != op_mode)
32031 return false;
32032
32033 struct expand_vec_perm_d d;
32034 int i, nelt, which;
32035
32036 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32037 return false;
32038
32039 d.target = target;
32040 if (op0)
32041 {
32042 rtx nop0 = force_reg (vmode, op0);
32043 if (op0 == op1)
32044 op1 = nop0;
32045 op0 = nop0;
32046 }
32047 if (op1)
32048 op1 = force_reg (vmode, op1);
32049 d.op0 = op0;
32050 d.op1 = op1;
32051
32052 d.vmode = vmode;
32053 gcc_assert (VECTOR_MODE_P (d.vmode));
32054 d.testing_p = !target;
32055
32056 nelt = GET_MODE_NUNITS (d.vmode);
32057 for (i = which = 0; i < nelt; ++i)
32058 {
32059 int ei = sel[i] & (2 * nelt - 1);
32060 which |= (ei < nelt ? 1 : 2);
32061 }
32062
32063 switch (which)
32064 {
32065 default:
32066 gcc_unreachable();
32067
32068 case 3:
32069 d.one_vector_p = false;
32070 if (d.testing_p || !rtx_equal_p (op0, op1))
32071 break;
32072
32073 /* The elements of PERM do not suggest that only the first operand
32074 is used, but both operands are identical. Allow easier matching
32075 of the permutation by folding the permutation into the single
32076 input vector. */
32077 /* FALLTHRU */
32078 case 2:
32079 d.op0 = op1;
32080 d.one_vector_p = true;
32081 break;
32082
32083 case 1:
32084 d.op1 = op0;
32085 d.one_vector_p = true;
32086 break;
32087 }
32088
32089 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32090
32091 if (!d.testing_p)
32092 return arm_expand_vec_perm_const_1 (&d);
32093
32094 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32095 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32096 if (!d.one_vector_p)
32097 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32098
32099 start_sequence ();
32100 bool ret = arm_expand_vec_perm_const_1 (&d);
32101 end_sequence ();
32102
32103 return ret;
32104 }
32105
32106 bool
32107 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32108 {
32109 /* If we are soft float, all auto-increment forms are OK provided we
32110 either have LDRD or the mode fits in a single word. */
32111 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32112 return true;
32113
32114 switch (code)
32115 {
32116 /* Post increment and Pre Decrement are supported for all
32117 instruction forms except for vector forms. */
32118 case ARM_POST_INC:
32119 case ARM_PRE_DEC:
32120 if (VECTOR_MODE_P (mode))
32121 {
32122 if (code != ARM_PRE_DEC)
32123 return true;
32124 else
32125 return false;
32126 }
32127
32128 return true;
32129
32130 case ARM_POST_DEC:
32131 case ARM_PRE_INC:
32132 /* Without LDRD and mode size greater than
32133 word size, there is no point in auto-incrementing
32134 because ldm and stm will not have these forms. */
32135 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32136 return false;
32137
32138 /* Vector and floating point modes do not support
32139 these auto increment forms. */
32140 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32141 return false;
32142
32143 return true;
32144
32145 default:
32146 return false;
32147
32148 }
32149
32150 return false;
32151 }
32152
32153 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32154 on ARM, since we know that shifts by negative amounts are no-ops.
32155 Additionally, the default expansion code is not available or suitable
32156 for post-reload insn splits (this can occur when the register allocator
32157 chooses not to do a shift in NEON).
32158
32159 This function is used in both initial expand and post-reload splits, and
32160 handles all kinds of 64-bit shifts.
32161
32162 Input requirements:
32163 - It is safe for the input and output to be the same register, but
32164 early-clobber rules apply for the shift amount and scratch registers.
32165 - Shift by register requires both scratch registers. In all other cases
32166 the scratch registers may be NULL.
32167 - Ashiftrt by a register also clobbers the CC register. */
32168 void
32169 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32170 rtx amount, rtx scratch1, rtx scratch2)
32171 {
32172 rtx out_high = gen_highpart (SImode, out);
32173 rtx out_low = gen_lowpart (SImode, out);
32174 rtx in_high = gen_highpart (SImode, in);
32175 rtx in_low = gen_lowpart (SImode, in);
32176
32177 /* Terminology:
32178 in = the register pair containing the input value.
32179 out = the destination register pair.
32180 up = the high- or low-part of each pair.
32181 down = the opposite part to "up".
32182 In a shift, we can consider bits to shift from "up"-stream to
32183 "down"-stream, so in a left-shift "up" is the low-part and "down"
32184 is the high-part of each register pair. */
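
/* As a worked example (editorial sketch), a logical right shift of a
   DImode value by a constant 10 is expanded, per the code below, as:

     out_low  = in_low >> 10;
     out_low |= in_high << 22;        (the 32 - 10 reverse shift)
     out_high = in_high >> 10;

   so each output word is assembled from at most two single-word shifts.  */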
32185
32186 rtx out_up = code == ASHIFT ? out_low : out_high;
32187 rtx out_down = code == ASHIFT ? out_high : out_low;
32188 rtx in_up = code == ASHIFT ? in_low : in_high;
32189 rtx in_down = code == ASHIFT ? in_high : in_low;
32190
32191 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32192 gcc_assert (out
32193 && (REG_P (out) || SUBREG_P (out))
32194 && GET_MODE (out) == DImode);
32195 gcc_assert (in
32196 && (REG_P (in) || SUBREG_P (in))
32197 && GET_MODE (in) == DImode);
32198 gcc_assert (amount
32199 && (((REG_P (amount) || SUBREG_P (amount))
32200 && GET_MODE (amount) == SImode)
32201 || CONST_INT_P (amount)));
32202 gcc_assert (scratch1 == NULL
32203 || (GET_CODE (scratch1) == SCRATCH)
32204 || (GET_MODE (scratch1) == SImode
32205 && REG_P (scratch1)));
32206 gcc_assert (scratch2 == NULL
32207 || (GET_CODE (scratch2) == SCRATCH)
32208 || (GET_MODE (scratch2) == SImode
32209 && REG_P (scratch2)));
32210 gcc_assert (!REG_P (out) || !REG_P (amount)
32211 || !HARD_REGISTER_P (out)
32212 || (REGNO (out) != REGNO (amount)
32213 && REGNO (out) + 1 != REGNO (amount)));
32214
32215 /* Macros to make following code more readable. */
32216 #define SUB_32(DEST,SRC) \
32217 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32218 #define RSB_32(DEST,SRC) \
32219 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32220 #define SUB_S_32(DEST,SRC) \
32221 gen_addsi3_compare0 ((DEST), (SRC), \
32222 GEN_INT (-32))
32223 #define SET(DEST,SRC) \
32224 gen_rtx_SET ((DEST), (SRC))
32225 #define SHIFT(CODE,SRC,AMOUNT) \
32226 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32227 #define LSHIFT(CODE,SRC,AMOUNT) \
32228 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32229 SImode, (SRC), (AMOUNT))
32230 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32231 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32232 SImode, (SRC), (AMOUNT))
32233 #define ORR(A,B) \
32234 gen_rtx_IOR (SImode, (A), (B))
32235 #define BRANCH(COND,LABEL) \
32236 gen_arm_cond_branch ((LABEL), \
32237 gen_rtx_ ## COND (CCmode, cc_reg, \
32238 const0_rtx), \
32239 cc_reg)
32240
32241 /* Shifts by register and shifts by constant are handled separately. */
32242 if (CONST_INT_P (amount))
32243 {
32244 /* We have a shift-by-constant. */
32245
32246 /* First, handle out-of-range shift amounts.
32247 In both cases we try to match the result that an ARM instruction in
32248 a shift-by-register would give. This helps reduce execution
32249 differences between optimization levels, but it won't stop other
32250 parts of the compiler doing different things. This is "undefined
32251 behavior", in any case. */
32252 if (INTVAL (amount) <= 0)
32253 emit_insn (gen_movdi (out, in));
32254 else if (INTVAL (amount) >= 64)
32255 {
32256 if (code == ASHIFTRT)
32257 {
32258 rtx const31_rtx = GEN_INT (31);
32259 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32260 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32261 }
32262 else
32263 emit_insn (gen_movdi (out, const0_rtx));
32264 }
32265
32266 /* Now handle valid shifts. */
32267 else if (INTVAL (amount) < 32)
32268 {
32269 /* Shifts by a constant less than 32. */
32270 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32271
32272 /* Clearing the out register in DImode first avoids lots
32273 of spilling and results in less stack usage.
32274 Later this redundant insn is completely removed.
32275 Do that only if "in" and "out" are different registers. */
32276 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32277 emit_insn (SET (out, const0_rtx));
32278 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32279 emit_insn (SET (out_down,
32280 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32281 out_down)));
32282 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32283 }
32284 else
32285 {
32286 /* Shifts by a constant greater than 31. */
32287 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32288
32289 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32290 emit_insn (SET (out, const0_rtx));
32291 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32292 if (code == ASHIFTRT)
32293 emit_insn (gen_ashrsi3 (out_up, in_up,
32294 GEN_INT (31)));
32295 else
32296 emit_insn (SET (out_up, const0_rtx));
32297 }
32298 }
32299 else
32300 {
32301 /* We have a shift-by-register. */
32302 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32303
32304 /* This alternative requires the scratch registers. */
32305 gcc_assert (scratch1 && REG_P (scratch1));
32306 gcc_assert (scratch2 && REG_P (scratch2));
32307
32308 /* We will need the values "amount-32" and "32-amount" later.
32309 Swapping them around now allows the later code to be more general. */
32310 switch (code)
32311 {
32312 case ASHIFT:
32313 emit_insn (SUB_32 (scratch1, amount));
32314 emit_insn (RSB_32 (scratch2, amount));
32315 break;
32316 case ASHIFTRT:
32317 emit_insn (RSB_32 (scratch1, amount));
32318 /* Also set CC = amount > 32. */
32319 emit_insn (SUB_S_32 (scratch2, amount));
32320 break;
32321 case LSHIFTRT:
32322 emit_insn (RSB_32 (scratch1, amount));
32323 emit_insn (SUB_32 (scratch2, amount));
32324 break;
32325 default:
32326 gcc_unreachable ();
32327 }
32328
32329 /* Emit code like this:
32330
32331 arithmetic-left:
32332 out_down = in_down << amount;
32333 out_down = (in_up << (amount - 32)) | out_down;
32334 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32335 out_up = in_up << amount;
32336
32337 arithmetic-right:
32338 out_down = in_down >> amount;
32339 out_down = (in_up << (32 - amount)) | out_down;
32340 if (amount < 32)
32341 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32342 out_up = in_up << amount;
32343
32344 logical-right:
32345 out_down = in_down >> amount;
32346 out_down = (in_up << (32 - amount)) | out_down;
32347 if (amount < 32)
32348 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32349 out_up = in_up << amount;
32350
32351 The ARM and Thumb2 variants are the same but implemented slightly
32352 differently. If this were only called during expand we could just
32353 use the Thumb2 case and let combine do the right thing, but this
32354 can also be called from post-reload splitters. */
32355
32356 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32357
32358 if (!TARGET_THUMB2)
32359 {
32360 /* Emit code for ARM mode. */
32361 emit_insn (SET (out_down,
32362 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32363 if (code == ASHIFTRT)
32364 {
32365 rtx_code_label *done_label = gen_label_rtx ();
32366 emit_jump_insn (BRANCH (LT, done_label));
32367 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32368 out_down)));
32369 emit_label (done_label);
32370 }
32371 else
32372 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32373 out_down)));
32374 }
32375 else
32376 {
32377 /* Emit code for Thumb2 mode.
32378 Thumb2 can't do shift and or in one insn. */
32379 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32380 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32381
32382 if (code == ASHIFTRT)
32383 {
32384 rtx_code_label *done_label = gen_label_rtx ();
32385 emit_jump_insn (BRANCH (LT, done_label));
32386 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32387 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32388 emit_label (done_label);
32389 }
32390 else
32391 {
32392 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32393 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32394 }
32395 }
32396
32397 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32398 }
32399
32400 #undef SUB_32
32401 #undef RSB_32
32402 #undef SUB_S_32
32403 #undef SET
32404 #undef SHIFT
32405 #undef LSHIFT
32406 #undef REV_LSHIFT
32407 #undef ORR
32408 #undef BRANCH
32409 }
32410
32411 /* Returns true if the pattern is a valid symbolic address, which is either a
32412 symbol_ref or (symbol_ref + addend).
32413
32414 According to the ARM ELF ABI, the initial addend of REL-type relocations
32415 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32416 literal field of the instruction as a 16-bit signed value in the range
32417 -32768 <= A < 32768.
32418
32419 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32420 unsigned range of 0 <= A < 256 as described in the AAELF32
32421 relocation handling documentation: REL-type relocations are encoded
32422 as unsigned in this case. */
32423
32424 bool
32425 arm_valid_symbolic_address_p (rtx addr)
32426 {
32427 rtx xop0, xop1 = NULL_RTX;
32428 rtx tmp = addr;
32429
32430 if (target_word_relocations)
32431 return false;
32432
32433 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32434 return true;
32435
32436 /* (const (plus: symbol_ref const_int)) */
32437 if (GET_CODE (addr) == CONST)
32438 tmp = XEXP (addr, 0);
32439
32440 if (GET_CODE (tmp) == PLUS)
32441 {
32442 xop0 = XEXP (tmp, 0);
32443 xop1 = XEXP (tmp, 1);
32444
32445 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32446 {
32447 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32448 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32449 else
32450 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32451 }
32452 }
32453
32454 return false;
32455 }
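
/* For illustration (editorial sketch): with MOVW/MOVT available,
   (symbol_ref "x") and (const (plus (symbol_ref "x") (const_int 4)))
   are accepted, while an addend such as 0x10000 is rejected because it
   does not fit the signed 16-bit REL addend range described above.
   On Thumb-1 without MOVT the accepted addend range shrinks to 0..255.  */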
32456
32457 /* Returns true if *COMPARISON is a valid comparison operation, and
32458 puts the operands into a form that is valid. */
32459 bool
32460 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32461 {
32462 enum rtx_code code = GET_CODE (*comparison);
32463 int code_int;
32464 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32465 ? GET_MODE (*op2) : GET_MODE (*op1);
32466
32467 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32468
32469 if (code == UNEQ || code == LTGT)
32470 return false;
32471
32472 code_int = (int)code;
32473 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32474 PUT_CODE (*comparison, (enum rtx_code)code_int);
32475
32476 switch (mode)
32477 {
32478 case E_SImode:
32479 if (!arm_add_operand (*op1, mode))
32480 *op1 = force_reg (mode, *op1);
32481 if (!arm_add_operand (*op2, mode))
32482 *op2 = force_reg (mode, *op2);
32483 return true;
32484
32485 case E_DImode:
32486 /* gen_compare_reg() will sort out any invalid operands. */
32487 return true;
32488
32489 case E_HFmode:
32490 if (!TARGET_VFP_FP16INST)
32491 break;
32492 /* FP16 comparisons are done in SF mode. */
32493 mode = SFmode;
32494 *op1 = convert_to_mode (mode, *op1, 1);
32495 *op2 = convert_to_mode (mode, *op2, 1);
32496 /* Fall through. */
32497 case E_SFmode:
32498 case E_DFmode:
32499 if (!vfp_compare_operand (*op1, mode))
32500 *op1 = force_reg (mode, *op1);
32501 if (!vfp_compare_operand (*op2, mode))
32502 *op2 = force_reg (mode, *op2);
32503 return true;
32504 default:
32505 break;
32506 }
32507
32508 return false;
32509
32510 }
32511
32512 /* Maximum number of instructions to set block of memory. */
32513 static int
32514 arm_block_set_max_insns (void)
32515 {
32516 if (optimize_function_for_size_p (cfun))
32517 return 4;
32518 else
32519 return current_tune->max_insns_inline_memset;
32520 }
32521
32522 /* Return TRUE if it's profitable to set a block of memory in the
32523 non-vectorized case. VAL is the value to set the memory
32524 with. LENGTH is the number of bytes to set. ALIGN is the
32525 alignment of the destination memory in bytes. UNALIGNED_P
32526 is TRUE if we can only set the memory with instructions
32527 meeting alignment requirements. USE_STRD_P is TRUE if we
32528 can use strd to set the memory. */
32529 static bool
32530 arm_block_set_non_vect_profit_p (rtx val,
32531 unsigned HOST_WIDE_INT length,
32532 unsigned HOST_WIDE_INT align,
32533 bool unaligned_p, bool use_strd_p)
32534 {
32535 int num = 0;
32536 /* For a leftover of 0-7 bytes, we can set the memory block using
32537 strb/strh/str with the minimum number of instructions. */
32538 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32539
32540 if (unaligned_p)
32541 {
32542 num = arm_const_inline_cost (SET, val);
32543 num += length / align + length % align;
32544 }
32545 else if (use_strd_p)
32546 {
32547 num = arm_const_double_inline_cost (val);
32548 num += (length >> 3) + leftover[length & 7];
32549 }
32550 else
32551 {
32552 num = arm_const_inline_cost (SET, val);
32553 num += (length >> 2) + leftover[length & 3];
32554 }
32555
32556 /* We may be able to combine the last STRH/STRB pair into a single STR
32557 by shifting one byte back. */
32558 if (unaligned_access && length > 3 && (length & 3) == 3)
32559 num--;
32560
32561 return (num <= arm_block_set_max_insns ());
32562 }
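
/* Worked example (editorial sketch): for a 15-byte aligned memset without
   strd, the store count is (15 >> 2) + leftover[15 & 3] = 3 + 2, plus the
   cost of materialising the constant; if unaligned access is available the
   final STRH/STRB pair is counted as a single STR, saving one instruction.
   The result is then compared against arm_block_set_max_insns ().  */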
32563
32564 /* Return TRUE if it's profitable to set a block of memory in the
32565 vectorized case. LENGTH is the number of bytes to set.
32566 ALIGN is the alignment of the destination memory in bytes.
32567 MODE is the vector mode used to set the memory. */
32568 static bool
32569 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32570 unsigned HOST_WIDE_INT align,
32571 machine_mode mode)
32572 {
32573 int num;
32574 bool unaligned_p = ((align & 3) != 0);
32575 unsigned int nelt = GET_MODE_NUNITS (mode);
32576
32577 /* Instruction loading constant value. */
32578 num = 1;
32579 /* Instructions storing the memory. */
32580 num += (length + nelt - 1) / nelt;
32581 /* Instructions adjusting the address expression. We only need to
32582 adjust the address expression if it's 4-byte aligned and the
32583 leftover bytes can only be stored by a misaligned store instruction. */
32584 if (!unaligned_p && (length & 3) != 0)
32585 num++;
32586
32587 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32588 if (!unaligned_p && mode == V16QImode)
32589 num--;
32590
32591 return (num <= arm_block_set_max_insns ());
32592 }
32593
32594 /* Set a block of memory using vectorization instructions for the
32595 unaligned case. We fill the first LENGTH bytes of the memory
32596 area starting from DSTBASE with byte constant VALUE. ALIGN is
32597 the alignment requirement of memory. Return TRUE if succeeded. */
32598 static bool
32599 arm_block_set_unaligned_vect (rtx dstbase,
32600 unsigned HOST_WIDE_INT length,
32601 unsigned HOST_WIDE_INT value,
32602 unsigned HOST_WIDE_INT align)
32603 {
32604 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32605 rtx dst, mem;
32606 rtx val_vec, reg;
32607 rtx (*gen_func) (rtx, rtx);
32608 machine_mode mode;
32609 unsigned HOST_WIDE_INT v = value;
32610 unsigned int offset = 0;
32611 gcc_assert ((align & 0x3) != 0);
32612 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32613 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32614 if (length >= nelt_v16)
32615 {
32616 mode = V16QImode;
32617 gen_func = gen_movmisalignv16qi;
32618 }
32619 else
32620 {
32621 mode = V8QImode;
32622 gen_func = gen_movmisalignv8qi;
32623 }
32624 nelt_mode = GET_MODE_NUNITS (mode);
32625 gcc_assert (length >= nelt_mode);
32626 /* Skip if it isn't profitable. */
32627 if (!arm_block_set_vect_profit_p (length, align, mode))
32628 return false;
32629
32630 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32631 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32632
32633 v = sext_hwi (v, BITS_PER_WORD);
32634
32635 reg = gen_reg_rtx (mode);
32636 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32637 /* Emit instruction loading the constant value. */
32638 emit_move_insn (reg, val_vec);
32639
32640 /* Handle nelt_mode bytes in a vector. */
32641 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32642 {
32643 emit_insn ((*gen_func) (mem, reg));
32644 if (i + 2 * nelt_mode <= length)
32645 {
32646 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32647 offset += nelt_mode;
32648 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32649 }
32650 }
32651
32652 /* If at least nelt_v8 bytes are left over, we must be in
32653 V16QImode. */
32654 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32655
32656 /* Handle (8, 16) bytes leftover. */
32657 if (i + nelt_v8 < length)
32658 {
32659 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32660 offset += length - i;
32661 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32662
32663 /* We are shifting bytes back, set the alignment accordingly. */
32664 if ((length & 1) != 0 && align >= 2)
32665 set_mem_align (mem, BITS_PER_UNIT);
32666
32667 emit_insn (gen_movmisalignv16qi (mem, reg));
32668 }
32669 /* Handle (0, 8] bytes leftover. */
32670 else if (i < length && i + nelt_v8 >= length)
32671 {
32672 if (mode == V16QImode)
32673 reg = gen_lowpart (V8QImode, reg);
32674
32675 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32676 + (nelt_mode - nelt_v8))));
32677 offset += (length - i) + (nelt_mode - nelt_v8);
32678 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32679
32680 /* We are shifting bytes back, set the alignment accordingly. */
32681 if ((length & 1) != 0 && align >= 2)
32682 set_mem_align (mem, BITS_PER_UNIT);
32683
32684 emit_insn (gen_movmisalignv8qi (mem, reg));
32685 }
32686
32687 return true;
32688 }
32689
32690 /* Set a block of memory using vectorization instructions for the
32691 aligned case. We fill the first LENGTH bytes of the memory area
32692 starting from DSTBASE with byte constant VALUE. ALIGN is the
32693 alignment requirement of memory. Return TRUE if succeeded. */
32694 static bool
32695 arm_block_set_aligned_vect (rtx dstbase,
32696 unsigned HOST_WIDE_INT length,
32697 unsigned HOST_WIDE_INT value,
32698 unsigned HOST_WIDE_INT align)
32699 {
32700 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32701 rtx dst, addr, mem;
32702 rtx val_vec, reg;
32703 machine_mode mode;
32704 unsigned int offset = 0;
32705
32706 gcc_assert ((align & 0x3) == 0);
32707 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32708 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32709 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32710 mode = V16QImode;
32711 else
32712 mode = V8QImode;
32713
32714 nelt_mode = GET_MODE_NUNITS (mode);
32715 gcc_assert (length >= nelt_mode);
32716 /* Skip if it isn't profitable. */
32717 if (!arm_block_set_vect_profit_p (length, align, mode))
32718 return false;
32719
32720 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32721
32722 reg = gen_reg_rtx (mode);
32723 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32724 /* Emit instruction loading the constant value. */
32725 emit_move_insn (reg, val_vec);
32726
32727 i = 0;
32728 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32729 if (mode == V16QImode)
32730 {
32731 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32732 emit_insn (gen_movmisalignv16qi (mem, reg));
32733 i += nelt_mode;
32734 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32735 if (i + nelt_v8 < length && i + nelt_v16 > length)
32736 {
32737 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32738 offset += length - nelt_mode;
32739 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32740 /* We are shifting bytes back, set the alignment accordingly. */
32741 if ((length & 0x3) == 0)
32742 set_mem_align (mem, BITS_PER_UNIT * 4);
32743 else if ((length & 0x1) == 0)
32744 set_mem_align (mem, BITS_PER_UNIT * 2);
32745 else
32746 set_mem_align (mem, BITS_PER_UNIT);
32747
32748 emit_insn (gen_movmisalignv16qi (mem, reg));
32749 return true;
32750 }
32751 /* Fall through for bytes leftover. */
32752 mode = V8QImode;
32753 nelt_mode = GET_MODE_NUNITS (mode);
32754 reg = gen_lowpart (V8QImode, reg);
32755 }
32756
32757 /* Handle 8 bytes in a vector. */
32758 for (; (i + nelt_mode <= length); i += nelt_mode)
32759 {
32760 addr = plus_constant (Pmode, dst, i);
32761 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32762 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32763 emit_move_insn (mem, reg);
32764 else
32765 emit_insn (gen_unaligned_storev8qi (mem, reg));
32766 }
32767
32768 /* Handle single word leftover by shifting 4 bytes back. We can
32769 use aligned access for this case. */
32770 if (i + UNITS_PER_WORD == length)
32771 {
32772 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32773 offset += i - UNITS_PER_WORD;
32774 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32775 /* We are shifting 4 bytes back, set the alignment accordingly. */
32776 if (align > UNITS_PER_WORD)
32777 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32778
32779 emit_insn (gen_unaligned_storev8qi (mem, reg));
32780 }
32781 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32782 We have to use unaligned access for this case. */
32783 else if (i < length)
32784 {
32785 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32786 offset += length - nelt_mode;
32787 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32788 /* We are shifting bytes back, set the alignment accordingly. */
32789 if ((length & 1) == 0)
32790 set_mem_align (mem, BITS_PER_UNIT * 2);
32791 else
32792 set_mem_align (mem, BITS_PER_UNIT);
32793
32794 emit_insn (gen_movmisalignv8qi (mem, reg));
32795 }
32796
32797 return true;
32798 }
32799
32800 /* Set a block of memory using plain strh/strb instructions, only
32801 using instructions allowed by ALIGN on the processor. We fill the
32802 first LENGTH bytes of the memory area starting from DSTBASE
32803 with byte constant VALUE. ALIGN is the alignment requirement
32804 of memory. */
32805 static bool
32806 arm_block_set_unaligned_non_vect (rtx dstbase,
32807 unsigned HOST_WIDE_INT length,
32808 unsigned HOST_WIDE_INT value,
32809 unsigned HOST_WIDE_INT align)
32810 {
32811 unsigned int i;
32812 rtx dst, addr, mem;
32813 rtx val_exp, val_reg, reg;
32814 machine_mode mode;
32815 HOST_WIDE_INT v = value;
32816
32817 gcc_assert (align == 1 || align == 2);
32818
32819 if (align == 2)
32820 v |= (value << BITS_PER_UNIT);
32821
32822 v = sext_hwi (v, BITS_PER_WORD);
32823 val_exp = GEN_INT (v);
32824 /* Skip if it isn't profitable. */
32825 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32826 align, true, false))
32827 return false;
32828
32829 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32830 mode = (align == 2 ? HImode : QImode);
32831 val_reg = force_reg (SImode, val_exp);
32832 reg = gen_lowpart (mode, val_reg);
32833
32834 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32835 {
32836 addr = plus_constant (Pmode, dst, i);
32837 mem = adjust_automodify_address (dstbase, mode, addr, i);
32838 emit_move_insn (mem, reg);
32839 }
32840
32841 /* Handle single byte leftover. */
32842 if (i + 1 == length)
32843 {
32844 reg = gen_lowpart (QImode, val_reg);
32845 addr = plus_constant (Pmode, dst, i);
32846 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32847 emit_move_insn (mem, reg);
32848 i++;
32849 }
32850
32851 gcc_assert (i == length);
32852 return true;
32853 }
32854
32855 /* Set a block of memory using plain strd/str/strh/strb instructions,
32856 to permit unaligned copies on processors which support unaligned
32857 semantics for those instructions. We fill the first LENGTH bytes
32858 of the memory area starting from DSTBASE with byte constant VALUE.
32859 ALIGN is the alignment requirement of memory. */
32860 static bool
32861 arm_block_set_aligned_non_vect (rtx dstbase,
32862 unsigned HOST_WIDE_INT length,
32863 unsigned HOST_WIDE_INT value,
32864 unsigned HOST_WIDE_INT align)
32865 {
32866 unsigned int i;
32867 rtx dst, addr, mem;
32868 rtx val_exp, val_reg, reg;
32869 unsigned HOST_WIDE_INT v;
32870 bool use_strd_p;
32871
32872 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32873 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32874
32875 v = (value | (value << 8) | (value << 16) | (value << 24));
32876 if (length < UNITS_PER_WORD)
32877 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32878
32879 if (use_strd_p)
32880 v |= (v << BITS_PER_WORD);
32881 else
32882 v = sext_hwi (v, BITS_PER_WORD);
32883
32884 val_exp = GEN_INT (v);
32885 /* Skip if it isn't profitable. */
32886 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32887 align, false, use_strd_p))
32888 {
32889 if (!use_strd_p)
32890 return false;
32891
32892 /* Try without strd. */
32893 v = (v >> BITS_PER_WORD);
32894 v = sext_hwi (v, BITS_PER_WORD);
32895 val_exp = GEN_INT (v);
32896 use_strd_p = false;
32897 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32898 align, false, use_strd_p))
32899 return false;
32900 }
32901
32902 i = 0;
32903 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32904 /* Handle double words using strd if possible. */
32905 if (use_strd_p)
32906 {
32907 val_reg = force_reg (DImode, val_exp);
32908 reg = val_reg;
32909 for (; (i + 8 <= length); i += 8)
32910 {
32911 addr = plus_constant (Pmode, dst, i);
32912 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32913 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32914 emit_move_insn (mem, reg);
32915 else
32916 emit_insn (gen_unaligned_storedi (mem, reg));
32917 }
32918 }
32919 else
32920 val_reg = force_reg (SImode, val_exp);
32921
32922 /* Handle words. */
32923 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32924 for (; (i + 4 <= length); i += 4)
32925 {
32926 addr = plus_constant (Pmode, dst, i);
32927 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32928 if ((align & 3) == 0)
32929 emit_move_insn (mem, reg);
32930 else
32931 emit_insn (gen_unaligned_storesi (mem, reg));
32932 }
32933
32934 /* Merge last pair of STRH and STRB into a STR if possible. */
32935 if (unaligned_access && i > 0 && (i + 3) == length)
32936 {
32937 addr = plus_constant (Pmode, dst, i - 1);
32938 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32939 /* We are shifting one byte back, set the alignment accordingly. */
32940 if ((align & 1) == 0)
32941 set_mem_align (mem, BITS_PER_UNIT);
32942
32943 /* Most likely this is an unaligned access, and we can't tell at
32944 compilation time. */
32945 emit_insn (gen_unaligned_storesi (mem, reg));
32946 return true;
32947 }
32948
32949 /* Handle half word leftover. */
32950 if (i + 2 <= length)
32951 {
32952 reg = gen_lowpart (HImode, val_reg);
32953 addr = plus_constant (Pmode, dst, i);
32954 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32955 if ((align & 1) == 0)
32956 emit_move_insn (mem, reg);
32957 else
32958 emit_insn (gen_unaligned_storehi (mem, reg));
32959
32960 i += 2;
32961 }
32962
32963 /* Handle single byte leftover. */
32964 if (i + 1 == length)
32965 {
32966 reg = gen_lowpart (QImode, val_reg);
32967 addr = plus_constant (Pmode, dst, i);
32968 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32969 emit_move_insn (mem, reg);
32970 }
32971
32972 return true;
32973 }
32974
32975 /* Set a block of memory using vectorization instructions for both
32976 aligned and unaligned cases. We fill the first LENGTH bytes of
32977 the memory area starting from DSTBASE with byte constant VALUE.
32978 ALIGN is the alignment requirement of memory. */
32979 static bool
32980 arm_block_set_vect (rtx dstbase,
32981 unsigned HOST_WIDE_INT length,
32982 unsigned HOST_WIDE_INT value,
32983 unsigned HOST_WIDE_INT align)
32984 {
32985 /* Check whether we need to use unaligned store instruction. */
32986 if (((align & 3) != 0 || (length & 3) != 0)
32987 /* Check whether unaligned store instruction is available. */
32988 && (!unaligned_access || BYTES_BIG_ENDIAN))
32989 return false;
32990
32991 if ((align & 3) == 0)
32992 return arm_block_set_aligned_vect (dstbase, length, value, align);
32993 else
32994 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32995 }
32996
32997 /* Expand a block set (memset-style) operation. First we try to do it
32998 using vectorization instructions, then fall back to ARM unaligned access
32999 and double-word stores if profitable. OPERANDS[0] is the destination,
33000 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33001 initialize the memory with, OPERANDS[3] is the known alignment of the
33002 destination. */
33003 bool
33004 arm_gen_setmem (rtx *operands)
33005 {
33006 rtx dstbase = operands[0];
33007 unsigned HOST_WIDE_INT length;
33008 unsigned HOST_WIDE_INT value;
33009 unsigned HOST_WIDE_INT align;
33010
33011 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33012 return false;
33013
33014 length = UINTVAL (operands[1]);
33015 if (length > 64)
33016 return false;
33017
33018 value = (UINTVAL (operands[2]) & 0xFF);
33019 align = UINTVAL (operands[3]);
33020 if (TARGET_NEON && length >= 8
33021 && current_tune->string_ops_prefer_neon
33022 && arm_block_set_vect (dstbase, length, value, align))
33023 return true;
33024
33025 if (!unaligned_access && (align & 3) != 0)
33026 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33027
33028 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33029 }
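
/* Usage sketch (editorial addition): for a call such as
   memset (p, 0xAB, 7) with a word-aligned destination and no NEON, the
   expander typically materialises 0xABABABAB in a register, emits one STR
   for the first word and, when unaligned access is permitted, a second
   (overlapping) STR for the last three bytes. The exact sequence depends
   on the target flags and tuning parameters.  */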
33030
33031
33032 static bool
33033 arm_macro_fusion_p (void)
33034 {
33035 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33036 }
33037
33038 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33039 for MOVW / MOVT macro fusion. */
33040
33041 static bool
33042 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33043 {
33044 /* We are trying to fuse
33045 movw imm / movt imm
33046 instructions as a group that gets scheduled together. */
33047
33048 rtx set_dest = SET_DEST (curr_set);
33049
33050 if (GET_MODE (set_dest) != SImode)
33051 return false;
33052
33053 /* We are trying to match:
33054 prev (movw) == (set (reg r0) (const_int imm16))
33055 curr (movt) == (set (zero_extract (reg r0)
33056 (const_int 16)
33057 (const_int 16))
33058 (const_int imm16_1))
33059 or
33060 prev (movw) == (set (reg r1)
33061 (high (symbol_ref ("SYM"))))
33062 curr (movt) == (set (reg r0)
33063 (lo_sum (reg r1)
33064 (symbol_ref ("SYM")))) */
33065
33066 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33067 {
33068 if (CONST_INT_P (SET_SRC (curr_set))
33069 && CONST_INT_P (SET_SRC (prev_set))
33070 && REG_P (XEXP (set_dest, 0))
33071 && REG_P (SET_DEST (prev_set))
33072 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33073 return true;
33074
33075 }
33076 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33077 && REG_P (SET_DEST (curr_set))
33078 && REG_P (SET_DEST (prev_set))
33079 && GET_CODE (SET_SRC (prev_set)) == HIGH
33080 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33081 return true;
33082
33083 return false;
33084 }
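
/* For example (editorial sketch), the pair

     movw  r0, #:lower16:sym
     movt  r0, #:upper16:sym

   matches the second pattern above (HIGH followed by LO_SUM on the same
   register) and is kept adjacent by the scheduler when FUSE_MOVW_MOVT is
   enabled in the tuning structure.  */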
33085
33086 static bool
33087 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33088 {
33089 rtx prev_set = single_set (prev);
33090 rtx curr_set = single_set (curr);
33091
33092 if (!prev_set
33093 || !curr_set)
33094 return false;
33095
33096 if (any_condjump_p (curr))
33097 return false;
33098
33099 if (!arm_macro_fusion_p ())
33100 return false;
33101
33102 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33103 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33104 return true;
33105
33106 return false;
33107 }
33108
33109 /* Return true iff the instruction fusion described by OP is enabled. */
33110 bool
33111 arm_fusion_enabled_p (tune_params::fuse_ops op)
33112 {
33113 return current_tune->fusible_ops & op;
33114 }
33115
33116 /* Return TRUE if the return address signing mechanism is enabled. */
33117 bool
33118 arm_current_function_pac_enabled_p (void)
33119 {
33120 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33121 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33122 && !crtl->is_leaf));
33123 }
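
/* Editorial note: aarch_ra_sign_scope is derived from the branch-protection
   option; assuming the spelling mirrors the documented -mbranch-protection
   values, "pac-ret" signs only non-leaf functions (AARCH_FUNCTION_NON_LEAF)
   while "pac-ret+leaf" signs every function (AARCH_FUNCTION_ALL), which is
   exactly the distinction tested above.  */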
33124
33125 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33126 static bool
33127 aarch_bti_enabled ()
33128 {
33129 return false;
33130 }
33131
33132 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33133 scheduled for speculative execution. Reject the long-running division
33134 and square-root instructions. */
33135
33136 static bool
33137 arm_sched_can_speculate_insn (rtx_insn *insn)
33138 {
33139 switch (get_attr_type (insn))
33140 {
33141 case TYPE_SDIV:
33142 case TYPE_UDIV:
33143 case TYPE_FDIVS:
33144 case TYPE_FDIVD:
33145 case TYPE_FSQRTS:
33146 case TYPE_FSQRTD:
33147 case TYPE_NEON_FP_SQRT_S:
33148 case TYPE_NEON_FP_SQRT_D:
33149 case TYPE_NEON_FP_SQRT_S_Q:
33150 case TYPE_NEON_FP_SQRT_D_Q:
33151 case TYPE_NEON_FP_DIV_S:
33152 case TYPE_NEON_FP_DIV_D:
33153 case TYPE_NEON_FP_DIV_S_Q:
33154 case TYPE_NEON_FP_DIV_D_Q:
33155 return false;
33156 default:
33157 return true;
33158 }
33159 }
33160
33161 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33162
33163 static unsigned HOST_WIDE_INT
33164 arm_asan_shadow_offset (void)
33165 {
33166 return HOST_WIDE_INT_1U << 29;
33167 }
33168
33169
33170 /* This is a temporary fix for PR60655. Ideally we need
33171 to handle most of these cases in the generic part, but
33172 currently we reject (minus (..) (sym_ref)). We try to
33173 ameliorate the case of (minus (sym_ref1) (sym_ref2))
33174 where both symbols are in the same section. */
33175
33176 static bool
33177 arm_const_not_ok_for_debug_p (rtx p)
33178 {
33179 tree decl_op0 = NULL;
33180 tree decl_op1 = NULL;
33181
33182 if (GET_CODE (p) == UNSPEC)
33183 return true;
33184 if (GET_CODE (p) == MINUS)
33185 {
33186 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33187 {
33188 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33189 if (decl_op1
33190 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33191 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33192 {
33193 if ((VAR_P (decl_op1)
33194 || TREE_CODE (decl_op1) == CONST_DECL)
33195 && (VAR_P (decl_op0)
33196 || TREE_CODE (decl_op0) == CONST_DECL))
33197 return (get_variable_section (decl_op1, false)
33198 != get_variable_section (decl_op0, false));
33199
33200 if (TREE_CODE (decl_op1) == LABEL_DECL
33201 && TREE_CODE (decl_op0) == LABEL_DECL)
33202 return (DECL_CONTEXT (decl_op1)
33203 != DECL_CONTEXT (decl_op0));
33204 }
33205
33206 return true;
33207 }
33208 }
33209
33210 return false;
33211 }
33212
33213 /* Return TRUE if X is a reference to a value in a constant pool. */
33214 extern bool
33215 arm_is_constant_pool_ref (rtx x)
33216 {
33217 return (MEM_P (x)
33218 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33219 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33220 }
33221
33222 /* Remember the last target of arm_set_current_function. */
33223 static GTY(()) tree arm_previous_fndecl;
33224
33225 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33226
33227 void
33228 save_restore_target_globals (tree new_tree)
33229 {
33230 /* If we have a previous state, use it. */
33231 if (TREE_TARGET_GLOBALS (new_tree))
33232 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33233 else if (new_tree == target_option_default_node)
33234 restore_target_globals (&default_target_globals);
33235 else
33236 {
33237 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33238 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33239 }
33240
33241 arm_option_params_internal ();
33242 }
33243
33244 /* Invalidate arm_previous_fndecl. */
33245
33246 void
33247 arm_reset_previous_fndecl (void)
33248 {
33249 arm_previous_fndecl = NULL_TREE;
33250 }
33251
33252 /* Establish appropriate back-end context for processing the function
33253 FNDECL. The argument might be NULL to indicate processing at top
33254 level, outside of any function scope. */
33255
33256 static void
33257 arm_set_current_function (tree fndecl)
33258 {
33259 if (!fndecl || fndecl == arm_previous_fndecl)
33260 return;
33261
33262 tree old_tree = (arm_previous_fndecl
33263 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33264 : NULL_TREE);
33265
33266 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33267
33268 /* If current function has no attributes but previous one did,
33269 use the default node. */
33270 if (! new_tree && old_tree)
33271 new_tree = target_option_default_node;
33272
33273 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC
33274 pop to the default have been handled by save_restore_target_globals
33275 from arm_pragma_target_parse. */
33276 if (old_tree == new_tree)
33277 return;
33278
33279 arm_previous_fndecl = fndecl;
33280
33281 /* First set the target options. */
33282 cl_target_option_restore (&global_options, &global_options_set,
33283 TREE_TARGET_OPTION (new_tree));
33284
33285 save_restore_target_globals (new_tree);
33286
33287 arm_override_options_after_change_1 (&global_options, &global_options_set);
33288 }
33289
33290 /* Implement TARGET_OPTION_PRINT. */
33291
33292 static void
33293 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33294 {
33295 int flags = ptr->x_target_flags;
33296 const char *fpu_name;
33297
33298 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33299 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33300
33301 fprintf (file, "%*sselected isa %s\n", indent, "",
33302 TARGET_THUMB2_P (flags) ? "thumb2" :
33303 TARGET_THUMB_P (flags) ? "thumb1" :
33304 "arm");
33305
33306 if (ptr->x_arm_arch_string)
33307 fprintf (file, "%*sselected architecture %s\n", indent, "",
33308 ptr->x_arm_arch_string);
33309
33310 if (ptr->x_arm_cpu_string)
33311 fprintf (file, "%*sselected CPU %s\n", indent, "",
33312 ptr->x_arm_cpu_string);
33313
33314 if (ptr->x_arm_tune_string)
33315 fprintf (file, "%*sselected tune %s\n", indent, "",
33316 ptr->x_arm_tune_string);
33317
33318 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33319 }
33320
33321 /* Hook to determine if one function can safely inline another. */
33322
33323 static bool
33324 arm_can_inline_p (tree caller, tree callee)
33325 {
33326 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33327 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33328 bool can_inline = true;
33329
33330 struct cl_target_option *caller_opts
33331 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33332 : target_option_default_node);
33333
33334 struct cl_target_option *callee_opts
33335 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33336 : target_option_default_node);
33337
33338 if (callee_opts == caller_opts)
33339 return true;
33340
33341 /* Callee's ISA features should be a subset of the caller's. */
33342 struct arm_build_target caller_target;
33343 struct arm_build_target callee_target;
33344 caller_target.isa = sbitmap_alloc (isa_num_bits);
33345 callee_target.isa = sbitmap_alloc (isa_num_bits);
33346
33347 arm_configure_build_target (&caller_target, caller_opts, false);
33348 arm_configure_build_target (&callee_target, callee_opts, false);
33349 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33350 can_inline = false;
33351
33352 sbitmap_free (caller_target.isa);
33353 sbitmap_free (callee_target.isa);
33354
33355 /* OK to inline between different modes.
33356 Functions with mode-specific instructions, e.g. using asm,
33357 must be explicitly protected with noinline. */
33358 return can_inline;
33359 }
33360
33361 /* Hook to fix function's alignment affected by target attribute. */
33362
33363 static void
33364 arm_relayout_function (tree fndecl)
33365 {
33366 if (DECL_USER_ALIGN (fndecl))
33367 return;
33368
33369 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33370
33371 if (!callee_tree)
33372 callee_tree = target_option_default_node;
33373
33374 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33375 SET_DECL_ALIGN
33376 (fndecl,
33377 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33378 }
33379
33380 /* Inner function to process attribute((target(...))); it takes an argument
33381 and sets the current options from that argument. If we have a list,
33382 recursively go over the list. */
33383
33384 static bool
33385 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33386 {
33387 if (TREE_CODE (args) == TREE_LIST)
33388 {
33389 bool ret = true;
33390
33391 for (; args; args = TREE_CHAIN (args))
33392 if (TREE_VALUE (args)
33393 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33394 ret = false;
33395 return ret;
33396 }
33397
33398 else if (TREE_CODE (args) != STRING_CST)
33399 {
33400 error ("attribute %<target%> argument not a string");
33401 return false;
33402 }
33403
33404 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33405 char *q;
33406
33407 while ((q = strtok (argstr, ",")) != NULL)
33408 {
33409 argstr = NULL;
33410 if (!strcmp (q, "thumb"))
33411 {
33412 opts->x_target_flags |= MASK_THUMB;
33413 if (TARGET_FDPIC && !arm_arch_thumb2)
33414 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33415 }
33416
33417 else if (!strcmp (q, "arm"))
33418 opts->x_target_flags &= ~MASK_THUMB;
33419
33420 else if (!strcmp (q, "general-regs-only"))
33421 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33422
33423 else if (startswith (q, "fpu="))
33424 {
33425 int fpu_index;
33426 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33427 &fpu_index, CL_TARGET))
33428 {
33429 error ("invalid fpu for target attribute or pragma %qs", q);
33430 return false;
33431 }
33432 if (fpu_index == TARGET_FPU_auto)
33433 {
33434 /* This doesn't really make sense until we support
33435 general dynamic selection of the architecture and all
33436 sub-features. */
33437 sorry ("auto fpu selection not currently permitted here");
33438 return false;
33439 }
33440 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33441 }
33442 else if (startswith (q, "arch="))
33443 {
33444 char *arch = q + 5;
33445 const arch_option *arm_selected_arch
33446 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33447
33448 if (!arm_selected_arch)
33449 {
33450 error ("invalid architecture for target attribute or pragma %qs",
33451 q);
33452 return false;
33453 }
33454
33455 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33456 }
33457 else if (q[0] == '+')
33458 {
33459 opts->x_arm_arch_string
33460 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33461 }
33462 else
33463 {
33464 error ("unknown target attribute or pragma %qs", q);
33465 return false;
33466 }
33467 }
33468
33469 return true;
33470 }
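
/* Usage sketch (editorial addition): the strings accepted here are the
   comma-separated items of a target attribute or pragma, for example

     __attribute__ ((target ("thumb,fpu=vfpv3-d16")))
     void f (void);

     #pragma GCC target ("arch=armv7-a")

   Items beginning with '+' are appended to the current architecture string
   as extensions (for example "+fp", where the architecture defines it).  */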
33471
33472 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33473
33474 tree
33475 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33476 struct gcc_options *opts_set)
33477 {
33478 struct cl_target_option cl_opts;
33479
33480 if (!arm_valid_target_attribute_rec (args, opts))
33481 return NULL_TREE;
33482
33483 cl_target_option_save (&cl_opts, opts, opts_set);
33484 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33485 arm_option_check_internal (opts);
33486 /* Do any overrides, such as global options arch=xxx.
33487 We do this since arm_active_target was overridden. */
33488 arm_option_reconfigure_globals ();
33489 arm_options_perform_arch_sanity_checks ();
33490 arm_option_override_internal (opts, opts_set);
33491
33492 return build_target_option_node (opts, opts_set);
33493 }
33494
33495 static void
33496 add_attribute (const char * mode, tree *attributes)
33497 {
33498 size_t len = strlen (mode);
33499 tree value = build_string (len, mode);
33500
33501 TREE_TYPE (value) = build_array_type (char_type_node,
33502 build_index_type (size_int (len)));
33503
33504 *attributes = tree_cons (get_identifier ("target"),
33505 build_tree_list (NULL_TREE, value),
33506 *attributes);
33507 }
33508
33509 /* For testing. Insert thumb and arm modes alternately on functions. */
33510
33511 static void
33512 arm_insert_attributes (tree fndecl, tree * attributes)
33513 {
33514 const char *mode;
33515
33516 if (! TARGET_FLIP_THUMB)
33517 return;
33518
33519 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33520 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33521 return;
33522
33523 /* Nested definitions must inherit mode. */
33524 if (current_function_decl)
33525 {
33526 mode = TARGET_THUMB ? "thumb" : "arm";
33527 add_attribute (mode, attributes);
33528 return;
33529 }
33530
33531 /* If there is already a setting don't change it. */
33532 if (lookup_attribute ("target", *attributes) != NULL)
33533 return;
33534
33535 mode = thumb_flipper ? "thumb" : "arm";
33536 add_attribute (mode, attributes);
33537
33538 thumb_flipper = !thumb_flipper;
33539 }
33540
33541 /* Hook to validate attribute((target("string"))). */
33542
33543 static bool
33544 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33545 tree args, int ARG_UNUSED (flags))
33546 {
33547 bool ret = true;
33548 struct gcc_options func_options, func_options_set;
33549 tree cur_tree, new_optimize;
33550 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33551
33552 /* Get the optimization options of the current function. */
33553 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33554
33555 /* If the function changed the optimization levels as well as setting target
33556 options, start with the optimizations specified. */
33557 if (!func_optimize)
33558 func_optimize = optimization_default_node;
33559
33560 /* Init func_options. */
33561 memset (&func_options, 0, sizeof (func_options));
33562 init_options_struct (&func_options, NULL);
33563 lang_hooks.init_options_struct (&func_options);
33564 memset (&func_options_set, 0, sizeof (func_options_set));
33565
33566 /* Initialize func_options to the defaults. */
33567 cl_optimization_restore (&func_options, &func_options_set,
33568 TREE_OPTIMIZATION (func_optimize));
33569
33570 cl_target_option_restore (&func_options, &func_options_set,
33571 TREE_TARGET_OPTION (target_option_default_node));
33572
33573 /* Set func_options flags with new target mode. */
33574 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33575 &func_options_set);
33576
33577 if (cur_tree == NULL_TREE)
33578 ret = false;
33579
33580 new_optimize = build_optimization_node (&func_options, &func_options_set);
33581
33582 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33583
33584 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33585
33586 return ret;
33587 }
33588
33589 /* Match an ISA feature bitmap to a named FPU. We always use the
33590 first entry that exactly matches the feature set, so that we
33591 effectively canonicalize the FPU name for the assembler. */
33592 static const char*
33593 arm_identify_fpu_from_isa (sbitmap isa)
33594 {
33595 auto_sbitmap fpubits (isa_num_bits);
33596 auto_sbitmap cand_fpubits (isa_num_bits);
33597
33598 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33599
33600 /* If there are no ISA feature bits relating to the FPU, we must be
33601 doing soft-float. */
33602 if (bitmap_empty_p (fpubits))
33603 return "softvfp";
33604
33605 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33606 {
33607 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33608 if (bitmap_equal_p (fpubits, cand_fpubits))
33609 return all_fpus[i].name;
33610 }
33611 /* We must find an entry, or things have gone wrong. */
33612 gcc_unreachable ();
33613 }
33614
33615 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33616 by the function fndecl. */
33617 void
33618 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33619 {
33620 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33621
33622 struct cl_target_option *targ_options;
33623 if (target_parts)
33624 targ_options = TREE_TARGET_OPTION (target_parts);
33625 else
33626 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33627 gcc_assert (targ_options);
33628
33629 arm_print_asm_arch_directives (stream, targ_options);
33630
33631 fprintf (stream, "\t.syntax unified\n");
33632
33633 if (TARGET_THUMB)
33634 {
33635 if (is_called_in_ARM_mode (decl)
33636 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33637 && cfun->is_thunk))
33638 fprintf (stream, "\t.code 32\n");
33639 else if (TARGET_THUMB1)
33640 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33641 else
33642 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33643 }
33644 else
33645 fprintf (stream, "\t.arm\n");
33646
33647 if (TARGET_POKE_FUNCTION_NAME)
33648 arm_poke_function_name (stream, (const char *) name);
33649 }
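
/* Illustrative output (editorial sketch) for a Thumb-2 function: the
   directives emitted here look like

     .syntax unified
     .thumb
     .thumb_func

   preceded by the .arch/.fpu directives that arm_print_asm_arch_directives
   chooses for the function's target options, and followed by the usual
   function label.  */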
33650
33651 /* If MEM is in the form of [base+offset], extract the two parts
33652 of the address and store them in BASE and OFFSET; otherwise return
33653 false after clearing BASE and OFFSET. */
33654
33655 static bool
33656 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33657 {
33658 rtx addr;
33659
33660 gcc_assert (MEM_P (mem));
33661
33662 addr = XEXP (mem, 0);
33663
33664 /* Strip off const from addresses like (const (addr)). */
33665 if (GET_CODE (addr) == CONST)
33666 addr = XEXP (addr, 0);
33667
33668 if (REG_P (addr))
33669 {
33670 *base = addr;
33671 *offset = const0_rtx;
33672 return true;
33673 }
33674
33675 if (GET_CODE (addr) == PLUS
33676 && GET_CODE (XEXP (addr, 0)) == REG
33677 && CONST_INT_P (XEXP (addr, 1)))
33678 {
33679 *base = XEXP (addr, 0);
33680 *offset = XEXP (addr, 1);
33681 return true;
33682 }
33683
33684 *base = NULL_RTX;
33685 *offset = NULL_RTX;
33686
33687 return false;
33688 }
33689
33690 /* If INSN is a load or store whose address is in the form [base+offset],
33691 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
33692 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33693 otherwise return FALSE. */
33694
33695 static bool
33696 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33697 {
33698 rtx x, dest, src;
33699
33700 gcc_assert (INSN_P (insn));
33701 x = PATTERN (insn);
33702 if (GET_CODE (x) != SET)
33703 return false;
33704
33705 src = SET_SRC (x);
33706 dest = SET_DEST (x);
33707 if (REG_P (src) && MEM_P (dest))
33708 {
33709 *is_load = false;
33710 extract_base_offset_in_addr (dest, base, offset);
33711 }
33712 else if (MEM_P (src) && REG_P (dest))
33713 {
33714 *is_load = true;
33715 extract_base_offset_in_addr (src, base, offset);
33716 }
33717 else
33718 return false;
33719
33720 return (*base != NULL_RTX && *offset != NULL_RTX);
33721 }
33722
33723 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33724
33725 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33726 and PRI are only calculated for these instructions. For other instructions,
33727 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33728 instruction fusion can be supported by returning different priorities.
33729
33730 It's important that irrelevant instructions get the largest FUSION_PRI. */
33731
33732 static void
33733 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33734 int *fusion_pri, int *pri)
33735 {
33736 int tmp, off_val;
33737 bool is_load;
33738 rtx base, offset;
33739
33740 gcc_assert (INSN_P (insn));
33741
33742 tmp = max_pri - 1;
33743 if (!fusion_load_store (insn, &base, &offset, &is_load))
33744 {
33745 *pri = tmp;
33746 *fusion_pri = tmp;
33747 return;
33748 }
33749
33750 /* Load goes first. */
33751 if (is_load)
33752 *fusion_pri = tmp - 1;
33753 else
33754 *fusion_pri = tmp - 2;
33755
33756 tmp /= 2;
33757
33758 /* INSN with smaller base register goes first. */
33759 tmp -= ((REGNO (base) & 0xff) << 20);
33760
33761 /* INSN with smaller offset goes first. */
33762 off_val = (int)(INTVAL (offset));
33763 if (off_val >= 0)
33764 tmp -= (off_val & 0xfffff);
33765 else
33766 tmp += ((- off_val) & 0xfffff);
33767
33768 *pri = tmp;
33769 return;
33770 }
33771
33772
33773 /* Construct and return a PARALLEL RTX vector with elements numbering the
33774 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33775 the vector - from the perspective of the architecture. This does not
33776 line up with GCC's perspective on lane numbers, so we end up with
33777 different masks depending on our target endian-ness. The diagram
33778 below may help. We must draw the distinction when building masks
33779 which select one half of the vector. An instruction selecting
33780 architectural low-lanes for a big-endian target must be described using
33781 a mask selecting GCC high-lanes.
33782
33783 Big-Endian Little-Endian
33784
33785 GCC 0 1 2 3 3 2 1 0
33786 | x | x | x | x | | x | x | x | x |
33787 Architecture 3 2 1 0 3 2 1 0
33788
33789 Low Mask: { 2, 3 } { 0, 1 }
33790 High Mask: { 0, 1 } { 2, 3 }
33791 */
33792
33793 rtx
33794 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33795 {
33796 int nunits = GET_MODE_NUNITS (mode);
33797 rtvec v = rtvec_alloc (nunits / 2);
33798 int high_base = nunits / 2;
33799 int low_base = 0;
33800 int base;
33801 rtx t1;
33802 int i;
33803
33804 if (BYTES_BIG_ENDIAN)
33805 base = high ? low_base : high_base;
33806 else
33807 base = high ? high_base : low_base;
33808
33809 for (i = 0; i < nunits / 2; i++)
33810 RTVEC_ELT (v, i) = GEN_INT (base + i);
33811
33812 t1 = gen_rtx_PARALLEL (mode, v);
33813 return t1;
33814 }
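
/* Worked example (editorial sketch): for V4SImode with HIGH == true this
   returns (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   High Mask row of the diagram above.  */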
33815
33816 /* Check OP for validity as a PARALLEL RTX vector with elements
33817 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33818 from the perspective of the architecture. See the diagram above
33819 arm_simd_vect_par_cnst_half for more details. */
33820
33821 bool
33822 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33823 bool high)
33824 {
33825 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33826 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33827 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33828 int i = 0;
33829
33830 if (!VECTOR_MODE_P (mode))
33831 return false;
33832
33833 if (count_op != count_ideal)
33834 return false;
33835
33836 for (i = 0; i < count_ideal; i++)
33837 {
33838 rtx elt_op = XVECEXP (op, 0, i);
33839 rtx elt_ideal = XVECEXP (ideal, 0, i);
33840
33841 if (!CONST_INT_P (elt_op)
33842 || INTVAL (elt_ideal) != INTVAL (elt_op))
33843 return false;
33844 }
33845 return true;
33846 }
33847
33848 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33849 in Thumb1. */
33850 static bool
33851 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33852 const_tree)
33853 {
33854 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33855 if (vcall_offset && TARGET_THUMB1)
33856 return false;
33857
33858 /* Otherwise ok. */
33859 return true;
33860 }
33861
33862 /* Generate RTL for a conditional branch with rtx comparison CODE in
33863 mode CC_MODE. The destination of the unlikely conditional branch
33864 is LABEL_REF. */
33865
33866 void
33867 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33868 rtx label_ref)
33869 {
33870 rtx x;
33871 x = gen_rtx_fmt_ee (code, VOIDmode,
33872 gen_rtx_REG (cc_mode, CC_REGNUM),
33873 const0_rtx);
33874
33875 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33876 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33877 pc_rtx);
33878 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33879 }
33880
33881 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33882
33883 For pure-code sections there is no letter code for this attribute, so
33884 output all the section flags numerically when this is needed. */
33885
33886 static bool
33887 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33888 {
33889
33890 if (flags & SECTION_ARM_PURECODE)
33891 {
33892 *num = 0x20000000;
33893
33894 if (!(flags & SECTION_DEBUG))
33895 *num |= 0x2;
33896 if (flags & SECTION_EXCLUDE)
33897 *num |= 0x80000000;
33898 if (flags & SECTION_WRITE)
33899 *num |= 0x1;
33900 if (flags & SECTION_CODE)
33901 *num |= 0x4;
33902 if (flags & SECTION_MERGE)
33903 *num |= 0x10;
33904 if (flags & SECTION_STRINGS)
33905 *num |= 0x20;
33906 if (flags & SECTION_TLS)
33907 *num |= 0x400;
33908 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33909 *num |= 0x200;
33910
33911 return true;
33912 }
33913
33914 return false;
33915 }
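
/* Worked example (editorial sketch): a plain executable pure-code section
   (SECTION_CODE set, not SECTION_DEBUG, nothing else) yields
   0x20000000 | 0x2 | 0x4 = 0x20000006, i.e. SHF_ARM_PURECODE plus the
   usual SHF_ALLOC and SHF_EXECINSTR bits.  */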
33916
33917 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33918
33919 If pure-code is passed as an option, make sure all functions are in
33920 sections that have the SHF_ARM_PURECODE attribute. */
33921
33922 static section *
33923 arm_function_section (tree decl, enum node_frequency freq,
33924 bool startup, bool exit)
33925 {
33926 const char * section_name;
33927 section * sec;
33928
33929 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33930 return default_function_section (decl, freq, startup, exit);
33931
33932 if (!target_pure_code)
33933 return default_function_section (decl, freq, startup, exit);
33934
33935
33936 section_name = DECL_SECTION_NAME (decl);
33937
33938 /* If a function is not in a named section then it falls under the 'default'
33939 text section, also known as '.text'. We can preserve previous behavior as
33940 the default text section already has the SHF_ARM_PURECODE section
33941 attribute. */
33942 if (!section_name)
33943 {
33944 section *default_sec = default_function_section (decl, freq, startup,
33945 exit);
33946
33947 /* If default_sec is not null, then it must be a special section, for
33948 example .text.startup.  We set the pure-code attribute and return the
33949 same section to preserve existing behavior. */
33950 if (default_sec)
33951 default_sec->common.flags |= SECTION_ARM_PURECODE;
33952 return default_sec;
33953 }
33954
33955 /* Otherwise check whether a section has already been created with
33956 'section_name'. */
33957 sec = get_named_section (decl, section_name, 0);
33958 if (!sec)
33959 /* If that is not the case, passing NULL as the section's name to
33960 'get_named_section' will create a section with the declaration's
33961 section name. */
33962 sec = get_named_section (decl, NULL, 0);
33963
33964 /* Set the SHF_ARM_PURECODE attribute. */
33965 sec->common.flags |= SECTION_ARM_PURECODE;
33966
33967 return sec;
33968 }
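/* Illustrative sketch (hypothetical source, not from the original file):
   with -mpure-code, a function placed in a user-named section, e.g.

       __attribute__ ((section (".my_code")))
       void f (void) { }

   gets SHF_ARM_PURECODE added to the ".my_code" section by the hook above,
   while functions without a named section stay in the default text
   sections, which already carry the flag.  */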
33969
33970 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
33971
33972 If DECL is a function declaration and pure-code is passed as an option
33973 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
33974 section's name and RELOC indicates whether the declaration's initializer
33975 may contain runtime relocations. */
33976
33977 static unsigned int
33978 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33979 {
33980 unsigned int flags = default_section_type_flags (decl, name, reloc);
33981
33982 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33983 flags |= SECTION_ARM_PURECODE;
33984
33985 return flags;
33986 }
33987
33988 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33989
33990 static void
33991 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33992 rtx op0, rtx op1,
33993 rtx *quot_p, rtx *rem_p)
33994 {
33995 if (mode == SImode)
33996 gcc_assert (!TARGET_IDIV);
33997
33998 scalar_int_mode libval_mode
33999 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34000
34001 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34002 libval_mode, op0, mode, op1, mode);
34003
34004 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34005 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34006 GET_MODE_SIZE (mode));
34007
34008 gcc_assert (quotient);
34009 gcc_assert (remainder);
34010
34011 *quot_p = quotient;
34012 *rem_p = remainder;
34013 }
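/* Illustrative sketch (assumes a little-endian ILP32 target; names are
   hypothetical): for SImode the DImode libcall value is split by the
   subregs above roughly as

       uint64_t packed = result_of_aeabi_idivmod;
       int32_t quotient  = (int32_t) (packed & 0xffffffffu);    subreg byte 0
       int32_t remainder = (int32_t) (packed >> 32);            subreg byte 4

   i.e. the quotient occupies the low half of the returned value and the
   remainder the high half.  */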
34014
34015 /* This function checks for the availability of the coprocessor builtin passed
34016 in BUILTIN for the current target.  Returns true if it is available and
34017 false otherwise.  If a BUILTIN is passed for which this function has not
34018 been implemented, it will abort via gcc_unreachable. */
34019
34020 bool
34021 arm_coproc_builtin_available (enum unspecv builtin)
34022 {
34023 /* None of these builtins are available in Thumb mode if the target only
34024 supports Thumb-1. */
34025 if (TARGET_THUMB1)
34026 return false;
34027
34028 switch (builtin)
34029 {
34030 case VUNSPEC_CDP:
34031 case VUNSPEC_LDC:
34032 case VUNSPEC_LDCL:
34033 case VUNSPEC_STC:
34034 case VUNSPEC_STCL:
34035 case VUNSPEC_MCR:
34036 case VUNSPEC_MRC:
34037 if (arm_arch4)
34038 return true;
34039 break;
34040 case VUNSPEC_CDP2:
34041 case VUNSPEC_LDC2:
34042 case VUNSPEC_LDC2L:
34043 case VUNSPEC_STC2:
34044 case VUNSPEC_STC2L:
34045 case VUNSPEC_MCR2:
34046 case VUNSPEC_MRC2:
34047 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34048 ARMv8-{A,M}. */
34049 if (arm_arch5t)
34050 return true;
34051 break;
34052 case VUNSPEC_MCRR:
34053 case VUNSPEC_MRRC:
34054 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34055 ARMv8-{A,M}. */
34056 if (arm_arch6 || arm_arch5te)
34057 return true;
34058 break;
34059 case VUNSPEC_MCRR2:
34060 case VUNSPEC_MRRC2:
34061 if (arm_arch6)
34062 return true;
34063 break;
34064 default:
34065 gcc_unreachable ();
34066 }
34067 return false;
34068 }
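/* Illustrative example (the ACLE spelling of the intrinsic is an assumption
   here, not taken from this file): a coprocessor access such as

       #include <arm_acle.h>
       void write_cp15 (uint32_t x)
       {
         __arm_mcr (15, 0, x, 7, 5, 0);   maps onto VUNSPEC_MCR
       }

   is only accepted when the checks above pass, e.g. never for a
   Thumb-1-only target, and for plain MCR/MRC only from ARMv4 onwards.  */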
34069
34070 /* This function returns true if OP is a valid memory operand for the ldc and
34071 stc coprocessor instructions and false otherwise. */
34072
34073 bool
34074 arm_coproc_ldc_stc_legitimate_address (rtx op)
34075 {
34076 HOST_WIDE_INT range;
34077 /* Has to be a memory operand. */
34078 if (!MEM_P (op))
34079 return false;
34080
34081 op = XEXP (op, 0);
34082
34083 /* We accept registers. */
34084 if (REG_P (op))
34085 return true;
34086
34087 switch (GET_CODE (op))
34088 {
34089 case PLUS:
34090 {
34091 /* Or registers with an offset. */
34092 if (!REG_P (XEXP (op, 0)))
34093 return false;
34094
34095 op = XEXP (op, 1);
34096
34097 /* The offset must be an immediate though. */
34098 if (!CONST_INT_P (op))
34099 return false;
34100
34101 range = INTVAL (op);
34102
34103 /* Within the range of [-1020,1020]. */
34104 if (!IN_RANGE (range, -1020, 1020))
34105 return false;
34106
34107 /* And a multiple of 4. */
34108 return (range % 4) == 0;
34109 }
34110 case PRE_INC:
34111 case POST_INC:
34112 case PRE_DEC:
34113 case POST_DEC:
34114 return REG_P (XEXP (op, 0));
34115 default:
34116 gcc_unreachable ();
34117 }
34118 return false;
34119 }
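/* Illustrative sketch (standalone C, not from the original file): the
   immediate-offset case above amounts to

       bool
       ldc_stc_offset_ok (HOST_WIDE_INT offset)
       {
         return IN_RANGE (offset, -1020, 1020) && (offset % 4) == 0;
       }

   so e.g. [rN, #1020] and [rN, #-8] are accepted, while [rN, #1021] and
   [rN, #2] are rejected; the pre/post increment and decrement forms only
   require a register base.  */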
34120
34121 /* Return the diagnostic message string if conversion from FROMTYPE to
34122 TOTYPE is not allowed, NULL otherwise. */
34123
34124 static const char *
34125 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34126 {
34127 if (element_mode (fromtype) != element_mode (totype))
34128 {
34129 /* Do not allow conversions to/from BFmode scalar types. */
34130 if (TYPE_MODE (fromtype) == BFmode)
34131 return N_("invalid conversion from type %<bfloat16_t%>");
34132 if (TYPE_MODE (totype) == BFmode)
34133 return N_("invalid conversion to type %<bfloat16_t%>");
34134 }
34135
34136 /* Conversion allowed. */
34137 return NULL;
34138 }
34139
34140 /* Return the diagnostic message string if the unary operation OP is
34141 not permitted on TYPE, NULL otherwise. */
34142
34143 static const char *
34144 arm_invalid_unary_op (int op, const_tree type)
34145 {
34146 /* Reject all single-operand operations on BFmode except for &. */
34147 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34148 return N_("operation not permitted on type %<bfloat16_t%>");
34149
34150 /* Operation allowed. */
34151 return NULL;
34152 }
34153
34154 /* Return the diagnostic message string if the binary operation OP is
34155 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34156
34157 static const char *
34158 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34159 const_tree type2)
34160 {
34161 /* Reject all 2-operand operations on BFmode. */
34162 if (element_mode (type1) == BFmode
34163 || element_mode (type2) == BFmode)
34164 return N_("operation not permitted on type %<bfloat16_t%>");
34165
34166 /* Operation allowed. */
34167 return NULL;
34168 }
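/* Illustrative example (hypothetical source; assumes the target provides
   the bfloat16_t typedef):

       bfloat16_t b;
       bfloat16_t *p = &b;       accepted: & is the one permitted unary op
       float f = (float) b;      rejected: conversion from bfloat16_t
       bfloat16_t s = b + b;     rejected: binary operation on bfloat16_t

   matching the three diagnostic hooks above.  */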
34169
34170 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34171
34172 In VFPv1, VFP registers could only be accessed in the mode they were
34173 set, so subregs would be invalid there. However, we don't support
34174 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34175
34176 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34177 VFP registers in little-endian order. We can't describe that accurately to
34178 GCC, so avoid taking subregs of such values.
34179
34180 The only exception is going from a 128-bit to a 64-bit type. In that
34181 case the data layout happens to be consistent for big-endian, so we
34182 explicitly allow that case. */
34183
34184 static bool
34185 arm_can_change_mode_class (machine_mode from, machine_mode to,
34186 reg_class_t rclass)
34187 {
34188 if (TARGET_BIG_END
34189 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34190 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34191 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34192 && reg_classes_intersect_p (VFP_REGS, rclass))
34193 return false;
34194 return true;
34195 }
34196
34197 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34198 strcpy from constants will be faster. */
34199
34200 static HOST_WIDE_INT
34201 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34202 {
34203 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34204 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34205 return MAX (align, BITS_PER_WORD * factor);
34206 return align;
34207 }
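/* Worked example (illustrative only): when not optimizing for size, a
   string constant such as "abc" is given at least BITS_PER_WORD (32-bit)
   alignment, and 64-bit alignment when tuning for XScale in ARM mode,
   since the factor above is 2 in that case.  */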
34208
34209 /* Emit a speculation barrier on target architectures that do not have
34210 DSB/ISB directly. Such systems probably don't need a barrier
34211 themselves, but if the code is ever run on a later architecture, it
34212 might become a problem. */
34213 void
34214 arm_emit_speculation_barrier_function ()
34215 {
34216 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34217 }
34218
34219 /* Have we recorded an explicit access to the Q bit of APSR? */
34220 bool
34221 arm_q_bit_access (void)
34222 {
34223 if (cfun && cfun->decl)
34224 return lookup_attribute ("acle qbit",
34225 DECL_ATTRIBUTES (cfun->decl));
34226 return true;
34227 }
34228
34229 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34230 bool
34231 arm_ge_bits_access (void)
34232 {
34233 if (cfun && cfun->decl)
34234 return lookup_attribute ("acle gebits",
34235 DECL_ATTRIBUTES (cfun->decl));
34236 return true;
34237 }
34238
34239 /* Return NULL if insn INSN is valid within a low-overhead loop.
34240 Otherwise return a string describing why doloop cannot be applied. */
34241
34242 static const char *
34243 arm_invalid_within_doloop (const rtx_insn *insn)
34244 {
34245 if (!TARGET_HAVE_LOB)
34246 return default_invalid_within_doloop (insn);
34247
34248 if (CALL_P (insn))
34249 return "Function call in the loop.";
34250
34251 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34252 return "LR is used inside loop.";
34253
34254 return NULL;
34255 }
34256
34257 bool
34258 arm_target_insn_ok_for_lob (rtx insn)
34259 {
34260 basic_block bb = BLOCK_FOR_INSN (insn);
34261 /* Make sure the basic block of the target insn is a simple latch
34262 having as its single predecessor and successor the body of the loop
34263 itself.  Only simple loops with a single basic block as the body are
34264 supported for 'low-overhead loops', making sure that the LE target is
34265 above LE itself in the generated code. */
34266
34267 return single_succ_p (bb)
34268 && single_pred_p (bb)
34269 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34270 && contains_no_active_insn_p (bb);
34271 }
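/* Illustrative sketch (Armv8.1-M low-overhead-branch spelling; label names
   are hypothetical): the shape accepted by the two checks above is a
   single-block loop such as

       dls     lr, r0          set up the loop counter
   .Lbody:
       ...                     loop body: no calls, no other use of LR
       le      lr, .Lbody      LE branches backwards to the body

   where the LE target (.Lbody) sits above the LE instruction itself.  */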
34272
34273 #if CHECKING_P
34274 namespace selftest {
34275
34276 /* Scan the static data tables generated by parsecpu.awk looking for
34277 potential issues with the data. We primarily check for
34278 inconsistencies in the option extensions at present (extensions
34279 that duplicate others but aren't marked as aliases). Furthermore,
34280 for correct canonicalization later options must never be a subset
34281 of an earlier option. Any extension should also only specify other
34282 feature bits and never an architecture bit. The architecture is inferred
34283 from the declaration of the extension. */
34284 static void
34285 arm_test_cpu_arch_data (void)
34286 {
34287 const arch_option *arch;
34288 const cpu_option *cpu;
34289 auto_sbitmap target_isa (isa_num_bits);
34290 auto_sbitmap isa1 (isa_num_bits);
34291 auto_sbitmap isa2 (isa_num_bits);
34292
34293 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34294 {
34295 const cpu_arch_extension *ext1, *ext2;
34296
34297 if (arch->common.extensions == NULL)
34298 continue;
34299
34300 arm_initialize_isa (target_isa, arch->common.isa_bits);
34301
34302 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34303 {
34304 if (ext1->alias)
34305 continue;
34306
34307 arm_initialize_isa (isa1, ext1->isa_bits);
34308 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34309 {
34310 if (ext2->alias || ext1->remove != ext2->remove)
34311 continue;
34312
34313 arm_initialize_isa (isa2, ext2->isa_bits);
34314 /* If the option is a subset of the parent option, it doesn't
34315 add anything and so isn't useful. */
34316 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34317
34318 /* If the extension specifies any architectural bits then
34319 disallow it. Extensions should only specify feature bits. */
34320 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34321 }
34322 }
34323 }
34324
34325 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34326 {
34327 const cpu_arch_extension *ext1, *ext2;
34328
34329 if (cpu->common.extensions == NULL)
34330 continue;
34331
34332 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34333
34334 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34335 {
34336 if (ext1->alias)
34337 continue;
34338
34339 arm_initialize_isa (isa1, ext1->isa_bits);
34340 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34341 {
34342 if (ext2->alias || ext1->remove != ext2->remove)
34343 continue;
34344
34345 arm_initialize_isa (isa2, ext2->isa_bits);
34346 /* If the option is a subset of the parent option, it doesn't
34347 add anything and so isn't useful. */
34348 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34349
34350 /* If the extension specifies any architectural bits then
34351 disallow it. Extensions should only specify feature bits. */
34352 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34353 }
34354 }
34355 }
34356 }
34357
34358 /* Scan the static data tables generated by parsecpu.awk looking for
34359 potential issues with the data.  Here we check for consistency between the
34360 fpu bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34361 a feature bit that is not defined by any FPU flag. */
34362 static void
34363 arm_test_fpu_data (void)
34364 {
34365 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34366 auto_sbitmap fpubits (isa_num_bits);
34367 auto_sbitmap tmpset (isa_num_bits);
34368
34369 static const enum isa_feature fpu_bitlist_internal[]
34370 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34371 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34372
34373 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34374 {
34375 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34376 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34377 bitmap_clear (isa_all_fpubits_internal);
34378 bitmap_copy (isa_all_fpubits_internal, tmpset);
34379 }
34380
34381 if (!bitmap_empty_p (isa_all_fpubits_internal))
34382 {
34383 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34384 " group that are not defined by any FPU.\n"
34385 " Check your arm-cpus.in.\n");
34386 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34387 }
34388 }
34389
34390 static void
34391 arm_run_selftests (void)
34392 {
34393 arm_test_cpu_arch_data ();
34394 arm_test_fpu_data ();
34395 }
34396 } /* Namespace selftest. */
34397
34398 #undef TARGET_RUN_TARGET_SELFTESTS
34399 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34400 #endif /* CHECKING_P */
34401
34402 /* Implement TARGET_STACK_PROTECT_GUARD.  In the case of a
34403 global-variable-based guard, use the default; otherwise
34404 return a null tree. */
34405 static tree
34406 arm_stack_protect_guard (void)
34407 {
34408 if (arm_stack_protector_guard == SSP_GLOBAL)
34409 return default_stack_protect_guard ();
34410
34411 return NULL_TREE;
34412 }
34413
34414 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34415 Unlike the arm version, we do NOT implement asm flag outputs. */
34416
34417 rtx_insn *
34418 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34419 vec<machine_mode> & /*input_modes*/,
34420 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34421 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34422 {
34423 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34424 if (startswith (constraints[i], "=@cc"))
34425 {
34426 sorry ("%<asm%> flags not supported in thumb1 mode");
34427 break;
34428 }
34429 return NULL;
34430 }
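/* Illustrative example (hypothetical source): an asm flag output uses an
   "=@cc<cond>" constraint, e.g.

       int is_zero;
       __asm__ ("cmp %1, #0" : "=@cceq" (is_zero) : "r" (x));

   which is handled by the Arm/Thumb-2 md_asm_adjust but, per the loop
   above, is diagnosed with "sorry" when compiling for Thumb-1.  */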
34431
34432 /* Generate code to enable conditional branches in functions over 1 MiB.
34433 Parameters are:
34434 operands: the operands list of the asm insn (see arm_cond_branch or
34435 arm_cond_branch_reversed).
34436 pos_label: an index into the operands array; operands[pos_label] is
34437 the asm label of the final jump destination.
34438 dest: a string used to generate the asm label of the intermediate
34439 destination.
34440 branch_format: a string denoting the intermediate branch format, e.g.
34441 "beq", "bne", etc. */
34442
34443 const char *
34444 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34445 const char * branch_format)
34446 {
34447 rtx_code_label * tmp_label = gen_label_rtx ();
34448 char label_buf[256];
34449 char buffer[128];
34450 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34451 CODE_LABEL_NUMBER (tmp_label));
34452 const char *label_ptr = arm_strip_name_encoding (label_buf);
34453 rtx dest_label = operands[pos_label];
34454 operands[pos_label] = tmp_label;
34455
34456 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34457 output_asm_insn (buffer, operands);
34458
34459 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34460 operands[pos_label] = dest_label;
34461 output_asm_insn (buffer, operands);
34462 return "";
34463 }
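/* Illustrative sketch (label and condition are hypothetical): the routine
   above turns an out-of-range conditional branch into a short branch
   around an unconditional one, roughly

       beq     .LCB42          BRANCH_FORMAT plus the internal label
       b       .Lfar_target    unconditional branch to the real destination
   .LCB42:

   with the caller supplying the appropriate condition (normally the
   inverse of the original branch) in BRANCH_FORMAT.  */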
34464
34465 /* If the given mode matches, load from memory to LO_REGS
34466 (i.e. [Rn], Rn <= LO_REGS). */
34467 enum reg_class
34468 arm_mode_base_reg_class (machine_mode mode)
34469 {
34470 if (TARGET_HAVE_MVE
34471 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34472 return LO_REGS;
34473
34474 return MODE_BASE_REG_REG_CLASS (mode);
34475 }
34476
34477 struct gcc_target targetm = TARGET_INITIALIZER;
34478
34479 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34480
34481 opt_machine_mode
34482 arm_get_mask_mode (machine_mode mode)
34483 {
34484 if (TARGET_HAVE_MVE)
34485 return arm_mode_to_pred_mode (mode);
34486
34487 return default_get_mask_mode (mode);
34488 }
34489
34490 #include "gt-arm.h"