Fix memory constraint on MVE v[ld/st][2/4] instructions [PR107714]
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75
76 /* This file should be included last. */
77 #include "target-def.h"
78
79 /* Forward definitions of types. */
80 typedef struct minipool_node Mnode;
81 typedef struct minipool_fixup Mfix;
82
83 void (*arm_lang_output_object_attributes_hook)(void);
84
85 struct four_ints
86 {
87 int i[4];
88 };
89
90 /* Forward function declarations. */
91 static bool arm_const_not_ok_for_debug_p (rtx);
92 static int arm_needs_doubleword_align (machine_mode, const_tree);
93 static int arm_compute_static_chain_stack_bytes (void);
94 static arm_stack_offsets *arm_get_frame_offsets (void);
95 static void arm_compute_frame_layout (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
98 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
99 static unsigned bit_count (unsigned long);
100 static unsigned bitmap_popcount (const sbitmap);
101 static int arm_address_register_rtx_p (rtx, int);
102 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
103 static bool is_called_in_ARM_mode (tree);
104 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
105 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
106 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
107 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
108 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
109 inline static int thumb1_index_register_rtx_p (rtx, int);
110 static int thumb_far_jump_used_p (void);
111 static bool thumb_force_lr_save (void);
112 static unsigned arm_size_return_regs (void);
113 static bool arm_assemble_integer (rtx, unsigned int, int);
114 static void arm_print_operand (FILE *, rtx, int);
115 static void arm_print_operand_address (FILE *, machine_mode, rtx);
116 static bool arm_print_operand_punct_valid_p (unsigned char code);
117 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
118 static arm_cc get_arm_condition_code (rtx);
119 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
120 static const char *output_multi_immediate (rtx *, const char *, const char *,
121 int, HOST_WIDE_INT);
122 static const char *shift_op (rtx, HOST_WIDE_INT *);
123 static struct machine_function *arm_init_machine_status (void);
124 static void thumb_exit (FILE *, int);
125 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
126 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
127 static Mnode *add_minipool_forward_ref (Mfix *);
128 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_backward_ref (Mfix *);
130 static void assign_minipool_offsets (Mfix *);
131 static void arm_print_value (FILE *, rtx);
132 static void dump_minipool (rtx_insn *);
133 static int arm_barrier_cost (rtx_insn *);
134 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
135 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
136 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
137 machine_mode, rtx);
138 static void arm_reorg (void);
139 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
140 static unsigned long arm_compute_save_reg0_reg12_mask (void);
141 static unsigned long arm_compute_save_core_reg_mask (void);
142 static unsigned long arm_isr_value (tree);
143 static unsigned long arm_compute_func_type (void);
144 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
145 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
147 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
148 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
149 #endif
150 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
151 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
152 static void arm_output_function_epilogue (FILE *);
153 static void arm_output_function_prologue (FILE *);
154 static int arm_comp_type_attributes (const_tree, const_tree);
155 static void arm_set_default_type_attributes (tree);
156 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
157 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
158 static int optimal_immediate_sequence (enum rtx_code code,
159 unsigned HOST_WIDE_INT val,
160 struct four_ints *return_sequence);
161 static int optimal_immediate_sequence_1 (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence,
164 int i);
165 static int arm_get_strip_length (int);
166 static bool arm_function_ok_for_sibcall (tree, tree);
167 static machine_mode arm_promote_function_mode (const_tree,
168 machine_mode, int *,
169 const_tree, int);
170 static bool arm_return_in_memory (const_tree, const_tree);
171 static rtx arm_function_value (const_tree, const_tree, bool);
172 static rtx arm_libcall_value_1 (machine_mode);
173 static rtx arm_libcall_value (machine_mode, const_rtx);
174 static bool arm_function_value_regno_p (const unsigned int);
175 static void arm_internal_label (FILE *, const char *, unsigned long);
176 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
177 tree);
178 static bool arm_have_conditional_execution (void);
179 static bool arm_cannot_force_const_mem (machine_mode, rtx);
180 static bool arm_legitimate_constant_p (machine_mode, rtx);
181 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
182 static int arm_insn_cost (rtx_insn *, bool);
183 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
184 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
185 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
186 static void emit_constant_insn (rtx cond, rtx pattern);
187 static rtx_insn *emit_set_insn (rtx, rtx);
188 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static void arm_emit_multi_reg_pop (unsigned long);
191 static int vfp_emit_fstmd (int, int);
192 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
193 static int arm_arg_partial_bytes (cumulative_args_t,
194 const function_arg_info &);
195 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
196 static void arm_function_arg_advance (cumulative_args_t,
197 const function_arg_info &);
198 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
199 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
200 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
201 const_tree);
202 static rtx aapcs_libcall_value (machine_mode);
203 static int aapcs_select_return_coproc (const_tree, const_tree);
204
205 #ifdef OBJECT_FORMAT_ELF
206 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
207 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
208 #endif
209 #ifndef ARM_PE
210 static void arm_encode_section_info (tree, rtx, int);
211 #endif
212
213 static void arm_file_end (void);
214 static void arm_file_start (void);
215 static void arm_insert_attributes (tree, tree *);
216
217 static void arm_setup_incoming_varargs (cumulative_args_t,
218 const function_arg_info &, int *, int);
219 static bool arm_pass_by_reference (cumulative_args_t,
220 const function_arg_info &);
221 static bool arm_promote_prototypes (const_tree);
222 static bool arm_default_short_enums (void);
223 static bool arm_align_anon_bitfield (void);
224 static bool arm_return_in_msb (const_tree);
225 static bool arm_must_pass_in_stack (const function_arg_info &);
226 static bool arm_return_in_memory (const_tree, const_tree);
227 #if ARM_UNWIND_INFO
228 static void arm_unwind_emit (FILE *, rtx_insn *);
229 static bool arm_output_ttype (rtx);
230 static void arm_asm_emit_except_personality (rtx);
231 #endif
232 static void arm_asm_init_sections (void);
233 static rtx arm_dwarf_register_span (rtx);
234
235 static tree arm_cxx_guard_type (void);
236 static bool arm_cxx_guard_mask_bit (void);
237 static tree arm_get_cookie_size (tree);
238 static bool arm_cookie_has_size (void);
239 static bool arm_cxx_cdtor_returns_this (void);
240 static bool arm_cxx_key_method_may_be_inline (void);
241 static void arm_cxx_determine_class_data_visibility (tree);
242 static bool arm_cxx_class_data_always_comdat (void);
243 static bool arm_cxx_use_aeabi_atexit (void);
244 static void arm_init_libfuncs (void);
245 static tree arm_build_builtin_va_list (void);
246 static void arm_expand_builtin_va_start (tree, rtx);
247 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
248 static void arm_option_override (void);
249 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
250 struct cl_target_option *);
251 static void arm_override_options_after_change (void);
252 static void arm_option_print (FILE *, int, struct cl_target_option *);
253 static void arm_set_current_function (tree);
254 static bool arm_can_inline_p (tree, tree);
255 static void arm_relayout_function (tree);
256 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
257 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
258 static bool arm_sched_can_speculate_insn (rtx_insn *);
259 static bool arm_macro_fusion_p (void);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
263 static int arm_first_cycle_multipass_dfa_lookahead (void);
264 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
265 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
266 static bool arm_output_addr_const_extra (FILE *, rtx);
267 static bool arm_allocate_stack_slots_for_args (void);
268 static bool arm_warn_func_return (tree);
269 static tree arm_promoted_type (const_tree t);
270 static bool arm_scalar_mode_supported_p (scalar_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (scalar_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
292 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
293 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
294 static int arm_default_branch_cost (bool, bool);
295 static int arm_cortex_a5_branch_cost (bool, bool);
296 static int arm_cortex_m_branch_cost (bool, bool);
297 static int arm_cortex_m7_branch_cost (bool, bool);
298
299 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
300 rtx, const vec_perm_indices &);
301
302 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
303
304 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
305 tree vectype,
306 int misalign ATTRIBUTE_UNUSED);
307
308 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
309 bool op0_preserve_value);
310 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
311
312 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
313 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
314 const_tree);
315 static section *arm_function_section (tree, enum node_frequency, bool, bool);
316 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
317 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
318 int reloc);
319 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
320 static opt_scalar_float_mode arm_floatn_mode (int, bool);
321 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
322 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
323 static bool arm_modes_tieable_p (machine_mode, machine_mode);
324 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
325 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
326 vec<machine_mode> &,
327 vec<const char *> &, vec<rtx> &,
328 HARD_REG_SET &, location_t);
329 static const char *arm_identify_fpu_from_isa (sbitmap);
330 \f
331 /* Table of machine attributes. */
332 static const struct attribute_spec arm_attribute_table[] =
333 {
334 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
335 affects_type_identity, handler, exclude } */
336 /* Function calls made to this symbol must be done indirectly, because
337 it may lie outside of the 26 bit addressing range of a normal function
338 call. */
339 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
340 /* Whereas these functions are always known to reside within the 26 bit
341 addressing range. */
342 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Specify the procedure call conventions for a function. */
344 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
345 NULL },
346 /* Interrupt Service Routines have special prologue and epilogue requirements. */
347 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "naked", 0, 0, true, false, false, false,
352 arm_handle_fndecl_attribute, NULL },
353 #ifdef ARM_PE
354 /* ARM/PE has three new attributes:
355 interfacearm - ?
356 dllexport - for exporting a function/variable that will live in a dll
357 dllimport - for importing a function/variable from a dll
358
359 Microsoft allows multiple declspecs in one __declspec, separating
360 them with spaces. We do NOT support this. Instead, use __declspec
361 multiple times.
362 */
363 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
364 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
365 { "interfacearm", 0, 0, true, false, false, false,
366 arm_handle_fndecl_attribute, NULL },
367 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
368 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "notshared", 0, 0, false, true, false, false,
373 arm_handle_notshared_attribute, NULL },
374 #endif
375 /* ARMv8-M Security Extensions support. */
376 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
377 arm_handle_cmse_nonsecure_entry, NULL },
378 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
379 arm_handle_cmse_nonsecure_call, NULL },
380 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
381 { NULL, 0, 0, false, false, false, false, NULL, NULL }
382 };
383 \f
384 /* Initialize the GCC target structure. */
385 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
386 #undef TARGET_MERGE_DECL_ATTRIBUTES
387 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
388 #endif
389
390 #undef TARGET_CHECK_BUILTIN_CALL
391 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
392
393 #undef TARGET_LEGITIMIZE_ADDRESS
394 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
395
396 #undef TARGET_ATTRIBUTE_TABLE
397 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
398
399 #undef TARGET_INSERT_ATTRIBUTES
400 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
401
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START arm_file_start
404 #undef TARGET_ASM_FILE_END
405 #define TARGET_ASM_FILE_END arm_file_end
406
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP NULL
409 #undef TARGET_ASM_INTEGER
410 #define TARGET_ASM_INTEGER arm_assemble_integer
411
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND arm_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
418
419 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
420 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
421
422 #undef TARGET_ASM_FUNCTION_PROLOGUE
423 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
424
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
427
428 #undef TARGET_CAN_INLINE_P
429 #define TARGET_CAN_INLINE_P arm_can_inline_p
430
431 #undef TARGET_RELAYOUT_FUNCTION
432 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
433
434 #undef TARGET_OPTION_OVERRIDE
435 #define TARGET_OPTION_OVERRIDE arm_option_override
436
437 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
438 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
439
440 #undef TARGET_OPTION_RESTORE
441 #define TARGET_OPTION_RESTORE arm_option_restore
442
443 #undef TARGET_OPTION_PRINT
444 #define TARGET_OPTION_PRINT arm_option_print
445
446 #undef TARGET_COMP_TYPE_ATTRIBUTES
447 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
448
449 #undef TARGET_SCHED_CAN_SPECULATE_INSN
450 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
451
452 #undef TARGET_SCHED_MACRO_FUSION_P
453 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
454
455 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
456 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
457
458 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
459 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
460
461 #undef TARGET_SCHED_ADJUST_COST
462 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
463
464 #undef TARGET_SET_CURRENT_FUNCTION
465 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
466
467 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
468 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
469
470 #undef TARGET_SCHED_REORDER
471 #define TARGET_SCHED_REORDER arm_sched_reorder
472
473 #undef TARGET_REGISTER_MOVE_COST
474 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
475
476 #undef TARGET_MEMORY_MOVE_COST
477 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
478
479 #undef TARGET_ENCODE_SECTION_INFO
480 #ifdef ARM_PE
481 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
482 #else
483 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
484 #endif
485
486 #undef TARGET_STRIP_NAME_ENCODING
487 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
488
489 #undef TARGET_ASM_INTERNAL_LABEL
490 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
491
492 #undef TARGET_FLOATN_MODE
493 #define TARGET_FLOATN_MODE arm_floatn_mode
494
495 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
496 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
497
498 #undef TARGET_FUNCTION_VALUE
499 #define TARGET_FUNCTION_VALUE arm_function_value
500
501 #undef TARGET_LIBCALL_VALUE
502 #define TARGET_LIBCALL_VALUE arm_libcall_value
503
504 #undef TARGET_FUNCTION_VALUE_REGNO_P
505 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
506
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
511
512 #undef TARGET_RTX_COSTS
513 #define TARGET_RTX_COSTS arm_rtx_costs
514 #undef TARGET_ADDRESS_COST
515 #define TARGET_ADDRESS_COST arm_address_cost
516 #undef TARGET_INSN_COST
517 #define TARGET_INSN_COST arm_insn_cost
518
519 #undef TARGET_SHIFT_TRUNCATION_MASK
520 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
521 #undef TARGET_VECTOR_MODE_SUPPORTED_P
522 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
523 #undef TARGET_ARRAY_MODE_SUPPORTED_P
524 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
525 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
526 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
527 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
528 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
529 arm_autovectorize_vector_modes
530
531 #undef TARGET_MACHINE_DEPENDENT_REORG
532 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
533
534 #undef TARGET_INIT_BUILTINS
535 #define TARGET_INIT_BUILTINS arm_init_builtins
536 #undef TARGET_EXPAND_BUILTIN
537 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
538 #undef TARGET_BUILTIN_DECL
539 #define TARGET_BUILTIN_DECL arm_builtin_decl
540
541 #undef TARGET_INIT_LIBFUNCS
542 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
543
544 #undef TARGET_PROMOTE_FUNCTION_MODE
545 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
548 #undef TARGET_PASS_BY_REFERENCE
549 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
550 #undef TARGET_ARG_PARTIAL_BYTES
551 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
552 #undef TARGET_FUNCTION_ARG
553 #define TARGET_FUNCTION_ARG arm_function_arg
554 #undef TARGET_FUNCTION_ARG_ADVANCE
555 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
556 #undef TARGET_FUNCTION_ARG_PADDING
557 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
558 #undef TARGET_FUNCTION_ARG_BOUNDARY
559 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
560
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
563
564 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
565 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
566
567 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
568 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
569 #undef TARGET_TRAMPOLINE_INIT
570 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
571 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
572 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
573
574 #undef TARGET_WARN_FUNC_RETURN
575 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
576
577 #undef TARGET_DEFAULT_SHORT_ENUMS
578 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
579
580 #undef TARGET_ALIGN_ANON_BITFIELD
581 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
582
583 #undef TARGET_NARROW_VOLATILE_BITFIELD
584 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
585
586 #undef TARGET_CXX_GUARD_TYPE
587 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
588
589 #undef TARGET_CXX_GUARD_MASK_BIT
590 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
591
592 #undef TARGET_CXX_GET_COOKIE_SIZE
593 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
594
595 #undef TARGET_CXX_COOKIE_HAS_SIZE
596 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
597
598 #undef TARGET_CXX_CDTOR_RETURNS_THIS
599 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
600
601 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
602 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
603
604 #undef TARGET_CXX_USE_AEABI_ATEXIT
605 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
606
607 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
608 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
609 arm_cxx_determine_class_data_visibility
610
611 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
612 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
613
614 #undef TARGET_RETURN_IN_MSB
615 #define TARGET_RETURN_IN_MSB arm_return_in_msb
616
617 #undef TARGET_RETURN_IN_MEMORY
618 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
619
620 #undef TARGET_MUST_PASS_IN_STACK
621 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
622
623 #if ARM_UNWIND_INFO
624 #undef TARGET_ASM_UNWIND_EMIT
625 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
626
627 /* EABI unwinding tables use a different format for the typeinfo tables. */
628 #undef TARGET_ASM_TTYPE
629 #define TARGET_ASM_TTYPE arm_output_ttype
630
631 #undef TARGET_ARM_EABI_UNWINDER
632 #define TARGET_ARM_EABI_UNWINDER true
633
634 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
635 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
636
637 #endif /* ARM_UNWIND_INFO */
638
639 #undef TARGET_ASM_INIT_SECTIONS
640 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
641
642 #undef TARGET_DWARF_REGISTER_SPAN
643 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
644
645 #undef TARGET_CANNOT_COPY_INSN_P
646 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
647
648 #ifdef HAVE_AS_TLS
649 #undef TARGET_HAVE_TLS
650 #define TARGET_HAVE_TLS true
651 #endif
652
653 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
654 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
655
656 #undef TARGET_LEGITIMATE_CONSTANT_P
657 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
658
659 #undef TARGET_CANNOT_FORCE_CONST_MEM
660 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
661
662 #undef TARGET_MAX_ANCHOR_OFFSET
663 #define TARGET_MAX_ANCHOR_OFFSET 4095
664
665 /* The minimum is set such that the total size of the block
666 for a particular anchor is -4088 + 1 + 4095 bytes, which is
667 divisible by eight, ensuring natural spacing of anchors. */
668 #undef TARGET_MIN_ANCHOR_OFFSET
669 #define TARGET_MIN_ANCHOR_OFFSET -4088
670
671 #undef TARGET_SCHED_ISSUE_RATE
672 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
673
674 #undef TARGET_SCHED_VARIABLE_ISSUE
675 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
676
677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
679 arm_first_cycle_multipass_dfa_lookahead
680
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
683 arm_first_cycle_multipass_dfa_lookahead_guard
684
685 #undef TARGET_MANGLE_TYPE
686 #define TARGET_MANGLE_TYPE arm_mangle_type
687
688 #undef TARGET_INVALID_CONVERSION
689 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
690
691 #undef TARGET_INVALID_UNARY_OP
692 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
693
694 #undef TARGET_INVALID_BINARY_OP
695 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
696
697 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
698 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
699
700 #undef TARGET_BUILD_BUILTIN_VA_LIST
701 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
702 #undef TARGET_EXPAND_BUILTIN_VA_START
703 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
704 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
705 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
706
707 #ifdef HAVE_AS_TLS
708 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
709 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
710 #endif
711
712 #undef TARGET_LEGITIMATE_ADDRESS_P
713 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
714
715 #undef TARGET_PREFERRED_RELOAD_CLASS
716 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
717
718 #undef TARGET_PROMOTED_TYPE
719 #define TARGET_PROMOTED_TYPE arm_promoted_type
720
721 #undef TARGET_SCALAR_MODE_SUPPORTED_P
722 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
723
724 #undef TARGET_COMPUTE_FRAME_LAYOUT
725 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
726
727 #undef TARGET_FRAME_POINTER_REQUIRED
728 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
729
730 #undef TARGET_CAN_ELIMINATE
731 #define TARGET_CAN_ELIMINATE arm_can_eliminate
732
733 #undef TARGET_CONDITIONAL_REGISTER_USAGE
734 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
735
736 #undef TARGET_CLASS_LIKELY_SPILLED_P
737 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
738
739 #undef TARGET_VECTORIZE_BUILTINS
740 #define TARGET_VECTORIZE_BUILTINS
741
742 #undef TARGET_VECTOR_ALIGNMENT
743 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
744
745 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
746 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
747 arm_vector_alignment_reachable
748
749 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
750 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
751 arm_builtin_support_vector_misalignment
752
753 #undef TARGET_PREFERRED_RENAME_CLASS
754 #define TARGET_PREFERRED_RENAME_CLASS \
755 arm_preferred_rename_class
756
757 #undef TARGET_VECTORIZE_VEC_PERM_CONST
758 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
759
760 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
761 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
762 arm_builtin_vectorization_cost
763
764 #undef TARGET_CANONICALIZE_COMPARISON
765 #define TARGET_CANONICALIZE_COMPARISON \
766 arm_canonicalize_comparison
767
768 #undef TARGET_ASAN_SHADOW_OFFSET
769 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
770
771 #undef MAX_INSN_PER_IT_BLOCK
772 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
773
774 #undef TARGET_CAN_USE_DOLOOP_P
775 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
776
777 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
778 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
779
780 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
781 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
782
783 #undef TARGET_SCHED_FUSION_PRIORITY
784 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
785
786 #undef TARGET_ASM_FUNCTION_SECTION
787 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
788
789 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
790 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
791
792 #undef TARGET_SECTION_TYPE_FLAGS
793 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
794
795 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
796 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
797
798 #undef TARGET_C_EXCESS_PRECISION
799 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
800
801 /* Although the architecture reserves bits 0 and 1, only the former is
802 used for ARM/Thumb ISA selection in v7 and earlier versions. */
803 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
804 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
805
806 #undef TARGET_FIXED_CONDITION_CODE_REGS
807 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
808
809 #undef TARGET_HARD_REGNO_NREGS
810 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
811 #undef TARGET_HARD_REGNO_MODE_OK
812 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
813
814 #undef TARGET_MODES_TIEABLE_P
815 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
816
817 #undef TARGET_CAN_CHANGE_MODE_CLASS
818 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
819
820 #undef TARGET_CONSTANT_ALIGNMENT
821 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
822
823 #undef TARGET_INVALID_WITHIN_DOLOOP
824 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
825
826 #undef TARGET_MD_ASM_ADJUST
827 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
828
829 #undef TARGET_STACK_PROTECT_GUARD
830 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
831
832 #undef TARGET_VECTORIZE_GET_MASK_MODE
833 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
834 \f
835 /* Obstack for minipool constant handling. */
836 static struct obstack minipool_obstack;
837 static char * minipool_startobj;
838
839 /* The maximum number of insns skipped which
840 will be conditionalised if possible. */
841 static int max_insns_skipped = 5;
842
843 /* True if we are currently building a constant table. */
844 int making_const_table;
845
846 /* The processor for which instructions should be scheduled. */
847 enum processor_type arm_tune = TARGET_CPU_arm_none;
848
849 /* The current tuning set. */
850 const struct tune_params *current_tune;
851
852 /* Which floating point hardware to schedule for. */
853 int arm_fpu_attr;
854
855 /* Used for Thumb call_via trampolines. */
856 rtx thumb_call_via_label[14];
857 static int thumb_call_reg_needed;
858
859 /* The bits in this mask specify which instruction scheduling options should
860 be used. */
861 unsigned int tune_flags = 0;
862
863 /* The highest ARM architecture version supported by the
864 target. */
865 enum base_architecture arm_base_arch = BASE_ARCH_0;
866
867 /* Active target architecture and tuning. */
868
869 struct arm_build_target arm_active_target;
870
871 /* The following are used in the arm.md file as equivalents to bits
872 in the above two flag variables. */
873
874 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
875 int arm_arch4 = 0;
876
877 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
878 int arm_arch4t = 0;
879
880 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
881 int arm_arch5t = 0;
882
883 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
884 int arm_arch5te = 0;
885
886 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
887 int arm_arch6 = 0;
888
889 /* Nonzero if this chip supports the ARM 6K extensions. */
890 int arm_arch6k = 0;
891
892 /* Nonzero if this chip supports the ARM 6KZ extensions. */
893 int arm_arch6kz = 0;
894
895 /* Nonzero if instructions present in ARMv6-M can be used. */
896 int arm_arch6m = 0;
897
898 /* Nonzero if this chip supports the ARM 7 extensions. */
899 int arm_arch7 = 0;
900
901 /* Nonzero if this chip supports the Large Physical Address Extension. */
902 int arm_arch_lpae = 0;
903
904 /* Nonzero if instructions not present in the 'M' profile can be used. */
905 int arm_arch_notm = 0;
906
907 /* Nonzero if instructions present in ARMv7E-M can be used. */
908 int arm_arch7em = 0;
909
910 /* Nonzero if instructions present in ARMv8 can be used. */
911 int arm_arch8 = 0;
912
913 /* Nonzero if this chip supports the ARMv8.1 extensions. */
914 int arm_arch8_1 = 0;
915
916 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
917 int arm_arch8_2 = 0;
918
919 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
920 int arm_arch8_3 = 0;
921
922 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
923 int arm_arch8_4 = 0;
924 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
925 extensions. */
926 int arm_arch8_1m_main = 0;
927
928 /* Nonzero if this chip supports the FP16 instructions extension of ARM
929 Architecture 8.2. */
930 int arm_fp16_inst = 0;
931
932 /* Nonzero if this chip can benefit from load scheduling. */
933 int arm_ld_sched = 0;
934
935 /* Nonzero if this chip is a StrongARM. */
936 int arm_tune_strongarm = 0;
937
938 /* Nonzero if this chip supports Intel Wireless MMX technology. */
939 int arm_arch_iwmmxt = 0;
940
941 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
942 int arm_arch_iwmmxt2 = 0;
943
944 /* Nonzero if this chip is an XScale. */
945 int arm_arch_xscale = 0;
946
947 /* Nonzero if tuning for XScale */
948 int arm_tune_xscale = 0;
949
950 /* Nonzero if we want to tune for stores that access the write-buffer.
951 This typically means an ARM6 or ARM7 with MMU or MPU. */
952 int arm_tune_wbuf = 0;
953
954 /* Nonzero if tuning for Cortex-A9. */
955 int arm_tune_cortex_a9 = 0;
956
957 /* Nonzero if we should define __THUMB_INTERWORK__ in the
958 preprocessor.
959 XXX This is a bit of a hack, it's intended to help work around
960 problems in GLD which doesn't understand that armv5t code is
961 interworking clean. */
962 int arm_cpp_interwork = 0;
963
964 /* Nonzero if chip supports Thumb 1. */
965 int arm_arch_thumb1;
966
967 /* Nonzero if chip supports Thumb 2. */
968 int arm_arch_thumb2;
969
970 /* Nonzero if chip supports integer division instruction. */
971 int arm_arch_arm_hwdiv;
972 int arm_arch_thumb_hwdiv;
973
974 /* Nonzero if chip disallows volatile memory access in IT block. */
975 int arm_arch_no_volatile_ce;
976
977 /* Nonzero if we shouldn't use literal pools. */
978 bool arm_disable_literal_pool = false;
979
980 /* The register number to be used for the PIC offset register. */
981 unsigned arm_pic_register = INVALID_REGNUM;
982
983 enum arm_pcs arm_pcs_default;
984
985 /* For an explanation of these variables, see final_prescan_insn below. */
986 int arm_ccfsm_state;
987 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
988 enum arm_cond_code arm_current_cc;
989
990 rtx arm_target_insn;
991 int arm_target_label;
992 /* The number of conditionally executed insns, including the current insn. */
993 int arm_condexec_count = 0;
994 /* A bitmask specifying the patterns for the IT block.
995 Zero means do not output an IT block before this insn. */
996 int arm_condexec_mask = 0;
997 /* The number of bits used in arm_condexec_mask. */
998 int arm_condexec_masklen = 0;
999
1000 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1001 int arm_arch_crc = 0;
1002
1003 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1004 int arm_arch_dotprod = 0;
1005
1006 /* Nonzero if chip supports the ARMv8-M security extensions. */
1007 int arm_arch_cmse = 0;
1008
1009 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1010 int arm_m_profile_small_mul = 0;
1011
1012 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1013 int arm_arch_i8mm = 0;
1014
1015 /* Nonzero if chip supports the BFloat16 instructions. */
1016 int arm_arch_bf16 = 0;
1017
1018 /* Nonzero if chip supports the Custom Datapath Extension. */
1019 int arm_arch_cde = 0;
1020 int arm_arch_cde_coproc = 0;
1021 const int arm_arch_cde_coproc_bits[] = {
1022 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1023 };
1024
1025 /* The condition codes of the ARM, and the inverse function. */
1026 static const char * const arm_condition_codes[] =
1027 {
1028 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1029 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1030 };
1031
1032 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1033 int arm_regs_in_sequence[] =
1034 {
1035 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1036 };
1037
1038 #define DEF_FP_SYSREG(reg) #reg,
1039 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1040 FP_SYSREGS
1041 };
1042 #undef DEF_FP_SYSREG
1043
1044 #define ARM_LSL_NAME "lsl"
1045 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1046
1047 #define THUMB2_WORK_REGS \
1048 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1049 | (1 << SP_REGNUM) \
1050 | (1 << PC_REGNUM) \
1051 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1052 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1053 : 0)))
1054 \f
1055 /* Initialization code. */
1056
1057 struct cpu_tune
1058 {
1059 enum processor_type scheduler;
1060 unsigned int tune_flags;
1061 const struct tune_params *tune;
1062 };
1063
1064 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1065 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1066 { \
1067 num_slots, \
1068 l1_size, \
1069 l1_line_size \
1070 }
1071
1072 /* arm generic vectorizer costs. */
1073 static const
1074 struct cpu_vec_costs arm_default_vec_cost = {
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 1, /* vec_unalign_load_cost. */
1083 1, /* vec_unalign_store_cost. */
1084 1, /* vec_store_cost. */
1085 3, /* cond_taken_branch_cost. */
1086 1, /* cond_not_taken_branch_cost. */
1087 };
1088
1089 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1090 #include "aarch-cost-tables.h"
1091
1092
1093
1094 const struct cpu_cost_table cortexa9_extra_costs =
1095 {
1096 /* ALU */
1097 {
1098 0, /* arith. */
1099 0, /* logical. */
1100 0, /* shift. */
1101 COSTS_N_INSNS (1), /* shift_reg. */
1102 COSTS_N_INSNS (1), /* arith_shift. */
1103 COSTS_N_INSNS (2), /* arith_shift_reg. */
1104 0, /* log_shift. */
1105 COSTS_N_INSNS (1), /* log_shift_reg. */
1106 COSTS_N_INSNS (1), /* extend. */
1107 COSTS_N_INSNS (2), /* extend_arith. */
1108 COSTS_N_INSNS (1), /* bfi. */
1109 COSTS_N_INSNS (1), /* bfx. */
1110 0, /* clz. */
1111 0, /* rev. */
1112 0, /* non_exec. */
1113 true /* non_exec_costs_exec. */
1114 },
1115 {
1116 /* MULT SImode */
1117 {
1118 COSTS_N_INSNS (3), /* simple. */
1119 COSTS_N_INSNS (3), /* flag_setting. */
1120 COSTS_N_INSNS (2), /* extend. */
1121 COSTS_N_INSNS (3), /* add. */
1122 COSTS_N_INSNS (2), /* extend_add. */
1123 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1124 },
1125 /* MULT DImode */
1126 {
1127 0, /* simple (N/A). */
1128 0, /* flag_setting (N/A). */
1129 COSTS_N_INSNS (4), /* extend. */
1130 0, /* add (N/A). */
1131 COSTS_N_INSNS (4), /* extend_add. */
1132 0 /* idiv (N/A). */
1133 }
1134 },
1135 /* LD/ST */
1136 {
1137 COSTS_N_INSNS (2), /* load. */
1138 COSTS_N_INSNS (2), /* load_sign_extend. */
1139 COSTS_N_INSNS (2), /* ldrd. */
1140 COSTS_N_INSNS (2), /* ldm_1st. */
1141 1, /* ldm_regs_per_insn_1st. */
1142 2, /* ldm_regs_per_insn_subsequent. */
1143 COSTS_N_INSNS (5), /* loadf. */
1144 COSTS_N_INSNS (5), /* loadd. */
1145 COSTS_N_INSNS (1), /* load_unaligned. */
1146 COSTS_N_INSNS (2), /* store. */
1147 COSTS_N_INSNS (2), /* strd. */
1148 COSTS_N_INSNS (2), /* stm_1st. */
1149 1, /* stm_regs_per_insn_1st. */
1150 2, /* stm_regs_per_insn_subsequent. */
1151 COSTS_N_INSNS (1), /* storef. */
1152 COSTS_N_INSNS (1), /* stored. */
1153 COSTS_N_INSNS (1), /* store_unaligned. */
1154 COSTS_N_INSNS (1), /* loadv. */
1155 COSTS_N_INSNS (1) /* storev. */
1156 },
1157 {
1158 /* FP SFmode */
1159 {
1160 COSTS_N_INSNS (14), /* div. */
1161 COSTS_N_INSNS (4), /* mult. */
1162 COSTS_N_INSNS (7), /* mult_addsub. */
1163 COSTS_N_INSNS (30), /* fma. */
1164 COSTS_N_INSNS (3), /* addsub. */
1165 COSTS_N_INSNS (1), /* fpconst. */
1166 COSTS_N_INSNS (1), /* neg. */
1167 COSTS_N_INSNS (3), /* compare. */
1168 COSTS_N_INSNS (3), /* widen. */
1169 COSTS_N_INSNS (3), /* narrow. */
1170 COSTS_N_INSNS (3), /* toint. */
1171 COSTS_N_INSNS (3), /* fromint. */
1172 COSTS_N_INSNS (3) /* roundint. */
1173 },
1174 /* FP DFmode */
1175 {
1176 COSTS_N_INSNS (24), /* div. */
1177 COSTS_N_INSNS (5), /* mult. */
1178 COSTS_N_INSNS (8), /* mult_addsub. */
1179 COSTS_N_INSNS (30), /* fma. */
1180 COSTS_N_INSNS (3), /* addsub. */
1181 COSTS_N_INSNS (1), /* fpconst. */
1182 COSTS_N_INSNS (1), /* neg. */
1183 COSTS_N_INSNS (3), /* compare. */
1184 COSTS_N_INSNS (3), /* widen. */
1185 COSTS_N_INSNS (3), /* narrow. */
1186 COSTS_N_INSNS (3), /* toint. */
1187 COSTS_N_INSNS (3), /* fromint. */
1188 COSTS_N_INSNS (3) /* roundint. */
1189 }
1190 },
1191 /* Vector */
1192 {
1193 COSTS_N_INSNS (1), /* alu. */
1194 COSTS_N_INSNS (4), /* mult. */
1195 COSTS_N_INSNS (1), /* movi. */
1196 COSTS_N_INSNS (2), /* dup. */
1197 COSTS_N_INSNS (2) /* extract. */
1198 }
1199 };
1200
1201 const struct cpu_cost_table cortexa8_extra_costs =
1202 {
1203 /* ALU */
1204 {
1205 0, /* arith. */
1206 0, /* logical. */
1207 COSTS_N_INSNS (1), /* shift. */
1208 0, /* shift_reg. */
1209 COSTS_N_INSNS (1), /* arith_shift. */
1210 0, /* arith_shift_reg. */
1211 COSTS_N_INSNS (1), /* log_shift. */
1212 0, /* log_shift_reg. */
1213 0, /* extend. */
1214 0, /* extend_arith. */
1215 0, /* bfi. */
1216 0, /* bfx. */
1217 0, /* clz. */
1218 0, /* rev. */
1219 0, /* non_exec. */
1220 true /* non_exec_costs_exec. */
1221 },
1222 {
1223 /* MULT SImode */
1224 {
1225 COSTS_N_INSNS (1), /* simple. */
1226 COSTS_N_INSNS (1), /* flag_setting. */
1227 COSTS_N_INSNS (1), /* extend. */
1228 COSTS_N_INSNS (1), /* add. */
1229 COSTS_N_INSNS (1), /* extend_add. */
1230 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1231 },
1232 /* MULT DImode */
1233 {
1234 0, /* simple (N/A). */
1235 0, /* flag_setting (N/A). */
1236 COSTS_N_INSNS (2), /* extend. */
1237 0, /* add (N/A). */
1238 COSTS_N_INSNS (2), /* extend_add. */
1239 0 /* idiv (N/A). */
1240 }
1241 },
1242 /* LD/ST */
1243 {
1244 COSTS_N_INSNS (1), /* load. */
1245 COSTS_N_INSNS (1), /* load_sign_extend. */
1246 COSTS_N_INSNS (1), /* ldrd. */
1247 COSTS_N_INSNS (1), /* ldm_1st. */
1248 1, /* ldm_regs_per_insn_1st. */
1249 2, /* ldm_regs_per_insn_subsequent. */
1250 COSTS_N_INSNS (1), /* loadf. */
1251 COSTS_N_INSNS (1), /* loadd. */
1252 COSTS_N_INSNS (1), /* load_unaligned. */
1253 COSTS_N_INSNS (1), /* store. */
1254 COSTS_N_INSNS (1), /* strd. */
1255 COSTS_N_INSNS (1), /* stm_1st. */
1256 1, /* stm_regs_per_insn_1st. */
1257 2, /* stm_regs_per_insn_subsequent. */
1258 COSTS_N_INSNS (1), /* storef. */
1259 COSTS_N_INSNS (1), /* stored. */
1260 COSTS_N_INSNS (1), /* store_unaligned. */
1261 COSTS_N_INSNS (1), /* loadv. */
1262 COSTS_N_INSNS (1) /* storev. */
1263 },
1264 {
1265 /* FP SFmode */
1266 {
1267 COSTS_N_INSNS (36), /* div. */
1268 COSTS_N_INSNS (11), /* mult. */
1269 COSTS_N_INSNS (20), /* mult_addsub. */
1270 COSTS_N_INSNS (30), /* fma. */
1271 COSTS_N_INSNS (9), /* addsub. */
1272 COSTS_N_INSNS (3), /* fpconst. */
1273 COSTS_N_INSNS (3), /* neg. */
1274 COSTS_N_INSNS (6), /* compare. */
1275 COSTS_N_INSNS (4), /* widen. */
1276 COSTS_N_INSNS (4), /* narrow. */
1277 COSTS_N_INSNS (8), /* toint. */
1278 COSTS_N_INSNS (8), /* fromint. */
1279 COSTS_N_INSNS (8) /* roundint. */
1280 },
1281 /* FP DFmode */
1282 {
1283 COSTS_N_INSNS (64), /* div. */
1284 COSTS_N_INSNS (16), /* mult. */
1285 COSTS_N_INSNS (25), /* mult_addsub. */
1286 COSTS_N_INSNS (30), /* fma. */
1287 COSTS_N_INSNS (9), /* addsub. */
1288 COSTS_N_INSNS (3), /* fpconst. */
1289 COSTS_N_INSNS (3), /* neg. */
1290 COSTS_N_INSNS (6), /* compare. */
1291 COSTS_N_INSNS (6), /* widen. */
1292 COSTS_N_INSNS (6), /* narrow. */
1293 COSTS_N_INSNS (8), /* toint. */
1294 COSTS_N_INSNS (8), /* fromint. */
1295 COSTS_N_INSNS (8) /* roundint. */
1296 }
1297 },
1298 /* Vector */
1299 {
1300 COSTS_N_INSNS (1), /* alu. */
1301 COSTS_N_INSNS (4), /* mult. */
1302 COSTS_N_INSNS (1), /* movi. */
1303 COSTS_N_INSNS (2), /* dup. */
1304 COSTS_N_INSNS (2) /* extract. */
1305 }
1306 };
1307
1308 const struct cpu_cost_table cortexa5_extra_costs =
1309 {
1310 /* ALU */
1311 {
1312 0, /* arith. */
1313 0, /* logical. */
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1326 0, /* non_exec. */
1327 true /* non_exec_costs_exec. */
1328 },
1329
1330 {
1331 /* MULT SImode */
1332 {
1333 0, /* simple. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1339 },
1340 /* MULT DImode */
1341 {
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1345 0, /* add. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1347 0 /* idiv (N/A). */
1348 }
1349 },
1350 /* LD/ST */
1351 {
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (6), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (4), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1), /* store_unaligned. */
1369 COSTS_N_INSNS (1), /* loadv. */
1370 COSTS_N_INSNS (1) /* storev. */
1371 },
1372 {
1373 /* FP SFmode */
1374 {
1375 COSTS_N_INSNS (15), /* div. */
1376 COSTS_N_INSNS (3), /* mult. */
1377 COSTS_N_INSNS (7), /* mult_addsub. */
1378 COSTS_N_INSNS (7), /* fma. */
1379 COSTS_N_INSNS (3), /* addsub. */
1380 COSTS_N_INSNS (3), /* fpconst. */
1381 COSTS_N_INSNS (3), /* neg. */
1382 COSTS_N_INSNS (3), /* compare. */
1383 COSTS_N_INSNS (3), /* widen. */
1384 COSTS_N_INSNS (3), /* narrow. */
1385 COSTS_N_INSNS (3), /* toint. */
1386 COSTS_N_INSNS (3), /* fromint. */
1387 COSTS_N_INSNS (3) /* roundint. */
1388 },
1389 /* FP DFmode */
1390 {
1391 COSTS_N_INSNS (30), /* div. */
1392 COSTS_N_INSNS (6), /* mult. */
1393 COSTS_N_INSNS (10), /* mult_addsub. */
1394 COSTS_N_INSNS (7), /* fma. */
1395 COSTS_N_INSNS (3), /* addsub. */
1396 COSTS_N_INSNS (3), /* fpconst. */
1397 COSTS_N_INSNS (3), /* neg. */
1398 COSTS_N_INSNS (3), /* compare. */
1399 COSTS_N_INSNS (3), /* widen. */
1400 COSTS_N_INSNS (3), /* narrow. */
1401 COSTS_N_INSNS (3), /* toint. */
1402 COSTS_N_INSNS (3), /* fromint. */
1403 COSTS_N_INSNS (3) /* roundint. */
1404 }
1405 },
1406 /* Vector */
1407 {
1408 COSTS_N_INSNS (1), /* alu. */
1409 COSTS_N_INSNS (4), /* mult. */
1410 COSTS_N_INSNS (1), /* movi. */
1411 COSTS_N_INSNS (2), /* dup. */
1412 COSTS_N_INSNS (2) /* extract. */
1413 }
1414 };
1415
1416
1417 const struct cpu_cost_table cortexa7_extra_costs =
1418 {
1419 /* ALU */
1420 {
1421 0, /* arith. */
1422 0, /* logical. */
1423 COSTS_N_INSNS (1), /* shift. */
1424 COSTS_N_INSNS (1), /* shift_reg. */
1425 COSTS_N_INSNS (1), /* arith_shift. */
1426 COSTS_N_INSNS (1), /* arith_shift_reg. */
1427 COSTS_N_INSNS (1), /* log_shift. */
1428 COSTS_N_INSNS (1), /* log_shift_reg. */
1429 COSTS_N_INSNS (1), /* extend. */
1430 COSTS_N_INSNS (1), /* extend_arith. */
1431 COSTS_N_INSNS (1), /* bfi. */
1432 COSTS_N_INSNS (1), /* bfx. */
1433 COSTS_N_INSNS (1), /* clz. */
1434 COSTS_N_INSNS (1), /* rev. */
1435 0, /* non_exec. */
1436 true /* non_exec_costs_exec. */
1437 },
1438
1439 {
1440 /* MULT SImode */
1441 {
1442 0, /* simple. */
1443 COSTS_N_INSNS (1), /* flag_setting. */
1444 COSTS_N_INSNS (1), /* extend. */
1445 COSTS_N_INSNS (1), /* add. */
1446 COSTS_N_INSNS (1), /* extend_add. */
1447 COSTS_N_INSNS (7) /* idiv. */
1448 },
1449 /* MULT DImode */
1450 {
1451 0, /* simple (N/A). */
1452 0, /* flag_setting (N/A). */
1453 COSTS_N_INSNS (1), /* extend. */
1454 0, /* add. */
1455 COSTS_N_INSNS (2), /* extend_add. */
1456 0 /* idiv (N/A). */
1457 }
1458 },
1459 /* LD/ST */
1460 {
1461 COSTS_N_INSNS (1), /* load. */
1462 COSTS_N_INSNS (1), /* load_sign_extend. */
1463 COSTS_N_INSNS (3), /* ldrd. */
1464 COSTS_N_INSNS (1), /* ldm_1st. */
1465 1, /* ldm_regs_per_insn_1st. */
1466 2, /* ldm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* loadf. */
1468 COSTS_N_INSNS (2), /* loadd. */
1469 COSTS_N_INSNS (1), /* load_unaligned. */
1470 COSTS_N_INSNS (1), /* store. */
1471 COSTS_N_INSNS (3), /* strd. */
1472 COSTS_N_INSNS (1), /* stm_1st. */
1473 1, /* stm_regs_per_insn_1st. */
1474 2, /* stm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (2), /* storef. */
1476 COSTS_N_INSNS (2), /* stored. */
1477 COSTS_N_INSNS (1), /* store_unaligned. */
1478 COSTS_N_INSNS (1), /* loadv. */
1479 COSTS_N_INSNS (1) /* storev. */
1480 },
1481 {
1482 /* FP SFmode */
1483 {
1484 COSTS_N_INSNS (15), /* div. */
1485 COSTS_N_INSNS (3), /* mult. */
1486 COSTS_N_INSNS (7), /* mult_addsub. */
1487 COSTS_N_INSNS (7), /* fma. */
1488 COSTS_N_INSNS (3), /* addsub. */
1489 COSTS_N_INSNS (3), /* fpconst. */
1490 COSTS_N_INSNS (3), /* neg. */
1491 COSTS_N_INSNS (3), /* compare. */
1492 COSTS_N_INSNS (3), /* widen. */
1493 COSTS_N_INSNS (3), /* narrow. */
1494 COSTS_N_INSNS (3), /* toint. */
1495 COSTS_N_INSNS (3), /* fromint. */
1496 COSTS_N_INSNS (3) /* roundint. */
1497 },
1498 /* FP DFmode */
1499 {
1500 COSTS_N_INSNS (30), /* div. */
1501 COSTS_N_INSNS (6), /* mult. */
1502 COSTS_N_INSNS (10), /* mult_addsub. */
1503 COSTS_N_INSNS (7), /* fma. */
1504 COSTS_N_INSNS (3), /* addsub. */
1505 COSTS_N_INSNS (3), /* fpconst. */
1506 COSTS_N_INSNS (3), /* neg. */
1507 COSTS_N_INSNS (3), /* compare. */
1508 COSTS_N_INSNS (3), /* widen. */
1509 COSTS_N_INSNS (3), /* narrow. */
1510 COSTS_N_INSNS (3), /* toint. */
1511 COSTS_N_INSNS (3), /* fromint. */
1512 COSTS_N_INSNS (3) /* roundint. */
1513 }
1514 },
1515 /* Vector */
1516 {
1517 COSTS_N_INSNS (1), /* alu. */
1518 COSTS_N_INSNS (4), /* mult. */
1519 COSTS_N_INSNS (1), /* movi. */
1520 COSTS_N_INSNS (2), /* dup. */
1521 COSTS_N_INSNS (2) /* extract. */
1522 }
1523 };
1524
1525 const struct cpu_cost_table cortexa12_extra_costs =
1526 {
1527 /* ALU */
1528 {
1529 0, /* arith. */
1530 0, /* logical. */
1531 0, /* shift. */
1532 COSTS_N_INSNS (1), /* shift_reg. */
1533 COSTS_N_INSNS (1), /* arith_shift. */
1534 COSTS_N_INSNS (1), /* arith_shift_reg. */
1535 COSTS_N_INSNS (1), /* log_shift. */
1536 COSTS_N_INSNS (1), /* log_shift_reg. */
1537 0, /* extend. */
1538 COSTS_N_INSNS (1), /* extend_arith. */
1539 0, /* bfi. */
1540 COSTS_N_INSNS (1), /* bfx. */
1541 COSTS_N_INSNS (1), /* clz. */
1542 COSTS_N_INSNS (1), /* rev. */
1543 0, /* non_exec. */
1544 true /* non_exec_costs_exec. */
1545 },
1546 /* MULT SImode */
1547 {
1548 {
1549 COSTS_N_INSNS (2), /* simple. */
1550 COSTS_N_INSNS (3), /* flag_setting. */
1551 COSTS_N_INSNS (2), /* extend. */
1552 COSTS_N_INSNS (3), /* add. */
1553 COSTS_N_INSNS (2), /* extend_add. */
1554 COSTS_N_INSNS (18) /* idiv. */
1555 },
1556 /* MULT DImode */
1557 {
1558 0, /* simple (N/A). */
1559 0, /* flag_setting (N/A). */
1560 COSTS_N_INSNS (3), /* extend. */
1561 0, /* add (N/A). */
1562 COSTS_N_INSNS (3), /* extend_add. */
1563 0 /* idiv (N/A). */
1564 }
1565 },
1566 /* LD/ST */
1567 {
1568 COSTS_N_INSNS (3), /* load. */
1569 COSTS_N_INSNS (3), /* load_sign_extend. */
1570 COSTS_N_INSNS (3), /* ldrd. */
1571 COSTS_N_INSNS (3), /* ldm_1st. */
1572 1, /* ldm_regs_per_insn_1st. */
1573 2, /* ldm_regs_per_insn_subsequent. */
1574 COSTS_N_INSNS (3), /* loadf. */
1575 COSTS_N_INSNS (3), /* loadd. */
1576 0, /* load_unaligned. */
1577 0, /* store. */
1578 0, /* strd. */
1579 0, /* stm_1st. */
1580 1, /* stm_regs_per_insn_1st. */
1581 2, /* stm_regs_per_insn_subsequent. */
1582 COSTS_N_INSNS (2), /* storef. */
1583 COSTS_N_INSNS (2), /* stored. */
1584 0, /* store_unaligned. */
1585 COSTS_N_INSNS (1), /* loadv. */
1586 COSTS_N_INSNS (1) /* storev. */
1587 },
1588 {
1589 /* FP SFmode */
1590 {
1591 COSTS_N_INSNS (17), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1604 },
1605 /* FP DFmode */
1606 {
1607 COSTS_N_INSNS (31), /* div. */
1608 COSTS_N_INSNS (4), /* mult. */
1609 COSTS_N_INSNS (8), /* mult_addsub. */
1610 COSTS_N_INSNS (8), /* fma. */
1611 COSTS_N_INSNS (4), /* addsub. */
1612 COSTS_N_INSNS (2), /* fpconst. */
1613 COSTS_N_INSNS (2), /* neg. */
1614 COSTS_N_INSNS (2), /* compare. */
1615 COSTS_N_INSNS (4), /* widen. */
1616 COSTS_N_INSNS (4), /* narrow. */
1617 COSTS_N_INSNS (4), /* toint. */
1618 COSTS_N_INSNS (4), /* fromint. */
1619 COSTS_N_INSNS (4) /* roundint. */
1620 }
1621 },
1622 /* Vector */
1623 {
1624 COSTS_N_INSNS (1), /* alu. */
1625 COSTS_N_INSNS (4), /* mult. */
1626 COSTS_N_INSNS (1), /* movi. */
1627 COSTS_N_INSNS (2), /* dup. */
1628 COSTS_N_INSNS (2) /* extract. */
1629 }
1630 };
1631
1632 const struct cpu_cost_table cortexa15_extra_costs =
1633 {
1634 /* ALU */
1635 {
1636 0, /* arith. */
1637 0, /* logical. */
1638 0, /* shift. */
1639 0, /* shift_reg. */
1640 COSTS_N_INSNS (1), /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1642 COSTS_N_INSNS (1), /* log_shift. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1644 0, /* extend. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1646 COSTS_N_INSNS (1), /* bfi. */
1647 0, /* bfx. */
1648 0, /* clz. */
1649 0, /* rev. */
1650 0, /* non_exec. */
1651 true /* non_exec_costs_exec. */
1652 },
1653 /* MULT SImode */
1654 {
1655 {
1656 COSTS_N_INSNS (2), /* simple. */
1657 COSTS_N_INSNS (3), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (2), /* add. */
1660 COSTS_N_INSNS (2), /* extend_add. */
1661 COSTS_N_INSNS (18) /* idiv. */
1662 },
1663 /* MULT DImode */
1664 {
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (3), /* extend. */
1668 0, /* add (N/A). */
1669 COSTS_N_INSNS (3), /* extend_add. */
1670 0 /* idiv (N/A). */
1671 }
1672 },
1673 /* LD/ST */
1674 {
1675 COSTS_N_INSNS (3), /* load. */
1676 COSTS_N_INSNS (3), /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (4), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 2, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (4), /* loadf. */
1682 COSTS_N_INSNS (4), /* loadd. */
1683 0, /* load_unaligned. */
1684 0, /* store. */
1685 0, /* strd. */
1686 COSTS_N_INSNS (1), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 2, /* stm_regs_per_insn_subsequent. */
1689 0, /* storef. */
1690 0, /* stored. */
1691 0, /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1694 },
1695 {
1696 /* FP SFmode */
1697 {
1698 COSTS_N_INSNS (17), /* div. */
1699 COSTS_N_INSNS (4), /* mult. */
1700 COSTS_N_INSNS (8), /* mult_addsub. */
1701 COSTS_N_INSNS (8), /* fma. */
1702 COSTS_N_INSNS (4), /* addsub. */
1703 COSTS_N_INSNS (2), /* fpconst. */
1704 COSTS_N_INSNS (2), /* neg. */
1705 COSTS_N_INSNS (5), /* compare. */
1706 COSTS_N_INSNS (4), /* widen. */
1707 COSTS_N_INSNS (4), /* narrow. */
1708 COSTS_N_INSNS (4), /* toint. */
1709 COSTS_N_INSNS (4), /* fromint. */
1710 COSTS_N_INSNS (4) /* roundint. */
1711 },
1712 /* FP DFmode */
1713 {
1714 COSTS_N_INSNS (31), /* div. */
1715 COSTS_N_INSNS (4), /* mult. */
1716 COSTS_N_INSNS (8), /* mult_addsub. */
1717 COSTS_N_INSNS (8), /* fma. */
1718 COSTS_N_INSNS (4), /* addsub. */
1719 COSTS_N_INSNS (2), /* fpconst. */
1720 COSTS_N_INSNS (2), /* neg. */
1721 COSTS_N_INSNS (2), /* compare. */
1722 COSTS_N_INSNS (4), /* widen. */
1723 COSTS_N_INSNS (4), /* narrow. */
1724 COSTS_N_INSNS (4), /* toint. */
1725 COSTS_N_INSNS (4), /* fromint. */
1726 COSTS_N_INSNS (4) /* roundint. */
1727 }
1728 },
1729 /* Vector */
1730 {
1731 COSTS_N_INSNS (1), /* alu. */
1732 COSTS_N_INSNS (4), /* mult. */
1733 COSTS_N_INSNS (1), /* movi. */
1734 COSTS_N_INSNS (2), /* dup. */
1735 COSTS_N_INSNS (2) /* extract. */
1736 }
1737 };
1738
1739 const struct cpu_cost_table v7m_extra_costs =
1740 {
1741 /* ALU */
1742 {
1743 0, /* arith. */
1744 0, /* logical. */
1745 0, /* shift. */
1746 0, /* shift_reg. */
1747 0, /* arith_shift. */
1748 COSTS_N_INSNS (1), /* arith_shift_reg. */
1749 0, /* log_shift. */
1750 COSTS_N_INSNS (1), /* log_shift_reg. */
1751 0, /* extend. */
1752 COSTS_N_INSNS (1), /* extend_arith. */
1753 0, /* bfi. */
1754 0, /* bfx. */
1755 0, /* clz. */
1756 0, /* rev. */
1757 COSTS_N_INSNS (1), /* non_exec. */
1758 false /* non_exec_costs_exec. */
1759 },
1760 {
1761 /* MULT SImode */
1762 {
1763 COSTS_N_INSNS (1), /* simple. */
1764 COSTS_N_INSNS (1), /* flag_setting. */
1765 COSTS_N_INSNS (2), /* extend. */
1766 COSTS_N_INSNS (1), /* add. */
1767 COSTS_N_INSNS (3), /* extend_add. */
1768 COSTS_N_INSNS (8) /* idiv. */
1769 },
1770 /* MULT DImode */
1771 {
1772 0, /* simple (N/A). */
1773 0, /* flag_setting (N/A). */
1774 COSTS_N_INSNS (2), /* extend. */
1775 0, /* add (N/A). */
1776 COSTS_N_INSNS (3), /* extend_add. */
1777 0 /* idiv (N/A). */
1778 }
1779 },
1780 /* LD/ST */
1781 {
1782 COSTS_N_INSNS (2), /* load. */
1783 0, /* load_sign_extend. */
1784 COSTS_N_INSNS (3), /* ldrd. */
1785 COSTS_N_INSNS (2), /* ldm_1st. */
1786 1, /* ldm_regs_per_insn_1st. */
1787 1, /* ldm_regs_per_insn_subsequent. */
1788 COSTS_N_INSNS (2), /* loadf. */
1789 COSTS_N_INSNS (3), /* loadd. */
1790 COSTS_N_INSNS (1), /* load_unaligned. */
1791 COSTS_N_INSNS (2), /* store. */
1792 COSTS_N_INSNS (3), /* strd. */
1793 COSTS_N_INSNS (2), /* stm_1st. */
1794 1, /* stm_regs_per_insn_1st. */
1795 1, /* stm_regs_per_insn_subsequent. */
1796 COSTS_N_INSNS (2), /* storef. */
1797 COSTS_N_INSNS (3), /* stored. */
1798 COSTS_N_INSNS (1), /* store_unaligned. */
1799 COSTS_N_INSNS (1), /* loadv. */
1800 COSTS_N_INSNS (1) /* storev. */
1801 },
1802 {
1803 /* FP SFmode */
1804 {
1805 COSTS_N_INSNS (7), /* div. */
1806 COSTS_N_INSNS (2), /* mult. */
1807 COSTS_N_INSNS (5), /* mult_addsub. */
1808 COSTS_N_INSNS (3), /* fma. */
1809 COSTS_N_INSNS (1), /* addsub. */
1810 0, /* fpconst. */
1811 0, /* neg. */
1812 0, /* compare. */
1813 0, /* widen. */
1814 0, /* narrow. */
1815 0, /* toint. */
1816 0, /* fromint. */
1817 0 /* roundint. */
1818 },
1819 /* FP DFmode */
1820 {
1821 COSTS_N_INSNS (15), /* div. */
1822 COSTS_N_INSNS (5), /* mult. */
1823 COSTS_N_INSNS (7), /* mult_addsub. */
1824 COSTS_N_INSNS (7), /* fma. */
1825 COSTS_N_INSNS (3), /* addsub. */
1826 0, /* fpconst. */
1827 0, /* neg. */
1828 0, /* compare. */
1829 0, /* widen. */
1830 0, /* narrow. */
1831 0, /* toint. */
1832 0, /* fromint. */
1833 0 /* roundint. */
1834 }
1835 },
1836 /* Vector */
1837 {
1838 COSTS_N_INSNS (1), /* alu. */
1839 COSTS_N_INSNS (4), /* mult. */
1840 COSTS_N_INSNS (1), /* movi. */
1841 COSTS_N_INSNS (2), /* dup. */
1842 COSTS_N_INSNS (2) /* extract. */
1843 }
1844 };
1845
1846 const struct addr_mode_cost_table generic_addr_mode_costs =
1847 {
1848 /* int. */
1849 {
1850 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1851 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1852 COSTS_N_INSNS (0) /* AMO_WB. */
1853 },
1854 /* float. */
1855 {
1856 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1857 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1858 COSTS_N_INSNS (0) /* AMO_WB. */
1859 },
1860 /* vector. */
1861 {
1862 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1863 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1864 COSTS_N_INSNS (0) /* AMO_WB. */
1865 }
1866 };
1867
1868 const struct tune_params arm_slowmul_tune =
1869 {
1870 &generic_extra_costs, /* Insn extra costs. */
1871 &generic_addr_mode_costs, /* Addressing mode costs. */
1872 NULL, /* Sched adj cost. */
1873 arm_default_branch_cost,
1874 &arm_default_vec_cost,
1875 3, /* Constant limit. */
1876 5, /* Max cond insns. */
1877 8, /* Memset max inline. */
1878 1, /* Issue rate. */
1879 ARM_PREFETCH_NOT_BENEFICIAL,
1880 tune_params::PREF_CONST_POOL_TRUE,
1881 tune_params::PREF_LDRD_FALSE,
1882 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1884 tune_params::DISPARAGE_FLAGS_NEITHER,
1885 tune_params::PREF_NEON_STRINGOPS_FALSE,
1886 tune_params::FUSE_NOTHING,
1887 tune_params::SCHED_AUTOPREF_OFF
1888 };
1889
1890 const struct tune_params arm_fastmul_tune =
1891 {
1892 &generic_extra_costs, /* Insn extra costs. */
1893 &generic_addr_mode_costs, /* Addressing mode costs. */
1894 NULL, /* Sched adj cost. */
1895 arm_default_branch_cost,
1896 &arm_default_vec_cost,
1897 1, /* Constant limit. */
1898 5, /* Max cond insns. */
1899 8, /* Memset max inline. */
1900 1, /* Issue rate. */
1901 ARM_PREFETCH_NOT_BENEFICIAL,
1902 tune_params::PREF_CONST_POOL_TRUE,
1903 tune_params::PREF_LDRD_FALSE,
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1905 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1906 tune_params::DISPARAGE_FLAGS_NEITHER,
1907 tune_params::PREF_NEON_STRINGOPS_FALSE,
1908 tune_params::FUSE_NOTHING,
1909 tune_params::SCHED_AUTOPREF_OFF
1910 };
1911
1912 /* StrongARM has early execution of branches, so a sequence that is worth
1913 skipping is shorter. Set max_insns_skipped to a lower value. */
1914
1915 const struct tune_params arm_strongarm_tune =
1916 {
1917 &generic_extra_costs, /* Insn extra costs. */
1918 &generic_addr_mode_costs, /* Addressing mode costs. */
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 3, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 1, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_TRUE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_STRINGOPS_FALSE,
1933 tune_params::FUSE_NOTHING,
1934 tune_params::SCHED_AUTOPREF_OFF
1935 };
1936
1937 const struct tune_params arm_xscale_tune =
1938 {
1939 &generic_extra_costs, /* Insn extra costs. */
1940 &generic_addr_mode_costs, /* Addressing mode costs. */
1941 xscale_sched_adjust_cost,
1942 arm_default_branch_cost,
1943 &arm_default_vec_cost,
1944 2, /* Constant limit. */
1945 3, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 1, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 tune_params::PREF_CONST_POOL_TRUE,
1950 tune_params::PREF_LDRD_FALSE,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER,
1954 tune_params::PREF_NEON_STRINGOPS_FALSE,
1955 tune_params::FUSE_NOTHING,
1956 tune_params::SCHED_AUTOPREF_OFF
1957 };
1958
1959 const struct tune_params arm_9e_tune =
1960 {
1961 &generic_extra_costs, /* Insn extra costs. */
1962 &generic_addr_mode_costs, /* Addressing mode costs. */
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 1, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_TRUE,
1972 tune_params::PREF_LDRD_FALSE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER,
1976 tune_params::PREF_NEON_STRINGOPS_FALSE,
1977 tune_params::FUSE_NOTHING,
1978 tune_params::SCHED_AUTOPREF_OFF
1979 };
1980
1981 const struct tune_params arm_marvell_pj4_tune =
1982 {
1983 &generic_extra_costs, /* Insn extra costs. */
1984 &generic_addr_mode_costs, /* Addressing mode costs. */
1985 NULL, /* Sched adj cost. */
1986 arm_default_branch_cost,
1987 &arm_default_vec_cost,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 2, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 tune_params::PREF_CONST_POOL_TRUE,
1994 tune_params::PREF_LDRD_FALSE,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER,
1998 tune_params::PREF_NEON_STRINGOPS_FALSE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_v6t2_tune =
2004 {
2005 &generic_extra_costs, /* Insn extra costs. */
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 1, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_STRINGOPS_FALSE,
2021 tune_params::FUSE_NOTHING,
2022 tune_params::SCHED_AUTOPREF_OFF
2023 };
2024
2025
2026 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2027 const struct tune_params arm_cortex_tune =
2028 {
2029 &generic_extra_costs,
2030 &generic_addr_mode_costs, /* Addressing mode costs. */
2031 NULL, /* Sched adj cost. */
2032 arm_default_branch_cost,
2033 &arm_default_vec_cost,
2034 1, /* Constant limit. */
2035 5, /* Max cond insns. */
2036 8, /* Memset max inline. */
2037 2, /* Issue rate. */
2038 ARM_PREFETCH_NOT_BENEFICIAL,
2039 tune_params::PREF_CONST_POOL_FALSE,
2040 tune_params::PREF_LDRD_FALSE,
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2043 tune_params::DISPARAGE_FLAGS_NEITHER,
2044 tune_params::PREF_NEON_STRINGOPS_FALSE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_OFF
2047 };
2048
2049 const struct tune_params arm_cortex_a8_tune =
2050 {
2051 &cortexa8_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 2, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_STRINGOPS_TRUE,
2067 tune_params::FUSE_NOTHING,
2068 tune_params::SCHED_AUTOPREF_OFF
2069 };
2070
2071 const struct tune_params arm_cortex_a7_tune =
2072 {
2073 &cortexa7_extra_costs,
2074 &generic_addr_mode_costs, /* Addressing mode costs. */
2075 NULL, /* Sched adj cost. */
2076 arm_default_branch_cost,
2077 &arm_default_vec_cost,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 8, /* Memset max inline. */
2081 2, /* Issue rate. */
2082 ARM_PREFETCH_NOT_BENEFICIAL,
2083 tune_params::PREF_CONST_POOL_FALSE,
2084 tune_params::PREF_LDRD_FALSE,
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2087 tune_params::DISPARAGE_FLAGS_NEITHER,
2088 tune_params::PREF_NEON_STRINGOPS_TRUE,
2089 tune_params::FUSE_NOTHING,
2090 tune_params::SCHED_AUTOPREF_OFF
2091 };
2092
2093 const struct tune_params arm_cortex_a15_tune =
2094 {
2095 &cortexa15_extra_costs,
2096 &generic_addr_mode_costs, /* Addressing mode costs. */
2097 NULL, /* Sched adj cost. */
2098 arm_default_branch_cost,
2099 &arm_default_vec_cost,
2100 1, /* Constant limit. */
2101 2, /* Max cond insns. */
2102 8, /* Memset max inline. */
2103 3, /* Issue rate. */
2104 ARM_PREFETCH_NOT_BENEFICIAL,
2105 tune_params::PREF_CONST_POOL_FALSE,
2106 tune_params::PREF_LDRD_TRUE,
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2108 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2109 tune_params::DISPARAGE_FLAGS_ALL,
2110 tune_params::PREF_NEON_STRINGOPS_TRUE,
2111 tune_params::FUSE_NOTHING,
2112 tune_params::SCHED_AUTOPREF_FULL
2113 };
2114
2115 const struct tune_params arm_cortex_a35_tune =
2116 {
2117 &cortexa53_extra_costs,
2118 &generic_addr_mode_costs, /* Addressing mode costs. */
2119 NULL, /* Sched adj cost. */
2120 arm_default_branch_cost,
2121 &arm_default_vec_cost,
2122 1, /* Constant limit. */
2123 5, /* Max cond insns. */
2124 8, /* Memset max inline. */
2125 1, /* Issue rate. */
2126 ARM_PREFETCH_NOT_BENEFICIAL,
2127 tune_params::PREF_CONST_POOL_FALSE,
2128 tune_params::PREF_LDRD_FALSE,
2129 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2131 tune_params::DISPARAGE_FLAGS_NEITHER,
2132 tune_params::PREF_NEON_STRINGOPS_TRUE,
2133 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2134 tune_params::SCHED_AUTOPREF_OFF
2135 };
2136
2137 const struct tune_params arm_cortex_a53_tune =
2138 {
2139 &cortexa53_extra_costs,
2140 &generic_addr_mode_costs, /* Addressing mode costs. */
2141 NULL, /* Sched adj cost. */
2142 arm_default_branch_cost,
2143 &arm_default_vec_cost,
2144 1, /* Constant limit. */
2145 5, /* Max cond insns. */
2146 8, /* Memset max inline. */
2147 2, /* Issue rate. */
2148 ARM_PREFETCH_NOT_BENEFICIAL,
2149 tune_params::PREF_CONST_POOL_FALSE,
2150 tune_params::PREF_LDRD_FALSE,
2151 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2153 tune_params::DISPARAGE_FLAGS_NEITHER,
2154 tune_params::PREF_NEON_STRINGOPS_TRUE,
2155 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2156 tune_params::SCHED_AUTOPREF_OFF
2157 };
2158
2159 const struct tune_params arm_cortex_a57_tune =
2160 {
2161 &cortexa57_extra_costs,
2162 &generic_addr_mode_costs, /* Addressing mode costs. */
2163 NULL, /* Sched adj cost. */
2164 arm_default_branch_cost,
2165 &arm_default_vec_cost,
2166 1, /* Constant limit. */
2167 2, /* Max cond insns. */
2168 8, /* Memset max inline. */
2169 3, /* Issue rate. */
2170 ARM_PREFETCH_NOT_BENEFICIAL,
2171 tune_params::PREF_CONST_POOL_FALSE,
2172 tune_params::PREF_LDRD_TRUE,
2173 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2175 tune_params::DISPARAGE_FLAGS_ALL,
2176 tune_params::PREF_NEON_STRINGOPS_TRUE,
2177 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2178 tune_params::SCHED_AUTOPREF_FULL
2179 };
2180
2181 const struct tune_params arm_exynosm1_tune =
2182 {
2183 &exynosm1_extra_costs,
2184 &generic_addr_mode_costs, /* Addressing mode costs. */
2185 NULL, /* Sched adj cost. */
2186 arm_default_branch_cost,
2187 &arm_default_vec_cost,
2188 1, /* Constant limit. */
2189 2, /* Max cond insns. */
2190 8, /* Memset max inline. */
2191 3, /* Issue rate. */
2192 ARM_PREFETCH_NOT_BENEFICIAL,
2193 tune_params::PREF_CONST_POOL_FALSE,
2194 tune_params::PREF_LDRD_TRUE,
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2197 tune_params::DISPARAGE_FLAGS_ALL,
2198 tune_params::PREF_NEON_STRINGOPS_TRUE,
2199 tune_params::FUSE_NOTHING,
2200 tune_params::SCHED_AUTOPREF_OFF
2201 };
2202
2203 const struct tune_params arm_xgene1_tune =
2204 {
2205 &xgene1_extra_costs,
2206 &generic_addr_mode_costs, /* Addressing mode costs. */
2207 NULL, /* Sched adj cost. */
2208 arm_default_branch_cost,
2209 &arm_default_vec_cost,
2210 1, /* Constant limit. */
2211 2, /* Max cond insns. */
2212 32, /* Memset max inline. */
2213 4, /* Issue rate. */
2214 ARM_PREFETCH_NOT_BENEFICIAL,
2215 tune_params::PREF_CONST_POOL_FALSE,
2216 tune_params::PREF_LDRD_TRUE,
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2218 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2219 tune_params::DISPARAGE_FLAGS_ALL,
2220 tune_params::PREF_NEON_STRINGOPS_FALSE,
2221 tune_params::FUSE_NOTHING,
2222 tune_params::SCHED_AUTOPREF_OFF
2223 };
2224
2225 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2226 less appealing. Set max_insns_skipped to a low value. */
2227
2228 const struct tune_params arm_cortex_a5_tune =
2229 {
2230 &cortexa5_extra_costs,
2231 &generic_addr_mode_costs, /* Addressing mode costs. */
2232 NULL, /* Sched adj cost. */
2233 arm_cortex_a5_branch_cost,
2234 &arm_default_vec_cost,
2235 1, /* Constant limit. */
2236 1, /* Max cond insns. */
2237 8, /* Memset max inline. */
2238 2, /* Issue rate. */
2239 ARM_PREFETCH_NOT_BENEFICIAL,
2240 tune_params::PREF_CONST_POOL_FALSE,
2241 tune_params::PREF_LDRD_FALSE,
2242 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2243 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2244 tune_params::DISPARAGE_FLAGS_NEITHER,
2245 tune_params::PREF_NEON_STRINGOPS_TRUE,
2246 tune_params::FUSE_NOTHING,
2247 tune_params::SCHED_AUTOPREF_OFF
2248 };
2249
2250 const struct tune_params arm_cortex_a9_tune =
2251 {
2252 &cortexa9_extra_costs,
2253 &generic_addr_mode_costs, /* Addressing mode costs. */
2254 cortex_a9_sched_adjust_cost,
2255 arm_default_branch_cost,
2256 &arm_default_vec_cost,
2257 1, /* Constant limit. */
2258 5, /* Max cond insns. */
2259 8, /* Memset max inline. */
2260 2, /* Issue rate. */
2261 ARM_PREFETCH_BENEFICIAL(4,32,32),
2262 tune_params::PREF_CONST_POOL_FALSE,
2263 tune_params::PREF_LDRD_FALSE,
2264 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2265 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2266 tune_params::DISPARAGE_FLAGS_NEITHER,
2267 tune_params::PREF_NEON_STRINGOPS_FALSE,
2268 tune_params::FUSE_NOTHING,
2269 tune_params::SCHED_AUTOPREF_OFF
2270 };
2271
2272 const struct tune_params arm_cortex_a12_tune =
2273 {
2274 &cortexa12_extra_costs,
2275 &generic_addr_mode_costs, /* Addressing mode costs. */
2276 NULL, /* Sched adj cost. */
2277 arm_default_branch_cost,
2278 &arm_default_vec_cost, /* Vectorizer costs. */
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 2, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_FALSE,
2285 tune_params::PREF_LDRD_TRUE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_ALL,
2289 tune_params::PREF_NEON_STRINGOPS_TRUE,
2290 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2291 tune_params::SCHED_AUTOPREF_OFF
2292 };
2293
2294 const struct tune_params arm_cortex_a73_tune =
2295 {
2296 &cortexa57_extra_costs,
2297 &generic_addr_mode_costs, /* Addressing mode costs. */
2298 NULL, /* Sched adj cost. */
2299 arm_default_branch_cost,
2300 &arm_default_vec_cost, /* Vectorizer costs. */
2301 1, /* Constant limit. */
2302 2, /* Max cond insns. */
2303 8, /* Memset max inline. */
2304 2, /* Issue rate. */
2305 ARM_PREFETCH_NOT_BENEFICIAL,
2306 tune_params::PREF_CONST_POOL_FALSE,
2307 tune_params::PREF_LDRD_TRUE,
2308 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2309 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2310 tune_params::DISPARAGE_FLAGS_ALL,
2311 tune_params::PREF_NEON_STRINGOPS_TRUE,
2312 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2313 tune_params::SCHED_AUTOPREF_FULL
2314 };
2315
2316 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2317 cycle to execute each, so a MOVW/MOVT pair costs two cycles. An LDR from
2318 the constant pool likewise takes two cycles to execute, but mildly
2319 increases pipelining opportunity (consecutive loads/stores can be
2320 pipelined together, saving one cycle), and may also improve icache
2321 utilisation. Hence we prefer the constant pool for such processors. */
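/* Editorial sketch (not from the original source): the two ways of
   materialising a 32-bit constant compared above are, in GNU assembler
   syntax, roughly

       movw    r0, #:lower16:some_const    @ 1 cycle
       movt    r0, #:upper16:some_const    @ 1 cycle

   versus a literal-pool load

       ldr     r0, =some_const             @ 2 cycles, but can pipeline with
                                           @ neighbouring loads/stores

   ("some_const" is a placeholder name).  The pool load costs no extra
   cycles in isolation, which is why PREF_CONST_POOL_TRUE is selected in
   arm_v7m_tune below.  */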
2322
2323 const struct tune_params arm_v7m_tune =
2324 {
2325 &v7m_extra_costs,
2326 &generic_addr_mode_costs, /* Addressing mode costs. */
2327 NULL, /* Sched adj cost. */
2328 arm_cortex_m_branch_cost,
2329 &arm_default_vec_cost,
2330 1, /* Constant limit. */
2331 2, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL,
2335 tune_params::PREF_CONST_POOL_TRUE,
2336 tune_params::PREF_LDRD_FALSE,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER,
2340 tune_params::PREF_NEON_STRINGOPS_FALSE,
2341 tune_params::FUSE_NOTHING,
2342 tune_params::SCHED_AUTOPREF_OFF
2343 };
2344
2345 /* Cortex-M7 tuning. */
2346
2347 const struct tune_params arm_cortex_m7_tune =
2348 {
2349 &v7m_extra_costs,
2350 &generic_addr_mode_costs, /* Addressing mode costs. */
2351 NULL, /* Sched adj cost. */
2352 arm_cortex_m7_branch_cost,
2353 &arm_default_vec_cost,
2354 0, /* Constant limit. */
2355 1, /* Max cond insns. */
2356 8, /* Memset max inline. */
2357 2, /* Issue rate. */
2358 ARM_PREFETCH_NOT_BENEFICIAL,
2359 tune_params::PREF_CONST_POOL_TRUE,
2360 tune_params::PREF_LDRD_FALSE,
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2362 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2363 tune_params::DISPARAGE_FLAGS_NEITHER,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE,
2365 tune_params::FUSE_NOTHING,
2366 tune_params::SCHED_AUTOPREF_OFF
2367 };
2368
2369 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2370 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2371 cortex-m23. */
2372 const struct tune_params arm_v6m_tune =
2373 {
2374 &generic_extra_costs, /* Insn extra costs. */
2375 &generic_addr_mode_costs, /* Addressing mode costs. */
2376 NULL, /* Sched adj cost. */
2377 arm_default_branch_cost,
2378 &arm_default_vec_cost, /* Vectorizer costs. */
2379 1, /* Constant limit. */
2380 5, /* Max cond insns. */
2381 8, /* Memset max inline. */
2382 1, /* Issue rate. */
2383 ARM_PREFETCH_NOT_BENEFICIAL,
2384 tune_params::PREF_CONST_POOL_FALSE,
2385 tune_params::PREF_LDRD_FALSE,
2386 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2387 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2388 tune_params::DISPARAGE_FLAGS_NEITHER,
2389 tune_params::PREF_NEON_STRINGOPS_FALSE,
2390 tune_params::FUSE_NOTHING,
2391 tune_params::SCHED_AUTOPREF_OFF
2392 };
2393
2394 const struct tune_params arm_fa726te_tune =
2395 {
2396 &generic_extra_costs, /* Insn extra costs. */
2397 &generic_addr_mode_costs, /* Addressing mode costs. */
2398 fa726te_sched_adjust_cost,
2399 arm_default_branch_cost,
2400 &arm_default_vec_cost,
2401 1, /* Constant limit. */
2402 5, /* Max cond insns. */
2403 8, /* Memset max inline. */
2404 2, /* Issue rate. */
2405 ARM_PREFETCH_NOT_BENEFICIAL,
2406 tune_params::PREF_CONST_POOL_TRUE,
2407 tune_params::PREF_LDRD_FALSE,
2408 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2409 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2410 tune_params::DISPARAGE_FLAGS_NEITHER,
2411 tune_params::PREF_NEON_STRINGOPS_FALSE,
2412 tune_params::FUSE_NOTHING,
2413 tune_params::SCHED_AUTOPREF_OFF
2414 };
2415
2416 /* Auto-generated CPU, FPU and architecture tables. */
2417 #include "arm-cpu-data.h"
2418
2419 /* The name of the preprocessor macro to define for this architecture. PROFILE
2420 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2421 is thus chosen to be big enough to hold the longest architecture name. */
2422
2423 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2424
2425 /* Supported TLS relocations. */
2426
2427 enum tls_reloc {
2428 TLS_GD32,
2429 TLS_GD32_FDPIC,
2430 TLS_LDM32,
2431 TLS_LDM32_FDPIC,
2432 TLS_LDO32,
2433 TLS_IE32,
2434 TLS_IE32_FDPIC,
2435 TLS_LE32,
2436 TLS_DESCSEQ /* GNU scheme */
2437 };
2438
2439 /* The maximum number of insns to be used when loading a constant. */
2440 inline static int
2441 arm_constant_limit (bool size_p)
2442 {
2443 return size_p ? 1 : current_tune->constant_limit;
2444 }
2445
2446 /* Emit an insn that's a simple single-set. Both the operands must be known
2447 to be valid. */
2448 inline static rtx_insn *
2449 emit_set_insn (rtx x, rtx y)
2450 {
2451 return emit_insn (gen_rtx_SET (x, y));
2452 }
2453
2454 /* Return the number of bits set in VALUE. */
2455 static unsigned
2456 bit_count (unsigned long value)
2457 {
2458 unsigned long count = 0;
2459
2460 while (value)
2461 {
2462 count++;
2463 value &= value - 1; /* Clear the least-significant set bit. */
2464 }
2465
2466 return count;
2467 }
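/* Editorial aside: a short trace of the loop above for VALUE = 0b101100
   shows how "value &= value - 1" clears one set bit per iteration:
       0b101100 & 0b101011 = 0b101000   (count = 1)
       0b101000 & 0b100111 = 0b100000   (count = 2)
       0b100000 & 0b011111 = 0b000000   (count = 3)
   so the loop executes exactly once per set bit.  */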
2468
2469 /* Return the number of bits set in BMAP. */
2470 static unsigned
2471 bitmap_popcount (const sbitmap bmap)
2472 {
2473 unsigned int count = 0;
2474 unsigned int n = 0;
2475 sbitmap_iterator sbi;
2476
2477 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2478 count++;
2479 return count;
2480 }
2481
2482 typedef struct
2483 {
2484 machine_mode mode;
2485 const char *name;
2486 } arm_fixed_mode_set;
2487
2488 /* A small helper for setting fixed-point libfuncs. */
2489
2490 static void
2491 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2492 const char *funcname, const char *modename,
2493 int num_suffix)
2494 {
2495 char buffer[50];
2496
2497 if (num_suffix == 0)
2498 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2499 else
2500 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2501
2502 set_optab_libfunc (optable, mode, buffer);
2503 }
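/* Editorial example of the names built by the sprintf calls above: a call
   such as arm_set_fixed_optab_libfunc (add_optab, E_SAmode, "add", "sa", 3)
   registers "__gnu_addsa3", while passing a num_suffix of 0 would simply
   drop the trailing digit and give "__gnu_addsa".  */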
2504
2505 static void
2506 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2507 machine_mode from, const char *funcname,
2508 const char *toname, const char *fromname)
2509 {
2510 char buffer[50];
2511 const char *maybe_suffix_2 = "";
2512
2513 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2514 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2515 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2516 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2517 maybe_suffix_2 = "2";
2518
2519 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2520 maybe_suffix_2);
2521
2522 set_conv_libfunc (optable, to, from, buffer);
2523 }
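/* Editorial example: converting QQmode to HQmode (both signed fractional
   modes) satisfies the fixed-bit.h test above and so takes the "2" suffix,
   registering "__gnu_fractqqhq2"; converting SAmode to SImode does not
   (SImode is not a fixed-point mode), giving "__gnu_fractsasi".  */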
2524
2525 static GTY(()) rtx speculation_barrier_libfunc;
2526
2527 /* Record that we have no arithmetic or comparison libfuncs for
2528 machine mode MODE. */
2529
2530 static void
2531 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2532 {
2533 /* Arithmetic. */
2534 set_optab_libfunc (add_optab, mode, NULL);
2535 set_optab_libfunc (sdiv_optab, mode, NULL);
2536 set_optab_libfunc (smul_optab, mode, NULL);
2537 set_optab_libfunc (neg_optab, mode, NULL);
2538 set_optab_libfunc (sub_optab, mode, NULL);
2539
2540 /* Comparisons. */
2541 set_optab_libfunc (eq_optab, mode, NULL);
2542 set_optab_libfunc (ne_optab, mode, NULL);
2543 set_optab_libfunc (lt_optab, mode, NULL);
2544 set_optab_libfunc (le_optab, mode, NULL);
2545 set_optab_libfunc (ge_optab, mode, NULL);
2546 set_optab_libfunc (gt_optab, mode, NULL);
2547 set_optab_libfunc (unord_optab, mode, NULL);
2548 }
2549
2550 /* Set up library functions unique to ARM. */
2551 static void
2552 arm_init_libfuncs (void)
2553 {
2554 machine_mode mode_iter;
2555
2556 /* For Linux, we have access to kernel support for atomic operations. */
2557 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2558 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2559
2560 /* There are no special library functions unless we are using the
2561 ARM BPABI. */
2562 if (!TARGET_BPABI)
2563 return;
2564
2565 /* The functions below are described in Section 4 of the "Run-Time
2566 ABI for the ARM architecture", Version 1.0. */
2567
2568 /* Double-precision floating-point arithmetic. Table 2. */
2569 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2570 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2571 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2572 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2573 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2574
2575 /* Double-precision comparisons. Table 3. */
2576 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2577 set_optab_libfunc (ne_optab, DFmode, NULL);
2578 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2579 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2580 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2581 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2582 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2583
2584 /* Single-precision floating-point arithmetic. Table 4. */
2585 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2586 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2587 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2588 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2589 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2590
2591 /* Single-precision comparisons. Table 5. */
2592 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2593 set_optab_libfunc (ne_optab, SFmode, NULL);
2594 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2595 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2596 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2597 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2598 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2599
2600 /* Floating-point to integer conversions. Table 6. */
2601 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2602 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2603 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2604 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2605 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2606 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2607 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2608 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2609
2610 /* Conversions between floating types. Table 7. */
2611 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2612 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2613
2614 /* Integer to floating-point conversions. Table 8. */
2615 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2616 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2617 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2618 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2619 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2620 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2621 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2622 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2623
2624 /* Long long. Table 9. */
2625 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2626 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2627 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2628 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2629 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2630 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2631 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2632 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2633
2634 /* Integer (32/32->32) division. \S 4.3.1. */
2635 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2636 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2637
2638 /* The divmod functions are designed so that they can be used for
2639 plain division, even though they return both the quotient and the
2640 remainder. The quotient is returned in the usual location (i.e.,
2641 r0 for SImode, {r0, r1} for DImode), just as would be expected
2642 for an ordinary division routine. Because the AAPCS calling
2643 conventions specify that all of { r0, r1, r2, r3 } are
2644 call-clobbered registers, there is no need to tell the compiler
2645 explicitly that those registers are clobbered by these
2646 routines. */
2647 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2648 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
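/* Editorial note: __aeabi_ldivmod, for instance, returns the quotient in
   {r0, r1} and the remainder in {r2, r3}; a plain DImode division simply
   uses the quotient and ignores {r2, r3}, which is why the divmod entry
   points can stand in for sdiv_optab/udiv_optab here.  */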
2649
2650 /* For SImode division the ABI provides div-without-mod routines,
2651 which are faster. */
2652 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2653 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2654
2655 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2656 divmod libcalls instead. */
2657 set_optab_libfunc (smod_optab, DImode, NULL);
2658 set_optab_libfunc (umod_optab, DImode, NULL);
2659 set_optab_libfunc (smod_optab, SImode, NULL);
2660 set_optab_libfunc (umod_optab, SImode, NULL);
2661
2662 /* Half-precision float operations. The compiler handles all operations
2663 with NULL libfuncs by converting to SFmode. */
2664 switch (arm_fp16_format)
2665 {
2666 case ARM_FP16_FORMAT_IEEE:
2667 case ARM_FP16_FORMAT_ALTERNATIVE:
2668
2669 /* Conversions. */
2670 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2671 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2672 ? "__gnu_f2h_ieee"
2673 : "__gnu_f2h_alternative"));
2674 set_conv_libfunc (sext_optab, SFmode, HFmode,
2675 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2676 ? "__gnu_h2f_ieee"
2677 : "__gnu_h2f_alternative"));
2678
2679 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2680 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2681 ? "__gnu_d2h_ieee"
2682 : "__gnu_d2h_alternative"));
2683
2684 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2685 break;
2686
2687 default:
2688 break;
2689 }
2690
2691 /* For all possible libcalls in BFmode, record NULL. */
2692 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2693 {
2694 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2695 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2696 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2697 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2698 }
2699 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2700
2701 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2702 {
2703 const arm_fixed_mode_set fixed_arith_modes[] =
2704 {
2705 { E_QQmode, "qq" },
2706 { E_UQQmode, "uqq" },
2707 { E_HQmode, "hq" },
2708 { E_UHQmode, "uhq" },
2709 { E_SQmode, "sq" },
2710 { E_USQmode, "usq" },
2711 { E_DQmode, "dq" },
2712 { E_UDQmode, "udq" },
2713 { E_TQmode, "tq" },
2714 { E_UTQmode, "utq" },
2715 { E_HAmode, "ha" },
2716 { E_UHAmode, "uha" },
2717 { E_SAmode, "sa" },
2718 { E_USAmode, "usa" },
2719 { E_DAmode, "da" },
2720 { E_UDAmode, "uda" },
2721 { E_TAmode, "ta" },
2722 { E_UTAmode, "uta" }
2723 };
2724 const arm_fixed_mode_set fixed_conv_modes[] =
2725 {
2726 { E_QQmode, "qq" },
2727 { E_UQQmode, "uqq" },
2728 { E_HQmode, "hq" },
2729 { E_UHQmode, "uhq" },
2730 { E_SQmode, "sq" },
2731 { E_USQmode, "usq" },
2732 { E_DQmode, "dq" },
2733 { E_UDQmode, "udq" },
2734 { E_TQmode, "tq" },
2735 { E_UTQmode, "utq" },
2736 { E_HAmode, "ha" },
2737 { E_UHAmode, "uha" },
2738 { E_SAmode, "sa" },
2739 { E_USAmode, "usa" },
2740 { E_DAmode, "da" },
2741 { E_UDAmode, "uda" },
2742 { E_TAmode, "ta" },
2743 { E_UTAmode, "uta" },
2744 { E_QImode, "qi" },
2745 { E_HImode, "hi" },
2746 { E_SImode, "si" },
2747 { E_DImode, "di" },
2748 { E_TImode, "ti" },
2749 { E_SFmode, "sf" },
2750 { E_DFmode, "df" }
2751 };
2752 unsigned int i, j;
2753
2754 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2755 {
2756 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2757 "add", fixed_arith_modes[i].name, 3);
2758 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2759 "ssadd", fixed_arith_modes[i].name, 3);
2760 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2761 "usadd", fixed_arith_modes[i].name, 3);
2762 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2763 "sub", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2765 "sssub", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2767 "ussub", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2769 "mul", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2771 "ssmul", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2773 "usmul", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2775 "div", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2777 "udiv", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2779 "ssdiv", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2781 "usdiv", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2783 "neg", fixed_arith_modes[i].name, 2);
2784 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2785 "ssneg", fixed_arith_modes[i].name, 2);
2786 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2787 "usneg", fixed_arith_modes[i].name, 2);
2788 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2789 "ashl", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2791 "ashr", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2793 "lshr", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2795 "ssashl", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2797 "usashl", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2799 "cmp", fixed_arith_modes[i].name, 2);
2800 }
2801
2802 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2803 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2804 {
2805 if (i == j
2806 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2807 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2808 continue;
2809
2810 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2811 fixed_conv_modes[j].mode, "fract",
2812 fixed_conv_modes[i].name,
2813 fixed_conv_modes[j].name);
2814 arm_set_fixed_conv_libfunc (satfract_optab,
2815 fixed_conv_modes[i].mode,
2816 fixed_conv_modes[j].mode, "satfract",
2817 fixed_conv_modes[i].name,
2818 fixed_conv_modes[j].name);
2819 arm_set_fixed_conv_libfunc (fractuns_optab,
2820 fixed_conv_modes[i].mode,
2821 fixed_conv_modes[j].mode, "fractuns",
2822 fixed_conv_modes[i].name,
2823 fixed_conv_modes[j].name);
2824 arm_set_fixed_conv_libfunc (satfractuns_optab,
2825 fixed_conv_modes[i].mode,
2826 fixed_conv_modes[j].mode, "satfractuns",
2827 fixed_conv_modes[i].name,
2828 fixed_conv_modes[j].name);
2829 }
2830 }
2831
2832 if (TARGET_AAPCS_BASED)
2833 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2834
2835 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2836 }
2837
2838 /* On AAPCS systems, this is the "struct __va_list". */
2839 static GTY(()) tree va_list_type;
2840
2841 /* Return the type to use as __builtin_va_list. */
2842 static tree
2843 arm_build_builtin_va_list (void)
2844 {
2845 tree va_list_name;
2846 tree ap_field;
2847
2848 if (!TARGET_AAPCS_BASED)
2849 return std_build_builtin_va_list ();
2850
2851 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2852 defined as:
2853
2854 struct __va_list
2855 {
2856 void *__ap;
2857 };
2858
2859 The C Library ABI further reinforces this definition in \S
2860 4.1.
2861
2862 We must follow this definition exactly. The structure tag
2863 name is visible in C++ mangled names, and thus forms a part
2864 of the ABI. The field name may be used by people who
2865 #include <stdarg.h>. */
2866 /* Create the type. */
2867 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2868 /* Give it the required name. */
2869 va_list_name = build_decl (BUILTINS_LOCATION,
2870 TYPE_DECL,
2871 get_identifier ("__va_list"),
2872 va_list_type);
2873 DECL_ARTIFICIAL (va_list_name) = 1;
2874 TYPE_NAME (va_list_type) = va_list_name;
2875 TYPE_STUB_DECL (va_list_type) = va_list_name;
2876 /* Create the __ap field. */
2877 ap_field = build_decl (BUILTINS_LOCATION,
2878 FIELD_DECL,
2879 get_identifier ("__ap"),
2880 ptr_type_node);
2881 DECL_ARTIFICIAL (ap_field) = 1;
2882 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2883 TYPE_FIELDS (va_list_type) = ap_field;
2884 /* Compute its layout. */
2885 layout_type (va_list_type);
2886
2887 return va_list_type;
2888 }
2889
2890 /* Return an expression of type "void *" pointing to the next
2891 available argument in a variable-argument list. VALIST is the
2892 user-level va_list object, of type __builtin_va_list. */
2893 static tree
2894 arm_extract_valist_ptr (tree valist)
2895 {
2896 if (TREE_TYPE (valist) == error_mark_node)
2897 return error_mark_node;
2898
2899 /* On an AAPCS target, the pointer is stored within "struct
2900 va_list". */
2901 if (TARGET_AAPCS_BASED)
2902 {
2903 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2904 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2905 valist, ap_field, NULL_TREE);
2906 }
2907
2908 return valist;
2909 }
2910
2911 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2912 static void
2913 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2914 {
2915 valist = arm_extract_valist_ptr (valist);
2916 std_expand_builtin_va_start (valist, nextarg);
2917 }
2918
2919 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2920 static tree
2921 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2922 gimple_seq *post_p)
2923 {
2924 valist = arm_extract_valist_ptr (valist);
2925 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2926 }
2927
2928 /* Check any incompatible options that the user has specified. */
2929 static void
2930 arm_option_check_internal (struct gcc_options *opts)
2931 {
2932 int flags = opts->x_target_flags;
2933
2934 /* iWMMXt and NEON are incompatible. */
2935 if (TARGET_IWMMXT
2936 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2937 error ("iWMMXt and NEON are incompatible");
2938
2939 /* Make sure that the processor choice does not conflict with any of the
2940 other command line choices. */
2941 if (TARGET_ARM_P (flags)
2942 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2943 error ("target CPU does not support ARM mode");
2944
2945 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2946 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2947 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2948
2949 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2950 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2951
2952 /* If this target is normally configured to use APCS frames, warn if they
2953 are turned off and debugging is turned on. */
2954 if (TARGET_ARM_P (flags)
2955 && write_symbols != NO_DEBUG
2956 && !TARGET_APCS_FRAME
2957 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2958 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2959 "debugging");
2960
2961 /* iWMMXt unsupported under Thumb mode. */
2962 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2963 error ("iWMMXt unsupported under Thumb mode");
2964
2965 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2966 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2967
2968 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2969 {
2970 error ("RTP PIC is incompatible with Thumb");
2971 flag_pic = 0;
2972 }
2973
2974 if (target_pure_code || target_slow_flash_data)
2975 {
2976 const char *flag = (target_pure_code ? "-mpure-code" :
2977 "-mslow-flash-data");
2978 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2979
2980 /* We only support -mslow-flash-data on M-profile targets with
2981 MOVT. */
2982 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2983 error ("%s only supports non-pic code on M-profile targets with the "
2984 "MOVT instruction", flag);
2985
2986 /* We only support -mpure-code on M-profile targets. */
2987 if (target_pure_code && common_unsupported_modes)
2988 error ("%s only supports non-pic code on M-profile targets", flag);
2989
2990 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2991 -mword-relocations forbids relocation of MOVT/MOVW. */
2992 if (target_word_relocations)
2993 error ("%s incompatible with %<-mword-relocations%>", flag);
2994 }
2995 }
2996
2997 /* Recompute the global settings depending on target attribute options. */
2998
2999 static void
3000 arm_option_params_internal (void)
3001 {
3002 /* If we are not using the default (ARM mode) section anchor offset
3003 ranges, then set the correct ranges now. */
3004 if (TARGET_THUMB1)
3005 {
3006 /* Thumb-1 LDR instructions cannot have negative offsets.
3007 Permissible positive offset ranges are 5-bit (for byte loads),
3008 6-bit (for halfword loads), or 7-bit (for word loads).
3009 Empirical results suggest a 7-bit anchor range gives the best
3010 overall code size. */
3011 targetm.min_anchor_offset = 0;
3012 targetm.max_anchor_offset = 127;
3013 }
3014 else if (TARGET_THUMB2)
3015 {
3016 /* The minimum is set such that the total size of the block
3017 for a particular anchor is 248 + 1 + 4095 bytes, which is
3018 divisible by eight, ensuring natural spacing of anchors. */
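/* Editorial aside: concretely, 248 + 1 + 4095 = 4344 bytes = 8 * 543. */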
3019 targetm.min_anchor_offset = -248;
3020 targetm.max_anchor_offset = 4095;
3021 }
3022 else
3023 {
3024 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3025 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3026 }
3027
3028 /* Increase the number of conditional instructions with -Os. */
3029 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3030
3031 /* For THUMB2, we limit the conditional sequence to one IT block. */
3032 if (TARGET_THUMB2)
3033 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3034
3035 if (TARGET_THUMB1)
3036 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3037 else
3038 targetm.md_asm_adjust = arm_md_asm_adjust;
3039 }
3040
3041 /* True if -mflip-thumb should next add an attribute for the default
3042 mode, false if it should next add an attribute for the opposite mode. */
3043 static GTY(()) bool thumb_flipper;
3044
3045 /* Options after initial target override. */
3046 static GTY(()) tree init_optimize;
3047
3048 static void
3049 arm_override_options_after_change_1 (struct gcc_options *opts,
3050 struct gcc_options *opts_set)
3051 {
3052 /* -falign-functions without argument: supply one. */
3053 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3054 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3055 && opts->x_optimize_size ? "2" : "4";
3056 }
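/* Editorial example of the default chosen above: compiling with
   -mthumb -Os -falign-functions (no argument) ends up as
   -falign-functions=2, while any other combination gets
   -falign-functions=4.  */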
3057
3058 /* Implement targetm.override_options_after_change. */
3059
3060 static void
3061 arm_override_options_after_change (void)
3062 {
3063 arm_override_options_after_change_1 (&global_options, &global_options_set);
3064 }
3065
3066 /* Implement TARGET_OPTION_RESTORE. */
3067 static void
3068 arm_option_restore (struct gcc_options */* opts */,
3069 struct gcc_options */* opts_set */,
3070 struct cl_target_option *ptr)
3071 {
3072 arm_configure_build_target (&arm_active_target, ptr, false);
3073 arm_option_reconfigure_globals ();
3074 }
3075
3076 /* Reset options between modes that the user has specified. */
3077 static void
3078 arm_option_override_internal (struct gcc_options *opts,
3079 struct gcc_options *opts_set)
3080 {
3081 arm_override_options_after_change_1 (opts, opts_set);
3082
3083 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3084 {
3085 /* The default is to enable interworking, so this warning message would
3086 be confusing to users who have just compiled with
3087 e.g., -march=armv4. */
3088 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3089 opts->x_target_flags &= ~MASK_INTERWORK;
3090 }
3091
3092 if (TARGET_THUMB_P (opts->x_target_flags)
3093 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3094 {
3095 warning (0, "target CPU does not support THUMB instructions");
3096 opts->x_target_flags &= ~MASK_THUMB;
3097 }
3098
3099 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3100 {
3101 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3102 opts->x_target_flags &= ~MASK_APCS_FRAME;
3103 }
3104
3105 /* Callee super interworking implies thumb interworking. Adding
3106 this to the flags here simplifies the logic elsewhere. */
3107 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3108 opts->x_target_flags |= MASK_INTERWORK;
3109
3110 /* Need to remember initial values so combinations of options like
3111 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3112 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3113
3114 if (! opts_set->x_arm_restrict_it)
3115 opts->x_arm_restrict_it = arm_arch8;
3116
3117 /* Restricted IT does not apply in ARM execution state or on M-profile targets. */
3118 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3119 opts->x_arm_restrict_it = 0;
3120
3121 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3122 if (!opts_set->x_arm_restrict_it
3123 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3124 opts->x_arm_restrict_it = 0;
3125
3126 /* Enable -munaligned-access by default for
3127 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3128 i.e. Thumb2 and ARM state only.
3129 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3130 - ARMv8 architecture-based processors.
3131
3132 Disable -munaligned-access by default for
3133 - all pre-ARMv6 architecture-based processors
3134 - ARMv6-M architecture-based processors
3135 - ARMv8-M Baseline processors. */
3136
3137 if (! opts_set->x_unaligned_access)
3138 {
3139 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3140 && arm_arch6 && (arm_arch_notm || arm_arch7));
3141 }
3142 else if (opts->x_unaligned_access == 1
3143 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3144 {
3145 warning (0, "target CPU does not support unaligned accesses");
3146 opts->x_unaligned_access = 0;
3147 }
3148
3149 /* Don't warn since it's on by default in -O2. */
3150 if (TARGET_THUMB1_P (opts->x_target_flags))
3151 opts->x_flag_schedule_insns = 0;
3152 else
3153 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3154
3155 /* Disable shrink-wrap when optimizing function for size, since it tends to
3156 generate additional returns. */
3157 if (optimize_function_for_size_p (cfun)
3158 && TARGET_THUMB2_P (opts->x_target_flags))
3159 opts->x_flag_shrink_wrap = false;
3160 else
3161 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3162
3163 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3164 - epilogue_insns - does not accurately model the corresponding insns
3165 emitted in the asm file. In particular, see the comment in thumb_exit
3166 'Find out how many of the (return) argument registers we can corrupt'.
3167 As a consequence, the epilogue may clobber registers without fipa-ra
3168 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3169 TODO: Accurately model clobbers for epilogue_insns and reenable
3170 fipa-ra. */
3171 if (TARGET_THUMB1_P (opts->x_target_flags))
3172 opts->x_flag_ipa_ra = 0;
3173 else
3174 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3175
3176 /* Thumb2 inline assembly code should always use unified syntax.
3177 This will apply to ARM and Thumb1 eventually. */
3178 if (TARGET_THUMB2_P (opts->x_target_flags))
3179 opts->x_inline_asm_unified = true;
3180
3181 if (arm_stack_protector_guard == SSP_GLOBAL
3182 && opts->x_arm_stack_protector_guard_offset_str)
3183 {
3184 error ("incompatible options %<-mstack-protector-guard=global%> and "
3185 "%<-mstack-protector-guard-offset=%s%>",
3186 arm_stack_protector_guard_offset_str);
3187 }
3188
3189 if (opts->x_arm_stack_protector_guard_offset_str)
3190 {
3191 char *end;
3192 const char *str = arm_stack_protector_guard_offset_str;
3193 errno = 0;
3194 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3195 if (!*str || *end || errno)
3196 error ("%qs is not a valid offset in %qs", str,
3197 "-mstack-protector-guard-offset=");
3198 arm_stack_protector_guard_offset = offs;
3199 }
3200
3201 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3202 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3203 #endif
3204 }
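/* Illustrative note (added comment, not part of the original sources): because
   the guard offset above is parsed with strtol and base 0, decimal,
   hexadecimal and octal spellings are all accepted, so for example
       -mstack-protector-guard-offset=0x20
   and
       -mstack-protector-guard-offset=32
   select the same offset.  An empty string, trailing junk, or an
   out-of-range value is rejected by the !*str / *end / errno checks.  */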
3205
3206 static sbitmap isa_all_fpubits_internal;
3207 static sbitmap isa_all_fpbits;
3208 static sbitmap isa_quirkbits;
3209
3210 /* Configure a build target TARGET from the user-specified options OPTS and
3211 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3212 architecture have been specified, but the two are not identical. */
3213 void
3214 arm_configure_build_target (struct arm_build_target *target,
3215 struct cl_target_option *opts,
3216 bool warn_compatible)
3217 {
3218 const cpu_option *arm_selected_tune = NULL;
3219 const arch_option *arm_selected_arch = NULL;
3220 const cpu_option *arm_selected_cpu = NULL;
3221 const arm_fpu_desc *arm_selected_fpu = NULL;
3222 const char *tune_opts = NULL;
3223 const char *arch_opts = NULL;
3224 const char *cpu_opts = NULL;
3225
3226 bitmap_clear (target->isa);
3227 target->core_name = NULL;
3228 target->arch_name = NULL;
3229
3230 if (opts->x_arm_arch_string)
3231 {
3232 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3233 "-march",
3234 opts->x_arm_arch_string);
3235 arch_opts = strchr (opts->x_arm_arch_string, '+');
3236 }
3237
3238 if (opts->x_arm_cpu_string)
3239 {
3240 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3241 opts->x_arm_cpu_string);
3242 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3243 arm_selected_tune = arm_selected_cpu;
3244 /* If taking the tuning from -mcpu, we don't need to rescan the
3245 options for tuning. */
3246 }
3247
3248 if (opts->x_arm_tune_string)
3249 {
3250 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3251 opts->x_arm_tune_string);
3252 tune_opts = strchr (opts->x_arm_tune_string, '+');
3253 }
3254
3255 if (arm_selected_arch)
3256 {
3257 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3258 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3259 arch_opts);
3260
3261 if (arm_selected_cpu)
3262 {
3263 auto_sbitmap cpu_isa (isa_num_bits);
3264 auto_sbitmap isa_delta (isa_num_bits);
3265
3266 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 bitmap_xor (isa_delta, cpu_isa, target->isa);
3270 /* Ignore any bits that are quirk bits. */
3271 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3272 /* If the user (or the default configuration) has specified a
3273 specific FPU, then ignore any bits that depend on the FPU
3274 configuration. Do similarly if using the soft-float
3275 ABI. */
3276 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3277 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3278 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3279
3280 if (!bitmap_empty_p (isa_delta))
3281 {
3282 if (warn_compatible)
3283 warning (0, "switch %<-mcpu=%s%> conflicts "
3284 "with switch %<-march=%s%>",
3285 opts->x_arm_cpu_string,
3286 opts->x_arm_arch_string);
3287
3288 /* -march wins for code generation.
3289 -mcpu wins for default tuning. */
3290 if (!arm_selected_tune)
3291 arm_selected_tune = arm_selected_cpu;
3292
3293 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3294 target->arch_name = arm_selected_arch->common.name;
3295 }
3296 else
3297 {
3298 /* Architecture and CPU are essentially the same.
3299 Prefer the CPU setting. */
3300 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3301 target->core_name = arm_selected_cpu->common.name;
3302 /* Copy the CPU's capabilities, so that we inherit the
3303 appropriate extensions and quirks. */
3304 bitmap_copy (target->isa, cpu_isa);
3305 }
3306 }
3307 else
3308 {
3309 /* Pick a CPU based on the architecture. */
3310 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3311 target->arch_name = arm_selected_arch->common.name;
3312 /* Note: target->core_name is left unset in this path. */
3313 }
3314 }
3315 else if (arm_selected_cpu)
3316 {
3317 target->core_name = arm_selected_cpu->common.name;
3318 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3319 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3320 cpu_opts);
3321 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3322 }
3323 /* If the user did not specify a processor or architecture, choose
3324 one for them. */
3325 else
3326 {
3327 const cpu_option *sel;
3328 auto_sbitmap sought_isa (isa_num_bits);
3329 bitmap_clear (sought_isa);
3330 auto_sbitmap default_isa (isa_num_bits);
3331
3332 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3333 TARGET_CPU_DEFAULT);
3334 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3335 gcc_assert (arm_selected_cpu->common.name);
3336
3337 /* RWE: All of the selection logic below (to the end of this
3338 'if' clause) looks somewhat suspect. It appears to be mostly
3339 there to support forcing thumb support when the default CPU
3340 does not have thumb (somewhat dubious in terms of what the
3341 user might be expecting). I think it should be removed once
3342 support for the pre-thumb era cores is removed. */
3343 sel = arm_selected_cpu;
3344 arm_initialize_isa (default_isa, sel->common.isa_bits);
3345 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3346 cpu_opts);
3347
3348 /* Now check to see if the user has specified any command line
3349 switches that require certain abilities from the cpu. */
3350
3351 if (TARGET_INTERWORK || TARGET_THUMB)
3352 bitmap_set_bit (sought_isa, isa_bit_thumb);
3353
3354 /* If there are such requirements and the default CPU does not
3355 satisfy them, we need to run over the complete list of
3356 cores looking for one that is satisfactory. */
3357 if (!bitmap_empty_p (sought_isa)
3358 && !bitmap_subset_p (sought_isa, default_isa))
3359 {
3360 auto_sbitmap candidate_isa (isa_num_bits);
3361 /* We're only interested in a CPU with at least the
3362 capabilities of the default CPU and the required
3363 additional features. */
3364 bitmap_ior (default_isa, default_isa, sought_isa);
3365
3366 /* Try to locate a CPU type that supports all of the abilities
3367 of the default CPU, plus the extra abilities requested by
3368 the user. */
3369 for (sel = all_cores; sel->common.name != NULL; sel++)
3370 {
3371 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3372 /* An exact match? */
3373 if (bitmap_equal_p (default_isa, candidate_isa))
3374 break;
3375 }
3376
3377 if (sel->common.name == NULL)
3378 {
3379 unsigned current_bit_count = isa_num_bits;
3380 const cpu_option *best_fit = NULL;
3381
3382 /* Ideally we would like to issue an error message here
3383 saying that it was not possible to find a CPU compatible
3384 with the default CPU, but which also supports the command
3385 line options specified by the programmer, and so they
3386 ought to use the -mcpu=<name> command line option to
3387 override the default CPU type.
3388
3389 If we cannot find a CPU that has exactly the
3390 characteristics of the default CPU and the given
3391 command line options we scan the array again looking
3392 for a best match. The best match must have at least
3393 the capabilities of the perfect match. */
3394 for (sel = all_cores; sel->common.name != NULL; sel++)
3395 {
3396 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3397
3398 if (bitmap_subset_p (default_isa, candidate_isa))
3399 {
3400 unsigned count;
3401
3402 bitmap_and_compl (candidate_isa, candidate_isa,
3403 default_isa);
3404 count = bitmap_popcount (candidate_isa);
3405
3406 if (count < current_bit_count)
3407 {
3408 best_fit = sel;
3409 current_bit_count = count;
3410 }
3411 }
3412
3413 gcc_assert (best_fit);
3414 sel = best_fit;
3415 }
3416 }
3417 arm_selected_cpu = sel;
3418 }
3419
3420 /* Now we know the CPU, we can finally initialize the target
3421 structure. */
3422 target->core_name = arm_selected_cpu->common.name;
3423 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3424 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3425 cpu_opts);
3426 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3427 }
3428
3429 gcc_assert (arm_selected_cpu);
3430 gcc_assert (arm_selected_arch);
3431
3432 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3433 {
3434 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3435 auto_sbitmap fpu_bits (isa_num_bits);
3436
3437 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3438 /* This should clear out ALL bits relating to the FPU/simd
3439 extensions, to avoid potentially invalid combinations later on
3440 that we can't match. At present we only clear out those bits
3441 that can be set by -mfpu. This should be fixed in GCC-12. */
3442 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3443 bitmap_ior (target->isa, target->isa, fpu_bits);
3444 }
3445
3446 /* If we have the soft-float ABI, clear any feature bits relating to use of
3447 floating-point operations. They'll just confuse things later on. */
3448 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3449 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3450
3451 /* There may be implied bits which we still need to enable. These are
3452 non-named features which are needed to complete other sets of features,
3453 but cannot be enabled from arm-cpus.in due to being shared between
3454 multiple fgroups. Each entry in all_implied_fbits is of the form
3455 ante -> cons, meaning that if the feature "ante" is enabled, we should
3456 implicitly enable "cons". */
3457 const struct fbit_implication *impl = all_implied_fbits;
3458 while (impl->ante)
3459 {
3460 if (bitmap_bit_p (target->isa, impl->ante))
3461 bitmap_set_bit (target->isa, impl->cons);
3462 impl++;
3463 }
3464
3465 if (!arm_selected_tune)
3466 arm_selected_tune = arm_selected_cpu;
3467 else /* Validate the features passed to -mtune. */
3468 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3469
3470 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3471
3472 /* Finish initializing the target structure. */
3473 if (!target->arch_name)
3474 target->arch_name = arm_selected_arch->common.name;
3475 target->arch_pp_name = arm_selected_arch->arch;
3476 target->base_arch = arm_selected_arch->base_arch;
3477 target->profile = arm_selected_arch->profile;
3478
3479 target->tune_flags = tune_data->tune_flags;
3480 target->tune = tune_data->tune;
3481 target->tune_core = tune_data->scheduler;
3482 }
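/* Illustrative example (added comment, not from the original sources): with
   "-march=armv8-a -mcpu=cortex-m3" the ISA bitmaps of the architecture and
   the CPU differ in more than quirk/FPU bits, so the code above warns that
   the two switches conflict; -march then drives code generation while the
   -mcpu entry is retained only for default tuning.  When the two selections
   agree (modulo quirk and FPU bits), the CPU's own capabilities are copied
   into the target so that its extensions and quirks are inherited.  */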
3483
3484 /* Fix up any incompatible options that the user has specified. */
3485 static void
3486 arm_option_override (void)
3487 {
3488 static const enum isa_feature fpu_bitlist_internal[]
3489 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3490 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3491 static const enum isa_feature fp_bitlist[]
3492 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3493 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3494 cl_target_option opts;
3495
3496 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3497 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3498
3499 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3500 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3501 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3502 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3503
3504 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3505
3506 if (!OPTION_SET_P (arm_fpu_index))
3507 {
3508 bool ok;
3509 int fpu_index;
3510
3511 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3512 CL_TARGET);
3513 gcc_assert (ok);
3514 arm_fpu_index = (enum fpu_type) fpu_index;
3515 }
3516
3517 cl_target_option_save (&opts, &global_options, &global_options_set);
3518 arm_configure_build_target (&arm_active_target, &opts, true);
3519
3520 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3521 SUBTARGET_OVERRIDE_OPTIONS;
3522 #endif
3523
3524 /* Initialize boolean versions of the architectural flags, for use
3525 in the arm.md file and for enabling feature flags. */
3526 arm_option_reconfigure_globals ();
3527
3528 arm_tune = arm_active_target.tune_core;
3529 tune_flags = arm_active_target.tune_flags;
3530 current_tune = arm_active_target.tune;
3531
3532 /* TBD: Dwarf info for apcs frame is not handled yet. */
3533 if (TARGET_APCS_FRAME)
3534 flag_shrink_wrap = false;
3535
3536 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3537 {
3538 warning (0, "%<-mapcs-stack-check%> incompatible with "
3539 "%<-mno-apcs-frame%>");
3540 target_flags |= MASK_APCS_FRAME;
3541 }
3542
3543 if (TARGET_POKE_FUNCTION_NAME)
3544 target_flags |= MASK_APCS_FRAME;
3545
3546 if (TARGET_APCS_REENT && flag_pic)
3547 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3548
3549 if (TARGET_APCS_REENT)
3550 warning (0, "APCS reentrant code not supported. Ignored");
3551
3552 /* Set up some tuning parameters. */
3553 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3554 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3555 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3556 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3557 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3558 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3559
3560 /* For arm2/3 there is no need to do any scheduling if we are doing
3561 software floating-point. */
3562 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3563 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3564
3565 /* Override the default structure alignment for AAPCS ABI. */
3566 if (!OPTION_SET_P (arm_structure_size_boundary))
3567 {
3568 if (TARGET_AAPCS_BASED)
3569 arm_structure_size_boundary = 8;
3570 }
3571 else
3572 {
3573 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3574
3575 if (arm_structure_size_boundary != 8
3576 && arm_structure_size_boundary != 32
3577 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3578 {
3579 if (ARM_DOUBLEWORD_ALIGN)
3580 warning (0,
3581 "structure size boundary can only be set to 8, 32 or 64");
3582 else
3583 warning (0, "structure size boundary can only be set to 8 or 32");
3584 arm_structure_size_boundary
3585 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3586 }
3587 }
3588
3589 if (TARGET_VXWORKS_RTP)
3590 {
3591 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3592 arm_pic_data_is_text_relative = 0;
3593 }
3594 else if (flag_pic
3595 && !arm_pic_data_is_text_relative
3596 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3597 /* When text & data segments don't have a fixed displacement, the
3598 intended use is with a single, read only, pic base register.
3599 Unless the user explicitly requested not to do that, set
3600 it. */
3601 target_flags |= MASK_SINGLE_PIC_BASE;
3602
3603 /* If stack checking is disabled, we can use r10 as the PIC register,
3604 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3605 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3606 {
3607 if (TARGET_VXWORKS_RTP)
3608 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3609 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3610 }
3611
3612 if (flag_pic && TARGET_VXWORKS_RTP)
3613 arm_pic_register = 9;
3614
3615 /* If in FDPIC mode then force arm_pic_register to be r9. */
3616 if (TARGET_FDPIC)
3617 {
3618 arm_pic_register = FDPIC_REGNUM;
3619 if (TARGET_THUMB1)
3620 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3621 }
3622
3623 if (arm_pic_register_string != NULL)
3624 {
3625 int pic_register = decode_reg_name (arm_pic_register_string);
3626
3627 if (!flag_pic)
3628 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3629
3630 /* Prevent the user from choosing an obviously stupid PIC register. */
3631 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3632 || pic_register == HARD_FRAME_POINTER_REGNUM
3633 || pic_register == STACK_POINTER_REGNUM
3634 || pic_register >= PC_REGNUM
3635 || (TARGET_VXWORKS_RTP
3636 && (unsigned int) pic_register != arm_pic_register))
3637 error ("unable to use %qs for PIC register", arm_pic_register_string);
3638 else
3639 arm_pic_register = pic_register;
3640 }
3641
3642 if (flag_pic)
3643 target_word_relocations = 1;
3644
3645 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3646 if (fix_cm3_ldrd == 2)
3647 {
3648 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3649 fix_cm3_ldrd = 1;
3650 else
3651 fix_cm3_ldrd = 0;
3652 }
3653
3654 /* Enable fix_vlldm by default if required. */
3655 if (fix_vlldm == 2)
3656 {
3657 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3658 fix_vlldm = 1;
3659 else
3660 fix_vlldm = 0;
3661 }
3662
3663 /* Enable fix_aes by default if required. */
3664 if (fix_aes_erratum_1742098 == 2)
3665 {
3666 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3667 fix_aes_erratum_1742098 = 1;
3668 else
3669 fix_aes_erratum_1742098 = 0;
3670 }
3671
3672 /* Hot/Cold partitioning is not currently supported, since we can't
3673 handle literal pool placement in that case. */
3674 if (flag_reorder_blocks_and_partition)
3675 {
3676 inform (input_location,
3677 "%<-freorder-blocks-and-partition%> not supported "
3678 "on this architecture");
3679 flag_reorder_blocks_and_partition = 0;
3680 flag_reorder_blocks = 1;
3681 }
3682
3683 if (flag_pic)
3684 /* Hoisting PIC address calculations more aggressively provides a small,
3685 but measurable, size reduction for PIC code. Therefore, we decrease
3686 the bar for unrestricted expression hoisting to the cost of PIC address
3687 calculation, which is 2 instructions. */
3688 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3689 param_gcse_unrestricted_cost, 2);
3690
3691 /* ARM EABI defaults to strict volatile bitfields. */
3692 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3693 && abi_version_at_least(2))
3694 flag_strict_volatile_bitfields = 1;
3695
3696 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3697 have deemed it beneficial (signified by setting
3698 prefetch.num_slots to 1 or more). */
3699 if (flag_prefetch_loop_arrays < 0
3700 && HAVE_prefetch
3701 && optimize >= 3
3702 && current_tune->prefetch.num_slots > 0)
3703 flag_prefetch_loop_arrays = 1;
3704
3705 /* Set up parameters to be used in prefetching algorithm. Do not
3706 override the defaults unless we are tuning for a core we have
3707 researched values for. */
3708 if (current_tune->prefetch.num_slots > 0)
3709 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3710 param_simultaneous_prefetches,
3711 current_tune->prefetch.num_slots);
3712 if (current_tune->prefetch.l1_cache_line_size >= 0)
3713 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3714 param_l1_cache_line_size,
3715 current_tune->prefetch.l1_cache_line_size);
3716 if (current_tune->prefetch.l1_cache_line_size >= 0)
3717 {
3718 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3719 param_destruct_interfere_size,
3720 current_tune->prefetch.l1_cache_line_size);
3721 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3722 param_construct_interfere_size,
3723 current_tune->prefetch.l1_cache_line_size);
3724 }
3725 else
3726 {
3727 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3728 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3729 constructive? */
3730 /* More recent Cortex chips have a 64-byte cache line, but are marked
3731 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3732 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3733 param_destruct_interfere_size, 64);
3734 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3735 param_construct_interfere_size, 64);
3736 }
3737
3738 if (current_tune->prefetch.l1_cache_size >= 0)
3739 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3740 param_l1_cache_size,
3741 current_tune->prefetch.l1_cache_size);
3742
3743 /* Look through the ready list and all of the queue for instructions
3744 relevant to the L2 auto-prefetcher. */
3745 int sched_autopref_queue_depth;
3746
3747 switch (current_tune->sched_autopref)
3748 {
3749 case tune_params::SCHED_AUTOPREF_OFF:
3750 sched_autopref_queue_depth = -1;
3751 break;
3752
3753 case tune_params::SCHED_AUTOPREF_RANK:
3754 sched_autopref_queue_depth = 0;
3755 break;
3756
3757 case tune_params::SCHED_AUTOPREF_FULL:
3758 sched_autopref_queue_depth = max_insn_queue_index + 1;
3759 break;
3760
3761 default:
3762 gcc_unreachable ();
3763 }
3764
3765 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3766 param_sched_autopref_queue_depth,
3767 sched_autopref_queue_depth);
3768
3769 /* Currently, for slow flash data, we just disable literal pools. We also
3770 disable them for pure-code. */
3771 if (target_slow_flash_data || target_pure_code)
3772 arm_disable_literal_pool = true;
3773
3774 /* Disable scheduling fusion by default if the target is not an armv7
3775 processor or does not prefer ldrd/strd. */
3776 if (flag_schedule_fusion == 2
3777 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3778 flag_schedule_fusion = 0;
3779
3780 /* Need to remember initial options before they are overridden. */
3781 init_optimize = build_optimization_node (&global_options,
3782 &global_options_set);
3783
3784 arm_options_perform_arch_sanity_checks ();
3785 arm_option_override_internal (&global_options, &global_options_set);
3786 arm_option_check_internal (&global_options);
3787 arm_option_params_internal ();
3788
3789 /* Create the default target_options structure. */
3790 target_option_default_node = target_option_current_node
3791 = build_target_option_node (&global_options, &global_options_set);
3792
3793 /* Register global variables with the garbage collector. */
3794 arm_add_gc_roots ();
3795
3796 /* Record the initial mode for testing. */
3797 thumb_flipper = TARGET_THUMB;
3798 }
3799
3800
3801 /* Reconfigure global status flags from the active_target.isa. */
3802 void
3803 arm_option_reconfigure_globals (void)
3804 {
3805 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3806 arm_base_arch = arm_active_target.base_arch;
3807
3808 /* Initialize boolean versions of the architectural flags, for use
3809 in the arm.md file. */
3810 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3811 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3812 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3813 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3814 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3815 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3816 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3817 arm_arch6m = arm_arch6 && !arm_arch_notm;
3818 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3819 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3820 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3821 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3822 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3823 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3824 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3825 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3826 isa_bit_armv8_1m_main);
3827 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3828 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3829 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3830 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3831 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3832 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3833 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3834 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3835 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3836 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3837 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3838 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3839
3840 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3841 if (arm_fp16_inst)
3842 {
3843 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3844 error ("selected fp16 options are incompatible");
3845 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3846 }
3847
3848 arm_arch_cde = 0;
3849 arm_arch_cde_coproc = 0;
3850 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3851 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3852 isa_bit_cdecp6, isa_bit_cdecp7};
3853 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3854 {
3855 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3856 if (cde_bit)
3857 {
3858 arm_arch_cde |= cde_bit;
3859 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3860 }
3861 }
3862
3863 /* And finally, set up some quirks. */
3864 arm_arch_no_volatile_ce
3865 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3866 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3867 isa_bit_quirk_armv6kz);
3868
3869 /* Use the cp15 method if it is available. */
3870 if (target_thread_pointer == TP_AUTO)
3871 {
3872 if (arm_arch6k && !TARGET_THUMB1)
3873 target_thread_pointer = TP_CP15;
3874 else
3875 target_thread_pointer = TP_SOFT;
3876 }
3877
3878 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3879 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3880 }
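/* Example (added for illustration): for -march=armv7-a the arch_pp_name is
   "7A", so the sprintf above produces the string "__ARM_ARCH_7A__", which is
   used for the architecture preprocessor predefine.  */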
3881
3882 /* Perform some validation between the desired architecture and the rest of the
3883 options. */
3884 void
3885 arm_options_perform_arch_sanity_checks (void)
3886 {
3887 /* V5T code we generate is completely interworking capable, so we turn off
3888 TARGET_INTERWORK here to avoid many tests later on. */
3889
3890 /* XXX However, we must pass the right pre-processor defines to CPP
3891 or GLD can get confused. This is a hack. */
3892 if (TARGET_INTERWORK)
3893 arm_cpp_interwork = 1;
3894
3895 if (arm_arch5t)
3896 target_flags &= ~MASK_INTERWORK;
3897
3898 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3899 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3900
3901 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3902 error ("iwmmxt abi requires an iwmmxt capable cpu");
3903
3904 /* BPABI targets use linker tricks to allow interworking on cores
3905 without thumb support. */
3906 if (TARGET_INTERWORK
3907 && !TARGET_BPABI
3908 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3909 {
3910 warning (0, "target CPU does not support interworking" );
3911 target_flags &= ~MASK_INTERWORK;
3912 }
3913
3914 /* If soft-float is specified then don't use FPU. */
3915 if (TARGET_SOFT_FLOAT)
3916 arm_fpu_attr = FPU_NONE;
3917 else
3918 arm_fpu_attr = FPU_VFP;
3919
3920 if (TARGET_AAPCS_BASED)
3921 {
3922 if (TARGET_CALLER_INTERWORKING)
3923 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3924 else
3925 if (TARGET_CALLEE_INTERWORKING)
3926 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3927 }
3928
3929 /* __fp16 support currently assumes the core has ldrh. */
3930 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3931 sorry ("%<__fp16%> and no ldrh");
3932
3933 if (use_cmse && !arm_arch_cmse)
3934 error ("target CPU does not support ARMv8-M Security Extensions");
3935
3936 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3937 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3938 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3939 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3940
3941
3942 if (TARGET_AAPCS_BASED)
3943 {
3944 if (arm_abi == ARM_ABI_IWMMXT)
3945 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3946 else if (TARGET_HARD_FLOAT_ABI)
3947 {
3948 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3949 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3950 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3951 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3952 }
3953 else
3954 arm_pcs_default = ARM_PCS_AAPCS;
3955 }
3956 else
3957 {
3958 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3959 sorry ("%<-mfloat-abi=hard%> and VFP");
3960
3961 if (arm_abi == ARM_ABI_APCS)
3962 arm_pcs_default = ARM_PCS_APCS;
3963 else
3964 arm_pcs_default = ARM_PCS_ATPCS;
3965 }
3966 }
3967
3968 /* Test whether a local function descriptor is canonical, i.e.,
3969 whether we can use GOTOFFFUNCDESC to compute the address of the
3970 function. */
3971 static bool
3972 arm_fdpic_local_funcdesc_p (rtx fnx)
3973 {
3974 tree fn;
3975 enum symbol_visibility vis;
3976 bool ret;
3977
3978 if (!TARGET_FDPIC)
3979 return true;
3980
3981 if (! SYMBOL_REF_LOCAL_P (fnx))
3982 return false;
3983
3984 fn = SYMBOL_REF_DECL (fnx);
3985
3986 if (! fn)
3987 return false;
3988
3989 vis = DECL_VISIBILITY (fn);
3990
3991 if (vis == VISIBILITY_PROTECTED)
3992 /* Private function descriptors for protected functions are not
3993 canonical. Temporarily change the visibility to global so that
3994 we can ensure uniqueness of funcdesc pointers. */
3995 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3996
3997 ret = default_binds_local_p_1 (fn, flag_pic);
3998
3999 DECL_VISIBILITY (fn) = vis;
4000
4001 return ret;
4002 }
4003
4004 static void
4005 arm_add_gc_roots (void)
4006 {
4007 gcc_obstack_init(&minipool_obstack);
4008 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4009 }
4010 \f
4011 /* A table of known ARM exception types.
4012 For use with the interrupt function attribute. */
4013
4014 typedef struct
4015 {
4016 const char *const arg;
4017 const unsigned long return_value;
4018 }
4019 isr_attribute_arg;
4020
4021 static const isr_attribute_arg isr_attribute_args [] =
4022 {
4023 { "IRQ", ARM_FT_ISR },
4024 { "irq", ARM_FT_ISR },
4025 { "FIQ", ARM_FT_FIQ },
4026 { "fiq", ARM_FT_FIQ },
4027 { "ABORT", ARM_FT_ISR },
4028 { "abort", ARM_FT_ISR },
4029 { "UNDEF", ARM_FT_EXCEPTION },
4030 { "undef", ARM_FT_EXCEPTION },
4031 { "SWI", ARM_FT_EXCEPTION },
4032 { "swi", ARM_FT_EXCEPTION },
4033 { NULL, ARM_FT_NORMAL }
4034 };
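/* Usage example (added for illustration; the handler name is hypothetical):

     void uart_rx_handler (void) __attribute__ ((interrupt ("IRQ")));

   The string argument is matched case-sensitively against the table above,
   which is why both upper- and lower-case spellings are listed.  */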
4035
4036 /* Returns the (interrupt) function type of the current
4037 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4038
4039 static unsigned long
4040 arm_isr_value (tree argument)
4041 {
4042 const isr_attribute_arg * ptr;
4043 const char * arg;
4044
4045 if (!arm_arch_notm)
4046 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4047
4048 /* No argument - default to IRQ. */
4049 if (argument == NULL_TREE)
4050 return ARM_FT_ISR;
4051
4052 /* Get the value of the argument. */
4053 if (TREE_VALUE (argument) == NULL_TREE
4054 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4055 return ARM_FT_UNKNOWN;
4056
4057 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4058
4059 /* Check it against the list of known arguments. */
4060 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4061 if (streq (arg, ptr->arg))
4062 return ptr->return_value;
4063
4064 /* An unrecognized interrupt type. */
4065 return ARM_FT_UNKNOWN;
4066 }
4067
4068 /* Computes the type of the current function. */
4069
4070 static unsigned long
4071 arm_compute_func_type (void)
4072 {
4073 unsigned long type = ARM_FT_UNKNOWN;
4074 tree a;
4075 tree attr;
4076
4077 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4078
4079 /* Decide if the current function is volatile. Such functions
4080 never return, and many memory cycles can be saved by not storing
4081 register values that will never be needed again. This optimization
4082 was added to speed up context switching in a kernel application. */
4083 if (optimize > 0
4084 && (TREE_NOTHROW (current_function_decl)
4085 || !(flag_unwind_tables
4086 || (flag_exceptions
4087 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4088 && TREE_THIS_VOLATILE (current_function_decl))
4089 type |= ARM_FT_VOLATILE;
4090
4091 if (cfun->static_chain_decl != NULL)
4092 type |= ARM_FT_NESTED;
4093
4094 attr = DECL_ATTRIBUTES (current_function_decl);
4095
4096 a = lookup_attribute ("naked", attr);
4097 if (a != NULL_TREE)
4098 type |= ARM_FT_NAKED;
4099
4100 a = lookup_attribute ("isr", attr);
4101 if (a == NULL_TREE)
4102 a = lookup_attribute ("interrupt", attr);
4103
4104 if (a == NULL_TREE)
4105 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4106 else
4107 type |= arm_isr_value (TREE_VALUE (a));
4108
4109 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4110 type |= ARM_FT_CMSE_ENTRY;
4111
4112 return type;
4113 }
4114
4115 /* Returns the type of the current function. */
4116
4117 unsigned long
4118 arm_current_func_type (void)
4119 {
4120 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4121 cfun->machine->func_type = arm_compute_func_type ();
4122
4123 return cfun->machine->func_type;
4124 }
4125
4126 bool
4127 arm_allocate_stack_slots_for_args (void)
4128 {
4129 /* Naked functions should not allocate stack slots for arguments. */
4130 return !IS_NAKED (arm_current_func_type ());
4131 }
4132
4133 static bool
4134 arm_warn_func_return (tree decl)
4135 {
4136 /* Naked functions are implemented entirely in assembly, including the
4137 return sequence, so suppress warnings about this. */
4138 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4139 }
4140
4141 \f
4142 /* Output assembler code for a block containing the constant parts
4143 of a trampoline, leaving space for the variable parts.
4144
4145 On the ARM, (if r8 is the static chain regnum, and remembering that
4146 referencing pc adds an offset of 8) the trampoline looks like:
4147 ldr r8, [pc, #0]
4148 ldr pc, [pc]
4149 .word static chain value
4150 .word function's address
4151 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4152
4153 In FDPIC mode, the trampoline looks like:
4154 .word trampoline address
4155 .word trampoline GOT address
4156 ldr r12, [pc, #8] ; #4 for Arm mode
4157 ldr r9, [pc, #8] ; #4 for Arm mode
4158 ldr pc, [pc, #8] ; #4 for Arm mode
4159 .word static chain value
4160 .word GOT address
4161 .word function's address
4162 */
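/* Added note (for illustration): the literal offsets used below follow from
   how the PC reads during execution: a pc-relative load sees the address of
   the current instruction plus 8 in ARM state and plus 4 in Thumb state.
   That is why the FDPIC template encodes its offsets as
   "TARGET_THUMB2 ? 8 : 4" to reach the same data words in either state.  */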
4163
4164 static void
4165 arm_asm_trampoline_template (FILE *f)
4166 {
4167 fprintf (f, "\t.syntax unified\n");
4168
4169 if (TARGET_FDPIC)
4170 {
4171 /* The first two words are a function descriptor pointing to the
4172 trampoline code just below. */
4173 if (TARGET_ARM)
4174 fprintf (f, "\t.arm\n");
4175 else if (TARGET_THUMB2)
4176 fprintf (f, "\t.thumb\n");
4177 else
4178 /* Only ARM and Thumb-2 are supported. */
4179 gcc_unreachable ();
4180
4181 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4182 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4183 /* Trampoline code which sets the static chain register but also
4184 PIC register before jumping into real code. */
4185 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4186 STATIC_CHAIN_REGNUM, PC_REGNUM,
4187 TARGET_THUMB2 ? 8 : 4);
4188 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4189 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4190 TARGET_THUMB2 ? 8 : 4);
4191 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4192 PC_REGNUM, PC_REGNUM,
4193 TARGET_THUMB2 ? 8 : 4);
4194 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4195 }
4196 else if (TARGET_ARM)
4197 {
4198 fprintf (f, "\t.arm\n");
4199 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4200 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4201 }
4202 else if (TARGET_THUMB2)
4203 {
4204 fprintf (f, "\t.thumb\n");
4205 /* The Thumb-2 trampoline is similar to the arm implementation.
4206 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4207 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4208 STATIC_CHAIN_REGNUM, PC_REGNUM);
4209 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4210 }
4211 else
4212 {
4213 ASM_OUTPUT_ALIGN (f, 2);
4214 fprintf (f, "\t.code\t16\n");
4215 fprintf (f, ".Ltrampoline_start:\n");
4216 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4217 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4218 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4219 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4220 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4221 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4222 }
4223 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4224 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4225 }
4226
4227 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4228
4229 static void
4230 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4231 {
4232 rtx fnaddr, mem, a_tramp;
4233
4234 emit_block_move (m_tramp, assemble_trampoline_template (),
4235 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4236
4237 if (TARGET_FDPIC)
4238 {
4239 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4240 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4241 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4242 /* The function start address is at offset 8, but in Thumb mode
4243 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4244 below. */
4245 rtx trampoline_code_start
4246 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4247
4248 /* Write initial funcdesc which points to the trampoline. */
4249 mem = adjust_address (m_tramp, SImode, 0);
4250 emit_move_insn (mem, trampoline_code_start);
4251 mem = adjust_address (m_tramp, SImode, 4);
4252 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4253 /* Setup static chain. */
4254 mem = adjust_address (m_tramp, SImode, 20);
4255 emit_move_insn (mem, chain_value);
4256 /* GOT + real function entry point. */
4257 mem = adjust_address (m_tramp, SImode, 24);
4258 emit_move_insn (mem, gotaddr);
4259 mem = adjust_address (m_tramp, SImode, 28);
4260 emit_move_insn (mem, fnaddr);
4261 }
4262 else
4263 {
4264 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4265 emit_move_insn (mem, chain_value);
4266
4267 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4268 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4269 emit_move_insn (mem, fnaddr);
4270 }
4271
4272 a_tramp = XEXP (m_tramp, 0);
4273 maybe_emit_call_builtin___clear_cache (a_tramp,
4274 plus_constant (ptr_mode,
4275 a_tramp,
4276 TRAMPOLINE_SIZE));
4277 }
4278
4279 /* Thumb trampolines should be entered in thumb mode, so set
4280 the bottom bit of the address. */
4281
4282 static rtx
4283 arm_trampoline_adjust_address (rtx addr)
4284 {
4285 /* For FDPIC don't fix trampoline address since it's a function
4286 descriptor and not a function address. */
4287 if (TARGET_THUMB && !TARGET_FDPIC)
4288 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4289 NULL, 0, OPTAB_LIB_WIDEN);
4290 return addr;
4291 }
4292 \f
4293 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4294 includes call-clobbered registers too. If this is a leaf function
4295 we can just examine the registers used by the RTL, but otherwise we
4296 have to assume that whatever function is called might clobber
4297 anything, and so we have to save all the call-clobbered registers
4298 as well. */
4299 static inline bool reg_needs_saving_p (unsigned reg)
4300 {
4301 unsigned long func_type = arm_current_func_type ();
4302
4303 if (IS_INTERRUPT (func_type))
4304 if (df_regs_ever_live_p (reg)
4305 /* Save call-clobbered core registers. */
4306 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4307 return true;
4308 else
4309 return false;
4310 else
4311 if (!df_regs_ever_live_p (reg)
4312 || call_used_or_fixed_reg_p (reg))
4313 return false;
4314 else
4315 return true;
4316 }
4317
4318 /* Return 1 if it is possible to return using a single instruction.
4319 If SIBLING is non-null, this is a test for a return before a sibling
4320 call. SIBLING is the call insn, so we can examine its register usage. */
4321
4322 int
4323 use_return_insn (int iscond, rtx sibling)
4324 {
4325 int regno;
4326 unsigned int func_type;
4327 unsigned long saved_int_regs;
4328 unsigned HOST_WIDE_INT stack_adjust;
4329 arm_stack_offsets *offsets;
4330
4331 /* Never use a return instruction before reload has run. */
4332 if (!reload_completed)
4333 return 0;
4334
4335 func_type = arm_current_func_type ();
4336
4337 /* Naked, volatile and stack alignment functions need special
4338 consideration. */
4339 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4340 return 0;
4341
4342 /* So do interrupt functions that use the frame pointer and Thumb
4343 interrupt functions. */
4344 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4345 return 0;
4346
4347 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4348 && !optimize_function_for_size_p (cfun))
4349 return 0;
4350
4351 offsets = arm_get_frame_offsets ();
4352 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4353
4354 /* As do variadic functions. */
4355 if (crtl->args.pretend_args_size
4356 || cfun->machine->uses_anonymous_args
4357 /* Or if the function calls __builtin_eh_return () */
4358 || crtl->calls_eh_return
4359 /* Or if the function calls alloca */
4360 || cfun->calls_alloca
4361 /* Or if there is a stack adjustment. However, if the stack pointer
4362 is saved on the stack, we can use a pre-incrementing stack load. */
4363 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4364 && stack_adjust == 4))
4365 /* Or if the static chain register was saved above the frame, under the
4366 assumption that the stack pointer isn't saved on the stack. */
4367 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4368 && arm_compute_static_chain_stack_bytes() != 0))
4369 return 0;
4370
4371 saved_int_regs = offsets->saved_regs_mask;
4372
4373 /* Unfortunately, the insn
4374
4375 ldmib sp, {..., sp, ...}
4376
4377 triggers a bug on most SA-110 based devices, such that the stack
4378 pointer won't be correctly restored if the instruction takes a
4379 page fault. We work around this problem by popping r3 along with
4380 the other registers, since that is never slower than executing
4381 another instruction.
4382
4383 We test for !arm_arch5t here, because code for any architecture
4384 less than this could potentially be run on one of the buggy
4385 chips. */
4386 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4387 {
4388 /* Validate that r3 is a call-clobbered register (always true in
4389 the default abi) ... */
4390 if (!call_used_or_fixed_reg_p (3))
4391 return 0;
4392
4393 /* ... that it isn't being used for a return value ... */
4394 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4395 return 0;
4396
4397 /* ... or for a tail-call argument ... */
4398 if (sibling)
4399 {
4400 gcc_assert (CALL_P (sibling));
4401
4402 if (find_regno_fusage (sibling, USE, 3))
4403 return 0;
4404 }
4405
4406 /* ... and that there are no call-saved registers in r0-r2
4407 (always true in the default ABI). */
4408 if (saved_int_regs & 0x7)
4409 return 0;
4410 }
4411
4412 /* Can't be done if interworking with Thumb, and any registers have been
4413 stacked. */
4414 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4415 return 0;
4416
4417 /* On StrongARM, conditional returns are expensive if they aren't
4418 taken and multiple registers have been stacked. */
4419 if (iscond && arm_tune_strongarm)
4420 {
4421 /* Conditional return when just the LR is stored is a simple
4422 conditional-load instruction, that's not expensive. */
4423 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4424 return 0;
4425
4426 if (flag_pic
4427 && arm_pic_register != INVALID_REGNUM
4428 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4429 return 0;
4430 }
4431
4432 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4433 several instructions if anything needs to be popped. Armv8.1-M Mainline
4434 also needs several instructions to save and restore FP context. */
4435 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4436 return 0;
4437
4438 /* If there are saved registers but the LR isn't saved, then we need
4439 two instructions for the return. */
4440 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4441 return 0;
4442
4443 /* Can't be done if any of the VFP regs are pushed,
4444 since this also requires an insn. */
4445 if (TARGET_VFP_BASE)
4446 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4447 if (reg_needs_saving_p (regno))
4448 return 0;
4449
4450 if (TARGET_REALLY_IWMMXT)
4451 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4452 if (reg_needs_saving_p (regno))
4453 return 0;
4454
4455 return 1;
4456 }
4457
4458 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4459 shrink-wrapping if possible. This is the case if we need to emit a
4460 prologue, which we can test by looking at the offsets. */
4461 bool
4462 use_simple_return_p (void)
4463 {
4464 arm_stack_offsets *offsets;
4465
4466 /* Note this function can be called before or after reload. */
4467 if (!reload_completed)
4468 arm_compute_frame_layout ();
4469
4470 offsets = arm_get_frame_offsets ();
4471 return offsets->outgoing_args != 0;
4472 }
4473
4474 /* Return TRUE if int I is a valid immediate ARM constant. */
4475
4476 int
4477 const_ok_for_arm (HOST_WIDE_INT i)
4478 {
4479 int lowbit;
4480
4481 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4482 be all zero, or all one. */
4483 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4484 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4485 != ((~(unsigned HOST_WIDE_INT) 0)
4486 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4487 return FALSE;
4488
4489 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4490
4491 /* Fast return for 0 and small values. We must do this for zero, since
4492 the code below can't handle that one case. */
4493 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4494 return TRUE;
4495
4496 /* Get the number of trailing zeros. */
4497 lowbit = ffs((int) i) - 1;
4498
4499 /* Only even shifts are allowed in ARM mode so round down to the
4500 nearest even number. */
4501 if (TARGET_ARM)
4502 lowbit &= ~1;
4503
4504 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4505 return TRUE;
4506
4507 if (TARGET_ARM)
4508 {
4509 /* Allow rotated constants in ARM mode. */
4510 if (lowbit <= 4
4511 && ((i & ~0xc000003f) == 0
4512 || (i & ~0xf000000f) == 0
4513 || (i & ~0xfc000003) == 0))
4514 return TRUE;
4515 }
4516 else if (TARGET_THUMB2)
4517 {
4518 HOST_WIDE_INT v;
4519
4520 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4521 v = i & 0xff;
4522 v |= v << 16;
4523 if (i == v || i == (v | (v << 8)))
4524 return TRUE;
4525
4526 /* Allow repeated pattern 0xXY00XY00. */
4527 v = i & 0xff00;
4528 v |= v << 16;
4529 if (i == v)
4530 return TRUE;
4531 }
4532 else if (TARGET_HAVE_MOVT)
4533 {
4534 /* Thumb-1 targets with MOVT. */
4535 if (i > 0xffff)
4536 return FALSE;
4537 else
4538 return TRUE;
4539 }
4540
4541 return FALSE;
4542 }
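/* Worked examples (added for illustration):
     0x000000ff  valid everywhere (plain 8-bit value).
     0x00ff0000  valid in ARM mode (0xff rotated into bits 16..23) and in
                 Thumb-2 (an 8-bit value shifted left).
     0xff00ff00  invalid in ARM mode (the set bits do not fit in a single
                 rotated 8-bit field) but valid in Thumb-2 as the replicated
                 pattern 0xXY00XY00.
     0x0000ffff  invalid as an ARM or Thumb-2 modified immediate; callers
                 fall back to movw/movt or a multi-insn sequence.  */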
4543
4544 /* Return true if I is a valid constant for the operation CODE. */
4545 int
4546 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4547 {
4548 if (const_ok_for_arm (i))
4549 return 1;
4550
4551 switch (code)
4552 {
4553 case SET:
4554 /* See if we can use movw. */
4555 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4556 return 1;
4557 else
4558 /* Otherwise, try mvn. */
4559 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4560
4561 case PLUS:
4562 /* See if we can use addw or subw. */
4563 if (TARGET_THUMB2
4564 && ((i & 0xfffff000) == 0
4565 || ((-i) & 0xfffff000) == 0))
4566 return 1;
4567 /* Fall through. */
4568 case COMPARE:
4569 case EQ:
4570 case NE:
4571 case GT:
4572 case LE:
4573 case LT:
4574 case GE:
4575 case GEU:
4576 case LTU:
4577 case GTU:
4578 case LEU:
4579 case UNORDERED:
4580 case ORDERED:
4581 case UNEQ:
4582 case UNGE:
4583 case UNLT:
4584 case UNGT:
4585 case UNLE:
4586 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4587
4588 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4589 case XOR:
4590 return 0;
4591
4592 case IOR:
4593 if (TARGET_THUMB2)
4594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4595 return 0;
4596
4597 case AND:
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4599
4600 default:
4601 gcc_unreachable ();
4602 }
4603 }
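/* Examples (added for illustration):
     const_ok_for_op (0xffffff00, SET) holds even though 0xffffff00 is not a
     valid immediate, because ~0xffffff00 == 0xff is, so an mvn can be used
     instead of a mov.
     const_ok_for_op (-256, PLUS) holds because 256 is a valid immediate, so
     the addition can be emitted as a subtraction of 256.  */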
4604
4605 /* Return true if I is a valid di mode constant for the operation CODE. */
4606 int
4607 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4608 {
4609 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4610 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4611 rtx hi = GEN_INT (hi_val);
4612 rtx lo = GEN_INT (lo_val);
4613
4614 if (TARGET_THUMB1)
4615 return 0;
4616
4617 switch (code)
4618 {
4619 case AND:
4620 case IOR:
4621 case XOR:
4622 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4623 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4624 case PLUS:
4625 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4626
4627 default:
4628 return 0;
4629 }
4630 }
4631
4632 /* Emit a sequence of insns to handle a large constant.
4633 CODE is the code of the operation required, it can be any of SET, PLUS,
4634 IOR, AND, XOR, MINUS;
4635 MODE is the mode in which the operation is being performed;
4636 VAL is the integer to operate on;
4637 SOURCE is the other operand (a register, or a null-pointer for SET);
4638 SUBTARGETS means it is safe to create scratch registers if that will
4639 either produce a simpler sequence, or we will want to cse the values.
4640 Return value is the number of insns emitted. */
4641
4642 /* ??? Tweak this for thumb2. */
4643 int
4644 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4645 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4646 {
4647 rtx cond;
4648
4649 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4650 cond = COND_EXEC_TEST (PATTERN (insn));
4651 else
4652 cond = NULL_RTX;
4653
4654 if (subtargets || code == SET
4655 || (REG_P (target) && REG_P (source)
4656 && REGNO (target) != REGNO (source)))
4657 {
4658 /* After arm_reorg has been called, we can't fix up expensive
4659 constants by pushing them into memory so we must synthesize
4660 them in-line, regardless of the cost. This is only likely to
4661 be more costly on chips that have load delay slots and we are
4662 compiling without running the scheduler (so no splitting
4663 occurred before the final instruction emission).
4664
4665 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4666 */
4667 if (!cfun->machine->after_arm_reorg
4668 && !cond
4669 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4670 1, 0)
4671 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4672 + (code != SET))))
4673 {
4674 if (code == SET)
4675 {
4676 /* Currently SET is the only monadic value for CODE; all
4677 the rest are dyadic. */
4678 if (TARGET_USE_MOVT)
4679 arm_emit_movpair (target, GEN_INT (val));
4680 else
4681 emit_set_insn (target, GEN_INT (val));
4682
4683 return 1;
4684 }
4685 else
4686 {
4687 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4688
4689 if (TARGET_USE_MOVT)
4690 arm_emit_movpair (temp, GEN_INT (val));
4691 else
4692 emit_set_insn (temp, GEN_INT (val));
4693
4694 /* For MINUS, the constant VAL is the minuend (we compute VAL - SOURCE),
4695 since we never have subtraction of a constant. */
4696 if (code == MINUS)
4697 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4698 else
4699 emit_set_insn (target,
4700 gen_rtx_fmt_ee (code, mode, source, temp));
4701 return 2;
4702 }
4703 }
4704 }
4705
4706 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4707 1);
4708 }
4709
4710 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4711 ARM/Thumb-2 immediates and add up to VAL.
4712 The function return value gives the number of insns required. */
4713 static int
4714 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4715 struct four_ints *return_sequence)
4716 {
4717 int best_consecutive_zeros = 0;
4718 int i;
4719 int best_start = 0;
4720 int insns1, insns2;
4721 struct four_ints tmp_sequence;
4722
4723 /* If we aren't targeting ARM, the best place to start is always at
4724 the bottom, otherwise look more closely. */
4725 if (TARGET_ARM)
4726 {
4727 for (i = 0; i < 32; i += 2)
4728 {
4729 int consecutive_zeros = 0;
4730
4731 if (!(val & (3 << i)))
4732 {
4733 while ((i < 32) && !(val & (3 << i)))
4734 {
4735 consecutive_zeros += 2;
4736 i += 2;
4737 }
4738 if (consecutive_zeros > best_consecutive_zeros)
4739 {
4740 best_consecutive_zeros = consecutive_zeros;
4741 best_start = i - consecutive_zeros;
4742 }
4743 i -= 2;
4744 }
4745 }
4746 }
4747
4748 /* So long as it won't require any more insns to do so, it's
4749 desirable to emit a small constant (in bits 0...9) in the last
4750 insn. This way there is more chance that it can be combined with
4751 a later addressing insn to form a pre-indexed load or store
4752 operation. Consider:
4753
4754 *((volatile int *)0xe0000100) = 1;
4755 *((volatile int *)0xe0000110) = 2;
4756
4757 We want this to wind up as:
4758
4759 mov rA, #0xe0000000
4760 mov rB, #1
4761 str rB, [rA, #0x100]
4762 mov rB, #2
4763 str rB, [rA, #0x110]
4764
4765 rather than having to synthesize both large constants from scratch.
4766
4767 Therefore, we calculate how many insns would be required to emit
4768 the constant starting from `best_start', and also starting from
4769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4770 yield a shorter sequence, we may as well use zero. */
4771 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4772 if (best_start != 0
4773 && ((HOST_WIDE_INT_1U << best_start) < val))
4774 {
4775 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4776 if (insns2 <= insns1)
4777 {
4778 *return_sequence = tmp_sequence;
4779 insns1 = insns2;
4780 }
4781 }
4782
4783 return insns1;
4784 }
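/* Example (added for illustration): a SET of 0x12340078 in ARM mode is split
   into the rotated 8-bit pieces 0x12000000, 0x00340000 and 0x00000078, so
   arm_gen_constant below emits a mov followed by two orr instructions, with
   the small low piece last, in line with the heuristic described above.  */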
4785
4786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4787 static int
4788 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4789 struct four_ints *return_sequence, int i)
4790 {
4791 int remainder = val & 0xffffffff;
4792 int insns = 0;
4793
4794 /* Try and find a way of doing the job in either two or three
4795 instructions.
4796
4797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4798 location. We start at position I. This may be the MSB, or
4799 optimal_immediate_sequence may have positioned it at the largest block
4800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4801 wrapping around to the top of the word when we drop off the bottom.
4802 In the worst case this code should produce no more than four insns.
4803
4804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4805 constants, shifted to any arbitrary location. We should always start
4806 at the MSB. */
4807 do
4808 {
4809 int end;
4810 unsigned int b1, b2, b3, b4;
4811 unsigned HOST_WIDE_INT result;
4812 int loc;
4813
4814 gcc_assert (insns < 4);
4815
4816 if (i <= 0)
4817 i += 32;
4818
4819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4820 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4821 {
4822 loc = i;
4823 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4824 /* We can use addw/subw for the last 12 bits. */
4825 result = remainder;
4826 else
4827 {
4828 /* Use an 8-bit shifted/rotated immediate. */
4829 end = i - 8;
4830 if (end < 0)
4831 end += 32;
4832 result = remainder & ((0x0ff << end)
4833 | ((i < end) ? (0xff >> (32 - end))
4834 : 0));
4835 i -= 8;
4836 }
4837 }
4838 else
4839 {
4840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4841 arbitrary shifts. */
4842 i -= TARGET_ARM ? 2 : 1;
4843 continue;
4844 }
4845
4846 /* Next, see if we can do a better job with a thumb2 replicated
4847 constant.
4848
4849 We do it this way around to catch the cases like 0x01F001E0 where
4850 two 8-bit immediates would work, but a replicated constant would
4851 make it worse.
4852
4853 TODO: 16-bit constants that don't clear all the bits, but still win.
4854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4855 if (TARGET_THUMB2)
4856 {
4857 b1 = (remainder & 0xff000000) >> 24;
4858 b2 = (remainder & 0x00ff0000) >> 16;
4859 b3 = (remainder & 0x0000ff00) >> 8;
4860 b4 = remainder & 0xff;
4861
4862 if (loc > 24)
4863 {
4864 /* The 8-bit immediate already found clears b1 (and maybe b2),
4865 but must leave b3 and b4 alone. */
4866
4867 /* First try to find a 32-bit replicated constant that clears
4868 almost everything. We can assume that we can't do it in one,
4869 or else we wouldn't be here. */
4870 unsigned int tmp = b1 & b2 & b3 & b4;
4871 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4872 + (tmp << 24);
4873 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4874 + (tmp == b3) + (tmp == b4);
4875 if (tmp
4876 && (matching_bytes >= 3
4877 || (matching_bytes == 2
4878 && const_ok_for_op (remainder & ~tmp2, code))))
4879 {
4880 /* At least 3 of the bytes match, and the fourth has at
4881 least as many bits set, or two of the bytes match
4882 and it will only require one more insn to finish. */
4883 result = tmp2;
4884 i = tmp != b1 ? 32
4885 : tmp != b2 ? 24
4886 : tmp != b3 ? 16
4887 : 8;
4888 }
4889
4890 /* Second, try to find a 16-bit replicated constant that can
4891 leave three of the bytes clear. If b2 or b4 is already
4892 zero, then we can. If the 8-bit from above would not
4893 clear b2 anyway, then we still win. */
4894 else if (b1 == b3 && (!b2 || !b4
4895 || (remainder & 0x00ff0000 & ~result)))
4896 {
4897 result = remainder & 0xff00ff00;
4898 i = 24;
4899 }
4900 }
4901 else if (loc > 16)
4902 {
4903 /* The 8-bit immediate already found clears b2 (and maybe b3)
4904 and we don't get here unless b1 is already clear, but it will
4905 leave b4 unchanged. */
4906
4907 /* If we can clear b2 and b4 at once, then we win, since the
4908 8-bits couldn't possibly reach that far. */
4909 if (b2 == b4)
4910 {
4911 result = remainder & 0x00ff00ff;
4912 i = 16;
4913 }
4914 }
4915 }
4916
4917 return_sequence->i[insns++] = result;
4918 remainder &= ~result;
4919
4920 if (code == SET || code == MINUS)
4921 code = PLUS;
4922 }
4923 while (remainder);
4924
4925 return insns;
4926 }
4927
4928 /* Emit an instruction with the indicated PATTERN. If COND is
4929 non-NULL, conditionalize the execution of the instruction on COND
4930 being true. */
4931
4932 static void
4933 emit_constant_insn (rtx cond, rtx pattern)
4934 {
4935 if (cond)
4936 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4937 emit_insn (pattern);
4938 }
4939
4940 /* As above, but extra parameter GENERATE which, if clear, suppresses
4941 RTL generation. */
4942
4943 static int
4944 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4945 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4946 int subtargets, int generate)
4947 {
4948 int can_invert = 0;
4949 int can_negate = 0;
4950 int final_invert = 0;
4951 int i;
4952 int set_sign_bit_copies = 0;
4953 int clear_sign_bit_copies = 0;
4954 int clear_zero_bit_copies = 0;
4955 int set_zero_bit_copies = 0;
4956 int insns = 0, neg_insns, inv_insns;
4957 unsigned HOST_WIDE_INT temp1, temp2;
4958 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4959 struct four_ints *immediates;
4960 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4961
4962 /* Find out which operations are safe for a given CODE. Also do a quick
4963 check for degenerate cases; these can occur when DImode operations
4964 are split. */
4965 switch (code)
4966 {
4967 case SET:
4968 can_invert = 1;
4969 break;
4970
4971 case PLUS:
4972 can_negate = 1;
4973 break;
4974
4975 case IOR:
4976 if (remainder == 0xffffffff)
4977 {
4978 if (generate)
4979 emit_constant_insn (cond,
4980 gen_rtx_SET (target,
4981 GEN_INT (ARM_SIGN_EXTEND (val))));
4982 return 1;
4983 }
4984
4985 if (remainder == 0)
4986 {
4987 if (reload_completed && rtx_equal_p (target, source))
4988 return 0;
4989
4990 if (generate)
4991 emit_constant_insn (cond, gen_rtx_SET (target, source));
4992 return 1;
4993 }
4994 break;
4995
4996 case AND:
4997 if (remainder == 0)
4998 {
4999 if (generate)
5000 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5001 return 1;
5002 }
5003 if (remainder == 0xffffffff)
5004 {
5005 if (reload_completed && rtx_equal_p (target, source))
5006 return 0;
5007 if (generate)
5008 emit_constant_insn (cond, gen_rtx_SET (target, source));
5009 return 1;
5010 }
5011 can_invert = 1;
5012 break;
5013
5014 case XOR:
5015 if (remainder == 0)
5016 {
5017 if (reload_completed && rtx_equal_p (target, source))
5018 return 0;
5019 if (generate)
5020 emit_constant_insn (cond, gen_rtx_SET (target, source));
5021 return 1;
5022 }
5023
5024 if (remainder == 0xffffffff)
5025 {
5026 if (generate)
5027 emit_constant_insn (cond,
5028 gen_rtx_SET (target,
5029 gen_rtx_NOT (mode, source)));
5030 return 1;
5031 }
5032 final_invert = 1;
5033 break;
5034
5035 case MINUS:
5036 /* We treat MINUS as (val - source), since (source - val) is always
5037 passed as (source + (-val)). */
5038 if (remainder == 0)
5039 {
5040 if (generate)
5041 emit_constant_insn (cond,
5042 gen_rtx_SET (target,
5043 gen_rtx_NEG (mode, source)));
5044 return 1;
5045 }
5046 if (const_ok_for_arm (val))
5047 {
5048 if (generate)
5049 emit_constant_insn (cond,
5050 gen_rtx_SET (target,
5051 gen_rtx_MINUS (mode, GEN_INT (val),
5052 source)));
5053 return 1;
5054 }
5055
5056 break;
5057
5058 default:
5059 gcc_unreachable ();
5060 }
5061
5062 /* If we can do it in one insn get out quickly. */
5063 if (const_ok_for_op (val, code))
5064 {
5065 if (generate)
5066 emit_constant_insn (cond,
5067 gen_rtx_SET (target,
5068 (source
5069 ? gen_rtx_fmt_ee (code, mode, source,
5070 GEN_INT (val))
5071 : GEN_INT (val))));
5072 return 1;
5073 }
5074
5075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5076 insn. */
5077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5079 {
5080 if (generate)
5081 {
5082 if (mode == SImode && i == 16)
5083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5084 smaller insn. */
5085 emit_constant_insn (cond,
5086 gen_zero_extendhisi2
5087 (target, gen_lowpart (HImode, source)));
5088 else
5089 /* Extz only supports SImode, but we can coerce the operands
5090 into that mode. */
5091 emit_constant_insn (cond,
5092 gen_extzv_t2 (gen_lowpart (SImode, target),
5093 gen_lowpart (SImode, source),
5094 GEN_INT (i), const0_rtx));
5095 }
5096
5097 return 1;
5098 }
5099
5100 /* Calculate a few attributes that may be useful for specific
5101 optimizations. */
5102 /* Count number of leading zeros. */
5103 for (i = 31; i >= 0; i--)
5104 {
5105 if ((remainder & (1 << i)) == 0)
5106 clear_sign_bit_copies++;
5107 else
5108 break;
5109 }
5110
5111 /* Count number of leading 1's. */
5112 for (i = 31; i >= 0; i--)
5113 {
5114 if ((remainder & (1 << i)) != 0)
5115 set_sign_bit_copies++;
5116 else
5117 break;
5118 }
5119
5120 /* Count number of trailing zeros. */
5121 for (i = 0; i <= 31; i++)
5122 {
5123 if ((remainder & (1 << i)) == 0)
5124 clear_zero_bit_copies++;
5125 else
5126 break;
5127 }
5128
5129 /* Count number of trailing 1's. */
5130 for (i = 0; i <= 31; i++)
5131 {
5132 if ((remainder & (1 << i)) != 0)
5133 set_zero_bit_copies++;
5134 else
5135 break;
5136 }
5137
5138 switch (code)
5139 {
5140 case SET:
5141 /* See if we can do this by sign_extending a constant that is known
5142 to be negative. This is a good way of doing it, since the shift
5143 may well merge into a subsequent insn. */
5144 if (set_sign_bit_copies > 1)
5145 {
5146 if (const_ok_for_arm
5147 (temp1 = ARM_SIGN_EXTEND (remainder
5148 << (set_sign_bit_copies - 1))))
5149 {
5150 if (generate)
5151 {
5152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5153 emit_constant_insn (cond,
5154 gen_rtx_SET (new_src, GEN_INT (temp1)));
5155 emit_constant_insn (cond,
5156 gen_ashrsi3 (target, new_src,
5157 GEN_INT (set_sign_bit_copies - 1)));
5158 }
5159 return 2;
5160 }
5161 /* For an inverted constant, we will need to set the low bits,
5162 these will be shifted out of harm's way. */
5163 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5164 if (const_ok_for_arm (~temp1))
5165 {
5166 if (generate)
5167 {
5168 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5169 emit_constant_insn (cond,
5170 gen_rtx_SET (new_src, GEN_INT (temp1)));
5171 emit_constant_insn (cond,
5172 gen_ashrsi3 (target, new_src,
5173 GEN_INT (set_sign_bit_copies - 1)));
5174 }
5175 return 2;
5176 }
5177 }
5178
5179 /* See if we can calculate the value as the difference between two
5180 valid immediates. */
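   /* For illustration (register names arbitrary): remainder 0x00ffffff
      would need three immediates chunk-wise (0x00ff0000, 0x0000ff00,
      0x000000ff), but rounding up gives temp1 = 0x01000000 and
      temp2 = 1, both valid, so two insns suffice, roughly
	  mov	rd, #0x01000000
	  sub	rd, rd, #1  */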
5181 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5182 {
5183 int topshift = clear_sign_bit_copies & ~1;
5184
5185 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5186 & (0xff000000 >> topshift));
5187
5188 /* If temp1 is zero, then that means the 9 most significant
5189 bits of remainder were 1 and we've caused it to overflow.
5190 When topshift is 0 we don't need to do anything since we
5191 can borrow from 'bit 32'. */
5192 if (temp1 == 0 && topshift != 0)
5193 temp1 = 0x80000000 >> (topshift - 1);
5194
5195 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5196
5197 if (const_ok_for_arm (temp2))
5198 {
5199 if (generate)
5200 {
5201 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5202 emit_constant_insn (cond,
5203 gen_rtx_SET (new_src, GEN_INT (temp1)));
5204 emit_constant_insn (cond,
5205 gen_addsi3 (target, new_src,
5206 GEN_INT (-temp2)));
5207 }
5208
5209 return 2;
5210 }
5211 }
5212
5213 /* See if we can generate this by setting the bottom (or the top)
5214 16 bits, and then shifting these into the other half of the
5215 word. We only look for the simplest cases, to do more would cost
5216 too much. Be careful, however, not to generate this when the
5217 alternative would take fewer insns. */
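   /* For illustration: val 0x01230123 takes four 8-bit chunks directly,
      but can be built in three insns by synthesizing 0x0123 (two insns)
      and then OR-ing in a copy shifted left by 16, roughly
      "orr rd, rd, rd, lsl #16" (register names arbitrary).  */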
5218 if (val & 0xffff0000)
5219 {
5220 temp1 = remainder & 0xffff0000;
5221 temp2 = remainder & 0x0000ffff;
5222
5223 /* Overlaps outside this range are best done using other methods. */
5224 for (i = 9; i < 24; i++)
5225 {
5226 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5227 && !const_ok_for_arm (temp2))
5228 {
5229 rtx new_src = (subtargets
5230 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5231 : target);
5232 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5233 source, subtargets, generate);
5234 source = new_src;
5235 if (generate)
5236 emit_constant_insn
5237 (cond,
5238 gen_rtx_SET
5239 (target,
5240 gen_rtx_IOR (mode,
5241 gen_rtx_ASHIFT (mode, source,
5242 GEN_INT (i)),
5243 source)));
5244 return insns + 1;
5245 }
5246 }
5247
5248 /* Don't duplicate cases already considered. */
5249 for (i = 17; i < 24; i++)
5250 {
5251 if (((temp1 | (temp1 >> i)) == remainder)
5252 && !const_ok_for_arm (temp1))
5253 {
5254 rtx new_src = (subtargets
5255 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5256 : target);
5257 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5258 source, subtargets, generate);
5259 source = new_src;
5260 if (generate)
5261 emit_constant_insn
5262 (cond,
5263 gen_rtx_SET (target,
5264 gen_rtx_IOR
5265 (mode,
5266 gen_rtx_LSHIFTRT (mode, source,
5267 GEN_INT (i)),
5268 source)));
5269 return insns + 1;
5270 }
5271 }
5272 }
5273 break;
5274
5275 case IOR:
5276 case XOR:
5277 /* If we have IOR or XOR, and the constant can be loaded in a
5278 single instruction, and we can find a temporary to put it in,
5279 then this can be done in two instructions instead of 3-4. */
5280 if (subtargets
5281 /* TARGET can't be NULL if SUBTARGETS is 0. */
5282 || (reload_completed && !reg_mentioned_p (target, source)))
5283 {
5284 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5285 {
5286 if (generate)
5287 {
5288 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5289
5290 emit_constant_insn (cond,
5291 gen_rtx_SET (sub, GEN_INT (val)));
5292 emit_constant_insn (cond,
5293 gen_rtx_SET (target,
5294 gen_rtx_fmt_ee (code, mode,
5295 source, sub)));
5296 }
5297 return 2;
5298 }
5299 }
5300
5301 if (code == XOR)
5302 break;
5303
5304 /* Convert
5305 x = y | constant (which is composed of set_sign_bit_copies leading 1s
5306 followed by 0s, e.g. 0xfff00000)
5307 to x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5308
5309 This can be done in 2 instructions by using shifts with mov or mvn.
5310 e.g. for
5311 x = x | 0xfff00000;
5312 we generate
5313 mvn r0, r0, asl #12
5314 mvn r0, r0, lsr #12 */
5315 if (set_sign_bit_copies > 8
5316 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5317 {
5318 if (generate)
5319 {
5320 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5321 rtx shift = GEN_INT (set_sign_bit_copies);
5322
5323 emit_constant_insn
5324 (cond,
5325 gen_rtx_SET (sub,
5326 gen_rtx_NOT (mode,
5327 gen_rtx_ASHIFT (mode,
5328 source,
5329 shift))));
5330 emit_constant_insn
5331 (cond,
5332 gen_rtx_SET (target,
5333 gen_rtx_NOT (mode,
5334 gen_rtx_LSHIFTRT (mode, sub,
5335 shift))));
5336 }
5337 return 2;
5338 }
5339
5340 /* Convert
5341 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5342 to
5343 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5344
5345 E.g. for r0 = r0 | 0xfff
5346 mvn r0, r0, lsr #12
5347 mvn r0, r0, asl #12
5348
5349 */
5350 if (set_zero_bit_copies > 8
5351 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5352 {
5353 if (generate)
5354 {
5355 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5356 rtx shift = GEN_INT (set_zero_bit_copies);
5357
5358 emit_constant_insn
5359 (cond,
5360 gen_rtx_SET (sub,
5361 gen_rtx_NOT (mode,
5362 gen_rtx_LSHIFTRT (mode,
5363 source,
5364 shift))));
5365 emit_constant_insn
5366 (cond,
5367 gen_rtx_SET (target,
5368 gen_rtx_NOT (mode,
5369 gen_rtx_ASHIFT (mode, sub,
5370 shift))));
5371 }
5372 return 2;
5373 }
5374
5375 /* This will never be reached for Thumb2 because orn is a valid
5376 instruction. This is for Thumb1 and the ARM 32 bit cases.
5377
5378 x = y | constant (such that ~constant is a valid constant)
5379 Transform this to
5380 x = ~(~y & ~constant).
5381 */
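   /* For illustration, when no scratch register is available:
      r0 |= 0xffff00ff (so ~constant is the valid immediate 0xff00)
      becomes roughly
	  mvn	r0, r0
	  and	r0, r0, #0xff00
	  mvn	r0, r0
      i.e. three insns using only the destination register.  */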
5382 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5383 {
5384 if (generate)
5385 {
5386 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5387 emit_constant_insn (cond,
5388 gen_rtx_SET (sub,
5389 gen_rtx_NOT (mode, source)));
5390 source = sub;
5391 if (subtargets)
5392 sub = gen_reg_rtx (mode);
5393 emit_constant_insn (cond,
5394 gen_rtx_SET (sub,
5395 gen_rtx_AND (mode, source,
5396 GEN_INT (temp1))));
5397 emit_constant_insn (cond,
5398 gen_rtx_SET (target,
5399 gen_rtx_NOT (mode, sub)));
5400 }
5401 return 3;
5402 }
5403 break;
5404
5405 case AND:
5406 /* See if two shifts will do two or more insns' worth of work. */
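   /* For illustration, on a core without Thumb-2/UBFX: x &= 0x00003fff
      has 18 clear bits at the top, so
	  mov	rd, rx, lsl #18
	  mov	rd, rd, lsr #18
      (or the equivalent lsl/lsr forms) does in two insns what would
      otherwise take three bic insns (0xff000000, 0x00ff0000 and
      0x0000c000).  */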
5407 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5408 {
5409 HOST_WIDE_INT shift_mask = ((0xffffffff
5410 << (32 - clear_sign_bit_copies))
5411 & 0xffffffff);
5412
5413 if ((remainder | shift_mask) != 0xffffffff)
5414 {
5415 HOST_WIDE_INT new_val
5416 = ARM_SIGN_EXTEND (remainder | shift_mask);
5417
5418 if (generate)
5419 {
5420 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5421 insns = arm_gen_constant (AND, SImode, cond, new_val,
5422 new_src, source, subtargets, 1);
5423 source = new_src;
5424 }
5425 else
5426 {
5427 rtx targ = subtargets ? NULL_RTX : target;
5428 insns = arm_gen_constant (AND, mode, cond, new_val,
5429 targ, source, subtargets, 0);
5430 }
5431 }
5432
5433 if (generate)
5434 {
5435 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5436 rtx shift = GEN_INT (clear_sign_bit_copies);
5437
5438 emit_insn (gen_ashlsi3 (new_src, source, shift));
5439 emit_insn (gen_lshrsi3 (target, new_src, shift));
5440 }
5441
5442 return insns + 2;
5443 }
5444
5445 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5446 {
5447 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5448
5449 if ((remainder | shift_mask) != 0xffffffff)
5450 {
5451 HOST_WIDE_INT new_val
5452 = ARM_SIGN_EXTEND (remainder | shift_mask);
5453 if (generate)
5454 {
5455 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5456
5457 insns = arm_gen_constant (AND, mode, cond, new_val,
5458 new_src, source, subtargets, 1);
5459 source = new_src;
5460 }
5461 else
5462 {
5463 rtx targ = subtargets ? NULL_RTX : target;
5464
5465 insns = arm_gen_constant (AND, mode, cond, new_val,
5466 targ, source, subtargets, 0);
5467 }
5468 }
5469
5470 if (generate)
5471 {
5472 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5473 rtx shift = GEN_INT (clear_zero_bit_copies);
5474
5475 emit_insn (gen_lshrsi3 (new_src, source, shift));
5476 emit_insn (gen_ashlsi3 (target, new_src, shift));
5477 }
5478
5479 return insns + 2;
5480 }
5481
5482 break;
5483
5484 default:
5485 break;
5486 }
5487
5488 /* Calculate what the instruction sequences would be if we generated it
5489 normally, negated, or inverted. */
5490 if (code == AND)
5491 /* AND cannot be split into multiple insns, so invert and use BIC. */
5492 insns = 99;
5493 else
5494 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5495
5496 if (can_negate)
5497 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5498 &neg_immediates);
5499 else
5500 neg_insns = 99;
5501
5502 if (can_invert || final_invert)
5503 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5504 &inv_immediates);
5505 else
5506 inv_insns = 99;
5507
5508 immediates = &pos_immediates;
5509
5510 /* Is the negated immediate sequence more efficient? */
5511 if (neg_insns < insns && neg_insns <= inv_insns)
5512 {
5513 insns = neg_insns;
5514 immediates = &neg_immediates;
5515 }
5516 else
5517 can_negate = 0;
5518
5519 /* Is the inverted immediate sequence more efficient?
5520 We must allow for an extra NOT instruction for XOR operations, although
5521 there is some chance that the final 'mvn' will get optimized later. */
5522 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5523 {
5524 insns = inv_insns;
5525 immediates = &inv_immediates;
5526 }
5527 else
5528 {
5529 can_invert = 0;
5530 final_invert = 0;
5531 }
5532
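   /* For illustration: for x &= 0xfff0ff0f the positive sequence is
      disallowed (insns == 99), but the inverted constant 0x000f00f0
      splits into the two valid immediates 0x000f0000 and 0x000000f0,
      so two AND-with-inverted-constant insns (normally emitted as bic)
      are chosen.  Similarly, x += 0xffffe0ff prefers the negated
      sequence, i.e. two subtractions of 0x1f00 and 1.  */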
5533 /* Now output the chosen sequence as instructions. */
5534 if (generate)
5535 {
5536 for (i = 0; i < insns; i++)
5537 {
5538 rtx new_src, temp1_rtx;
5539
5540 temp1 = immediates->i[i];
5541
5542 if (code == SET || code == MINUS)
5543 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5544 else if ((final_invert || i < (insns - 1)) && subtargets)
5545 new_src = gen_reg_rtx (mode);
5546 else
5547 new_src = target;
5548
5549 if (can_invert)
5550 temp1 = ~temp1;
5551 else if (can_negate)
5552 temp1 = -temp1;
5553
5554 temp1 = trunc_int_for_mode (temp1, mode);
5555 temp1_rtx = GEN_INT (temp1);
5556
5557 if (code == SET)
5558 ;
5559 else if (code == MINUS)
5560 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5561 else
5562 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5563
5564 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5565 source = new_src;
5566
5567 if (code == SET)
5568 {
5569 can_negate = can_invert;
5570 can_invert = 0;
5571 code = PLUS;
5572 }
5573 else if (code == MINUS)
5574 code = PLUS;
5575 }
5576 }
5577
5578 if (final_invert)
5579 {
5580 if (generate)
5581 emit_constant_insn (cond, gen_rtx_SET (target,
5582 gen_rtx_NOT (mode, source)));
5583 insns++;
5584 }
5585
5586 return insns;
5587 }
5588
5589 /* Return TRUE if op is a constant where both the low and top words are
5590 suitable for RSB/RSC instructions. This is never true for Thumb, since
5591 we do not have RSC in that case. */
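/* For illustration: with op == 0x0000001200000034 both halves (0x34 and
   0x12) are valid immediates, so an RSBS/RSC pair is preferred; with
   op == 0x1234567800000000 the high word is not encodable and the
   function returns false.  */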
5592 static bool
5593 arm_const_double_prefer_rsbs_rsc (rtx op)
5594 {
5595 /* Thumb lacks RSC, so we never prefer that sequence. */
5596 if (TARGET_THUMB || !CONST_INT_P (op))
5597 return false;
5598 HOST_WIDE_INT hi, lo;
5599 lo = UINTVAL (op) & 0xffffffffULL;
5600 hi = UINTVAL (op) >> 32;
5601 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5602 }
5603
5604 /* Canonicalize a comparison so that we are more likely to recognize it.
5605 This can be done for a few constant compares, where we can make the
5606 immediate value easier to load. */
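/* For illustration: (x > 8191) cannot be tested directly because
   neither 8191 nor -8191 is a valid ARM immediate, but it is
   equivalent to (x >= 8192), and 8192 is a single rotated 8-bit
   constant, so the comparison is rewritten as GE against 8192.  */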
5607
5608 static void
5609 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5610 bool op0_preserve_value)
5611 {
5612 machine_mode mode;
5613 unsigned HOST_WIDE_INT i, maxval;
5614
5615 mode = GET_MODE (*op0);
5616 if (mode == VOIDmode)
5617 mode = GET_MODE (*op1);
5618
5619 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5620
5621 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5622 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5623 either reversed or (for constant OP1) adjusted to GE/LT.
5624 Similarly for GTU/LEU in Thumb mode. */
5625 if (mode == DImode)
5626 {
5627
5628 if (*code == GT || *code == LE
5629 || *code == GTU || *code == LEU)
5630 {
5631 /* Missing comparison. First try to use an available
5632 comparison. */
5633 if (CONST_INT_P (*op1))
5634 {
5635 i = INTVAL (*op1);
5636 switch (*code)
5637 {
5638 case GT:
5639 case LE:
5640 if (i != maxval)
5641 {
5642 /* Try to convert to GE/LT, unless that would be more
5643 expensive. */
5644 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5645 && arm_const_double_prefer_rsbs_rsc (*op1))
5646 return;
5647 *op1 = GEN_INT (i + 1);
5648 *code = *code == GT ? GE : LT;
5649 }
5650 else
5651 {
5652 /* GT maxval is always false, LE maxval is always true.
5653 We can't fold that away here as we must make a
5654 comparison, but we can fold them to comparisons
5655 with the same result that can be handled:
5656 op0 GT maxval -> op0 LT minval
5657 op0 LE maxval -> op0 GE minval
5658 where minval = (-maxval - 1). */
5659 *op1 = GEN_INT (-maxval - 1);
5660 *code = *code == GT ? LT : GE;
5661 }
5662 return;
5663
5664 case GTU:
5665 case LEU:
5666 if (i != ~((unsigned HOST_WIDE_INT) 0))
5667 {
5668 /* Try to convert to GEU/LTU, unless that would
5669 be more expensive. */
5670 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5671 && arm_const_double_prefer_rsbs_rsc (*op1))
5672 return;
5673 *op1 = GEN_INT (i + 1);
5674 *code = *code == GTU ? GEU : LTU;
5675 }
5676 else
5677 {
5678 /* GTU ~0 is always false, LEU ~0 is always true.
5679 We can't fold that away here as we must make a
5680 comparison, but we can fold them to comparisons
5681 with the same result that can be handled:
5682 op0 GTU ~0 -> op0 LTU 0
5683 op0 LEU ~0 -> op0 GEU 0. */
5684 *op1 = const0_rtx;
5685 *code = *code == GTU ? LTU : GEU;
5686 }
5687 return;
5688
5689 default:
5690 gcc_unreachable ();
5691 }
5692 }
5693
5694 if (!op0_preserve_value)
5695 {
5696 std::swap (*op0, *op1);
5697 *code = (int)swap_condition ((enum rtx_code)*code);
5698 }
5699 }
5700 return;
5701 }
5702
5703 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5704 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5705 to facilitate possible combining with a cmp into 'ands'. */
5706 if (mode == SImode
5707 && GET_CODE (*op0) == ZERO_EXTEND
5708 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5709 && GET_MODE (XEXP (*op0, 0)) == QImode
5710 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5711 && subreg_lowpart_p (XEXP (*op0, 0))
5712 && *op1 == const0_rtx)
5713 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5714 GEN_INT (255));
5715
5716 /* Comparisons smaller than DImode. Only adjust comparisons against
5717 an out-of-range constant. */
5718 if (!CONST_INT_P (*op1)
5719 || const_ok_for_arm (INTVAL (*op1))
5720 || const_ok_for_arm (- INTVAL (*op1)))
5721 return;
5722
5723 i = INTVAL (*op1);
5724
5725 switch (*code)
5726 {
5727 case EQ:
5728 case NE:
5729 return;
5730
5731 case GT:
5732 case LE:
5733 if (i != maxval
5734 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5735 {
5736 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5737 *code = *code == GT ? GE : LT;
5738 return;
5739 }
5740 break;
5741
5742 case GE:
5743 case LT:
5744 if (i != ~maxval
5745 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5746 {
5747 *op1 = GEN_INT (i - 1);
5748 *code = *code == GE ? GT : LE;
5749 return;
5750 }
5751 break;
5752
5753 case GTU:
5754 case LEU:
5755 if (i != ~((unsigned HOST_WIDE_INT) 0)
5756 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5757 {
5758 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5759 *code = *code == GTU ? GEU : LTU;
5760 return;
5761 }
5762 break;
5763
5764 case GEU:
5765 case LTU:
5766 if (i != 0
5767 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5768 {
5769 *op1 = GEN_INT (i - 1);
5770 *code = *code == GEU ? GTU : LEU;
5771 return;
5772 }
5773 break;
5774
5775 default:
5776 gcc_unreachable ();
5777 }
5778 }
5779
5780
5781 /* Define how to find the value returned by a function. */
5782
5783 static rtx
5784 arm_function_value (const_tree type, const_tree func,
5785 bool outgoing ATTRIBUTE_UNUSED)
5786 {
5787 machine_mode mode;
5788 int unsignedp ATTRIBUTE_UNUSED;
5789 rtx r ATTRIBUTE_UNUSED;
5790
5791 mode = TYPE_MODE (type);
5792
5793 if (TARGET_AAPCS_BASED)
5794 return aapcs_allocate_return_reg (mode, type, func);
5795
5796 /* Promote integer types. */
5797 if (INTEGRAL_TYPE_P (type))
5798 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5799
5800 /* Promote small structs returned in a register to full-word size
5801 for big-endian AAPCS. */
5802 if (arm_return_in_msb (type))
5803 {
5804 HOST_WIDE_INT size = int_size_in_bytes (type);
5805 if (size % UNITS_PER_WORD != 0)
5806 {
5807 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5808 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5809 }
5810 }
5811
5812 return arm_libcall_value_1 (mode);
5813 }
5814
5815 /* libcall hashtable helpers. */
5816
5817 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5818 {
5819 static inline hashval_t hash (const rtx_def *);
5820 static inline bool equal (const rtx_def *, const rtx_def *);
5821 static inline void remove (rtx_def *);
5822 };
5823
5824 inline bool
5825 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5826 {
5827 return rtx_equal_p (p1, p2);
5828 }
5829
5830 inline hashval_t
5831 libcall_hasher::hash (const rtx_def *p1)
5832 {
5833 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5834 }
5835
5836 typedef hash_table<libcall_hasher> libcall_table_type;
5837
5838 static void
5839 add_libcall (libcall_table_type *htab, rtx libcall)
5840 {
5841 *htab->find_slot (libcall, INSERT) = libcall;
5842 }
5843
5844 static bool
5845 arm_libcall_uses_aapcs_base (const_rtx libcall)
5846 {
5847 static bool init_done = false;
5848 static libcall_table_type *libcall_htab = NULL;
5849
5850 if (!init_done)
5851 {
5852 init_done = true;
5853
5854 libcall_htab = new libcall_table_type (31);
5855 add_libcall (libcall_htab,
5856 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5857 add_libcall (libcall_htab,
5858 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5859 add_libcall (libcall_htab,
5860 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5861 add_libcall (libcall_htab,
5862 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5863
5864 add_libcall (libcall_htab,
5865 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5866 add_libcall (libcall_htab,
5867 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5868 add_libcall (libcall_htab,
5869 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5870 add_libcall (libcall_htab,
5871 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5872
5873 add_libcall (libcall_htab,
5874 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5875 add_libcall (libcall_htab,
5876 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5877 add_libcall (libcall_htab,
5878 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5879 add_libcall (libcall_htab,
5880 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5881 add_libcall (libcall_htab,
5882 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5883 add_libcall (libcall_htab,
5884 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5885 add_libcall (libcall_htab,
5886 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5887 add_libcall (libcall_htab,
5888 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5889 add_libcall (libcall_htab,
5890 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5891 add_libcall (libcall_htab,
5892 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5893
5894 /* Values from double-precision helper functions are returned in core
5895 registers if the selected core only supports single-precision
5896 arithmetic, even if we are using the hard-float ABI. The same is
5897 true for single-precision helpers except in case of MVE, because in
5898 MVE we will be using the hard-float ABI on a CPU which doesn't support
5899 single-precision operations in hardware. In MVE the following check
5900 enables use of emulation for the single-precision arithmetic
5901 operations. */
5902 if (TARGET_HAVE_MVE)
5903 {
5904 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5905 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5906 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5907 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5908 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5909 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5910 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5911 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5912 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5913 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5914 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5915 }
5916 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5917 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5918 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5919 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5920 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5921 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5922 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5923 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5924 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5925 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5926 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5927 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5928 SFmode));
5929 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5930 DFmode));
5931 add_libcall (libcall_htab,
5932 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5933 }
5934
5935 return libcall && libcall_htab->find (libcall) != NULL;
5936 }
5937
5938 static rtx
5939 arm_libcall_value_1 (machine_mode mode)
5940 {
5941 if (TARGET_AAPCS_BASED)
5942 return aapcs_libcall_value (mode);
5943 else if (TARGET_IWMMXT_ABI
5944 && arm_vector_mode_supported_p (mode))
5945 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5946 else
5947 return gen_rtx_REG (mode, ARG_REGISTER (1));
5948 }
5949
5950 /* Define how to find the value returned by a library function
5951 assuming the value has mode MODE. */
5952
5953 static rtx
5954 arm_libcall_value (machine_mode mode, const_rtx libcall)
5955 {
5956 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5957 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5958 {
5959 /* The following libcalls return their result in integer registers,
5960 even though they return a floating point value. */
5961 if (arm_libcall_uses_aapcs_base (libcall))
5962 return gen_rtx_REG (mode, ARG_REGISTER (1));
5963
5964 }
5965
5966 return arm_libcall_value_1 (mode);
5967 }
5968
5969 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5970
5971 static bool
5972 arm_function_value_regno_p (const unsigned int regno)
5973 {
5974 if (regno == ARG_REGISTER (1)
5975 || (TARGET_32BIT
5976 && TARGET_AAPCS_BASED
5977 && TARGET_HARD_FLOAT
5978 && regno == FIRST_VFP_REGNUM)
5979 || (TARGET_IWMMXT_ABI
5980 && regno == FIRST_IWMMXT_REGNUM))
5981 return true;
5982
5983 return false;
5984 }
5985
5986 /* Determine the amount of memory needed to store the possible return
5987 registers of an untyped call. */
5988 int
5989 arm_apply_result_size (void)
5990 {
5991 int size = 16;
5992
5993 if (TARGET_32BIT)
5994 {
5995 if (TARGET_HARD_FLOAT_ABI)
5996 size += 32;
5997 if (TARGET_IWMMXT_ABI)
5998 size += 8;
5999 }
6000
6001 return size;
6002 }
6003
6004 /* Decide whether TYPE should be returned in memory (true)
6005 or in a register (false). FNTYPE is the type of the function making
6006 the call. */
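/* For illustration, under AAPCS: struct { char c; } fits in one word
   and is returned in r0, whereas struct { int a, b; } is larger than a
   word and has no co-processor candidacy, so it is returned in
   memory.  */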
6007 static bool
6008 arm_return_in_memory (const_tree type, const_tree fntype)
6009 {
6010 HOST_WIDE_INT size;
6011
6012 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6013
6014 if (TARGET_AAPCS_BASED)
6015 {
6016 /* Simple, non-aggregate types (i.e. not including vectors and
6017 complex) are always returned in a register (or registers).
6018 We don't care about which register here, so we can short-cut
6019 some of the detail. */
6020 if (!AGGREGATE_TYPE_P (type)
6021 && TREE_CODE (type) != VECTOR_TYPE
6022 && TREE_CODE (type) != COMPLEX_TYPE)
6023 return false;
6024
6025 /* Any return value that is no larger than one word can be
6026 returned in r0. */
6027 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6028 return false;
6029
6030 /* Check any available co-processors to see if they accept the
6031 type as a register candidate (VFP, for example, can return
6032 some aggregates in consecutive registers). These aren't
6033 available if the call is variadic. */
6034 if (aapcs_select_return_coproc (type, fntype) >= 0)
6035 return false;
6036
6037 /* Vector values should be returned using ARM registers, not
6038 memory (unless they're over 16 bytes, which will break since
6039 we only have four call-clobbered registers to play with). */
6040 if (TREE_CODE (type) == VECTOR_TYPE)
6041 return (size < 0 || size > (4 * UNITS_PER_WORD));
6042
6043 /* The rest go in memory. */
6044 return true;
6045 }
6046
6047 if (TREE_CODE (type) == VECTOR_TYPE)
6048 return (size < 0 || size > (4 * UNITS_PER_WORD));
6049
6050 if (!AGGREGATE_TYPE_P (type)
6051 && (TREE_CODE (type) != VECTOR_TYPE))
6052 /* All simple types are returned in registers. */
6053 return false;
6054
6055 if (arm_abi != ARM_ABI_APCS)
6056 {
6057 /* ATPCS and later return aggregate types in memory only if they are
6058 larger than a word (or are variable size). */
6059 return (size < 0 || size > UNITS_PER_WORD);
6060 }
6061
6062 /* For the arm-wince targets we choose to be compatible with Microsoft's
6063 ARM and Thumb compilers, which always return aggregates in memory. */
6064 #ifndef ARM_WINCE
6065 /* All structures/unions bigger than one word are returned in memory.
6066 Also catch the case where int_size_in_bytes returns -1. In this case
6067 the aggregate is either huge or of variable size, and in either case
6068 we will want to return it via memory and not in a register. */
6069 if (size < 0 || size > UNITS_PER_WORD)
6070 return true;
6071
6072 if (TREE_CODE (type) == RECORD_TYPE)
6073 {
6074 tree field;
6075
6076 /* For a struct the APCS says that we only return in a register
6077 if the type is 'integer like' and every addressable element
6078 has an offset of zero. For practical purposes this means
6079 that the structure can have at most one non bit-field element
6080 and that this element must be the first one in the structure. */
6081
6082 /* Find the first field, ignoring non FIELD_DECL things which will
6083 have been created by C++. */
6084 /* NOTE: This code is deprecated and has not been updated to handle
6085 DECL_FIELD_ABI_IGNORED. */
6086 for (field = TYPE_FIELDS (type);
6087 field && TREE_CODE (field) != FIELD_DECL;
6088 field = DECL_CHAIN (field))
6089 continue;
6090
6091 if (field == NULL)
6092 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6093
6094 /* Check that the first field is valid for returning in a register. */
6095
6096 /* ... Floats are not allowed. */
6097 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6098 return true;
6099
6100 /* ... Aggregates that are not themselves valid for returning in
6101 a register are not allowed. */
6102 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6103 return true;
6104
6105 /* Now check the remaining fields, if any. Only bitfields are allowed,
6106 since they are not addressable. */
6107 for (field = DECL_CHAIN (field);
6108 field;
6109 field = DECL_CHAIN (field))
6110 {
6111 if (TREE_CODE (field) != FIELD_DECL)
6112 continue;
6113
6114 if (!DECL_BIT_FIELD_TYPE (field))
6115 return true;
6116 }
6117
6118 return false;
6119 }
6120
6121 if (TREE_CODE (type) == UNION_TYPE)
6122 {
6123 tree field;
6124
6125 /* Unions can be returned in registers if every element is
6126 integral, or can be returned in an integer register. */
6127 for (field = TYPE_FIELDS (type);
6128 field;
6129 field = DECL_CHAIN (field))
6130 {
6131 if (TREE_CODE (field) != FIELD_DECL)
6132 continue;
6133
6134 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6135 return true;
6136
6137 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6138 return true;
6139 }
6140
6141 return false;
6142 }
6143 #endif /* not ARM_WINCE */
6144
6145 /* Return all other types in memory. */
6146 return true;
6147 }
6148
6149 const struct pcs_attribute_arg
6150 {
6151 const char *arg;
6152 enum arm_pcs value;
6153 } pcs_attribute_args[] =
6154 {
6155 {"aapcs", ARM_PCS_AAPCS},
6156 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6157 #if 0
6158 /* We could recognize these, but changes would be needed elsewhere
6159 * to implement them. */
6160 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6161 {"atpcs", ARM_PCS_ATPCS},
6162 {"apcs", ARM_PCS_APCS},
6163 #endif
6164 {NULL, ARM_PCS_UNKNOWN}
6165 };
6166
6167 static enum arm_pcs
6168 arm_pcs_from_attribute (tree attr)
6169 {
6170 const struct pcs_attribute_arg *ptr;
6171 const char *arg;
6172
6173 /* Get the value of the argument. */
6174 if (TREE_VALUE (attr) == NULL_TREE
6175 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6176 return ARM_PCS_UNKNOWN;
6177
6178 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6179
6180 /* Check it against the list of known arguments. */
6181 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6182 if (streq (arg, ptr->arg))
6183 return ptr->value;
6184
6185 /* An unrecognized PCS name. */
6186 return ARM_PCS_UNKNOWN;
6187 }
6188
6189 /* Get the PCS variant to use for this call. TYPE is the function's type
6190 specification, DECL is the specific declaration. DECL may be null if
6191 the call could be indirect or if this is a library call. */
6192 static enum arm_pcs
6193 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6194 {
6195 bool user_convention = false;
6196 enum arm_pcs user_pcs = arm_pcs_default;
6197 tree attr;
6198
6199 gcc_assert (type);
6200
6201 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6202 if (attr)
6203 {
6204 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6205 user_convention = true;
6206 }
6207
6208 if (TARGET_AAPCS_BASED)
6209 {
6210 /* Detect varargs functions. These always use the base rules
6211 (no argument is ever a candidate for a co-processor
6212 register). */
6213 bool base_rules = stdarg_p (type);
6214
6215 if (user_convention)
6216 {
6217 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6218 sorry ("non-AAPCS derived PCS variant");
6219 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6220 error ("variadic functions must use the base AAPCS variant");
6221 }
6222
6223 if (base_rules)
6224 return ARM_PCS_AAPCS;
6225 else if (user_convention)
6226 return user_pcs;
6227 #if 0
6228 /* Unfortunately, this is not safe and can lead to wrong code
6229 being generated (PR96882). Not all calls into the back-end
6230 pass the DECL, so it is unsafe to make any PCS-changing
6231 decisions based on it. In particular the RETURN_IN_MEMORY
6232 hook is only ever passed a TYPE. This needs revisiting to
6233 see if there are any partial improvements that can be
6234 re-enabled. */
6235 else if (decl && flag_unit_at_a_time)
6236 {
6237 /* Local functions never leak outside this compilation unit,
6238 so we are free to use whatever conventions are
6239 appropriate. */
6240 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6241 cgraph_node *local_info_node
6242 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6243 if (local_info_node && local_info_node->local)
6244 return ARM_PCS_AAPCS_LOCAL;
6245 }
6246 #endif
6247 }
6248 else if (user_convention && user_pcs != arm_pcs_default)
6249 sorry ("PCS variant");
6250
6251 /* For everything else we use the target's default. */
6252 return arm_pcs_default;
6253 }
6254
6255
6256 static void
6257 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
6258 const_tree fntype ATTRIBUTE_UNUSED,
6259 rtx libcall ATTRIBUTE_UNUSED,
6260 const_tree fndecl ATTRIBUTE_UNUSED)
6261 {
6262 /* Record the unallocated VFP registers. */
6263 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6264 pcum->aapcs_vfp_reg_alloc = 0;
6265 }
6266
6267 /* Bitmasks that indicate whether earlier versions of GCC would have
6268 taken a different path through the ABI logic. This should result in
6269 a -Wpsabi warning if the earlier path led to a different ABI decision.
6270
6271 WARN_PSABI_EMPTY_CXX17_BASE
6272 Indicates that the type includes an artificial empty C++17 base field
6273 that, prior to GCC 10.1, would prevent the type from being treated as
6274 a HFA or HVA. See PR94711 for details.
6275
6276 WARN_PSABI_NO_UNIQUE_ADDRESS
6277 Indicates that the type includes an empty [[no_unique_address]] field
6278 that, prior to GCC 10.1, would prevent the type from being treated as
6279 a HFA or HVA.

   WARN_PSABI_ZERO_WIDTH_BITFIELD
   Indicates that the type includes a zero-width bit-field that, prior to
   GCC 12.1, could affect whether the type was treated as a HFA or
   HVA.  */
6280 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6281 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6282 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6283
6284 /* Walk down the type tree of TYPE counting consecutive base elements.
6285 If *MODEP is VOIDmode, then set it to the first valid floating point
6286 type. If a non-floating point type is found, or if a floating point
6287 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6288 otherwise return the count in the sub-tree.
6289
6290 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6291 function has changed its behavior relative to earlier versions of GCC.
6292 Normally the argument should be nonnull and point to a zero-initialized
6293 variable. The function then records whether the ABI decision might
6294 be affected by a known fix to the ABI logic, setting the associated
6295 WARN_PSABI_* bits if so.
6296
6297 When the argument is instead a null pointer, the function tries to
6298 simulate the behavior of GCC before all such ABI fixes were made.
6299 This is useful to check whether the function returns something
6300 different after the ABI fixes. */
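/* For illustration: struct { float x, y, z, w; } yields a count of 4
   with *MODEP set to SFmode (a homogeneous aggregate candidate), while
   struct { float f; double d; } and struct { float f; int i; } both
   return -1 because the fields are not all of one supported
   floating-point mode.  */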
6301 static int
6302 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6303 unsigned int *warn_psabi_flags)
6304 {
6305 machine_mode mode;
6306 HOST_WIDE_INT size;
6307
6308 switch (TREE_CODE (type))
6309 {
6310 case REAL_TYPE:
6311 mode = TYPE_MODE (type);
6312 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6313 return -1;
6314
6315 if (*modep == VOIDmode)
6316 *modep = mode;
6317
6318 if (*modep == mode)
6319 return 1;
6320
6321 break;
6322
6323 case COMPLEX_TYPE:
6324 mode = TYPE_MODE (TREE_TYPE (type));
6325 if (mode != DFmode && mode != SFmode)
6326 return -1;
6327
6328 if (*modep == VOIDmode)
6329 *modep = mode;
6330
6331 if (*modep == mode)
6332 return 2;
6333
6334 break;
6335
6336 case VECTOR_TYPE:
6337 /* Use V2SImode and V4SImode as representatives of all 64-bit
6338 and 128-bit vector types, whether or not those modes are
6339 supported with the present options. */
6340 size = int_size_in_bytes (type);
6341 switch (size)
6342 {
6343 case 8:
6344 mode = V2SImode;
6345 break;
6346 case 16:
6347 mode = V4SImode;
6348 break;
6349 default:
6350 return -1;
6351 }
6352
6353 if (*modep == VOIDmode)
6354 *modep = mode;
6355
6356 /* Vector modes are considered to be opaque: two vectors are
6357 equivalent for the purposes of being homogeneous aggregates
6358 if they are the same size. */
6359 if (*modep == mode)
6360 return 1;
6361
6362 break;
6363
6364 case ARRAY_TYPE:
6365 {
6366 int count;
6367 tree index = TYPE_DOMAIN (type);
6368
6369 /* Can't handle incomplete types nor sizes that are not
6370 fixed. */
6371 if (!COMPLETE_TYPE_P (type)
6372 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6373 return -1;
6374
6375 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6376 warn_psabi_flags);
6377 if (count == -1
6378 || !index
6379 || !TYPE_MAX_VALUE (index)
6380 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6381 || !TYPE_MIN_VALUE (index)
6382 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6383 || count < 0)
6384 return -1;
6385
6386 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6387 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6388
6389 /* There must be no padding. */
6390 if (wi::to_wide (TYPE_SIZE (type))
6391 != count * GET_MODE_BITSIZE (*modep))
6392 return -1;
6393
6394 return count;
6395 }
6396
6397 case RECORD_TYPE:
6398 {
6399 int count = 0;
6400 int sub_count;
6401 tree field;
6402
6403 /* Can't handle incomplete types nor sizes that are not
6404 fixed. */
6405 if (!COMPLETE_TYPE_P (type)
6406 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6407 return -1;
6408
6409 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6410 {
6411 if (TREE_CODE (field) != FIELD_DECL)
6412 continue;
6413
6414 if (DECL_FIELD_ABI_IGNORED (field))
6415 {
6416 /* See whether this is something that earlier versions of
6417 GCC failed to ignore. */
6418 unsigned int flag;
6419 if (lookup_attribute ("no_unique_address",
6420 DECL_ATTRIBUTES (field)))
6421 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6422 else if (cxx17_empty_base_field_p (field))
6423 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6424 else
6425 /* No compatibility problem. */
6426 continue;
6427
6428 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6429 if (warn_psabi_flags)
6430 {
6431 *warn_psabi_flags |= flag;
6432 continue;
6433 }
6434 }
6435 /* A zero-width bitfield may affect layout in some
6436 circumstances, but adds no members. The determination
6437 of whether or not a type is an HFA is performed after
6438 layout is complete, so if the type still looks like an
6439 HFA afterwards, it is still classed as one. This is
6440 potentially an ABI break for the hard-float ABI. */
6441 else if (DECL_BIT_FIELD (field)
6442 && integer_zerop (DECL_SIZE (field)))
6443 {
6444 /* Prior to GCC-12 these fields were stripped early,
6445 hiding them from the back-end entirely and
6446 resulting in the correct behaviour for argument
6447 passing. Simulate that old behaviour without
6448 generating a warning. */
6449 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6450 continue;
6451 if (warn_psabi_flags)
6452 {
6453 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6454 continue;
6455 }
6456 }
6457
6458 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6459 warn_psabi_flags);
6460 if (sub_count < 0)
6461 return -1;
6462 count += sub_count;
6463 }
6464
6465 /* There must be no padding. */
6466 if (wi::to_wide (TYPE_SIZE (type))
6467 != count * GET_MODE_BITSIZE (*modep))
6468 return -1;
6469
6470 return count;
6471 }
6472
6473 case UNION_TYPE:
6474 case QUAL_UNION_TYPE:
6475 {
6476 /* These aren't very interesting except in a degenerate case. */
6477 int count = 0;
6478 int sub_count;
6479 tree field;
6480
6481 /* Can't handle incomplete types nor sizes that are not
6482 fixed. */
6483 if (!COMPLETE_TYPE_P (type)
6484 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6485 return -1;
6486
6487 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6488 {
6489 if (TREE_CODE (field) != FIELD_DECL)
6490 continue;
6491
6492 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6493 warn_psabi_flags);
6494 if (sub_count < 0)
6495 return -1;
6496 count = count > sub_count ? count : sub_count;
6497 }
6498
6499 /* There must be no padding. */
6500 if (wi::to_wide (TYPE_SIZE (type))
6501 != count * GET_MODE_BITSIZE (*modep))
6502 return -1;
6503
6504 return count;
6505 }
6506
6507 default:
6508 break;
6509 }
6510
6511 return -1;
6512 }
6513
6514 /* Return true if PCS_VARIANT should use VFP registers. */
6515 static bool
6516 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6517 {
6518 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6519 {
6520 static bool seen_thumb1_vfp = false;
6521
6522 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6523 {
6524 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6525 /* sorry() is not immediately fatal, so only display this once. */
6526 seen_thumb1_vfp = true;
6527 }
6528
6529 return true;
6530 }
6531
6532 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6533 return false;
6534
6535 return (TARGET_32BIT && TARGET_HARD_FLOAT
6536 && (TARGET_VFP_DOUBLE || !is_double));
6537 }
6538
6539 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6540 suitable for passing or returning in VFP registers for the PCS
6541 variant selected. If it is, then *BASE_MODE is updated to contain
6542 a machine mode describing each element of the argument's type and
6543 *COUNT to hold the number of such elements. */
6544 static bool
6545 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6546 machine_mode mode, const_tree type,
6547 machine_mode *base_mode, int *count)
6548 {
6549 machine_mode new_mode = VOIDmode;
6550
6551 /* If we have the type information, prefer that to working things
6552 out from the mode. */
6553 if (type)
6554 {
6555 unsigned int warn_psabi_flags = 0;
6556 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6557 &warn_psabi_flags);
6558 if (ag_count > 0 && ag_count <= 4)
6559 {
6560 static unsigned last_reported_type_uid;
6561 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6562 int alt;
6563 if (warn_psabi
6564 && warn_psabi_flags
6565 && uid != last_reported_type_uid
6566 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6567 != ag_count))
6568 {
6569 const char *url10
6570 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6571 const char *url12
6572 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6573 gcc_assert (alt == -1);
6574 last_reported_type_uid = uid;
6575 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6576 qualification. */
6577 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6578 inform (input_location, "parameter passing for argument of "
6579 "type %qT with %<[[no_unique_address]]%> members "
6580 "changed %{in GCC 10.1%}",
6581 TYPE_MAIN_VARIANT (type), url10);
6582 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6583 inform (input_location, "parameter passing for argument of "
6584 "type %qT when C++17 is enabled changed to match "
6585 "C++14 %{in GCC 10.1%}",
6586 TYPE_MAIN_VARIANT (type), url10);
6587 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6588 inform (input_location, "parameter passing for argument of "
6589 "type %qT changed %{in GCC 12.1%}",
6590 TYPE_MAIN_VARIANT (type), url12);
6591 }
6592 *count = ag_count;
6593 }
6594 else
6595 return false;
6596 }
6597 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6598 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6599 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6600 {
6601 *count = 1;
6602 new_mode = mode;
6603 }
6604 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6605 {
6606 *count = 2;
6607 new_mode = (mode == DCmode ? DFmode : SFmode);
6608 }
6609 else
6610 return false;
6611
6612
6613 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6614 return false;
6615
6616 *base_mode = new_mode;
6617
6618 if (TARGET_GENERAL_REGS_ONLY)
6619 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6620 type);
6621
6622 return true;
6623 }
6624
6625 static bool
6626 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6627 machine_mode mode, const_tree type)
6628 {
6629 int count ATTRIBUTE_UNUSED;
6630 machine_mode ag_mode ATTRIBUTE_UNUSED;
6631
6632 if (!use_vfp_abi (pcs_variant, false))
6633 return false;
6634 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6635 &ag_mode, &count);
6636 }
6637
6638 static bool
6639 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6640 const_tree type)
6641 {
6642 if (!use_vfp_abi (pcum->pcs_variant, false))
6643 return false;
6644
6645 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6646 &pcum->aapcs_vfp_rmode,
6647 &pcum->aapcs_vfp_rcount);
6648 }
6649
6650 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6651 for the behaviour of this function. */
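/* For illustration: for a homogeneous aggregate of three doubles
   (aapcs_vfp_rmode == DFmode, aapcs_vfp_rcount == 3) we get
   rmode_size == 8, shift == 2 and mask == 0x3f, so the loop looks for
   six consecutive free single-precision registers starting at an even
   register number; with all of d0-d7 free the aggregate is allocated
   to d0-d2 and aapcs_vfp_reg_alloc becomes 0x3f.  */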
6652
6653 static bool
6654 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6655 const_tree type ATTRIBUTE_UNUSED)
6656 {
6657 int rmode_size
6658 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6659 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6660 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6661 int regno;
6662
6663 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6664 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6665 {
6666 pcum->aapcs_vfp_reg_alloc = mask << regno;
6667 if (mode == BLKmode
6668 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6669 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6670 {
6671 int i;
6672 int rcount = pcum->aapcs_vfp_rcount;
6673 int rshift = shift;
6674 machine_mode rmode = pcum->aapcs_vfp_rmode;
6675 rtx par;
6676 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6677 {
6678 /* Avoid using unsupported vector modes. */
6679 if (rmode == V2SImode)
6680 rmode = DImode;
6681 else if (rmode == V4SImode)
6682 {
6683 rmode = DImode;
6684 rcount *= 2;
6685 rshift /= 2;
6686 }
6687 }
6688 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6689 for (i = 0; i < rcount; i++)
6690 {
6691 rtx tmp = gen_rtx_REG (rmode,
6692 FIRST_VFP_REGNUM + regno + i * rshift);
6693 tmp = gen_rtx_EXPR_LIST
6694 (VOIDmode, tmp,
6695 GEN_INT (i * GET_MODE_SIZE (rmode)));
6696 XVECEXP (par, 0, i) = tmp;
6697 }
6698
6699 pcum->aapcs_reg = par;
6700 }
6701 else
6702 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6703 return true;
6704 }
6705 return false;
6706 }
6707
6708 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6709 comment there for the behaviour of this function. */
6710
6711 static rtx
6712 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6713 machine_mode mode,
6714 const_tree type)
6715 {
6716 if (!use_vfp_abi (pcs_variant, false))
6717 return NULL;
6718
6719 if (mode == BLKmode
6720 || (GET_MODE_CLASS (mode) == MODE_INT
6721 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6722 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6723 {
6724 int count;
6725 machine_mode ag_mode;
6726 int i;
6727 rtx par;
6728 int shift;
6729
6730 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6731 &ag_mode, &count);
6732
6733 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6734 {
6735 if (ag_mode == V2SImode)
6736 ag_mode = DImode;
6737 else if (ag_mode == V4SImode)
6738 {
6739 ag_mode = DImode;
6740 count *= 2;
6741 }
6742 }
6743 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6744 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6745 for (i = 0; i < count; i++)
6746 {
6747 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6748 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6749 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6750 XVECEXP (par, 0, i) = tmp;
6751 }
6752
6753 return par;
6754 }
6755
6756 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6757 }
6758
6759 static void
6760 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6761 machine_mode mode ATTRIBUTE_UNUSED,
6762 const_tree type ATTRIBUTE_UNUSED)
6763 {
6764 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6765 pcum->aapcs_vfp_reg_alloc = 0;
6766 return;
6767 }
6768
6769 #define AAPCS_CP(X) \
6770 { \
6771 aapcs_ ## X ## _cum_init, \
6772 aapcs_ ## X ## _is_call_candidate, \
6773 aapcs_ ## X ## _allocate, \
6774 aapcs_ ## X ## _is_return_candidate, \
6775 aapcs_ ## X ## _allocate_return_reg, \
6776 aapcs_ ## X ## _advance \
6777 }
6778
6779 /* Table of co-processors that can be used to pass arguments in
6780 registers. Ideally no argument should be a candidate for more than
6781 one co-processor table entry, but the table is processed in order
6782 and stops after the first match. If that entry then fails to put
6783 the argument into a co-processor register, the argument will go on
6784 the stack. */
6785 static struct
6786 {
6787 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6788 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6789
6790 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6791 BLKmode) is a candidate for this co-processor's registers; this
6792 function should ignore any position-dependent state in
6793 CUMULATIVE_ARGS and only use call-type dependent information. */
6794 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6795
6796 /* Return true if the argument does get a co-processor register; it
6797 should set aapcs_reg to an RTX of the register allocated as is
6798 required for a return from FUNCTION_ARG. */
6799 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6800
6801 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6802 be returned in this co-processor's registers. */
6803 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6804
6805 /* Allocate and return an RTX element to hold the return type of a call. This
6806 routine must not fail and will only be called if is_return_candidate
6807 returned true with the same parameters. */
6808 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6809
6810 /* Finish processing this argument and prepare to start processing
6811 the next one. */
6812 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6813 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6814 {
6815 AAPCS_CP(vfp)
6816 };
6817
6818 #undef AAPCS_CP
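
/* An illustrative sketch of the VFP slot above (the declarations are
   hypothetical, not taken from the compiler sources): under the VFP
   variant of the AAPCS a homogeneous floating-point aggregate of up to
   four members is a co-processor candidate, e.g.

     struct hfa { float x, y, z; };
     void g (struct hfa a, double d);

   where "a" would normally land in s0-s2 and "d" in the next free
   double-precision register (d2 here).  If no suitable block of VFP
   registers is free, the argument falls back to the stack, as described
   in the table comment above.  */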
6819
6820 static int
6821 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6822 const_tree type)
6823 {
6824 int i;
6825
6826 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6827 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6828 return i;
6829
6830 return -1;
6831 }
6832
6833 static int
6834 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6835 {
6836 /* We aren't passed a decl, so we can't check that a call is local.
6837 However, it isn't clear that that would be a win anyway, since it
6838 might limit some tail-calling opportunities. */
6839 enum arm_pcs pcs_variant;
6840
6841 if (fntype)
6842 {
6843 const_tree fndecl = NULL_TREE;
6844
6845 if (TREE_CODE (fntype) == FUNCTION_DECL)
6846 {
6847 fndecl = fntype;
6848 fntype = TREE_TYPE (fntype);
6849 }
6850
6851 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6852 }
6853 else
6854 pcs_variant = arm_pcs_default;
6855
6856 if (pcs_variant != ARM_PCS_AAPCS)
6857 {
6858 int i;
6859
6860 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6861 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6862 TYPE_MODE (type),
6863 type))
6864 return i;
6865 }
6866 return -1;
6867 }
6868
6869 static rtx
6870 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6871 const_tree fntype)
6872 {
6873 /* We aren't passed a decl, so we can't check that a call is local.
6874 However, it isn't clear that that would be a win anyway, since it
6875 might limit some tail-calling opportunities. */
6876 enum arm_pcs pcs_variant;
6877 int unsignedp ATTRIBUTE_UNUSED;
6878
6879 if (fntype)
6880 {
6881 const_tree fndecl = NULL_TREE;
6882
6883 if (TREE_CODE (fntype) == FUNCTION_DECL)
6884 {
6885 fndecl = fntype;
6886 fntype = TREE_TYPE (fntype);
6887 }
6888
6889 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6890 }
6891 else
6892 pcs_variant = arm_pcs_default;
6893
6894 /* Promote integer types. */
6895 if (type && INTEGRAL_TYPE_P (type))
6896 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6897
6898 if (pcs_variant != ARM_PCS_AAPCS)
6899 {
6900 int i;
6901
6902 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6903 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6904 type))
6905 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6906 mode, type);
6907 }
6908
6909 /* Promotes small structs returned in a register to full-word size
6910 for big-endian AAPCS. */
6911 if (type && arm_return_in_msb (type))
6912 {
6913 HOST_WIDE_INT size = int_size_in_bytes (type);
6914 if (size % UNITS_PER_WORD != 0)
6915 {
6916 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6917 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6918 }
6919 }
6920
6921 return gen_rtx_REG (mode, R0_REGNUM);
6922 }
6923
6924 static rtx
6925 aapcs_libcall_value (machine_mode mode)
6926 {
6927 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6928 && GET_MODE_SIZE (mode) <= 4)
6929 mode = SImode;
6930
6931 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6932 }
6933
6934 /* Lay out a function argument using the AAPCS rules. The rule
6935 numbers referred to here are those in the AAPCS. */
6936 static void
6937 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6938 const_tree type, bool named)
6939 {
6940 int nregs, nregs2;
6941 int ncrn;
6942
6943 /* We only need to do this once per argument. */
6944 if (pcum->aapcs_arg_processed)
6945 return;
6946
6947 pcum->aapcs_arg_processed = true;
6948
6949 /* Special case: if named is false then we are handling an incoming
6950 anonymous argument which is on the stack. */
6951 if (!named)
6952 return;
6953
6954 /* Is this a potential co-processor register candidate? */
6955 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6956 {
6957 int slot = aapcs_select_call_coproc (pcum, mode, type);
6958 pcum->aapcs_cprc_slot = slot;
6959
6960 /* We don't have to apply any of the rules from part B of the
6961 preparation phase, these are handled elsewhere in the
6962 compiler. */
6963
6964 if (slot >= 0)
6965 {
6966 /* A Co-processor register candidate goes either in its own
6967 class of registers or on the stack. */
6968 if (!pcum->aapcs_cprc_failed[slot])
6969 {
6970 /* C1.cp - Try to allocate the argument to co-processor
6971 registers. */
6972 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6973 return;
6974
6975 /* C2.cp - Put the argument on the stack and note that we
6976 can't assign any more candidates in this slot. We also
6977 need to note that we have allocated stack space, so that
6978 we won't later try to split a non-cprc candidate between
6979 core registers and the stack. */
6980 pcum->aapcs_cprc_failed[slot] = true;
6981 pcum->can_split = false;
6982 }
6983
6984 /* We didn't get a register, so this argument goes on the
6985 stack. */
6986 gcc_assert (pcum->can_split == false);
6987 return;
6988 }
6989 }
6990
6991 /* C3 - For double-word aligned arguments, round the NCRN up to the
6992 next even number. */
6993 ncrn = pcum->aapcs_ncrn;
6994 if (ncrn & 1)
6995 {
6996 int res = arm_needs_doubleword_align (mode, type);
6997 /* Only warn during RTL expansion of call stmts, otherwise we would
6998 warn e.g. during gimplification even on functions that will be
6999 always inlined, and we'd warn multiple times. Don't warn when
7000 called in expand_function_start either, as we warn instead in
7001 arm_function_arg_boundary in that case. */
7002 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7003 inform (input_location, "parameter passing for argument of type "
7004 "%qT changed in GCC 7.1", type);
7005 else if (res > 0)
7006 ncrn++;
7007 }
7008
7009 nregs = ARM_NUM_REGS2 (mode, type);
7010
7011 /* Sigh, this test should really assert that nregs > 0, but a GCC
7012 extension allows empty structs and then gives them empty size; it
7013 then allows such a structure to be passed by value. For some of
7014 the code below we have to pretend that such an argument has
7015 non-zero size so that we 'locate' it correctly either in
7016 registers or on the stack. */
7017 gcc_assert (nregs >= 0);
7018
7019 nregs2 = nregs ? nregs : 1;
7020
7021 /* C4 - Argument fits entirely in core registers. */
7022 if (ncrn + nregs2 <= NUM_ARG_REGS)
7023 {
7024 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7025 pcum->aapcs_next_ncrn = ncrn + nregs;
7026 return;
7027 }
7028
7029 /* C5 - Some core registers left and there are no arguments already
7030 on the stack: split this argument between the remaining core
7031 registers and the stack. */
7032 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7033 {
7034 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7035 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7036 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7037 return;
7038 }
7039
7040 /* C6 - NCRN is set to 4. */
7041 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7042
7043 /* C7,C8 - argument goes on the stack.  We have nothing to do here. */
7044 return;
7045 }
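
/* A worked sketch of rules C3-C6 above (the prototype is purely
   illustrative), with core argument registers r0-r3:

     void f (int a, long long b, int c, int d);

   "a" takes r0; C3 rounds the NCRN up to 2 for the doubleword-aligned
   "b", which then occupies r2-r3 and leaves r1 unused; "c" and "d" no
   longer fit and go on the stack (C6-C8).  If instead an argument only
   partly fitted and no earlier argument had gone on the stack, C5 would
   split it between the remaining core registers and the stack.  */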
7046
7047 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7048 for a call to a function whose data type is FNTYPE.
7049 For a library call, FNTYPE is NULL. */
7050 void
7051 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7052 rtx libname,
7053 tree fndecl ATTRIBUTE_UNUSED)
7054 {
7055 /* Long call handling. */
7056 if (fntype)
7057 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7058 else
7059 pcum->pcs_variant = arm_pcs_default;
7060
7061 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7062 {
7063 if (arm_libcall_uses_aapcs_base (libname))
7064 pcum->pcs_variant = ARM_PCS_AAPCS;
7065
7066 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7067 pcum->aapcs_reg = NULL_RTX;
7068 pcum->aapcs_partial = 0;
7069 pcum->aapcs_arg_processed = false;
7070 pcum->aapcs_cprc_slot = -1;
7071 pcum->can_split = true;
7072
7073 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7074 {
7075 int i;
7076
7077 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7078 {
7079 pcum->aapcs_cprc_failed[i] = false;
7080 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7081 }
7082 }
7083 return;
7084 }
7085
7086 /* Legacy ABIs */
7087
7088 /* On the ARM, the offset starts at 0. */
7089 pcum->nregs = 0;
7090 pcum->iwmmxt_nregs = 0;
7091 pcum->can_split = true;
7092
7093 /* Varargs vectors are treated the same as long long.
7094 named_count avoids having to change the way arm handles 'named' */
7095 pcum->named_count = 0;
7096 pcum->nargs = 0;
7097
7098 if (TARGET_REALLY_IWMMXT && fntype)
7099 {
7100 tree fn_arg;
7101
7102 for (fn_arg = TYPE_ARG_TYPES (fntype);
7103 fn_arg;
7104 fn_arg = TREE_CHAIN (fn_arg))
7105 pcum->named_count += 1;
7106
7107 if (! pcum->named_count)
7108 pcum->named_count = INT_MAX;
7109 }
7110 }
7111
7112 /* Return 2 if double word alignment is required for argument passing,
7113 but wasn't required before the fix for PR88469.
7114 Return 1 if double word alignment is required for argument passing.
7115 Return -1 if double word alignment used to be required for argument
7116 passing before the PR77728 ABI fix, but is not required anymore.
7117 Return 0 if double word alignment is not required and wasn't required
7118 before either. */
7119 static int
7120 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7121 {
7122 if (!type)
7123 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7124
7125 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7126 if (!AGGREGATE_TYPE_P (type))
7127 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7128
7129 /* Array types: Use member alignment of element type. */
7130 if (TREE_CODE (type) == ARRAY_TYPE)
7131 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7132
7133 int ret = 0;
7134 int ret2 = 0;
7135 /* Record/aggregate types: Use greatest member alignment of any member.
7136
7137 Note that we explicitly consider zero-sized fields here, even though
7138 they don't map to AAPCS machine types. For example, in:
7139
7140 struct __attribute__((aligned(8))) empty {};
7141
7142 struct s {
7143 [[no_unique_address]] empty e;
7144 int x;
7145 };
7146
7147 "s" contains only one Fundamental Data Type (the int field)
7148 but gains 8-byte alignment and size thanks to "e". */
7149 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7150 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7151 {
7152 if (TREE_CODE (field) == FIELD_DECL)
7153 return 1;
7154 else
7155 /* Before PR77728 fix, we were incorrectly considering also
7156 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7157 Make sure we can warn about that with -Wpsabi. */
7158 ret = -1;
7159 }
7160 else if (TREE_CODE (field) == FIELD_DECL
7161 && DECL_BIT_FIELD_TYPE (field)
7162 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7163 ret2 = 1;
7164
7165 if (ret2)
7166 return 2;
7167
7168 return ret;
7169 }
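
/* A hedged example of the bit-field case handled via ret2 above
   (assuming the bit-field's own DECL_ALIGN does not already exceed
   PARM_BOUNDARY, in which case the first branch would fire instead):

     struct bf { long long x : 8; int y; };

   no ordinary field is over-aligned, but "x" is a bit-field whose
   declared type is 64-bit aligned, so the function returns 2: the
   argument now gets doubleword alignment and, with -Wpsabi, the
   GCC 9.1 change is reported by this function's callers.  */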
7170
7171
7172 /* Determine where to put an argument to a function.
7173 Value is zero to push the argument on the stack,
7174 or a hard register in which to store the argument.
7175
7176 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7177 the preceding args and about the function being called.
7178 ARG is a description of the argument.
7179
7180 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7181 other arguments are passed on the stack. If (NAMED == 0) (which happens
7182 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7183 defined), say it is passed on the stack (function_prologue will
7184 indeed arrange for it to be passed on the stack if necessary). */
7185
7186 static rtx
7187 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7188 {
7189 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7190 int nregs;
7191
7192 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7193 a call insn (op3 of a call_value insn). */
7194 if (arg.end_marker_p ())
7195 return const0_rtx;
7196
7197 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7198 {
7199 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7200 return pcum->aapcs_reg;
7201 }
7202
7203 /* Varargs vectors are treated the same as long long.
7204 named_count avoids having to change the way arm handles 'named' */
7205 if (TARGET_IWMMXT_ABI
7206 && arm_vector_mode_supported_p (arg.mode)
7207 && pcum->named_count > pcum->nargs + 1)
7208 {
7209 if (pcum->iwmmxt_nregs <= 9)
7210 return gen_rtx_REG (arg.mode,
7211 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7212 else
7213 {
7214 pcum->can_split = false;
7215 return NULL_RTX;
7216 }
7217 }
7218
7219 /* Put doubleword aligned quantities in even register pairs. */
7220 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7221 {
7222 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7223 if (res < 0 && warn_psabi)
7224 inform (input_location, "parameter passing for argument of type "
7225 "%qT changed in GCC 7.1", arg.type);
7226 else if (res > 0)
7227 {
7228 pcum->nregs++;
7229 if (res > 1 && warn_psabi)
7230 inform (input_location, "parameter passing for argument of type "
7231 "%qT changed in GCC 9.1", arg.type);
7232 }
7233 }
7234
7235 /* Only allow splitting an arg between regs and memory if all preceding
7236 args were allocated to regs. For args passed by reference we only count
7237 the reference pointer. */
7238 if (pcum->can_split)
7239 nregs = 1;
7240 else
7241 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7242
7243 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7244 return NULL_RTX;
7245
7246 return gen_rtx_REG (arg.mode, pcum->nregs);
7247 }
7248
7249 static unsigned int
7250 arm_function_arg_boundary (machine_mode mode, const_tree type)
7251 {
7252 if (!ARM_DOUBLEWORD_ALIGN)
7253 return PARM_BOUNDARY;
7254
7255 int res = arm_needs_doubleword_align (mode, type);
7256 if (res < 0 && warn_psabi)
7257 inform (input_location, "parameter passing for argument of type %qT "
7258 "changed in GCC 7.1", type);
7259 if (res > 1 && warn_psabi)
7260 inform (input_location, "parameter passing for argument of type "
7261 "%qT changed in GCC 9.1", type);
7262
7263 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7264 }
7265
7266 static int
7267 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7268 {
7269 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7270 int nregs = pcum->nregs;
7271
7272 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7273 {
7274 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7275 return pcum->aapcs_partial;
7276 }
7277
7278 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7279 return 0;
7280
7281 if (NUM_ARG_REGS > nregs
7282 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7283 && pcum->can_split)
7284 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7285
7286 return 0;
7287 }
7288
7289 /* Update the data in PCUM to advance over argument ARG. */
7290
7291 static void
7292 arm_function_arg_advance (cumulative_args_t pcum_v,
7293 const function_arg_info &arg)
7294 {
7295 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7296
7297 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7298 {
7299 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7300
7301 if (pcum->aapcs_cprc_slot >= 0)
7302 {
7303 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7304 arg.type);
7305 pcum->aapcs_cprc_slot = -1;
7306 }
7307
7308 /* Generic stuff. */
7309 pcum->aapcs_arg_processed = false;
7310 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7311 pcum->aapcs_reg = NULL_RTX;
7312 pcum->aapcs_partial = 0;
7313 }
7314 else
7315 {
7316 pcum->nargs += 1;
7317 if (arm_vector_mode_supported_p (arg.mode)
7318 && pcum->named_count > pcum->nargs
7319 && TARGET_IWMMXT_ABI)
7320 pcum->iwmmxt_nregs += 1;
7321 else
7322 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7323 }
7324 }
7325
7326 /* Variable sized types are passed by reference. This is a GCC
7327 extension to the ARM ABI. */
7328
7329 static bool
7330 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7331 {
7332 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7333 }
7334 \f
7335 /* Encode the current state of the #pragma [no_]long_calls. */
7336 typedef enum
7337 {
7338 OFF, /* No #pragma [no_]long_calls is in effect. */
7339 LONG, /* #pragma long_calls is in effect. */
7340 SHORT /* #pragma no_long_calls is in effect. */
7341 } arm_pragma_enum;
7342
7343 static arm_pragma_enum arm_pragma_long_calls = OFF;
7344
7345 void
7346 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7347 {
7348 arm_pragma_long_calls = LONG;
7349 }
7350
7351 void
7352 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7353 {
7354 arm_pragma_long_calls = SHORT;
7355 }
7356
7357 void
7358 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7359 {
7360 arm_pragma_long_calls = OFF;
7361 }
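
/* A short usage sketch (the function names are placeholders):

     #pragma long_calls
     void far_away (void);    treated as if it had the long_call attribute
     #pragma no_long_calls
     void near_by (void);     treated as if it had the short_call attribute
     #pragma long_calls_off

   The attribute itself is attached when the function type is created;
   see arm_set_default_type_attributes further down.  */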
7362 \f
7363 /* Handle an attribute requiring a FUNCTION_DECL;
7364 arguments as in struct attribute_spec.handler. */
7365 static tree
7366 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7367 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7368 {
7369 if (TREE_CODE (*node) != FUNCTION_DECL)
7370 {
7371 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7372 name);
7373 *no_add_attrs = true;
7374 }
7375
7376 return NULL_TREE;
7377 }
7378
7379 /* Handle an "interrupt" or "isr" attribute;
7380 arguments as in struct attribute_spec.handler. */
7381 static tree
7382 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7383 bool *no_add_attrs)
7384 {
7385 if (DECL_P (*node))
7386 {
7387 if (TREE_CODE (*node) != FUNCTION_DECL)
7388 {
7389 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7390 name);
7391 *no_add_attrs = true;
7392 }
7393 else if (TARGET_VFP_BASE)
7394 {
7395 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7396 name);
7397 }
7398 /* FIXME: the argument if any is checked for type attributes;
7399 should it be checked for decl ones? */
7400 }
7401 else
7402 {
7403 if (TREE_CODE (*node) == FUNCTION_TYPE
7404 || TREE_CODE (*node) == METHOD_TYPE)
7405 {
7406 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7407 {
7408 warning (OPT_Wattributes, "%qE attribute ignored",
7409 name);
7410 *no_add_attrs = true;
7411 }
7412 }
7413 else if (TREE_CODE (*node) == POINTER_TYPE
7414 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7415 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7416 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7417 {
7418 *node = build_variant_type_copy (*node);
7419 TREE_TYPE (*node) = build_type_attribute_variant
7420 (TREE_TYPE (*node),
7421 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7422 *no_add_attrs = true;
7423 }
7424 else
7425 {
7426 /* Possibly pass this attribute on from the type to a decl. */
7427 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7428 | (int) ATTR_FLAG_FUNCTION_NEXT
7429 | (int) ATTR_FLAG_ARRAY_NEXT))
7430 {
7431 *no_add_attrs = true;
7432 return tree_cons (name, args, NULL_TREE);
7433 }
7434 else
7435 {
7436 warning (OPT_Wattributes, "%qE attribute ignored",
7437 name);
7438 }
7439 }
7440 }
7441
7442 return NULL_TREE;
7443 }
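
/* For instance, an interrupt handler may be declared along these lines
   (a sketch; the handler name is a placeholder):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   An argument string that arm_isr_value does not recognise causes the
   attribute to be ignored with the warning above.  */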
7444
7445 /* Handle a "pcs" attribute; arguments as in struct
7446 attribute_spec.handler. */
7447 static tree
7448 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7449 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7450 {
7451 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7452 {
7453 warning (OPT_Wattributes, "%qE attribute ignored", name);
7454 *no_add_attrs = true;
7455 }
7456 return NULL_TREE;
7457 }
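
/* For example (illustrative declaration only):

     double dot (double x, double y) __attribute__ ((pcs ("aapcs-vfp")));

   "aapcs" and "aapcs-vfp" are the strings accepted by
   arm_pcs_from_attribute; anything else is ignored with a warning.  */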
7458
7459 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7460 /* Handle the "notshared" attribute. This attribute is another way of
7461 requesting hidden visibility. ARM's compiler supports
7462 "__declspec(notshared)"; we support the same thing via an
7463 attribute. */
7464
7465 static tree
7466 arm_handle_notshared_attribute (tree *node,
7467 tree name ATTRIBUTE_UNUSED,
7468 tree args ATTRIBUTE_UNUSED,
7469 int flags ATTRIBUTE_UNUSED,
7470 bool *no_add_attrs)
7471 {
7472 tree decl = TYPE_NAME (*node);
7473
7474 if (decl)
7475 {
7476 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7477 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7478 *no_add_attrs = false;
7479 }
7480 return NULL_TREE;
7481 }
7482 #endif
7483
7484 /* This function returns true if a function with declaration FNDECL and type
7485 FNTYPE uses the stack to pass arguments or return variables and false
7486 otherwise. This is used for functions with the attributes
7487 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7488 diagnostic messages if the stack is used. NAME is the name of the attribute
7489 used. */
7490
7491 static bool
7492 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7493 {
7494 function_args_iterator args_iter;
7495 CUMULATIVE_ARGS args_so_far_v;
7496 cumulative_args_t args_so_far;
7497 bool first_param = true;
7498 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7499
7500 /* Error out if any argument is passed on the stack. */
7501 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7502 args_so_far = pack_cumulative_args (&args_so_far_v);
7503 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7504 {
7505 rtx arg_rtx;
7506
7507 prev_arg_type = arg_type;
7508 if (VOID_TYPE_P (arg_type))
7509 continue;
7510
7511 function_arg_info arg (arg_type, /*named=*/true);
7512 if (!first_param)
7513 /* ??? We should advance after processing the argument and pass
7514 the argument we're advancing past. */
7515 arm_function_arg_advance (args_so_far, arg);
7516 arg_rtx = arm_function_arg (args_so_far, arg);
7517 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7518 {
7519 error ("%qE attribute not available to functions with arguments "
7520 "passed on the stack", name);
7521 return true;
7522 }
7523 first_param = false;
7524 }
7525
7526 /* Error out for variadic functions since we cannot control how many
7527 arguments will be passed and thus the stack could be used.  stdarg_p () is not
7528 used for this check, to avoid walking the argument list twice. */
7529 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7530 {
7531 error ("%qE attribute not available to functions with variable number "
7532 "of arguments", name);
7533 return true;
7534 }
7535
7536 /* Error out if return value is passed on the stack. */
7537 ret_type = TREE_TYPE (fntype);
7538 if (arm_return_in_memory (ret_type, fntype))
7539 {
7540 error ("%qE attribute not available to functions that return value on "
7541 "the stack", name);
7542 return true;
7543 }
7544 return false;
7545 }
7546
7547 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7548 function will check whether the attribute is allowed here and will add the
7549 attribute to the function declaration tree or otherwise issue a warning. */
7550
7551 static tree
7552 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7553 tree /* args */,
7554 int /* flags */,
7555 bool *no_add_attrs)
7556 {
7557 tree fndecl;
7558
7559 if (!use_cmse)
7560 {
7561 *no_add_attrs = true;
7562 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7563 "option", name);
7564 return NULL_TREE;
7565 }
7566
7567 /* Ignore attribute for function types. */
7568 if (TREE_CODE (*node) != FUNCTION_DECL)
7569 {
7570 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7571 name);
7572 *no_add_attrs = true;
7573 return NULL_TREE;
7574 }
7575
7576 fndecl = *node;
7577
7578 /* Warn for static linkage functions. */
7579 if (!TREE_PUBLIC (fndecl))
7580 {
7581 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7582 "with static linkage", name);
7583 *no_add_attrs = true;
7584 return NULL_TREE;
7585 }
7586
7587 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7588 TREE_TYPE (fndecl));
7589 return NULL_TREE;
7590 }
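
/* A typical use, compiled with -mcmse (sketch; the name is illustrative):

     int __attribute__ ((cmse_nonsecure_entry)) secure_service (int req);

   The checks above reject the attribute without -mcmse, on non-function
   declarations, on functions with static linkage, and on signatures that
   would need the stack for arguments or the return value.  */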
7591
7592
7593 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7594 function will check whether the attribute is allowed here and will add the
7595 attribute to the function type tree or otherwise issue a diagnostic. The
7596 reason we check this at declaration time is to only allow the use of the
7597 attribute with declarations of function pointers and not function
7598 declarations. This function checks NODE is of the expected type and issues
7599 diagnostics otherwise using NAME. If it is not of the expected type
7600 *NO_ADD_ATTRS will be set to true. */
7601
7602 static tree
7603 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7604 tree /* args */,
7605 int /* flags */,
7606 bool *no_add_attrs)
7607 {
7608 tree decl = NULL_TREE;
7609 tree fntype, type;
7610
7611 if (!use_cmse)
7612 {
7613 *no_add_attrs = true;
7614 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7615 "option", name);
7616 return NULL_TREE;
7617 }
7618
7619 if (DECL_P (*node))
7620 {
7621 fntype = TREE_TYPE (*node);
7622
7623 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7624 decl = *node;
7625 }
7626 else
7627 fntype = *node;
7628
7629 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7630 fntype = TREE_TYPE (fntype);
7631
7632 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7633 {
7634 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7635 "function pointer", name);
7636 *no_add_attrs = true;
7637 return NULL_TREE;
7638 }
7639
7640 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7641
7642 if (*no_add_attrs)
7643 return NULL_TREE;
7644
7645 /* Prevent trees from being shared among function types with and without
7646 the cmse_nonsecure_call attribute. */
7647 if (decl)
7648 {
7649 type = build_distinct_type_copy (TREE_TYPE (decl));
7650 TREE_TYPE (decl) = type;
7651 }
7652 else
7653 {
7654 type = build_distinct_type_copy (*node);
7655 *node = type;
7656 }
7657
7658 fntype = type;
7659
7660 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7661 {
7662 type = fntype;
7663 fntype = TREE_TYPE (fntype);
7664 fntype = build_distinct_type_copy (fntype);
7665 TREE_TYPE (type) = fntype;
7666 }
7667
7668 /* Construct a type attribute and add it to the function type. */
7669 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7670 TYPE_ATTRIBUTES (fntype));
7671 TYPE_ATTRIBUTES (fntype) = attrs;
7672 return NULL_TREE;
7673 }
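
/* The attribute is meant for (the base type of) function pointers, as
   in this sketch (the typedef name is illustrative):

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_call_t (int);
     ns_call_t *ns_callback;

   Applying it directly to a function declaration is diagnosed above.  */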
7674
7675 /* Return 0 if the attributes for two types are incompatible, 1 if they
7676 are compatible, and 2 if they are nearly compatible (which causes a
7677 warning to be generated). */
7678 static int
7679 arm_comp_type_attributes (const_tree type1, const_tree type2)
7680 {
7681 int l1, l2, s1, s2;
7682
7683 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7684 TYPE_ATTRIBUTES (type1));
7685 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7686 TYPE_ATTRIBUTES (type2));
7687 if (bool (attrs1) != bool (attrs2))
7688 return 0;
7689 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7690 return 0;
7691
7692 /* Check for mismatch of non-default calling convention. */
7693 if (TREE_CODE (type1) != FUNCTION_TYPE)
7694 return 1;
7695
7696 /* Check for mismatched call attributes. */
7697 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7698 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7699 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7700 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7701
7702 /* Only bother to check if an attribute is defined. */
7703 if (l1 | l2 | s1 | s2)
7704 {
7705 /* If one type has an attribute, the other must have the same attribute. */
7706 if ((l1 != l2) || (s1 != s2))
7707 return 0;
7708
7709 /* Disallow mixed attributes. */
7710 if ((l1 & s2) || (l2 & s1))
7711 return 0;
7712 }
7713
7714 /* Check for mismatched ISR attribute. */
7715 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7716 if (! l1)
7717 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7718 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7719 if (! l2)
7720 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7721 if (l1 != l2)
7722 return 0;
7723
7724 l1 = lookup_attribute ("cmse_nonsecure_call",
7725 TYPE_ATTRIBUTES (type1)) != NULL;
7726 l2 = lookup_attribute ("cmse_nonsecure_call",
7727 TYPE_ATTRIBUTES (type2)) != NULL;
7728
7729 if (l1 != l2)
7730 return 0;
7731
7732 return 1;
7733 }
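
/* So, for instance, mixing the call attributes is rejected as an
   incompatible pointer conversion (a sketch under the assumption that
   the attributes are spelt on the function types, as below):

     typedef void lcfn (void) __attribute__ ((long_call));
     typedef void scfn (void) __attribute__ ((short_call));
     extern scfn g;
     lcfn *p = &g;

   while two function types that carry no call attributes at all compare
   as compatible here.  */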
7734
7735 /* Assigns default attributes to a newly defined type.  This is used to
7736 set short_call/long_call attributes for function types of
7737 functions defined inside corresponding #pragma scopes. */
7738 static void
7739 arm_set_default_type_attributes (tree type)
7740 {
7741 /* Add __attribute__ ((long_call)) to all functions when inside
7742 #pragma long_calls, or __attribute__ ((short_call)) when inside
7743 #pragma no_long_calls. */
7744 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7745 {
7746 tree type_attr_list, attr_name;
7747 type_attr_list = TYPE_ATTRIBUTES (type);
7748
7749 if (arm_pragma_long_calls == LONG)
7750 attr_name = get_identifier ("long_call");
7751 else if (arm_pragma_long_calls == SHORT)
7752 attr_name = get_identifier ("short_call");
7753 else
7754 return;
7755
7756 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7757 TYPE_ATTRIBUTES (type) = type_attr_list;
7758 }
7759 }
7760 \f
7761 /* Return true if DECL is known to be linked into section SECTION. */
7762
7763 static bool
7764 arm_function_in_section_p (tree decl, section *section)
7765 {
7766 /* We can only be certain about the prevailing symbol definition. */
7767 if (!decl_binds_to_current_def_p (decl))
7768 return false;
7769
7770 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7771 if (!DECL_SECTION_NAME (decl))
7772 {
7773 /* Make sure that we will not create a unique section for DECL. */
7774 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7775 return false;
7776 }
7777
7778 return function_section (decl) == section;
7779 }
7780
7781 /* Return nonzero if a 32-bit "long_call" should be generated for
7782 a call from the current function to DECL. We generate a long_call
7783 if the function:
7784
7785 a. has an __attribute__ ((long_call))
7786 or b. is within the scope of a #pragma long_calls
7787 or c. the -mlong-calls command line switch has been specified
7788
7789 However we do not generate a long call if the function:
7790
7791 d. has an __attribute__ ((short_call))
7792 or e. is inside the scope of a #pragma no_long_calls
7793 or f. is defined in the same section as the current function. */
7794
7795 bool
7796 arm_is_long_call_p (tree decl)
7797 {
7798 tree attrs;
7799
7800 if (!decl)
7801 return TARGET_LONG_CALLS;
7802
7803 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7804 if (lookup_attribute ("short_call", attrs))
7805 return false;
7806
7807 /* For "f", be conservative, and only cater for cases in which the
7808 whole of the current function is placed in the same section. */
7809 if (!flag_reorder_blocks_and_partition
7810 && TREE_CODE (decl) == FUNCTION_DECL
7811 && arm_function_in_section_p (decl, current_function_section ()))
7812 return false;
7813
7814 if (lookup_attribute ("long_call", attrs))
7815 return true;
7816
7817 return TARGET_LONG_CALLS;
7818 }
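
/* For example (a sketch; the names are placeholders):

     void remote_fn (void) __attribute__ ((long_call));
     void nearby_fn (void) __attribute__ ((short_call));

   Calls to remote_fn are emitted as long calls even without
   -mlong-calls, while calls to nearby_fn stay short even under
   -mlong-calls or #pragma long_calls.  */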
7819
7820 /* Return nonzero if it is ok to make a tail-call to DECL. */
7821 static bool
7822 arm_function_ok_for_sibcall (tree decl, tree exp)
7823 {
7824 unsigned long func_type;
7825
7826 if (cfun->machine->sibcall_blocked)
7827 return false;
7828
7829 if (TARGET_FDPIC)
7830 {
7831 /* In FDPIC, never tailcall something for which we have no decl:
7832 the target function could be in a different module, requiring
7833 a different FDPIC register value. */
7834 if (decl == NULL)
7835 return false;
7836 }
7837
7838 /* Never tailcall something if we are generating code for Thumb-1. */
7839 if (TARGET_THUMB1)
7840 return false;
7841
7842 /* The PIC register is live on entry to VxWorks PLT entries, so we
7843 must make the call before restoring the PIC register. */
7844 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7845 return false;
7846
7847 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7848 may be used both as target of the call and base register for restoring
7849 the VFP registers */
7850 if (TARGET_APCS_FRAME && TARGET_ARM
7851 && TARGET_HARD_FLOAT
7852 && decl && arm_is_long_call_p (decl))
7853 return false;
7854
7855 /* If we are interworking and the function is not declared static
7856 then we can't tail-call it unless we know that it exists in this
7857 compilation unit (since it might be a Thumb routine). */
7858 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7859 && !TREE_ASM_WRITTEN (decl))
7860 return false;
7861
7862 func_type = arm_current_func_type ();
7863 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7864 if (IS_INTERRUPT (func_type))
7865 return false;
7866
7867 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7868 generated for entry functions themselves. */
7869 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7870 return false;
7871
7872 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7873 this would complicate matters for later code generation. */
7874 if (TREE_CODE (exp) == CALL_EXPR)
7875 {
7876 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7877 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7878 return false;
7879 }
7880
7881 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7882 {
7883 /* Check that the return value locations are the same. For
7884 example that we aren't returning a value from the sibling in
7885 a VFP register but then need to transfer it to a core
7886 register. */
7887 rtx a, b;
7888 tree decl_or_type = decl;
7889
7890 /* If it is an indirect function pointer, get the function type. */
7891 if (!decl)
7892 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7893
7894 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7895 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7896 cfun->decl, false);
7897 if (!rtx_equal_p (a, b))
7898 return false;
7899 }
7900
7901 /* Never tailcall if function may be called with a misaligned SP. */
7902 if (IS_STACKALIGN (func_type))
7903 return false;
7904
7905 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7906 references should become a NOP. Don't convert such calls into
7907 sibling calls. */
7908 if (TARGET_AAPCS_BASED
7909 && arm_abi == ARM_ABI_AAPCS
7910 && decl
7911 && DECL_WEAK (decl))
7912 return false;
7913
7914 /* We cannot do a tailcall for an indirect call by descriptor if all the
7915 argument registers are used because the only register left to load the
7916 address is IP and it will already contain the static chain. */
7917 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7918 {
7919 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7920 CUMULATIVE_ARGS cum;
7921 cumulative_args_t cum_v;
7922
7923 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7924 cum_v = pack_cumulative_args (&cum);
7925
7926 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7927 {
7928 tree type = TREE_VALUE (t);
7929 if (!VOID_TYPE_P (type))
7930 {
7931 function_arg_info arg (type, /*named=*/true);
7932 arm_function_arg_advance (cum_v, arg);
7933 }
7934 }
7935
7936 function_arg_info arg (integer_type_node, /*named=*/true);
7937 if (!arm_function_arg (cum_v, arg))
7938 return false;
7939 }
7940
7941 /* Everything else is ok. */
7942 return true;
7943 }
7944
7945 \f
7946 /* Addressing mode support functions. */
7947
7948 /* Return nonzero if X is a legitimate immediate operand when compiling
7949 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7950 int
7951 legitimate_pic_operand_p (rtx x)
7952 {
7953 if (SYMBOL_REF_P (x)
7954 || (GET_CODE (x) == CONST
7955 && GET_CODE (XEXP (x, 0)) == PLUS
7956 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7957 return 0;
7958
7959 return 1;
7960 }
7961
7962 /* Record that the current function needs a PIC register. If PIC_REG is null,
7963 a new pseudo is allocated as the PIC register, otherwise PIC_REG is used.  In
7964 both cases cfun->machine->pic_reg is initialized if we have not already done
7965 so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
7966 the PIC register is reloaded at the current position of the instruction stream,
7967 regardless of whether it was loaded before.  Otherwise, it is only loaded
7968 if not already done so (crtl->uses_pic_offset_table is null). Note that
7969 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7970 is only supported iff COMPUTE_NOW is false. */
7971
7972 static void
7973 require_pic_register (rtx pic_reg, bool compute_now)
7974 {
7975 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7976
7977 /* A lot of the logic here is made obscure by the fact that this
7978 routine gets called as part of the rtx cost estimation process.
7979 We don't want those calls to affect any assumptions about the real
7980 function; and further, we can't call entry_of_function() until we
7981 start the real expansion process. */
7982 if (!crtl->uses_pic_offset_table || compute_now)
7983 {
7984 gcc_assert (can_create_pseudo_p ()
7985 || (pic_reg != NULL_RTX
7986 && REG_P (pic_reg)
7987 && GET_MODE (pic_reg) == Pmode));
7988 if (arm_pic_register != INVALID_REGNUM
7989 && !compute_now
7990 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7991 {
7992 if (!cfun->machine->pic_reg)
7993 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7994
7995 /* Play games to avoid marking the function as needing pic
7996 if we are being called as part of the cost-estimation
7997 process. */
7998 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7999 crtl->uses_pic_offset_table = 1;
8000 }
8001 else
8002 {
8003 rtx_insn *seq, *insn;
8004
8005 if (pic_reg == NULL_RTX)
8006 pic_reg = gen_reg_rtx (Pmode);
8007 if (!cfun->machine->pic_reg)
8008 cfun->machine->pic_reg = pic_reg;
8009
8010 /* Play games to avoid marking the function as needing pic
8011 if we are being called as part of the cost-estimation
8012 process. */
8013 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8014 {
8015 crtl->uses_pic_offset_table = 1;
8016 start_sequence ();
8017
8018 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8019 && arm_pic_register > LAST_LO_REGNUM
8020 && !compute_now)
8021 emit_move_insn (cfun->machine->pic_reg,
8022 gen_rtx_REG (Pmode, arm_pic_register));
8023 else
8024 arm_load_pic_register (0UL, pic_reg);
8025
8026 seq = get_insns ();
8027 end_sequence ();
8028
8029 for (insn = seq; insn; insn = NEXT_INSN (insn))
8030 if (INSN_P (insn))
8031 INSN_LOCATION (insn) = prologue_location;
8032
8033 /* We can be called during expansion of PHI nodes, where
8034 we can't yet emit instructions directly in the final
8035 insn stream. Queue the insns on the entry edge, they will
8036 be committed after everything else is expanded. */
8037 if (currently_expanding_to_rtl)
8038 insert_insn_on_edge (seq,
8039 single_succ_edge
8040 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8041 else
8042 emit_insn (seq);
8043 }
8044 }
8045 }
8046 }
8047
8048 /* Generate insns to calculate the address of ORIG in pic mode. */
8049 static rtx_insn *
8050 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8051 {
8052 rtx pat;
8053 rtx mem;
8054
8055 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8056
8057 /* Make the MEM as close to a constant as possible. */
8058 mem = SET_SRC (pat);
8059 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8060 MEM_READONLY_P (mem) = 1;
8061 MEM_NOTRAP_P (mem) = 1;
8062
8063 return emit_insn (pat);
8064 }
8065
8066 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8067 created to hold the result of the load. If not NULL, PIC_REG indicates
8068 which register to use as the PIC register, otherwise it is decided by the
8069 register allocator.  COMPUTE_NOW forces the PIC register to be loaded at the
8070 current location in the instruction stream, regardless of whether it was loaded
8071 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8072 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8073
8074 Returns the register REG into which the PIC load is performed. */
8075
8076 rtx
8077 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8078 bool compute_now)
8079 {
8080 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8081
8082 if (SYMBOL_REF_P (orig)
8083 || LABEL_REF_P (orig))
8084 {
8085 if (reg == 0)
8086 {
8087 gcc_assert (can_create_pseudo_p ());
8088 reg = gen_reg_rtx (Pmode);
8089 }
8090
8091 /* VxWorks does not impose a fixed gap between segments; the run-time
8092 gap can be different from the object-file gap. We therefore can't
8093 use GOTOFF unless we are absolutely sure that the symbol is in the
8094 same segment as the GOT. Unfortunately, the flexibility of linker
8095 scripts means that we can't be sure of that in general, so assume
8096 that GOTOFF is never valid on VxWorks. */
8097 /* References to weak symbols cannot be resolved locally: they
8098 may be overridden by a non-weak definition at link time. */
8099 rtx_insn *insn;
8100 if ((LABEL_REF_P (orig)
8101 || (SYMBOL_REF_P (orig)
8102 && SYMBOL_REF_LOCAL_P (orig)
8103 && (SYMBOL_REF_DECL (orig)
8104 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8105 && (!SYMBOL_REF_FUNCTION_P (orig)
8106 || arm_fdpic_local_funcdesc_p (orig))))
8107 && NEED_GOT_RELOC
8108 && arm_pic_data_is_text_relative)
8109 insn = arm_pic_static_addr (orig, reg);
8110 else
8111 {
8112 /* If this function doesn't have a pic register, create one now. */
8113 require_pic_register (pic_reg, compute_now);
8114
8115 if (pic_reg == NULL_RTX)
8116 pic_reg = cfun->machine->pic_reg;
8117
8118 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8119 }
8120
8121 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8122 by loop. */
8123 set_unique_reg_note (insn, REG_EQUAL, orig);
8124
8125 return reg;
8126 }
8127 else if (GET_CODE (orig) == CONST)
8128 {
8129 rtx base, offset;
8130
8131 if (GET_CODE (XEXP (orig, 0)) == PLUS
8132 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8133 return orig;
8134
8135 /* Handle the case where we have: const (UNSPEC_TLS). */
8136 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8137 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8138 return orig;
8139
8140 /* Handle the case where we have:
8141 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8142 CONST_INT. */
8143 if (GET_CODE (XEXP (orig, 0)) == PLUS
8144 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8145 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8146 {
8147 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8148 return orig;
8149 }
8150
8151 if (reg == 0)
8152 {
8153 gcc_assert (can_create_pseudo_p ());
8154 reg = gen_reg_rtx (Pmode);
8155 }
8156
8157 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8158
8159 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8160 pic_reg, compute_now);
8161 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8162 base == reg ? 0 : reg, pic_reg,
8163 compute_now);
8164
8165 if (CONST_INT_P (offset))
8166 {
8167 /* The base register doesn't really matter, we only want to
8168 test the index for the appropriate mode. */
8169 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8170 {
8171 gcc_assert (can_create_pseudo_p ());
8172 offset = force_reg (Pmode, offset);
8173 }
8174
8175 if (CONST_INT_P (offset))
8176 return plus_constant (Pmode, base, INTVAL (offset));
8177 }
8178
8179 if (GET_MODE_SIZE (mode) > 4
8180 && (GET_MODE_CLASS (mode) == MODE_INT
8181 || TARGET_SOFT_FLOAT))
8182 {
8183 emit_insn (gen_addsi3 (reg, base, offset));
8184 return reg;
8185 }
8186
8187 return gen_rtx_PLUS (Pmode, base, offset);
8188 }
8189
8190 return orig;
8191 }
8192
8193
8194 /* Generate insns that produce the address of the stack canary */
8195 rtx
8196 arm_stack_protect_tls_canary_mem (bool reload)
8197 {
8198 rtx tp = gen_reg_rtx (SImode);
8199 if (reload)
8200 emit_insn (gen_reload_tp_hard (tp));
8201 else
8202 emit_insn (gen_load_tp_hard (tp));
8203
8204 rtx reg = gen_reg_rtx (SImode);
8205 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8206 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8207 return gen_rtx_MEM (SImode, reg);
8208 }
8209
8210
8211 /* Whether a register is callee saved or not. This is necessary because high
8212 registers are marked as caller saved when optimizing for size on Thumb-1
8213 targets, despite being callee saved, in order to avoid using them. */
8214 #define callee_saved_reg_p(reg) \
8215 (!call_used_or_fixed_reg_p (reg) \
8216 || (TARGET_THUMB1 && optimize_size \
8217 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8218
8219 /* Return a mask for the call-clobbered low registers that are unused
8220 at the end of the prologue. */
8221 static unsigned long
8222 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8223 {
8224 unsigned long mask = 0;
8225 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8226
8227 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8228 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8229 mask |= 1 << (reg - FIRST_LO_REGNUM);
8230 return mask;
8231 }
8232
8233 /* Similarly for the start of the epilogue. */
8234 static unsigned long
8235 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8236 {
8237 unsigned long mask = 0;
8238 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8239
8240 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8241 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8242 mask |= 1 << (reg - FIRST_LO_REGNUM);
8243 return mask;
8244 }
8245
8246 /* Find a spare register to use during the prolog of a function. */
8247
8248 static int
8249 thumb_find_work_register (unsigned long pushed_regs_mask)
8250 {
8251 int reg;
8252
8253 unsigned long unused_regs
8254 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8255
8256 /* Check the argument registers first as these are call-used. The
8257 register allocation order means that sometimes r3 might be used
8258 but earlier argument registers might not, so check them all. */
8259 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8260 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8261 return reg;
8262
8263 /* Otherwise look for a call-saved register that is going to be pushed. */
8264 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8265 if (pushed_regs_mask & (1 << reg))
8266 return reg;
8267
8268 if (TARGET_THUMB2)
8269 {
8270 /* Thumb-2 can use high regs. */
8271 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8272 if (pushed_regs_mask & (1 << reg))
8273 return reg;
8274 }
8275 /* Something went wrong - thumb_compute_save_reg_mask()
8276 should have arranged for a suitable register to be pushed. */
8277 gcc_unreachable ();
8278 }
8279
8280 static GTY(()) int pic_labelno;
8281
8282 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8283 low register. */
8284
8285 void
8286 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8287 {
8288 rtx l1, labelno, pic_tmp, pic_rtx;
8289
8290 if (crtl->uses_pic_offset_table == 0
8291 || TARGET_SINGLE_PIC_BASE
8292 || TARGET_FDPIC)
8293 return;
8294
8295 gcc_assert (flag_pic);
8296
8297 if (pic_reg == NULL_RTX)
8298 pic_reg = cfun->machine->pic_reg;
8299 if (TARGET_VXWORKS_RTP)
8300 {
8301 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8302 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8303 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8304
8305 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8306
8307 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8308 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8309 }
8310 else
8311 {
8312 /* We use an UNSPEC rather than a LABEL_REF because this label
8313 never appears in the code stream. */
8314
8315 labelno = GEN_INT (pic_labelno++);
8316 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8317 l1 = gen_rtx_CONST (VOIDmode, l1);
8318
8319 /* On the ARM the PC register contains 'dot + 8' at the time of the
8320 addition, on the Thumb it is 'dot + 4'. */
8321 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8322 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8323 UNSPEC_GOTSYM_OFF);
8324 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8325
8326 if (TARGET_32BIT)
8327 {
8328 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8329 }
8330 else /* TARGET_THUMB1 */
8331 {
8332 if (arm_pic_register != INVALID_REGNUM
8333 && REGNO (pic_reg) > LAST_LO_REGNUM)
8334 {
8335 /* We will have pushed the pic register, so we should always be
8336 able to find a work register. */
8337 pic_tmp = gen_rtx_REG (SImode,
8338 thumb_find_work_register (saved_regs));
8339 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8340 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8341 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8342 }
8343 else if (arm_pic_register != INVALID_REGNUM
8344 && arm_pic_register > LAST_LO_REGNUM
8345 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8346 {
8347 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8348 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8349 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8350 }
8351 else
8352 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8353 }
8354 }
8355
8356 /* Need to emit this whether or not we obey regdecls,
8357 since setjmp/longjmp can cause life info to screw up. */
8358 emit_use (pic_reg);
8359 }
8360
8361 /* Try to determine whether an object, referenced via ORIG, will be
8362 placed in the text or data segment. This is used in FDPIC mode, to
8363 decide which relocations to use when accessing ORIG. *IS_READONLY
8364 is set to true if ORIG is a read-only location, false otherwise.
8365 Return true if we could determine the location of ORIG, false
8366 otherwise. *IS_READONLY is valid only when we return true. */
8367 static bool
8368 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8369 {
8370 *is_readonly = false;
8371
8372 if (LABEL_REF_P (orig))
8373 {
8374 *is_readonly = true;
8375 return true;
8376 }
8377
8378 if (SYMBOL_REF_P (orig))
8379 {
8380 if (CONSTANT_POOL_ADDRESS_P (orig))
8381 {
8382 *is_readonly = true;
8383 return true;
8384 }
8385 if (SYMBOL_REF_LOCAL_P (orig)
8386 && !SYMBOL_REF_EXTERNAL_P (orig)
8387 && SYMBOL_REF_DECL (orig)
8388 && (!DECL_P (SYMBOL_REF_DECL (orig))
8389 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8390 {
8391 tree decl = SYMBOL_REF_DECL (orig);
8392 tree init = (TREE_CODE (decl) == VAR_DECL)
8393 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8394 ? decl : 0;
8395 int reloc = 0;
8396 bool named_section, readonly;
8397
8398 if (init && init != error_mark_node)
8399 reloc = compute_reloc_for_constant (init);
8400
8401 named_section = TREE_CODE (decl) == VAR_DECL
8402 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8403 readonly = decl_readonly_section (decl, reloc);
8404
8405 /* We don't know where the linker script will put a named
8406 section, so return false in such a case. */
8407 if (named_section)
8408 return false;
8409
8410 *is_readonly = readonly;
8411 return true;
8412 }
8413
8414 /* We don't know. */
8415 return false;
8416 }
8417
8418 gcc_unreachable ();
8419 }
8420
8421 /* Generate code to load the address of a static var when flag_pic is set. */
8422 static rtx_insn *
8423 arm_pic_static_addr (rtx orig, rtx reg)
8424 {
8425 rtx l1, labelno, offset_rtx;
8426 rtx_insn *insn;
8427
8428 gcc_assert (flag_pic);
8429
8430 bool is_readonly = false;
8431 bool info_known = false;
8432
8433 if (TARGET_FDPIC
8434 && SYMBOL_REF_P (orig)
8435 && !SYMBOL_REF_FUNCTION_P (orig))
8436 info_known = arm_is_segment_info_known (orig, &is_readonly);
8437
8438 if (TARGET_FDPIC
8439 && SYMBOL_REF_P (orig)
8440 && !SYMBOL_REF_FUNCTION_P (orig)
8441 && !info_known)
8442 {
8443 /* We don't know where orig is stored, so we have to be
8444 pessimistic and use a GOT relocation. */
8445 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8446
8447 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8448 }
8449 else if (TARGET_FDPIC
8450 && SYMBOL_REF_P (orig)
8451 && (SYMBOL_REF_FUNCTION_P (orig)
8452 || !is_readonly))
8453 {
8454 /* We use the GOTOFF relocation. */
8455 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8456
8457 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8458 emit_insn (gen_movsi (reg, l1));
8459 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8460 }
8461 else
8462 {
8463 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8464 PC-relative access. */
8465 /* We use an UNSPEC rather than a LABEL_REF because this label
8466 never appears in the code stream. */
8467 labelno = GEN_INT (pic_labelno++);
8468 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8469 l1 = gen_rtx_CONST (VOIDmode, l1);
8470
8471 /* On the ARM the PC register contains 'dot + 8' at the time of the
8472 addition, on the Thumb it is 'dot + 4'. */
8473 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8474 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8475 UNSPEC_SYMBOL_OFFSET);
8476 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8477
8478 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8479 labelno));
8480 }
8481
8482 return insn;
8483 }
8484
8485 /* Return nonzero if X is valid as an ARM state addressing register. */
8486 static int
8487 arm_address_register_rtx_p (rtx x, int strict_p)
8488 {
8489 int regno;
8490
8491 if (!REG_P (x))
8492 return 0;
8493
8494 regno = REGNO (x);
8495
8496 if (strict_p)
8497 return ARM_REGNO_OK_FOR_BASE_P (regno);
8498
8499 return (regno <= LAST_ARM_REGNUM
8500 || regno >= FIRST_PSEUDO_REGISTER
8501 || regno == FRAME_POINTER_REGNUM
8502 || regno == ARG_POINTER_REGNUM);
8503 }
8504
8505 /* Return TRUE if this rtx is the difference of a symbol and a label,
8506 and will reduce to a PC-relative relocation in the object file.
8507 Expressions like this can be left alone when generating PIC, rather
8508 than forced through the GOT. */
8509 static int
8510 pcrel_constant_p (rtx x)
8511 {
8512 if (GET_CODE (x) == MINUS)
8513 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8514
8515 return FALSE;
8516 }
8517
8518 /* Return true if X will surely end up in an index register after next
8519 splitting pass. */
8520 static bool
8521 will_be_in_index_register (const_rtx x)
8522 {
8523 /* arm.md: calculate_pic_address will split this into a register. */
8524 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8525 }
8526
8527 /* Return nonzero if X is a valid ARM state address operand. */
8528 int
8529 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8530 int strict_p)
8531 {
8532 bool use_ldrd;
8533 enum rtx_code code = GET_CODE (x);
8534
8535 if (arm_address_register_rtx_p (x, strict_p))
8536 return 1;
8537
8538 use_ldrd = (TARGET_LDRD
8539 && (mode == DImode || mode == DFmode));
8540
8541 if (code == POST_INC || code == PRE_DEC
8542 || ((code == PRE_INC || code == POST_DEC)
8543 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8544 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8545
8546 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8547 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8548 && GET_CODE (XEXP (x, 1)) == PLUS
8549 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8550 {
8551 rtx addend = XEXP (XEXP (x, 1), 1);
8552
8553 /* Don't allow ldrd post-increment by register because it's hard
8554 to fix up invalid register choices. */
8555 if (use_ldrd
8556 && GET_CODE (x) == POST_MODIFY
8557 && REG_P (addend))
8558 return 0;
8559
8560 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8561 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8562 }
8563
8564 /* After reload constants split into minipools will have addresses
8565 from a LABEL_REF. */
8566 else if (reload_completed
8567 && (code == LABEL_REF
8568 || (code == CONST
8569 && GET_CODE (XEXP (x, 0)) == PLUS
8570 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8571 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8572 return 1;
8573
8574 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8575 return 0;
8576
8577 else if (code == PLUS)
8578 {
8579 rtx xop0 = XEXP (x, 0);
8580 rtx xop1 = XEXP (x, 1);
8581
8582 return ((arm_address_register_rtx_p (xop0, strict_p)
8583 && ((CONST_INT_P (xop1)
8584 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8585 || (!strict_p && will_be_in_index_register (xop1))))
8586 || (arm_address_register_rtx_p (xop1, strict_p)
8587 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8588 }
8589
8590 #if 0
8591 /* Reload currently can't handle MINUS, so disable this for now */
8592 else if (GET_CODE (x) == MINUS)
8593 {
8594 rtx xop0 = XEXP (x, 0);
8595 rtx xop1 = XEXP (x, 1);
8596
8597 return (arm_address_register_rtx_p (xop0, strict_p)
8598 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8599 }
8600 #endif
8601
8602 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8603 && code == SYMBOL_REF
8604 && CONSTANT_POOL_ADDRESS_P (x)
8605 && ! (flag_pic
8606 && symbol_mentioned_p (get_pool_constant (x))
8607 && ! pcrel_constant_p (get_pool_constant (x))))
8608 return 1;
8609
8610 return 0;
8611 }
8612
8613 /* Return true if we can avoid creating a constant pool entry for x. */
8614 static bool
8615 can_avoid_literal_pool_for_label_p (rtx x)
8616 {
8617 /* Normally we can assign constant values to target registers without
8618 the help of the constant pool. But there are cases where we have to
8619 use the constant pool, such as:
8620 1) assigning a label to a register.
8621 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8622
8623 A constant pool access of the form:
8624 (set (reg r0) (mem (symbol_ref (".LC0"))))
8625 will cause the use of the literal pool (later, in function arm_reorg).
8626 So here we mark such a form as invalid, and the compiler will then
8627 adjust it into:
8628 (set (reg r0) (symbol_ref (".LC0")))
8629 (set (reg r0) (mem (reg r0))).
8630 No extra register is required, and (mem (reg r0)) won't cause the use
8631 of literal pools. */
8632 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8633 && CONSTANT_POOL_ADDRESS_P (x))
8634 return 1;
8635 return 0;
8636 }
8637
8638
8639 /* Return nonzero if X is a valid Thumb-2 address operand. */
8640 static int
8641 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8642 {
8643 bool use_ldrd;
8644 enum rtx_code code = GET_CODE (x);
8645
8646 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8647 return mve_vector_mem_operand (mode, x, strict_p);
8648
8649 if (arm_address_register_rtx_p (x, strict_p))
8650 return 1;
8651
8652 use_ldrd = (TARGET_LDRD
8653 && (mode == DImode || mode == DFmode));
8654
8655 if (code == POST_INC || code == PRE_DEC
8656 || ((code == PRE_INC || code == POST_DEC)
8657 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8658 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8659
8660 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8661 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8662 && GET_CODE (XEXP (x, 1)) == PLUS
8663 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8664 {
8665 /* Thumb-2 only has autoincrement by constant. */
8666 rtx addend = XEXP (XEXP (x, 1), 1);
8667 HOST_WIDE_INT offset;
8668
8669 if (!CONST_INT_P (addend))
8670 return 0;
8671
8672 offset = INTVAL (addend);
8673 if (GET_MODE_SIZE (mode) <= 4)
8674 return (offset > -256 && offset < 256);
8675
8676 return (use_ldrd && offset > -1024 && offset < 1024
8677 && (offset & 3) == 0);
8678 }
8679
8680 /* After reload constants split into minipools will have addresses
8681 from a LABEL_REF. */
8682 else if (reload_completed
8683 && (code == LABEL_REF
8684 || (code == CONST
8685 && GET_CODE (XEXP (x, 0)) == PLUS
8686 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8687 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8688 return 1;
8689
8690 else if (mode == TImode
8691 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8692 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8693 return 0;
8694
8695 else if (code == PLUS)
8696 {
8697 rtx xop0 = XEXP (x, 0);
8698 rtx xop1 = XEXP (x, 1);
8699
8700 return ((arm_address_register_rtx_p (xop0, strict_p)
8701 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8702 || (!strict_p && will_be_in_index_register (xop1))))
8703 || (arm_address_register_rtx_p (xop1, strict_p)
8704 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8705 }
8706
8707 else if (can_avoid_literal_pool_for_label_p (x))
8708 return 0;
8709
8710 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8711 && code == SYMBOL_REF
8712 && CONSTANT_POOL_ADDRESS_P (x)
8713 && ! (flag_pic
8714 && symbol_mentioned_p (get_pool_constant (x))
8715 && ! pcrel_constant_p (get_pool_constant (x))))
8716 return 1;
8717
8718 return 0;
8719 }
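
/* Illustrative sketch (hypothetical helper, not used by the compiler) of
   the autoincrement test in thumb2_legitimate_address_p above: Thumb-2
   only autoincrements by a constant, with a +/-255 byte range for accesses
   of at most 4 bytes and a 4-byte-aligned +/-1020 range when LDRD/STRD is
   available.  */

static int
example_thumb2_autoinc_offset_ok (long offset, int mode_size, int use_ldrd)
{
  if (mode_size <= 4)
    return offset > -256 && offset < 256;

  return (use_ldrd
          && offset > -1024 && offset < 1024
          && (offset & 3) == 0);
}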
8720
8721 /* Return nonzero if INDEX is valid for an address index operand in
8722 ARM state. */
8723 static int
8724 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8725 int strict_p)
8726 {
8727 HOST_WIDE_INT range;
8728 enum rtx_code code = GET_CODE (index);
8729
8730 /* Standard coprocessor addressing modes. */
8731 if (TARGET_HARD_FLOAT
8732 && (mode == SFmode || mode == DFmode))
8733 return (code == CONST_INT && INTVAL (index) < 1024
8734 && INTVAL (index) > -1024
8735 && (INTVAL (index) & 3) == 0);
8736
8737 /* For quad modes, we restrict the constant offset to be slightly less
8738 than what the instruction format permits. We do this because for
8739 quad mode moves, we will actually decompose them into two separate
8740 double-mode reads or writes. INDEX must therefore be a valid
8741 (double-mode) offset and so should INDEX+8. */
8742 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8743 return (code == CONST_INT
8744 && INTVAL (index) < 1016
8745 && INTVAL (index) > -1024
8746 && (INTVAL (index) & 3) == 0);
8747
8748 /* We have no such constraint on double mode offsets, so we permit the
8749 full range of the instruction format. */
8750 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8751 return (code == CONST_INT
8752 && INTVAL (index) < 1024
8753 && INTVAL (index) > -1024
8754 && (INTVAL (index) & 3) == 0);
8755
8756 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8757 return (code == CONST_INT
8758 && INTVAL (index) < 1024
8759 && INTVAL (index) > -1024
8760 && (INTVAL (index) & 3) == 0);
8761
8762 if (arm_address_register_rtx_p (index, strict_p)
8763 && (GET_MODE_SIZE (mode) <= 4))
8764 return 1;
8765
8766 if (mode == DImode || mode == DFmode)
8767 {
8768 if (code == CONST_INT)
8769 {
8770 HOST_WIDE_INT val = INTVAL (index);
8771
8772 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8773 If vldr is selected it uses arm_coproc_mem_operand. */
8774 if (TARGET_LDRD)
8775 return val > -256 && val < 256;
8776 else
8777 return val > -4096 && val < 4092;
8778 }
8779
8780 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8781 }
8782
8783 if (GET_MODE_SIZE (mode) <= 4
8784 && ! (arm_arch4
8785 && (mode == HImode
8786 || mode == HFmode
8787 || (mode == QImode && outer == SIGN_EXTEND))))
8788 {
8789 if (code == MULT)
8790 {
8791 rtx xiop0 = XEXP (index, 0);
8792 rtx xiop1 = XEXP (index, 1);
8793
8794 return ((arm_address_register_rtx_p (xiop0, strict_p)
8795 && power_of_two_operand (xiop1, SImode))
8796 || (arm_address_register_rtx_p (xiop1, strict_p)
8797 && power_of_two_operand (xiop0, SImode)));
8798 }
8799 else if (code == LSHIFTRT || code == ASHIFTRT
8800 || code == ASHIFT || code == ROTATERT)
8801 {
8802 rtx op = XEXP (index, 1);
8803
8804 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8805 && CONST_INT_P (op)
8806 && INTVAL (op) > 0
8807 && INTVAL (op) <= 31);
8808 }
8809 }
8810
8811 /* For ARM v4 we may be doing a sign-extend operation during the
8812 load. */
8813 if (arm_arch4)
8814 {
8815 if (mode == HImode
8816 || mode == HFmode
8817 || (outer == SIGN_EXTEND && mode == QImode))
8818 range = 256;
8819 else
8820 range = 4096;
8821 }
8822 else
8823 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8824
8825 return (code == CONST_INT
8826 && INTVAL (index) < range
8827 && INTVAL (index) > -range);
8828 }
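
/* Illustrative sketch of why the NEON quad-register limit above is 1016
   rather than 1024: a quad move is decomposed into two double-mode
   accesses, at INDEX and INDEX + 8, and both halves must pass the
   double-mode test (-1024 < offset < 1024, 4-byte aligned).  The helper
   below is hypothetical and exists only for exposition.  */

static int
example_neon_qreg_offset_ok (long index)
{
  long half;
  for (half = index; half <= index + 8; half += 8)
    if (!(half > -1024 && half < 1024 && (half & 3) == 0))
      return 0;
  /* Holds exactly when -1024 < index < 1016 and index is word aligned.  */
  return 1;
}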
8829
8830 /* Return true if OP is a valid index scaling factor for a Thumb-2
8831 address index operand, i.e. 1, 2, 4 or 8. */
8832 static bool
8833 thumb2_index_mul_operand (rtx op)
8834 {
8835 HOST_WIDE_INT val;
8836
8837 if (!CONST_INT_P (op))
8838 return false;
8839
8840 val = INTVAL (op);
8841 return (val == 1 || val == 2 || val == 4 || val == 8);
8842 }
8843
8844 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8845 static int
8846 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8847 {
8848 enum rtx_code code = GET_CODE (index);
8849
8850 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8851 /* Standard coprocessor addressing modes. */
8852 if (TARGET_VFP_BASE
8853 && (mode == SFmode || mode == DFmode))
8854 return (code == CONST_INT && INTVAL (index) < 1024
8855 /* Thumb-2 allows an index range of only > -256 for its core register
8856 load/stores. Since we allow SF/DF in core registers, we have
8857 to use the intersection of -256~4096 (core) and -1024~1024
8858 (coprocessor). */
8859 && INTVAL (index) > -256
8860 && (INTVAL (index) & 3) == 0);
8861
8862 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8863 {
8864 /* For DImode assume values will usually live in core regs
8865 and only allow LDRD addressing modes. */
8866 if (!TARGET_LDRD || mode != DImode)
8867 return (code == CONST_INT
8868 && INTVAL (index) < 1024
8869 && INTVAL (index) > -1024
8870 && (INTVAL (index) & 3) == 0);
8871 }
8872
8873 /* For quad modes, we restrict the constant offset to be slightly less
8874 than what the instruction format permits. We do this because for
8875 quad mode moves, we will actually decompose them into two separate
8876 double-mode reads or writes. INDEX must therefore be a valid
8877 (double-mode) offset and so should INDEX+8. */
8878 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8879 return (code == CONST_INT
8880 && INTVAL (index) < 1016
8881 && INTVAL (index) > -1024
8882 && (INTVAL (index) & 3) == 0);
8883
8884 /* We have no such constraint on double mode offsets, so we permit the
8885 full range of the instruction format. */
8886 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8887 return (code == CONST_INT
8888 && INTVAL (index) < 1024
8889 && INTVAL (index) > -1024
8890 && (INTVAL (index) & 3) == 0);
8891
8892 if (arm_address_register_rtx_p (index, strict_p)
8893 && (GET_MODE_SIZE (mode) <= 4))
8894 return 1;
8895
8896 if (mode == DImode || mode == DFmode)
8897 {
8898 if (code == CONST_INT)
8899 {
8900 HOST_WIDE_INT val = INTVAL (index);
8901 /* Thumb-2 ldrd only has reg+const addressing modes.
8902 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8903 If vldr is selected it uses arm_coproc_mem_operand. */
8904 if (TARGET_LDRD)
8905 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8906 else
8907 return IN_RANGE (val, -255, 4095 - 4);
8908 }
8909 else
8910 return 0;
8911 }
8912
8913 if (code == MULT)
8914 {
8915 rtx xiop0 = XEXP (index, 0);
8916 rtx xiop1 = XEXP (index, 1);
8917
8918 return ((arm_address_register_rtx_p (xiop0, strict_p)
8919 && thumb2_index_mul_operand (xiop1))
8920 || (arm_address_register_rtx_p (xiop1, strict_p)
8921 && thumb2_index_mul_operand (xiop0)));
8922 }
8923 else if (code == ASHIFT)
8924 {
8925 rtx op = XEXP (index, 1);
8926
8927 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8928 && CONST_INT_P (op)
8929 && INTVAL (op) > 0
8930 && INTVAL (op) <= 3);
8931 }
8932
8933 return (code == CONST_INT
8934 && INTVAL (index) < 4096
8935 && INTVAL (index) > -256);
8936 }
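
/* Illustrative sketch of the SF/DF range intersection described in
   thumb2_legitimate_index_p above: core-register loads reach offsets in
   (-256, 4096) while the coprocessor (VFP) form reaches 4-byte-aligned
   offsets in (-1024, 1024); only offsets valid for both are accepted.
   Hypothetical helper for exposition only.  */

static int
example_thumb2_sf_df_offset_ok (long offset)
{
  int core_ok = offset > -256 && offset < 4096;
  int vfp_ok = offset > -1024 && offset < 1024 && (offset & 3) == 0;
  /* Equivalent to the CONST_INT test at the top of the function.  */
  return core_ok && vfp_ok;
}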
8937
8938 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8939 static int
8940 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8941 {
8942 int regno;
8943
8944 if (!REG_P (x))
8945 return 0;
8946
8947 regno = REGNO (x);
8948
8949 if (strict_p)
8950 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8951
8952 return (regno <= LAST_LO_REGNUM
8953 || regno > LAST_VIRTUAL_REGISTER
8954 || regno == FRAME_POINTER_REGNUM
8955 || (GET_MODE_SIZE (mode) >= 4
8956 && (regno == STACK_POINTER_REGNUM
8957 || regno >= FIRST_PSEUDO_REGISTER
8958 || x == hard_frame_pointer_rtx
8959 || x == arg_pointer_rtx)));
8960 }
8961
8962 /* Return nonzero if x is a legitimate index register. This is the case
8963 for any base register that can access a QImode object. */
8964 inline static int
8965 thumb1_index_register_rtx_p (rtx x, int strict_p)
8966 {
8967 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8968 }
8969
8970 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8971
8972 The AP may be eliminated to either the SP or the FP, so we use the
8973 least common denominator, e.g. SImode, and offsets from 0 to 64.
8974
8975 ??? Verify whether the above is the right approach.
8976
8977 ??? Also, the FP may be eliminated to the SP, so perhaps that
8978 needs special handling also.
8979
8980 ??? Look at how the mips16 port solves this problem. It probably uses
8981 better ways to solve some of these problems.
8982
8983 Although it is not incorrect, we don't accept QImode and HImode
8984 addresses based on the frame pointer or arg pointer until the
8985 reload pass starts. This is so that eliminating such addresses
8986 into stack-based ones won't produce impossible code. */
8987 int
8988 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8989 {
8990 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8991 return 0;
8992
8993 /* ??? Not clear if this is right. Experiment. */
8994 if (GET_MODE_SIZE (mode) < 4
8995 && !(reload_in_progress || reload_completed)
8996 && (reg_mentioned_p (frame_pointer_rtx, x)
8997 || reg_mentioned_p (arg_pointer_rtx, x)
8998 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8999 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9000 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9001 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9002 return 0;
9003
9004 /* Accept any base register. SP only in SImode or larger. */
9005 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9006 return 1;
9007
9008 /* This is PC relative data before arm_reorg runs. */
9009 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9010 && SYMBOL_REF_P (x)
9011 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9012 && !arm_disable_literal_pool)
9013 return 1;
9014
9015 /* This is PC relative data after arm_reorg runs. */
9016 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9017 && reload_completed
9018 && (LABEL_REF_P (x)
9019 || (GET_CODE (x) == CONST
9020 && GET_CODE (XEXP (x, 0)) == PLUS
9021 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9022 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9023 return 1;
9024
9025 /* Post-inc indexing only supported for SImode and larger. */
9026 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9027 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9028 return 1;
9029
9030 else if (GET_CODE (x) == PLUS)
9031 {
9032 /* REG+REG address can be any two index registers. */
9033 /* We disallow FRAME+REG addressing since we know that FRAME
9034 will be replaced with STACK, and SP relative addressing only
9035 permits SP+OFFSET. */
9036 if (GET_MODE_SIZE (mode) <= 4
9037 && XEXP (x, 0) != frame_pointer_rtx
9038 && XEXP (x, 1) != frame_pointer_rtx
9039 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9040 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9041 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9042 return 1;
9043
9044 /* REG+const has 5-7 bit offset for non-SP registers. */
9045 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9046 || XEXP (x, 0) == arg_pointer_rtx)
9047 && CONST_INT_P (XEXP (x, 1))
9048 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9049 return 1;
9050
9051 /* REG+const has a 10-bit offset for SP, but only SImode and
9052 larger are supported. */
9053 /* ??? Should probably check for DI/DFmode overflow here
9054 just like GO_IF_LEGITIMATE_OFFSET does. */
9055 else if (REG_P (XEXP (x, 0))
9056 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9057 && GET_MODE_SIZE (mode) >= 4
9058 && CONST_INT_P (XEXP (x, 1))
9059 && INTVAL (XEXP (x, 1)) >= 0
9060 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9061 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9062 return 1;
9063
9064 else if (REG_P (XEXP (x, 0))
9065 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9066 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9067 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9068 && REGNO (XEXP (x, 0))
9069 <= LAST_VIRTUAL_POINTER_REGISTER))
9070 && GET_MODE_SIZE (mode) >= 4
9071 && CONST_INT_P (XEXP (x, 1))
9072 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9073 return 1;
9074 }
9075
9076 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9077 && GET_MODE_SIZE (mode) == 4
9078 && SYMBOL_REF_P (x)
9079 && CONSTANT_POOL_ADDRESS_P (x)
9080 && !arm_disable_literal_pool
9081 && ! (flag_pic
9082 && symbol_mentioned_p (get_pool_constant (x))
9083 && ! pcrel_constant_p (get_pool_constant (x))))
9084 return 1;
9085
9086 return 0;
9087 }
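
/* Illustrative sketch of the SP-relative rule in thumb1_legitimate_address_p
   above: Thumb-1 SP+offset addressing needs a non-negative, word-aligned
   offset, the access must be SImode or wider, and the whole access must
   stay within the 1024 bytes reachable by the word-scaled SP offset field.
   Hypothetical helper for exposition only.  */

static int
example_thumb1_sp_offset_ok (long offset, int mode_size)
{
  return (mode_size >= 4
          && offset >= 0
          && offset + mode_size <= 1024
          && (offset & 3) == 0);
}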
9088
9089 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9090 instruction of mode MODE. */
9091 int
9092 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9093 {
9094 switch (GET_MODE_SIZE (mode))
9095 {
9096 case 1:
9097 return val >= 0 && val < 32;
9098
9099 case 2:
9100 return val >= 0 && val < 64 && (val & 1) == 0;
9101
9102 default:
9103 return (val >= 0
9104 && (val + GET_MODE_SIZE (mode)) <= 128
9105 && (val & 3) == 0);
9106 }
9107 }
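
/* Illustrative restatement of the ranges accepted above: they correspond
   to Thumb-1's unsigned 5-bit offset field scaled by the access size, so
   byte accesses reach 0..31, halfword accesses 0..62 (even), and word or
   larger accesses must keep VAL + SIZE within 128, in multiples of 4.
   The helper below is hypothetical and mirrors thumb_legitimate_offset_p
   for exposition.  */

static int
example_thumb_offset_ok (int size, long val)
{
  switch (size)
    {
    case 1:
      return val >= 0 && val < 32;
    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;
    default:
      return val >= 0 && val + size <= 128 && (val & 3) == 0;
    }
}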
9108
9109 bool
9110 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9111 {
9112 if (TARGET_ARM)
9113 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9114 else if (TARGET_THUMB2)
9115 return thumb2_legitimate_address_p (mode, x, strict_p);
9116 else /* if (TARGET_THUMB1) */
9117 return thumb1_legitimate_address_p (mode, x, strict_p);
9118 }
9119
9120 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9121
9122 Given an rtx X being reloaded into a reg required to be
9123 in class CLASS, return the class of reg to actually use.
9124 In general this is just CLASS, but for the Thumb core registers and
9125 immediate constants we prefer a LO_REGS class or a subset. */
9126
9127 static reg_class_t
9128 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9129 {
9130 if (TARGET_32BIT)
9131 return rclass;
9132 else
9133 {
9134 if (rclass == GENERAL_REGS)
9135 return LO_REGS;
9136 else
9137 return rclass;
9138 }
9139 }
9140
9141 /* Build the SYMBOL_REF for __tls_get_addr. */
9142
9143 static GTY(()) rtx tls_get_addr_libfunc;
9144
9145 static rtx
9146 get_tls_get_addr (void)
9147 {
9148 if (!tls_get_addr_libfunc)
9149 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9150 return tls_get_addr_libfunc;
9151 }
9152
9153 rtx
9154 arm_load_tp (rtx target)
9155 {
9156 if (!target)
9157 target = gen_reg_rtx (SImode);
9158
9159 if (TARGET_HARD_TP)
9160 {
9161 /* Can return in any reg. */
9162 emit_insn (gen_load_tp_hard (target));
9163 }
9164 else
9165 {
9166 /* Always returned in r0. Immediately copy the result into a pseudo;
9167 otherwise other uses of r0 (e.g. setting up function arguments) may
9168 clobber the value. */
9169
9170 rtx tmp;
9171
9172 if (TARGET_FDPIC)
9173 {
9174 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9175 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9176
9177 emit_insn (gen_load_tp_soft_fdpic ());
9178
9179 /* Restore r9. */
9180 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9181 }
9182 else
9183 emit_insn (gen_load_tp_soft ());
9184
9185 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9186 emit_move_insn (target, tmp);
9187 }
9188 return target;
9189 }
9190
9191 static rtx
9192 load_tls_operand (rtx x, rtx reg)
9193 {
9194 rtx tmp;
9195
9196 if (reg == NULL_RTX)
9197 reg = gen_reg_rtx (SImode);
9198
9199 tmp = gen_rtx_CONST (SImode, x);
9200
9201 emit_move_insn (reg, tmp);
9202
9203 return reg;
9204 }
9205
9206 static rtx_insn *
9207 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9208 {
9209 rtx label, labelno = NULL_RTX, sum;
9210
9211 gcc_assert (reloc != TLS_DESCSEQ);
9212 start_sequence ();
9213
9214 if (TARGET_FDPIC)
9215 {
9216 sum = gen_rtx_UNSPEC (Pmode,
9217 gen_rtvec (2, x, GEN_INT (reloc)),
9218 UNSPEC_TLS);
9219 }
9220 else
9221 {
9222 labelno = GEN_INT (pic_labelno++);
9223 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9224 label = gen_rtx_CONST (VOIDmode, label);
9225
9226 sum = gen_rtx_UNSPEC (Pmode,
9227 gen_rtvec (4, x, GEN_INT (reloc), label,
9228 GEN_INT (TARGET_ARM ? 8 : 4)),
9229 UNSPEC_TLS);
9230 }
9231 reg = load_tls_operand (sum, reg);
9232
9233 if (TARGET_FDPIC)
9234 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9235 else if (TARGET_ARM)
9236 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9237 else
9238 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9239
9240 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9241 LCT_PURE, /* LCT_CONST? */
9242 Pmode, reg, Pmode);
9243
9244 rtx_insn *insns = get_insns ();
9245 end_sequence ();
9246
9247 return insns;
9248 }
9249
9250 static rtx
9251 arm_tls_descseq_addr (rtx x, rtx reg)
9252 {
9253 rtx labelno = GEN_INT (pic_labelno++);
9254 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9255 rtx sum = gen_rtx_UNSPEC (Pmode,
9256 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9257 gen_rtx_CONST (VOIDmode, label),
9258 GEN_INT (!TARGET_ARM)),
9259 UNSPEC_TLS);
9260 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9261
9262 emit_insn (gen_tlscall (x, labelno));
9263 if (!reg)
9264 reg = gen_reg_rtx (SImode);
9265 else
9266 gcc_assert (REGNO (reg) != R0_REGNUM);
9267
9268 emit_move_insn (reg, reg0);
9269
9270 return reg;
9271 }
9272
9273
9274 rtx
9275 legitimize_tls_address (rtx x, rtx reg)
9276 {
9277 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9278 rtx_insn *insns;
9279 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9280
9281 switch (model)
9282 {
9283 case TLS_MODEL_GLOBAL_DYNAMIC:
9284 if (TARGET_GNU2_TLS)
9285 {
9286 gcc_assert (!TARGET_FDPIC);
9287
9288 reg = arm_tls_descseq_addr (x, reg);
9289
9290 tp = arm_load_tp (NULL_RTX);
9291
9292 dest = gen_rtx_PLUS (Pmode, tp, reg);
9293 }
9294 else
9295 {
9296 /* Original scheme */
9297 if (TARGET_FDPIC)
9298 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9299 else
9300 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9301 dest = gen_reg_rtx (Pmode);
9302 emit_libcall_block (insns, dest, ret, x);
9303 }
9304 return dest;
9305
9306 case TLS_MODEL_LOCAL_DYNAMIC:
9307 if (TARGET_GNU2_TLS)
9308 {
9309 gcc_assert (!TARGET_FDPIC);
9310
9311 reg = arm_tls_descseq_addr (x, reg);
9312
9313 tp = arm_load_tp (NULL_RTX);
9314
9315 dest = gen_rtx_PLUS (Pmode, tp, reg);
9316 }
9317 else
9318 {
9319 if (TARGET_FDPIC)
9320 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9321 else
9322 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9323
9324 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9325 share the LDM result with other LD model accesses. */
9326 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9327 UNSPEC_TLS);
9328 dest = gen_reg_rtx (Pmode);
9329 emit_libcall_block (insns, dest, ret, eqv);
9330
9331 /* Load the addend. */
9332 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9333 GEN_INT (TLS_LDO32)),
9334 UNSPEC_TLS);
9335 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9336 dest = gen_rtx_PLUS (Pmode, dest, addend);
9337 }
9338 return dest;
9339
9340 case TLS_MODEL_INITIAL_EXEC:
9341 if (TARGET_FDPIC)
9342 {
9343 sum = gen_rtx_UNSPEC (Pmode,
9344 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9345 UNSPEC_TLS);
9346 reg = load_tls_operand (sum, reg);
9347 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9348 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9349 }
9350 else
9351 {
9352 labelno = GEN_INT (pic_labelno++);
9353 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9354 label = gen_rtx_CONST (VOIDmode, label);
9355 sum = gen_rtx_UNSPEC (Pmode,
9356 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9357 GEN_INT (TARGET_ARM ? 8 : 4)),
9358 UNSPEC_TLS);
9359 reg = load_tls_operand (sum, reg);
9360
9361 if (TARGET_ARM)
9362 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9363 else if (TARGET_THUMB2)
9364 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9365 else
9366 {
9367 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9368 emit_move_insn (reg, gen_const_mem (SImode, reg));
9369 }
9370 }
9371
9372 tp = arm_load_tp (NULL_RTX);
9373
9374 return gen_rtx_PLUS (Pmode, tp, reg);
9375
9376 case TLS_MODEL_LOCAL_EXEC:
9377 tp = arm_load_tp (NULL_RTX);
9378
9379 reg = gen_rtx_UNSPEC (Pmode,
9380 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9381 UNSPEC_TLS);
9382 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9383
9384 return gen_rtx_PLUS (Pmode, tp, reg);
9385
9386 default:
9387 abort ();
9388 }
9389 }
9390
9391 /* Try machine-dependent ways of modifying an illegitimate address
9392 to be legitimate. If we find one, return the new, valid address. */
9393 rtx
9394 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9395 {
9396 if (arm_tls_referenced_p (x))
9397 {
9398 rtx addend = NULL;
9399
9400 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9401 {
9402 addend = XEXP (XEXP (x, 0), 1);
9403 x = XEXP (XEXP (x, 0), 0);
9404 }
9405
9406 if (!SYMBOL_REF_P (x))
9407 return x;
9408
9409 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9410
9411 x = legitimize_tls_address (x, NULL_RTX);
9412
9413 if (addend)
9414 {
9415 x = gen_rtx_PLUS (SImode, x, addend);
9416 orig_x = x;
9417 }
9418 else
9419 return x;
9420 }
9421
9422 if (TARGET_THUMB1)
9423 return thumb_legitimize_address (x, orig_x, mode);
9424
9425 if (GET_CODE (x) == PLUS)
9426 {
9427 rtx xop0 = XEXP (x, 0);
9428 rtx xop1 = XEXP (x, 1);
9429
9430 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9431 xop0 = force_reg (SImode, xop0);
9432
9433 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9434 && !symbol_mentioned_p (xop1))
9435 xop1 = force_reg (SImode, xop1);
9436
9437 if (ARM_BASE_REGISTER_RTX_P (xop0)
9438 && CONST_INT_P (xop1))
9439 {
9440 HOST_WIDE_INT n, low_n;
9441 rtx base_reg, val;
9442 n = INTVAL (xop1);
9443
9444 /* VFP addressing modes actually allow greater offsets, but for
9445 now we just stick with the lowest common denominator. */
9446 if (mode == DImode || mode == DFmode)
9447 {
9448 low_n = n & 0x0f;
9449 n &= ~0x0f;
9450 if (low_n > 4)
9451 {
9452 n += 16;
9453 low_n -= 16;
9454 }
9455 }
9456 else
9457 {
9458 low_n = ((mode) == TImode ? 0
9459 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9460 n -= low_n;
9461 }
9462
9463 base_reg = gen_reg_rtx (SImode);
9464 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9465 emit_move_insn (base_reg, val);
9466 x = plus_constant (Pmode, base_reg, low_n);
9467 }
9468 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9469 x = gen_rtx_PLUS (SImode, xop0, xop1);
9470 }
9471
9472 /* XXX We don't allow MINUS any more -- see comment in
9473 arm_legitimate_address_outer_p (). */
9474 else if (GET_CODE (x) == MINUS)
9475 {
9476 rtx xop0 = XEXP (x, 0);
9477 rtx xop1 = XEXP (x, 1);
9478
9479 if (CONSTANT_P (xop0))
9480 xop0 = force_reg (SImode, xop0);
9481
9482 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9483 xop1 = force_reg (SImode, xop1);
9484
9485 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9486 x = gen_rtx_MINUS (SImode, xop0, xop1);
9487 }
9488
9489 /* Make sure to take full advantage of the pre-indexed addressing mode
9490 with absolute addresses, which often allows the base register to
9491 be factored out across multiple adjacent memory references, and might
9492 even allow the minipool to be avoided entirely. */
9493 else if (CONST_INT_P (x) && optimize > 0)
9494 {
9495 unsigned int bits;
9496 HOST_WIDE_INT mask, base, index;
9497 rtx base_reg;
9498
9499 /* LDR and LDRB can use a 12-bit index; LDRSB and the rest can
9500 only use an 8-bit index. So let's use a 12-bit index for
9501 SImode only and hope that arm_gen_constant will enable LDRB
9502 to use more bits. */
9503 bits = (mode == SImode) ? 12 : 8;
9504 mask = (1 << bits) - 1;
9505 base = INTVAL (x) & ~mask;
9506 index = INTVAL (x) & mask;
9507 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9508 {
9509 /* It'll most probably be more efficient to generate the
9510 base with more bits set and use a negative index instead.
9511 Don't do this for Thumb as negative offsets are much more
9512 limited. */
9513 base |= mask;
9514 index -= mask;
9515 }
9516 base_reg = force_reg (SImode, GEN_INT (base));
9517 x = plus_constant (Pmode, base_reg, index);
9518 }
9519
9520 if (flag_pic)
9521 {
9522 /* We need to find and carefully transform any SYMBOL and LABEL
9523 references; so go back to the original address expression. */
9524 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9525 false /*compute_now*/);
9526
9527 if (new_x != orig_x)
9528 x = new_x;
9529 }
9530
9531 return x;
9532 }
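
/* Illustrative sketch of the constant-address split performed above for
   SImode: the low 12 bits become the load's immediate index and the rest
   is forced into a base register; when the base would need many set bits,
   the code instead sets those low bits in the base and compensates with a
   negative index.  The helper below is hypothetical, shows only the basic
   split, and exists for exposition.  */

static unsigned int
example_split_const_address (unsigned int addr)
{
  unsigned int mask = (1u << 12) - 1;   /* SImode: 12-bit index.  */
  unsigned int base = addr & ~mask;     /* Materialized in a register.  */
  int index = (int) (addr & mask);      /* Used as the immediate offset.  */
  return base + index;                  /* Reconstructs ADDR exactly.  */
}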
9533
9534
9535 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9536 to be legitimate. If we find one, return the new, valid address. */
9537 rtx
9538 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9539 {
9540 if (GET_CODE (x) == PLUS
9541 && CONST_INT_P (XEXP (x, 1))
9542 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9543 || INTVAL (XEXP (x, 1)) < 0))
9544 {
9545 rtx xop0 = XEXP (x, 0);
9546 rtx xop1 = XEXP (x, 1);
9547 HOST_WIDE_INT offset = INTVAL (xop1);
9548
9549 /* Try and fold the offset into a biasing of the base register and
9550 then offsetting that. Don't do this when optimizing for space
9551 since it can cause too many CSEs. */
9552 if (optimize_size && offset >= 0
9553 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9554 {
9555 HOST_WIDE_INT delta;
9556
9557 if (offset >= 256)
9558 delta = offset - (256 - GET_MODE_SIZE (mode));
9559 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9560 delta = 31 * GET_MODE_SIZE (mode);
9561 else
9562 delta = offset & (~31 * GET_MODE_SIZE (mode));
9563
9564 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9565 NULL_RTX);
9566 x = plus_constant (Pmode, xop0, delta);
9567 }
9568 else if (offset < 0 && offset > -256)
9569 /* Small negative offsets are best done with a subtract before the
9570 dereference; forcing these into a register normally takes two
9571 instructions. */
9572 x = force_operand (x, NULL_RTX);
9573 else
9574 {
9575 /* For the remaining cases, force the constant into a register. */
9576 xop1 = force_reg (SImode, xop1);
9577 x = gen_rtx_PLUS (SImode, xop0, xop1);
9578 }
9579 }
9580 else if (GET_CODE (x) == PLUS
9581 && s_register_operand (XEXP (x, 1), SImode)
9582 && !s_register_operand (XEXP (x, 0), SImode))
9583 {
9584 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9585
9586 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9587 }
9588
9589 if (flag_pic)
9590 {
9591 /* We need to find and carefully transform any SYMBOL and LABEL
9592 references; so go back to the original address expression. */
9593 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9594 false /*compute_now*/);
9595
9596 if (new_x != orig_x)
9597 x = new_x;
9598 }
9599
9600 return x;
9601 }
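
/* Illustrative sketch of the rebasing done above: whichever DELTA the code
   picks, the address is re-expressed as (BASE + (OFFSET - DELTA)) + DELTA,
   so the final address is unchanged; the only goal is to leave a residual
   DELTA that fits the Thumb-1 immediate offset field.  Hypothetical helper
   for exposition only.  */

static unsigned int
example_thumb_rebase (unsigned int base, long offset, long delta)
{
  unsigned int biased_base = base + (offset - delta);  /* New base reg.  */
  return biased_base + delta;                          /* == base + offset.  */
}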
9602
9603 /* Return TRUE if X contains any TLS symbol references. */
9604
9605 bool
9606 arm_tls_referenced_p (rtx x)
9607 {
9608 if (! TARGET_HAVE_TLS)
9609 return false;
9610
9611 subrtx_iterator::array_type array;
9612 FOR_EACH_SUBRTX (iter, array, x, ALL)
9613 {
9614 const_rtx x = *iter;
9615 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9616 {
9617 /* ARM currently does not provide relocations to encode TLS variables
9618 into AArch32 instructions, only data, so there is no way at
9619 present to implement these if the literal pool is disabled. */
9620 if (arm_disable_literal_pool)
9621 sorry ("accessing thread-local storage is not currently supported "
9622 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9623
9624 return true;
9625 }
9626
9627 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9628 TLS offsets, not real symbol references. */
9629 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9630 iter.skip_subrtxes ();
9631 }
9632 return false;
9633 }
9634
9635 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9636
9637 On the ARM, allow any integer (invalid ones are removed later by insn
9638 patterns), nice doubles and symbol_refs which refer to the function's
9639 constant pool XXX.
9640
9641 When generating pic allow anything. */
9642
9643 static bool
9644 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9645 {
9646 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9647 return false;
9648
9649 return flag_pic || !label_mentioned_p (x);
9650 }
9651
9652 static bool
9653 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9654 {
9655 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9656 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9657 for ARMv8-M Baseline or later the result is valid. */
9658 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9659 x = XEXP (x, 0);
9660
9661 return (CONST_INT_P (x)
9662 || CONST_DOUBLE_P (x)
9663 || CONSTANT_ADDRESS_P (x)
9664 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9665 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9666 we build the symbol address with upper/lower
9667 relocations. */
9668 || (TARGET_THUMB1
9669 && !label_mentioned_p (x)
9670 && arm_valid_symbolic_address_p (x)
9671 && arm_disable_literal_pool)
9672 || flag_pic);
9673 }
9674
9675 static bool
9676 arm_legitimate_constant_p (machine_mode mode, rtx x)
9677 {
9678 return (!arm_cannot_force_const_mem (mode, x)
9679 && (TARGET_32BIT
9680 ? arm_legitimate_constant_p_1 (mode, x)
9681 : thumb_legitimate_constant_p (mode, x)));
9682 }
9683
9684 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9685
9686 static bool
9687 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9688 {
9689 rtx base, offset;
9690 split_const (x, &base, &offset);
9691
9692 if (SYMBOL_REF_P (base))
9693 {
9694 /* Function symbols cannot have an offset due to the Thumb bit. */
9695 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9696 && INTVAL (offset) != 0)
9697 return true;
9698
9699 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9700 && !offset_within_block_p (base, INTVAL (offset)))
9701 return true;
9702 }
9703 return arm_tls_referenced_p (x);
9704 }
9705 \f
9706 #define REG_OR_SUBREG_REG(X) \
9707 (REG_P (X) \
9708 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9709
9710 #define REG_OR_SUBREG_RTX(X) \
9711 (REG_P (X) ? (X) : SUBREG_REG (X))
9712
9713 static inline int
9714 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9715 {
9716 machine_mode mode = GET_MODE (x);
9717 int total, words;
9718
9719 switch (code)
9720 {
9721 case ASHIFT:
9722 case ASHIFTRT:
9723 case LSHIFTRT:
9724 case ROTATERT:
9725 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9726
9727 case PLUS:
9728 case MINUS:
9729 case COMPARE:
9730 case NEG:
9731 case NOT:
9732 return COSTS_N_INSNS (1);
9733
9734 case MULT:
9735 if (arm_arch6m && arm_m_profile_small_mul)
9736 return COSTS_N_INSNS (32);
9737
9738 if (CONST_INT_P (XEXP (x, 1)))
9739 {
9740 int cycles = 0;
9741 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9742
9743 while (i)
9744 {
9745 i >>= 2;
9746 cycles++;
9747 }
9748 return COSTS_N_INSNS (2) + cycles;
9749 }
9750 return COSTS_N_INSNS (1) + 16;
9751
9752 case SET:
9753 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9754 the mode. */
9755 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9756 return (COSTS_N_INSNS (words)
9757 + 4 * ((MEM_P (SET_SRC (x)))
9758 + MEM_P (SET_DEST (x))));
9759
9760 case CONST_INT:
9761 if (outer == SET)
9762 {
9763 if (UINTVAL (x) < 256
9764 /* 16-bit constant. */
9765 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9766 return 0;
9767 if (thumb_shiftable_const (INTVAL (x)))
9768 return COSTS_N_INSNS (2);
9769 return arm_disable_literal_pool
9770 ? COSTS_N_INSNS (8)
9771 : COSTS_N_INSNS (3);
9772 }
9773 else if ((outer == PLUS || outer == COMPARE)
9774 && INTVAL (x) < 256 && INTVAL (x) > -256)
9775 return 0;
9776 else if ((outer == IOR || outer == XOR || outer == AND)
9777 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9778 return COSTS_N_INSNS (1);
9779 else if (outer == AND)
9780 {
9781 int i;
9782 /* This duplicates the tests in the andsi3 expander. */
9783 for (i = 9; i <= 31; i++)
9784 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9785 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9786 return COSTS_N_INSNS (2);
9787 }
9788 else if (outer == ASHIFT || outer == ASHIFTRT
9789 || outer == LSHIFTRT)
9790 return 0;
9791 return COSTS_N_INSNS (2);
9792
9793 case CONST:
9794 case CONST_DOUBLE:
9795 case LABEL_REF:
9796 case SYMBOL_REF:
9797 return COSTS_N_INSNS (3);
9798
9799 case UDIV:
9800 case UMOD:
9801 case DIV:
9802 case MOD:
9803 return 100;
9804
9805 case TRUNCATE:
9806 return 99;
9807
9808 case AND:
9809 case XOR:
9810 case IOR:
9811 /* XXX guess. */
9812 return 8;
9813
9814 case MEM:
9815 /* XXX another guess. */
9816 /* Memory costs quite a lot for the first word, but subsequent words
9817 load at the equivalent of a single insn each. */
9818 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9819 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9820 ? 4 : 0));
9821
9822 case IF_THEN_ELSE:
9823 /* XXX a guess. */
9824 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9825 return 14;
9826 return 2;
9827
9828 case SIGN_EXTEND:
9829 case ZERO_EXTEND:
9830 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9831 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9832
9833 if (mode == SImode)
9834 return total;
9835
9836 if (arm_arch6)
9837 return total + COSTS_N_INSNS (1);
9838
9839 /* Assume a two-shift sequence. Increase the cost slightly so
9840 we prefer actual shifts over an extend operation. */
9841 return total + 1 + COSTS_N_INSNS (2);
9842
9843 default:
9844 return 99;
9845 }
9846 }
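
/* Illustrative sketch of the constant-multiply cycle estimate in
   thumb1_rtx_costs above: the loop consumes the multiplier two bits per
   iteration, so the estimate is the number of significant bit pairs.  For
   example 0x55 (8 significant bits) gives 4, and 0xffff gives 8.
   Hypothetical helper for exposition only.  */

static int
example_thumb1_mul_cycles (unsigned long multiplier)
{
  int cycles = 0;
  while (multiplier)
    {
      multiplier >>= 2;
      cycles++;
    }
  return cycles;
}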
9847
9848 /* Estimates the size cost of thumb1 instructions.
9849 For now most of the code is copied from thumb1_rtx_costs. We need more
9850 fine-grained tuning when we have more related test cases. */
9851 static inline int
9852 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9853 {
9854 machine_mode mode = GET_MODE (x);
9855 int words, cost;
9856
9857 switch (code)
9858 {
9859 case ASHIFT:
9860 case ASHIFTRT:
9861 case LSHIFTRT:
9862 case ROTATERT:
9863 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9864
9865 case PLUS:
9866 case MINUS:
9867 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9868 shiftsub1 patterns defined by RTL expansion, especially for the
9869 expansion of multiplication. */
9870 if ((GET_CODE (XEXP (x, 0)) == MULT
9871 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9872 || (GET_CODE (XEXP (x, 1)) == MULT
9873 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9874 return COSTS_N_INSNS (2);
9875 /* Fall through. */
9876 case COMPARE:
9877 case NEG:
9878 case NOT:
9879 return COSTS_N_INSNS (1);
9880
9881 case MULT:
9882 if (CONST_INT_P (XEXP (x, 1)))
9883 {
9884 /* The Thumb-1 mul instruction can't operate on a constant. We must
9885 load it into a register first. */
9886 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9887 /* For the targets which have a very small and high-latency multiply
9888 unit, we prefer to synthesize the mult with up to 5 instructions,
9889 giving a good balance between size and performance. */
9890 if (arm_arch6m && arm_m_profile_small_mul)
9891 return COSTS_N_INSNS (5);
9892 else
9893 return COSTS_N_INSNS (1) + const_size;
9894 }
9895 return COSTS_N_INSNS (1);
9896
9897 case SET:
9898 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9899 the mode. */
9900 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9901 cost = COSTS_N_INSNS (words);
9902 if (satisfies_constraint_J (SET_SRC (x))
9903 || satisfies_constraint_K (SET_SRC (x))
9904 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
9905 || (CONST_INT_P (SET_SRC (x))
9906 && UINTVAL (SET_SRC (x)) >= 256
9907 && TARGET_HAVE_MOVT
9908 && satisfies_constraint_j (SET_SRC (x)))
9909 /* thumb1_movdi_insn. */
9910 || ((words > 1) && MEM_P (SET_SRC (x))))
9911 cost += COSTS_N_INSNS (1);
9912 return cost;
9913
9914 case CONST_INT:
9915 if (outer == SET)
9916 {
9917 if (UINTVAL (x) < 256)
9918 return COSTS_N_INSNS (1);
9919 /* movw is 4 bytes long. */
9920 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9921 return COSTS_N_INSNS (2);
9922 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9923 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9924 return COSTS_N_INSNS (2);
9925 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9926 if (thumb_shiftable_const (INTVAL (x)))
9927 return COSTS_N_INSNS (2);
9928 return arm_disable_literal_pool
9929 ? COSTS_N_INSNS (8)
9930 : COSTS_N_INSNS (3);
9931 }
9932 else if ((outer == PLUS || outer == COMPARE)
9933 && INTVAL (x) < 256 && INTVAL (x) > -256)
9934 return 0;
9935 else if ((outer == IOR || outer == XOR || outer == AND)
9936 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9937 return COSTS_N_INSNS (1);
9938 else if (outer == AND)
9939 {
9940 int i;
9941 /* This duplicates the tests in the andsi3 expander. */
9942 for (i = 9; i <= 31; i++)
9943 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9944 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9945 return COSTS_N_INSNS (2);
9946 }
9947 else if (outer == ASHIFT || outer == ASHIFTRT
9948 || outer == LSHIFTRT)
9949 return 0;
9950 return COSTS_N_INSNS (2);
9951
9952 case CONST:
9953 case CONST_DOUBLE:
9954 case LABEL_REF:
9955 case SYMBOL_REF:
9956 return COSTS_N_INSNS (3);
9957
9958 case UDIV:
9959 case UMOD:
9960 case DIV:
9961 case MOD:
9962 return 100;
9963
9964 case TRUNCATE:
9965 return 99;
9966
9967 case AND:
9968 case XOR:
9969 case IOR:
9970 return COSTS_N_INSNS (1);
9971
9972 case MEM:
9973 return (COSTS_N_INSNS (1)
9974 + COSTS_N_INSNS (1)
9975 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9976 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9977 ? COSTS_N_INSNS (1) : 0));
9978
9979 case IF_THEN_ELSE:
9980 /* XXX a guess. */
9981 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9982 return 14;
9983 return 2;
9984
9985 case ZERO_EXTEND:
9986 /* XXX still guessing. */
9987 switch (GET_MODE (XEXP (x, 0)))
9988 {
9989 case E_QImode:
9990 return (1 + (mode == DImode ? 4 : 0)
9991 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9992
9993 case E_HImode:
9994 return (4 + (mode == DImode ? 4 : 0)
9995 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9996
9997 case E_SImode:
9998 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9999
10000 default:
10001 return 99;
10002 }
10003
10004 default:
10005 return 99;
10006 }
10007 }
10008
10009 /* Helper function for arm_rtx_costs. If one operand of OP (a
10010 PLUS) adds in the carry flag, then return the other operand. If
10011 neither operand is a carry, return OP unchanged. */
10012 static rtx
10013 strip_carry_operation (rtx op)
10014 {
10015 gcc_assert (GET_CODE (op) == PLUS);
10016 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10017 return XEXP (op, 1);
10018 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10019 return XEXP (op, 0);
10020 return op;
10021 }
10022
10023 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10024 operand, then return the operand that is being shifted. If the shift
10025 is not by a constant, then set SHIFT_REG to point to the shift-amount
10026 operand. Return NULL if OP is not a shifter operand. */
10027 static rtx
10028 shifter_op_p (rtx op, rtx *shift_reg)
10029 {
10030 enum rtx_code code = GET_CODE (op);
10031
10032 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10033 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10034 return XEXP (op, 0);
10035 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10036 return XEXP (op, 0);
10037 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10038 || code == ASHIFTRT)
10039 {
10040 if (!CONST_INT_P (XEXP (op, 1)))
10041 *shift_reg = XEXP (op, 1);
10042 return XEXP (op, 0);
10043 }
10044
10045 return NULL;
10046 }
10047
10048 static bool
10049 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10050 {
10051 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10052 rtx_code code = GET_CODE (x);
10053 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10054
10055 switch (XINT (x, 1))
10056 {
10057 case UNSPEC_UNALIGNED_LOAD:
10058 /* We can only do unaligned loads into the integer unit, and we can't
10059 use LDM or LDRD. */
10060 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10061 if (speed_p)
10062 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10063 + extra_cost->ldst.load_unaligned);
10064
10065 #ifdef NOT_YET
10066 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10067 ADDR_SPACE_GENERIC, speed_p);
10068 #endif
10069 return true;
10070
10071 case UNSPEC_UNALIGNED_STORE:
10072 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10073 if (speed_p)
10074 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10075 + extra_cost->ldst.store_unaligned);
10076
10077 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10078 #ifdef NOT_YET
10079 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10080 ADDR_SPACE_GENERIC, speed_p);
10081 #endif
10082 return true;
10083
10084 case UNSPEC_VRINTZ:
10085 case UNSPEC_VRINTP:
10086 case UNSPEC_VRINTM:
10087 case UNSPEC_VRINTR:
10088 case UNSPEC_VRINTX:
10089 case UNSPEC_VRINTA:
10090 if (speed_p)
10091 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10092
10093 return true;
10094 default:
10095 *cost = COSTS_N_INSNS (2);
10096 break;
10097 }
10098 return true;
10099 }
10100
10101 /* Cost of a libcall. We assume one insn per argument, an amount for the
10102 call (one insn for -Os) and then one for processing the result. */
10103 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
10104
10105 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10106 do \
10107 { \
10108 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10109 if (shift_op != NULL \
10110 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10111 { \
10112 if (shift_reg) \
10113 { \
10114 if (speed_p) \
10115 *cost += extra_cost->alu.arith_shift_reg; \
10116 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10117 ASHIFT, 1, speed_p); \
10118 } \
10119 else if (speed_p) \
10120 *cost += extra_cost->alu.arith_shift; \
10121 \
10122 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10123 ASHIFT, 0, speed_p) \
10124 + rtx_cost (XEXP (x, 1 - IDX), \
10125 GET_MODE (shift_op), \
10126 OP, 1, speed_p)); \
10127 return true; \
10128 } \
10129 } \
10130 while (0)
10131
10132 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10133 considering the costs of the addressing mode and memory access
10134 separately. */
10135 static bool
10136 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10137 int *cost, bool speed_p)
10138 {
10139 machine_mode mode = GET_MODE (x);
10140
10141 *cost = COSTS_N_INSNS (1);
10142
10143 if (flag_pic
10144 && GET_CODE (XEXP (x, 0)) == PLUS
10145 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10146 /* This will be split into two instructions. Add the cost of the
10147 additional instruction here. The cost of the memory access is computed
10148 below. See arm.md:calculate_pic_address. */
10149 *cost += COSTS_N_INSNS (1);
10150
10151 /* Calculate cost of the addressing mode. */
10152 if (speed_p)
10153 {
10154 arm_addr_mode_op op_type;
10155 switch (GET_CODE (XEXP (x, 0)))
10156 {
10157 default:
10158 case REG:
10159 op_type = AMO_DEFAULT;
10160 break;
10161 case MINUS:
10162 /* MINUS does not appear in RTL, but the architecture supports it,
10163 so handle this case defensively. */
10164 /* fall through */
10165 case PLUS:
10166 op_type = AMO_NO_WB;
10167 break;
10168 case PRE_INC:
10169 case PRE_DEC:
10170 case POST_INC:
10171 case POST_DEC:
10172 case PRE_MODIFY:
10173 case POST_MODIFY:
10174 op_type = AMO_WB;
10175 break;
10176 }
10177
10178 if (VECTOR_MODE_P (mode))
10179 *cost += current_tune->addr_mode_costs->vector[op_type];
10180 else if (FLOAT_MODE_P (mode))
10181 *cost += current_tune->addr_mode_costs->fp[op_type];
10182 else
10183 *cost += current_tune->addr_mode_costs->integer[op_type];
10184 }
10185
10186 /* Calculate cost of memory access. */
10187 if (speed_p)
10188 {
10189 if (FLOAT_MODE_P (mode))
10190 {
10191 if (GET_MODE_SIZE (mode) == 8)
10192 *cost += extra_cost->ldst.loadd;
10193 else
10194 *cost += extra_cost->ldst.loadf;
10195 }
10196 else if (VECTOR_MODE_P (mode))
10197 *cost += extra_cost->ldst.loadv;
10198 else
10199 {
10200 /* Integer modes */
10201 if (GET_MODE_SIZE (mode) == 8)
10202 *cost += extra_cost->ldst.ldrd;
10203 else
10204 *cost += extra_cost->ldst.load;
10205 }
10206 }
10207
10208 return true;
10209 }
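
/* Illustrative sketch of the decomposition used by arm_mem_costs above: a
   memory access is costed as one baseline insn, plus (for speed) an
   addressing-mode component and a load/store component, plus one extra
   insn when a PIC address will later be split into two instructions.  The
   helper and its arguments are hypothetical placeholders, not real tuning
   values.  */

static int
example_mem_cost (int addr_mode_cost, int access_cost, int pic_split_p,
                  int speed_p)
{
  int cost = COSTS_N_INSNS (1);
  if (pic_split_p)
    cost += COSTS_N_INSNS (1);
  if (speed_p)
    cost += addr_mode_cost + access_cost;
  return cost;
}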
10210
10211 /* Helper for arm_bfi_p. */
10212 static bool
10213 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10214 {
10215 unsigned HOST_WIDE_INT const1;
10216 unsigned HOST_WIDE_INT const2 = 0;
10217
10218 if (!CONST_INT_P (XEXP (op0, 1)))
10219 return false;
10220
10221 const1 = UINTVAL (XEXP (op0, 1));
10222 if (!CONST_INT_P (XEXP (op1, 1))
10223 || ~UINTVAL (XEXP (op1, 1)) != const1)
10224 return false;
10225
10226 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10227 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10228 {
10229 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10230 *sub0 = XEXP (XEXP (op0, 0), 0);
10231 }
10232 else
10233 *sub0 = XEXP (op0, 0);
10234
10235 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10236 return false;
10237
10238 *sub1 = XEXP (op1, 0);
10239 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10240 }
10241
10242 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10243 format looks something like:
10244
10245 (IOR (AND (reg1) (~const1))
10246 (AND (ASHIFT (reg2) (const2))
10247 (const1)))
10248
10249 where const1 is a consecutive sequence of 1-bits with the
10250 least-significant non-zero bit starting at bit position const2. If
10251 const2 is zero, then the shift will not appear at all, due to
10252 canonicalization. The two arms of the IOR expression may be
10253 flipped. */
10254 static bool
10255 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10256 {
10257 if (GET_CODE (x) != IOR)
10258 return false;
10259 if (GET_CODE (XEXP (x, 0)) != AND
10260 || GET_CODE (XEXP (x, 1)) != AND)
10261 return false;
10262 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10263 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10264 }
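
/* Illustrative sketch of the BFI idiom recognized above: with MASK a
   contiguous run of ones whose lowest set bit is at position SHIFT, the
   expression below inserts the low bits of B into that field of A, which
   is what a single BFI instruction computes.  The contiguity condition
   used in arm_bfi_1_p amounts to checking that MASK + (1 << SHIFT) is a
   power of two.  Hypothetical helper for exposition only.  */

static unsigned int
example_bfi (unsigned int a, unsigned int b, unsigned int mask, int shift)
{
  /* E.g. mask = 0x00000ff0, shift = 4: insert b[7:0] into a[11:4].  */
  return (a & ~mask) | ((b << shift) & mask);
}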
10265
10266 /* RTX costs. Make an estimate of the cost of executing the operation
10267 X, which is contained within an operation with code OUTER_CODE.
10268 SPEED_P indicates whether the cost desired is the performance cost,
10269 or the size cost. The estimate is stored in COST and the return
10270 value is TRUE if the cost calculation is final, or FALSE if the
10271 caller should recurse through the operands of X to add additional
10272 costs.
10273
10274 We currently make no attempt to model the size savings of Thumb-2
10275 16-bit instructions. At the normal points in compilation where
10276 this code is called we have no measure of whether the condition
10277 flags are live or not, and thus no realistic way to determine what
10278 the size will eventually be. */
10279 static bool
10280 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10281 const struct cpu_cost_table *extra_cost,
10282 int *cost, bool speed_p)
10283 {
10284 machine_mode mode = GET_MODE (x);
10285
10286 *cost = COSTS_N_INSNS (1);
10287
10288 if (TARGET_THUMB1)
10289 {
10290 if (speed_p)
10291 *cost = thumb1_rtx_costs (x, code, outer_code);
10292 else
10293 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10294 return true;
10295 }
10296
10297 switch (code)
10298 {
10299 case SET:
10300 *cost = 0;
10301 /* SET RTXs don't have a mode so we get it from the destination. */
10302 mode = GET_MODE (SET_DEST (x));
10303
10304 if (REG_P (SET_SRC (x))
10305 && REG_P (SET_DEST (x)))
10306 {
10307 /* Assume that most copies can be done with a single insn,
10308 unless we don't have HW FP, in which case everything
10309 larger than word mode will require two insns. */
10310 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10311 && GET_MODE_SIZE (mode) > 4)
10312 || mode == DImode)
10313 ? 2 : 1);
10314 /* Conditional register moves can be encoded
10315 in 16 bits in Thumb mode. */
10316 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10317 *cost >>= 1;
10318
10319 return true;
10320 }
10321
10322 if (CONST_INT_P (SET_SRC (x)))
10323 {
10324 /* Handle CONST_INT here, since the value doesn't have a mode
10325 and we would otherwise be unable to work out the true cost. */
10326 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10327 0, speed_p);
10328 outer_code = SET;
10329 /* Slightly lower the cost of setting a core reg to a constant.
10330 This helps break up chains and allows for better scheduling. */
10331 if (REG_P (SET_DEST (x))
10332 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10333 *cost -= 1;
10334 x = SET_SRC (x);
10335 /* Immediate moves with an immediate in the range [0, 255] can be
10336 encoded in 16 bits in Thumb mode. */
10337 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10338 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10339 *cost >>= 1;
10340 goto const_int_cost;
10341 }
10342
10343 return false;
10344
10345 case MEM:
10346 return arm_mem_costs (x, extra_cost, cost, speed_p);
10347
10348 case PARALLEL:
10349 {
10350 /* Calculations of LDM costs are complex. We assume an initial cost
10351 (ldm_1st) which will load the number of registers mentioned in
10352 ldm_regs_per_insn_1st registers; then each additional
10353 ldm_regs_per_insn_subsequent registers cost one more insn. The
10354 formula for N regs is thus:
10355
10356 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10357 + ldm_regs_per_insn_subsequent - 1)
10358 / ldm_regs_per_insn_subsequent).
10359
10360 Additional costs may also be added for addressing. A similar
10361 formula is used for STM. */
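
10361+ /* Worked example of the formula above, with illustrative numbers only:
if ldm_regs_per_insn_1st is 4 and ldm_regs_per_insn_subsequent is 2,
a 7-register LDM costs
ldm_1st + COSTS_N_INSNS ((max (7 - 4, 0) + 2 - 1) / 2)
= ldm_1st + COSTS_N_INSNS (2). */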
10362
10363 bool is_ldm = load_multiple_operation (x, SImode);
10364 bool is_stm = store_multiple_operation (x, SImode);
10365
10366 if (is_ldm || is_stm)
10367 {
10368 if (speed_p)
10369 {
10370 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10371 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10372 ? extra_cost->ldst.ldm_regs_per_insn_1st
10373 : extra_cost->ldst.stm_regs_per_insn_1st;
10374 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10375 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10376 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10377
10378 *cost += regs_per_insn_1st
10379 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10380 + regs_per_insn_sub - 1)
10381 / regs_per_insn_sub);
10382 return true;
10383 }
10384
10385 }
10386 return false;
10387 }
10388 case DIV:
10389 case UDIV:
10390 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10391 && (mode == SFmode || !TARGET_VFP_SINGLE))
10392 *cost += COSTS_N_INSNS (speed_p
10393 ? extra_cost->fp[mode != SFmode].div : 0);
10394 else if (mode == SImode && TARGET_IDIV)
10395 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10396 else
10397 *cost = LIBCALL_COST (2);
10398
10399 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10400 are possible, udiv is preferred. */
10401 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10402 return false; /* All arguments must be in registers. */
10403
10404 case MOD:
10405 /* MOD by a power of 2 can be expanded as:
10406 rsbs r1, r0, #0
10407 and r0, r0, #(n - 1)
10408 and r1, r1, #(n - 1)
10409 rsbpl r0, r1, #0. */
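	 /* Purely illustrative example (not from the source): for r0 % 16,
	    n == 16, so both AND instructions above use the mask
	    #(n - 1), i.e. #15.  */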
10410 if (CONST_INT_P (XEXP (x, 1))
10411 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10412 && mode == SImode)
10413 {
10414 *cost += COSTS_N_INSNS (3);
10415
10416 if (speed_p)
10417 *cost += 2 * extra_cost->alu.logical
10418 + extra_cost->alu.arith;
10419 return true;
10420 }
10421
10422 /* Fall-through. */
10423 case UMOD:
10424 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10425 are possible, udiv is preferred. */
10426 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10427 return false; /* All arguments must be in registers. */
10428
10429 case ROTATE:
10430 if (mode == SImode && REG_P (XEXP (x, 1)))
10431 {
10432 *cost += (COSTS_N_INSNS (1)
10433 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10434 if (speed_p)
10435 *cost += extra_cost->alu.shift_reg;
10436 return true;
10437 }
10438 /* Fall through */
10439 case ROTATERT:
10440 case ASHIFT:
10441 case LSHIFTRT:
10442 case ASHIFTRT:
10443 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10444 {
10445 *cost += (COSTS_N_INSNS (2)
10446 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10447 if (speed_p)
10448 *cost += 2 * extra_cost->alu.shift;
10449 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10450 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10451 *cost += 1;
10452 return true;
10453 }
10454 else if (mode == SImode)
10455 {
10456 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10457 /* Slightly disparage register shifts at -Os, but not by much. */
10458 if (!CONST_INT_P (XEXP (x, 1)))
10459 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10460 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10461 return true;
10462 }
10463 else if (GET_MODE_CLASS (mode) == MODE_INT
10464 && GET_MODE_SIZE (mode) < 4)
10465 {
10466 if (code == ASHIFT)
10467 {
10468 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10469 /* Slightly disparage register shifts at -Os, but not by
10470 much. */
10471 if (!CONST_INT_P (XEXP (x, 1)))
10472 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10473 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10474 }
10475 else if (code == LSHIFTRT || code == ASHIFTRT)
10476 {
10477 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10478 {
10479 /* Can use SBFX/UBFX. */
10480 if (speed_p)
10481 *cost += extra_cost->alu.bfx;
10482 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10483 }
10484 else
10485 {
10486 *cost += COSTS_N_INSNS (1);
10487 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10488 if (speed_p)
10489 {
10490 if (CONST_INT_P (XEXP (x, 1)))
10491 *cost += 2 * extra_cost->alu.shift;
10492 else
10493 *cost += (extra_cost->alu.shift
10494 + extra_cost->alu.shift_reg);
10495 }
10496 else
10497 /* Slightly disparage register shifts. */
10498 *cost += !CONST_INT_P (XEXP (x, 1));
10499 }
10500 }
10501 else /* Rotates. */
10502 {
10503 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10504 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10505 if (speed_p)
10506 {
10507 if (CONST_INT_P (XEXP (x, 1)))
10508 *cost += (2 * extra_cost->alu.shift
10509 + extra_cost->alu.log_shift);
10510 else
10511 *cost += (extra_cost->alu.shift
10512 + extra_cost->alu.shift_reg
10513 + extra_cost->alu.log_shift_reg);
10514 }
10515 }
10516 return true;
10517 }
10518
10519 *cost = LIBCALL_COST (2);
10520 return false;
10521
10522 case BSWAP:
10523 if (arm_arch6)
10524 {
10525 if (mode == SImode)
10526 {
10527 if (speed_p)
10528 *cost += extra_cost->alu.rev;
10529
10530 return false;
10531 }
10532 }
10533 else
10534 {
10535 /* No rev instruction available. Look at arm_legacy_rev
10536 and thumb_legacy_rev for the form of RTL used then. */
10537 if (TARGET_THUMB)
10538 {
10539 *cost += COSTS_N_INSNS (9);
10540
10541 if (speed_p)
10542 {
10543 *cost += 6 * extra_cost->alu.shift;
10544 *cost += 3 * extra_cost->alu.logical;
10545 }
10546 }
10547 else
10548 {
10549 *cost += COSTS_N_INSNS (4);
10550
10551 if (speed_p)
10552 {
10553 *cost += 2 * extra_cost->alu.shift;
10554 *cost += extra_cost->alu.arith_shift;
10555 *cost += 2 * extra_cost->alu.logical;
10556 }
10557 }
10558 return true;
10559 }
10560 return false;
10561
10562 case MINUS:
10563 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10564 && (mode == SFmode || !TARGET_VFP_SINGLE))
10565 {
10566 if (GET_CODE (XEXP (x, 0)) == MULT
10567 || GET_CODE (XEXP (x, 1)) == MULT)
10568 {
10569 rtx mul_op0, mul_op1, sub_op;
10570
10571 if (speed_p)
10572 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10573
10574 if (GET_CODE (XEXP (x, 0)) == MULT)
10575 {
10576 mul_op0 = XEXP (XEXP (x, 0), 0);
10577 mul_op1 = XEXP (XEXP (x, 0), 1);
10578 sub_op = XEXP (x, 1);
10579 }
10580 else
10581 {
10582 mul_op0 = XEXP (XEXP (x, 1), 0);
10583 mul_op1 = XEXP (XEXP (x, 1), 1);
10584 sub_op = XEXP (x, 0);
10585 }
10586
10587 /* The first operand of the multiply may be optionally
10588 negated. */
10589 if (GET_CODE (mul_op0) == NEG)
10590 mul_op0 = XEXP (mul_op0, 0);
10591
10592 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10593 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10594 + rtx_cost (sub_op, mode, code, 0, speed_p));
10595
10596 return true;
10597 }
10598
10599 if (speed_p)
10600 *cost += extra_cost->fp[mode != SFmode].addsub;
10601 return false;
10602 }
10603
10604 if (mode == SImode)
10605 {
10606 rtx shift_by_reg = NULL;
10607 rtx shift_op;
10608 rtx non_shift_op;
10609 rtx op0 = XEXP (x, 0);
10610 rtx op1 = XEXP (x, 1);
10611
10612 /* Factor out any borrow operation. There's more than one way
10613 of expressing this; try to recognize them all. */
10614 if (GET_CODE (op0) == MINUS)
10615 {
10616 if (arm_borrow_operation (op1, SImode))
10617 {
10618 op1 = XEXP (op0, 1);
10619 op0 = XEXP (op0, 0);
10620 }
10621 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10622 op0 = XEXP (op0, 0);
10623 }
10624 else if (GET_CODE (op1) == PLUS
10625 && arm_borrow_operation (XEXP (op1, 0), SImode))
10626 op1 = XEXP (op1, 0);
10627 else if (GET_CODE (op0) == NEG
10628 && arm_borrow_operation (op1, SImode))
10629 {
10630 /* Negate with carry-in. For Thumb2 this is done with
10631 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10632 RSC instruction that exists in Arm mode. */
10633 if (speed_p)
10634 *cost += (TARGET_THUMB2
10635 ? extra_cost->alu.arith_shift
10636 : extra_cost->alu.arith);
10637 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10638 return true;
10639 }
10640 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10641 Note we do mean ~borrow here. */
10642 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10643 {
10644 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10645 return true;
10646 }
10647
10648 shift_op = shifter_op_p (op0, &shift_by_reg);
10649 if (shift_op == NULL)
10650 {
10651 shift_op = shifter_op_p (op1, &shift_by_reg);
10652 non_shift_op = op0;
10653 }
10654 else
10655 non_shift_op = op1;
10656
10657 if (shift_op != NULL)
10658 {
10659 if (shift_by_reg != NULL)
10660 {
10661 if (speed_p)
10662 *cost += extra_cost->alu.arith_shift_reg;
10663 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10664 }
10665 else if (speed_p)
10666 *cost += extra_cost->alu.arith_shift;
10667
10668 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10669 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10670 return true;
10671 }
10672
10673 if (arm_arch_thumb2
10674 && GET_CODE (XEXP (x, 1)) == MULT)
10675 {
10676 /* MLS. */
10677 if (speed_p)
10678 *cost += extra_cost->mult[0].add;
10679 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10680 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10681 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10682 return true;
10683 }
10684
10685 if (CONST_INT_P (op0))
10686 {
10687 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10688 INTVAL (op0), NULL_RTX,
10689 NULL_RTX, 1, 0);
10690 *cost = COSTS_N_INSNS (insns);
10691 if (speed_p)
10692 *cost += insns * extra_cost->alu.arith;
10693 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10694 return true;
10695 }
10696 else if (speed_p)
10697 *cost += extra_cost->alu.arith;
10698
10699 /* Don't recurse as we don't want to cost any borrow that
10700 we've stripped. */
10701 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10702 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10703 return true;
10704 }
10705
10706 if (GET_MODE_CLASS (mode) == MODE_INT
10707 && GET_MODE_SIZE (mode) < 4)
10708 {
10709 rtx shift_op, shift_reg;
10710 shift_reg = NULL;
10711
10712 /* We check both sides of the MINUS for shifter operands since,
10713 unlike PLUS, it's not commutative. */
10714
10715 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10716 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10717
10718 /* Slightly disparage, as we might need to widen the result. */
10719 *cost += 1;
10720 if (speed_p)
10721 *cost += extra_cost->alu.arith;
10722
10723 if (CONST_INT_P (XEXP (x, 0)))
10724 {
10725 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10726 return true;
10727 }
10728
10729 return false;
10730 }
10731
10732 if (mode == DImode)
10733 {
10734 *cost += COSTS_N_INSNS (1);
10735
10736 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10737 {
10738 rtx op1 = XEXP (x, 1);
10739
10740 if (speed_p)
10741 *cost += 2 * extra_cost->alu.arith;
10742
10743 if (GET_CODE (op1) == ZERO_EXTEND)
10744 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10745 0, speed_p);
10746 else
10747 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10748 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10749 0, speed_p);
10750 return true;
10751 }
10752 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10753 {
10754 if (speed_p)
10755 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10756 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10757 0, speed_p)
10758 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10759 return true;
10760 }
10761 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10762 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10763 {
10764 if (speed_p)
10765 *cost += (extra_cost->alu.arith
10766 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10767 ? extra_cost->alu.arith
10768 : extra_cost->alu.arith_shift));
10769 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10770 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10771 GET_CODE (XEXP (x, 1)), 0, speed_p));
10772 return true;
10773 }
10774
10775 if (speed_p)
10776 *cost += 2 * extra_cost->alu.arith;
10777 return false;
10778 }
10779
10780 /* Vector mode? */
10781
10782 *cost = LIBCALL_COST (2);
10783 return false;
10784
10785 case PLUS:
10786 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10787 && (mode == SFmode || !TARGET_VFP_SINGLE))
10788 {
10789 if (GET_CODE (XEXP (x, 0)) == MULT)
10790 {
10791 rtx mul_op0, mul_op1, add_op;
10792
10793 if (speed_p)
10794 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10795
10796 mul_op0 = XEXP (XEXP (x, 0), 0);
10797 mul_op1 = XEXP (XEXP (x, 0), 1);
10798 add_op = XEXP (x, 1);
10799
10800 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10801 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10802 + rtx_cost (add_op, mode, code, 0, speed_p));
10803
10804 return true;
10805 }
10806
10807 if (speed_p)
10808 *cost += extra_cost->fp[mode != SFmode].addsub;
10809 return false;
10810 }
10811 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10812 {
10813 *cost = LIBCALL_COST (2);
10814 return false;
10815 }
10816
10817 /* Narrow modes can be synthesized in SImode, but the range
10818 of useful sub-operations is limited. Check for shift operations
10819 on one of the operands. Only left shifts can be used in the
10820 narrow modes. */
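      /* A sketch of the kind of shape this catches (illustrative only): an
	 HImode a + (b << 2) can typically be costed as a single SImode add
	 with a shifted operand; the HANDLE_NARROW_SHIFT_ARITH use just below
	 looks for that shape.  */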
10821 if (GET_MODE_CLASS (mode) == MODE_INT
10822 && GET_MODE_SIZE (mode) < 4)
10823 {
10824 rtx shift_op, shift_reg;
10825 shift_reg = NULL;
10826
10827 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10828
10829 if (CONST_INT_P (XEXP (x, 1)))
10830 {
10831 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10832 INTVAL (XEXP (x, 1)), NULL_RTX,
10833 NULL_RTX, 1, 0);
10834 *cost = COSTS_N_INSNS (insns);
10835 if (speed_p)
10836 *cost += insns * extra_cost->alu.arith;
10837 /* Slightly penalize a narrow operation as the result may
10838 need widening. */
10839 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10840 return true;
10841 }
10842
10843 /* Slightly penalize a narrow operation as the result may
10844 need widening. */
10845 *cost += 1;
10846 if (speed_p)
10847 *cost += extra_cost->alu.arith;
10848
10849 return false;
10850 }
10851
10852 if (mode == SImode)
10853 {
10854 rtx shift_op, shift_reg;
10855
10856 if (TARGET_INT_SIMD
10857 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10858 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10859 {
10860 /* UXTA[BH] or SXTA[BH]. */
10861 if (speed_p)
10862 *cost += extra_cost->alu.extend_arith;
10863 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10864 0, speed_p)
10865 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10866 return true;
10867 }
10868
10869 rtx op0 = XEXP (x, 0);
10870 rtx op1 = XEXP (x, 1);
10871
10872 /* Handle a side effect of adding in the carry to an addition. */
10873 if (GET_CODE (op0) == PLUS
10874 && arm_carry_operation (op1, mode))
10875 {
10876 op1 = XEXP (op0, 1);
10877 op0 = XEXP (op0, 0);
10878 }
10879 else if (GET_CODE (op1) == PLUS
10880 && arm_carry_operation (op0, mode))
10881 {
10882 op0 = XEXP (op1, 0);
10883 op1 = XEXP (op1, 1);
10884 }
10885 else if (GET_CODE (op0) == PLUS)
10886 {
10887 op0 = strip_carry_operation (op0);
10888 if (swap_commutative_operands_p (op0, op1))
10889 std::swap (op0, op1);
10890 }
10891
10892 if (arm_carry_operation (op0, mode))
10893 {
10894 /* Adding the carry to a register is a canonicalization of
10895 adding 0 to the register plus the carry. */
10896 if (speed_p)
10897 *cost += extra_cost->alu.arith;
10898 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10899 return true;
10900 }
10901
10902 shift_reg = NULL;
10903 shift_op = shifter_op_p (op0, &shift_reg);
10904 if (shift_op != NULL)
10905 {
10906 if (shift_reg)
10907 {
10908 if (speed_p)
10909 *cost += extra_cost->alu.arith_shift_reg;
10910 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10911 }
10912 else if (speed_p)
10913 *cost += extra_cost->alu.arith_shift;
10914
10915 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10916 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10917 return true;
10918 }
10919
10920 if (GET_CODE (op0) == MULT)
10921 {
10922 rtx mul_op = op0;
10923
10924 if (TARGET_DSP_MULTIPLY
10925 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10926 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10927 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10928 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10929 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10930 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10931 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10932 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10933 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10934 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10935 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10936 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10937 == 16))))))
10938 {
10939 /* SMLA[BT][BT]. */
10940 if (speed_p)
10941 *cost += extra_cost->mult[0].extend_add;
10942 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10943 SIGN_EXTEND, 0, speed_p)
10944 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10945 SIGN_EXTEND, 0, speed_p)
10946 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10947 return true;
10948 }
10949
10950 if (speed_p)
10951 *cost += extra_cost->mult[0].add;
10952 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10953 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10954 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10955 return true;
10956 }
10957
10958 if (CONST_INT_P (op1))
10959 {
10960 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10961 INTVAL (op1), NULL_RTX,
10962 NULL_RTX, 1, 0);
10963 *cost = COSTS_N_INSNS (insns);
10964 if (speed_p)
10965 *cost += insns * extra_cost->alu.arith;
10966 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10967 return true;
10968 }
10969
10970 if (speed_p)
10971 *cost += extra_cost->alu.arith;
10972
10973 /* Don't recurse here because we want to test the operands
10974 without any carry operation. */
10975 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10976 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10977 return true;
10978 }
10979
10980 if (mode == DImode)
10981 {
10982 if (GET_CODE (XEXP (x, 0)) == MULT
10983 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10984 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10985 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10986 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10987 {
10988 if (speed_p)
10989 *cost += extra_cost->mult[1].extend_add;
10990 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10991 ZERO_EXTEND, 0, speed_p)
10992 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10993 ZERO_EXTEND, 0, speed_p)
10994 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10995 return true;
10996 }
10997
10998 *cost += COSTS_N_INSNS (1);
10999
11000 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11001 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11002 {
11003 if (speed_p)
11004 *cost += (extra_cost->alu.arith
11005 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11006 ? extra_cost->alu.arith
11007 : extra_cost->alu.arith_shift));
11008
11009 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11010 0, speed_p)
11011 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11012 return true;
11013 }
11014
11015 if (speed_p)
11016 *cost += 2 * extra_cost->alu.arith;
11017 return false;
11018 }
11019
11020 /* Vector mode? */
11021 *cost = LIBCALL_COST (2);
11022 return false;
11023 case IOR:
11024 {
11025 rtx sub0, sub1;
11026 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11027 {
11028 if (speed_p)
11029 *cost += extra_cost->alu.rev;
11030
11031 return true;
11032 }
11033 else if (mode == SImode && arm_arch_thumb2
11034 && arm_bfi_p (x, &sub0, &sub1))
11035 {
11036 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11037 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11038 if (speed_p)
11039 *cost += extra_cost->alu.bfi;
11040
11041 return true;
11042 }
11043 }
11044
11045 /* Fall through. */
11046 case AND: case XOR:
11047 if (mode == SImode)
11048 {
11049 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11050 rtx op0 = XEXP (x, 0);
11051 rtx shift_op, shift_reg;
11052
11053 if (subcode == NOT
11054 && (code == AND
11055 || (code == IOR && TARGET_THUMB2)))
11056 op0 = XEXP (op0, 0);
11057
11058 shift_reg = NULL;
11059 shift_op = shifter_op_p (op0, &shift_reg);
11060 if (shift_op != NULL)
11061 {
11062 if (shift_reg)
11063 {
11064 if (speed_p)
11065 *cost += extra_cost->alu.log_shift_reg;
11066 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11067 }
11068 else if (speed_p)
11069 *cost += extra_cost->alu.log_shift;
11070
11071 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11072 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11073 return true;
11074 }
11075
11076 if (CONST_INT_P (XEXP (x, 1)))
11077 {
11078 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11079 INTVAL (XEXP (x, 1)), NULL_RTX,
11080 NULL_RTX, 1, 0);
11081
11082 *cost = COSTS_N_INSNS (insns);
11083 if (speed_p)
11084 *cost += insns * extra_cost->alu.logical;
11085 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11086 return true;
11087 }
11088
11089 if (speed_p)
11090 *cost += extra_cost->alu.logical;
11091 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11092 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11093 return true;
11094 }
11095
11096 if (mode == DImode)
11097 {
11098 rtx op0 = XEXP (x, 0);
11099 enum rtx_code subcode = GET_CODE (op0);
11100
11101 *cost += COSTS_N_INSNS (1);
11102
11103 if (subcode == NOT
11104 && (code == AND
11105 || (code == IOR && TARGET_THUMB2)))
11106 op0 = XEXP (op0, 0);
11107
11108 if (GET_CODE (op0) == ZERO_EXTEND)
11109 {
11110 if (speed_p)
11111 *cost += 2 * extra_cost->alu.logical;
11112
11113 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11114 0, speed_p)
11115 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11116 return true;
11117 }
11118 else if (GET_CODE (op0) == SIGN_EXTEND)
11119 {
11120 if (speed_p)
11121 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11122
11123 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11124 0, speed_p)
11125 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11126 return true;
11127 }
11128
11129 if (speed_p)
11130 *cost += 2 * extra_cost->alu.logical;
11131
11132 return true;
11133 }
11134 /* Vector mode? */
11135
11136 *cost = LIBCALL_COST (2);
11137 return false;
11138
11139 case MULT:
11140 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11141 && (mode == SFmode || !TARGET_VFP_SINGLE))
11142 {
11143 rtx op0 = XEXP (x, 0);
11144
11145 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11146 op0 = XEXP (op0, 0);
11147
11148 if (speed_p)
11149 *cost += extra_cost->fp[mode != SFmode].mult;
11150
11151 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11152 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11153 return true;
11154 }
11155 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11156 {
11157 *cost = LIBCALL_COST (2);
11158 return false;
11159 }
11160
11161 if (mode == SImode)
11162 {
11163 if (TARGET_DSP_MULTIPLY
11164 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11165 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11166 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11167 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11168 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11169 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11170 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11171 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11172 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11173 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11174 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11175 && (INTVAL (XEXP (XEXP (x, 1), 1))
11176 == 16))))))
11177 {
11178 /* SMUL[TB][TB]. */
11179 if (speed_p)
11180 *cost += extra_cost->mult[0].extend;
11181 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11182 SIGN_EXTEND, 0, speed_p);
11183 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11184 SIGN_EXTEND, 1, speed_p);
11185 return true;
11186 }
11187 if (speed_p)
11188 *cost += extra_cost->mult[0].simple;
11189 return false;
11190 }
11191
11192 if (mode == DImode)
11193 {
11194 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11195 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11196 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11197 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11198 {
11199 if (speed_p)
11200 *cost += extra_cost->mult[1].extend;
11201 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11202 ZERO_EXTEND, 0, speed_p)
11203 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11204 ZERO_EXTEND, 0, speed_p));
11205 return true;
11206 }
11207
11208 *cost = LIBCALL_COST (2);
11209 return false;
11210 }
11211
11212 /* Vector mode? */
11213 *cost = LIBCALL_COST (2);
11214 return false;
11215
11216 case NEG:
11217 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11218 && (mode == SFmode || !TARGET_VFP_SINGLE))
11219 {
11220 if (GET_CODE (XEXP (x, 0)) == MULT)
11221 {
11222 /* VNMUL. */
11223 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11224 return true;
11225 }
11226
11227 if (speed_p)
11228 *cost += extra_cost->fp[mode != SFmode].neg;
11229
11230 return false;
11231 }
11232 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11233 {
11234 *cost = LIBCALL_COST (1);
11235 return false;
11236 }
11237
11238 if (mode == SImode)
11239 {
11240 if (GET_CODE (XEXP (x, 0)) == ABS)
11241 {
11242 *cost += COSTS_N_INSNS (1);
11243 /* Assume the non-flag-changing variant. */
11244 if (speed_p)
11245 *cost += (extra_cost->alu.log_shift
11246 + extra_cost->alu.arith_shift);
11247 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11248 return true;
11249 }
11250
11251 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11252 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11253 {
11254 *cost += COSTS_N_INSNS (1);
11255 /* No extra cost for MOV imm and MVN imm. */
11256 /* If the comparison op is using the flags, there's no further
11257 cost; otherwise we need to add the cost of the comparison. */
11258 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11259 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11260 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11261 {
11262 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11263 *cost += (COSTS_N_INSNS (1)
11264 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11265 0, speed_p)
11266 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11267 1, speed_p));
11268 if (speed_p)
11269 *cost += extra_cost->alu.arith;
11270 }
11271 return true;
11272 }
11273
11274 if (speed_p)
11275 *cost += extra_cost->alu.arith;
11276 return false;
11277 }
11278
11279 if (GET_MODE_CLASS (mode) == MODE_INT
11280 && GET_MODE_SIZE (mode) < 4)
11281 {
11282 /* Slightly disparage, as we might need an extend operation. */
11283 *cost += 1;
11284 if (speed_p)
11285 *cost += extra_cost->alu.arith;
11286 return false;
11287 }
11288
11289 if (mode == DImode)
11290 {
11291 *cost += COSTS_N_INSNS (1);
11292 if (speed_p)
11293 *cost += 2 * extra_cost->alu.arith;
11294 return false;
11295 }
11296
11297 /* Vector mode? */
11298 *cost = LIBCALL_COST (1);
11299 return false;
11300
11301 case NOT:
11302 if (mode == SImode)
11303 {
11304 rtx shift_op;
11305 rtx shift_reg = NULL;
11306
11307 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11308
11309 if (shift_op)
11310 {
11311 if (shift_reg != NULL)
11312 {
11313 if (speed_p)
11314 *cost += extra_cost->alu.log_shift_reg;
11315 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11316 }
11317 else if (speed_p)
11318 *cost += extra_cost->alu.log_shift;
11319 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11320 return true;
11321 }
11322
11323 if (speed_p)
11324 *cost += extra_cost->alu.logical;
11325 return false;
11326 }
11327 if (mode == DImode)
11328 {
11329 *cost += COSTS_N_INSNS (1);
11330 return false;
11331 }
11332
11333 /* Vector mode? */
11334
11335 *cost += LIBCALL_COST (1);
11336 return false;
11337
11338 case IF_THEN_ELSE:
11339 {
11340 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11341 {
11342 *cost += COSTS_N_INSNS (3);
11343 return true;
11344 }
11345 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11346 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11347
11348 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11349 /* Assume that if one arm of the if_then_else is a register,
11350 it will be tied to the result and the conditional insn
11351 eliminated. */
11352 if (REG_P (XEXP (x, 1)))
11353 *cost += op2cost;
11354 else if (REG_P (XEXP (x, 2)))
11355 *cost += op1cost;
11356 else
11357 {
11358 if (speed_p)
11359 {
11360 if (extra_cost->alu.non_exec_costs_exec)
11361 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11362 else
11363 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11364 }
11365 else
11366 *cost += op1cost + op2cost;
11367 }
11368 }
11369 return true;
11370
11371 case COMPARE:
11372 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11373 *cost = 0;
11374 else
11375 {
11376 machine_mode op0mode;
11377 /* We'll mostly assume that the cost of a compare is the cost of the
11378 LHS. However, there are some notable exceptions. */
11379
11380 /* Floating point compares are never done as side-effects. */
11381 op0mode = GET_MODE (XEXP (x, 0));
11382 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11383 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11384 {
11385 if (speed_p)
11386 *cost += extra_cost->fp[op0mode != SFmode].compare;
11387
11388 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11389 {
11390 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11391 return true;
11392 }
11393
11394 return false;
11395 }
11396 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11397 {
11398 *cost = LIBCALL_COST (2);
11399 return false;
11400 }
11401
11402 /* DImode compares normally take two insns. */
11403 if (op0mode == DImode)
11404 {
11405 *cost += COSTS_N_INSNS (1);
11406 if (speed_p)
11407 *cost += 2 * extra_cost->alu.arith;
11408 return false;
11409 }
11410
11411 if (op0mode == SImode)
11412 {
11413 rtx shift_op;
11414 rtx shift_reg;
11415
11416 if (XEXP (x, 1) == const0_rtx
11417 && !(REG_P (XEXP (x, 0))
11418 || (GET_CODE (XEXP (x, 0)) == SUBREG
11419 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11420 {
11421 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11422
11423 /* Multiply operations that set the flags are often
11424 significantly more expensive. */
11425 if (speed_p
11426 && GET_CODE (XEXP (x, 0)) == MULT
11427 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11428 *cost += extra_cost->mult[0].flag_setting;
11429
11430 if (speed_p
11431 && GET_CODE (XEXP (x, 0)) == PLUS
11432 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11433 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11434 0), 1), mode))
11435 *cost += extra_cost->mult[0].flag_setting;
11436 return true;
11437 }
11438
11439 shift_reg = NULL;
11440 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11441 if (shift_op != NULL)
11442 {
11443 if (shift_reg != NULL)
11444 {
11445 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11446 1, speed_p);
11447 if (speed_p)
11448 *cost += extra_cost->alu.arith_shift_reg;
11449 }
11450 else if (speed_p)
11451 *cost += extra_cost->alu.arith_shift;
11452 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11453 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11454 return true;
11455 }
11456
11457 if (speed_p)
11458 *cost += extra_cost->alu.arith;
11459 if (CONST_INT_P (XEXP (x, 1))
11460 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11461 {
11462 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11463 return true;
11464 }
11465 return false;
11466 }
11467
11468 /* Vector mode? */
11469
11470 *cost = LIBCALL_COST (2);
11471 return false;
11472 }
11473 return true;
11474
11475 case EQ:
11476 case GE:
11477 case GT:
11478 case LE:
11479 case LT:
11480 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11481 vcle and vclt). */
11482 if (TARGET_NEON
11483 && TARGET_HARD_FLOAT
11484 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11485 && (XEXP (x, 1) == CONST0_RTX (mode)))
11486 {
11487 *cost = 0;
11488 return true;
11489 }
11490
11491 /* Fall through. */
11492 case NE:
11493 case LTU:
11494 case LEU:
11495 case GEU:
11496 case GTU:
11497 case ORDERED:
11498 case UNORDERED:
11499 case UNEQ:
11500 case UNLE:
11501 case UNLT:
11502 case UNGE:
11503 case UNGT:
11504 case LTGT:
11505 if (outer_code == SET)
11506 {
11507 /* Is it a store-flag operation? */
11508 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11509 && XEXP (x, 1) == const0_rtx)
11510 {
11511 /* Thumb also needs an IT insn. */
11512 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11513 return true;
11514 }
11515 if (XEXP (x, 1) == const0_rtx)
11516 {
11517 switch (code)
11518 {
11519 case LT:
11520 /* LSR Rd, Rn, #31. */
11521 if (speed_p)
11522 *cost += extra_cost->alu.shift;
11523 break;
11524
11525 case EQ:
11526 /* RSBS T1, Rn, #0
11527 ADC Rd, Rn, T1. */
11528
11529 case NE:
11530 /* SUBS T1, Rn, #1
11531 SBC Rd, Rn, T1. */
11532 *cost += COSTS_N_INSNS (1);
11533 break;
11534
11535 case LE:
11536 /* RSBS T1, Rn, Rn, LSR #31
11537 ADC Rd, Rn, T1. */
11538 *cost += COSTS_N_INSNS (1);
11539 if (speed_p)
11540 *cost += extra_cost->alu.arith_shift;
11541 break;
11542
11543 case GT:
11544 /* RSB Rd, Rn, Rn, ASR #1
11545 LSR Rd, Rd, #31. */
11546 *cost += COSTS_N_INSNS (1);
11547 if (speed_p)
11548 *cost += (extra_cost->alu.arith_shift
11549 + extra_cost->alu.shift);
11550 break;
11551
11552 case GE:
11553 /* ASR Rd, Rn, #31
11554 ADD Rd, Rn, #1. */
11555 *cost += COSTS_N_INSNS (1);
11556 if (speed_p)
11557 *cost += extra_cost->alu.shift;
11558 break;
11559
11560 default:
11561 /* Remaining cases are either meaningless or would take
11562 three insns anyway. */
11563 *cost = COSTS_N_INSNS (3);
11564 break;
11565 }
11566 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11567 return true;
11568 }
11569 else
11570 {
11571 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11572 if (CONST_INT_P (XEXP (x, 1))
11573 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11574 {
11575 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11576 return true;
11577 }
11578
11579 return false;
11580 }
11581 }
11582 /* Not directly inside a set. If it involves the condition code
11583 register it must be the condition for a branch, cond_exec or
11584 I_T_E operation. Since the comparison is performed elsewhere
11585 this is just the control part which has no additional
11586 cost. */
11587 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11588 && XEXP (x, 1) == const0_rtx)
11589 {
11590 *cost = 0;
11591 return true;
11592 }
11593 return false;
11594
11595 case ABS:
11596 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11597 && (mode == SFmode || !TARGET_VFP_SINGLE))
11598 {
11599 if (speed_p)
11600 *cost += extra_cost->fp[mode != SFmode].neg;
11601
11602 return false;
11603 }
11604 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11605 {
11606 *cost = LIBCALL_COST (1);
11607 return false;
11608 }
11609
11610 if (mode == SImode)
11611 {
11612 if (speed_p)
11613 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11614 return false;
11615 }
11616 /* Vector mode? */
11617 *cost = LIBCALL_COST (1);
11618 return false;
11619
11620 case SIGN_EXTEND:
11621 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11622 && MEM_P (XEXP (x, 0)))
11623 {
11624 if (mode == DImode)
11625 *cost += COSTS_N_INSNS (1);
11626
11627 if (!speed_p)
11628 return true;
11629
11630 if (GET_MODE (XEXP (x, 0)) == SImode)
11631 *cost += extra_cost->ldst.load;
11632 else
11633 *cost += extra_cost->ldst.load_sign_extend;
11634
11635 if (mode == DImode)
11636 *cost += extra_cost->alu.shift;
11637
11638 return true;
11639 }
11640
11641 /* Widening from less than 32 bits requires an extend operation. */
11642 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11643 {
11644 /* We have SXTB/SXTH. */
11645 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11646 if (speed_p)
11647 *cost += extra_cost->alu.extend;
11648 }
11649 else if (GET_MODE (XEXP (x, 0)) != SImode)
11650 {
11651 /* Needs two shifts. */
11652 *cost += COSTS_N_INSNS (1);
11653 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11654 if (speed_p)
11655 *cost += 2 * extra_cost->alu.shift;
11656 }
11657
11658 /* Widening beyond 32 bits requires one more insn. */
11659 if (mode == DImode)
11660 {
11661 *cost += COSTS_N_INSNS (1);
11662 if (speed_p)
11663 *cost += extra_cost->alu.shift;
11664 }
11665
11666 return true;
11667
11668 case ZERO_EXTEND:
11669 if ((arm_arch4
11670 || GET_MODE (XEXP (x, 0)) == SImode
11671 || GET_MODE (XEXP (x, 0)) == QImode)
11672 && MEM_P (XEXP (x, 0)))
11673 {
11674 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11675
11676 if (mode == DImode)
11677 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11678
11679 return true;
11680 }
11681
11682 /* Widening from less than 32 bits requires an extend operation. */
11683 if (GET_MODE (XEXP (x, 0)) == QImode)
11684 {
11685 /* UXTB can be a shorter instruction in Thumb2, but it might
11686 be slower than the AND Rd, Rn, #255 alternative. When
11687 optimizing for speed it should never be slower to use
11688 AND, and we don't really model 16-bit vs 32-bit insns
11689 here. */
11690 if (speed_p)
11691 *cost += extra_cost->alu.logical;
11692 }
11693 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11694 {
11695 /* We have UXTB/UXTH. */
11696 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11697 if (speed_p)
11698 *cost += extra_cost->alu.extend;
11699 }
11700 else if (GET_MODE (XEXP (x, 0)) != SImode)
11701 {
11702 /* Needs two shifts. It's marginally preferable to use
11703 shifts rather than two BIC instructions as the second
11704 shift may merge with a subsequent insn as a shifter
11705 op. */
11706 *cost = COSTS_N_INSNS (2);
11707 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11708 if (speed_p)
11709 *cost += 2 * extra_cost->alu.shift;
11710 }
11711
11712 /* Widening beyond 32 bits requires one more insn. */
11713 if (mode == DImode)
11714 {
11715 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11716 }
11717
11718 return true;
11719
11720 case CONST_INT:
11721 *cost = 0;
11722 /* CONST_INT has no mode, so we cannot tell for sure how many
11723 insns are really going to be needed. The best we can do is
11724 look at the value passed. If it fits in SImode, then assume
11725 that's the mode it will be used for. Otherwise assume it
11726 will be used in DImode. */
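      /* Illustrative example (arbitrary value): 0x1234567800000001 does not
	 survive truncation to SImode, so it is treated as DImode and costed
	 below as two SImode constants, one per 32-bit half.  */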
11727 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11728 mode = SImode;
11729 else
11730 mode = DImode;
11731
11732 /* Avoid blowing up in arm_gen_constant (). */
11733 if (!(outer_code == PLUS
11734 || outer_code == AND
11735 || outer_code == IOR
11736 || outer_code == XOR
11737 || outer_code == MINUS))
11738 outer_code = SET;
11739
11740 const_int_cost:
11741 if (mode == SImode)
11742 {
11743 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11744 INTVAL (x), NULL, NULL,
11745 0, 0));
11746 /* Extra costs? */
11747 }
11748 else
11749 {
11750 *cost += COSTS_N_INSNS (arm_gen_constant
11751 (outer_code, SImode, NULL,
11752 trunc_int_for_mode (INTVAL (x), SImode),
11753 NULL, NULL, 0, 0)
11754 + arm_gen_constant (outer_code, SImode, NULL,
11755 INTVAL (x) >> 32, NULL,
11756 NULL, 0, 0));
11757 /* Extra costs? */
11758 }
11759
11760 return true;
11761
11762 case CONST:
11763 case LABEL_REF:
11764 case SYMBOL_REF:
11765 if (speed_p)
11766 {
11767 if (arm_arch_thumb2 && !flag_pic)
11768 *cost += COSTS_N_INSNS (1);
11769 else
11770 *cost += extra_cost->ldst.load;
11771 }
11772 else
11773 *cost += COSTS_N_INSNS (1);
11774
11775 if (flag_pic)
11776 {
11777 *cost += COSTS_N_INSNS (1);
11778 if (speed_p)
11779 *cost += extra_cost->alu.arith;
11780 }
11781
11782 return true;
11783
11784 case CONST_FIXED:
11785 *cost = COSTS_N_INSNS (4);
11786 /* Fixme. */
11787 return true;
11788
11789 case CONST_DOUBLE:
11790 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11791 && (mode == SFmode || !TARGET_VFP_SINGLE))
11792 {
11793 if (vfp3_const_double_rtx (x))
11794 {
11795 if (speed_p)
11796 *cost += extra_cost->fp[mode == DFmode].fpconst;
11797 return true;
11798 }
11799
11800 if (speed_p)
11801 {
11802 if (mode == DFmode)
11803 *cost += extra_cost->ldst.loadd;
11804 else
11805 *cost += extra_cost->ldst.loadf;
11806 }
11807 else
11808 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11809
11810 return true;
11811 }
11812 *cost = COSTS_N_INSNS (4);
11813 return true;
11814
11815 case CONST_VECTOR:
11816 /* Fixme. */
11817 if (((TARGET_NEON && TARGET_HARD_FLOAT
11818 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11819 || TARGET_HAVE_MVE)
11820 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11821 *cost = COSTS_N_INSNS (1);
11822 else
11823 *cost = COSTS_N_INSNS (4);
11824 return true;
11825
11826 case HIGH:
11827 case LO_SUM:
11828 /* When optimizing for size, we prefer constant pool entries to
11829 MOVW/MOVT pairs, so bump the cost of these slightly. */
11830 if (!speed_p)
11831 *cost += 1;
11832 return true;
11833
11834 case CLZ:
11835 if (speed_p)
11836 *cost += extra_cost->alu.clz;
11837 return false;
11838
11839 case SMIN:
11840 if (XEXP (x, 1) == const0_rtx)
11841 {
11842 if (speed_p)
11843 *cost += extra_cost->alu.log_shift;
11844 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11845 return true;
11846 }
11847 /* Fall through. */
11848 case SMAX:
11849 case UMIN:
11850 case UMAX:
11851 *cost += COSTS_N_INSNS (1);
11852 return false;
11853
11854 case TRUNCATE:
11855 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11856 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11857 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11858 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11859 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11860 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11861 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11862 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11863 == ZERO_EXTEND))))
11864 {
11865 if (speed_p)
11866 *cost += extra_cost->mult[1].extend;
11867 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11868 ZERO_EXTEND, 0, speed_p)
11869 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11870 ZERO_EXTEND, 0, speed_p));
11871 return true;
11872 }
11873 *cost = LIBCALL_COST (1);
11874 return false;
11875
11876 case UNSPEC_VOLATILE:
11877 case UNSPEC:
11878 return arm_unspec_cost (x, outer_code, speed_p, cost);
11879
11880 case PC:
11881 /* Reading the PC is like reading any other register. Writing it
11882 is more expensive, but we take that into account elsewhere. */
11883 *cost = 0;
11884 return true;
11885
11886 case ZERO_EXTRACT:
11887 /* TODO: Simple zero_extract of bottom bits using AND. */
11888 /* Fall through. */
11889 case SIGN_EXTRACT:
11890 if (arm_arch6
11891 && mode == SImode
11892 && CONST_INT_P (XEXP (x, 1))
11893 && CONST_INT_P (XEXP (x, 2)))
11894 {
11895 if (speed_p)
11896 *cost += extra_cost->alu.bfx;
11897 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11898 return true;
11899 }
11900 /* Without UBFX/SBFX, need to resort to shift operations. */
11901 *cost += COSTS_N_INSNS (1);
11902 if (speed_p)
11903 *cost += 2 * extra_cost->alu.shift;
11904 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11905 return true;
11906
11907 case FLOAT_EXTEND:
11908 if (TARGET_HARD_FLOAT)
11909 {
11910 if (speed_p)
11911 *cost += extra_cost->fp[mode == DFmode].widen;
11912 if (!TARGET_VFP5
11913 && GET_MODE (XEXP (x, 0)) == HFmode)
11914 {
11915 /* Pre v8, widening HF->DF is a two-step process, first
11916 widening to SFmode. */
11917 *cost += COSTS_N_INSNS (1);
11918 if (speed_p)
11919 *cost += extra_cost->fp[0].widen;
11920 }
11921 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11922 return true;
11923 }
11924
11925 *cost = LIBCALL_COST (1);
11926 return false;
11927
11928 case FLOAT_TRUNCATE:
11929 if (TARGET_HARD_FLOAT)
11930 {
11931 if (speed_p)
11932 *cost += extra_cost->fp[mode == DFmode].narrow;
11933 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11934 return true;
11935 /* Vector modes? */
11936 }
11937 *cost = LIBCALL_COST (1);
11938 return false;
11939
11940 case FMA:
11941 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11942 {
11943 rtx op0 = XEXP (x, 0);
11944 rtx op1 = XEXP (x, 1);
11945 rtx op2 = XEXP (x, 2);
11946
11947
11948 /* vfms or vfnma. */
11949 if (GET_CODE (op0) == NEG)
11950 op0 = XEXP (op0, 0);
11951
11952 /* vfnms or vfnma. */
11953 if (GET_CODE (op2) == NEG)
11954 op2 = XEXP (op2, 0);
11955
11956 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11957 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11958 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11959
11960 if (speed_p)
11961 *cost += extra_cost->fp[mode == DFmode].fma;
11962
11963 return true;
11964 }
11965
11966 *cost = LIBCALL_COST (3);
11967 return false;
11968
11969 case FIX:
11970 case UNSIGNED_FIX:
11971 if (TARGET_HARD_FLOAT)
11972 {
11973 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11974 a vcvt fixed-point conversion. */
11975 if (code == FIX && mode == SImode
11976 && GET_CODE (XEXP (x, 0)) == FIX
11977 && GET_MODE (XEXP (x, 0)) == SFmode
11978 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11979 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11980 > 0)
11981 {
11982 if (speed_p)
11983 *cost += extra_cost->fp[0].toint;
11984
11985 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11986 code, 0, speed_p);
11987 return true;
11988 }
11989
11990 if (GET_MODE_CLASS (mode) == MODE_INT)
11991 {
11992 mode = GET_MODE (XEXP (x, 0));
11993 if (speed_p)
11994 *cost += extra_cost->fp[mode == DFmode].toint;
11995 /* Strip off the 'cost' of rounding towards zero. */
11996 if (GET_CODE (XEXP (x, 0)) == FIX)
11997 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11998 0, speed_p);
11999 else
12000 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12001 /* ??? Increase the cost to deal with transferring from
12002 FP -> CORE registers? */
12003 return true;
12004 }
12005 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12006 && TARGET_VFP5)
12007 {
12008 if (speed_p)
12009 *cost += extra_cost->fp[mode == DFmode].roundint;
12010 return false;
12011 }
12012 /* Vector costs? */
12013 }
12014 *cost = LIBCALL_COST (1);
12015 return false;
12016
12017 case FLOAT:
12018 case UNSIGNED_FLOAT:
12019 if (TARGET_HARD_FLOAT)
12020 {
12021 /* ??? Increase the cost to deal with transferring from CORE
12022 -> FP registers? */
12023 if (speed_p)
12024 *cost += extra_cost->fp[mode == DFmode].fromint;
12025 return false;
12026 }
12027 *cost = LIBCALL_COST (1);
12028 return false;
12029
12030 case CALL:
12031 return true;
12032
12033 case ASM_OPERANDS:
12034 {
12035 /* Just a guess: assume one insn per instruction in the asm
12036 template, plus one insn per input. Always a minimum of
12037 COSTS_N_INSNS (1) though (see PR60663). */
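	/* Illustrative example: an asm template containing two instructions
	   and three inputs is costed as COSTS_N_INSNS (2 + 3).  */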
12038 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12039 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12040
12041 *cost = COSTS_N_INSNS (asm_length + num_operands);
12042 return true;
12043 }
12044 default:
12045 if (mode != VOIDmode)
12046 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12047 else
12048 *cost = COSTS_N_INSNS (4); /* Who knows? */
12049 return false;
12050 }
12051 }
12052
12053 #undef HANDLE_NARROW_SHIFT_ARITH
12054
12055 /* RTX costs entry point. */
12056
12057 static bool
12058 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12059 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12060 {
12061 bool result;
12062 int code = GET_CODE (x);
12063 gcc_assert (current_tune->insn_extra_cost);
12064
12065 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12066 (enum rtx_code) outer_code,
12067 current_tune->insn_extra_cost,
12068 total, speed);
12069
12070 if (dump_file && arm_verbose_cost)
12071 {
12072 print_rtl_single (dump_file, x);
12073 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12074 *total, result ? "final" : "partial");
12075 }
12076 return result;
12077 }
12078
12079 static int
12080 arm_insn_cost (rtx_insn *insn, bool speed)
12081 {
12082 int cost;
12083
12084 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12085 will likely disappear during register allocation. */
12086 if (!reload_completed
12087 && GET_CODE (PATTERN (insn)) == SET
12088 && REG_P (SET_DEST (PATTERN (insn)))
12089 && REG_P (SET_SRC (PATTERN (insn))))
12090 return 2;
12091 cost = pattern_cost (PATTERN (insn), speed);
12092 /* If the cost is zero, then it's likely a complex insn. We don't want the
12093 cost of these to be less than something we know about. */
12094 return cost ? cost : COSTS_N_INSNS (2);
12095 }
12096
12097 /* All address computations that can be done are free, but rtx cost returns
12098 the same for practically all of them. So we weight the different types
12099 of address here in the order (most preferred first):
12100 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
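/* The concrete weights returned below: PRE/POST_INC/DEC -> 0;
   reg + const int -> 2; a PLUS with an arithmetic sub-term -> 3; any other
   PLUS -> 4; a plain REG (or anything else) -> 6; MEM, LABEL_REF or
   SYMBOL_REF -> 10.  */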
12101 static inline int
12102 arm_arm_address_cost (rtx x)
12103 {
12104 enum rtx_code c = GET_CODE (x);
12105
12106 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12107 return 0;
12108 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12109 return 10;
12110
12111 if (c == PLUS)
12112 {
12113 if (CONST_INT_P (XEXP (x, 1)))
12114 return 2;
12115
12116 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12117 return 3;
12118
12119 return 4;
12120 }
12121
12122 return 6;
12123 }
12124
12125 static inline int
12126 arm_thumb_address_cost (rtx x)
12127 {
12128 enum rtx_code c = GET_CODE (x);
12129
12130 if (c == REG)
12131 return 1;
12132 if (c == PLUS
12133 && REG_P (XEXP (x, 0))
12134 && CONST_INT_P (XEXP (x, 1)))
12135 return 1;
12136
12137 return 2;
12138 }
12139
12140 static int
12141 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12142 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12143 {
12144 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12145 }
12146
12147 /* Adjust cost hook for XScale. */
12148 static bool
12149 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12150 int * cost)
12151 {
12152 /* Some true dependencies can have a higher cost depending
12153 on precisely how certain input operands are used. */
12154 if (dep_type == 0
12155 && recog_memoized (insn) >= 0
12156 && recog_memoized (dep) >= 0)
12157 {
12158 int shift_opnum = get_attr_shift (insn);
12159 enum attr_type attr_type = get_attr_type (dep);
12160
12161 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12162 operand for INSN. If we have a shifted input operand and the
12163 instruction we depend on is another ALU instruction, then we may
12164 have to account for an additional stall. */
12165 if (shift_opnum != 0
12166 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12167 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12168 || attr_type == TYPE_ALUS_SHIFT_IMM
12169 || attr_type == TYPE_LOGIC_SHIFT_IMM
12170 || attr_type == TYPE_LOGICS_SHIFT_IMM
12171 || attr_type == TYPE_ALU_SHIFT_REG
12172 || attr_type == TYPE_ALUS_SHIFT_REG
12173 || attr_type == TYPE_LOGIC_SHIFT_REG
12174 || attr_type == TYPE_LOGICS_SHIFT_REG
12175 || attr_type == TYPE_MOV_SHIFT
12176 || attr_type == TYPE_MVN_SHIFT
12177 || attr_type == TYPE_MOV_SHIFT_REG
12178 || attr_type == TYPE_MVN_SHIFT_REG))
12179 {
12180 rtx shifted_operand;
12181 int opno;
12182
12183 /* Get the shifted operand. */
12184 extract_insn (insn);
12185 shifted_operand = recog_data.operand[shift_opnum];
12186
12187 /* Iterate over all the operands in DEP. If we write an operand
12188 that overlaps with SHIFTED_OPERAND, then we have to increase the
12189 cost of this dependency. */
12190 extract_insn (dep);
12191 preprocess_constraints (dep);
12192 for (opno = 0; opno < recog_data.n_operands; opno++)
12193 {
12194 /* We can ignore strict inputs. */
12195 if (recog_data.operand_type[opno] == OP_IN)
12196 continue;
12197
12198 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12199 shifted_operand))
12200 {
12201 *cost = 2;
12202 return false;
12203 }
12204 }
12205 }
12206 }
12207 return true;
12208 }
12209
12210 /* Adjust cost hook for Cortex A9. */
12211 static bool
12212 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12213 int * cost)
12214 {
12215 switch (dep_type)
12216 {
12217 case REG_DEP_ANTI:
12218 *cost = 0;
12219 return false;
12220
12221 case REG_DEP_TRUE:
12222 case REG_DEP_OUTPUT:
12223 if (recog_memoized (insn) >= 0
12224 && recog_memoized (dep) >= 0)
12225 {
12226 if (GET_CODE (PATTERN (insn)) == SET)
12227 {
12228 if (GET_MODE_CLASS
12229 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12230 || GET_MODE_CLASS
12231 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12232 {
12233 enum attr_type attr_type_insn = get_attr_type (insn);
12234 enum attr_type attr_type_dep = get_attr_type (dep);
12235
12236 /* By default all dependencies of the form
12237 s0 = s0 <op> s1
12238 s0 = s0 <op> s2
12239 have an extra latency of 1 cycle because
12240 of the input and output dependency in this
12241 case. However, this gets modeled as a true
12242 dependency and hence all these checks. */
12243 if (REG_P (SET_DEST (PATTERN (insn)))
12244 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12245 {
12246 /* FMACS is a special case where the dependent
12247 instruction can be issued 3 cycles before
12248 the normal latency in case of an output
12249 dependency. */
12250 if ((attr_type_insn == TYPE_FMACS
12251 || attr_type_insn == TYPE_FMACD)
12252 && (attr_type_dep == TYPE_FMACS
12253 || attr_type_dep == TYPE_FMACD))
12254 {
12255 if (dep_type == REG_DEP_OUTPUT)
12256 *cost = insn_default_latency (dep) - 3;
12257 else
12258 *cost = insn_default_latency (dep);
12259 return false;
12260 }
12261 else
12262 {
12263 if (dep_type == REG_DEP_OUTPUT)
12264 *cost = insn_default_latency (dep) + 1;
12265 else
12266 *cost = insn_default_latency (dep);
12267 }
12268 return false;
12269 }
12270 }
12271 }
12272 }
12273 break;
12274
12275 default:
12276 gcc_unreachable ();
12277 }
12278
12279 return true;
12280 }
12281
12282 /* Adjust cost hook for FA726TE. */
12283 static bool
12284 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12285 int * cost)
12286 {
12287 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
12288 followed by a predicated one) has a penalty of 3. */
12289 if (dep_type == REG_DEP_TRUE
12290 && recog_memoized (insn) >= 0
12291 && recog_memoized (dep) >= 0
12292 && get_attr_conds (dep) == CONDS_SET)
12293 {
12294 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12295 if (get_attr_conds (insn) == CONDS_USE
12296 && get_attr_type (insn) != TYPE_BRANCH)
12297 {
12298 *cost = 3;
12299 return false;
12300 }
12301
12302 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12303 || get_attr_conds (insn) == CONDS_USE)
12304 {
12305 *cost = 0;
12306 return false;
12307 }
12308 }
12309
12310 return true;
12311 }
12312
12313 /* Implement TARGET_REGISTER_MOVE_COST.
12314
12315 Moves between VFP_REGS and GENERAL_REGS are a single insn, but one
12316 that is typically more expensive than a single memory access. We set
12317 the cost to less than two memory accesses so that floating
12318 point to integer conversion does not go through memory. */
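   Note: arm_memory_move_cost below returns 10 for TARGET_32BIT, so the
   VFP<->core move cost of 15 used here is indeed less than the cost of two
   memory accesses (20).  */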
12319
12320 int
12321 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12322 reg_class_t from, reg_class_t to)
12323 {
12324 if (TARGET_32BIT)
12325 {
12326 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12327 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12328 return 15;
12329 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12330 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12331 return 4;
12332 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12333 return 20;
12334 else
12335 return 2;
12336 }
12337 else
12338 {
12339 if (from == HI_REGS || to == HI_REGS)
12340 return 4;
12341 else
12342 return 2;
12343 }
12344 }
12345
12346 /* Implement TARGET_MEMORY_MOVE_COST. */
12347
12348 int
12349 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12350 bool in ATTRIBUTE_UNUSED)
12351 {
12352 if (TARGET_32BIT)
12353 return 10;
12354 else
12355 {
12356 if (GET_MODE_SIZE (mode) < 4)
12357 return 8;
12358 else
12359 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12360 }
12361 }
12362
12363 /* Vectorizer cost model implementation. */
12364
12365 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12366 static int
12367 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12368 tree vectype,
12369 int misalign ATTRIBUTE_UNUSED)
12370 {
12371 unsigned elements;
12372
12373 switch (type_of_cost)
12374 {
12375 case scalar_stmt:
12376 return current_tune->vec_costs->scalar_stmt_cost;
12377
12378 case scalar_load:
12379 return current_tune->vec_costs->scalar_load_cost;
12380
12381 case scalar_store:
12382 return current_tune->vec_costs->scalar_store_cost;
12383
12384 case vector_stmt:
12385 return current_tune->vec_costs->vec_stmt_cost;
12386
12387 case vector_load:
12388 return current_tune->vec_costs->vec_align_load_cost;
12389
12390 case vector_store:
12391 return current_tune->vec_costs->vec_store_cost;
12392
12393 case vec_to_scalar:
12394 return current_tune->vec_costs->vec_to_scalar_cost;
12395
12396 case scalar_to_vec:
12397 return current_tune->vec_costs->scalar_to_vec_cost;
12398
12399 case unaligned_load:
12400 case vector_gather_load:
12401 return current_tune->vec_costs->vec_unalign_load_cost;
12402
12403 case unaligned_store:
12404 case vector_scatter_store:
12405 return current_tune->vec_costs->vec_unalign_store_cost;
12406
12407 case cond_branch_taken:
12408 return current_tune->vec_costs->cond_taken_branch_cost;
12409
12410 case cond_branch_not_taken:
12411 return current_tune->vec_costs->cond_not_taken_branch_cost;
12412
12413 case vec_perm:
12414 case vec_promote_demote:
12415 return current_tune->vec_costs->vec_stmt_cost;
12416
12417 case vec_construct:
12418 elements = TYPE_VECTOR_SUBPARTS (vectype);
12419 return elements / 2 + 1;
12420
12421 default:
12422 gcc_unreachable ();
12423 }
12424 }
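/* As a worked example of the vec_construct case above: constructing a
   four-element vector is costed at 4 / 2 + 1 = 3, independently of the
   current tuning, whereas every other case reads its cost from the current
   tuning's vec_costs table.  */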
12425
12426 /* Return true if and only if this insn can dual-issue only as older. */
12427 static bool
12428 cortexa7_older_only (rtx_insn *insn)
12429 {
12430 if (recog_memoized (insn) < 0)
12431 return false;
12432
12433 switch (get_attr_type (insn))
12434 {
12435 case TYPE_ALU_DSP_REG:
12436 case TYPE_ALU_SREG:
12437 case TYPE_ALUS_SREG:
12438 case TYPE_LOGIC_REG:
12439 case TYPE_LOGICS_REG:
12440 case TYPE_ADC_REG:
12441 case TYPE_ADCS_REG:
12442 case TYPE_ADR:
12443 case TYPE_BFM:
12444 case TYPE_REV:
12445 case TYPE_MVN_REG:
12446 case TYPE_SHIFT_IMM:
12447 case TYPE_SHIFT_REG:
12448 case TYPE_LOAD_BYTE:
12449 case TYPE_LOAD_4:
12450 case TYPE_STORE_4:
12451 case TYPE_FFARITHS:
12452 case TYPE_FADDS:
12453 case TYPE_FFARITHD:
12454 case TYPE_FADDD:
12455 case TYPE_FMOV:
12456 case TYPE_F_CVT:
12457 case TYPE_FCMPS:
12458 case TYPE_FCMPD:
12459 case TYPE_FCONSTS:
12460 case TYPE_FCONSTD:
12461 case TYPE_FMULS:
12462 case TYPE_FMACS:
12463 case TYPE_FMULD:
12464 case TYPE_FMACD:
12465 case TYPE_FDIVS:
12466 case TYPE_FDIVD:
12467 case TYPE_F_MRC:
12468 case TYPE_F_MRRC:
12469 case TYPE_F_FLAG:
12470 case TYPE_F_LOADS:
12471 case TYPE_F_STORES:
12472 return true;
12473 default:
12474 return false;
12475 }
12476 }
12477
12478 /* Return true if and only if this insn can dual-issue as younger. */
12479 static bool
12480 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12481 {
12482 if (recog_memoized (insn) < 0)
12483 {
12484 if (verbose > 5)
12485 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12486 return false;
12487 }
12488
12489 switch (get_attr_type (insn))
12490 {
12491 case TYPE_ALU_IMM:
12492 case TYPE_ALUS_IMM:
12493 case TYPE_LOGIC_IMM:
12494 case TYPE_LOGICS_IMM:
12495 case TYPE_EXTEND:
12496 case TYPE_MVN_IMM:
12497 case TYPE_MOV_IMM:
12498 case TYPE_MOV_REG:
12499 case TYPE_MOV_SHIFT:
12500 case TYPE_MOV_SHIFT_REG:
12501 case TYPE_BRANCH:
12502 case TYPE_CALL:
12503 return true;
12504 default:
12505 return false;
12506 }
12507 }
12508
12509
12510 /* Look for an instruction that can dual issue only as an older
12511 instruction, and move it in front of any instructions that can
12512 dual-issue as younger, while preserving the relative order of all
12513 other instructions in the ready list. This is a heuristic to help
12514 dual-issue in later cycles, by postponing issue of more flexible
12515 instructions. This heuristic may affect dual issue opportunities
12516 in the current cycle. */
12517 static void
12518 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12519 int *n_readyp, int clock)
12520 {
12521 int i;
12522 int first_older_only = -1, first_younger = -1;
12523
12524 if (verbose > 5)
12525 fprintf (file,
12526 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12527 clock,
12528 *n_readyp);
12529
12530 /* Traverse the ready list from the head (the instruction to issue
12531 first), and look for the first instruction that can issue as
12532 younger and the first instruction that can dual-issue only as
12533 older. */
12534 for (i = *n_readyp - 1; i >= 0; i--)
12535 {
12536 rtx_insn *insn = ready[i];
12537 if (cortexa7_older_only (insn))
12538 {
12539 first_older_only = i;
12540 if (verbose > 5)
12541 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12542 break;
12543 }
12544 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12545 first_younger = i;
12546 }
12547
12548 /* Nothing to reorder: either no younger insn was found, or an insn
12549 that can dual-issue only as older already appears before any insn
12550 that can dual-issue as younger. */
12551 if (first_younger == -1)
12552 {
12553 if (verbose > 5)
12554 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12555 return;
12556 }
12557
12558 /* Nothing to reorder because no older-only insn in the ready list. */
12559 if (first_older_only == -1)
12560 {
12561 if (verbose > 5)
12562 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12563 return;
12564 }
12565
12566 /* Move first_older_only insn before first_younger. */
12567 if (verbose > 5)
12568 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12569 INSN_UID(ready [first_older_only]),
12570 INSN_UID(ready [first_younger]));
12571 rtx_insn *first_older_only_insn = ready [first_older_only];
12572 for (i = first_older_only; i < first_younger; i++)
12573 {
12574 ready[i] = ready[i+1];
12575 }
12576
12577 ready[i] = first_older_only_insn;
12578 return;
12579 }
12580
12581 /* Implement TARGET_SCHED_REORDER. */
12582 static int
12583 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12584 int clock)
12585 {
12586 switch (arm_tune)
12587 {
12588 case TARGET_CPU_cortexa7:
12589 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12590 break;
12591 default:
12592 /* Do nothing for other cores. */
12593 break;
12594 }
12595
12596 return arm_issue_rate ();
12597 }
12598
12599 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
12600 It corrects the value of COST based on the relationship between
12601 INSN and DEP described by the dependence type DEP_TYPE. It returns the new
12602 value. There is a per-core adjust_cost hook to adjust scheduler costs
12603 and the per-core hook can choose to completely override the generic
12604 adjust_cost function. Only put bits of code into arm_adjust_cost that
12605 are common across all cores. */
12606 static int
12607 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12608 unsigned int)
12609 {
12610 rtx i_pat, d_pat;
12611
12612 /* When generating Thumb-1 code, we want to place flag-setting operations
12613 close to a conditional branch which depends on them, so that we can
12614 omit the comparison. */
12615 if (TARGET_THUMB1
12616 && dep_type == 0
12617 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12618 && recog_memoized (dep) >= 0
12619 && get_attr_conds (dep) == CONDS_SET)
12620 return 0;
12621
12622 if (current_tune->sched_adjust_cost != NULL)
12623 {
12624 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12625 return cost;
12626 }
12627
12628 /* XXX Is this strictly true? */
12629 if (dep_type == REG_DEP_ANTI
12630 || dep_type == REG_DEP_OUTPUT)
12631 return 0;
12632
12633 /* Call insns don't incur a stall, even if they follow a load. */
12634 if (dep_type == 0
12635 && CALL_P (insn))
12636 return 1;
12637
12638 if ((i_pat = single_set (insn)) != NULL
12639 && MEM_P (SET_SRC (i_pat))
12640 && (d_pat = single_set (dep)) != NULL
12641 && MEM_P (SET_DEST (d_pat)))
12642 {
12643 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12644 /* This is a load after a store; there is no conflict if the load reads
12645 from a cached area. Assume that loads from the stack, and from the
12646 constant pool are cached, and that others will miss. This is a
12647 hack. */
12648
12649 if ((SYMBOL_REF_P (src_mem)
12650 && CONSTANT_POOL_ADDRESS_P (src_mem))
12651 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12652 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12653 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12654 return 1;
12655 }
12656
12657 return cost;
12658 }
12659
12660 int
12661 arm_max_conditional_execute (void)
12662 {
12663 return max_insns_skipped;
12664 }
12665
12666 static int
12667 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12668 {
12669 if (TARGET_32BIT)
12670 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12671 else
12672 return (optimize > 0) ? 2 : 0;
12673 }
12674
12675 static int
12676 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12677 {
12678 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12679 }
12680
12681 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12682 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12683 sequences of non-executed instructions in IT blocks probably take the same
12684 amount of time as executed instructions (and the IT instruction itself takes
12685 space in icache). This function was experimentally determined to give good
12686 results on a popular embedded benchmark. */
12687
12688 static int
12689 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12690 {
12691 return (TARGET_32BIT && speed_p) ? 1
12692 : arm_default_branch_cost (speed_p, predictable_p);
12693 }
12694
12695 static int
12696 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12697 {
12698 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12699 }
12700
12701 static bool fp_consts_inited = false;
12702
12703 static REAL_VALUE_TYPE value_fp0;
12704
12705 static void
12706 init_fp_table (void)
12707 {
12708 REAL_VALUE_TYPE r;
12709
12710 r = REAL_VALUE_ATOF ("0", DFmode);
12711 value_fp0 = r;
12712 fp_consts_inited = true;
12713 }
12714
12715 /* Return TRUE if rtx X is a valid immediate FP constant. */
12716 int
12717 arm_const_double_rtx (rtx x)
12718 {
12719 const REAL_VALUE_TYPE *r;
12720
12721 if (!fp_consts_inited)
12722 init_fp_table ();
12723
12724 r = CONST_DOUBLE_REAL_VALUE (x);
12725 if (REAL_VALUE_MINUS_ZERO (*r))
12726 return 0;
12727
12728 if (real_equal (r, &value_fp0))
12729 return 1;
12730
12731 return 0;
12732 }
12733
12734 /* VFPv3 has a fairly wide range of representable immediates, formed from
12735 "quarter-precision" floating-point values. These can be evaluated using this
12736 formula (with ^ for exponentiation):
12737
12738 (-1)^s * n * 2^(-r)
12739
12740 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12741 16 <= n <= 31 and 0 <= r <= 7.
12742
12743 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12744
12745 - A (most-significant) is the sign bit.
12746 - BCD are the exponent (encoded as r XOR 3).
12747 - EFGH are the mantissa (encoded as n - 16).
12748 */
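/* As a worked example of this encoding: 1.0 = 16 * 2^(-4), so s = 0, n = 16
   and r = 4, giving A = 0, BCD = 4 XOR 3 = 0b111 and EFGH = 16 - 16 = 0b0000,
   i.e. the byte 0x70.  Similarly 0.5 = 16 * 2^(-5) encodes as 0x60 and
   31.0 = 31 * 2^0 encodes as 0x3f.  These are the index values computed by
   vfp3_const_double_index below.  */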
12749
12750 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12751 fconst[sd] instruction, or -1 if X isn't suitable. */
12752 static int
12753 vfp3_const_double_index (rtx x)
12754 {
12755 REAL_VALUE_TYPE r, m;
12756 int sign, exponent;
12757 unsigned HOST_WIDE_INT mantissa, mant_hi;
12758 unsigned HOST_WIDE_INT mask;
12759 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12760 bool fail;
12761
12762 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12763 return -1;
12764
12765 r = *CONST_DOUBLE_REAL_VALUE (x);
12766
12767 /* We can't represent these things, so detect them first. */
12768 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12769 return -1;
12770
12771 /* Extract sign, exponent and mantissa. */
12772 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12773 r = real_value_abs (&r);
12774 exponent = REAL_EXP (&r);
12775 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12776 highest (sign) bit, with a fixed binary point at bit point_pos.
12777 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12778 bits for the mantissa, this may fail (low bits would be lost). */
12779 real_ldexp (&m, &r, point_pos - exponent);
12780 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12781 mantissa = w.elt (0);
12782 mant_hi = w.elt (1);
12783
12784 /* If there are bits set in the low part of the mantissa, we can't
12785 represent this value. */
12786 if (mantissa != 0)
12787 return -1;
12788
12789 /* Now make it so that mantissa contains the most-significant bits, and move
12790 the point_pos to indicate that the least-significant bits have been
12791 discarded. */
12792 point_pos -= HOST_BITS_PER_WIDE_INT;
12793 mantissa = mant_hi;
12794
12795 /* We can permit four significant bits of mantissa only, plus a high bit
12796 which is always 1. */
12797 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12798 if ((mantissa & mask) != 0)
12799 return -1;
12800
12801 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12802 mantissa >>= point_pos - 5;
12803
12804 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12805 floating-point immediate zero with Neon using an integer-zero load, but
12806 that case is handled elsewhere.) */
12807 if (mantissa == 0)
12808 return -1;
12809
12810 gcc_assert (mantissa >= 16 && mantissa <= 31);
12811
12812 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12813 normalized significands are in the range [1, 2). (Our mantissa is shifted
12814 left 4 places at this point relative to normalized IEEE754 values). GCC
12815 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12816 REAL_EXP must be altered. */
12817 exponent = 5 - exponent;
12818
12819 if (exponent < 0 || exponent > 7)
12820 return -1;
12821
12822 /* Sign, mantissa and exponent are now in the correct form to plug into the
12823 formula described in the comment above. */
12824 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12825 }
12826
12827 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12828 int
12829 vfp3_const_double_rtx (rtx x)
12830 {
12831 if (!TARGET_VFP3)
12832 return 0;
12833
12834 return vfp3_const_double_index (x) != -1;
12835 }
12836
12837 /* Recognize immediates which can be used in various Neon and MVE instructions.
12838 Legal immediates are described by the following table (for VMVN variants, the
12839 bitwise inverse of the constant shown is recognized. In either case, VMOV
12840 is output and the correct instruction to use for a given constant is chosen
12841 by the assembler). The constant shown is replicated across all elements of
12842 the destination vector.
12843
12844 insn elems variant constant (binary)
12845 ---- ----- ------- -----------------
12846 vmov i32 0 00000000 00000000 00000000 abcdefgh
12847 vmov i32 1 00000000 00000000 abcdefgh 00000000
12848 vmov i32 2 00000000 abcdefgh 00000000 00000000
12849 vmov i32 3 abcdefgh 00000000 00000000 00000000
12850 vmov i16 4 00000000 abcdefgh
12851 vmov i16 5 abcdefgh 00000000
12852 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12853 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12854 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12855 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12856 vmvn i16 10 00000000 abcdefgh
12857 vmvn i16 11 abcdefgh 00000000
12858 vmov i32 12 00000000 00000000 abcdefgh 11111111
12859 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12860 vmov i32 14 00000000 abcdefgh 11111111 11111111
12861 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12862 vmov i8 16 abcdefgh
12863 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12864 eeeeeeee ffffffff gggggggg hhhhhhhh
12865 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12866 vmov f32 19 00000000 00000000 00000000 00000000
12867
12868 For case 18, B = !b. Representable values are exactly those accepted by
12869 vfp3_const_double_index, but are output as floating-point numbers rather
12870 than indices.
12871
12872 For case 19, we will change it to vmov.i32 when assembling.
12873
12874 Variants 0-5 (inclusive) may also be used as immediates for the second
12875 operand of VORR/VBIC instructions.
12876
12877 The INVERSE argument causes the bitwise inverse of the given operand to be
12878 recognized instead (used for recognizing legal immediates for the VAND/VORN
12879 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12880 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12881 output, rather than the real insns vbic/vorr).
12882
12883 INVERSE makes no difference to the recognition of float vectors.
12884
12885 The return value is the variant of immediate as shown in the above table, or
12886 -1 if the given value doesn't match any of the listed patterns.
12887 */
12888 static int
12889 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12890 rtx *modconst, int *elementwidth)
12891 {
12892 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12893 matches = 1; \
12894 for (i = 0; i < idx; i += (STRIDE)) \
12895 if (!(TEST)) \
12896 matches = 0; \
12897 if (matches) \
12898 { \
12899 immtype = (CLASS); \
12900 elsize = (ELSIZE); \
12901 break; \
12902 }
12903
12904 unsigned int i, elsize = 0, idx = 0, n_elts;
12905 unsigned int innersize;
12906 unsigned char bytes[16] = {};
12907 int immtype = -1, matches;
12908 unsigned int invmask = inverse ? 0xff : 0;
12909 bool vector = GET_CODE (op) == CONST_VECTOR;
12910
12911 if (vector)
12912 n_elts = CONST_VECTOR_NUNITS (op);
12913 else
12914 {
12915 n_elts = 1;
12916 gcc_assert (mode != VOIDmode);
12917 }
12918
12919 innersize = GET_MODE_UNIT_SIZE (mode);
12920
12921 /* Only support 128-bit vectors for MVE. */
12922 if (TARGET_HAVE_MVE
12923 && (!vector
12924 || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12925 || n_elts * innersize != 16))
12926 return -1;
12927
12928 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12929 return -1;
12930
12931 /* Vectors of float constants. */
12932 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12933 {
12934 rtx el0 = CONST_VECTOR_ELT (op, 0);
12935
12936 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12937 return -1;
12938
12939 /* FP16 vectors cannot be represented. */
12940 if (GET_MODE_INNER (mode) == HFmode)
12941 return -1;
12942
12943 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12944 are distinct in this context. */
12945 if (!const_vec_duplicate_p (op))
12946 return -1;
12947
12948 if (modconst)
12949 *modconst = CONST_VECTOR_ELT (op, 0);
12950
12951 if (elementwidth)
12952 *elementwidth = 0;
12953
12954 if (el0 == CONST0_RTX (GET_MODE (el0)))
12955 return 19;
12956 else
12957 return 18;
12958 }
12959
12960 /* The tricks done in the code below apply for little-endian vector layout.
12961 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12962 FIXME: Implement logic for big-endian vectors. */
12963 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12964 return -1;
12965
12966 /* Splat vector constant out into a byte vector. */
12967 for (i = 0; i < n_elts; i++)
12968 {
12969 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12970 unsigned HOST_WIDE_INT elpart;
12971
12972 gcc_assert (CONST_INT_P (el));
12973 elpart = INTVAL (el);
12974
12975 for (unsigned int byte = 0; byte < innersize; byte++)
12976 {
12977 bytes[idx++] = (elpart & 0xff) ^ invmask;
12978 elpart >>= BITS_PER_UNIT;
12979 }
12980 }
12981
12982 /* Sanity check. */
12983 gcc_assert (idx == GET_MODE_SIZE (mode));
12984
12985 do
12986 {
12987 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12988 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12989
12990 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12991 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12992
12993 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12994 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12995
12996 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12997 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12998
12999 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13000
13001 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13002
13003 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13004 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13005
13006 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13007 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13008
13009 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13010 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13011
13012 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13013 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13014
13015 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13016
13017 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13018
13019 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13020 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13021
13022 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13023 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13024
13025 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13026 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13027
13028 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13029 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13030
13031 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13032
13033 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13034 && bytes[i] == bytes[(i + 8) % idx]);
13035 }
13036 while (0);
13037
13038 if (immtype == -1)
13039 return -1;
13040
13041 if (elementwidth)
13042 *elementwidth = elsize;
13043
13044 if (modconst)
13045 {
13046 unsigned HOST_WIDE_INT imm = 0;
13047
13048 /* Un-invert bytes of recognized vector, if necessary. */
13049 if (invmask != 0)
13050 for (i = 0; i < idx; i++)
13051 bytes[i] ^= invmask;
13052
13053 if (immtype == 17)
13054 {
13055 /* FIXME: Broken on 32-bit H_W_I hosts. */
13056 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13057
13058 for (i = 0; i < 8; i++)
13059 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13060 << (i * BITS_PER_UNIT);
13061
13062 *modconst = GEN_INT (imm);
13063 }
13064 else
13065 {
13066 unsigned HOST_WIDE_INT imm = 0;
13067
13068 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13069 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13070
13071 *modconst = GEN_INT (imm);
13072 }
13073 }
13074
13075 return immtype;
13076 #undef CHECK
13077 }
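/* As an illustration of the table above: a V4SImode constant with every
   element equal to 0x0000ab00 expands to the bytes { 00, ab, 00, 00 }
   repeated four times, which matches variant 1; *MODCONST is set to
   0x0000ab00 and *ELEMENTWIDTH to 32.  With INVERSE nonzero the bytes are
   complemented before matching, and *MODCONST still receives the original,
   un-inverted constant, as described above.  */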
13078
13079 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13080 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13081 (or zero for float elements), and a modified constant (whatever should be
13082 output for a VMOV) in *MODCONST. This function was renamed from
13083 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13084 it is used by both Neon and MVE. */
13085 int
13086 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13087 rtx *modconst, int *elementwidth)
13088 {
13089 rtx tmpconst;
13090 int tmpwidth;
13091 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13092
13093 if (retval == -1)
13094 return 0;
13095
13096 if (modconst)
13097 *modconst = tmpconst;
13098
13099 if (elementwidth)
13100 *elementwidth = tmpwidth;
13101
13102 return 1;
13103 }
13104
13105 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13106 the immediate is valid, write a constant suitable for using as an operand
13107 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13108 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13109
13110 int
13111 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13112 rtx *modconst, int *elementwidth)
13113 {
13114 rtx tmpconst;
13115 int tmpwidth;
13116 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13117
13118 if (retval < 0 || retval > 5)
13119 return 0;
13120
13121 if (modconst)
13122 *modconst = tmpconst;
13123
13124 if (elementwidth)
13125 *elementwidth = tmpwidth;
13126
13127 return 1;
13128 }
13129
13130 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13131 the immediate is valid, write a constant suitable for using as an operand
13132 to VSHR/VSHL to *MODCONST and the corresponding element width to
13133 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
13134 which have different immediate-range limitations. */
13135
13136 int
13137 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13138 rtx *modconst, int *elementwidth,
13139 bool isleftshift)
13140 {
13141 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13142 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13143 unsigned HOST_WIDE_INT last_elt = 0;
13144 unsigned HOST_WIDE_INT maxshift;
13145
13146 /* All elements of the vector constant must be the same; extract that value. */
13147 for (i = 0; i < n_elts; i++)
13148 {
13149 rtx el = CONST_VECTOR_ELT (op, i);
13150 unsigned HOST_WIDE_INT elpart;
13151
13152 if (CONST_INT_P (el))
13153 elpart = INTVAL (el);
13154 else if (CONST_DOUBLE_P (el))
13155 return 0;
13156 else
13157 gcc_unreachable ();
13158
13159 if (i != 0 && elpart != last_elt)
13160 return 0;
13161
13162 last_elt = elpart;
13163 }
13164
13165 /* Shift less than element size. */
13166 maxshift = innersize * 8;
13167
13168 if (isleftshift)
13169 {
13170 /* Left shift immediate value can be from 0 to <size>-1. */
13171 if (last_elt >= maxshift)
13172 return 0;
13173 }
13174 else
13175 {
13176 /* Right shift immediate value can be from 1 to <size>. */
13177 if (last_elt == 0 || last_elt > maxshift)
13178 return 0;
13179 }
13180
13181 if (elementwidth)
13182 *elementwidth = innersize * 8;
13183
13184 if (modconst)
13185 *modconst = CONST_VECTOR_ELT (op, 0);
13186
13187 return 1;
13188 }
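/* For example, for a V8HImode shift amount the element size is 16 bits, so
   a left-shift immediate must lie in the range 0..15 while a right-shift
   immediate must lie in the range 1..16; in both cases *ELEMENTWIDTH is set
   to 16 and *MODCONST to the (common) element value.  */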
13189
13190 /* Return a string suitable for output of Neon immediate logic operation
13191 MNEM. */
13192
13193 char *
13194 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13195 int inverse, int quad)
13196 {
13197 int width, is_valid;
13198 static char templ[40];
13199
13200 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13201
13202 gcc_assert (is_valid != 0);
13203
13204 if (quad)
13205 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13206 else
13207 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13208
13209 return templ;
13210 }
13211
13212 /* Return a string suitable for output of Neon immediate shift operation
13213 (VSHR or VSHL) MNEM. */
13214
13215 char *
13216 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13217 machine_mode mode, int quad,
13218 bool isleftshift)
13219 {
13220 int width, is_valid;
13221 static char templ[40];
13222
13223 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13224 gcc_assert (is_valid != 0);
13225
13226 if (quad)
13227 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13228 else
13229 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13230
13231 return templ;
13232 }
13233
13234 /* Output a sequence of pairwise operations to implement a reduction.
13235 NOTE: We do "too much work" here, because pairwise operations work on two
13236 registers-worth of operands in one go. Unfortunately we can't exploit those
13237 extra calculations to do the full operation in fewer steps, I don't think.
13238 Although all vector elements of the result but the first are ignored, we
13239 actually calculate the same result in each of the elements. An alternative
13240 such as initially loading a vector with zero to use as each of the second
13241 operands would use up an additional register and take an extra instruction,
13242 for no particular gain. */
13243
13244 void
13245 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13246 rtx (*reduc) (rtx, rtx, rtx))
13247 {
13248 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13249 rtx tmpsum = op1;
13250
13251 for (i = parts / 2; i >= 1; i /= 2)
13252 {
13253 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13254 emit_insn (reduc (dest, tmpsum, tmpsum));
13255 tmpsum = dest;
13256 }
13257 }
13258
13259 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13260 loaded into a register using VDUP.
13261
13262 If this is the case, and GENERATE is set, we also generate
13263 instructions to do this and return an RTX to assign to the register. */
13264
13265 static rtx
13266 neon_vdup_constant (rtx vals, bool generate)
13267 {
13268 machine_mode mode = GET_MODE (vals);
13269 machine_mode inner_mode = GET_MODE_INNER (mode);
13270 rtx x;
13271
13272 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13273 return NULL_RTX;
13274
13275 if (!const_vec_duplicate_p (vals, &x))
13276 /* The elements are not all the same. We could handle repeating
13277 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13278 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13279 vdup.i16). */
13280 return NULL_RTX;
13281
13282 if (!generate)
13283 return x;
13284
13285 /* We can load this constant by using VDUP and a constant in a
13286 single ARM register. This will be cheaper than a vector
13287 load. */
13288
13289 x = copy_to_mode_reg (inner_mode, x);
13290 return gen_vec_duplicate (mode, x);
13291 }
13292
13293 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13294 rtx
13295 mve_bool_vec_to_const (rtx const_vec)
13296 {
13297 int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
13298 int repeat = 16 / n_elts;
13299 int i;
13300 int hi_val = 0;
13301
13302 for (i = 0; i < n_elts; i++)
13303 {
13304 rtx el = CONST_VECTOR_ELT (const_vec, i);
13305 unsigned HOST_WIDE_INT elpart;
13306
13307 gcc_assert (CONST_INT_P (el));
13308 elpart = INTVAL (el);
13309
13310 for (int j = 0; j < repeat; j++)
13311 hi_val |= elpart << (i * repeat + j);
13312 }
13313 return gen_int_mode (hi_val, HImode);
13314 }
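/* As a worked example, assuming a four-element predicate mode such as
   V4BImode: for the constant { 1, 0, 1, 1 }, n_elts is 4 so repeat is
   16 / 4 = 4 and each element expands to one nibble of the HImode result,
   giving 0x000f | 0x0f00 | 0xf000 = 0xff0f.  */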
13315
13316 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13317 constants (for vec_init) or a CONST_VECTOR, can be efficiently loaded
13318 into a register.
13319
13320 If this is the case, and GENERATE is set, we also generate code to do
13321 this and return an RTX to copy into the register. */
13322
13323 rtx
13324 neon_make_constant (rtx vals, bool generate)
13325 {
13326 machine_mode mode = GET_MODE (vals);
13327 rtx target;
13328 rtx const_vec = NULL_RTX;
13329 int n_elts = GET_MODE_NUNITS (mode);
13330 int n_const = 0;
13331 int i;
13332
13333 if (GET_CODE (vals) == CONST_VECTOR)
13334 const_vec = vals;
13335 else if (GET_CODE (vals) == PARALLEL)
13336 {
13337 /* A CONST_VECTOR must contain only CONST_INTs and
13338 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13339 Only store valid constants in a CONST_VECTOR. */
13340 for (i = 0; i < n_elts; ++i)
13341 {
13342 rtx x = XVECEXP (vals, 0, i);
13343 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13344 n_const++;
13345 }
13346 if (n_const == n_elts)
13347 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13348 }
13349 else
13350 gcc_unreachable ();
13351
13352 if (const_vec != NULL
13353 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13354 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13355 return const_vec;
13356 else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
13357 return mve_bool_vec_to_const (const_vec);
13358 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13359 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13360 pipeline cycle; creating the constant takes one or two ARM
13361 pipeline cycles. */
13362 return target;
13363 else if (const_vec != NULL_RTX)
13364 /* Load from constant pool. On Cortex-A8 this takes two cycles
13365 (for either double or quad vectors). We cannot take advantage
13366 of single-cycle VLD1 because we need a PC-relative addressing
13367 mode. */
13368 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13369 else
13370 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13371 We cannot construct an initializer. */
13372 return NULL_RTX;
13373 }
13374
13375 /* Initialize vector TARGET to VALS. */
13376
13377 void
13378 neon_expand_vector_init (rtx target, rtx vals)
13379 {
13380 machine_mode mode = GET_MODE (target);
13381 machine_mode inner_mode = GET_MODE_INNER (mode);
13382 int n_elts = GET_MODE_NUNITS (mode);
13383 int n_var = 0, one_var = -1;
13384 bool all_same = true;
13385 rtx x, mem;
13386 int i;
13387
13388 for (i = 0; i < n_elts; ++i)
13389 {
13390 x = XVECEXP (vals, 0, i);
13391 if (!CONSTANT_P (x))
13392 ++n_var, one_var = i;
13393
13394 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13395 all_same = false;
13396 }
13397
13398 if (n_var == 0)
13399 {
13400 rtx constant = neon_make_constant (vals);
13401 if (constant != NULL_RTX)
13402 {
13403 emit_move_insn (target, constant);
13404 return;
13405 }
13406 }
13407
13408 /* Splat a single non-constant element if we can. */
13409 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13410 {
13411 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13412 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13413 return;
13414 }
13415
13416 /* One field is non-constant. Load constant then overwrite varying
13417 field. This is more efficient than using the stack. */
13418 if (n_var == 1)
13419 {
13420 rtx copy = copy_rtx (vals);
13421 rtx merge_mask = GEN_INT (1 << one_var);
13422
13423 /* Load constant part of vector, substitute neighboring value for
13424 varying element. */
13425 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13426 neon_expand_vector_init (target, copy);
13427
13428 /* Insert variable. */
13429 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13430 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13431 return;
13432 }
13433
13434 /* Construct the vector in memory one field at a time
13435 and load the whole vector. */
13436 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13437 for (i = 0; i < n_elts; i++)
13438 emit_move_insn (adjust_address_nv (mem, inner_mode,
13439 i * GET_MODE_SIZE (inner_mode)),
13440 XVECEXP (vals, 0, i));
13441 emit_move_insn (target, mem);
13442 }
13443
13444 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Issue
13445 an error (using DESC to describe the operand) if it doesn't. EXP indicates
13446 the source location, which includes the inlining history for intrinsics. */
13447
13448 static void
13449 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13450 const_tree exp, const char *desc)
13451 {
13452 HOST_WIDE_INT lane;
13453
13454 gcc_assert (CONST_INT_P (operand));
13455
13456 lane = INTVAL (operand);
13457
13458 if (lane < low || lane >= high)
13459 {
13460 if (exp)
13461 error_at (EXPR_LOCATION (exp),
13462 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13463 else
13464 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13465 }
13466 }
13467
13468 /* Bounds-check lanes. */
13469
13470 void
13471 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13472 const_tree exp)
13473 {
13474 bounds_check (operand, low, high, exp, "lane");
13475 }
13476
13477 /* Bounds-check constants. */
13478
13479 void
13480 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13481 {
13482 bounds_check (operand, low, high, NULL_TREE, "constant");
13483 }
13484
13485 HOST_WIDE_INT
13486 neon_element_bits (machine_mode mode)
13487 {
13488 return GET_MODE_UNIT_BITSIZE (mode);
13489 }
13490
13491 \f
13492 /* Predicates for `match_operand' and `match_operator'. */
13493
13494 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13495 WB level is 2 if full writeback address modes are allowed, 1
13496 if limited writeback address modes (POST_INC and PRE_DEC) are
13497 allowed and 0 if no writeback at all is supported. */
13498
13499 int
13500 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13501 {
13502 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13503 rtx ind;
13504
13505 /* Reject eliminable registers. */
13506 if (! (reload_in_progress || reload_completed || lra_in_progress)
13507 && ( reg_mentioned_p (frame_pointer_rtx, op)
13508 || reg_mentioned_p (arg_pointer_rtx, op)
13509 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13510 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13511 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13512 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13513 return FALSE;
13514
13515 /* Constants are converted into offsets from labels. */
13516 if (!MEM_P (op))
13517 return FALSE;
13518
13519 ind = XEXP (op, 0);
13520
13521 if (reload_completed
13522 && (LABEL_REF_P (ind)
13523 || (GET_CODE (ind) == CONST
13524 && GET_CODE (XEXP (ind, 0)) == PLUS
13525 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13526 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13527 return TRUE;
13528
13529 /* Match: (mem (reg)). */
13530 if (REG_P (ind))
13531 return arm_address_register_rtx_p (ind, 0);
13532
13533 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13534 acceptable whenever any writeback is allowed (subject to
13535 verification by arm_address_register_rtx_p), i.e. they need at
13536 least restricted writeback, while PRE_INC and POST_DEC require
13537 full writeback. */
13538 if (wb_level > 0
13539 && (GET_CODE (ind) == POST_INC
13540 || GET_CODE (ind) == PRE_DEC
13541 || (wb_level > 1
13542 && (GET_CODE (ind) == PRE_INC
13543 || GET_CODE (ind) == POST_DEC))))
13544 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13545
13546 if (wb_level > 1
13547 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13548 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13549 && GET_CODE (XEXP (ind, 1)) == PLUS
13550 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13551 ind = XEXP (ind, 1);
13552
13553 /* Match:
13554 (plus (reg)
13555 (const))
13556
13557 The encoded immediate for 16-bit modes is multiplied by 2,
13558 while the encoded immediate for 32-bit and 64-bit modes is
13559 multiplied by 4. */
13560 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13561 if (GET_CODE (ind) == PLUS
13562 && REG_P (XEXP (ind, 0))
13563 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13564 && CONST_INT_P (XEXP (ind, 1))
13565 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13566 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13567 return TRUE;
13568
13569 return FALSE;
13570 }
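/* For example, for a 64-bit mode the factor above is 4, so an offset such as
   (const_int 1020) is accepted, (const_int 1018) is rejected because it is
   not a multiple of 4, and (const_int 1024) is rejected because it is out of
   range.  For 16-bit modes the factor is 2, giving even offsets within
   +/-510.  */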
13571
13572 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13573 WB is true if full writeback address modes are allowed and is false
13574 if limited writeback address modes (POST_INC and PRE_DEC) are
13575 allowed. */
13576
13577 int arm_coproc_mem_operand (rtx op, bool wb)
13578 {
13579 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13580 }
13581
13582 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13583 context in which no writeback address modes are allowed. */
13584
13585 int
13586 arm_coproc_mem_operand_no_writeback (rtx op)
13587 {
13588 return arm_coproc_mem_operand_wb (op, 0);
13589 }
13590
13591 /* This function returns TRUE on matching mode and op.
13592 1. For narrow modes (V8QI, V4QI, V4HI), check for [Rn]; return TRUE only if Rn is a LO_REG.
13593 2. For other modes, check for [Rn]; return TRUE for Rn < R15 (except R13). */
13594 int
13595 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13596 {
13597 enum rtx_code code;
13598 int val, reg_no;
13599
13600 /* Match: (mem (reg)). */
13601 if (REG_P (op))
13602 {
13603 int reg_no = REGNO (op);
13604 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13605 ? reg_no <= LAST_LO_REGNUM
13606 : reg_no < LAST_ARM_REGNUM)
13607 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13608 }
13609 code = GET_CODE (op);
13610
13611 if (code == POST_INC || code == PRE_DEC
13612 || code == PRE_INC || code == POST_DEC)
13613 {
13614 reg_no = REGNO (XEXP (op, 0));
13615 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13616 ? reg_no <= LAST_LO_REGNUM
13617 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13618 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13619 }
13620 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13621 && GET_CODE (XEXP (op, 1)) == PLUS
13622 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13623 && REG_P (XEXP (op, 0))
13624 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13625 /* Make sure to only accept PLUS after reload_completed, otherwise
13626 this will interfere with auto_inc's pattern detection. */
13627 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13628 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13629 {
13630 reg_no = REGNO (XEXP (op, 0));
13631 if (code == PLUS)
13632 val = INTVAL (XEXP (op, 1));
13633 else
13634 val = INTVAL (XEXP(XEXP (op, 1), 1));
13635
13636 switch (mode)
13637 {
13638 case E_V16QImode:
13639 case E_V8QImode:
13640 case E_V4QImode:
13641 if (abs (val) > 127)
13642 return FALSE;
13643 break;
13644 case E_V8HImode:
13645 case E_V8HFmode:
13646 case E_V4HImode:
13647 case E_V4HFmode:
13648 if (val % 2 != 0 || abs (val) > 254)
13649 return FALSE;
13650 break;
13651 case E_V4SImode:
13652 case E_V4SFmode:
13653 if (val % 4 != 0 || abs (val) > 508)
13654 return FALSE;
13655 break;
13656 default:
13657 return FALSE;
13658 }
13659 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13660 || (MVE_STN_LDW_MODE (mode)
13661 ? reg_no <= LAST_LO_REGNUM
13662 : (reg_no < LAST_ARM_REGNUM
13663 && (code == PLUS || reg_no != SP_REGNUM))));
13664 }
13665 return FALSE;
13666 }
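/* For example, for E_V4SImode a post-modify or (after reload) PLUS address
   is accepted only if the offset is a multiple of 4 with absolute value at
   most 508: [Rn, #508] is valid, while [Rn, #510] (misaligned) and
   [Rn, #512] (out of range) are not.  */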
13667
13668 /* Return TRUE if OP is a memory operand which we can load or store a vector
13669 to/from. TYPE is one of the following values:
13670 0 - Vector load/store (vldr)
13671 1 - Core registers (ldm)
13672 2 - Element/structure loads (vld1)
13673 */
13674 int
13675 neon_vector_mem_operand (rtx op, int type, bool strict)
13676 {
13677 rtx ind;
13678
13679 /* Reject eliminable registers. */
13680 if (strict && ! (reload_in_progress || reload_completed)
13681 && (reg_mentioned_p (frame_pointer_rtx, op)
13682 || reg_mentioned_p (arg_pointer_rtx, op)
13683 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13684 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13685 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13686 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13687 return FALSE;
13688
13689 /* Constants are converted into offsets from labels. */
13690 if (!MEM_P (op))
13691 return FALSE;
13692
13693 ind = XEXP (op, 0);
13694
13695 if (reload_completed
13696 && (LABEL_REF_P (ind)
13697 || (GET_CODE (ind) == CONST
13698 && GET_CODE (XEXP (ind, 0)) == PLUS
13699 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13700 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13701 return TRUE;
13702
13703 /* Match: (mem (reg)). */
13704 if (REG_P (ind))
13705 return arm_address_register_rtx_p (ind, 0);
13706
13707 /* Allow post-increment with Neon registers. */
13708 if ((type != 1 && GET_CODE (ind) == POST_INC)
13709 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13710 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13711
13712 /* Allow post-increment by register for VLDn. */
13713 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13714 && GET_CODE (XEXP (ind, 1)) == PLUS
13715 && REG_P (XEXP (XEXP (ind, 1), 1))
13716 && REG_P (XEXP (ind, 0))
13717 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13718 return true;
13719
13720 /* Match:
13721 (plus (reg)
13722 (const)). */
13723 if (type == 0
13724 && GET_CODE (ind) == PLUS
13725 && REG_P (XEXP (ind, 0))
13726 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13727 && CONST_INT_P (XEXP (ind, 1))
13728 && INTVAL (XEXP (ind, 1)) > -1024
13729 /* For quad modes, we restrict the constant offset to be slightly less
13730 than what the instruction format permits. We have no such constraint
13731 on double mode offsets. (This must match arm_legitimate_index_p.) */
13732 && (INTVAL (XEXP (ind, 1))
13733 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13734 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13735 return TRUE;
13736
13737 return FALSE;
13738 }
13739
13740 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13741 type. */
13742 int
13743 mve_struct_mem_operand (rtx op)
13744 {
13745 rtx ind = XEXP (op, 0);
13746
13747 /* Match: (mem (reg)). */
13748 if (REG_P (ind))
13749 return arm_address_register_rtx_p (ind, 0);
13750
13751 /* Allow only post-increment by the mode size. */
13752 if (GET_CODE (ind) == POST_INC)
13753 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13754
13755 return FALSE;
13756 }
13757
13758 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13759 type. */
13760 int
13761 neon_struct_mem_operand (rtx op)
13762 {
13763 rtx ind;
13764
13765 /* Reject eliminable registers. */
13766 if (! (reload_in_progress || reload_completed)
13767 && ( reg_mentioned_p (frame_pointer_rtx, op)
13768 || reg_mentioned_p (arg_pointer_rtx, op)
13769 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13770 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13771 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13772 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13773 return FALSE;
13774
13775 /* Constants are converted into offsets from labels. */
13776 if (!MEM_P (op))
13777 return FALSE;
13778
13779 ind = XEXP (op, 0);
13780
13781 if (reload_completed
13782 && (LABEL_REF_P (ind)
13783 || (GET_CODE (ind) == CONST
13784 && GET_CODE (XEXP (ind, 0)) == PLUS
13785 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13786 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13787 return TRUE;
13788
13789 /* Match: (mem (reg)). */
13790 if (REG_P (ind))
13791 return arm_address_register_rtx_p (ind, 0);
13792
13793 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13794 if (GET_CODE (ind) == POST_INC
13795 || GET_CODE (ind) == PRE_DEC)
13796 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13797
13798 return FALSE;
13799 }
13800
13801 /* Prepares the operands for the VCMLA by lane instruction such that the right
13802 register number is selected. This instruction is special in that it always
13803 requires a D register; however, there is a choice to be made between Dn[0],
13804 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13805
13806 The VCMLA by lane function always selects two values. For instance given D0
13807 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13808 used by the instruction. However given V4SF then index 0 and 1 are valid as
13809 D0[0] or D1[0] are both valid.
13810
13811 This function centralizes that information based on OPERANDS, OPERANDS[3]
13812 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13813 updated to contain the right index. */
13814
13815 rtx *
13816 neon_vcmla_lane_prepare_operands (rtx *operands)
13817 {
13818 int lane = INTVAL (operands[4]);
13819 machine_mode constmode = SImode;
13820 machine_mode mode = GET_MODE (operands[3]);
13821 int regno = REGNO (operands[3]);
13822 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13823 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13824 {
13825 operands[3] = gen_int_mode (regno + 1, constmode);
13826 operands[4]
13827 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13828 }
13829 else
13830 {
13831 operands[3] = gen_int_mode (regno, constmode);
13832 operands[4] = gen_int_mode (lane, constmode);
13833 }
13834 return operands;
13835 }
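/* For example, for a V4SFmode operand held in q1 (i.e. d2/d3),
   GET_MODE_NUNITS (mode) / 4 is 1, so lane 0 leaves OPERANDS[3] referring to
   d2 with index 0, while lane 1 is rewritten to refer to d3 with index 0.  */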
13836
13837
13838 /* Return true if X is a register that will be eliminated later on. */
13839 int
13840 arm_eliminable_register (rtx x)
13841 {
13842 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13843 || REGNO (x) == ARG_POINTER_REGNUM
13844 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13845 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13846 }
13847
13848 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13849 coprocessor registers. Otherwise return NO_REGS. */
13850
13851 enum reg_class
13852 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13853 {
13854 if (mode == HFmode)
13855 {
13856 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13857 return GENERAL_REGS;
13858 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13859 return NO_REGS;
13860 return GENERAL_REGS;
13861 }
13862
13863 /* The neon move patterns handle all legitimate vector and struct
13864 addresses. */
13865 if (TARGET_NEON
13866 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13867 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13868 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13869 || VALID_NEON_STRUCT_MODE (mode)))
13870 return NO_REGS;
13871
13872 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13873 return NO_REGS;
13874
13875 return GENERAL_REGS;
13876 }
13877
13878 /* Values which must be returned in the most-significant end of the return
13879 register. */
13880
13881 static bool
13882 arm_return_in_msb (const_tree valtype)
13883 {
13884 return (TARGET_AAPCS_BASED
13885 && BYTES_BIG_ENDIAN
13886 && (AGGREGATE_TYPE_P (valtype)
13887 || TREE_CODE (valtype) == COMPLEX_TYPE
13888 || FIXED_POINT_TYPE_P (valtype)));
13889 }
13890
13891 /* Return TRUE if X references a SYMBOL_REF. */
13892 int
13893 symbol_mentioned_p (rtx x)
13894 {
13895 const char * fmt;
13896 int i;
13897
13898 if (SYMBOL_REF_P (x))
13899 return 1;
13900
13901 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13902 are constant offsets, not symbols. */
13903 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13904 return 0;
13905
13906 fmt = GET_RTX_FORMAT (GET_CODE (x));
13907
13908 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13909 {
13910 if (fmt[i] == 'E')
13911 {
13912 int j;
13913
13914 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13915 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13916 return 1;
13917 }
13918 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13919 return 1;
13920 }
13921
13922 return 0;
13923 }
13924
13925 /* Return TRUE if X references a LABEL_REF. */
13926 int
13927 label_mentioned_p (rtx x)
13928 {
13929 const char * fmt;
13930 int i;
13931
13932 if (LABEL_REF_P (x))
13933 return 1;
13934
13935 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13936 instruction, but they are constant offsets, not symbols. */
13937 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13938 return 0;
13939
13940 fmt = GET_RTX_FORMAT (GET_CODE (x));
13941 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13942 {
13943 if (fmt[i] == 'E')
13944 {
13945 int j;
13946
13947 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13948 if (label_mentioned_p (XVECEXP (x, i, j)))
13949 return 1;
13950 }
13951 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13952 return 1;
13953 }
13954
13955 return 0;
13956 }
13957
13958 int
13959 tls_mentioned_p (rtx x)
13960 {
13961 switch (GET_CODE (x))
13962 {
13963 case CONST:
13964 return tls_mentioned_p (XEXP (x, 0));
13965
13966 case UNSPEC:
13967 if (XINT (x, 1) == UNSPEC_TLS)
13968 return 1;
13969
13970 /* Fall through. */
13971 default:
13972 return 0;
13973 }
13974 }
13975
13976 /* Must not copy any rtx that uses a pc-relative address.
13977 Also, disallow copying of load-exclusive instructions that
13978 may appear after splitting of compare-and-swap-style operations
13979 so as to prevent those loops from being transformed away from their
13980 canonical forms (see PR 69904). */
13981
13982 static bool
13983 arm_cannot_copy_insn_p (rtx_insn *insn)
13984 {
13985 /* The tls call insn cannot be copied, as it is paired with a data
13986 word. */
13987 if (recog_memoized (insn) == CODE_FOR_tlscall)
13988 return true;
13989
13990 subrtx_iterator::array_type array;
13991 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13992 {
13993 const_rtx x = *iter;
13994 if (GET_CODE (x) == UNSPEC
13995 && (XINT (x, 1) == UNSPEC_PIC_BASE
13996 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13997 return true;
13998 }
13999
14000 rtx set = single_set (insn);
14001 if (set)
14002 {
14003 rtx src = SET_SRC (set);
14004 if (GET_CODE (src) == ZERO_EXTEND)
14005 src = XEXP (src, 0);
14006
14007 /* Catch the load-exclusive and load-acquire operations. */
14008 if (GET_CODE (src) == UNSPEC_VOLATILE
14009 && (XINT (src, 1) == VUNSPEC_LL
14010 || XINT (src, 1) == VUNSPEC_LAX))
14011 return true;
14012 }
14013 return false;
14014 }
14015
14016 enum rtx_code
14017 minmax_code (rtx x)
14018 {
14019 enum rtx_code code = GET_CODE (x);
14020
14021 switch (code)
14022 {
14023 case SMAX:
14024 return GE;
14025 case SMIN:
14026 return LE;
14027 case UMIN:
14028 return LEU;
14029 case UMAX:
14030 return GEU;
14031 default:
14032 gcc_unreachable ();
14033 }
14034 }
14035
14036 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14037
14038 bool
14039 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14040 int *mask, bool *signed_sat)
14041 {
14042 /* The high bound must be a power of two minus one. */
14043 int log = exact_log2 (INTVAL (hi_bound) + 1);
14044 if (log == -1)
14045 return false;
14046
14047 /* The low bound is either zero (for usat) or one less than the
14048 negation of the high bound (for ssat). */
14049 if (INTVAL (lo_bound) == 0)
14050 {
14051 if (mask)
14052 *mask = log;
14053 if (signed_sat)
14054 *signed_sat = false;
14055
14056 return true;
14057 }
14058
14059 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14060 {
14061 if (mask)
14062 *mask = log + 1;
14063 if (signed_sat)
14064 *signed_sat = true;
14065
14066 return true;
14067 }
14068
14069 return false;
14070 }
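/* For example, the bounds LO_BOUND = 0 and HI_BOUND = 255 match an unsigned
   saturation (usat #8): *MASK is set to 8 and *SIGNED_SAT to false.  The
   bounds LO_BOUND = -128 and HI_BOUND = 127 match a signed saturation
   (ssat #8): *MASK is set to 8 and *SIGNED_SAT to true.  */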
14071
14072 /* Return 1 if memory locations are adjacent. */
14073 int
14074 adjacent_mem_locations (rtx a, rtx b)
14075 {
14076 /* We don't guarantee to preserve the order of these memory refs. */
14077 if (volatile_refs_p (a) || volatile_refs_p (b))
14078 return 0;
14079
14080 if ((REG_P (XEXP (a, 0))
14081 || (GET_CODE (XEXP (a, 0)) == PLUS
14082 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14083 && (REG_P (XEXP (b, 0))
14084 || (GET_CODE (XEXP (b, 0)) == PLUS
14085 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14086 {
14087 HOST_WIDE_INT val0 = 0, val1 = 0;
14088 rtx reg0, reg1;
14089 int val_diff;
14090
14091 if (GET_CODE (XEXP (a, 0)) == PLUS)
14092 {
14093 reg0 = XEXP (XEXP (a, 0), 0);
14094 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14095 }
14096 else
14097 reg0 = XEXP (a, 0);
14098
14099 if (GET_CODE (XEXP (b, 0)) == PLUS)
14100 {
14101 reg1 = XEXP (XEXP (b, 0), 0);
14102 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14103 }
14104 else
14105 reg1 = XEXP (b, 0);
14106
14107 /* Don't accept any offset that will require multiple
14108 instructions to handle, since this would cause the
14109 arith_adjacentmem pattern to output an overlong sequence. */
14110 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14111 return 0;
14112
14113 /* Don't allow an eliminable register: register elimination can make
14114 the offset too large. */
14115 if (arm_eliminable_register (reg0))
14116 return 0;
14117
14118 val_diff = val1 - val0;
14119
14120 if (arm_ld_sched)
14121 {
14122 /* If the target has load delay slots, then there's no benefit
14123 to using an ldm instruction unless the offset is zero and
14124 we are optimizing for size. */
14125 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14126 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14127 && (val_diff == 4 || val_diff == -4));
14128 }
14129
14130 return ((REGNO (reg0) == REGNO (reg1))
14131 && (val_diff == 4 || val_diff == -4));
14132 }
14133
14134 return 0;
14135 }
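/* For example, on a core without load delay slots, [r1, #4] and [r1, #8]
   are adjacent (same base register, offsets differing by 4), whereas
   [r1, #4] and [r2, #8], or [r1, #4] and [r1, #16], are not.  When
   arm_ld_sched is set the test is stricter, as described above.  */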
14136
14137 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14138 for load operations, false for store operations. CONSECUTIVE is true
14139 if the register numbers in the operation must be consecutive in the register
14140 bank. RETURN_PC is true if the value is to be loaded into the PC.
14141 The pattern we are trying to match for load is:
14142 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14143 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14144 :
14145 :
14146 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14147 ]
14148 where
14149 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14150 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14151 3. If consecutive is TRUE, then for kth register being loaded,
14152 REGNO (R_dk) = REGNO (R_d0) + k.
14153 The pattern for store is similar. */
14154 bool
14155 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14156 bool consecutive, bool return_pc)
14157 {
14158 HOST_WIDE_INT count = XVECLEN (op, 0);
14159 rtx reg, mem, addr;
14160 unsigned regno;
14161 unsigned first_regno;
14162 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14163 rtx elt;
14164 bool addr_reg_in_reglist = false;
14165 bool update = false;
14166 int reg_increment;
14167 int offset_adj;
14168 int regs_per_val;
14169
14170 /* If not in SImode, then registers must be consecutive
14171 (e.g., VLDM instructions for DFmode). */
14172 gcc_assert ((mode == SImode) || consecutive);
14173 /* Setting return_pc for stores is illegal. */
14174 gcc_assert (!return_pc || load);
14175
14176 /* Set up the increments and the regs per val based on the mode. */
14177 reg_increment = GET_MODE_SIZE (mode);
14178 regs_per_val = reg_increment / 4;
14179 offset_adj = return_pc ? 1 : 0;
14180
14181 if (count <= 1
14182 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14183 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14184 return false;
14185
14186 /* Check if this is a write-back. */
14187 elt = XVECEXP (op, 0, offset_adj);
14188 if (GET_CODE (SET_SRC (elt)) == PLUS)
14189 {
14190 i++;
14191 base = 1;
14192 update = true;
14193
14194 /* The offset adjustment must be the number of registers being
14195 popped times the size of a single register. */
14196 if (!REG_P (SET_DEST (elt))
14197 || !REG_P (XEXP (SET_SRC (elt), 0))
14198 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14199 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14200 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14201 ((count - 1 - offset_adj) * reg_increment))
14202 return false;
14203 }
14204
14205 i = i + offset_adj;
14206 base = base + offset_adj;
14207 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14208 success depends on the type: VLDM can do just one reg,
14209 LDM must do at least two. */
14210 if ((count <= i) && (mode == SImode))
14211 return false;
14212
14213 elt = XVECEXP (op, 0, i - 1);
14214 if (GET_CODE (elt) != SET)
14215 return false;
14216
14217 if (load)
14218 {
14219 reg = SET_DEST (elt);
14220 mem = SET_SRC (elt);
14221 }
14222 else
14223 {
14224 reg = SET_SRC (elt);
14225 mem = SET_DEST (elt);
14226 }
14227
14228 if (!REG_P (reg) || !MEM_P (mem))
14229 return false;
14230
14231 regno = REGNO (reg);
14232 first_regno = regno;
14233 addr = XEXP (mem, 0);
14234 if (GET_CODE (addr) == PLUS)
14235 {
14236 if (!CONST_INT_P (XEXP (addr, 1)))
14237 return false;
14238
14239 offset = INTVAL (XEXP (addr, 1));
14240 addr = XEXP (addr, 0);
14241 }
14242
14243 if (!REG_P (addr))
14244 return false;
14245
14246 /* Don't allow SP to be loaded unless it is also the base register. It
14247 guarantees that SP is reset correctly when an LDM instruction
14248 is interrupted. Otherwise, we might end up with a corrupt stack. */
14249 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14250 return false;
14251
14252 if (regno == REGNO (addr))
14253 addr_reg_in_reglist = true;
14254
14255 for (; i < count; i++)
14256 {
14257 elt = XVECEXP (op, 0, i);
14258 if (GET_CODE (elt) != SET)
14259 return false;
14260
14261 if (load)
14262 {
14263 reg = SET_DEST (elt);
14264 mem = SET_SRC (elt);
14265 }
14266 else
14267 {
14268 reg = SET_SRC (elt);
14269 mem = SET_DEST (elt);
14270 }
14271
14272 if (!REG_P (reg)
14273 || GET_MODE (reg) != mode
14274 || REGNO (reg) <= regno
14275 || (consecutive
14276 && (REGNO (reg) !=
14277 (unsigned int) (first_regno + regs_per_val * (i - base))))
14278 /* Don't allow SP to be loaded unless it is also the base register. It
14279 guarantees that SP is reset correctly when an LDM instruction
14280 is interrupted. Otherwise, we might end up with a corrupt stack. */
14281 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14282 || !MEM_P (mem)
14283 || GET_MODE (mem) != mode
14284 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14285 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14286 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14287 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14288 offset + (i - base) * reg_increment))
14289 && (!REG_P (XEXP (mem, 0))
14290 || offset + (i - base) * reg_increment != 0)))
14291 return false;
14292
14293 regno = REGNO (reg);
14294 if (regno == REGNO (addr))
14295 addr_reg_in_reglist = true;
14296 }
14297
14298 if (load)
14299 {
14300 if (update && addr_reg_in_reglist)
14301 return false;
14302
14303 /* For Thumb-1, the address register is always modified - either by write-back
14304 or by an explicit load. If the pattern does not describe an update,
14305 then the address register must be in the list of loaded registers. */
14306 if (TARGET_THUMB1)
14307 return update || addr_reg_in_reglist;
14308 }
14309
14310 return true;
14311 }
14312
14313 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14314 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14315 following form:
14316
14317 [(set (reg:SI <N>) (const_int 0))
14318 (set (reg:SI <M>) (const_int 0))
14319 ...
14320 (unspec_volatile [(const_int 0)]
14321 VUNSPEC_CLRM_APSR)
14322 (clobber (reg:CC CC_REGNUM))
14323 ]
14324
14325 Any number (including 0) of set expressions is valid; the volatile unspec is
14326 optional. All registers except SP and PC are allowed, and the registers must
14327 be in strictly increasing order.
14328
14329 To be a valid VSCCLRM pattern, OP must have the following form:
14330
14331 [(unspec_volatile [(const_int 0)]
14332 VUNSPEC_VSCCLRM_VPR)
14333 (set (reg:SF <N>) (const_int 0))
14334 (set (reg:SF <M>) (const_int 0))
14335 ...
14336 ]
14337
14338 As with CLRM, any number (including 0) of set expressions is valid; however,
14339 the volatile unspec is mandatory here. Any VFP single-precision register is
14340 accepted but all registers must be consecutive and in increasing order. */
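/* For example, a minimal CLRM-style parallel clearing r1 and r4 (an
   illustrative sketch, not taken from the machine description) would be:

     (parallel [(set (reg:SI 1) (const_int 0))
                (set (reg:SI 4) (const_int 0))
                (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
                (clobber (reg:CC CC_REGNUM))])  */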
14341
14342 bool
14343 clear_operation_p (rtx op, bool vfp)
14344 {
14345 unsigned regno;
14346 unsigned last_regno = INVALID_REGNUM;
14347 rtx elt, reg, zero;
14348 int count = XVECLEN (op, 0);
14349 int first_set = vfp ? 1 : 0;
14350 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14351
14352 for (int i = first_set; i < count; i++)
14353 {
14354 elt = XVECEXP (op, 0, i);
14355
14356 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14357 {
14358 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14359 || XVECLEN (elt, 0) != 1
14360 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14361 || i != count - 2)
14362 return false;
14363
14364 continue;
14365 }
14366
14367 if (GET_CODE (elt) == CLOBBER)
14368 continue;
14369
14370 if (GET_CODE (elt) != SET)
14371 return false;
14372
14373 reg = SET_DEST (elt);
14374 zero = SET_SRC (elt);
14375
14376 if (!REG_P (reg)
14377 || GET_MODE (reg) != expected_mode
14378 || zero != CONST0_RTX (SImode))
14379 return false;
14380
14381 regno = REGNO (reg);
14382
14383 if (vfp)
14384 {
14385 if (i != first_set && regno != last_regno + 1)
14386 return false;
14387 }
14388 else
14389 {
14390 if (regno == SP_REGNUM || regno == PC_REGNUM)
14391 return false;
14392 if (i != first_set && regno <= last_regno)
14393 return false;
14394 }
14395
14396 last_regno = regno;
14397 }
14398
14399 return true;
14400 }
14401
14402 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14403 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14404 instruction. ADD_OFFSET is nonzero if the base address register needs
14405 to be modified with an add instruction before we can use it. */
14406
14407 static bool
14408 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14409 int nops, HOST_WIDE_INT add_offset)
14410 {
14411 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14412 if the offset isn't small enough. The reason 2 ldrs are faster
14413 is because these ARMs are able to do more than one cache access
14414 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14415 whilst the ARM8 has a double bandwidth cache. This means that
14416 these cores can do both an instruction fetch and a data fetch in
14417 a single cycle, so the trick of calculating the address into a
14418 scratch register (one of the result regs) and then doing a load
14419 multiple actually becomes slower (and no smaller in code size).
14420 That is the transformation
14421
14422 ldr rd1, [rbase + offset]
14423 ldr rd2, [rbase + offset + 4]
14424
14425 to
14426
14427 add rd1, rbase, offset
14428 ldmia rd1, {rd1, rd2}
14429
14430 produces worse code -- '3 cycles + any stalls on rd2' instead of
14431 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14432 access per cycle, the first sequence could never complete in less
14433 than 6 cycles, whereas the ldm sequence would only take 5 and
14434 would make better use of sequential accesses if not hitting the
14435 cache.
14436
14437 We cheat here and test 'arm_ld_sched' which we currently know to
14438 only be true for the ARM8, ARM9 and StrongARM. If this ever
14439 changes, then the test below needs to be reworked. */
14440 if (nops == 2 && arm_ld_sched && add_offset != 0)
14441 return false;
14442
14443 /* XScale has load-store double instructions, but they have stricter
14444 alignment requirements than load-store multiple, so we cannot
14445 use them.
14446
14447 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14448 the pipeline until completion.
14449
14450 NREGS CYCLES
14451 1 3
14452 2 4
14453 3 5
14454 4 6
14455
14456 An ldr instruction takes 1-3 cycles, but does not block the
14457 pipeline.
14458
14459 NREGS CYCLES
14460 1 1-3
14461 2 2-6
14462 3 3-9
14463 4 4-12
14464
14465 Best case ldr will always win. However, the more ldr instructions
14466 we issue, the less likely we are to be able to schedule them well.
14467 Using ldr instructions also increases code size.
14468
14469 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14470 for counts of 3 or 4 regs. */
14471 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14472 return false;
14473 return true;
14474 }
14475
14476 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14477 Given an array UNSORTED_OFFSETS of NOPS elements, compute an array ORDER
14478 which describes the sequence in which to access the offsets so that they
14479 are visited in ascending order. In this sequence, each offset must be
14480 larger by exactly 4 than the previous one. ORDER[0]
14481 must have been filled in with the lowest offset by the caller.
14482 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14483 we use to verify that ORDER produces an ascending order of registers.
14484 Return true if it was possible to construct such an order, false if
14485 not. */
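/* A worked example (values invented for illustration): with NOPS == 4,
   UNSORTED_OFFSETS == {8, 0, 12, 4} and ORDER[0] == 1 (the index of the
   lowest offset), the loop below fills ORDER with {1, 3, 0, 2}, visiting
   the offsets as 0, 4, 8, 12.  If any offset in the chain were missing or
   duplicated, the function would return false instead.  */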
14486
14487 static bool
14488 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14489 int *unsorted_regs)
14490 {
14491 int i;
14492 for (i = 1; i < nops; i++)
14493 {
14494 int j;
14495
14496 order[i] = order[i - 1];
14497 for (j = 0; j < nops; j++)
14498 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14499 {
14500 /* We must find exactly one offset that is higher than the
14501 previous one by 4. */
14502 if (order[i] != order[i - 1])
14503 return false;
14504 order[i] = j;
14505 }
14506 if (order[i] == order[i - 1])
14507 return false;
14508 /* The register numbers must be ascending. */
14509 if (unsorted_regs != NULL
14510 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14511 return false;
14512 }
14513 return true;
14514 }
14515
14516 /* Used to determine in a peephole whether a sequence of load
14517 instructions can be changed into a load-multiple instruction.
14518 NOPS is the number of separate load instructions we are examining. The
14519 first NOPS entries in OPERANDS are the destination registers, the
14520 next NOPS entries are memory operands. If this function is
14521 successful, *BASE is set to the common base register of the memory
14522 accesses; *LOAD_OFFSET is set to the first memory location's offset
14523 from that base register.
14524 REGS is an array filled in with the destination register numbers.
14525 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14526 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14527 the sequence of registers in REGS matches the loads from ascending memory
14528 locations, and the function verifies that the register numbers are
14529 themselves ascending. If CHECK_REGS is false, the register numbers
14530 are stored in the order they are found in the operands. */
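/* For instance (a sketch with invented operands): for the two loads
     ldr r4, [r1]    and    ldr r5, [r1, #4]
   this function sets *BASE to 1, *LOAD_OFFSET to 0, REGS to {4, 5} and,
   subject to the profitability checks below, returns 1 (the ldmia case);
   the same loads at offsets 4 and 8 from r1 would return 2 (the ldmib
   case) when TARGET_ARM.  A return value of 0 means no load-multiple can
   be used.  */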
14531 static int
14532 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14533 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14534 {
14535 int unsorted_regs[MAX_LDM_STM_OPS];
14536 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14537 int order[MAX_LDM_STM_OPS];
14538 int base_reg = -1;
14539 int i, ldm_case;
14540
14541 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14542 easily extended if required. */
14543 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14544
14545 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14546
14547 /* Loop over the operands and check that the memory references are
14548 suitable (i.e. immediate offsets from the same base register). At
14549 the same time, extract the target registers and the memory
14550 offsets. */
14551 for (i = 0; i < nops; i++)
14552 {
14553 rtx reg;
14554 rtx offset;
14555
14556 /* Convert a subreg of a mem into the mem itself. */
14557 if (GET_CODE (operands[nops + i]) == SUBREG)
14558 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14559
14560 gcc_assert (MEM_P (operands[nops + i]));
14561
14562 /* Don't reorder volatile memory references; it doesn't seem worth
14563 looking for the case where the order is ok anyway. */
14564 if (MEM_VOLATILE_P (operands[nops + i]))
14565 return 0;
14566
14567 offset = const0_rtx;
14568
14569 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14570 || (SUBREG_P (reg)
14571 && REG_P (reg = SUBREG_REG (reg))))
14572 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14573 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14574 || (SUBREG_P (reg)
14575 && REG_P (reg = SUBREG_REG (reg))))
14576 && (CONST_INT_P (offset
14577 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14578 {
14579 if (i == 0)
14580 {
14581 base_reg = REGNO (reg);
14582 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14583 return 0;
14584 }
14585 else if (base_reg != (int) REGNO (reg))
14586 /* Not addressed from the same base register. */
14587 return 0;
14588
14589 unsorted_regs[i] = (REG_P (operands[i])
14590 ? REGNO (operands[i])
14591 : REGNO (SUBREG_REG (operands[i])));
14592
14593 /* If it isn't an integer register, or if it overwrites the
14594 base register but isn't the last insn in the list, then
14595 we can't do this. */
14596 if (unsorted_regs[i] < 0
14597 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14598 || unsorted_regs[i] > 14
14599 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14600 return 0;
14601
14602 /* Don't allow SP to be loaded unless it is also the base
14603 register. It guarantees that SP is reset correctly when
14604 an LDM instruction is interrupted. Otherwise, we might
14605 end up with a corrupt stack. */
14606 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14607 return 0;
14608
14609 unsorted_offsets[i] = INTVAL (offset);
14610 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14611 order[0] = i;
14612 }
14613 else
14614 /* Not a suitable memory address. */
14615 return 0;
14616 }
14617
14618 /* All the useful information has now been extracted from the
14619 operands into unsorted_regs and unsorted_offsets; additionally,
14620 order[0] has been set to the lowest offset in the list. Sort
14621 the offsets into order, verifying that they are adjacent, and
14622 check that the register numbers are ascending. */
14623 if (!compute_offset_order (nops, unsorted_offsets, order,
14624 check_regs ? unsorted_regs : NULL))
14625 return 0;
14626
14627 if (saved_order)
14628 memcpy (saved_order, order, sizeof order);
14629
14630 if (base)
14631 {
14632 *base = base_reg;
14633
14634 for (i = 0; i < nops; i++)
14635 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14636
14637 *load_offset = unsorted_offsets[order[0]];
14638 }
14639
14640 if (unsorted_offsets[order[0]] == 0)
14641 ldm_case = 1; /* ldmia */
14642 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14643 ldm_case = 2; /* ldmib */
14644 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14645 ldm_case = 3; /* ldmda */
14646 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14647 ldm_case = 4; /* ldmdb */
14648 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14649 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14650 ldm_case = 5;
14651 else
14652 return 0;
14653
14654 if (!multiple_operation_profitable_p (false, nops,
14655 ldm_case == 5
14656 ? unsorted_offsets[order[0]] : 0))
14657 return 0;
14658
14659 return ldm_case;
14660 }
14661
14662 /* Used to determine in a peephole whether a sequence of store instructions can
14663 be changed into a store-multiple instruction.
14664 NOPS is the number of separate store instructions we are examining.
14665 NOPS_TOTAL is the total number of instructions recognized by the peephole
14666 pattern.
14667 The first NOPS entries in OPERANDS are the source registers, the next
14668 NOPS entries are memory operands. If this function is successful, *BASE is
14669 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14670 to the first memory location's offset from that base register. REGS is an
14671 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14672 likewise filled with the corresponding rtx's.
14673 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14674 numbers to an ascending order of stores.
14675 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14676 from ascending memory locations, and the function verifies that the register
14677 numbers are themselves ascending. If CHECK_REGS is false, the register
14678 numbers are stored in the order they are found in the operands. */
14679 static int
14680 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14681 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14682 HOST_WIDE_INT *load_offset, bool check_regs)
14683 {
14684 int unsorted_regs[MAX_LDM_STM_OPS];
14685 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14686 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14687 int order[MAX_LDM_STM_OPS];
14688 int base_reg = -1;
14689 rtx base_reg_rtx = NULL;
14690 int i, stm_case;
14691
14692 /* Write-back of the base register is currently only supported for Thumb-1. */
14693 int base_writeback = TARGET_THUMB1;
14694
14695 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14696 easily extended if required. */
14697 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14698
14699 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14700
14701 /* Loop over the operands and check that the memory references are
14702 suitable (i.e. immediate offsets from the same base register). At
14703 the same time, extract the source registers and the memory
14704 offsets. */
14705 for (i = 0; i < nops; i++)
14706 {
14707 rtx reg;
14708 rtx offset;
14709
14710 /* Convert a subreg of a mem into the mem itself. */
14711 if (GET_CODE (operands[nops + i]) == SUBREG)
14712 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14713
14714 gcc_assert (MEM_P (operands[nops + i]));
14715
14716 /* Don't reorder volatile memory references; it doesn't seem worth
14717 looking for the case where the order is ok anyway. */
14718 if (MEM_VOLATILE_P (operands[nops + i]))
14719 return 0;
14720
14721 offset = const0_rtx;
14722
14723 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14724 || (SUBREG_P (reg)
14725 && REG_P (reg = SUBREG_REG (reg))))
14726 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14727 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14728 || (SUBREG_P (reg)
14729 && REG_P (reg = SUBREG_REG (reg))))
14730 && (CONST_INT_P (offset
14731 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14732 {
14733 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14734 ? operands[i] : SUBREG_REG (operands[i]));
14735 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14736
14737 if (i == 0)
14738 {
14739 base_reg = REGNO (reg);
14740 base_reg_rtx = reg;
14741 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14742 return 0;
14743 }
14744 else if (base_reg != (int) REGNO (reg))
14745 /* Not addressed from the same base register. */
14746 return 0;
14747
14748 /* If it isn't an integer register, then we can't do this. */
14749 if (unsorted_regs[i] < 0
14750 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14751 /* The effects are unpredictable if the base register is
14752 both updated and stored. */
14753 || (base_writeback && unsorted_regs[i] == base_reg)
14754 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14755 || unsorted_regs[i] > 14)
14756 return 0;
14757
14758 unsorted_offsets[i] = INTVAL (offset);
14759 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14760 order[0] = i;
14761 }
14762 else
14763 /* Not a suitable memory address. */
14764 return 0;
14765 }
14766
14767 /* All the useful information has now been extracted from the
14768 operands into unsorted_regs and unsorted_offsets; additionally,
14769 order[0] has been set to the lowest offset in the list. Sort
14770 the offsets into order, verifying that they are adjacent, and
14771 check that the register numbers are ascending. */
14772 if (!compute_offset_order (nops, unsorted_offsets, order,
14773 check_regs ? unsorted_regs : NULL))
14774 return 0;
14775
14776 if (saved_order)
14777 memcpy (saved_order, order, sizeof order);
14778
14779 if (base)
14780 {
14781 *base = base_reg;
14782
14783 for (i = 0; i < nops; i++)
14784 {
14785 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14786 if (reg_rtxs)
14787 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14788 }
14789
14790 *load_offset = unsorted_offsets[order[0]];
14791 }
14792
14793 if (TARGET_THUMB1
14794 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14795 return 0;
14796
14797 if (unsorted_offsets[order[0]] == 0)
14798 stm_case = 1; /* stmia */
14799 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14800 stm_case = 2; /* stmib */
14801 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14802 stm_case = 3; /* stmda */
14803 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14804 stm_case = 4; /* stmdb */
14805 else
14806 return 0;
14807
14808 if (!multiple_operation_profitable_p (false, nops, 0))
14809 return 0;
14810
14811 return stm_case;
14812 }
14813 \f
14814 /* Routines for use in generating RTL. */
14815
14816 /* Generate a load-multiple instruction. COUNT is the number of loads in
14817 the instruction; REGS and MEMS are arrays containing the operands.
14818 BASEREG is the base register to be used in addressing the memory operands.
14819 WBACK_OFFSET, if nonzero, is the amount by which the base register
14820 should be updated. */
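/* As an illustrative sketch: a call with COUNT == 2, REGS == {4, 5},
   BASEREG == r1 and WBACK_OFFSET == 8 would (when a load-multiple is
   deemed profitable) produce

     (parallel [(set (reg:SI 1) (plus:SI (reg:SI 1) (const_int 8)))
                (set (reg:SI 4) (mem 0))
                (set (reg:SI 5) (mem 1))])

   where (mem 0) and (mem 1) stand for MEMS[0] and MEMS[1]; otherwise a
   sequence of individual move insns is returned instead.  */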
14821
14822 static rtx
14823 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14824 HOST_WIDE_INT wback_offset)
14825 {
14826 int i = 0, j;
14827 rtx result;
14828
14829 if (!multiple_operation_profitable_p (false, count, 0))
14830 {
14831 rtx seq;
14832
14833 start_sequence ();
14834
14835 for (i = 0; i < count; i++)
14836 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14837
14838 if (wback_offset != 0)
14839 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14840
14841 seq = get_insns ();
14842 end_sequence ();
14843
14844 return seq;
14845 }
14846
14847 result = gen_rtx_PARALLEL (VOIDmode,
14848 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14849 if (wback_offset != 0)
14850 {
14851 XVECEXP (result, 0, 0)
14852 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14853 i = 1;
14854 count++;
14855 }
14856
14857 for (j = 0; i < count; i++, j++)
14858 XVECEXP (result, 0, i)
14859 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14860
14861 return result;
14862 }
14863
14864 /* Generate a store-multiple instruction. COUNT is the number of stores in
14865 the instruction; REGS and MEMS are arrays containing the operands.
14866 BASEREG is the base register to be used in addressing the memory operands.
14867 WBACK_OFFSET, if nonzero, is the amount by which the base register
14868 should be updated. */
14869
14870 static rtx
14871 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14872 HOST_WIDE_INT wback_offset)
14873 {
14874 int i = 0, j;
14875 rtx result;
14876
14877 if (GET_CODE (basereg) == PLUS)
14878 basereg = XEXP (basereg, 0);
14879
14880 if (!multiple_operation_profitable_p (false, count, 0))
14881 {
14882 rtx seq;
14883
14884 start_sequence ();
14885
14886 for (i = 0; i < count; i++)
14887 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14888
14889 if (wback_offset != 0)
14890 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14891
14892 seq = get_insns ();
14893 end_sequence ();
14894
14895 return seq;
14896 }
14897
14898 result = gen_rtx_PARALLEL (VOIDmode,
14899 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14900 if (wback_offset != 0)
14901 {
14902 XVECEXP (result, 0, 0)
14903 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14904 i = 1;
14905 count++;
14906 }
14907
14908 for (j = 0; i < count; i++, j++)
14909 XVECEXP (result, 0, i)
14910 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14911
14912 return result;
14913 }
14914
14915 /* Generate either a load-multiple or a store-multiple instruction. This
14916 function can be used in situations where we can start with a single MEM
14917 rtx and adjust its address upwards.
14918 COUNT is the number of operations in the instruction, not counting a
14919 possible update of the base register. REGS is an array containing the
14920 register operands.
14921 BASEREG is the base register to be used in addressing the memory operands,
14922 which are constructed from BASEMEM.
14923 WRITE_BACK specifies whether the generated instruction should include an
14924 update of the base register.
14925 OFFSETP is used to pass an offset to and from this function; this offset
14926 is not used when constructing the address (instead BASEMEM should have an
14927 appropriate offset in its address); it is used only for setting
14928 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
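/* For illustration only (a sketch; the variable names are invented), a
   caller wanting to load four consecutive words through the address in
   BASEREG, updating its running offset, might do:

     int regnos[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regnos, 4, basereg, TRUE, basemem,
                                       &off));
     // off is now 16 because write-back was requested.

   This is essentially how arm_block_move_unaligned_straight below uses
   the interface.  */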
14929
14930 static rtx
14931 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14932 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14933 {
14934 rtx mems[MAX_LDM_STM_OPS];
14935 HOST_WIDE_INT offset = *offsetp;
14936 int i;
14937
14938 gcc_assert (count <= MAX_LDM_STM_OPS);
14939
14940 if (GET_CODE (basereg) == PLUS)
14941 basereg = XEXP (basereg, 0);
14942
14943 for (i = 0; i < count; i++)
14944 {
14945 rtx addr = plus_constant (Pmode, basereg, i * 4);
14946 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14947 offset += 4;
14948 }
14949
14950 if (write_back)
14951 *offsetp = offset;
14952
14953 if (is_load)
14954 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14955 write_back ? 4 * count : 0);
14956 else
14957 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14958 write_back ? 4 * count : 0);
14959 }
14960
14961 rtx
14962 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14963 rtx basemem, HOST_WIDE_INT *offsetp)
14964 {
14965 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14966 offsetp);
14967 }
14968
14969 rtx
14970 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14971 rtx basemem, HOST_WIDE_INT *offsetp)
14972 {
14973 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14974 offsetp);
14975 }
14976
14977 /* Called from a peephole2 expander to turn a sequence of loads into an
14978 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14979 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14980 is true if we can reorder the registers because they are subsequently used
14981 commutatively.
14982 Returns true iff we could generate a new instruction. */
14983
14984 bool
14985 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14986 {
14987 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14988 rtx mems[MAX_LDM_STM_OPS];
14989 int i, j, base_reg;
14990 rtx base_reg_rtx;
14991 HOST_WIDE_INT offset;
14992 int write_back = FALSE;
14993 int ldm_case;
14994 rtx addr;
14995
14996 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14997 &base_reg, &offset, !sort_regs);
14998
14999 if (ldm_case == 0)
15000 return false;
15001
15002 if (sort_regs)
15003 for (i = 0; i < nops - 1; i++)
15004 for (j = i + 1; j < nops; j++)
15005 if (regs[i] > regs[j])
15006 {
15007 int t = regs[i];
15008 regs[i] = regs[j];
15009 regs[j] = t;
15010 }
15011 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15012
15013 if (TARGET_THUMB1)
15014 {
15015 gcc_assert (ldm_case == 1 || ldm_case == 5);
15016
15017 /* Thumb-1 ldm uses write-back unless the base register is loaded. */
15018 write_back = true;
15019 for (i = 0; i < nops; i++)
15020 if (base_reg == regs[i])
15021 write_back = false;
15022
15023 /* Ensure the base is dead if it is updated. */
15024 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15025 return false;
15026 }
15027
15028 if (ldm_case == 5)
15029 {
15030 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15031 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15032 offset = 0;
15033 base_reg_rtx = newbase;
15034 }
15035
15036 for (i = 0; i < nops; i++)
15037 {
15038 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15039 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15040 SImode, addr, 0);
15041 }
15042 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15043 write_back ? offset + i * 4 : 0));
15044 return true;
15045 }
15046
15047 /* Called from a peephole2 expander to turn a sequence of stores into an
15048 STM instruction. OPERANDS are the operands found by the peephole matcher;
15049 NOPS indicates how many separate stores we are trying to combine.
15050 Returns true iff we could generate a new instruction. */
15051
15052 bool
15053 gen_stm_seq (rtx *operands, int nops)
15054 {
15055 int i;
15056 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15057 rtx mems[MAX_LDM_STM_OPS];
15058 int base_reg;
15059 rtx base_reg_rtx;
15060 HOST_WIDE_INT offset;
15061 int write_back = FALSE;
15062 int stm_case;
15063 rtx addr;
15064 bool base_reg_dies;
15065
15066 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15067 mem_order, &base_reg, &offset, true);
15068
15069 if (stm_case == 0)
15070 return false;
15071
15072 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15073
15074 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15075 if (TARGET_THUMB1)
15076 {
15077 gcc_assert (base_reg_dies);
15078 write_back = TRUE;
15079 }
15080
15081 if (stm_case == 5)
15082 {
15083 gcc_assert (base_reg_dies);
15084 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15085 offset = 0;
15086 }
15087
15088 addr = plus_constant (Pmode, base_reg_rtx, offset);
15089
15090 for (i = 0; i < nops; i++)
15091 {
15092 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15093 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15094 SImode, addr, 0);
15095 }
15096 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15097 write_back ? offset + i * 4 : 0));
15098 return true;
15099 }
15100
15101 /* Called from a peephole2 expander to turn a sequence of stores that are
15102 preceded by constant loads into an STM instruction. OPERANDS are the
15103 operands found by the peephole matcher; NOPS indicates how many
15104 separate stores we are trying to combine; there are 2 * NOPS
15105 instructions in the peephole.
15106 Returns true iff we could generate a new instruction. */
15107
15108 bool
15109 gen_const_stm_seq (rtx *operands, int nops)
15110 {
15111 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15112 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15113 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15114 rtx mems[MAX_LDM_STM_OPS];
15115 int base_reg;
15116 rtx base_reg_rtx;
15117 HOST_WIDE_INT offset;
15118 int write_back = FALSE;
15119 int stm_case;
15120 rtx addr;
15121 bool base_reg_dies;
15122 int i, j;
15123 HARD_REG_SET allocated;
15124
15125 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15126 mem_order, &base_reg, &offset, false);
15127
15128 if (stm_case == 0)
15129 return false;
15130
15131 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15132
15133 /* If the same register is used more than once, try to find a free
15134 register. */
15135 CLEAR_HARD_REG_SET (allocated);
15136 for (i = 0; i < nops; i++)
15137 {
15138 for (j = i + 1; j < nops; j++)
15139 if (regs[i] == regs[j])
15140 {
15141 rtx t = peep2_find_free_register (0, nops * 2,
15142 TARGET_THUMB1 ? "l" : "r",
15143 SImode, &allocated);
15144 if (t == NULL_RTX)
15145 return false;
15146 reg_rtxs[i] = t;
15147 regs[i] = REGNO (t);
15148 }
15149 }
15150
15151 /* Compute an ordering that maps the register numbers to an ascending
15152 sequence. */
15153 reg_order[0] = 0;
15154 for (i = 0; i < nops; i++)
15155 if (regs[i] < regs[reg_order[0]])
15156 reg_order[0] = i;
15157
15158 for (i = 1; i < nops; i++)
15159 {
15160 int this_order = reg_order[i - 1];
15161 for (j = 0; j < nops; j++)
15162 if (regs[j] > regs[reg_order[i - 1]]
15163 && (this_order == reg_order[i - 1]
15164 || regs[j] < regs[this_order]))
15165 this_order = j;
15166 reg_order[i] = this_order;
15167 }
15168
15169 /* Ensure that registers that must be live after the instruction end
15170 up with the correct value. */
15171 for (i = 0; i < nops; i++)
15172 {
15173 int this_order = reg_order[i];
15174 if ((this_order != mem_order[i]
15175 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15176 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15177 return false;
15178 }
15179
15180 /* Load the constants. */
15181 for (i = 0; i < nops; i++)
15182 {
15183 rtx op = operands[2 * nops + mem_order[i]];
15184 sorted_regs[i] = regs[reg_order[i]];
15185 emit_move_insn (reg_rtxs[reg_order[i]], op);
15186 }
15187
15188 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15189
15190 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15191 if (TARGET_THUMB1)
15192 {
15193 gcc_assert (base_reg_dies);
15194 write_back = TRUE;
15195 }
15196
15197 if (stm_case == 5)
15198 {
15199 gcc_assert (base_reg_dies);
15200 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15201 offset = 0;
15202 }
15203
15204 addr = plus_constant (Pmode, base_reg_rtx, offset);
15205
15206 for (i = 0; i < nops; i++)
15207 {
15208 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15209 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15210 SImode, addr, 0);
15211 }
15212 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15213 write_back ? offset + i * 4 : 0));
15214 return true;
15215 }
15216
15217 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15218 unaligned copies on processors which support unaligned semantics for those
15219 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15220 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15221 An interleave factor of 1 (the minimum) will perform no interleaving.
15222 Load/store multiple are used for aligned addresses where possible. */
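/* A worked example (sizes invented for illustration): copying 11 bytes
   from a word-aligned source to an unaligned destination with
   INTERLEAVE_FACTOR == 2 first moves one 8-byte block (an ldm of two
   registers followed by two unaligned str instructions), leaving 3 bytes;
   no whole words remain, so a halfword and then a byte are loaded, and
   both are stored at the end, the stores having been deferred because the
   interleave factor is greater than 1.  */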
15223
15224 static void
15225 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15226 HOST_WIDE_INT length,
15227 unsigned int interleave_factor)
15228 {
15229 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15230 int *regnos = XALLOCAVEC (int, interleave_factor);
15231 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15232 HOST_WIDE_INT i, j;
15233 HOST_WIDE_INT remaining = length, words;
15234 rtx halfword_tmp = NULL, byte_tmp = NULL;
15235 rtx dst, src;
15236 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15237 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15238 HOST_WIDE_INT srcoffset, dstoffset;
15239 HOST_WIDE_INT src_autoinc, dst_autoinc;
15240 rtx mem, addr;
15241
15242 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15243
15244 /* Use hard registers if we have aligned source or destination so we can use
15245 load/store multiple with contiguous registers. */
15246 if (dst_aligned || src_aligned)
15247 for (i = 0; i < interleave_factor; i++)
15248 regs[i] = gen_rtx_REG (SImode, i);
15249 else
15250 for (i = 0; i < interleave_factor; i++)
15251 regs[i] = gen_reg_rtx (SImode);
15252
15253 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15254 src = copy_addr_to_reg (XEXP (srcbase, 0));
15255
15256 srcoffset = dstoffset = 0;
15257
15258 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST;
15259 when copying the remaining bytes we must subtract that auto-increment again. */
15260 src_autoinc = dst_autoinc = 0;
15261
15262 for (i = 0; i < interleave_factor; i++)
15263 regnos[i] = i;
15264
15265 /* Copy BLOCK_SIZE_BYTES chunks. */
15266
15267 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15268 {
15269 /* Load words. */
15270 if (src_aligned && interleave_factor > 1)
15271 {
15272 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15273 TRUE, srcbase, &srcoffset));
15274 src_autoinc += UNITS_PER_WORD * interleave_factor;
15275 }
15276 else
15277 {
15278 for (j = 0; j < interleave_factor; j++)
15279 {
15280 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15281 - src_autoinc));
15282 mem = adjust_automodify_address (srcbase, SImode, addr,
15283 srcoffset + j * UNITS_PER_WORD);
15284 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15285 }
15286 srcoffset += block_size_bytes;
15287 }
15288
15289 /* Store words. */
15290 if (dst_aligned && interleave_factor > 1)
15291 {
15292 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15293 TRUE, dstbase, &dstoffset));
15294 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15295 }
15296 else
15297 {
15298 for (j = 0; j < interleave_factor; j++)
15299 {
15300 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15301 - dst_autoinc));
15302 mem = adjust_automodify_address (dstbase, SImode, addr,
15303 dstoffset + j * UNITS_PER_WORD);
15304 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15305 }
15306 dstoffset += block_size_bytes;
15307 }
15308
15309 remaining -= block_size_bytes;
15310 }
15311
15312 /* Copy any whole words left (note these aren't interleaved with any
15313 subsequent halfword/byte load/stores in the interests of simplicity). */
15314
15315 words = remaining / UNITS_PER_WORD;
15316
15317 gcc_assert (words < interleave_factor);
15318
15319 if (src_aligned && words > 1)
15320 {
15321 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15322 &srcoffset));
15323 src_autoinc += UNITS_PER_WORD * words;
15324 }
15325 else
15326 {
15327 for (j = 0; j < words; j++)
15328 {
15329 addr = plus_constant (Pmode, src,
15330 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15331 mem = adjust_automodify_address (srcbase, SImode, addr,
15332 srcoffset + j * UNITS_PER_WORD);
15333 if (src_aligned)
15334 emit_move_insn (regs[j], mem);
15335 else
15336 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15337 }
15338 srcoffset += words * UNITS_PER_WORD;
15339 }
15340
15341 if (dst_aligned && words > 1)
15342 {
15343 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15344 &dstoffset));
15345 dst_autoinc += words * UNITS_PER_WORD;
15346 }
15347 else
15348 {
15349 for (j = 0; j < words; j++)
15350 {
15351 addr = plus_constant (Pmode, dst,
15352 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15353 mem = adjust_automodify_address (dstbase, SImode, addr,
15354 dstoffset + j * UNITS_PER_WORD);
15355 if (dst_aligned)
15356 emit_move_insn (mem, regs[j]);
15357 else
15358 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15359 }
15360 dstoffset += words * UNITS_PER_WORD;
15361 }
15362
15363 remaining -= words * UNITS_PER_WORD;
15364
15365 gcc_assert (remaining < 4);
15366
15367 /* Copy a halfword if necessary. */
15368
15369 if (remaining >= 2)
15370 {
15371 halfword_tmp = gen_reg_rtx (SImode);
15372
15373 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15374 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15375 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15376
15377 /* Either write out immediately, or delay until we've loaded the last
15378 byte, depending on interleave factor. */
15379 if (interleave_factor == 1)
15380 {
15381 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15382 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15383 emit_insn (gen_unaligned_storehi (mem,
15384 gen_lowpart (HImode, halfword_tmp)));
15385 halfword_tmp = NULL;
15386 dstoffset += 2;
15387 }
15388
15389 remaining -= 2;
15390 srcoffset += 2;
15391 }
15392
15393 gcc_assert (remaining < 2);
15394
15395 /* Copy last byte. */
15396
15397 if ((remaining & 1) != 0)
15398 {
15399 byte_tmp = gen_reg_rtx (SImode);
15400
15401 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15402 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15403 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15404
15405 if (interleave_factor == 1)
15406 {
15407 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15408 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15409 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15410 byte_tmp = NULL;
15411 dstoffset++;
15412 }
15413
15414 remaining--;
15415 srcoffset++;
15416 }
15417
15418 /* Store last halfword if we haven't done so already. */
15419
15420 if (halfword_tmp)
15421 {
15422 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15423 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15424 emit_insn (gen_unaligned_storehi (mem,
15425 gen_lowpart (HImode, halfword_tmp)));
15426 dstoffset += 2;
15427 }
15428
15429 /* Likewise for last byte. */
15430
15431 if (byte_tmp)
15432 {
15433 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15434 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15435 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15436 dstoffset++;
15437 }
15438
15439 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15440 }
15441
15442 /* From mips_adjust_block_mem:
15443
15444 Helper function for doing a loop-based block operation on memory
15445 reference MEM. Each iteration of the loop will operate on LENGTH
15446 bytes of MEM.
15447
15448 Create a new base register for use within the loop and point it to
15449 the start of MEM. Create a new memory reference that uses this
15450 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15451
15452 static void
15453 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15454 rtx *loop_mem)
15455 {
15456 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15457
15458 /* Although the new mem does not refer to a known location,
15459 it does keep up to LENGTH bytes of alignment. */
15460 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15461 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15462 }
15463
15464 /* From mips_block_move_loop:
15465
15466 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15467 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15468 the memory regions do not overlap. */
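/* For example (an illustrative sketch): with LENGTH == 40 and
   BYTES_PER_ITER == 16 the loop below copies 32 bytes in two iterations,
   and the remaining 8 bytes are handled by a final call to
   arm_block_move_unaligned_straight.  */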
15469
15470 static void
15471 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15472 unsigned int interleave_factor,
15473 HOST_WIDE_INT bytes_per_iter)
15474 {
15475 rtx src_reg, dest_reg, final_src, test;
15476 HOST_WIDE_INT leftover;
15477
15478 leftover = length % bytes_per_iter;
15479 length -= leftover;
15480
15481 /* Create registers and memory references for use within the loop. */
15482 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15483 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15484
15485 /* Calculate the value that SRC_REG should have after the last iteration of
15486 the loop. */
15487 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15488 0, 0, OPTAB_WIDEN);
15489
15490 /* Emit the start of the loop. */
15491 rtx_code_label *label = gen_label_rtx ();
15492 emit_label (label);
15493
15494 /* Emit the loop body. */
15495 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15496 interleave_factor);
15497
15498 /* Move on to the next block. */
15499 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15500 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15501
15502 /* Emit the loop condition. */
15503 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15504 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15505
15506 /* Mop up any left-over bytes. */
15507 if (leftover)
15508 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15509 }
15510
15511 /* Emit a block move when either the source or destination is unaligned (not
15512 aligned to a four-byte boundary). This may need further tuning depending on
15513 core type, optimize_size setting, etc. */
15514
15515 static int
15516 arm_cpymemqi_unaligned (rtx *operands)
15517 {
15518 HOST_WIDE_INT length = INTVAL (operands[2]);
15519
15520 if (optimize_size)
15521 {
15522 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15523 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15524 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15525 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15526 or dst_aligned though: allow more interleaving in those cases since the
15527 resulting code can be smaller. */
15528 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15529 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15530
15531 if (length > 12)
15532 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15533 interleave_factor, bytes_per_iter);
15534 else
15535 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15536 interleave_factor);
15537 }
15538 else
15539 {
15540 /* Note that the loop created by arm_block_move_unaligned_loop may be
15541 subject to loop unrolling, which makes tuning this condition a little
15542 redundant. */
15543 if (length > 32)
15544 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15545 else
15546 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15547 }
15548
15549 return 1;
15550 }
15551
15552 int
15553 arm_gen_cpymemqi (rtx *operands)
15554 {
15555 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15556 HOST_WIDE_INT srcoffset, dstoffset;
15557 rtx src, dst, srcbase, dstbase;
15558 rtx part_bytes_reg = NULL;
15559 rtx mem;
15560
15561 if (!CONST_INT_P (operands[2])
15562 || !CONST_INT_P (operands[3])
15563 || INTVAL (operands[2]) > 64)
15564 return 0;
15565
15566 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15567 return arm_cpymemqi_unaligned (operands);
15568
15569 if (INTVAL (operands[3]) & 3)
15570 return 0;
15571
15572 dstbase = operands[0];
15573 srcbase = operands[1];
15574
15575 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15576 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15577
15578 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15579 out_words_to_go = INTVAL (operands[2]) / 4;
15580 last_bytes = INTVAL (operands[2]) & 3;
15581 dstoffset = srcoffset = 0;
15582
15583 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15584 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15585
15586 while (in_words_to_go >= 2)
15587 {
15588 if (in_words_to_go > 4)
15589 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15590 TRUE, srcbase, &srcoffset));
15591 else
15592 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15593 src, FALSE, srcbase,
15594 &srcoffset));
15595
15596 if (out_words_to_go)
15597 {
15598 if (out_words_to_go > 4)
15599 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15600 TRUE, dstbase, &dstoffset));
15601 else if (out_words_to_go != 1)
15602 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15603 out_words_to_go, dst,
15604 (last_bytes == 0
15605 ? FALSE : TRUE),
15606 dstbase, &dstoffset));
15607 else
15608 {
15609 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15610 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15611 if (last_bytes != 0)
15612 {
15613 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15614 dstoffset += 4;
15615 }
15616 }
15617 }
15618
15619 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15620 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15621 }
15622
15623 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15624 if (out_words_to_go)
15625 {
15626 rtx sreg;
15627
15628 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15629 sreg = copy_to_reg (mem);
15630
15631 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15632 emit_move_insn (mem, sreg);
15633 in_words_to_go--;
15634
15635 gcc_assert (!in_words_to_go); /* Sanity check */
15636 }
15637
15638 if (in_words_to_go)
15639 {
15640 gcc_assert (in_words_to_go > 0);
15641
15642 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15643 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15644 }
15645
15646 gcc_assert (!last_bytes || part_bytes_reg);
15647
15648 if (BYTES_BIG_ENDIAN && last_bytes)
15649 {
15650 rtx tmp = gen_reg_rtx (SImode);
15651
15652 /* The bytes we want are in the top end of the word. */
15653 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15654 GEN_INT (8 * (4 - last_bytes))));
15655 part_bytes_reg = tmp;
15656
15657 while (last_bytes)
15658 {
15659 mem = adjust_automodify_address (dstbase, QImode,
15660 plus_constant (Pmode, dst,
15661 last_bytes - 1),
15662 dstoffset + last_bytes - 1);
15663 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15664
15665 if (--last_bytes)
15666 {
15667 tmp = gen_reg_rtx (SImode);
15668 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15669 part_bytes_reg = tmp;
15670 }
15671 }
15672
15673 }
15674 else
15675 {
15676 if (last_bytes > 1)
15677 {
15678 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15679 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15680 last_bytes -= 2;
15681 if (last_bytes)
15682 {
15683 rtx tmp = gen_reg_rtx (SImode);
15684 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15685 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15686 part_bytes_reg = tmp;
15687 dstoffset += 2;
15688 }
15689 }
15690
15691 if (last_bytes)
15692 {
15693 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15694 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15695 }
15696 }
15697
15698 return 1;
15699 }
15700
15701 /* Helper for gen_cpymem_ldrd_strd. Return a copy of the memory rtx MEM with
15702 its address advanced by the size of its mode. */
15703 inline static rtx
15704 next_consecutive_mem (rtx mem)
15705 {
15706 machine_mode mode = GET_MODE (mem);
15707 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15708 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15709
15710 return adjust_automodify_address (mem, mode, addr, offset);
15711 }
15712
15713 /* Copy using LDRD/STRD instructions whenever possible.
15714 Returns true upon success. */
15715 bool
15716 gen_cpymem_ldrd_strd (rtx *operands)
15717 {
15718 unsigned HOST_WIDE_INT len;
15719 HOST_WIDE_INT align;
15720 rtx src, dst, base;
15721 rtx reg0;
15722 bool src_aligned, dst_aligned;
15723 bool src_volatile, dst_volatile;
15724
15725 gcc_assert (CONST_INT_P (operands[2]));
15726 gcc_assert (CONST_INT_P (operands[3]));
15727
15728 len = UINTVAL (operands[2]);
15729 if (len > 64)
15730 return false;
15731
15732 /* Maximum alignment we can assume for both src and dst buffers. */
15733 align = INTVAL (operands[3]);
15734
15735 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15736 return false;
15737
15738 /* Place src and dst addresses in registers
15739 and update the corresponding mem rtx. */
15740 dst = operands[0];
15741 dst_volatile = MEM_VOLATILE_P (dst);
15742 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15743 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15744 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15745
15746 src = operands[1];
15747 src_volatile = MEM_VOLATILE_P (src);
15748 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15749 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15750 src = adjust_automodify_address (src, VOIDmode, base, 0);
15751
15752 if (!unaligned_access && !(src_aligned && dst_aligned))
15753 return false;
15754
15755 if (src_volatile || dst_volatile)
15756 return false;
15757
15758 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15759 if (!(dst_aligned || src_aligned))
15760 return arm_gen_cpymemqi (operands);
15761
15762 /* If either src or dst is unaligned, we'll access it as pairs of
15763 unaligned SImode accesses. Otherwise we can generate DImode
15764 ldrd/strd instructions. */
15765 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15766 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15767
15768 while (len >= 8)
15769 {
15770 len -= 8;
15771 reg0 = gen_reg_rtx (DImode);
15772 rtx first_reg = NULL_RTX;
15773 rtx second_reg = NULL_RTX;
15774
15775 if (!src_aligned || !dst_aligned)
15776 {
15777 if (BYTES_BIG_ENDIAN)
15778 {
15779 second_reg = gen_lowpart (SImode, reg0);
15780 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15781 }
15782 else
15783 {
15784 first_reg = gen_lowpart (SImode, reg0);
15785 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15786 }
15787 }
15788 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15789 emit_move_insn (reg0, src);
15790 else if (src_aligned)
15791 emit_insn (gen_unaligned_loaddi (reg0, src));
15792 else
15793 {
15794 emit_insn (gen_unaligned_loadsi (first_reg, src));
15795 src = next_consecutive_mem (src);
15796 emit_insn (gen_unaligned_loadsi (second_reg, src));
15797 }
15798
15799 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15800 emit_move_insn (dst, reg0);
15801 else if (dst_aligned)
15802 emit_insn (gen_unaligned_storedi (dst, reg0));
15803 else
15804 {
15805 emit_insn (gen_unaligned_storesi (dst, first_reg));
15806 dst = next_consecutive_mem (dst);
15807 emit_insn (gen_unaligned_storesi (dst, second_reg));
15808 }
15809
15810 src = next_consecutive_mem (src);
15811 dst = next_consecutive_mem (dst);
15812 }
15813
15814 gcc_assert (len < 8);
15815 if (len >= 4)
15816 {
15817 /* More than a word but less than a double-word to copy. Copy a word. */
15818 reg0 = gen_reg_rtx (SImode);
15819 src = adjust_address (src, SImode, 0);
15820 dst = adjust_address (dst, SImode, 0);
15821 if (src_aligned)
15822 emit_move_insn (reg0, src);
15823 else
15824 emit_insn (gen_unaligned_loadsi (reg0, src));
15825
15826 if (dst_aligned)
15827 emit_move_insn (dst, reg0);
15828 else
15829 emit_insn (gen_unaligned_storesi (dst, reg0));
15830
15831 src = next_consecutive_mem (src);
15832 dst = next_consecutive_mem (dst);
15833 len -= 4;
15834 }
15835
15836 if (len == 0)
15837 return true;
15838
15839 /* Copy the remaining bytes. */
15840 if (len >= 2)
15841 {
15842 dst = adjust_address (dst, HImode, 0);
15843 src = adjust_address (src, HImode, 0);
15844 reg0 = gen_reg_rtx (SImode);
15845 if (src_aligned)
15846 emit_insn (gen_zero_extendhisi2 (reg0, src));
15847 else
15848 emit_insn (gen_unaligned_loadhiu (reg0, src));
15849
15850 if (dst_aligned)
15851 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15852 else
15853 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15854
15855 src = next_consecutive_mem (src);
15856 dst = next_consecutive_mem (dst);
15857 if (len == 2)
15858 return true;
15859 }
15860
15861 dst = adjust_address (dst, QImode, 0);
15862 src = adjust_address (src, QImode, 0);
15863 reg0 = gen_reg_rtx (QImode);
15864 emit_move_insn (reg0, src);
15865 emit_move_insn (dst, reg0);
15866 return true;
15867 }
15868
15869 /* Decompose the operands OP1 and OP2 of a 64-bit binary operation
15870 into their component 32-bit subregs. OP2 may be an immediate
15871 constant and we want to simplify it in that case. */
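/* For instance (an illustrative value): if OP2 is the DImode constant
   0x0000000100000002, *LO_OP2 becomes (const_int 2) and *HI_OP2 becomes
   (const_int 1), i.e. the least and most significant 32-bit halves.  */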
15872 void
15873 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15874 rtx *lo_op2, rtx *hi_op2)
15875 {
15876 *lo_op1 = gen_lowpart (SImode, op1);
15877 *hi_op1 = gen_highpart (SImode, op1);
15878 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15879 subreg_lowpart_offset (SImode, DImode));
15880 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15881 subreg_highpart_offset (SImode, DImode));
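  /* For example, if OP2 is the DImode constant 0x100000003, *LO_OP2
     becomes (const_int 3) and *HI_OP2 becomes (const_int 1).  */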
15882 }
15883
15884 /* Select a dominance comparison mode if possible for a test of the general
15885 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15886 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15887 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15888 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15889 In all cases OP will be either EQ or NE, but we don't need to know which
15890 here. If we are unable to support a dominance comparison we return
15891 CC mode. This will then fail to match for the RTL expressions that
15892 generate this call. */
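/* For illustration: with COND_OR == DOM_CC_X_OR_Y, a test of the form
   ((ge a b) || (gt c d)) is dominated by GE, so CC_DGEmode is returned.  */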
15893 machine_mode
15894 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15895 {
15896 enum rtx_code cond1, cond2;
15897 int swapped = 0;
15898
15899 /* Currently we will probably get the wrong result if the individual
15900 comparisons are not simple. This also ensures that it is safe to
15901 reverse a comparison if necessary. */
15902 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15903 != CCmode)
15904 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15905 != CCmode))
15906 return CCmode;
15907
15908 /* The if_then_else variant of this tests the second condition if the
15909 first passes, but is true if the first fails. Reverse the first
15910 condition to get a true "inclusive-or" expression. */
15911 if (cond_or == DOM_CC_NX_OR_Y)
15912 cond1 = reverse_condition (cond1);
15913
15914 /* If the comparisons are not equal, and one doesn't dominate the other,
15915 then we can't do this. */
15916 if (cond1 != cond2
15917 && !comparison_dominates_p (cond1, cond2)
15918 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15919 return CCmode;
15920
15921 if (swapped)
15922 std::swap (cond1, cond2);
15923
15924 switch (cond1)
15925 {
15926 case EQ:
15927 if (cond_or == DOM_CC_X_AND_Y)
15928 return CC_DEQmode;
15929
15930 switch (cond2)
15931 {
15932 case EQ: return CC_DEQmode;
15933 case LE: return CC_DLEmode;
15934 case LEU: return CC_DLEUmode;
15935 case GE: return CC_DGEmode;
15936 case GEU: return CC_DGEUmode;
15937 default: gcc_unreachable ();
15938 }
15939
15940 case LT:
15941 if (cond_or == DOM_CC_X_AND_Y)
15942 return CC_DLTmode;
15943
15944 switch (cond2)
15945 {
15946 case LT:
15947 return CC_DLTmode;
15948 case LE:
15949 return CC_DLEmode;
15950 case NE:
15951 return CC_DNEmode;
15952 default:
15953 gcc_unreachable ();
15954 }
15955
15956 case GT:
15957 if (cond_or == DOM_CC_X_AND_Y)
15958 return CC_DGTmode;
15959
15960 switch (cond2)
15961 {
15962 case GT:
15963 return CC_DGTmode;
15964 case GE:
15965 return CC_DGEmode;
15966 case NE:
15967 return CC_DNEmode;
15968 default:
15969 gcc_unreachable ();
15970 }
15971
15972 case LTU:
15973 if (cond_or == DOM_CC_X_AND_Y)
15974 return CC_DLTUmode;
15975
15976 switch (cond2)
15977 {
15978 case LTU:
15979 return CC_DLTUmode;
15980 case LEU:
15981 return CC_DLEUmode;
15982 case NE:
15983 return CC_DNEmode;
15984 default:
15985 gcc_unreachable ();
15986 }
15987
15988 case GTU:
15989 if (cond_or == DOM_CC_X_AND_Y)
15990 return CC_DGTUmode;
15991
15992 switch (cond2)
15993 {
15994 case GTU:
15995 return CC_DGTUmode;
15996 case GEU:
15997 return CC_DGEUmode;
15998 case NE:
15999 return CC_DNEmode;
16000 default:
16001 gcc_unreachable ();
16002 }
16003
16004 /* The remaining cases only occur when both comparisons are the
16005 same. */
16006 case NE:
16007 gcc_assert (cond1 == cond2);
16008 return CC_DNEmode;
16009
16010 case LE:
16011 gcc_assert (cond1 == cond2);
16012 return CC_DLEmode;
16013
16014 case GE:
16015 gcc_assert (cond1 == cond2);
16016 return CC_DGEmode;
16017
16018 case LEU:
16019 gcc_assert (cond1 == cond2);
16020 return CC_DLEUmode;
16021
16022 case GEU:
16023 gcc_assert (cond1 == cond2);
16024 return CC_DGEUmode;
16025
16026 default:
16027 gcc_unreachable ();
16028 }
16029 }
16030
16031 machine_mode
16032 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16033 {
16034 /* All floating point compares return CCFP if it is an equality
16035 comparison, and CCFPE otherwise. */
16036 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16037 {
16038 switch (op)
16039 {
16040 case EQ:
16041 case NE:
16042 case UNORDERED:
16043 case ORDERED:
16044 case UNLT:
16045 case UNLE:
16046 case UNGT:
16047 case UNGE:
16048 case UNEQ:
16049 case LTGT:
16050 return CCFPmode;
16051
16052 case LT:
16053 case LE:
16054 case GT:
16055 case GE:
16056 return CCFPEmode;
16057
16058 default:
16059 gcc_unreachable ();
16060 }
16061 }
16062
16063 /* A compare with a shifted operand. Because of canonicalization, the
16064 comparison will have to be swapped when we emit the assembler. */
16065 if (GET_MODE (y) == SImode
16066 && (REG_P (y) || (SUBREG_P (y)))
16067 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16068 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16069 || GET_CODE (x) == ROTATERT))
16070 return CC_SWPmode;
16071
16072 /* A widened compare of the sum of a value plus a carry against a
16073 constant. This is a representation of RSC. We want to swap the
16074 result of the comparison at output. Not valid if the Z bit is
16075 needed. */
16076 if (GET_MODE (x) == DImode
16077 && GET_CODE (x) == PLUS
16078 && arm_borrow_operation (XEXP (x, 1), DImode)
16079 && CONST_INT_P (y)
16080 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16081 && (op == LE || op == GT))
16082 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16083 && (op == LEU || op == GTU))))
16084 return CC_SWPmode;
16085
16086 /* If X is a constant we want to use CC_RSBmode. This is
16087 non-canonical, but arm_gen_compare_reg uses this to generate the
16088 correct canonical form. */
16089 if (GET_MODE (y) == SImode
16090 && (REG_P (y) || SUBREG_P (y))
16091 && CONST_INT_P (x))
16092 return CC_RSBmode;
16093
16094 /* This operation is performed swapped, but since we only rely on the Z
16095 flag we don't need an additional mode. */
16096 if (GET_MODE (y) == SImode
16097 && (REG_P (y) || (SUBREG_P (y)))
16098 && GET_CODE (x) == NEG
16099 && (op == EQ || op == NE))
16100 return CC_Zmode;
16101
16102 /* This is a special case that is used by combine to allow a
16103 comparison of a shifted byte load to be split into a zero-extend
16104 followed by a comparison of the shifted integer (only valid for
16105 equalities and unsigned inequalities). */
16106 if (GET_MODE (x) == SImode
16107 && GET_CODE (x) == ASHIFT
16108 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16109 && GET_CODE (XEXP (x, 0)) == SUBREG
16110 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16111 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16112 && (op == EQ || op == NE
16113 || op == GEU || op == GTU || op == LTU || op == LEU)
16114 && CONST_INT_P (y))
16115 return CC_Zmode;
16116
16117 /* A construct for a conditional compare, if the false arm contains
16118 0, then both conditions must be true, otherwise either condition
16119 must be true. Not all conditions are possible, so CCmode is
16120 returned if it can't be done. */
16121 if (GET_CODE (x) == IF_THEN_ELSE
16122 && (XEXP (x, 2) == const0_rtx
16123 || XEXP (x, 2) == const1_rtx)
16124 && COMPARISON_P (XEXP (x, 0))
16125 && COMPARISON_P (XEXP (x, 1)))
16126 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16127 INTVAL (XEXP (x, 2)));
16128
16129 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16130 if (GET_CODE (x) == AND
16131 && (op == EQ || op == NE)
16132 && COMPARISON_P (XEXP (x, 0))
16133 && COMPARISON_P (XEXP (x, 1)))
16134 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16135 DOM_CC_X_AND_Y);
16136
16137 if (GET_CODE (x) == IOR
16138 && (op == EQ || op == NE)
16139 && COMPARISON_P (XEXP (x, 0))
16140 && COMPARISON_P (XEXP (x, 1)))
16141 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16142 DOM_CC_X_OR_Y);
16143
16144 /* An operation (on Thumb) where we want to test for a single bit.
16145 This is done by shifting that bit up into the top bit of a
16146 scratch register; we can then branch on the sign bit. */
16147 if (TARGET_THUMB1
16148 && GET_MODE (x) == SImode
16149 && (op == EQ || op == NE)
16150 && GET_CODE (x) == ZERO_EXTRACT
16151 && XEXP (x, 1) == const1_rtx)
16152 return CC_Nmode;
16153
16154 /* An operation that sets the condition codes as a side-effect, the
16155 V flag is not set correctly, so we can only use comparisons where
16156 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16157 instead.) */
16158 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16159 if (GET_MODE (x) == SImode
16160 && y == const0_rtx
16161 && (op == EQ || op == NE || op == LT || op == GE)
16162 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16163 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16164 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16165 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16166 || GET_CODE (x) == LSHIFTRT
16167 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16168 || GET_CODE (x) == ROTATERT
16169 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16170 return CC_NZmode;
16171
16172 /* A comparison of ~reg with a const is really a special
16173    canonicalization of compare (~const, reg), which is a reverse
16174 subtract operation. We may not get here if CONST is 0, but that
16175 doesn't matter because ~0 isn't a valid immediate for RSB. */
16176 if (GET_MODE (x) == SImode
16177 && GET_CODE (x) == NOT
16178 && CONST_INT_P (y))
16179 return CC_RSBmode;
16180
16181 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16182 return CC_Zmode;
16183
16184 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16185 && GET_CODE (x) == PLUS
16186 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16187 return CC_Cmode;
16188
16189 if (GET_MODE (x) == DImode
16190 && GET_CODE (x) == PLUS
16191 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16192 && CONST_INT_P (y)
16193 && UINTVAL (y) == 0x800000000
16194 && (op == GEU || op == LTU))
16195 return CC_ADCmode;
16196
16197 if (GET_MODE (x) == DImode
16198 && (op == GE || op == LT)
16199 && GET_CODE (x) == SIGN_EXTEND
16200 && ((GET_CODE (y) == PLUS
16201 && arm_borrow_operation (XEXP (y, 0), DImode))
16202 || arm_borrow_operation (y, DImode)))
16203 return CC_NVmode;
16204
16205 if (GET_MODE (x) == DImode
16206 && (op == GEU || op == LTU)
16207 && GET_CODE (x) == ZERO_EXTEND
16208 && ((GET_CODE (y) == PLUS
16209 && arm_borrow_operation (XEXP (y, 0), DImode))
16210 || arm_borrow_operation (y, DImode)))
16211 return CC_Bmode;
16212
16213 if (GET_MODE (x) == DImode
16214 && (op == EQ || op == NE)
16215 && (GET_CODE (x) == PLUS
16216 || GET_CODE (x) == MINUS)
16217 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16218 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16219 && GET_CODE (y) == SIGN_EXTEND
16220 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16221 return CC_Vmode;
16222
16223 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16224 return GET_MODE (x);
16225
16226 return CCmode;
16227 }
16228
16229 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16230 the sequence of instructions needed to generate a suitable condition
16231 code register. Return the CC register result. */
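/* For illustration: an EQ test of DImode X against the constant 5 is
   handled below by adding -5 to the low word in a scratch register,
   ORing the result with the high word and comparing that against zero,
   so that only the Z flag is needed.  */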
16232 static rtx
16233 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16234 {
16235 machine_mode mode;
16236 rtx cc_reg;
16237
16238 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16239 gcc_assert (TARGET_32BIT);
16240 gcc_assert (!CONST_INT_P (x));
16241
16242 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16243 subreg_lowpart_offset (SImode, DImode));
16244 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16245 subreg_highpart_offset (SImode, DImode));
16246 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16247 subreg_lowpart_offset (SImode, DImode));
16248 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16249 subreg_highpart_offset (SImode, DImode));
16250 switch (code)
16251 {
16252 case EQ:
16253 case NE:
16254 {
16255 if (y_lo == const0_rtx || y_hi == const0_rtx)
16256 {
16257 if (y_lo != const0_rtx)
16258 {
16259 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16260
16261 gcc_assert (y_hi == const0_rtx);
16262 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16263 if (!arm_add_operand (y_lo, SImode))
16264 y_lo = force_reg (SImode, y_lo);
16265 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16266 x_lo = scratch2;
16267 }
16268 else if (y_hi != const0_rtx)
16269 {
16270 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16271
16272 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16273 if (!arm_add_operand (y_hi, SImode))
16274 y_hi = force_reg (SImode, y_hi);
16275 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16276 x_hi = scratch2;
16277 }
16278
16279 if (!scratch)
16280 {
16281 gcc_assert (!reload_completed);
16282 scratch = gen_rtx_SCRATCH (SImode);
16283 }
16284
16285 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16286 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16287
16288 rtx set
16289 = gen_rtx_SET (cc_reg,
16290 gen_rtx_COMPARE (CC_NZmode,
16291 gen_rtx_IOR (SImode, x_lo, x_hi),
16292 const0_rtx));
16293 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16294 clobber)));
16295 return cc_reg;
16296 }
16297
16298 if (!arm_add_operand (y_lo, SImode))
16299 y_lo = force_reg (SImode, y_lo);
16300
16301 if (!arm_add_operand (y_hi, SImode))
16302 y_hi = force_reg (SImode, y_hi);
16303
16304 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16305 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16306 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16307 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16308 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16309
16310 emit_insn (gen_rtx_SET (cc_reg,
16311 gen_rtx_COMPARE (mode, conjunction,
16312 const0_rtx)));
16313 return cc_reg;
16314 }
16315
16316 case LT:
16317 case GE:
16318 {
16319 if (y_lo == const0_rtx)
16320 {
16321 /* If the low word of y is 0, then this is simply a normal
16322 compare of the upper words. */
16323 if (!arm_add_operand (y_hi, SImode))
16324 y_hi = force_reg (SImode, y_hi);
16325
16326 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16327 }
16328
16329 if (!arm_add_operand (y_lo, SImode))
16330 y_lo = force_reg (SImode, y_lo);
16331
16332 rtx cmp1
16333 = gen_rtx_LTU (DImode,
16334 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16335 const0_rtx);
16336
16337 if (!scratch)
16338 scratch = gen_rtx_SCRATCH (SImode);
16339
16340 if (!arm_not_operand (y_hi, SImode))
16341 y_hi = force_reg (SImode, y_hi);
16342
16343 rtx_insn *insn;
16344 if (y_hi == const0_rtx)
16345 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16346 cmp1));
16347 else if (CONST_INT_P (y_hi))
16348 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16349 y_hi, cmp1));
16350 else
16351 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16352 cmp1));
16353 return SET_DEST (single_set (insn));
16354 }
16355
16356 case LE:
16357 case GT:
16358 {
16359 /* During expansion, we only expect to get here if y is a
16360 constant that we want to handle, otherwise we should have
16361 swapped the operands already. */
16362 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16363
16364 if (!const_ok_for_arm (INTVAL (y_lo)))
16365 y_lo = force_reg (SImode, y_lo);
16366
16367 /* Perform a reverse subtract and compare. */
16368 rtx cmp1
16369 = gen_rtx_LTU (DImode,
16370 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16371 const0_rtx);
16372 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16373 x_hi, cmp1));
16374 return SET_DEST (single_set (insn));
16375 }
16376
16377 case LTU:
16378 case GEU:
16379 {
16380 if (y_lo == const0_rtx)
16381 {
16382 /* If the low word of y is 0, then this is simply a normal
16383 compare of the upper words. */
16384 if (!arm_add_operand (y_hi, SImode))
16385 y_hi = force_reg (SImode, y_hi);
16386
16387 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16388 }
16389
16390 if (!arm_add_operand (y_lo, SImode))
16391 y_lo = force_reg (SImode, y_lo);
16392
16393 rtx cmp1
16394 = gen_rtx_LTU (DImode,
16395 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16396 const0_rtx);
16397
16398 if (!scratch)
16399 scratch = gen_rtx_SCRATCH (SImode);
16400 if (!arm_not_operand (y_hi, SImode))
16401 y_hi = force_reg (SImode, y_hi);
16402
16403 rtx_insn *insn;
16404 if (y_hi == const0_rtx)
16405 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16406 cmp1));
16407 else if (CONST_INT_P (y_hi))
16408 {
16409 /* Constant is viewed as unsigned when zero-extended. */
16410 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16411 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16412 y_hi, cmp1));
16413 }
16414 else
16415 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16416 cmp1));
16417 return SET_DEST (single_set (insn));
16418 }
16419
16420 case LEU:
16421 case GTU:
16422 {
16423 /* During expansion, we only expect to get here if y is a
16424 constant that we want to handle, otherwise we should have
16425 swapped the operands already. */
16426 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16427
16428 if (!const_ok_for_arm (INTVAL (y_lo)))
16429 y_lo = force_reg (SImode, y_lo);
16430
16431 /* Perform a reverse subtract and compare. */
16432 rtx cmp1
16433 = gen_rtx_LTU (DImode,
16434 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16435 const0_rtx);
16436 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16437 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16438 x_hi, cmp1));
16439 return SET_DEST (single_set (insn));
16440 }
16441
16442 default:
16443 gcc_unreachable ();
16444 }
16445 }
16446
16447 /* X and Y are two things to compare using CODE. Emit the compare insn and
16448    return the rtx for the condition-code register in the proper mode.  */
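/* For illustration: when X is a CONST_INT, SELECT_CC_MODE returns
   CC_RSBmode and the code below emits the comparison through
   gen_rsb_imm_compare_scratch, passing the bitwise complement of the
   constant so that the pattern can reconstruct the canonical
   reverse-subtract form.  */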
16449 rtx
16450 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16451 {
16452 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16453 return arm_gen_dicompare_reg (code, x, y, scratch);
16454
16455 machine_mode mode = SELECT_CC_MODE (code, x, y);
16456 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16457 if (mode == CC_RSBmode)
16458 {
16459 if (!scratch)
16460 scratch = gen_rtx_SCRATCH (SImode);
16461 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16462 GEN_INT (~UINTVAL (x)), y));
16463 }
16464 else
16465 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16466
16467 return cc_reg;
16468 }
16469
16470 /* Generate a sequence of insns that will generate the correct return
16471 address mask depending on the physical architecture that the program
16472 is running on. */
16473 rtx
16474 arm_gen_return_addr_mask (void)
16475 {
16476 rtx reg = gen_reg_rtx (Pmode);
16477
16478 emit_insn (gen_return_addr_mask (reg));
16479 return reg;
16480 }
16481
16482 void
16483 arm_reload_in_hi (rtx *operands)
16484 {
16485 rtx ref = operands[1];
16486 rtx base, scratch;
16487 HOST_WIDE_INT offset = 0;
16488
16489 if (SUBREG_P (ref))
16490 {
16491 offset = SUBREG_BYTE (ref);
16492 ref = SUBREG_REG (ref);
16493 }
16494
16495 if (REG_P (ref))
16496 {
16497 /* We have a pseudo which has been spilt onto the stack; there
16498 are two cases here: the first where there is a simple
16499 stack-slot replacement and a second where the stack-slot is
16500 out of range, or is used as a subreg. */
16501 if (reg_equiv_mem (REGNO (ref)))
16502 {
16503 ref = reg_equiv_mem (REGNO (ref));
16504 base = find_replacement (&XEXP (ref, 0));
16505 }
16506 else
16507 /* The slot is out of range, or was dressed up in a SUBREG. */
16508 base = reg_equiv_address (REGNO (ref));
16509
16510 /* PR 62554: If there is no equivalent memory location then just move
16511 the value as an SImode register move. This happens when the target
16512 architecture variant does not have an HImode register move. */
16513 if (base == NULL)
16514 {
16515 gcc_assert (REG_P (operands[0]));
16516 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16517 gen_rtx_SUBREG (SImode, ref, 0)));
16518 return;
16519 }
16520 }
16521 else
16522 base = find_replacement (&XEXP (ref, 0));
16523
16524 /* Handle the case where the address is too complex to be offset by 1. */
16525 if (GET_CODE (base) == MINUS
16526 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16527 {
16528 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16529
16530 emit_set_insn (base_plus, base);
16531 base = base_plus;
16532 }
16533 else if (GET_CODE (base) == PLUS)
16534 {
16535 /* The addend must be CONST_INT, or we would have dealt with it above. */
16536 HOST_WIDE_INT hi, lo;
16537
16538 offset += INTVAL (XEXP (base, 1));
16539 base = XEXP (base, 0);
16540
16541 /* Rework the address into a legal sequence of insns. */
16542 /* Valid range for lo is -4095 -> 4095 */
16543 lo = (offset >= 0
16544 ? (offset & 0xfff)
16545 : -((-offset) & 0xfff));
16546
16547 /* Corner case, if lo is the max offset then we would be out of range
16548 once we have added the additional 1 below, so bump the msb into the
16549 pre-loading insn(s). */
16550 if (lo == 4095)
16551 lo &= 0x7ff;
16552
16553 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16554 ^ (HOST_WIDE_INT) 0x80000000)
16555 - (HOST_WIDE_INT) 0x80000000);
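      /* Worked example: for OFFSET == 4100, LO becomes 4 and HI becomes
	 4096; for the corner case OFFSET == 4095, LO is reduced to 2047 and
	 HI becomes 2048, so that both HI and LO + 1 remain in range.  */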
16556
16557 gcc_assert (hi + lo == offset);
16558
16559 if (hi != 0)
16560 {
16561 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16562
16563 /* Get the base address; addsi3 knows how to handle constants
16564 that require more than one insn. */
16565 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16566 base = base_plus;
16567 offset = lo;
16568 }
16569 }
16570
16571 /* Operands[2] may overlap operands[0] (though it won't overlap
16572 operands[1]), that's why we asked for a DImode reg -- so we can
16573 use the bit that does not overlap. */
16574 if (REGNO (operands[2]) == REGNO (operands[0]))
16575 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16576 else
16577 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16578
16579 emit_insn (gen_zero_extendqisi2 (scratch,
16580 gen_rtx_MEM (QImode,
16581 plus_constant (Pmode, base,
16582 offset))));
16583 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16584 gen_rtx_MEM (QImode,
16585 plus_constant (Pmode, base,
16586 offset + 1))));
16587 if (!BYTES_BIG_ENDIAN)
16588 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16589 gen_rtx_IOR (SImode,
16590 gen_rtx_ASHIFT
16591 (SImode,
16592 gen_rtx_SUBREG (SImode, operands[0], 0),
16593 GEN_INT (8)),
16594 scratch));
16595 else
16596 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16597 gen_rtx_IOR (SImode,
16598 gen_rtx_ASHIFT (SImode, scratch,
16599 GEN_INT (8)),
16600 gen_rtx_SUBREG (SImode, operands[0], 0)));
16601 }
16602
16603 /* Handle storing a half-word to memory during reload by synthesizing as two
16604 byte stores. Take care not to clobber the input values until after we
16605 have moved them somewhere safe. This code assumes that if the DImode
16606 scratch in operands[2] overlaps either the input value or output address
16607 in some way, then that value must die in this insn (we absolutely need
16608 two scratch registers for some corner cases). */
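/* For illustration, on a little-endian target the expansion below is
   roughly:
	strb	outval, [base, #offset]
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]
   with the byte containing the most significant bits stored at the lower
   address on big-endian targets.  */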
16609 void
16610 arm_reload_out_hi (rtx *operands)
16611 {
16612 rtx ref = operands[0];
16613 rtx outval = operands[1];
16614 rtx base, scratch;
16615 HOST_WIDE_INT offset = 0;
16616
16617 if (SUBREG_P (ref))
16618 {
16619 offset = SUBREG_BYTE (ref);
16620 ref = SUBREG_REG (ref);
16621 }
16622
16623 if (REG_P (ref))
16624 {
16625 /* We have a pseudo which has been spilt onto the stack; there
16626 are two cases here: the first where there is a simple
16627 stack-slot replacement and a second where the stack-slot is
16628 out of range, or is used as a subreg. */
16629 if (reg_equiv_mem (REGNO (ref)))
16630 {
16631 ref = reg_equiv_mem (REGNO (ref));
16632 base = find_replacement (&XEXP (ref, 0));
16633 }
16634 else
16635 /* The slot is out of range, or was dressed up in a SUBREG. */
16636 base = reg_equiv_address (REGNO (ref));
16637
16638 /* PR 62254: If there is no equivalent memory location then just move
16639 the value as an SImode register move. This happens when the target
16640 architecture variant does not have an HImode register move. */
16641 if (base == NULL)
16642 {
16643 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16644
16645 if (REG_P (outval))
16646 {
16647 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16648 gen_rtx_SUBREG (SImode, outval, 0)));
16649 }
16650 else /* SUBREG_P (outval) */
16651 {
16652 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16653 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16654 SUBREG_REG (outval)));
16655 else
16656 /* FIXME: Handle other cases ? */
16657 gcc_unreachable ();
16658 }
16659 return;
16660 }
16661 }
16662 else
16663 base = find_replacement (&XEXP (ref, 0));
16664
16665 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16666
16667 /* Handle the case where the address is too complex to be offset by 1. */
16668 if (GET_CODE (base) == MINUS
16669 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16670 {
16671 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16672
16673 /* Be careful not to destroy OUTVAL. */
16674 if (reg_overlap_mentioned_p (base_plus, outval))
16675 {
16676 /* Updating base_plus might destroy outval, see if we can
16677 swap the scratch and base_plus. */
16678 if (!reg_overlap_mentioned_p (scratch, outval))
16679 std::swap (scratch, base_plus);
16680 else
16681 {
16682 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16683
16684 /* Be conservative and copy OUTVAL into the scratch now,
16685 this should only be necessary if outval is a subreg
16686 of something larger than a word. */
16687 /* XXX Might this clobber base? I can't see how it can,
16688 since scratch is known to overlap with OUTVAL, and
16689 must be wider than a word. */
16690 emit_insn (gen_movhi (scratch_hi, outval));
16691 outval = scratch_hi;
16692 }
16693 }
16694
16695 emit_set_insn (base_plus, base);
16696 base = base_plus;
16697 }
16698 else if (GET_CODE (base) == PLUS)
16699 {
16700 /* The addend must be CONST_INT, or we would have dealt with it above. */
16701 HOST_WIDE_INT hi, lo;
16702
16703 offset += INTVAL (XEXP (base, 1));
16704 base = XEXP (base, 0);
16705
16706 /* Rework the address into a legal sequence of insns. */
16707 /* Valid range for lo is -4095 -> 4095 */
16708 lo = (offset >= 0
16709 ? (offset & 0xfff)
16710 : -((-offset) & 0xfff));
16711
16712 /* Corner case, if lo is the max offset then we would be out of range
16713 once we have added the additional 1 below, so bump the msb into the
16714 pre-loading insn(s). */
16715 if (lo == 4095)
16716 lo &= 0x7ff;
16717
16718 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16719 ^ (HOST_WIDE_INT) 0x80000000)
16720 - (HOST_WIDE_INT) 0x80000000);
16721
16722 gcc_assert (hi + lo == offset);
16723
16724 if (hi != 0)
16725 {
16726 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16727
16728 /* Be careful not to destroy OUTVAL. */
16729 if (reg_overlap_mentioned_p (base_plus, outval))
16730 {
16731 /* Updating base_plus might destroy outval, see if we
16732 can swap the scratch and base_plus. */
16733 if (!reg_overlap_mentioned_p (scratch, outval))
16734 std::swap (scratch, base_plus);
16735 else
16736 {
16737 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16738
16739 /* Be conservative and copy outval into scratch now,
16740 this should only be necessary if outval is a
16741 subreg of something larger than a word. */
16742 /* XXX Might this clobber base? I can't see how it
16743 can, since scratch is known to overlap with
16744 outval. */
16745 emit_insn (gen_movhi (scratch_hi, outval));
16746 outval = scratch_hi;
16747 }
16748 }
16749
16750 /* Get the base address; addsi3 knows how to handle constants
16751 that require more than one insn. */
16752 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16753 base = base_plus;
16754 offset = lo;
16755 }
16756 }
16757
16758 if (BYTES_BIG_ENDIAN)
16759 {
16760 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16761 plus_constant (Pmode, base,
16762 offset + 1)),
16763 gen_lowpart (QImode, outval)));
16764 emit_insn (gen_lshrsi3 (scratch,
16765 gen_rtx_SUBREG (SImode, outval, 0),
16766 GEN_INT (8)));
16767 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16768 offset)),
16769 gen_lowpart (QImode, scratch)));
16770 }
16771 else
16772 {
16773 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16774 offset)),
16775 gen_lowpart (QImode, outval)));
16776 emit_insn (gen_lshrsi3 (scratch,
16777 gen_rtx_SUBREG (SImode, outval, 0),
16778 GEN_INT (8)));
16779 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16780 plus_constant (Pmode, base,
16781 offset + 1)),
16782 gen_lowpart (QImode, scratch)));
16783 }
16784 }
16785
16786 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16787 (padded to the size of a word) should be passed in a register. */
16788
16789 static bool
16790 arm_must_pass_in_stack (const function_arg_info &arg)
16791 {
16792 if (TARGET_AAPCS_BASED)
16793 return must_pass_in_stack_var_size (arg);
16794 else
16795 return must_pass_in_stack_var_size_or_pad (arg);
16796 }
16797
16798
16799 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16800 byte of a stack argument has useful data. For legacy APCS ABIs we use
16801 the default. For AAPCS based ABIs small aggregate types are placed
16802 in the lowest memory address. */
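/* For example, on a big-endian AAPCS target a small integral argument is
   padded downward (the padding occupies the lowest bytes), while a small
   structure argument is padded upward so that its data starts at the
   lowest address.  */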
16803
16804 static pad_direction
16805 arm_function_arg_padding (machine_mode mode, const_tree type)
16806 {
16807 if (!TARGET_AAPCS_BASED)
16808 return default_function_arg_padding (mode, type);
16809
16810 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16811 return PAD_DOWNWARD;
16812
16813 return PAD_UPWARD;
16814 }
16815
16816
16817 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16818 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16819 register has useful data, and return the opposite if the most
16820 significant byte does. */
16821
16822 bool
16823 arm_pad_reg_upward (machine_mode mode,
16824 tree type, int first ATTRIBUTE_UNUSED)
16825 {
16826 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16827 {
16828 /* For AAPCS, small aggregates, small fixed-point types,
16829 and small complex types are always padded upwards. */
16830 if (type)
16831 {
16832 if ((AGGREGATE_TYPE_P (type)
16833 || TREE_CODE (type) == COMPLEX_TYPE
16834 || FIXED_POINT_TYPE_P (type))
16835 && int_size_in_bytes (type) <= 4)
16836 return true;
16837 }
16838 else
16839 {
16840 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16841 && GET_MODE_SIZE (mode) <= 4)
16842 return true;
16843 }
16844 }
16845
16846 /* Otherwise, use default padding. */
16847 return !BYTES_BIG_ENDIAN;
16848 }
16849
16850 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16851 assuming that the address in the base register is word aligned. */
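/* For example, an offset of 1020 is accepted in Thumb-2 state (a multiple
   of 4 within +/-1020), whereas 1022 is rejected; in ARM state the limit
   is +/-255, so an offset of 300 is rejected.  */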
16852 bool
16853 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16854 {
16855 HOST_WIDE_INT max_offset;
16856
16857 /* Offset must be a multiple of 4 in Thumb mode. */
16858 if (TARGET_THUMB2 && ((offset & 3) != 0))
16859 return false;
16860
16861 if (TARGET_THUMB2)
16862 max_offset = 1020;
16863 else if (TARGET_ARM)
16864 max_offset = 255;
16865 else
16866 return false;
16867
16868 return ((offset <= max_offset) && (offset >= -max_offset));
16869 }
16870
16871 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16872 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16873    Assumes that the address in the base register RN is word aligned.  The pattern
16874    guarantees that both memory accesses use the same base register, that the
16875    offsets are constants within range, and that the gap between the offsets is 4.
16876    Once reload is complete, also check that the registers are legal.  WBACK indicates
16877    whether the address is updated.  LOAD indicates whether the access is a load or a store.  */
16878 bool
16879 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16880 bool wback, bool load)
16881 {
16882 unsigned int t, t2, n;
16883
16884 if (!reload_completed)
16885 return true;
16886
16887 if (!offset_ok_for_ldrd_strd (offset))
16888 return false;
16889
16890 t = REGNO (rt);
16891 t2 = REGNO (rt2);
16892 n = REGNO (rn);
16893
16894 if ((TARGET_THUMB2)
16895 && ((wback && (n == t || n == t2))
16896 || (t == SP_REGNUM)
16897 || (t == PC_REGNUM)
16898 || (t2 == SP_REGNUM)
16899 || (t2 == PC_REGNUM)
16900 || (!load && (n == PC_REGNUM))
16901 || (load && (t == t2))
16902 /* Triggers Cortex-M3 LDRD errata. */
16903 || (!wback && load && fix_cm3_ldrd && (n == t))))
16904 return false;
16905
16906 if ((TARGET_ARM)
16907 && ((wback && (n == t || n == t2))
16908 || (t2 == PC_REGNUM)
16909 || (t % 2 != 0) /* First destination register is not even. */
16910 || (t2 != t + 1)
16911 /* PC can be used as base register (for offset addressing only),
16912 	 but it is deprecated.  */
16913 || (n == PC_REGNUM)))
16914 return false;
16915
16916 return true;
16917 }
16918
16919 /* Return true if a 64-bit access with alignment ALIGN and with a
16920 constant offset OFFSET from the base pointer is permitted on this
16921 architecture. */
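/* For example, when unaligned accesses are enabled a word-aligned access
   at offset 4 is permitted; when they are disabled the access must be
   doubleword aligned and the offset a multiple of 8.  */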
16922 static bool
16923 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16924 {
16925 return (unaligned_access
16926 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16927 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16928 }
16929
16930 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16931 operand MEM's address contains an immediate offset from the base
16932 register and has no side effects, in which case it sets BASE,
16933 OFFSET and ALIGN accordingly. */
16934 static bool
16935 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16936 {
16937 rtx addr;
16938
16939 gcc_assert (base != NULL && offset != NULL);
16940
16941 /* TODO: Handle more general memory operand patterns, such as
16942 PRE_DEC and PRE_INC. */
16943
16944 if (side_effects_p (mem))
16945 return false;
16946
16947 /* Can't deal with subregs. */
16948 if (SUBREG_P (mem))
16949 return false;
16950
16951 gcc_assert (MEM_P (mem));
16952
16953 *offset = const0_rtx;
16954 *align = MEM_ALIGN (mem);
16955
16956 addr = XEXP (mem, 0);
16957
16958 /* If addr isn't valid for DImode, then we can't handle it. */
16959 if (!arm_legitimate_address_p (DImode, addr,
16960 reload_in_progress || reload_completed))
16961 return false;
16962
16963 if (REG_P (addr))
16964 {
16965 *base = addr;
16966 return true;
16967 }
16968 else if (GET_CODE (addr) == PLUS)
16969 {
16970 *base = XEXP (addr, 0);
16971 *offset = XEXP (addr, 1);
16972 return (REG_P (*base) && CONST_INT_P (*offset));
16973 }
16974
16975 return false;
16976 }
16977
16978 /* Called from a peephole2 to replace two word-size accesses with a
16979 single LDRD/STRD instruction. Returns true iff we can generate a
16980 new instruction sequence. That is, both accesses use the same base
16981 register and the gap between constant offsets is 4. This function
16982 may reorder its operands to match ldrd/strd RTL templates.
16983 OPERANDS are the operands found by the peephole matcher;
16984 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16985    corresponding memory operands.  LOAD indicates whether the access
16986    is a load or a store.  CONST_STORE indicates a store of constant
16987    integer values held in OPERANDS[4,5] and assumes that the pattern
16988    is four insns long, for the purpose of checking dead registers.
16989 COMMUTE indicates that register operands may be reordered. */
16990 bool
16991 gen_operands_ldrd_strd (rtx *operands, bool load,
16992 bool const_store, bool commute)
16993 {
16994 int nops = 2;
16995 HOST_WIDE_INT offsets[2], offset, align[2];
16996 rtx base = NULL_RTX;
16997 rtx cur_base, cur_offset, tmp;
16998 int i, gap;
16999 HARD_REG_SET regset;
17000
17001 gcc_assert (!const_store || !load);
17002 /* Check that the memory references are immediate offsets from the
17003 same base register. Extract the base register, the destination
17004 registers, and the corresponding memory offsets. */
17005 for (i = 0; i < nops; i++)
17006 {
17007 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17008 &align[i]))
17009 return false;
17010
17011 if (i == 0)
17012 base = cur_base;
17013 else if (REGNO (base) != REGNO (cur_base))
17014 return false;
17015
17016 offsets[i] = INTVAL (cur_offset);
17017 if (GET_CODE (operands[i]) == SUBREG)
17018 {
17019 tmp = SUBREG_REG (operands[i]);
17020 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17021 operands[i] = tmp;
17022 }
17023 }
17024
17025 /* Make sure there is no dependency between the individual loads. */
17026 if (load && REGNO (operands[0]) == REGNO (base))
17027 return false; /* RAW */
17028
17029 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17030 return false; /* WAW */
17031
17032 /* If the same input register is used in both stores
17033 when storing different constants, try to find a free register.
17034 For example, the code
17035 mov r0, 0
17036 str r0, [r2]
17037 mov r0, 1
17038 str r0, [r2, #4]
17039 can be transformed into
17040 mov r1, 0
17041 mov r0, 1
17042 strd r1, r0, [r2]
17043 in Thumb mode assuming that r1 is free.
17044 For ARM mode do the same but only if the starting register
17045 can be made to be even. */
17046 if (const_store
17047 && REGNO (operands[0]) == REGNO (operands[1])
17048 && INTVAL (operands[4]) != INTVAL (operands[5]))
17049 {
17050 if (TARGET_THUMB2)
17051 {
17052 CLEAR_HARD_REG_SET (regset);
17053 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17054 if (tmp == NULL_RTX)
17055 return false;
17056
17057 /* Use the new register in the first load to ensure that
17058 if the original input register is not dead after peephole,
17059 then it will have the correct constant value. */
17060 operands[0] = tmp;
17061 }
17062 else if (TARGET_ARM)
17063 {
17064 int regno = REGNO (operands[0]);
17065 if (!peep2_reg_dead_p (4, operands[0]))
17066 {
17067 /* When the input register is even and is not dead after the
17068 pattern, it has to hold the second constant but we cannot
17069 form a legal STRD in ARM mode with this register as the second
17070 register. */
17071 if (regno % 2 == 0)
17072 return false;
17073
17074 /* Is regno-1 free? */
17075 SET_HARD_REG_SET (regset);
17076 	      CLEAR_HARD_REG_BIT (regset, regno - 1);
17077 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17078 if (tmp == NULL_RTX)
17079 return false;
17080
17081 operands[0] = tmp;
17082 }
17083 else
17084 {
17085 /* Find a DImode register. */
17086 CLEAR_HARD_REG_SET (regset);
17087 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17088 if (tmp != NULL_RTX)
17089 {
17090 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17091 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17092 }
17093 else
17094 {
17095 /* Can we use the input register to form a DI register? */
17096 SET_HARD_REG_SET (regset);
17097 		  CLEAR_HARD_REG_BIT (regset,
17098 regno % 2 == 0 ? regno + 1 : regno - 1);
17099 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17100 if (tmp == NULL_RTX)
17101 return false;
17102 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17103 }
17104 }
17105
17106 gcc_assert (operands[0] != NULL_RTX);
17107 gcc_assert (operands[1] != NULL_RTX);
17108 gcc_assert (REGNO (operands[0]) % 2 == 0);
17109 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17110 }
17111 }
17112
17113 /* Make sure the instructions are ordered with lower memory access first. */
17114 if (offsets[0] > offsets[1])
17115 {
17116 gap = offsets[0] - offsets[1];
17117 offset = offsets[1];
17118
17119 /* Swap the instructions such that lower memory is accessed first. */
17120 std::swap (operands[0], operands[1]);
17121 std::swap (operands[2], operands[3]);
17122 std::swap (align[0], align[1]);
17123 if (const_store)
17124 std::swap (operands[4], operands[5]);
17125 }
17126 else
17127 {
17128 gap = offsets[1] - offsets[0];
17129 offset = offsets[0];
17130 }
17131
17132 /* Make sure accesses are to consecutive memory locations. */
17133 if (gap != GET_MODE_SIZE (SImode))
17134 return false;
17135
17136 if (!align_ok_ldrd_strd (align[0], offset))
17137 return false;
17138
17139 /* Make sure we generate legal instructions. */
17140 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17141 false, load))
17142 return true;
17143
17144   /* In Thumb-2 state, where the registers are almost unconstrained, there
17145      is little hope of fixing a failed check by renumbering registers.  */
17146 if (TARGET_THUMB2)
17147 return false;
17148
17149 if (load && commute)
17150 {
17151 /* Try reordering registers. */
17152 std::swap (operands[0], operands[1]);
17153 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17154 false, load))
17155 return true;
17156 }
17157
17158 if (const_store)
17159 {
17160 /* If input registers are dead after this pattern, they can be
17161 reordered or replaced by other registers that are free in the
17162 current pattern. */
17163 if (!peep2_reg_dead_p (4, operands[0])
17164 || !peep2_reg_dead_p (4, operands[1]))
17165 return false;
17166
17167 /* Try to reorder the input registers. */
17168 /* For example, the code
17169 mov r0, 0
17170 mov r1, 1
17171 str r1, [r2]
17172 str r0, [r2, #4]
17173 can be transformed into
17174 mov r1, 0
17175 mov r0, 1
17176 strd r0, [r2]
17177 */
17178 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17179 false, false))
17180 {
17181 std::swap (operands[0], operands[1]);
17182 return true;
17183 }
17184
17185 /* Try to find a free DI register. */
17186 CLEAR_HARD_REG_SET (regset);
17187 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17188 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17189 while (true)
17190 {
17191 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17192 if (tmp == NULL_RTX)
17193 return false;
17194
17195 /* DREG must be an even-numbered register in DImode.
17196 Split it into SI registers. */
17197 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17198 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17199 gcc_assert (operands[0] != NULL_RTX);
17200 gcc_assert (operands[1] != NULL_RTX);
17201 gcc_assert (REGNO (operands[0]) % 2 == 0);
17202 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17203
17204 return (operands_ok_ldrd_strd (operands[0], operands[1],
17205 base, offset,
17206 false, load));
17207 }
17208 }
17209
17210 return false;
17211 }
17212
17213
17214 /* Return true if parallel execution of the two word-size accesses provided
17215 could be satisfied with a single LDRD/STRD instruction. Two word-size
17216 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17217 register operands and OPERANDS[2,3] are the corresponding memory operands.
17218 */
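/* For example, loads of [r2] into r4 and of [r2, #4] into r5 satisfy the
   checks below (same base, consecutive word offsets, suitable alignment)
   and can be combined into a single "ldrd r4, r5, [r2]".  */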
17219 bool
17220 valid_operands_ldrd_strd (rtx *operands, bool load)
17221 {
17222 int nops = 2;
17223 HOST_WIDE_INT offsets[2], offset, align[2];
17224 rtx base = NULL_RTX;
17225 rtx cur_base, cur_offset;
17226 int i, gap;
17227
17228 /* Check that the memory references are immediate offsets from the
17229 same base register. Extract the base register, the destination
17230 registers, and the corresponding memory offsets. */
17231 for (i = 0; i < nops; i++)
17232 {
17233 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17234 &align[i]))
17235 return false;
17236
17237 if (i == 0)
17238 base = cur_base;
17239 else if (REGNO (base) != REGNO (cur_base))
17240 return false;
17241
17242 offsets[i] = INTVAL (cur_offset);
17243 if (GET_CODE (operands[i]) == SUBREG)
17244 return false;
17245 }
17246
17247 if (offsets[0] > offsets[1])
17248 return false;
17249
17250 gap = offsets[1] - offsets[0];
17251 offset = offsets[0];
17252
17253 /* Make sure accesses are to consecutive memory locations. */
17254 if (gap != GET_MODE_SIZE (SImode))
17255 return false;
17256
17257 if (!align_ok_ldrd_strd (align[0], offset))
17258 return false;
17259
17260 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17261 false, load);
17262 }
17263
17264 \f
17265 /* Print a symbolic form of X to the debug file, F. */
17266 static void
17267 arm_print_value (FILE *f, rtx x)
17268 {
17269 switch (GET_CODE (x))
17270 {
17271 case CONST_INT:
17272 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17273 return;
17274
17275 case CONST_DOUBLE:
17276 {
17277 char fpstr[20];
17278 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17279 sizeof (fpstr), 0, 1);
17280 fputs (fpstr, f);
17281 }
17282 return;
17283
17284 case CONST_VECTOR:
17285 {
17286 int i;
17287
17288 fprintf (f, "<");
17289 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17290 {
17291 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17292 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17293 fputc (',', f);
17294 }
17295 fprintf (f, ">");
17296 }
17297 return;
17298
17299 case CONST_STRING:
17300 fprintf (f, "\"%s\"", XSTR (x, 0));
17301 return;
17302
17303 case SYMBOL_REF:
17304 fprintf (f, "`%s'", XSTR (x, 0));
17305 return;
17306
17307 case LABEL_REF:
17308 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17309 return;
17310
17311 case CONST:
17312 arm_print_value (f, XEXP (x, 0));
17313 return;
17314
17315 case PLUS:
17316 arm_print_value (f, XEXP (x, 0));
17317 fprintf (f, "+");
17318 arm_print_value (f, XEXP (x, 1));
17319 return;
17320
17321 case PC:
17322 fprintf (f, "pc");
17323 return;
17324
17325 default:
17326 fprintf (f, "????");
17327 return;
17328 }
17329 }
17330 \f
17331 /* Routines for manipulation of the constant pool. */
17332
17333 /* Arm instructions cannot load a large constant directly into a
17334 register; they have to come from a pc relative load. The constant
17335 must therefore be placed in the addressable range of the pc
17336 relative load. Depending on the precise pc relative load
17337 instruction the range is somewhere between 256 bytes and 4k. This
17338 means that we often have to dump a constant inside a function, and
17339 generate code to branch around it.
17340
17341 It is important to minimize this, since the branches will slow
17342 things down and make the code larger.
17343
17344 Normally we can hide the table after an existing unconditional
17345 branch so that there is no interruption of the flow, but in the
17346 worst case the code looks like this:
17347
17348 ldr rn, L1
17349 ...
17350 b L2
17351 align
17352 L1: .long value
17353 L2:
17354 ...
17355
17356 ldr rn, L3
17357 ...
17358 b L4
17359 align
17360 L3: .long value
17361 L4:
17362 ...
17363
17364 We fix this by performing a scan after scheduling, which notices
17365 which instructions need to have their operands fetched from the
17366 constant table and builds the table.
17367
17368 The algorithm starts by building a table of all the constants that
17369 need fixing up and all the natural barriers in the function (places
17370 where a constant table can be dropped without breaking the flow).
17371 For each fixup we note how far the pc-relative replacement will be
17372 able to reach and the offset of the instruction into the function.
17373
17374 Having built the table we then group the fixes together to form
17375 tables that are as large as possible (subject to addressing
17376 constraints) and emit each table of constants after the last
17377 barrier that is within range of all the instructions in the group.
17378 If a group does not contain a barrier, then we forcibly create one
17379 by inserting a jump instruction into the flow. Once the table has
17380 been inserted, the insns are then modified to reference the
17381 relevant entry in the pool.
17382
17383 Possible enhancements to the algorithm (not implemented) are:
17384
17385 1) For some processors and object formats, there may be benefit in
17386 aligning the pools to the start of cache lines; this alignment
17387 would need to be taken into account when calculating addressability
17388 of a pool. */
17389
17390 /* These typedefs are located at the start of this file, so that
17391 they can be used in the prototypes there. This comment is to
17392 remind readers of that fact so that the following structures
17393 can be understood more easily.
17394
17395 typedef struct minipool_node Mnode;
17396 typedef struct minipool_fixup Mfix; */
17397
17398 struct minipool_node
17399 {
17400 /* Doubly linked chain of entries. */
17401 Mnode * next;
17402 Mnode * prev;
17403   /* The maximum offset into the code at which this entry can be placed.  While
17404 pushing fixes for forward references, all entries are sorted in order
17405 of increasing max_address. */
17406 HOST_WIDE_INT max_address;
17407 /* Similarly for an entry inserted for a backwards ref. */
17408 HOST_WIDE_INT min_address;
17409 /* The number of fixes referencing this entry. This can become zero
17410 if we "unpush" an entry. In this case we ignore the entry when we
17411 come to emit the code. */
17412 int refcount;
17413 /* The offset from the start of the minipool. */
17414 HOST_WIDE_INT offset;
17415   /* The value in the table.  */
17416 rtx value;
17417 /* The mode of value. */
17418 machine_mode mode;
17419 /* The size of the value. With iWMMXt enabled
17420      sizes > 4 also imply an alignment of 8 bytes.  */
17421 int fix_size;
17422 };
17423
17424 struct minipool_fixup
17425 {
17426 Mfix * next;
17427 rtx_insn * insn;
17428 HOST_WIDE_INT address;
17429 rtx * loc;
17430 machine_mode mode;
17431 int fix_size;
17432 rtx value;
17433 Mnode * minipool;
17434 HOST_WIDE_INT forwards;
17435 HOST_WIDE_INT backwards;
17436 };
17437
17438 /* Fixes less than a word need padding out to a word boundary. */
17439 #define MINIPOOL_FIX_SIZE(mode) \
17440 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
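/* For example, an HImode fix (2 bytes) is padded out to 4 bytes, while
   DImode and DFmode fixes keep their natural size of 8 bytes.  */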
17441
17442 static Mnode * minipool_vector_head;
17443 static Mnode * minipool_vector_tail;
17444 static rtx_code_label *minipool_vector_label;
17445 static int minipool_pad;
17446
17447 /* The linked list of all minipool fixes required for this function. */
17448 Mfix * minipool_fix_head;
17449 Mfix * minipool_fix_tail;
17450 /* The fix entry for the current minipool, once it has been placed. */
17451 Mfix * minipool_barrier;
17452
17453 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17454 #define JUMP_TABLES_IN_TEXT_SECTION 0
17455 #endif
17456
17457 static HOST_WIDE_INT
17458 get_jump_table_size (rtx_jump_table_data *insn)
17459 {
17460   /* ADDR_VECs only take up room if read-only data goes into the text
17461 section. */
17462 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17463 {
17464 rtx body = PATTERN (insn);
17465 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17466 HOST_WIDE_INT size;
17467 HOST_WIDE_INT modesize;
17468
17469 modesize = GET_MODE_SIZE (GET_MODE (body));
17470 size = modesize * XVECLEN (body, elt);
17471 switch (modesize)
17472 {
17473 case 1:
17474 /* Round up size of TBB table to a halfword boundary. */
17475 size = (size + 1) & ~HOST_WIDE_INT_1;
17476 break;
17477 case 2:
17478 /* No padding necessary for TBH. */
17479 break;
17480 case 4:
17481 /* Add two bytes for alignment on Thumb. */
17482 if (TARGET_THUMB)
17483 size += 2;
17484 break;
17485 default:
17486 gcc_unreachable ();
17487 }
17488 return size;
17489 }
17490
17491 return 0;
17492 }
17493
17494 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17495 function descriptor) into a register and the GOT address into the
17496 FDPIC register, returning an rtx for the register holding the
17497 function address. */
17498
17499 rtx
17500 arm_load_function_descriptor (rtx funcdesc)
17501 {
17502 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17503 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17504 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17505 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17506
17507 emit_move_insn (fnaddr_reg, fnaddr);
17508
17509 /* The ABI requires the entry point address to be loaded first, but
17510 since we cannot support lazy binding for lack of atomic load of
17511 two 32-bits values, we do not need to bother to prevent the
17512 previous load from being moved after that of the GOT address. */
17513 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17514
17515 return fnaddr_reg;
17516 }
17517
17518 /* Return the maximum amount of padding that will be inserted before
17519 label LABEL. */
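/* For example, if LABEL is aligned to an 8-byte boundary and the minimum
   instruction size is 2 (Thumb), at most 6 bytes of padding can appear
   before it.  */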
17520 static HOST_WIDE_INT
17521 get_label_padding (rtx label)
17522 {
17523 HOST_WIDE_INT align, min_insn_size;
17524
17525 align = 1 << label_to_alignment (label).levels[0].log;
17526 min_insn_size = TARGET_THUMB ? 2 : 4;
17527 return align > min_insn_size ? align - min_insn_size : 0;
17528 }
17529
17530 /* Move a minipool fix MP from its current location to before MAX_MP.
17531 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17532 constraints may need updating. */
17533 static Mnode *
17534 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17535 HOST_WIDE_INT max_address)
17536 {
17537 /* The code below assumes these are different. */
17538 gcc_assert (mp != max_mp);
17539
17540 if (max_mp == NULL)
17541 {
17542 if (max_address < mp->max_address)
17543 mp->max_address = max_address;
17544 }
17545 else
17546 {
17547 if (max_address > max_mp->max_address - mp->fix_size)
17548 mp->max_address = max_mp->max_address - mp->fix_size;
17549 else
17550 mp->max_address = max_address;
17551
17552 /* Unlink MP from its current position. Since max_mp is non-null,
17553 mp->prev must be non-null. */
17554 mp->prev->next = mp->next;
17555 if (mp->next != NULL)
17556 mp->next->prev = mp->prev;
17557 else
17558 minipool_vector_tail = mp->prev;
17559
17560 /* Re-insert it before MAX_MP. */
17561 mp->next = max_mp;
17562 mp->prev = max_mp->prev;
17563 max_mp->prev = mp;
17564
17565 if (mp->prev != NULL)
17566 mp->prev->next = mp;
17567 else
17568 minipool_vector_head = mp;
17569 }
17570
17571 /* Save the new entry. */
17572 max_mp = mp;
17573
17574 /* Scan over the preceding entries and adjust their addresses as
17575 required. */
17576 while (mp->prev != NULL
17577 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17578 {
17579 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17580 mp = mp->prev;
17581 }
17582
17583 return max_mp;
17584 }
17585
17586 /* Add a constant to the minipool for a forward reference. Returns the
17587 node added or NULL if the constant will not fit in this pool. */
17588 static Mnode *
17589 add_minipool_forward_ref (Mfix *fix)
17590 {
17591 /* If set, max_mp is the first pool_entry that has a lower
17592 constraint than the one we are trying to add. */
17593 Mnode * max_mp = NULL;
17594 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17595 Mnode * mp;
17596
17597 /* If the minipool starts before the end of FIX->INSN then this FIX
17598 cannot be placed into the current pool. Furthermore, adding the
17599 new constant pool entry may cause the pool to start FIX_SIZE bytes
17600 earlier. */
17601 if (minipool_vector_head &&
17602 (fix->address + get_attr_length (fix->insn)
17603 >= minipool_vector_head->max_address - fix->fix_size))
17604 return NULL;
17605
17606 /* Scan the pool to see if a constant with the same value has
17607 already been added. While we are doing this, also note the
17608 location where we must insert the constant if it doesn't already
17609 exist. */
17610 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17611 {
17612 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17613 && fix->mode == mp->mode
17614 && (!LABEL_P (fix->value)
17615 || (CODE_LABEL_NUMBER (fix->value)
17616 == CODE_LABEL_NUMBER (mp->value)))
17617 && rtx_equal_p (fix->value, mp->value))
17618 {
17619 /* More than one fix references this entry. */
17620 mp->refcount++;
17621 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17622 }
17623
17624 /* Note the insertion point if necessary. */
17625 if (max_mp == NULL
17626 && mp->max_address > max_address)
17627 max_mp = mp;
17628
17629       /* If we are inserting an 8-byte aligned quantity and
17630 we have not already found an insertion point, then
17631 make sure that all such 8-byte aligned quantities are
17632 placed at the start of the pool. */
17633 if (ARM_DOUBLEWORD_ALIGN
17634 && max_mp == NULL
17635 && fix->fix_size >= 8
17636 && mp->fix_size < 8)
17637 {
17638 max_mp = mp;
17639 max_address = mp->max_address;
17640 }
17641 }
17642
17643 /* The value is not currently in the minipool, so we need to create
17644 a new entry for it. If MAX_MP is NULL, the entry will be put on
17645 the end of the list since the placement is less constrained than
17646 any existing entry. Otherwise, we insert the new fix before
17647 MAX_MP and, if necessary, adjust the constraints on the other
17648 entries. */
17649 mp = XNEW (Mnode);
17650 mp->fix_size = fix->fix_size;
17651 mp->mode = fix->mode;
17652 mp->value = fix->value;
17653 mp->refcount = 1;
17654 /* Not yet required for a backwards ref. */
17655 mp->min_address = -65536;
17656
17657 if (max_mp == NULL)
17658 {
17659 mp->max_address = max_address;
17660 mp->next = NULL;
17661 mp->prev = minipool_vector_tail;
17662
17663 if (mp->prev == NULL)
17664 {
17665 minipool_vector_head = mp;
17666 minipool_vector_label = gen_label_rtx ();
17667 }
17668 else
17669 mp->prev->next = mp;
17670
17671 minipool_vector_tail = mp;
17672 }
17673 else
17674 {
17675 if (max_address > max_mp->max_address - mp->fix_size)
17676 mp->max_address = max_mp->max_address - mp->fix_size;
17677 else
17678 mp->max_address = max_address;
17679
17680 mp->next = max_mp;
17681 mp->prev = max_mp->prev;
17682 max_mp->prev = mp;
17683 if (mp->prev != NULL)
17684 mp->prev->next = mp;
17685 else
17686 minipool_vector_head = mp;
17687 }
17688
17689 /* Save the new entry. */
17690 max_mp = mp;
17691
17692 /* Scan over the preceding entries and adjust their addresses as
17693 required. */
17694 while (mp->prev != NULL
17695 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17696 {
17697 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17698 mp = mp->prev;
17699 }
17700
17701 return max_mp;
17702 }
17703
17704 static Mnode *
17705 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17706 HOST_WIDE_INT min_address)
17707 {
17708 HOST_WIDE_INT offset;
17709
17710 /* The code below assumes these are different. */
17711 gcc_assert (mp != min_mp);
17712
17713 if (min_mp == NULL)
17714 {
17715 if (min_address > mp->min_address)
17716 mp->min_address = min_address;
17717 }
17718 else
17719 {
17720 /* We will adjust this below if it is too loose. */
17721 mp->min_address = min_address;
17722
17723 /* Unlink MP from its current position. Since min_mp is non-null,
17724 mp->next must be non-null. */
17725 mp->next->prev = mp->prev;
17726 if (mp->prev != NULL)
17727 mp->prev->next = mp->next;
17728 else
17729 minipool_vector_head = mp->next;
17730
17731 /* Reinsert it after MIN_MP. */
17732 mp->prev = min_mp;
17733 mp->next = min_mp->next;
17734 min_mp->next = mp;
17735 if (mp->next != NULL)
17736 mp->next->prev = mp;
17737 else
17738 minipool_vector_tail = mp;
17739 }
17740
17741 min_mp = mp;
17742
17743 offset = 0;
17744 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17745 {
17746 mp->offset = offset;
17747 if (mp->refcount > 0)
17748 offset += mp->fix_size;
17749
17750 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17751 mp->next->min_address = mp->min_address + mp->fix_size;
17752 }
17753
17754 return min_mp;
17755 }
17756
17757 /* Add a constant to the minipool for a backward reference. Returns the
17758 node added or NULL if the constant will not fit in this pool.
17759
17760 Note that the insertion code for a backwards reference can be
17761 somewhat confusing because the calculated offsets for each fix do
17762 not take into account the size of the pool (which is still under
17763 construction). */
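/* Reader's note (informal): min_address, like max_address above, is only a
   constraint; as entries are added or moved, the constraints and offsets of
   neighbouring entries are tightened, because the final size of the pool,
   and hence each entry's distance from its users, is not yet known.  */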
17764 static Mnode *
17765 add_minipool_backward_ref (Mfix *fix)
17766 {
17767 /* If set, min_mp is the last pool_entry that has a lower constraint
17768 than the one we are trying to add. */
17769 Mnode *min_mp = NULL;
17770 /* This can be negative, since it is only a constraint. */
17771 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17772 Mnode *mp;
17773
17774 /* If we can't reach the current pool from this insn, or if we can't
17775 insert this entry at the end of the pool without pushing other
17776 fixes out of range, then we don't try. This ensures that we
17777 can't fail later on. */
17778 if (min_address >= minipool_barrier->address
17779 || (minipool_vector_tail->min_address + fix->fix_size
17780 >= minipool_barrier->address))
17781 return NULL;
17782
17783 /* Scan the pool to see if a constant with the same value has
17784 already been added. While we are doing this, also note the
17785 location where we must insert the constant if it doesn't already
17786 exist. */
17787 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17788 {
17789 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17790 && fix->mode == mp->mode
17791 && (!LABEL_P (fix->value)
17792 || (CODE_LABEL_NUMBER (fix->value)
17793 == CODE_LABEL_NUMBER (mp->value)))
17794 && rtx_equal_p (fix->value, mp->value)
17795 /* Check that there is enough slack to move this entry to the
17796 end of the table (this is conservative). */
17797 && (mp->max_address
17798 > (minipool_barrier->address
17799 + minipool_vector_tail->offset
17800 + minipool_vector_tail->fix_size)))
17801 {
17802 mp->refcount++;
17803 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17804 }
17805
17806 if (min_mp != NULL)
17807 mp->min_address += fix->fix_size;
17808 else
17809 {
17810 /* Note the insertion point if necessary. */
17811 if (mp->min_address < min_address)
17812 {
17813 /* For now, we do not allow the insertion of 8-byte alignment
17814 requiring nodes anywhere but at the start of the pool. */
17815 if (ARM_DOUBLEWORD_ALIGN
17816 && fix->fix_size >= 8 && mp->fix_size < 8)
17817 return NULL;
17818 else
17819 min_mp = mp;
17820 }
17821 else if (mp->max_address
17822 < minipool_barrier->address + mp->offset + fix->fix_size)
17823 {
17824 /* Inserting before this entry would push the fix beyond
17825 its maximum address (which can happen if we have
17826 re-located a forwards fix); force the new fix to come
17827 after it. */
17828 if (ARM_DOUBLEWORD_ALIGN
17829 && fix->fix_size >= 8 && mp->fix_size < 8)
17830 return NULL;
17831 else
17832 {
17833 min_mp = mp;
17834 min_address = mp->min_address + fix->fix_size;
17835 }
17836 }
17837 /* Do not insert a non-8-byte aligned quantity before 8-byte
17838 aligned quantities. */
17839 else if (ARM_DOUBLEWORD_ALIGN
17840 && fix->fix_size < 8
17841 && mp->fix_size >= 8)
17842 {
17843 min_mp = mp;
17844 min_address = mp->min_address + fix->fix_size;
17845 }
17846 }
17847 }
17848
17849 /* We need to create a new entry. */
17850 mp = XNEW (Mnode);
17851 mp->fix_size = fix->fix_size;
17852 mp->mode = fix->mode;
17853 mp->value = fix->value;
17854 mp->refcount = 1;
17855 mp->max_address = minipool_barrier->address + 65536;
17856
17857 mp->min_address = min_address;
17858
17859 if (min_mp == NULL)
17860 {
17861 mp->prev = NULL;
17862 mp->next = minipool_vector_head;
17863
17864 if (mp->next == NULL)
17865 {
17866 minipool_vector_tail = mp;
17867 minipool_vector_label = gen_label_rtx ();
17868 }
17869 else
17870 mp->next->prev = mp;
17871
17872 minipool_vector_head = mp;
17873 }
17874 else
17875 {
17876 mp->next = min_mp->next;
17877 mp->prev = min_mp;
17878 min_mp->next = mp;
17879
17880 if (mp->next != NULL)
17881 mp->next->prev = mp;
17882 else
17883 minipool_vector_tail = mp;
17884 }
17885
17886 /* Save the new entry. */
17887 min_mp = mp;
17888
17889 if (mp->prev)
17890 mp = mp->prev;
17891 else
17892 mp->offset = 0;
17893
17894 /* Scan over the following entries and adjust their offsets. */
17895 while (mp->next != NULL)
17896 {
17897 if (mp->next->min_address < mp->min_address + mp->fix_size)
17898 mp->next->min_address = mp->min_address + mp->fix_size;
17899
17900 if (mp->refcount)
17901 mp->next->offset = mp->offset + mp->fix_size;
17902 else
17903 mp->next->offset = mp->offset;
17904
17905 mp = mp->next;
17906 }
17907
17908 return min_mp;
17909 }
17910
17911 static void
17912 assign_minipool_offsets (Mfix *barrier)
17913 {
17914 HOST_WIDE_INT offset = 0;
17915 Mnode *mp;
17916
17917 minipool_barrier = barrier;
17918
17919 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17920 {
17921 mp->offset = offset;
17922
17923 if (mp->refcount > 0)
17924 offset += mp->fix_size;
17925 }
17926 }
17927
17928 /* Output the literal table. */
17929 static void
17930 dump_minipool (rtx_insn *scan)
17931 {
17932 Mnode * mp;
17933 Mnode * nmp;
17934 int align64 = 0;
17935
17936 if (ARM_DOUBLEWORD_ALIGN)
17937 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17938 if (mp->refcount > 0 && mp->fix_size >= 8)
17939 {
17940 align64 = 1;
17941 break;
17942 }
17943
17944 if (dump_file)
17945 fprintf (dump_file,
17946 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17947 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17948
17949 scan = emit_label_after (gen_label_rtx (), scan);
17950 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17951 scan = emit_label_after (minipool_vector_label, scan);
17952
17953 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17954 {
17955 if (mp->refcount > 0)
17956 {
17957 if (dump_file)
17958 {
17959 fprintf (dump_file,
17960 ";; Offset %u, min %ld, max %ld ",
17961 (unsigned) mp->offset, (unsigned long) mp->min_address,
17962 (unsigned long) mp->max_address);
17963 arm_print_value (dump_file, mp->value);
17964 fputc ('\n', dump_file);
17965 }
17966
17967 rtx val = copy_rtx (mp->value);
17968
17969 switch (GET_MODE_SIZE (mp->mode))
17970 {
17971 #ifdef HAVE_consttable_1
17972 case 1:
17973 scan = emit_insn_after (gen_consttable_1 (val), scan);
17974 break;
17975
17976 #endif
17977 #ifdef HAVE_consttable_2
17978 case 2:
17979 scan = emit_insn_after (gen_consttable_2 (val), scan);
17980 break;
17981
17982 #endif
17983 #ifdef HAVE_consttable_4
17984 case 4:
17985 scan = emit_insn_after (gen_consttable_4 (val), scan);
17986 break;
17987
17988 #endif
17989 #ifdef HAVE_consttable_8
17990 case 8:
17991 scan = emit_insn_after (gen_consttable_8 (val), scan);
17992 break;
17993
17994 #endif
17995 #ifdef HAVE_consttable_16
17996 case 16:
17997 scan = emit_insn_after (gen_consttable_16 (val), scan);
17998 break;
17999
18000 #endif
18001 default:
18002 gcc_unreachable ();
18003 }
18004 }
18005
18006 nmp = mp->next;
18007 free (mp);
18008 }
18009
18010 minipool_vector_head = minipool_vector_tail = NULL;
18011 scan = emit_insn_after (gen_consttable_end (), scan);
18012 scan = emit_barrier_after (scan);
18013 }
18014
18015 /* Return the cost of forcibly inserting a barrier after INSN. */
18016 static int
18017 arm_barrier_cost (rtx_insn *insn)
18018 {
18019 /* Basing the location of the pool on the loop depth is preferable,
18020 but at the moment, the basic block information seems to be
18021 corrupt by this stage of the compilation. */
18022 int base_cost = 50;
18023 rtx_insn *next = next_nonnote_insn (insn);
18024
18025 if (next != NULL && LABEL_P (next))
18026 base_cost -= 20;
18027
18028 switch (GET_CODE (insn))
18029 {
18030 case CODE_LABEL:
18031 /* It will always be better to place the table before the label, rather
18032 than after it. */
18033 return 50;
18034
18035 case INSN:
18036 case CALL_INSN:
18037 return base_cost;
18038
18039 case JUMP_INSN:
18040 return base_cost - 10;
18041
18042 default:
18043 return base_cost + 10;
18044 }
18045 }
18046
18047 /* Find the best place in the insn stream in the range
18048 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18049 Create the barrier by inserting a jump and add a new fix entry for
18050 it. */
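/* The resulting layout is roughly (label name invented for illustration):

       b     .Lskip      @ jump emitted after the selected insn
       <barrier>         @ the literal pool will later be dumped here
     .Lskip:

   so execution branches around the constants once the pool is emitted.  */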
18051 static Mfix *
18052 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18053 {
18054 HOST_WIDE_INT count = 0;
18055 rtx_barrier *barrier;
18056 rtx_insn *from = fix->insn;
18057 /* The instruction after which we will insert the jump. */
18058 rtx_insn *selected = NULL;
18059 int selected_cost;
18060 /* The address at which the jump instruction will be placed. */
18061 HOST_WIDE_INT selected_address;
18062 Mfix * new_fix;
18063 HOST_WIDE_INT max_count = max_address - fix->address;
18064 rtx_code_label *label = gen_label_rtx ();
18065
18066 selected_cost = arm_barrier_cost (from);
18067 selected_address = fix->address;
18068
18069 while (from && count < max_count)
18070 {
18071 rtx_jump_table_data *tmp;
18072 int new_cost;
18073
18074 /* This code shouldn't have been called if there was a natural barrier
18075 within range. */
18076 gcc_assert (!BARRIER_P (from));
18077
18078 /* Count the length of this insn. This must stay in sync with the
18079 code that pushes minipool fixes. */
18080 if (LABEL_P (from))
18081 count += get_label_padding (from);
18082 else
18083 count += get_attr_length (from);
18084
18085 /* If there is a jump table, add its length. */
18086 if (tablejump_p (from, NULL, &tmp))
18087 {
18088 count += get_jump_table_size (tmp);
18089
18090 /* Jump tables aren't in a basic block, so base the cost on
18091 the dispatch insn. If we select this location, we will
18092 still put the pool after the table. */
18093 new_cost = arm_barrier_cost (from);
18094
18095 if (count < max_count
18096 && (!selected || new_cost <= selected_cost))
18097 {
18098 selected = tmp;
18099 selected_cost = new_cost;
18100 selected_address = fix->address + count;
18101 }
18102
18103 /* Continue after the dispatch table. */
18104 from = NEXT_INSN (tmp);
18105 continue;
18106 }
18107
18108 new_cost = arm_barrier_cost (from);
18109
18110 if (count < max_count
18111 && (!selected || new_cost <= selected_cost))
18112 {
18113 selected = from;
18114 selected_cost = new_cost;
18115 selected_address = fix->address + count;
18116 }
18117
18118 from = NEXT_INSN (from);
18119 }
18120
18121 /* Make sure that we found a place to insert the jump. */
18122 gcc_assert (selected);
18123
18124 /* Create a new JUMP_INSN that branches around a barrier. */
18125 from = emit_jump_insn_after (gen_jump (label), selected);
18126 JUMP_LABEL (from) = label;
18127 barrier = emit_barrier_after (from);
18128 emit_label_after (label, barrier);
18129
18130 /* Create a minipool barrier entry for the new barrier. */
18131 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18132 new_fix->insn = barrier;
18133 new_fix->address = selected_address;
18134 new_fix->next = fix->next;
18135 fix->next = new_fix;
18136
18137 return new_fix;
18138 }
18139
18140 /* Record that there is a natural barrier in the insn stream at
18141 ADDRESS. */
18142 static void
18143 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18144 {
18145 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18146
18147 fix->insn = insn;
18148 fix->address = address;
18149
18150 fix->next = NULL;
18151 if (minipool_fix_head != NULL)
18152 minipool_fix_tail->next = fix;
18153 else
18154 minipool_fix_head = fix;
18155
18156 minipool_fix_tail = fix;
18157 }
18158
18159 /* Record INSN, which will need fixing up to load a value from the
18160 minipool. ADDRESS is the offset of the insn since the start of the
18161 function; LOC is a pointer to the part of the insn which requires
18162 fixing; VALUE is the constant that must be loaded, which is of type
18163 MODE. */
18164 static void
18165 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18166 machine_mode mode, rtx value)
18167 {
18168 gcc_assert (!arm_disable_literal_pool);
18169 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18170
18171 fix->insn = insn;
18172 fix->address = address;
18173 fix->loc = loc;
18174 fix->mode = mode;
18175 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18176 fix->value = value;
18177 fix->forwards = get_attr_pool_range (insn);
18178 fix->backwards = get_attr_neg_pool_range (insn);
18179 fix->minipool = NULL;
18180
18181 /* If an insn doesn't have a range defined for it, then it isn't
18182 expecting to be reworked by this code. Better to stop now than
18183 to generate duff assembly code. */
18184 gcc_assert (fix->forwards || fix->backwards);
18185
18186 /* If an entry requires 8-byte alignment then assume all constant pools
18187 require 4 bytes of padding. Trying to do this later on a per-pool
18188 basis is awkward because existing pool entries have to be modified. */
18189 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18190 minipool_pad = 4;
18191
18192 if (dump_file)
18193 {
18194 fprintf (dump_file,
18195 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18196 GET_MODE_NAME (mode),
18197 INSN_UID (insn), (unsigned long) address,
18198 -1 * (long)fix->backwards, (long)fix->forwards);
18199 arm_print_value (dump_file, fix->value);
18200 fprintf (dump_file, "\n");
18201 }
18202
18203 /* Add it to the chain of fixes. */
18204 fix->next = NULL;
18205
18206 if (minipool_fix_head != NULL)
18207 minipool_fix_tail->next = fix;
18208 else
18209 minipool_fix_head = fix;
18210
18211 minipool_fix_tail = fix;
18212 }
18213
18214 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
18215 Returns the number of insns needed, or 99 if we always want to synthesize
18216 the value. */
18217 int
18218 arm_max_const_double_inline_cost ()
18219 {
18220 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18221 }
18222
18223 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18224 Returns the number of insns needed, or 99 if we don't know how to
18225 do it. */
18226 int
18227 arm_const_double_inline_cost (rtx val)
18228 {
18229 rtx lowpart, highpart;
18230 machine_mode mode;
18231
18232 mode = GET_MODE (val);
18233
18234 if (mode == VOIDmode)
18235 mode = DImode;
18236
18237 gcc_assert (GET_MODE_SIZE (mode) == 8);
18238
18239 lowpart = gen_lowpart (SImode, val);
18240 highpart = gen_highpart_mode (SImode, mode, val);
18241
18242 gcc_assert (CONST_INT_P (lowpart));
18243 gcc_assert (CONST_INT_P (highpart));
18244
18245 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18246 NULL_RTX, NULL_RTX, 0, 0)
18247 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18248 NULL_RTX, NULL_RTX, 0, 0));
18249 }
18250
18251 /* Cost of loading a SImode constant. */
18252 static inline int
18253 arm_const_inline_cost (enum rtx_code code, rtx val)
18254 {
18255 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18256 NULL_RTX, NULL_RTX, 1, 0);
18257 }
18258
18259 /* Return true if it is worthwhile to split a 64-bit constant into two
18260 32-bit operations. This is the case if optimizing for size, or
18261 if we have load delay slots, or if one 32-bit part can be done with
18262 a single data operation. */
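/* Illustrative example (values invented): for the 64-bit constant
   0x12345678000000ff the low word 0x000000ff is a valid ARM immediate, so
   splitting into two 32-bit operations is judged worthwhile even though the
   high word needs a longer sequence.  */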
18263 bool
18264 arm_const_double_by_parts (rtx val)
18265 {
18266 machine_mode mode = GET_MODE (val);
18267 rtx part;
18268
18269 if (optimize_size || arm_ld_sched)
18270 return true;
18271
18272 if (mode == VOIDmode)
18273 mode = DImode;
18274
18275 part = gen_highpart_mode (SImode, mode, val);
18276
18277 gcc_assert (CONST_INT_P (part));
18278
18279 if (const_ok_for_arm (INTVAL (part))
18280 || const_ok_for_arm (~INTVAL (part)))
18281 return true;
18282
18283 part = gen_lowpart (SImode, val);
18284
18285 gcc_assert (CONST_INT_P (part));
18286
18287 if (const_ok_for_arm (INTVAL (part))
18288 || const_ok_for_arm (~INTVAL (part)))
18289 return true;
18290
18291 return false;
18292 }
18293
18294 /* Return true if it is possible to inline both the high and low parts
18295 of a 64-bit constant into 32-bit data processing instructions. */
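/* Illustrative example (values invented): 0x000000ff00000001 can be built
   with two immediate moves, since both 0x000000ff and 0x00000001 are valid
   ARM immediates, so this returns true for that value.  */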
18296 bool
18297 arm_const_double_by_immediates (rtx val)
18298 {
18299 machine_mode mode = GET_MODE (val);
18300 rtx part;
18301
18302 if (mode == VOIDmode)
18303 mode = DImode;
18304
18305 part = gen_highpart_mode (SImode, mode, val);
18306
18307 gcc_assert (CONST_INT_P (part));
18308
18309 if (!const_ok_for_arm (INTVAL (part)))
18310 return false;
18311
18312 part = gen_lowpart (SImode, val);
18313
18314 gcc_assert (CONST_INT_P (part));
18315
18316 if (!const_ok_for_arm (INTVAL (part)))
18317 return false;
18318
18319 return true;
18320 }
18321
18322 /* Scan INSN and note any of its operands that need fixing.
18323 If DO_PUSHES is false we do not actually push any of the fixups
18324 needed. */
18325 static void
18326 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18327 {
18328 int opno;
18329
18330 extract_constrain_insn (insn);
18331
18332 if (recog_data.n_alternatives == 0)
18333 return;
18334
18335 /* Fill in recog_op_alt with information about the constraints of
18336 this insn. */
18337 preprocess_constraints (insn);
18338
18339 const operand_alternative *op_alt = which_op_alt ();
18340 for (opno = 0; opno < recog_data.n_operands; opno++)
18341 {
18342 /* Things we need to fix can only occur in inputs. */
18343 if (recog_data.operand_type[opno] != OP_IN)
18344 continue;
18345
18346 /* If this alternative is a memory reference, then any mention
18347 of constants in this alternative is really to fool reload
18348 into allowing us to accept one there. We need to fix them up
18349 now so that we output the right code. */
18350 if (op_alt[opno].memory_ok)
18351 {
18352 rtx op = recog_data.operand[opno];
18353
18354 if (CONSTANT_P (op))
18355 {
18356 if (do_pushes)
18357 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18358 recog_data.operand_mode[opno], op);
18359 }
18360 else if (MEM_P (op)
18361 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18362 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18363 {
18364 if (do_pushes)
18365 {
18366 rtx cop = avoid_constant_pool_reference (op);
18367
18368 /* Casting the address of something to a mode narrower
18369 than a word can cause avoid_constant_pool_reference()
18370 to return the pool reference itself. That's no good to
18371 us here. Let's just hope that we can use the
18372 constant pool value directly. */
18373 if (op == cop)
18374 cop = get_pool_constant (XEXP (op, 0));
18375
18376 push_minipool_fix (insn, address,
18377 recog_data.operand_loc[opno],
18378 recog_data.operand_mode[opno], cop);
18379 }
18380
18381 }
18382 }
18383 }
18384
18385 return;
18386 }
18387
18388 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18389 and unions in the context of ARMv8-M Security Extensions. It is used as a
18390 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18391 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18392 or four masks, depending on whether it is being computed for a
18393 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18394 respectively. The tree for the type of the argument or a field within an
18395 argument is passed in ARG_TYPE, the current register this argument or field
18396 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18397 argument or field starts at is passed in STARTING_BIT and the last used bit
18398 is kept in LAST_USED_BIT which is also updated accordingly. */
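/* Illustrative example (invented, assuming a little-endian layout): for an
   argument of type struct { char c; int i; } starting in r0, the struct
   occupies r0 and r1 but only bits 0-7 of r0 hold data, so
   padding_bits_to_clear[0] ends up as 0xffffff00 and the returned mask has
   the bits for r0 and r1 set, i.e. those registers are not fully cleared.  */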
18399
18400 static unsigned HOST_WIDE_INT
18401 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18402 uint32_t * padding_bits_to_clear,
18403 unsigned starting_bit, int * last_used_bit)
18404
18405 {
18406 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18407
18408 if (TREE_CODE (arg_type) == RECORD_TYPE)
18409 {
18410 unsigned current_bit = starting_bit;
18411 tree field;
18412 long int offset, size;
18413
18414
18415 field = TYPE_FIELDS (arg_type);
18416 while (field)
18417 {
18418 /* The offset within a structure is always an offset from
18419 the start of that structure. Make sure we take that into account
18420 in the calculation of the register-based offset that we use here. */
18421 offset = starting_bit;
18422 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18423 offset %= 32;
18424
18425 /* This is the actual size of the field, for bitfields this is the
18426 bitfield width and not the container size. */
18427 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18428
18429 if (*last_used_bit != offset)
18430 {
18431 if (offset < *last_used_bit)
18432 {
18433 /* This field's offset is before the 'last_used_bit', that
18434 means this field goes on the next register. So we need to
18435 pad the rest of the current register and increase the
18436 register number. */
18437 uint32_t mask;
18438 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18439 mask++;
18440
18441 padding_bits_to_clear[*regno] |= mask;
18442 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18443 (*regno)++;
18444 }
18445 else
18446 {
18447 /* Otherwise we pad the bits between the last field's end and
18448 the start of the new field. */
18449 uint32_t mask;
18450
18451 mask = ((uint32_t)-1) >> (32 - offset);
18452 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18453 padding_bits_to_clear[*regno] |= mask;
18454 }
18455 current_bit = offset;
18456 }
18457
18458 /* Calculate further padding bits for inner structs/unions too. */
18459 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18460 {
18461 *last_used_bit = current_bit;
18462 not_to_clear_reg_mask
18463 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18464 padding_bits_to_clear, offset,
18465 last_used_bit);
18466 }
18467 else
18468 {
18469 /* Update 'current_bit' with this field's size. If the
18470 'current_bit' lies in a subsequent register, update 'regno' and
18471 reset 'current_bit' to point to the current bit in that new
18472 register. */
18473 current_bit += size;
18474 while (current_bit >= 32)
18475 {
18476 current_bit-=32;
18477 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18478 (*regno)++;
18479 }
18480 *last_used_bit = current_bit;
18481 }
18482
18483 field = TREE_CHAIN (field);
18484 }
18485 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18486 }
18487 else if (TREE_CODE (arg_type) == UNION_TYPE)
18488 {
18489 tree field, field_t;
18490 int i, regno_t, field_size;
18491 int max_reg = -1;
18492 int max_bit = -1;
18493 uint32_t mask;
18494 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18495 = {-1, -1, -1, -1};
18496
18497 /* To compute the padding bits in a union we only consider bits as
18498 padding bits if they are always either a padding bit or fall outside a
18499 field's size for all fields in the union. */
18500 field = TYPE_FIELDS (arg_type);
18501 while (field)
18502 {
18503 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18504 = {0U, 0U, 0U, 0U};
18505 int last_used_bit_t = *last_used_bit;
18506 regno_t = *regno;
18507 field_t = TREE_TYPE (field);
18508
18509 /* If the field's type is either a record or a union make sure to
18510 compute their padding bits too. */
18511 if (RECORD_OR_UNION_TYPE_P (field_t))
18512 not_to_clear_reg_mask
18513 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18514 &padding_bits_to_clear_t[0],
18515 starting_bit, &last_used_bit_t);
18516 else
18517 {
18518 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18519 regno_t = (field_size / 32) + *regno;
18520 last_used_bit_t = (starting_bit + field_size) % 32;
18521 }
18522
18523 for (i = *regno; i < regno_t; i++)
18524 {
18525 /* For all but the last register used by this field only keep the
18526 padding bits that were padding bits in this field. */
18527 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18528 }
18529
18530 /* For the last register, keep all padding bits that were padding
18531 bits in this field and any padding bits that are still valid
18532 as padding bits but fall outside of this field's size. */
18533 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18534 padding_bits_to_clear_res[regno_t]
18535 &= padding_bits_to_clear_t[regno_t] | mask;
18536
18537 /* Update the maximum size of the fields in terms of registers used
18538 ('max_reg') and the 'last_used_bit' in said register. */
18539 if (max_reg < regno_t)
18540 {
18541 max_reg = regno_t;
18542 max_bit = last_used_bit_t;
18543 }
18544 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18545 max_bit = last_used_bit_t;
18546
18547 field = TREE_CHAIN (field);
18548 }
18549
18550 /* Update the current padding_bits_to_clear using the intersection of the
18551 padding bits of all the fields. */
18552 for (i=*regno; i < max_reg; i++)
18553 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18554
18555 /* Do not keep trailing padding bits, we do not know yet whether this
18556 is the end of the argument. */
18557 mask = ((uint32_t) 1 << max_bit) - 1;
18558 padding_bits_to_clear[max_reg]
18559 |= padding_bits_to_clear_res[max_reg] & mask;
18560
18561 *regno = max_reg;
18562 *last_used_bit = max_bit;
18563 }
18564 else
18565 /* This function should only be used for structs and unions. */
18566 gcc_unreachable ();
18567
18568 return not_to_clear_reg_mask;
18569 }
18570
18571 /* In the context of ARMv8-M Security Extensions, this function is used for both
18572 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18573 registers are used when returning or passing arguments, which is then
18574 returned as a mask. It will also compute a mask to indicate padding/unused
18575 bits for each of these registers, and passes this through the
18576 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18577 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18578 the starting register used to pass this argument or return value is passed
18579 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18580 for struct and union types. */
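/* Illustrative example (invented): under -mfloat-abi=hard a double passed
   in d0 arrives here with a DFmode ARG_RTX, so the else branch below marks
   the two single-precision halves (s0 and s1) as not to be cleared.  */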
18581
18582 static unsigned HOST_WIDE_INT
18583 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18584 uint32_t * padding_bits_to_clear)
18585
18586 {
18587 int last_used_bit = 0;
18588 unsigned HOST_WIDE_INT not_to_clear_mask;
18589
18590 if (RECORD_OR_UNION_TYPE_P (arg_type))
18591 {
18592 not_to_clear_mask
18593 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18594 padding_bits_to_clear, 0,
18595 &last_used_bit);
18596
18597
18598 /* If the 'last_used_bit' is not zero, that means we are still using a
18599 part of the last 'regno'. In such cases we must clear the trailing
18600 bits. Otherwise we are not using regno and we should mark it for
18601 clearing. */
18602 if (last_used_bit != 0)
18603 padding_bits_to_clear[regno]
18604 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18605 else
18606 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18607 }
18608 else
18609 {
18610 not_to_clear_mask = 0;
18611 /* We are not dealing with structs nor unions. So these arguments may be
18612 passed in floating point registers too. In some cases a BLKmode is
18613 used when returning or passing arguments in multiple VFP registers. */
18614 if (GET_MODE (arg_rtx) == BLKmode)
18615 {
18616 int i, arg_regs;
18617 rtx reg;
18618
18619 /* This should really only occur when dealing with the hard-float
18620 ABI. */
18621 gcc_assert (TARGET_HARD_FLOAT_ABI);
18622
18623 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18624 {
18625 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18626 gcc_assert (REG_P (reg));
18627
18628 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18629
18630 /* If we are dealing with DF mode, make sure we don't
18631 clear either of the registers it addresses. */
18632 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18633 if (arg_regs > 1)
18634 {
18635 unsigned HOST_WIDE_INT mask;
18636 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18637 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18638 not_to_clear_mask |= mask;
18639 }
18640 }
18641 }
18642 else
18643 {
18644 /* Otherwise we can rely on the MODE to determine how many registers
18645 are being used by this argument. */
18646 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18647 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18648 if (arg_regs > 1)
18649 {
18650 unsigned HOST_WIDE_INT
18651 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18652 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18653 not_to_clear_mask |= mask;
18654 }
18655 }
18656 }
18657
18658 return not_to_clear_mask;
18659 }
18660
18661 /* Clear secrets from registers before doing a cmse_nonsecure_call or returning from
18662 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18663 are to be fully cleared, using the value in register CLEARING_REG if more
18664 efficient. The PADDING_BITS_LEN-entry array PADDING_BITS_TO_CLEAR gives
18665 the bits that need to be cleared in caller-saved core registers, with
18666 SCRATCH_REG used as a scratch register for that clearing.
18667
18668 NOTE: one of three following assertions must hold:
18669 - SCRATCH_REG is a low register
18670 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18671 in TO_CLEAR_BITMAP)
18672 - CLEARING_REG is a low register. */
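/* Worked example for the padding-bit clearing below (value invented): if
   padding_bits_to_clear[i] is 0x0000ff00, then ~mask is 0xffff00ff; its low
   half 0x00ff is moved into SCRATCH_REG, its top half 0xffff is written via
   a ZERO_EXTRACT, and the final AND clears only bits 8-15 of the argument
   register.  */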
18673
18674 static void
18675 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18676 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18677 {
18678 bool saved_clearing = false;
18679 rtx saved_clearing_reg = NULL_RTX;
18680 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18681
18682 gcc_assert (arm_arch_cmse);
18683
18684 if (!bitmap_empty_p (to_clear_bitmap))
18685 {
18686 minregno = bitmap_first_set_bit (to_clear_bitmap);
18687 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18688 }
18689 clearing_regno = REGNO (clearing_reg);
18690
18691 /* Clear padding bits. */
18692 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18693 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18694 {
18695 uint64_t mask;
18696 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18697
18698 if (padding_bits_to_clear[i] == 0)
18699 continue;
18700
18701 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18702 CLEARING_REG as scratch. */
18703 if (TARGET_THUMB1
18704 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18705 {
18706 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18707 such that we can use clearing_reg to clear the unused bits in the
18708 arguments. */
18709 if ((clearing_regno > maxregno
18710 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18711 && !saved_clearing)
18712 {
18713 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18714 emit_move_insn (scratch_reg, clearing_reg);
18715 saved_clearing = true;
18716 saved_clearing_reg = scratch_reg;
18717 }
18718 scratch_reg = clearing_reg;
18719 }
18720
18721 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18722 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18723 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18724
18725 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18726 mask = (~padding_bits_to_clear[i]) >> 16;
18727 rtx16 = gen_int_mode (16, SImode);
18728 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18729 if (mask)
18730 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18731
18732 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18733 }
18734 if (saved_clearing)
18735 emit_move_insn (clearing_reg, saved_clearing_reg);
18736
18737
18738 /* Clear full registers. */
18739
18740 if (TARGET_HAVE_FPCXT_CMSE)
18741 {
18742 rtvec vunspec_vec;
18743 int i, j, k, nb_regs;
18744 rtx use_seq, par, reg, set, vunspec;
18745 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18746 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18747 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18748
18749 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18750 {
18751 /* Find next register to clear and exit if none. */
18752 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18753 if (i > maxregno)
18754 break;
18755
18756 /* Compute number of consecutive registers to clear. */
18757 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18758 j++);
18759 nb_regs = j - i;
18760
18761 /* Create VSCCLRM RTX pattern. */
18762 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18763 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18764 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18765 VUNSPEC_VSCCLRM_VPR);
18766 XVECEXP (par, 0, 0) = vunspec;
18767
18768 /* Insert VFP register clearing RTX in the pattern. */
18769 start_sequence ();
18770 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18771 {
18772 if (!bitmap_bit_p (to_clear_bitmap, j))
18773 continue;
18774
18775 reg = gen_rtx_REG (SFmode, j);
18776 set = gen_rtx_SET (reg, const0_rtx);
18777 XVECEXP (par, 0, k++) = set;
18778 emit_use (reg);
18779 }
18780 use_seq = get_insns ();
18781 end_sequence ();
18782
18783 emit_insn_after (use_seq, emit_insn (par));
18784 }
18785
18786 /* Get set of core registers to clear. */
18787 bitmap_clear (core_regs_bitmap);
18788 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18789 IP_REGNUM - R0_REGNUM + 1);
18790 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18791 core_regs_bitmap);
18792 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18793
18794 if (bitmap_empty_p (to_clear_core_bitmap))
18795 return;
18796
18797 /* Create clrm RTX pattern. */
18798 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18799 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18800
18801 /* Insert core register clearing RTX in the pattern. */
18802 start_sequence ();
18803 for (j = 0, i = minregno; j < nb_regs; i++)
18804 {
18805 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18806 continue;
18807
18808 reg = gen_rtx_REG (SImode, i);
18809 set = gen_rtx_SET (reg, const0_rtx);
18810 XVECEXP (par, 0, j++) = set;
18811 emit_use (reg);
18812 }
18813
18814 /* Insert APSR register clearing RTX in the pattern, along with
18815 clobbering CC. */
18816 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18817 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18818 VUNSPEC_CLRM_APSR);
18819
18820 XVECEXP (par, 0, j++) = vunspec;
18821
18822 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18823 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18824 XVECEXP (par, 0, j) = clobber;
18825
18826 use_seq = get_insns ();
18827 end_sequence ();
18828
18829 emit_insn_after (use_seq, emit_insn (par));
18830 }
18831 else
18832 {
18833 /* If not marked for clearing, clearing_reg already does not contain
18834 any secret. */
18835 if (clearing_regno <= maxregno
18836 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18837 {
18838 emit_move_insn (clearing_reg, const0_rtx);
18839 emit_use (clearing_reg);
18840 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18841 }
18842
18843 for (regno = minregno; regno <= maxregno; regno++)
18844 {
18845 if (!bitmap_bit_p (to_clear_bitmap, regno))
18846 continue;
18847
18848 if (IS_VFP_REGNUM (regno))
18849 {
18850 /* If regno is an even vfp register and its successor is also to
18851 be cleared, use vmov. */
18852 if (TARGET_VFP_DOUBLE
18853 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18854 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18855 {
18856 emit_move_insn (gen_rtx_REG (DFmode, regno),
18857 CONST1_RTX (DFmode));
18858 emit_use (gen_rtx_REG (DFmode, regno));
18859 regno++;
18860 }
18861 else
18862 {
18863 emit_move_insn (gen_rtx_REG (SFmode, regno),
18864 CONST1_RTX (SFmode));
18865 emit_use (gen_rtx_REG (SFmode, regno));
18866 }
18867 }
18868 else
18869 {
18870 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18871 emit_use (gen_rtx_REG (SImode, regno));
18872 }
18873 }
18874 }
18875 }
18876
18877 /* Clear core and caller-saved VFP registers not used to pass arguments before
18878 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18879 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18880 libgcc/config/arm/cmse_nonsecure_call.S. */
18881
18882 static void
18883 cmse_nonsecure_call_inline_register_clear (void)
18884 {
18885 basic_block bb;
18886
18887 FOR_EACH_BB_FN (bb, cfun)
18888 {
18889 rtx_insn *insn;
18890
18891 FOR_BB_INSNS (bb, insn)
18892 {
18893 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18894 /* frame = VFP regs + FPSCR + VPR. */
18895 unsigned lazy_store_stack_frame_size
18896 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18897 unsigned long callee_saved_mask
18898 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18899 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18900 unsigned address_regnum, regno;
18901 unsigned max_int_regno
18902 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18903 unsigned max_fp_regno
18904 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18905 unsigned maxregno
18906 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18907 auto_sbitmap to_clear_bitmap (maxregno + 1);
18908 rtx_insn *seq;
18909 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18910 rtx address;
18911 CUMULATIVE_ARGS args_so_far_v;
18912 cumulative_args_t args_so_far;
18913 tree arg_type, fntype;
18914 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18915 function_args_iterator args_iter;
18916 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18917
18918 if (!NONDEBUG_INSN_P (insn))
18919 continue;
18920
18921 if (!CALL_P (insn))
18922 continue;
18923
18924 pat = PATTERN (insn);
18925 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18926 call = XVECEXP (pat, 0, 0);
18927
18928 /* Get the real call RTX if the insn sets a value, ie. returns. */
18929 if (GET_CODE (call) == SET)
18930 call = SET_SRC (call);
18931
18932 /* Check if it is a cmse_nonsecure_call. */
18933 unspec = XEXP (call, 0);
18934 if (GET_CODE (unspec) != UNSPEC
18935 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18936 continue;
18937
18938 /* Mark registers that need to be cleared. Those that hold a
18939 parameter are removed from the set further below. */
18940 bitmap_clear (to_clear_bitmap);
18941 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18942 max_int_regno - R0_REGNUM + 1);
18943
18944 /* Only look at the caller-saved floating point registers in case of
18945 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18946 lazy store and loads which clear both caller- and callee-saved
18947 registers. */
18948 if (!lazy_fpclear)
18949 {
18950 auto_sbitmap float_bitmap (maxregno + 1);
18951
18952 bitmap_clear (float_bitmap);
18953 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18954 max_fp_regno - FIRST_VFP_REGNUM + 1);
18955 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18956 }
18957
18958 /* Make sure the register used to hold the function address is not
18959 cleared. */
18960 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18961 gcc_assert (MEM_P (address));
18962 gcc_assert (REG_P (XEXP (address, 0)));
18963 address_regnum = REGNO (XEXP (address, 0));
18964 if (address_regnum <= max_int_regno)
18965 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18966
18967 /* Set basic block of call insn so that df rescan is performed on
18968 insns inserted here. */
18969 set_block_for_insn (insn, bb);
18970 df_set_flags (DF_DEFER_INSN_RESCAN);
18971 start_sequence ();
18972
18973 /* Make sure the scheduler doesn't schedule other insns beyond
18974 here. */
18975 emit_insn (gen_blockage ());
18976
18977 /* Walk through all arguments and clear registers appropriately. */
18978
18979 fntype = TREE_TYPE (MEM_EXPR (address));
18980 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18981 NULL_TREE);
18982 args_so_far = pack_cumulative_args (&args_so_far_v);
18983 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18984 {
18985 rtx arg_rtx;
18986 uint64_t to_clear_args_mask;
18987
18988 if (VOID_TYPE_P (arg_type))
18989 continue;
18990
18991 function_arg_info arg (arg_type, /*named=*/true);
18992 if (!first_param)
18993 /* ??? We should advance after processing the argument and pass
18994 the argument we're advancing past. */
18995 arm_function_arg_advance (args_so_far, arg);
18996
18997 arg_rtx = arm_function_arg (args_so_far, arg);
18998 gcc_assert (REG_P (arg_rtx));
18999 to_clear_args_mask
19000 = compute_not_to_clear_mask (arg_type, arg_rtx,
19001 REGNO (arg_rtx),
19002 &padding_bits_to_clear[0]);
19003 if (to_clear_args_mask)
19004 {
19005 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19006 {
19007 if (to_clear_args_mask & (1ULL << regno))
19008 bitmap_clear_bit (to_clear_bitmap, regno);
19009 }
19010 }
19011
19012 first_param = false;
19013 }
19014
19015 /* We use right shift and left shift to clear the LSB of the address
19016 we jump to instead of using bic, to avoid having to use an extra
19017 register on Thumb-1. */
19018 clearing_reg = XEXP (address, 0);
19019 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19020 emit_insn (gen_rtx_SET (clearing_reg, shift));
19021 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19022 emit_insn (gen_rtx_SET (clearing_reg, shift));
19023
19024 if (clear_callee_saved)
19025 {
19026 rtx push_insn =
19027 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19028 /* Disable frame debug info in push because it needs to be
19029 disabled for pop (see below). */
19030 RTX_FRAME_RELATED_P (push_insn) = 0;
19031
19032 /* Lazy store multiple. */
19033 if (lazy_fpclear)
19034 {
19035 rtx imm;
19036 rtx_insn *add_insn;
19037
19038 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19039 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19040 stack_pointer_rtx, imm));
19041 /* If we have the frame pointer, then it will be the
19042 CFA reg. Otherwise, the stack pointer is the CFA
19043 reg, so we need to emit a CFA adjust. */
19044 if (!frame_pointer_needed)
19045 arm_add_cfa_adjust_cfa_note (add_insn,
19046 - lazy_store_stack_frame_size,
19047 stack_pointer_rtx,
19048 stack_pointer_rtx);
19049 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19050 }
19051 /* Save VFP callee-saved registers. */
19052 else
19053 {
19054 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19055 (max_fp_regno - D7_VFP_REGNUM) / 2);
19056 /* Disable frame debug info in push because it needs to be
19057 disabled for vpop (see below). */
19058 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19059 }
19060 }
19061
19062 /* Clear caller-saved registers that leak before doing a non-secure
19063 call. */
19064 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19065 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19066 NUM_ARG_REGS, ip_reg, clearing_reg);
19067
19068 seq = get_insns ();
19069 end_sequence ();
19070 emit_insn_before (seq, insn);
19071
19072 if (TARGET_HAVE_FPCXT_CMSE)
19073 {
19074 rtx_insn *last, *pop_insn, *after = insn;
19075
19076 start_sequence ();
19077
19078 /* Lazy load multiple done as part of libcall in Armv8-M. */
19079 if (lazy_fpclear)
19080 {
19081 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19082 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19083 rtx_insn *add_insn =
19084 emit_insn (gen_addsi3 (stack_pointer_rtx,
19085 stack_pointer_rtx, imm));
19086 if (!frame_pointer_needed)
19087 arm_add_cfa_adjust_cfa_note (add_insn,
19088 lazy_store_stack_frame_size,
19089 stack_pointer_rtx,
19090 stack_pointer_rtx);
19091 }
19092 /* Restore VFP callee-saved registers. */
19093 else
19094 {
19095 int nb_callee_saved_vfp_regs =
19096 (max_fp_regno - D7_VFP_REGNUM) / 2;
19097 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19098 nb_callee_saved_vfp_regs,
19099 stack_pointer_rtx);
19100 /* Disable frame debug info in vpop because the SP adjustment
19101 is made using a CFA adjustment note while CFA used is
19102 sometimes R7. This then causes an assert failure in the
19103 CFI note creation code. */
19104 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19105 }
19106
19107 arm_emit_multi_reg_pop (callee_saved_mask);
19108 pop_insn = get_last_insn ();
19109
19110 /* Disable frame debug info in pop because the notes reset the state
19111 of popped registers to what it was at the beginning of the
19112 function, before the prologue. This leads to incorrect state
19113 when doing the pop after the nonsecure call for registers that
19114 are pushed both in prologue and before the nonsecure call.
19115
19116 It also occasionally triggers an assert failure in CFI note
19117 creation code when there are two codepaths to the epilogue,
19118 one of which does not go through the nonsecure call.
19119 Obviously this means that debugging between the push and pop is
19120 not reliable. */
19121 RTX_FRAME_RELATED_P (pop_insn) = 0;
19122
19123 seq = get_insns ();
19124 last = get_last_insn ();
19125 end_sequence ();
19126
19127 emit_insn_after (seq, after);
19128
19129 /* Skip the pop we have just inserted after the nonsecure call; we know
19130 it does not contain a nonsecure call. */
19131 insn = last;
19132 }
19133 }
19134 }
19135 }
19136
19137 /* Rewrite move insn into subtract of 0 if the condition codes will
19138 be useful in next conditional jump insn. */
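/* Informal sketch of the transformation (register numbers invented): a block
   ending in

       mov   r2, r3
       ...
       <conditional branch on r3 == 0>

   has the move rewritten as "r2 = r3 - 0" and the branch changed to test r2,
   so the flag-setting SUBS form can feed the conditional branch directly.  */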
19139
19140 static void
19141 thumb1_reorg (void)
19142 {
19143 basic_block bb;
19144
19145 FOR_EACH_BB_FN (bb, cfun)
19146 {
19147 rtx dest, src;
19148 rtx cmp, op0, op1, set = NULL;
19149 rtx_insn *prev, *insn = BB_END (bb);
19150 bool insn_clobbered = false;
19151
19152 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19153 insn = PREV_INSN (insn);
19154
19155 /* Find the last cbranchsi4_insn in basic block BB. */
19156 if (insn == BB_HEAD (bb)
19157 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19158 continue;
19159
19160 /* Get the register with which we are comparing. */
19161 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19162 op0 = XEXP (cmp, 0);
19163 op1 = XEXP (cmp, 1);
19164
19165 /* Check that comparison is against ZERO. */
19166 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19167 continue;
19168
19169 /* Find the first flag setting insn before INSN in basic block BB. */
19170 gcc_assert (insn != BB_HEAD (bb));
19171 for (prev = PREV_INSN (insn);
19172 (!insn_clobbered
19173 && prev != BB_HEAD (bb)
19174 && (NOTE_P (prev)
19175 || DEBUG_INSN_P (prev)
19176 || ((set = single_set (prev)) != NULL
19177 && get_attr_conds (prev) == CONDS_NOCOND)));
19178 prev = PREV_INSN (prev))
19179 {
19180 if (reg_set_p (op0, prev))
19181 insn_clobbered = true;
19182 }
19183
19184 /* Skip if op0 is clobbered by insn other than prev. */
19185 if (insn_clobbered)
19186 continue;
19187
19188 if (!set)
19189 continue;
19190
19191 dest = SET_DEST (set);
19192 src = SET_SRC (set);
19193 if (!low_register_operand (dest, SImode)
19194 || !low_register_operand (src, SImode))
19195 continue;
19196
19197 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19198 in INSN. Both src and dest of the move insn are checked. */
19199 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19200 {
19201 dest = copy_rtx (dest);
19202 src = copy_rtx (src);
19203 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19204 PATTERN (prev) = gen_rtx_SET (dest, src);
19205 INSN_CODE (prev) = -1;
19206 /* Set test register in INSN to dest. */
19207 XEXP (cmp, 0) = copy_rtx (dest);
19208 INSN_CODE (insn) = -1;
19209 }
19210 }
19211 }
19212
19213 /* Convert instructions to their cc-clobbering variant if possible, since
19214 that allows us to use smaller encodings. */
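/* Informal example (register numbers invented): where the flags are dead,
   "add r0, r0, r1" can be rewritten as "adds r0, r0, r1" by wrapping the SET
   in a PARALLEL with a clobber of CC_REGNUM, which allows the shorter 16-bit
   Thumb-2 encoding to be used.  */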
19215
19216 static void
19217 thumb2_reorg (void)
19218 {
19219 basic_block bb;
19220 regset_head live;
19221
19222 INIT_REG_SET (&live);
19223
19224 /* We are freeing block_for_insn in the toplev to keep compatibility
19225 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19226 compute_bb_for_insn ();
19227 df_analyze ();
19228
19229 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19230
19231 FOR_EACH_BB_FN (bb, cfun)
19232 {
19233 if ((current_tune->disparage_flag_setting_t16_encodings
19234 == tune_params::DISPARAGE_FLAGS_ALL)
19235 && optimize_bb_for_speed_p (bb))
19236 continue;
19237
19238 rtx_insn *insn;
19239 Convert_Action action = SKIP;
19240 Convert_Action action_for_partial_flag_setting
19241 = ((current_tune->disparage_flag_setting_t16_encodings
19242 != tune_params::DISPARAGE_FLAGS_NEITHER)
19243 && optimize_bb_for_speed_p (bb))
19244 ? SKIP : CONV;
19245
19246 COPY_REG_SET (&live, DF_LR_OUT (bb));
19247 df_simulate_initialize_backwards (bb, &live);
19248 FOR_BB_INSNS_REVERSE (bb, insn)
19249 {
19250 if (NONJUMP_INSN_P (insn)
19251 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19252 && GET_CODE (PATTERN (insn)) == SET)
19253 {
19254 action = SKIP;
19255 rtx pat = PATTERN (insn);
19256 rtx dst = XEXP (pat, 0);
19257 rtx src = XEXP (pat, 1);
19258 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19259
19260 if (UNARY_P (src) || BINARY_P (src))
19261 op0 = XEXP (src, 0);
19262
19263 if (BINARY_P (src))
19264 op1 = XEXP (src, 1);
19265
19266 if (low_register_operand (dst, SImode))
19267 {
19268 switch (GET_CODE (src))
19269 {
19270 case PLUS:
19271 /* Adding two registers and storing the result
19272 in the first source is already a 16-bit
19273 operation. */
19274 if (rtx_equal_p (dst, op0)
19275 && register_operand (op1, SImode))
19276 break;
19277
19278 if (low_register_operand (op0, SImode))
19279 {
19280 /* ADDS <Rd>,<Rn>,<Rm> */
19281 if (low_register_operand (op1, SImode))
19282 action = CONV;
19283 /* ADDS <Rdn>,#<imm8> */
19284 /* SUBS <Rdn>,#<imm8> */
19285 else if (rtx_equal_p (dst, op0)
19286 && CONST_INT_P (op1)
19287 && IN_RANGE (INTVAL (op1), -255, 255))
19288 action = CONV;
19289 /* ADDS <Rd>,<Rn>,#<imm3> */
19290 /* SUBS <Rd>,<Rn>,#<imm3> */
19291 else if (CONST_INT_P (op1)
19292 && IN_RANGE (INTVAL (op1), -7, 7))
19293 action = CONV;
19294 }
19295 /* ADCS <Rd>, <Rn> */
19296 else if (GET_CODE (XEXP (src, 0)) == PLUS
19297 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19298 && low_register_operand (XEXP (XEXP (src, 0), 1),
19299 SImode)
19300 && COMPARISON_P (op1)
19301 && cc_register (XEXP (op1, 0), VOIDmode)
19302 && maybe_get_arm_condition_code (op1) == ARM_CS
19303 && XEXP (op1, 1) == const0_rtx)
19304 action = CONV;
19305 break;
19306
19307 case MINUS:
19308 /* RSBS <Rd>,<Rn>,#0
19309 Not handled here: see NEG below. */
19310 /* SUBS <Rd>,<Rn>,#<imm3>
19311 SUBS <Rdn>,#<imm8>
19312 Not handled here: see PLUS above. */
19313 /* SUBS <Rd>,<Rn>,<Rm> */
19314 if (low_register_operand (op0, SImode)
19315 && low_register_operand (op1, SImode))
19316 action = CONV;
19317 break;
19318
19319 case MULT:
19320 /* MULS <Rdm>,<Rn>,<Rdm>
19321 As an exception to the rule, this is only used
19322 when optimizing for size since MULS is slow on all
19323 known implementations. We do not even want to use
19324 MULS in cold code, if optimizing for speed, so we
19325 test the global flag here. */
19326 if (!optimize_size)
19327 break;
19328 /* Fall through. */
19329 case AND:
19330 case IOR:
19331 case XOR:
19332 /* ANDS <Rdn>,<Rm> */
19333 if (rtx_equal_p (dst, op0)
19334 && low_register_operand (op1, SImode))
19335 action = action_for_partial_flag_setting;
19336 else if (rtx_equal_p (dst, op1)
19337 && low_register_operand (op0, SImode))
19338 action = action_for_partial_flag_setting == SKIP
19339 ? SKIP : SWAP_CONV;
19340 break;
19341
19342 case ASHIFTRT:
19343 case ASHIFT:
19344 case LSHIFTRT:
19345 /* ASRS <Rdn>,<Rm> */
19346 /* LSRS <Rdn>,<Rm> */
19347 /* LSLS <Rdn>,<Rm> */
19348 if (rtx_equal_p (dst, op0)
19349 && low_register_operand (op1, SImode))
19350 action = action_for_partial_flag_setting;
19351 /* ASRS <Rd>,<Rm>,#<imm5> */
19352 /* LSRS <Rd>,<Rm>,#<imm5> */
19353 /* LSLS <Rd>,<Rm>,#<imm5> */
19354 else if (low_register_operand (op0, SImode)
19355 && CONST_INT_P (op1)
19356 && IN_RANGE (INTVAL (op1), 0, 31))
19357 action = action_for_partial_flag_setting;
19358 break;
19359
19360 case ROTATERT:
19361 /* RORS <Rdn>,<Rm> */
19362 if (rtx_equal_p (dst, op0)
19363 && low_register_operand (op1, SImode))
19364 action = action_for_partial_flag_setting;
19365 break;
19366
19367 case NOT:
19368 /* MVNS <Rd>,<Rm> */
19369 if (low_register_operand (op0, SImode))
19370 action = action_for_partial_flag_setting;
19371 break;
19372
19373 case NEG:
19374 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19375 if (low_register_operand (op0, SImode))
19376 action = CONV;
19377 break;
19378
19379 case CONST_INT:
19380 /* MOVS <Rd>,#<imm8> */
19381 if (CONST_INT_P (src)
19382 && IN_RANGE (INTVAL (src), 0, 255))
19383 action = action_for_partial_flag_setting;
19384 break;
19385
19386 case REG:
19387 /* MOVS and MOV<c> with registers have different
19388 encodings, so are not relevant here. */
19389 break;
19390
19391 default:
19392 break;
19393 }
19394 }
19395
19396 if (action != SKIP)
19397 {
19398 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19399 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19400 rtvec vec;
19401
19402 if (action == SWAP_CONV)
19403 {
19404 src = copy_rtx (src);
19405 XEXP (src, 0) = op1;
19406 XEXP (src, 1) = op0;
19407 pat = gen_rtx_SET (dst, src);
19408 vec = gen_rtvec (2, pat, clobber);
19409 }
19410 else /* action == CONV */
19411 vec = gen_rtvec (2, pat, clobber);
19412
19413 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19414 INSN_CODE (insn) = -1;
19415 }
19416 }
19417
19418 if (NONDEBUG_INSN_P (insn))
19419 df_simulate_one_insn_backwards (bb, insn, &live);
19420 }
19421 }
19422
19423 CLEAR_REG_SET (&live);
19424 }
19425
19426 /* Gcc puts the pool in the wrong place for ARM, since we can only
19427 load addresses a limited distance around the pc. We do some
19428 special munging to move the constant pool values to the correct
19429 point in the code. */
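/* Informal outline of this pass, derived from the code below: (1) clear
   registers around cmse_nonsecure_calls if CMSE is in use, (2) run the
   Thumb-specific reorganisations, (3) scan the insn stream recording
   barriers and the constants that will need a pool entry, and (4) group the
   fixes into minipools and dump each pool at a suitable barrier.  */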
19430 static void
19431 arm_reorg (void)
19432 {
19433 rtx_insn *insn;
19434 HOST_WIDE_INT address = 0;
19435 Mfix * fix;
19436
19437 if (use_cmse)
19438 cmse_nonsecure_call_inline_register_clear ();
19439
19440 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19441 if (cfun->is_thunk)
19442 ;
19443 else if (TARGET_THUMB1)
19444 thumb1_reorg ();
19445 else if (TARGET_THUMB2)
19446 thumb2_reorg ();
19447
19448 /* Ensure all insns that must be split have been split at this point.
19449 Otherwise, the pool placement code below may compute incorrect
19450 insn lengths. Note that when optimizing, all insns have already
19451 been split at this point. */
19452 if (!optimize)
19453 split_all_insns_noflow ();
19454
19455 /* When literal pools are disabled there should be no need to create any,
19456 so make sure we do not attempt to. */
19457 if (arm_disable_literal_pool)
19458 return;
19459
19460 minipool_fix_head = minipool_fix_tail = NULL;
19461
19462 /* The first insn must always be a note, or the code below won't
19463 scan it properly. */
19464 insn = get_insns ();
19465 gcc_assert (NOTE_P (insn));
19466 minipool_pad = 0;
19467
19468 /* Scan all the insns and record the operands that will need fixing. */
19469 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19470 {
19471 if (BARRIER_P (insn))
19472 push_minipool_barrier (insn, address);
19473 else if (INSN_P (insn))
19474 {
19475 rtx_jump_table_data *table;
19476
19477 note_invalid_constants (insn, address, true);
19478 address += get_attr_length (insn);
19479
19480 /* If the insn is a vector jump, add the size of the table
19481 and skip the table. */
19482 if (tablejump_p (insn, NULL, &table))
19483 {
19484 address += get_jump_table_size (table);
19485 insn = table;
19486 }
19487 }
19488 else if (LABEL_P (insn))
19489 /* Add the worst-case padding due to alignment. We don't add
19490 the _current_ padding because the minipool insertions
19491 themselves might change it. */
19492 address += get_label_padding (insn);
19493 }
19494
19495 fix = minipool_fix_head;
19496
19497 /* Now scan the fixups and perform the required changes. */
19498 while (fix)
19499 {
19500 Mfix * ftmp;
19501 Mfix * fdel;
19502 Mfix * last_added_fix;
19503 Mfix * last_barrier = NULL;
19504 Mfix * this_fix;
19505
19506 /* Skip any further barriers before the next fix. */
19507 while (fix && BARRIER_P (fix->insn))
19508 fix = fix->next;
19509
19510 /* No more fixes. */
19511 if (fix == NULL)
19512 break;
19513
19514 last_added_fix = NULL;
19515
19516 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19517 {
19518 if (BARRIER_P (ftmp->insn))
19519 {
19520 if (ftmp->address >= minipool_vector_head->max_address)
19521 break;
19522
19523 last_barrier = ftmp;
19524 }
19525 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19526 break;
19527
19528 last_added_fix = ftmp; /* Keep track of the last fix added. */
19529 }
19530
19531 /* If we found a barrier, drop back to that; any fixes that we
19532 could have reached but come after the barrier will now go in
19533 the next mini-pool. */
19534 if (last_barrier != NULL)
19535 {
19536 /* Reduce the refcount for those fixes that won't go into this
19537 pool after all. */
19538 for (fdel = last_barrier->next;
19539 fdel && fdel != ftmp;
19540 fdel = fdel->next)
19541 {
19542 fdel->minipool->refcount--;
19543 fdel->minipool = NULL;
19544 }
19545
19546 ftmp = last_barrier;
19547 }
19548 else
19549 {
19550 /* ftmp is the first fix that we can't fit into this pool and
19551 there are no natural barriers that we could use. Insert a
19552 new barrier in the code somewhere between the previous
19553 fix and this one, and arrange to jump around it. */
19554 HOST_WIDE_INT max_address;
19555
19556 /* The last item on the list of fixes must be a barrier, so
19557 we can never run off the end of the list of fixes without
19558 last_barrier being set. */
19559 gcc_assert (ftmp);
19560
19561 max_address = minipool_vector_head->max_address;
19562 /* Check that there isn't another fix that is in range that
19563 we couldn't fit into this pool because the pool was
19564 already too large: we need to put the pool before such an
19565 instruction. The pool itself may come just after the
19566 fix because create_fix_barrier also allows space for a
19567 jump instruction. */
19568 if (ftmp->address < max_address)
19569 max_address = ftmp->address + 1;
19570
19571 last_barrier = create_fix_barrier (last_added_fix, max_address);
19572 }
19573
19574 assign_minipool_offsets (last_barrier);
19575
19576 while (ftmp)
19577 {
19578 if (!BARRIER_P (ftmp->insn)
19579 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19580 == NULL))
19581 break;
19582
19583 ftmp = ftmp->next;
19584 }
19585
19586 /* Scan over the fixes we have identified for this pool, fixing them
19587 up and adding the constants to the pool itself. */
19588 for (this_fix = fix; this_fix && ftmp != this_fix;
19589 this_fix = this_fix->next)
19590 if (!BARRIER_P (this_fix->insn))
19591 {
19592 rtx addr
19593 = plus_constant (Pmode,
19594 gen_rtx_LABEL_REF (VOIDmode,
19595 minipool_vector_label),
19596 this_fix->minipool->offset);
19597 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19598 }
19599
19600 dump_minipool (last_barrier->insn);
19601 fix = ftmp;
19602 }
19603
19604 /* From now on we must synthesize any constants that we can't handle
19605 directly. This can happen if the RTL gets split during final
19606 instruction generation. */
19607 cfun->machine->after_arm_reorg = 1;
19608
19609 /* Free the minipool memory. */
19610 obstack_free (&minipool_obstack, minipool_startobj);
19611 }
19612 \f
19613 /* Routines to output assembly language. */
19614
19615 /* Return the string representation of the real value passed in as R. */
19616 static const char *
19617 fp_const_from_val (REAL_VALUE_TYPE *r)
19618 {
19619 if (!fp_consts_inited)
19620 init_fp_table ();
19621
19622 gcc_assert (real_equal (r, &value_fp0));
19623 return "0";
19624 }
19625
19626 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19627 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19628 insn is in the list, and UPDATE is true iff the list contains an
19629 explicit update of the base register. */
19630 void
19631 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19632 bool update)
19633 {
19634 int i;
19635 char pattern[100];
19636 int offset;
19637 const char *conditional;
19638 int num_saves = XVECLEN (operands[0], 0);
19639 unsigned int regno;
19640 unsigned int regno_base = REGNO (operands[1]);
19641 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19642
19643 offset = 0;
19644 offset += update ? 1 : 0;
19645 offset += return_pc ? 1 : 0;
19646
19647 /* Is the base register in the list? */
19648 for (i = offset; i < num_saves; i++)
19649 {
19650 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19651 /* If SP is in the list, then the base register must be SP. */
19652 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19653 /* If base register is in the list, there must be no explicit update. */
19654 if (regno == regno_base)
19655 gcc_assert (!update);
19656 }
19657
19658 conditional = reverse ? "%?%D0" : "%?%d0";
19659 /* Can't use POP if returning from an interrupt. */
19660 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19661 sprintf (pattern, "pop%s\t{", conditional);
19662 else
19663 {
19664 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19665 It's just a convention; their semantics are identical. */
19666 if (regno_base == SP_REGNUM)
19667 sprintf (pattern, "ldmfd%s\t", conditional);
19668 else if (update)
19669 sprintf (pattern, "ldmia%s\t", conditional);
19670 else
19671 sprintf (pattern, "ldm%s\t", conditional);
19672
19673 strcat (pattern, reg_names[regno_base]);
19674 if (update)
19675 strcat (pattern, "!, {");
19676 else
19677 strcat (pattern, ", {");
19678 }
19679
19680 /* Output the first destination register. */
19681 strcat (pattern,
19682 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19683
19684 /* Output the rest of the destination registers. */
19685 for (i = offset + 1; i < num_saves; i++)
19686 {
19687 strcat (pattern, ", ");
19688 strcat (pattern,
19689 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19690 }
19691
19692 strcat (pattern, "}");
19693
19694 if (interrupt_p && return_pc)
19695 strcat (pattern, "^");
19696
19697 output_asm_insn (pattern, &cond);
19698 }
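/* Illustrative examples (not from the original sources) of the strings the
   function above builds: restoring r4, r5 and the return address with a
   writeback of sp normally comes out as "pop {r4, r5, pc}", whereas a return
   from an interrupt handler instead uses something like
   "ldmfd sp!, {r4, r5, pc}^".  */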
19699
19700
19701 /* Output the assembly for a store multiple. */
19702
19703 const char *
19704 vfp_output_vstmd (rtx * operands)
19705 {
19706 char pattern[100];
19707 int p;
19708 int base;
19709 int i;
19710 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19711 ? XEXP (operands[0], 0)
19712 : XEXP (XEXP (operands[0], 0), 0);
19713 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19714
19715 if (push_p)
19716 strcpy (pattern, "vpush%?.64\t{%P1");
19717 else
19718 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19719
19720 p = strlen (pattern);
19721
19722 gcc_assert (REG_P (operands[1]));
19723
19724 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19725 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19726 {
19727 p += sprintf (&pattern[p], ", d%d", base + i);
19728 }
19729 strcpy (&pattern[p], "}");
19730
19731 output_asm_insn (pattern, operands);
19732 return "";
19733 }
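/* Hypothetical example (not from the original sources): pushing d8-d9 onto
   the stack is printed by the function above as "vpush.64 {d8, d9}", while a
   store through another base register, say r4, would be printed as
   "vstmdb.64 r4!, {d8, d9}".  */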
19734
19735
19736 /* Emit RTL to save a block of VFP register pairs to the stack. Returns
19737 the number of bytes pushed. */
19738
19739 static int
19740 vfp_emit_fstmd (int base_reg, int count)
19741 {
19742 rtx par;
19743 rtx dwarf;
19744 rtx tmp, reg;
19745 int i;
19746
19747 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly
19748 two register pairs are stored by a store-multiple insn. We avoid this
19749 by pushing an extra pair. */
19750 if (count == 2 && !arm_arch6)
19751 {
19752 if (base_reg == LAST_VFP_REGNUM - 3)
19753 base_reg -= 2;
19754 count++;
19755 }
19756
19757 /* FSTMD may not store more than 16 doubleword registers at once. Split
19758 larger stores into multiple parts (up to a maximum of two, in
19759 practice). */
19760 if (count > 16)
19761 {
19762 int saved;
19763 /* NOTE: base_reg is an internal register number, so each D register
19764 counts as 2. */
19765 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19766 saved += vfp_emit_fstmd (base_reg, 16);
19767 return saved;
19768 }
19769
19770 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19771 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19772
19773 reg = gen_rtx_REG (DFmode, base_reg);
19774 base_reg += 2;
19775
19776 XVECEXP (par, 0, 0)
19777 = gen_rtx_SET (gen_frame_mem
19778 (BLKmode,
19779 gen_rtx_PRE_MODIFY (Pmode,
19780 stack_pointer_rtx,
19781 plus_constant
19782 (Pmode, stack_pointer_rtx,
19783 - (count * 8)))
19784 ),
19785 gen_rtx_UNSPEC (BLKmode,
19786 gen_rtvec (1, reg),
19787 UNSPEC_PUSH_MULT));
19788
19789 tmp = gen_rtx_SET (stack_pointer_rtx,
19790 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19791 RTX_FRAME_RELATED_P (tmp) = 1;
19792 XVECEXP (dwarf, 0, 0) = tmp;
19793
19794 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19795 RTX_FRAME_RELATED_P (tmp) = 1;
19796 XVECEXP (dwarf, 0, 1) = tmp;
19797
19798 for (i = 1; i < count; i++)
19799 {
19800 reg = gen_rtx_REG (DFmode, base_reg);
19801 base_reg += 2;
19802 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19803
19804 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19805 plus_constant (Pmode,
19806 stack_pointer_rtx,
19807 i * 8)),
19808 reg);
19809 RTX_FRAME_RELATED_P (tmp) = 1;
19810 XVECEXP (dwarf, 0, i + 1) = tmp;
19811 }
19812
19813 par = emit_insn (par);
19814 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19815 RTX_FRAME_RELATED_P (par) = 1;
19816
19817 return count * 8;
19818 }
19819
19820 /* Return true if -mcmse has been passed and the function pointed to by ADDR
19821 has the cmse_nonsecure_call attribute; return false otherwise. */
19822
19823 bool
19824 detect_cmse_nonsecure_call (tree addr)
19825 {
19826 if (!addr)
19827 return FALSE;
19828
19829 tree fntype = TREE_TYPE (addr);
19830 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19831 TYPE_ATTRIBUTES (fntype)))
19832 return TRUE;
19833 return FALSE;
19834 }
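/* For illustration only (not from the original sources): the attribute tested
   above is normally attached to a function-pointer type in user code, e.g.

       void __attribute__((cmse_nonsecure_call)) (*ns_foo) (void);

   so that, when -mcmse is given, indirect calls made through such a pointer
   use the non-secure call sequence.  */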
19835
19836
19837 /* Emit a call instruction with pattern PAT. ADDR is the address of
19838 the call target. */
19839
19840 void
19841 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19842 {
19843 rtx insn;
19844
19845 insn = emit_call_insn (pat);
19846
19847 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19848 If the call might use such an entry, add a use of the PIC register
19849 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19850 if (TARGET_VXWORKS_RTP
19851 && flag_pic
19852 && !sibcall
19853 && SYMBOL_REF_P (addr)
19854 && (SYMBOL_REF_DECL (addr)
19855 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19856 : !SYMBOL_REF_LOCAL_P (addr)))
19857 {
19858 require_pic_register (NULL_RTX, false /*compute_now*/);
19859 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19860 }
19861
19862 if (TARGET_FDPIC)
19863 {
19864 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19865 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19866 }
19867
19868 if (TARGET_AAPCS_BASED)
19869 {
19870 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19871 linker. We need to add an IP clobber to allow setting
19872 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19873 is not needed since it's a fixed register. */
19874 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19875 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19876 }
19877 }
19878
19879 /* Output a 'call' insn. */
19880 const char *
19881 output_call (rtx *operands)
19882 {
19883 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19884
19885 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19886 if (REGNO (operands[0]) == LR_REGNUM)
19887 {
19888 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19889 output_asm_insn ("mov%?\t%0, %|lr", operands);
19890 }
19891
19892 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19893
19894 if (TARGET_INTERWORK || arm_arch4t)
19895 output_asm_insn ("bx%?\t%0", operands);
19896 else
19897 output_asm_insn ("mov%?\t%|pc, %0", operands);
19898
19899 return "";
19900 }
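/* For illustration (not from the original sources), on a pre-ARMv5t target an
   indirect call through, say, r3 is emitted by the function above as

       mov     lr, pc
       bx      r3              @ or "mov pc, r3" without interworking/ARMv4t

   the mov places the return address (the mov's address plus 8, i.e. the insn
   after the branch) in lr before the jump.  */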
19901
19902 /* Output a move of a long double from ARM registers to ARM registers.
19903 OPERANDS[0] is the destination.
19904 OPERANDS[1] is the source. */
19905 const char *
19906 output_mov_long_double_arm_from_arm (rtx *operands)
19907 {
19908 /* We have to be careful here because the two might overlap. */
19909 int dest_start = REGNO (operands[0]);
19910 int src_start = REGNO (operands[1]);
19911 rtx ops[2];
19912 int i;
19913
19914 if (dest_start < src_start)
19915 {
19916 for (i = 0; i < 3; i++)
19917 {
19918 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19919 ops[1] = gen_rtx_REG (SImode, src_start + i);
19920 output_asm_insn ("mov%?\t%0, %1", ops);
19921 }
19922 }
19923 else
19924 {
19925 for (i = 2; i >= 0; i--)
19926 {
19927 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19928 ops[1] = gen_rtx_REG (SImode, src_start + i);
19929 output_asm_insn ("mov%?\t%0, %1", ops);
19930 }
19931 }
19932
19933 return "";
19934 }
19935
19936 void
19937 arm_emit_movpair (rtx dest, rtx src)
19938 {
19939 /* If the source is an immediate, split it into two 16-bit moves. */
19940 if (CONST_INT_P (src))
19941 {
19942 HOST_WIDE_INT val = INTVAL (src);
19943 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19944 if ((val >> 16) & 0x0000ffff)
19945 {
19946 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19947 GEN_INT (16)),
19948 GEN_INT ((val >> 16) & 0x0000ffff));
19949 rtx_insn *insn = get_last_insn ();
19950 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19951 }
19952 return;
19953 }
19954 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19955 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19956 rtx_insn *insn = get_last_insn ();
19957 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19958 }
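/* Sketch of the effect (not from the original sources): for a constant such
   as 0x12345678 the two sets emitted above normally end up as a
   "movw rd, #0x5678" / "movt rd, #0x1234" pair, the second insn being skipped
   when the upper half of the value is zero.  */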
19959
19960 /* Output a move between double words. It must be REG<-MEM
19961 or MEM<-REG. */
19962 const char *
19963 output_move_double (rtx *operands, bool emit, int *count)
19964 {
19965 enum rtx_code code0 = GET_CODE (operands[0]);
19966 enum rtx_code code1 = GET_CODE (operands[1]);
19967 rtx otherops[3];
19968 if (count)
19969 *count = 1;
19970
19971 /* The only case when this might happen is when
19972 you are looking at the length of a DImode instruction
19973 that has an invalid constant in it. */
19974 if (code0 == REG && code1 != MEM)
19975 {
19976 gcc_assert (!emit);
19977 *count = 2;
19978 return "";
19979 }
19980
19981 if (code0 == REG)
19982 {
19983 unsigned int reg0 = REGNO (operands[0]);
19984 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19985
19986 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19987
19988 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19989
19990 switch (GET_CODE (XEXP (operands[1], 0)))
19991 {
19992 case REG:
19993
19994 if (emit)
19995 {
19996 if (can_ldrd
19997 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19998 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19999 else
20000 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20001 }
20002 break;
20003
20004 case PRE_INC:
20005 gcc_assert (can_ldrd);
20006 if (emit)
20007 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20008 break;
20009
20010 case PRE_DEC:
20011 if (emit)
20012 {
20013 if (can_ldrd)
20014 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20015 else
20016 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20017 }
20018 break;
20019
20020 case POST_INC:
20021 if (emit)
20022 {
20023 if (can_ldrd)
20024 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20025 else
20026 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20027 }
20028 break;
20029
20030 case POST_DEC:
20031 gcc_assert (can_ldrd);
20032 if (emit)
20033 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20034 break;
20035
20036 case PRE_MODIFY:
20037 case POST_MODIFY:
20038 /* Auto-increment addressing modes should never have overlapping
20039 base and destination registers, and overlapping index registers
20040 are already prohibited, so this doesn't need to worry about
20041 fix_cm3_ldrd. */
20042 otherops[0] = operands[0];
20043 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20044 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20045
20046 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20047 {
20048 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20049 {
20050 /* Registers overlap so split out the increment. */
20051 if (emit)
20052 {
20053 gcc_assert (can_ldrd);
20054 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20055 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20056 }
20057 if (count)
20058 *count = 2;
20059 }
20060 else
20061 {
20062 /* Use a single insn if we can.
20063 FIXME: IWMMXT allows offsets larger than ldrd can
20064 handle, fix these up with a pair of ldr. */
20065 if (can_ldrd
20066 && (TARGET_THUMB2
20067 || !CONST_INT_P (otherops[2])
20068 || (INTVAL (otherops[2]) > -256
20069 && INTVAL (otherops[2]) < 256)))
20070 {
20071 if (emit)
20072 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20073 }
20074 else
20075 {
20076 if (emit)
20077 {
20078 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20079 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20080 }
20081 if (count)
20082 *count = 2;
20083
20084 }
20085 }
20086 }
20087 else
20088 {
20089 /* Use a single insn if we can.
20090 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20091 fix these up with a pair of ldr. */
20092 if (can_ldrd
20093 && (TARGET_THUMB2
20094 || !CONST_INT_P (otherops[2])
20095 || (INTVAL (otherops[2]) > -256
20096 && INTVAL (otherops[2]) < 256)))
20097 {
20098 if (emit)
20099 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20100 }
20101 else
20102 {
20103 if (emit)
20104 {
20105 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20106 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20107 }
20108 if (count)
20109 *count = 2;
20110 }
20111 }
20112 break;
20113
20114 case LABEL_REF:
20115 case CONST:
20116 /* We might be able to use ldrd %0, %1 here. However, the range is
20117 different from that of ldr/adr, and it is broken on some ARMv7-M
20118 implementations. */
20119 /* Use the second register of the pair to avoid problematic
20120 overlap. */
20121 otherops[1] = operands[1];
20122 if (emit)
20123 output_asm_insn ("adr%?\t%0, %1", otherops);
20124 operands[1] = otherops[0];
20125 if (emit)
20126 {
20127 if (can_ldrd)
20128 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20129 else
20130 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20131 }
20132
20133 if (count)
20134 *count = 2;
20135 break;
20136
20137 /* ??? This needs checking for thumb2. */
20138 default:
20139 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20140 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20141 {
20142 otherops[0] = operands[0];
20143 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20144 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20145
20146 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20147 {
20148 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20149 {
20150 switch ((int) INTVAL (otherops[2]))
20151 {
20152 case -8:
20153 if (emit)
20154 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20155 return "";
20156 case -4:
20157 if (TARGET_THUMB2)
20158 break;
20159 if (emit)
20160 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20161 return "";
20162 case 4:
20163 if (TARGET_THUMB2)
20164 break;
20165 if (emit)
20166 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20167 return "";
20168 }
20169 }
20170 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20171 operands[1] = otherops[0];
20172 if (can_ldrd
20173 && (REG_P (otherops[2])
20174 || TARGET_THUMB2
20175 || (CONST_INT_P (otherops[2])
20176 && INTVAL (otherops[2]) > -256
20177 && INTVAL (otherops[2]) < 256)))
20178 {
20179 if (reg_overlap_mentioned_p (operands[0],
20180 otherops[2]))
20181 {
20182 /* Swap base and index registers over to
20183 avoid a conflict. */
20184 std::swap (otherops[1], otherops[2]);
20185 }
20186 /* If both registers conflict, it will usually
20187 have been fixed by a splitter. */
20188 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20189 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20190 {
20191 if (emit)
20192 {
20193 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20194 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20195 }
20196 if (count)
20197 *count = 2;
20198 }
20199 else
20200 {
20201 otherops[0] = operands[0];
20202 if (emit)
20203 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20204 }
20205 return "";
20206 }
20207
20208 if (CONST_INT_P (otherops[2]))
20209 {
20210 if (emit)
20211 {
20212 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20213 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20214 else
20215 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20216 }
20217 }
20218 else
20219 {
20220 if (emit)
20221 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20222 }
20223 }
20224 else
20225 {
20226 if (emit)
20227 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20228 }
20229
20230 if (count)
20231 *count = 2;
20232
20233 if (can_ldrd)
20234 return "ldrd%?\t%0, [%1]";
20235
20236 return "ldmia%?\t%1, %M0";
20237 }
20238 else
20239 {
20240 otherops[1] = adjust_address (operands[1], SImode, 4);
20241 /* Take care of overlapping base/data reg. */
20242 if (reg_mentioned_p (operands[0], operands[1]))
20243 {
20244 if (emit)
20245 {
20246 output_asm_insn ("ldr%?\t%0, %1", otherops);
20247 output_asm_insn ("ldr%?\t%0, %1", operands);
20248 }
20249 if (count)
20250 *count = 2;
20251
20252 }
20253 else
20254 {
20255 if (emit)
20256 {
20257 output_asm_insn ("ldr%?\t%0, %1", operands);
20258 output_asm_insn ("ldr%?\t%0, %1", otherops);
20259 }
20260 if (count)
20261 *count = 2;
20262 }
20263 }
20264 }
20265 }
20266 else
20267 {
20268 /* Constraints should ensure this. */
20269 gcc_assert (code0 == MEM && code1 == REG);
20270 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20271 || (TARGET_ARM && TARGET_LDRD));
20272
20273 /* For TARGET_ARM the first source register of an STRD
20274 must be even. This is usually the case for double-word
20275 values but user assembly constraints can force an odd
20276 starting register. */
20277 bool allow_strd = TARGET_LDRD
20278 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20279 switch (GET_CODE (XEXP (operands[0], 0)))
20280 {
20281 case REG:
20282 if (emit)
20283 {
20284 if (allow_strd)
20285 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20286 else
20287 output_asm_insn ("stm%?\t%m0, %M1", operands);
20288 }
20289 break;
20290
20291 case PRE_INC:
20292 gcc_assert (allow_strd);
20293 if (emit)
20294 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20295 break;
20296
20297 case PRE_DEC:
20298 if (emit)
20299 {
20300 if (allow_strd)
20301 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20302 else
20303 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20304 }
20305 break;
20306
20307 case POST_INC:
20308 if (emit)
20309 {
20310 if (allow_strd)
20311 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20312 else
20313 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20314 }
20315 break;
20316
20317 case POST_DEC:
20318 gcc_assert (allow_strd);
20319 if (emit)
20320 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20321 break;
20322
20323 case PRE_MODIFY:
20324 case POST_MODIFY:
20325 otherops[0] = operands[1];
20326 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20327 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20328
20329 /* IWMMXT allows offsets larger than strd can handle,
20330 fix these up with a pair of str. */
20331 if (!TARGET_THUMB2
20332 && CONST_INT_P (otherops[2])
20333 && (INTVAL(otherops[2]) <= -256
20334 || INTVAL(otherops[2]) >= 256))
20335 {
20336 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20337 {
20338 if (emit)
20339 {
20340 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20341 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20342 }
20343 if (count)
20344 *count = 2;
20345 }
20346 else
20347 {
20348 if (emit)
20349 {
20350 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20351 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20352 }
20353 if (count)
20354 *count = 2;
20355 }
20356 }
20357 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20358 {
20359 if (emit)
20360 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20361 }
20362 else
20363 {
20364 if (emit)
20365 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20366 }
20367 break;
20368
20369 case PLUS:
20370 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20371 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20372 {
20373 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20374 {
20375 case -8:
20376 if (emit)
20377 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20378 return "";
20379
20380 case -4:
20381 if (TARGET_THUMB2)
20382 break;
20383 if (emit)
20384 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20385 return "";
20386
20387 case 4:
20388 if (TARGET_THUMB2)
20389 break;
20390 if (emit)
20391 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20392 return "";
20393 }
20394 }
20395 if (allow_strd
20396 && (REG_P (otherops[2])
20397 || TARGET_THUMB2
20398 || (CONST_INT_P (otherops[2])
20399 && INTVAL (otherops[2]) > -256
20400 && INTVAL (otherops[2]) < 256)))
20401 {
20402 otherops[0] = operands[1];
20403 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20404 if (emit)
20405 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20406 return "";
20407 }
20408 /* Fall through */
20409
20410 default:
20411 otherops[0] = adjust_address (operands[0], SImode, 4);
20412 otherops[1] = operands[1];
20413 if (emit)
20414 {
20415 output_asm_insn ("str%?\t%1, %0", operands);
20416 output_asm_insn ("str%?\t%H1, %0", otherops);
20417 }
20418 if (count)
20419 *count = 2;
20420 }
20421 }
20422
20423 return "";
20424 }
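/* Illustrative examples (not from the original sources): with a plain
   register address the function above prints either "ldrd r4, [%m1]"-style
   output such as "ldrd r4, [r2]" (when LDRD is usable and the register pair
   is suitably aligned) or "ldmia r2, {r4, r5}" as the fallback; the
   corresponding stores are "strd r4, [r2]" and "stm r2, {r4, r5}".  */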
20425
20426 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20427 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20428
20429 const char *
20430 output_move_quad (rtx *operands)
20431 {
20432 if (REG_P (operands[0]))
20433 {
20434 /* Load, or reg->reg move. */
20435
20436 if (MEM_P (operands[1]))
20437 {
20438 switch (GET_CODE (XEXP (operands[1], 0)))
20439 {
20440 case REG:
20441 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20442 break;
20443
20444 case LABEL_REF:
20445 case CONST:
20446 output_asm_insn ("adr%?\t%0, %1", operands);
20447 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20448 break;
20449
20450 default:
20451 gcc_unreachable ();
20452 }
20453 }
20454 else
20455 {
20456 rtx ops[2];
20457 int dest, src, i;
20458
20459 gcc_assert (REG_P (operands[1]));
20460
20461 dest = REGNO (operands[0]);
20462 src = REGNO (operands[1]);
20463
20464 /* This seems pretty dumb, but hopefully GCC won't try to do it
20465 very often. */
20466 if (dest < src)
20467 for (i = 0; i < 4; i++)
20468 {
20469 ops[0] = gen_rtx_REG (SImode, dest + i);
20470 ops[1] = gen_rtx_REG (SImode, src + i);
20471 output_asm_insn ("mov%?\t%0, %1", ops);
20472 }
20473 else
20474 for (i = 3; i >= 0; i--)
20475 {
20476 ops[0] = gen_rtx_REG (SImode, dest + i);
20477 ops[1] = gen_rtx_REG (SImode, src + i);
20478 output_asm_insn ("mov%?\t%0, %1", ops);
20479 }
20480 }
20481 }
20482 else
20483 {
20484 gcc_assert (MEM_P (operands[0]));
20485 gcc_assert (REG_P (operands[1]));
20486 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20487
20488 switch (GET_CODE (XEXP (operands[0], 0)))
20489 {
20490 case REG:
20491 output_asm_insn ("stm%?\t%m0, %M1", operands);
20492 break;
20493
20494 default:
20495 gcc_unreachable ();
20496 }
20497 }
20498
20499 return "";
20500 }
20501
20502 /* Output a VFP load or store instruction. */
20503
20504 const char *
20505 output_move_vfp (rtx *operands)
20506 {
20507 rtx reg, mem, addr, ops[2];
20508 int load = REG_P (operands[0]);
20509 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20510 int sp = (!TARGET_VFP_FP16INST
20511 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20512 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20513 const char *templ;
20514 char buff[50];
20515 machine_mode mode;
20516
20517 reg = operands[!load];
20518 mem = operands[load];
20519
20520 mode = GET_MODE (reg);
20521
20522 gcc_assert (REG_P (reg));
20523 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20524 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20525 || mode == SFmode
20526 || mode == DFmode
20527 || mode == HImode
20528 || mode == SImode
20529 || mode == DImode
20530 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20531 gcc_assert (MEM_P (mem));
20532
20533 addr = XEXP (mem, 0);
20534
20535 switch (GET_CODE (addr))
20536 {
20537 case PRE_DEC:
20538 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20539 ops[0] = XEXP (addr, 0);
20540 ops[1] = reg;
20541 break;
20542
20543 case POST_INC:
20544 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20545 ops[0] = XEXP (addr, 0);
20546 ops[1] = reg;
20547 break;
20548
20549 default:
20550 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20551 ops[0] = reg;
20552 ops[1] = mem;
20553 break;
20554 }
20555
20556 sprintf (buff, templ,
20557 load ? "ld" : "st",
20558 dp ? "64" : sp ? "32" : "16",
20559 dp ? "P" : "",
20560 integer_p ? "\t%@ int" : "");
20561 output_asm_insn (buff, ops);
20562
20563 return "";
20564 }
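/* A hypothetical example (not from the original sources): a DFmode load from
   [r3, #8] into d5 is printed by the function above as
   "vldr.64 d5, [r3, #8]", while a PRE_DEC store becomes e.g.
   "vstmdb.64 r3!, {d5}".  */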
20565
20566 /* Output a Neon double-word or quad-word load or store, or a load
20567 or store for larger structure modes.
20568
20569 WARNING: The ordering of elements is weird in big-endian mode,
20570 because the EABI requires that vectors stored in memory appear
20571 as though they were stored by a VSTM instruction.
20572 GCC RTL defines element ordering based on in-memory order.
20573 This can be different from the architectural ordering of elements
20574 within a NEON register. The intrinsics defined in arm_neon.h use the
20575 NEON register element ordering, not the GCC RTL element ordering.
20576
20577 For example, the in-memory ordering of a big-endian quadword
20578 vector with 16-bit elements when stored from register pair {d0,d1}
20579 will be (lowest address first, d0[N] is NEON register element N):
20580
20581 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20582
20583 When necessary, quadword registers (dN, dN+1) are moved to ARM
20584 registers from rN in the order:
20585
20586 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20587
20588 So that STM/LDM can be used on vectors in ARM registers, and the
20589 same memory layout will result as if VSTM/VLDM were used.
20590
20591 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20592 possible, which allows use of appropriate alignment tags.
20593 Note that the choice of "64" is independent of the actual vector
20594 element size; this size simply ensures that the behavior is
20595 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20596
20597 Due to limitations of those instructions, use of VST1.64/VLD1.64
20598 is not possible if:
20599 - the address contains PRE_DEC, or
20600 - the mode refers to more than 4 double-word registers
20601
20602 In those cases, it would be possible to replace VSTM/VLDM by a
20603 sequence of instructions; this is not currently implemented since
20604 this is not certain to actually improve performance. */
20605
20606 const char *
20607 output_move_neon (rtx *operands)
20608 {
20609 rtx reg, mem, addr, ops[2];
20610 int regno, nregs, load = REG_P (operands[0]);
20611 const char *templ;
20612 char buff[50];
20613 machine_mode mode;
20614
20615 reg = operands[!load];
20616 mem = operands[load];
20617
20618 mode = GET_MODE (reg);
20619
20620 gcc_assert (REG_P (reg));
20621 regno = REGNO (reg);
20622 nregs = REG_NREGS (reg) / 2;
20623 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20624 || NEON_REGNO_OK_FOR_QUAD (regno));
20625 gcc_assert (VALID_NEON_DREG_MODE (mode)
20626 || VALID_NEON_QREG_MODE (mode)
20627 || VALID_NEON_STRUCT_MODE (mode));
20628 gcc_assert (MEM_P (mem));
20629
20630 addr = XEXP (mem, 0);
20631
20632 /* Strip off const from addresses like (const (plus (...))). */
20633 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20634 addr = XEXP (addr, 0);
20635
20636 switch (GET_CODE (addr))
20637 {
20638 case POST_INC:
20639 /* We have to use vldm / vstm for too-large modes. */
20640 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20641 {
20642 templ = "v%smia%%?\t%%0!, %%h1";
20643 ops[0] = XEXP (addr, 0);
20644 }
20645 else
20646 {
20647 templ = "v%s1.64\t%%h1, %%A0";
20648 ops[0] = mem;
20649 }
20650 ops[1] = reg;
20651 break;
20652
20653 case PRE_DEC:
20654 /* We have to use vldm / vstm in this case, since there is no
20655 pre-decrement form of the vld1 / vst1 instructions. */
20656 templ = "v%smdb%%?\t%%0!, %%h1";
20657 ops[0] = XEXP (addr, 0);
20658 ops[1] = reg;
20659 break;
20660
20661 case POST_MODIFY:
20662 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20663 gcc_unreachable ();
20664
20665 case REG:
20666 /* We have to use vldm / vstm for too-large modes. */
20667 if (nregs > 1)
20668 {
20669 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20670 templ = "v%smia%%?\t%%m0, %%h1";
20671 else
20672 templ = "v%s1.64\t%%h1, %%A0";
20673
20674 ops[0] = mem;
20675 ops[1] = reg;
20676 break;
20677 }
20678 /* Fall through. */
20679 case PLUS:
20680 if (GET_CODE (addr) == PLUS)
20681 addr = XEXP (addr, 0);
20682 /* Fall through. */
20683 case LABEL_REF:
20684 {
20685 int i;
20686 int overlap = -1;
20687 for (i = 0; i < nregs; i++)
20688 {
20689 /* We're only using DImode here because it's a convenient
20690 size. */
20691 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20692 ops[1] = adjust_address (mem, DImode, 8 * i);
20693 if (reg_overlap_mentioned_p (ops[0], mem))
20694 {
20695 gcc_assert (overlap == -1);
20696 overlap = i;
20697 }
20698 else
20699 {
20700 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20701 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20702 else
20703 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20704 output_asm_insn (buff, ops);
20705 }
20706 }
20707 if (overlap != -1)
20708 {
20709 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20710 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20711 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20712 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20713 else
20714 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20715 output_asm_insn (buff, ops);
20716 }
20717
20718 return "";
20719 }
20720
20721 default:
20722 gcc_unreachable ();
20723 }
20724
20725 sprintf (buff, templ, load ? "ld" : "st");
20726 output_asm_insn (buff, ops);
20727
20728 return "";
20729 }
20730
20731 /* Compute and return the length of neon_mov<mode>, where <mode> is
20732 one of the VSTRUCT modes: EI, OI, CI or XI. */
20733 int
20734 arm_attr_length_move_neon (rtx_insn *insn)
20735 {
20736 rtx reg, mem, addr;
20737 int load;
20738 machine_mode mode;
20739
20740 extract_insn_cached (insn);
20741
20742 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20743 {
20744 mode = GET_MODE (recog_data.operand[0]);
20745 switch (mode)
20746 {
20747 case E_EImode:
20748 case E_OImode:
20749 return 8;
20750 case E_CImode:
20751 return 12;
20752 case E_XImode:
20753 return 16;
20754 default:
20755 gcc_unreachable ();
20756 }
20757 }
20758
20759 load = REG_P (recog_data.operand[0]);
20760 reg = recog_data.operand[!load];
20761 mem = recog_data.operand[load];
20762
20763 gcc_assert (MEM_P (mem));
20764
20765 addr = XEXP (mem, 0);
20766
20767 /* Strip off const from addresses like (const (plus (...))). */
20768 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20769 addr = XEXP (addr, 0);
20770
20771 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20772 {
20773 int insns = REG_NREGS (reg) / 2;
20774 return insns * 4;
20775 }
20776 else
20777 return 4;
20778 }
20779
20780 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20781 return zero. */
20782
20783 int
20784 arm_address_offset_is_imm (rtx_insn *insn)
20785 {
20786 rtx mem, addr;
20787
20788 extract_insn_cached (insn);
20789
20790 if (REG_P (recog_data.operand[0]))
20791 return 0;
20792
20793 mem = recog_data.operand[0];
20794
20795 gcc_assert (MEM_P (mem));
20796
20797 addr = XEXP (mem, 0);
20798
20799 if (REG_P (addr)
20800 || (GET_CODE (addr) == PLUS
20801 && REG_P (XEXP (addr, 0))
20802 && CONST_INT_P (XEXP (addr, 1))))
20803 return 1;
20804 else
20805 return 0;
20806 }
20807
20808 /* Output an ADD r, s, #n where n may be too big for one instruction.
20809 If n is zero and the two registers are the same, output nothing. */
20810 const char *
20811 output_add_immediate (rtx *operands)
20812 {
20813 HOST_WIDE_INT n = INTVAL (operands[2]);
20814
20815 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20816 {
20817 if (n < 0)
20818 output_multi_immediate (operands,
20819 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20820 -n);
20821 else
20822 output_multi_immediate (operands,
20823 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20824 n);
20825 }
20826
20827 return "";
20828 }
20829
20830 /* Output a multiple immediate operation.
20831 OPERANDS is the vector of operands referred to in the output patterns.
20832 INSTR1 is the output pattern to use for the first constant.
20833 INSTR2 is the output pattern to use for subsequent constants.
20834 IMMED_OP is the index of the constant slot in OPERANDS.
20835 N is the constant value. */
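/* Worked example (illustrative, not from the original sources): splitting
   N = 0x12345 with the add patterns from output_add_immediate gives the
   chunks 0x45, 0x2300 and 0x10000, i.e. something like

       add     r0, r1, #0x45
       add     r0, r0, #0x2300
       add     r0, r0, #0x10000

   each chunk being an 8-bit value rotated into position and therefore a
   valid ARM immediate.  */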
20836 static const char *
20837 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20838 int immed_op, HOST_WIDE_INT n)
20839 {
20840 #if HOST_BITS_PER_WIDE_INT > 32
20841 n &= 0xffffffff;
20842 #endif
20843
20844 if (n == 0)
20845 {
20846 /* Quick and easy output. */
20847 operands[immed_op] = const0_rtx;
20848 output_asm_insn (instr1, operands);
20849 }
20850 else
20851 {
20852 int i;
20853 const char * instr = instr1;
20854
20855 /* Note that n is never zero here (which would give no output). */
20856 for (i = 0; i < 32; i += 2)
20857 {
20858 if (n & (3 << i))
20859 {
20860 operands[immed_op] = GEN_INT (n & (255 << i));
20861 output_asm_insn (instr, operands);
20862 instr = instr2;
20863 i += 6;
20864 }
20865 }
20866 }
20867
20868 return "";
20869 }
20870
20871 /* Return the name of a shifter operation. */
20872 static const char *
20873 arm_shift_nmem(enum rtx_code code)
20874 {
20875 switch (code)
20876 {
20877 case ASHIFT:
20878 return ARM_LSL_NAME;
20879
20880 case ASHIFTRT:
20881 return "asr";
20882
20883 case LSHIFTRT:
20884 return "lsr";
20885
20886 case ROTATERT:
20887 return "ror";
20888
20889 default:
20890 abort();
20891 }
20892 }
20893
20894 /* Return the appropriate ARM instruction for the operation code.
20895 The returned result should not be overwritten. OP is the rtx of the
20896 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20897 was shifted. */
20898 const char *
20899 arithmetic_instr (rtx op, int shift_first_arg)
20900 {
20901 switch (GET_CODE (op))
20902 {
20903 case PLUS:
20904 return "add";
20905
20906 case MINUS:
20907 return shift_first_arg ? "rsb" : "sub";
20908
20909 case IOR:
20910 return "orr";
20911
20912 case XOR:
20913 return "eor";
20914
20915 case AND:
20916 return "and";
20917
20918 case ASHIFT:
20919 case ASHIFTRT:
20920 case LSHIFTRT:
20921 case ROTATERT:
20922 return arm_shift_nmem(GET_CODE(op));
20923
20924 default:
20925 gcc_unreachable ();
20926 }
20927 }
20928
20929 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20930 for the operation code. The returned result should not be overwritten.
20931 OP is the rtx of the shift operation.
20932 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
20933 will be the constant shift amount. */
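/* Illustrative (not from the original sources): a (mult x 8) operand is
   folded into the shifter operand by the MULT case in shift_op below,
   returning "lsl" with *AMOUNTP set to 3, while (ashiftrt x (const_int 2))
   yields "asr" with *AMOUNTP set to 2.  */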
20934 static const char *
20935 shift_op (rtx op, HOST_WIDE_INT *amountp)
20936 {
20937 const char * mnem;
20938 enum rtx_code code = GET_CODE (op);
20939
20940 switch (code)
20941 {
20942 case ROTATE:
20943 if (!CONST_INT_P (XEXP (op, 1)))
20944 {
20945 output_operand_lossage ("invalid shift operand");
20946 return NULL;
20947 }
20948
20949 code = ROTATERT;
20950 *amountp = 32 - INTVAL (XEXP (op, 1));
20951 mnem = "ror";
20952 break;
20953
20954 case ASHIFT:
20955 case ASHIFTRT:
20956 case LSHIFTRT:
20957 case ROTATERT:
20958 mnem = arm_shift_nmem(code);
20959 if (CONST_INT_P (XEXP (op, 1)))
20960 {
20961 *amountp = INTVAL (XEXP (op, 1));
20962 }
20963 else if (REG_P (XEXP (op, 1)))
20964 {
20965 *amountp = -1;
20966 return mnem;
20967 }
20968 else
20969 {
20970 output_operand_lossage ("invalid shift operand");
20971 return NULL;
20972 }
20973 break;
20974
20975 case MULT:
20976 /* We never have to worry about the amount being other than a
20977 power of 2, since this case can never be reloaded from a reg. */
20978 if (!CONST_INT_P (XEXP (op, 1)))
20979 {
20980 output_operand_lossage ("invalid shift operand");
20981 return NULL;
20982 }
20983
20984 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20985
20986 /* Amount must be a power of two. */
20987 if (*amountp & (*amountp - 1))
20988 {
20989 output_operand_lossage ("invalid shift operand");
20990 return NULL;
20991 }
20992
20993 *amountp = exact_log2 (*amountp);
20994 gcc_assert (IN_RANGE (*amountp, 0, 31));
20995 return ARM_LSL_NAME;
20996
20997 default:
20998 output_operand_lossage ("invalid shift operand");
20999 return NULL;
21000 }
21001
21002 /* This is not 100% correct, but follows from the desire to merge
21003 multiplication by a power of 2 with the recognizer for a
21004 shift. >=32 is not a valid shift for "lsl", so we must try to
21005 output a shift that produces the correct arithmetical result.
21006 Using lsr #32 is identical except for the fact that the carry bit
21007 is not set correctly if we set the flags; but we never use the
21008 carry bit from such an operation, so we can ignore that. */
21009 if (code == ROTATERT)
21010 /* Rotate is just modulo 32. */
21011 *amountp &= 31;
21012 else if (*amountp != (*amountp & 31))
21013 {
21014 if (code == ASHIFT)
21015 mnem = "lsr";
21016 *amountp = 32;
21017 }
21018
21019 /* Shifts of 0 are no-ops. */
21020 if (*amountp == 0)
21021 return NULL;
21022
21023 return mnem;
21024 }
21025
21026 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21027 because /bin/as is horribly restrictive. The judgement about
21028 whether or not each character is 'printable' (and can be output as
21029 is) or not (and must be printed with an octal escape) must be made
21030 with reference to the *host* character set -- the situation is
21031 similar to that discussed in the comments above pp_c_char in
21032 c-pretty-print.cc. */
21033
21034 #define MAX_ASCII_LEN 51
21035
21036 void
21037 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21038 {
21039 int i;
21040 int len_so_far = 0;
21041
21042 fputs ("\t.ascii\t\"", stream);
21043
21044 for (i = 0; i < len; i++)
21045 {
21046 int c = p[i];
21047
21048 if (len_so_far >= MAX_ASCII_LEN)
21049 {
21050 fputs ("\"\n\t.ascii\t\"", stream);
21051 len_so_far = 0;
21052 }
21053
21054 if (ISPRINT (c))
21055 {
21056 if (c == '\\' || c == '\"')
21057 {
21058 putc ('\\', stream);
21059 len_so_far++;
21060 }
21061 putc (c, stream);
21062 len_so_far++;
21063 }
21064 else
21065 {
21066 fprintf (stream, "\\%03o", c);
21067 len_so_far += 4;
21068 }
21069 }
21070
21071 fputs ("\"\n", stream);
21072 }
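/* Example of the output (illustrative, not from the original sources): the
   bytes 'a', '"', '\n' would be emitted by the function above as

       .ascii  "a\"\012"

   with the quote backslash-escaped and the non-printable newline printed as
   an octal escape.  */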
21073 \f
21074
21075 /* Compute the register save mask for registers 0 through 12
21076 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21077
21078 static unsigned long
21079 arm_compute_save_reg0_reg12_mask (void)
21080 {
21081 unsigned long func_type = arm_current_func_type ();
21082 unsigned long save_reg_mask = 0;
21083 unsigned int reg;
21084
21085 if (IS_INTERRUPT (func_type))
21086 {
21087 unsigned int max_reg;
21088 /* Interrupt functions must not corrupt any registers,
21089 even call clobbered ones. If this is a leaf function
21090 we can just examine the registers used by the RTL, but
21091 otherwise we have to assume that whatever function is
21092 called might clobber anything, and so we have to save
21093 all the call-clobbered registers as well. */
21094 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21095 /* FIQ handlers have registers r8 - r12 banked, so
21096 we only need to check r0 - r7. Normal ISRs only
21097 bank r14 and r15, so we must check up to r12.
21098 r13 is the stack pointer, which is always preserved,
21099 so we do not need to consider it here. */
21100 max_reg = 7;
21101 else
21102 max_reg = 12;
21103
21104 for (reg = 0; reg <= max_reg; reg++)
21105 if (reg_needs_saving_p (reg))
21106 save_reg_mask |= (1 << reg);
21107
21108 /* Also save the pic base register if necessary. */
21109 if (PIC_REGISTER_MAY_NEED_SAVING
21110 && crtl->uses_pic_offset_table)
21111 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21112 }
21113 else if (IS_VOLATILE(func_type))
21114 {
21115 /* For noreturn functions we historically omitted register saves
21116 altogether. However, this really messes up debugging. As a
21117 compromise, save just the frame pointers. Combined with the link
21118 register saved elsewhere, this should be sufficient to get
21119 a backtrace. */
21120 if (frame_pointer_needed)
21121 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21122 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21123 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21124 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21125 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21126 }
21127 else
21128 {
21129 /* In the normal case we only need to save those registers
21130 which are call saved and which are used by this function. */
21131 for (reg = 0; reg <= 11; reg++)
21132 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21133 save_reg_mask |= (1 << reg);
21134
21135 /* Handle the frame pointer as a special case. */
21136 if (frame_pointer_needed)
21137 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21138
21139 /* If we aren't loading the PIC register,
21140 don't stack it even though it may be live. */
21141 if (PIC_REGISTER_MAY_NEED_SAVING
21142 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21143 || crtl->uses_pic_offset_table))
21144 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21145
21146 /* The prologue will copy SP into R0, so save it. */
21147 if (IS_STACKALIGN (func_type))
21148 save_reg_mask |= 1;
21149 }
21150
21151 /* Save registers so the exception handler can modify them. */
21152 if (crtl->calls_eh_return)
21153 {
21154 unsigned int i;
21155
21156 for (i = 0; ; i++)
21157 {
21158 reg = EH_RETURN_DATA_REGNO (i);
21159 if (reg == INVALID_REGNUM)
21160 break;
21161 save_reg_mask |= 1 << reg;
21162 }
21163 }
21164
21165 return save_reg_mask;
21166 }
21167
21168 /* Return true if r3 is live at the start of the function. */
21169
21170 static bool
21171 arm_r3_live_at_start_p (void)
21172 {
21173 /* Just look at cfg info, which is still close enough to correct at this
21174 point. This gives false positives for broken functions that might use
21175 uninitialized data that happens to be allocated in r3, but who cares? */
21176 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21177 }
21178
21179 /* Compute the number of bytes used to store the static chain register on the
21180 stack, above the stack frame. We need to know this accurately to get the
21181 alignment of the rest of the stack frame correct. */
21182
21183 static int
21184 arm_compute_static_chain_stack_bytes (void)
21185 {
21186 /* Once the value is updated from the init value of -1, do not
21187 re-compute. */
21188 if (cfun->machine->static_chain_stack_bytes != -1)
21189 return cfun->machine->static_chain_stack_bytes;
21190
21191 /* See the defining assertion in arm_expand_prologue. */
21192 if (IS_NESTED (arm_current_func_type ())
21193 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21194 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21195 || flag_stack_clash_protection)
21196 && !df_regs_ever_live_p (LR_REGNUM)))
21197 && arm_r3_live_at_start_p ()
21198 && crtl->args.pretend_args_size == 0)
21199 return 4;
21200
21201 return 0;
21202 }
21203
21204 /* Compute a bit mask of which core registers need to be
21205 saved on the stack for the current function.
21206 This is used by arm_compute_frame_layout, which may add extra registers. */
21207
21208 static unsigned long
21209 arm_compute_save_core_reg_mask (void)
21210 {
21211 unsigned int save_reg_mask = 0;
21212 unsigned long func_type = arm_current_func_type ();
21213 unsigned int reg;
21214
21215 if (IS_NAKED (func_type))
21216 /* This should never really happen. */
21217 return 0;
21218
21219 /* If we are creating a stack frame, then we must save the frame pointer,
21220 IP (which will hold the old stack pointer), LR and the PC. */
21221 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21222 save_reg_mask |=
21223 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21224 | (1 << IP_REGNUM)
21225 | (1 << LR_REGNUM)
21226 | (1 << PC_REGNUM);
21227
21228 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21229
21230 /* Decide if we need to save the link register.
21231 Interrupt routines have their own banked link register,
21232 so they never need to save it.
21233 Otherwise, if we do not use the link register, we do not need to save
21234 it. However, if we are pushing other registers onto the stack, we
21235 can save an instruction in the epilogue by pushing the link register
21236 now and then popping it back into the PC. This incurs extra memory
21237 accesses though, so we only do it when optimizing for size, and only
21238 if we know that we will not need a fancy return sequence. */
21239 if (df_regs_ever_live_p (LR_REGNUM)
21240 || (save_reg_mask
21241 && optimize_size
21242 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21243 && !crtl->tail_call_emit
21244 && !crtl->calls_eh_return))
21245 save_reg_mask |= 1 << LR_REGNUM;
21246
21247 if (cfun->machine->lr_save_eliminated)
21248 save_reg_mask &= ~ (1 << LR_REGNUM);
21249
21250 if (TARGET_REALLY_IWMMXT
21251 && ((bit_count (save_reg_mask)
21252 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21253 arm_compute_static_chain_stack_bytes())
21254 ) % 2) != 0)
21255 {
21256 /* The total number of registers that are going to be pushed
21257 onto the stack is odd. We need to ensure that the stack
21258 is 64-bit aligned before we start to save iWMMXt registers,
21259 and also before we start to create locals. (A local variable
21260 might be a double or long long which we will load/store using
21261 an iWMMXt instruction). Therefore we need to push another
21262 ARM register, so that the stack will be 64-bit aligned. We
21263 try to avoid using the arg registers (r0 - r3) as they might be
21264 used to pass values in a tail call. */
21265 for (reg = 4; reg <= 12; reg++)
21266 if ((save_reg_mask & (1 << reg)) == 0)
21267 break;
21268
21269 if (reg <= 12)
21270 save_reg_mask |= (1 << reg);
21271 else
21272 {
21273 cfun->machine->sibcall_blocked = 1;
21274 save_reg_mask |= (1 << 3);
21275 }
21276 }
21277
21278 /* We may need to push an additional register for use initializing the
21279 PIC base register. */
21280 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21281 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21282 {
21283 reg = thumb_find_work_register (1 << 4);
21284 if (!call_used_or_fixed_reg_p (reg))
21285 save_reg_mask |= (1 << reg);
21286 }
21287
21288 return save_reg_mask;
21289 }
21290
21291 /* Compute a bit mask of which core registers need to be
21292 saved on the stack for the current function. */
21293 static unsigned long
21294 thumb1_compute_save_core_reg_mask (void)
21295 {
21296 unsigned long mask;
21297 unsigned reg;
21298
21299 mask = 0;
21300 for (reg = 0; reg < 12; reg ++)
21301 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21302 mask |= 1 << reg;
21303
21304 /* Handle the frame pointer as a special case. */
21305 if (frame_pointer_needed)
21306 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21307
21308 if (flag_pic
21309 && !TARGET_SINGLE_PIC_BASE
21310 && arm_pic_register != INVALID_REGNUM
21311 && crtl->uses_pic_offset_table)
21312 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21313
21314 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21315 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21316 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21317
21318 /* LR will also be pushed if any lo regs are pushed. */
21319 if (mask & 0xff || thumb_force_lr_save ())
21320 mask |= (1 << LR_REGNUM);
21321
21322 bool call_clobbered_scratch
21323 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21324 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21325
21326 /* Make sure we have a low work register if we need one. We will
21327 need one if we are going to push a high register, but we are not
21328 currently intending to push a low register. However if both the
21329 prologue and epilogue have a spare call-clobbered low register,
21330 then we won't need to find an additional work register. It does
21331 not need to be the same register in the prologue and
21332 epilogue. */
21333 if ((mask & 0xff) == 0
21334 && !call_clobbered_scratch
21335 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21336 {
21337 /* Use thumb_find_work_register to choose which register
21338 we will use. If the register is live then we will
21339 have to push it. Use LAST_LO_REGNUM as our fallback
21340 choice for the register to select. */
21341 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21342 /* Make sure the register returned by thumb_find_work_register is
21343 not part of the return value. */
21344 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21345 reg = LAST_LO_REGNUM;
21346
21347 if (callee_saved_reg_p (reg))
21348 mask |= 1 << reg;
21349 }
21350
21351 /* The 504 below is 8 bytes less than 512 because there are two possible
21352 alignment words. We can't tell here whether they will be present or not,
21353 so we have to play it safe and assume that they are. */
21354 if ((CALLER_INTERWORKING_SLOT_SIZE +
21355 ROUND_UP_WORD (get_frame_size ()) +
21356 crtl->outgoing_args_size) >= 504)
21357 {
21358 /* This is the same as the code in thumb1_expand_prologue() which
21359 determines which register to use for stack decrement. */
21360 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21361 if (mask & (1 << reg))
21362 break;
21363
21364 if (reg > LAST_LO_REGNUM)
21365 {
21366 /* Make sure we have a register available for stack decrement. */
21367 mask |= 1 << LAST_LO_REGNUM;
21368 }
21369 }
21370
21371 return mask;
21372 }
21373
21374 /* Return the number of bytes required to save VFP registers. */
21375 static int
21376 arm_get_vfp_saved_size (void)
21377 {
21378 unsigned int regno;
21379 int count;
21380 int saved;
21381
21382 saved = 0;
21383 /* Space for saved VFP registers. */
21384 if (TARGET_VFP_BASE)
21385 {
21386 count = 0;
21387 for (regno = FIRST_VFP_REGNUM;
21388 regno < LAST_VFP_REGNUM;
21389 regno += 2)
21390 {
21391 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21392 {
21393 if (count > 0)
21394 {
21395 /* Workaround ARM10 VFPr1 bug. */
21396 if (count == 2 && !arm_arch6)
21397 count++;
21398 saved += count * 8;
21399 }
21400 count = 0;
21401 }
21402 else
21403 count++;
21404 }
21405 if (count > 0)
21406 {
21407 if (count == 2 && !arm_arch6)
21408 count++;
21409 saved += count * 8;
21410 }
21411 }
21412 return saved;
21413 }
21414
21415
21416 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21417 everything bar the final return instruction. If SIMPLE_RETURN is true,
21418 then do not output the epilogue, because it has already been emitted in RTL.
21419
21420 Note: do not forget to update the length attribute of the corresponding
21421 insn pattern when changing the assembly output (e.g. the length attribute of
21422 thumb2_cmse_entry_return when updating the Armv8-M Mainline Security Extensions
21423 register clearing sequences). */
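/* As a purely illustrative sketch (register set chosen arbitrarily, not an
   exhaustive list of cases): when the saved registers are {r4, r5, lr} and
   interworking is not required, this routine is expected to emit roughly

	pop	{r4, r5, pc}

   whereas with interworking it loads LR and returns through BX:

	pop	{r4, r5, lr}
	bx	lr  */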
21424 const char *
21425 output_return_instruction (rtx operand, bool really_return, bool reverse,
21426 bool simple_return)
21427 {
21428 char conditional[10];
21429 char instr[100];
21430 unsigned reg;
21431 unsigned long live_regs_mask;
21432 unsigned long func_type;
21433 arm_stack_offsets *offsets;
21434
21435 func_type = arm_current_func_type ();
21436
21437 if (IS_NAKED (func_type))
21438 return "";
21439
21440 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21441 {
21442 /* If this function was declared non-returning, and we have
21443 found a tail call, then we have to trust that the called
21444 function won't return. */
21445 if (really_return)
21446 {
21447 rtx ops[2];
21448
21449 /* Otherwise, trap an attempted return by aborting. */
21450 ops[0] = operand;
21451 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21452 : "abort");
21453 assemble_external_libcall (ops[1]);
21454 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21455 }
21456
21457 return "";
21458 }
21459
21460 gcc_assert (!cfun->calls_alloca || really_return);
21461
21462 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21463
21464 cfun->machine->return_used_this_function = 1;
21465
21466 offsets = arm_get_frame_offsets ();
21467 live_regs_mask = offsets->saved_regs_mask;
21468
21469 if (!simple_return && live_regs_mask)
21470 {
21471 const char * return_reg;
21472
21473 /* If we do not have any special requirements for function exit
21474 (e.g. interworking) then we can load the return address
21475 directly into the PC. Otherwise we must load it into LR. */
21476 if (really_return
21477 && !IS_CMSE_ENTRY (func_type)
21478 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21479 return_reg = reg_names[PC_REGNUM];
21480 else
21481 return_reg = reg_names[LR_REGNUM];
21482
21483 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21484 {
21485 /* There are three possible reasons for the IP register
21486 being saved: 1) a stack frame was created, in which case
21487 IP contains the old stack pointer; 2) an ISR routine
21488 corrupted it; or 3) it was saved to align the stack on
21489 iWMMXt. In case 1, restore IP into SP; otherwise just
21490 restore IP. */
21491 if (frame_pointer_needed)
21492 {
21493 live_regs_mask &= ~ (1 << IP_REGNUM);
21494 live_regs_mask |= (1 << SP_REGNUM);
21495 }
21496 else
21497 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21498 }
21499
21500 /* On some ARM architectures it is faster to use LDR rather than
21501 LDM to load a single register. On other architectures, the
21502 cost is the same. In 26 bit mode, or for exception handlers,
21503 we have to use LDM to load the PC so that the CPSR is also
21504 restored. */
21505 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21506 if (live_regs_mask == (1U << reg))
21507 break;
21508
21509 if (reg <= LAST_ARM_REGNUM
21510 && (reg != LR_REGNUM
21511 || ! really_return
21512 || ! IS_INTERRUPT (func_type)))
21513 {
21514 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21515 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21516 }
21517 else
21518 {
21519 char *p;
21520 int first = 1;
21521
21522 /* Generate the load multiple instruction to restore the
21523 registers. Note we can get here, even if
21524 frame_pointer_needed is true, but only if sp already
21525 points to the base of the saved core registers. */
21526 if (live_regs_mask & (1 << SP_REGNUM))
21527 {
21528 unsigned HOST_WIDE_INT stack_adjust;
21529
21530 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21531 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21532
21533 if (stack_adjust && arm_arch5t && TARGET_ARM)
21534 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21535 else
21536 {
21537 /* If we can't use ldmib (SA110 bug),
21538 then try to pop r3 instead. */
21539 if (stack_adjust)
21540 live_regs_mask |= 1 << 3;
21541
21542 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21543 }
21544 }
21545 /* For interrupt returns we have to use an LDM rather than
21546 a POP so that we can use the exception return variant. */
21547 else if (IS_INTERRUPT (func_type))
21548 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21549 else
21550 sprintf (instr, "pop%s\t{", conditional);
21551
21552 p = instr + strlen (instr);
21553
21554 for (reg = 0; reg <= SP_REGNUM; reg++)
21555 if (live_regs_mask & (1 << reg))
21556 {
21557 int l = strlen (reg_names[reg]);
21558
21559 if (first)
21560 first = 0;
21561 else
21562 {
21563 memcpy (p, ", ", 2);
21564 p += 2;
21565 }
21566
21567 memcpy (p, "%|", 2);
21568 memcpy (p + 2, reg_names[reg], l);
21569 p += l + 2;
21570 }
21571
21572 if (live_regs_mask & (1 << LR_REGNUM))
21573 {
21574 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21575 /* If returning from an interrupt, restore the CPSR. */
21576 if (IS_INTERRUPT (func_type))
21577 strcat (p, "^");
21578 }
21579 else
21580 strcpy (p, "}");
21581 }
21582
21583 output_asm_insn (instr, & operand);
21584
21585 /* See if we need to generate an extra instruction to
21586 perform the actual function return. */
21587 if (really_return
21588 && func_type != ARM_FT_INTERWORKED
21589 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21590 {
21591 /* The return has already been handled
21592 by loading the LR into the PC. */
21593 return "";
21594 }
21595 }
21596
21597 if (really_return)
21598 {
21599 switch ((int) ARM_FUNC_TYPE (func_type))
21600 {
21601 case ARM_FT_ISR:
21602 case ARM_FT_FIQ:
21603 /* ??? This is wrong for unified assembly syntax. */
21604 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21605 break;
21606
21607 case ARM_FT_INTERWORKED:
21608 gcc_assert (arm_arch5t || arm_arch4t);
21609 sprintf (instr, "bx%s\t%%|lr", conditional);
21610 break;
21611
21612 case ARM_FT_EXCEPTION:
21613 /* ??? This is wrong for unified assembly syntax. */
21614 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21615 break;
21616
21617 default:
21618 if (IS_CMSE_ENTRY (func_type))
21619 {
21620 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21621 emitted by cmse_nonsecure_entry_clear_before_return () and the
21622 VSTR/VLDR instructions in the prologue and epilogue. */
21623 if (!TARGET_HAVE_FPCXT_CMSE)
21624 {
21625 /* Check if we have to clear the 'GE bits', which are only used if
21626 parallel add and subtraction instructions are available. */
21627 if (TARGET_INT_SIMD)
21628 snprintf (instr, sizeof (instr),
21629 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21630 else
21631 snprintf (instr, sizeof (instr),
21632 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21633
21634 output_asm_insn (instr, & operand);
21635 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21636 care of it. */
21637 if (TARGET_HARD_FLOAT)
21638 {
21639 /* Clear the cumulative exception-status bits (0-4,7) and
21640 the condition code bits (28-31) of the FPSCR. We need
21641 to remember to clear the first scratch register used
21642 (IP) and save and restore the second (r4).
21643
21644 Important note: the length of the
21645 thumb2_cmse_entry_return insn pattern must account for
21646 the size of the below instructions. */
21647 output_asm_insn ("push\t{%|r4}", & operand);
21648 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21649 output_asm_insn ("movw\t%|r4, #65376", & operand);
21650 output_asm_insn ("movt\t%|r4, #4095", & operand);
21651 output_asm_insn ("and\t%|ip, %|r4", & operand);
21652 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21653 output_asm_insn ("pop\t{%|r4}", & operand);
21654 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21655 }
21656 }
21657 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21658 }
21659 /* Use bx if it's available. */
21660 else if (arm_arch5t || arm_arch4t)
21661 sprintf (instr, "bx%s\t%%|lr", conditional);
21662 else
21663 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21664 break;
21665 }
21666
21667 output_asm_insn (instr, & operand);
21668 }
21669
21670 return "";
21671 }
21672
21673 /* Output in FILE asm statements needed to declare the NAME of the function
21674 defined by its DECL node. */
21675
21676 void
21677 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21678 {
21679 size_t cmse_name_len;
21680 char *cmse_name = 0;
21681 char cmse_prefix[] = "__acle_se_";
21682
21683 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21684 extra function label for each function with the 'cmse_nonsecure_entry'
21685 attribute. This extra function label should be prepended with
21686 '__acle_se_', telling the linker that it needs to create secure gateway
21687 veneers for this function. */
21688 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21689 DECL_ATTRIBUTES (decl)))
21690 {
21691 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21692 cmse_name = XALLOCAVEC (char, cmse_name_len);
21693 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21694 targetm.asm_out.globalize_label (file, cmse_name);
21695
21696 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21697 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21698 }
21699
21700 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21701 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21702 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21703 ASM_OUTPUT_LABEL (file, name);
21704
21705 if (cmse_name)
21706 ASM_OUTPUT_LABEL (file, cmse_name);
21707
21708 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21709 }
21710
21711 /* Write the function name into the code section, directly preceding
21712 the function prologue.
21713
21714 Code will be output similar to this:
21715 t0
21716 .ascii "arm_poke_function_name", 0
21717 .align
21718 t1
21719 .word 0xff000000 + (t1 - t0)
21720 arm_poke_function_name
21721 mov ip, sp
21722 stmfd sp!, {fp, ip, lr, pc}
21723 sub fp, ip, #4
21724
21725 When performing a stack backtrace, code can inspect the value
21726 of 'pc' stored at 'fp' + 0. If the trace function then looks
21727 at location pc - 12 and finds that the top 8 bits are set, then we know
21728 that there is a function name embedded immediately preceding this
21729 location, whose padded length is given by the low 24 bits ((pc[-3]) & ~0xff000000).
21730
21731 We assume that pc is declared as a pointer to an unsigned long.
21732
21733 It is of no benefit to output the function name if we are assembling
21734 a leaf function. These function types will not contain a stack
21735 backtrace structure, therefore it is not possible to determine the
21736 function name. */
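/* A minimal sketch (illustrative only, not part of GCC) of how a backtrace
   routine could decode the record emitted below, assuming MARKER points at
   the tag word placed just before the function entry:

     unsigned long tag = *marker;
     if ((tag & 0xff000000) == 0xff000000)
       {
	 unsigned long padded_len = tag & ~0xff000000UL;
	 const char *name = (const char *) marker - padded_len;
	 /* NAME is NUL-terminated, so trailing alignment padding is
	    harmless.  */
       }
*/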
21737 void
21738 arm_poke_function_name (FILE *stream, const char *name)
21739 {
21740 unsigned long alignlength;
21741 unsigned long length;
21742 rtx x;
21743
21744 length = strlen (name) + 1;
21745 alignlength = ROUND_UP_WORD (length);
21746
21747 ASM_OUTPUT_ASCII (stream, name, length);
21748 ASM_OUTPUT_ALIGN (stream, 2);
21749 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21750 assemble_aligned_integer (UNITS_PER_WORD, x);
21751 }
21752
21753 /* Place some comments into the assembler stream
21754 describing the current function. */
21755 static void
21756 arm_output_function_prologue (FILE *f)
21757 {
21758 unsigned long func_type;
21759
21760 /* Sanity check. */
21761 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21762
21763 func_type = arm_current_func_type ();
21764
21765 switch ((int) ARM_FUNC_TYPE (func_type))
21766 {
21767 default:
21768 case ARM_FT_NORMAL:
21769 break;
21770 case ARM_FT_INTERWORKED:
21771 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21772 break;
21773 case ARM_FT_ISR:
21774 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21775 break;
21776 case ARM_FT_FIQ:
21777 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21778 break;
21779 case ARM_FT_EXCEPTION:
21780 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21781 break;
21782 }
21783
21784 if (IS_NAKED (func_type))
21785 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21786
21787 if (IS_VOLATILE (func_type))
21788 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21789
21790 if (IS_NESTED (func_type))
21791 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21792 if (IS_STACKALIGN (func_type))
21793 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21794 if (IS_CMSE_ENTRY (func_type))
21795 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21796
21797 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21798 (HOST_WIDE_INT) crtl->args.size,
21799 crtl->args.pretend_args_size,
21800 (HOST_WIDE_INT) get_frame_size ());
21801
21802 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21803 frame_pointer_needed,
21804 cfun->machine->uses_anonymous_args);
21805
21806 if (cfun->machine->lr_save_eliminated)
21807 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21808
21809 if (crtl->calls_eh_return)
21810 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21811
21812 }
21813
21814 static void
21815 arm_output_function_epilogue (FILE *)
21816 {
21817 arm_stack_offsets *offsets;
21818
21819 if (TARGET_THUMB1)
21820 {
21821 int regno;
21822
21823 /* Emit any call-via-reg trampolines that are needed for v4t support
21824 of call_reg and call_value_reg type insns. */
21825 for (regno = 0; regno < LR_REGNUM; regno++)
21826 {
21827 rtx label = cfun->machine->call_via[regno];
21828
21829 if (label != NULL)
21830 {
21831 switch_to_section (function_section (current_function_decl));
21832 targetm.asm_out.internal_label (asm_out_file, "L",
21833 CODE_LABEL_NUMBER (label));
21834 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21835 }
21836 }
21837
21838 /* ??? Probably not safe to set this here, since it assumes that a
21839 function will be emitted as assembly immediately after we generate
21840 RTL for it. This does not happen for inline functions. */
21841 cfun->machine->return_used_this_function = 0;
21842 }
21843 else /* TARGET_32BIT */
21844 {
21845 /* We need to take into account any stack-frame rounding. */
21846 offsets = arm_get_frame_offsets ();
21847
21848 gcc_assert (!use_return_insn (FALSE, NULL)
21849 || (cfun->machine->return_used_this_function != 0)
21850 || offsets->saved_regs == offsets->outgoing_args
21851 || frame_pointer_needed);
21852 }
21853 }
21854
21855 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21856 STR and STRD. If an even number of registers is being pushed, an
21857 STRD pattern is created for each register pair. If an
21858 odd number of registers is pushed, emit an initial STR followed by
21859 as many STRD instructions as are needed. This works best when the
21860 stack is initially 64-bit aligned (the normal case), since it
21861 ensures that each STRD is also 64-bit aligned. */
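/* For example (illustrative only; exact offsets depend on the mask),
   pushing {r4, r5, r6} is expected to expand to roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   while pushing {r4, r5, r6, r7} gives

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]  */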
21862 static void
21863 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21864 {
21865 int num_regs = 0;
21866 int i;
21867 int regno;
21868 rtx par = NULL_RTX;
21869 rtx dwarf = NULL_RTX;
21870 rtx tmp;
21871 bool first = true;
21872
21873 num_regs = bit_count (saved_regs_mask);
21874
21875 /* Must be at least one register to save, and can't save SP or PC. */
21876 gcc_assert (num_regs > 0 && num_regs <= 14);
21877 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21878 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21879
21880 /* Create sequence for DWARF info. All the frame-related data for
21881 debugging is held in this wrapper. */
21882 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21883
21884 /* Describe the stack adjustment. */
21885 tmp = gen_rtx_SET (stack_pointer_rtx,
21886 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21887 RTX_FRAME_RELATED_P (tmp) = 1;
21888 XVECEXP (dwarf, 0, 0) = tmp;
21889
21890 /* Find the first register. */
21891 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21892 ;
21893
21894 i = 0;
21895
21896 /* If there's an odd number of registers to push, start off by
21897 pushing a single register. This ensures that subsequent strd
21898 operations are dword aligned (assuming that SP was originally
21899 64-bit aligned). */
21900 if ((num_regs & 1) != 0)
21901 {
21902 rtx reg, mem, insn;
21903
21904 reg = gen_rtx_REG (SImode, regno);
21905 if (num_regs == 1)
21906 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21907 stack_pointer_rtx));
21908 else
21909 mem = gen_frame_mem (Pmode,
21910 gen_rtx_PRE_MODIFY
21911 (Pmode, stack_pointer_rtx,
21912 plus_constant (Pmode, stack_pointer_rtx,
21913 -4 * num_regs)));
21914
21915 tmp = gen_rtx_SET (mem, reg);
21916 RTX_FRAME_RELATED_P (tmp) = 1;
21917 insn = emit_insn (tmp);
21918 RTX_FRAME_RELATED_P (insn) = 1;
21919 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21920 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21921 RTX_FRAME_RELATED_P (tmp) = 1;
21922 i++;
21923 regno++;
21924 XVECEXP (dwarf, 0, i) = tmp;
21925 first = false;
21926 }
21927
21928 while (i < num_regs)
21929 if (saved_regs_mask & (1 << regno))
21930 {
21931 rtx reg1, reg2, mem1, mem2;
21932 rtx tmp0, tmp1, tmp2;
21933 int regno2;
21934
21935 /* Find the register to pair with this one. */
21936 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21937 regno2++)
21938 ;
21939
21940 reg1 = gen_rtx_REG (SImode, regno);
21941 reg2 = gen_rtx_REG (SImode, regno2);
21942
21943 if (first)
21944 {
21945 rtx insn;
21946
21947 first = false;
21948 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21949 stack_pointer_rtx,
21950 -4 * num_regs));
21951 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21952 stack_pointer_rtx,
21953 -4 * (num_regs - 1)));
21954 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21955 plus_constant (Pmode, stack_pointer_rtx,
21956 -4 * (num_regs)));
21957 tmp1 = gen_rtx_SET (mem1, reg1);
21958 tmp2 = gen_rtx_SET (mem2, reg2);
21959 RTX_FRAME_RELATED_P (tmp0) = 1;
21960 RTX_FRAME_RELATED_P (tmp1) = 1;
21961 RTX_FRAME_RELATED_P (tmp2) = 1;
21962 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21963 XVECEXP (par, 0, 0) = tmp0;
21964 XVECEXP (par, 0, 1) = tmp1;
21965 XVECEXP (par, 0, 2) = tmp2;
21966 insn = emit_insn (par);
21967 RTX_FRAME_RELATED_P (insn) = 1;
21968 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21969 }
21970 else
21971 {
21972 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21973 stack_pointer_rtx,
21974 4 * i));
21975 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21976 stack_pointer_rtx,
21977 4 * (i + 1)));
21978 tmp1 = gen_rtx_SET (mem1, reg1);
21979 tmp2 = gen_rtx_SET (mem2, reg2);
21980 RTX_FRAME_RELATED_P (tmp1) = 1;
21981 RTX_FRAME_RELATED_P (tmp2) = 1;
21982 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21983 XVECEXP (par, 0, 0) = tmp1;
21984 XVECEXP (par, 0, 1) = tmp2;
21985 emit_insn (par);
21986 }
21987
21988 /* Create unwind information. This is an approximation. */
21989 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21990 plus_constant (Pmode,
21991 stack_pointer_rtx,
21992 4 * i)),
21993 reg1);
21994 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21995 plus_constant (Pmode,
21996 stack_pointer_rtx,
21997 4 * (i + 1))),
21998 reg2);
21999
22000 RTX_FRAME_RELATED_P (tmp1) = 1;
22001 RTX_FRAME_RELATED_P (tmp2) = 1;
22002 XVECEXP (dwarf, 0, i + 1) = tmp1;
22003 XVECEXP (dwarf, 0, i + 2) = tmp2;
22004 i += 2;
22005 regno = regno2 + 1;
22006 }
22007 else
22008 regno++;
22009
22010 return;
22011 }
22012
22013 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22014 whenever possible, otherwise it emits single-word stores. The first store
22015 also allocates stack space for all saved registers, using pre-indexed
22016 addressing with writeback. All other stores use offset addressing. If no STRD
22017 can be emitted, this function emits a sequence of single-word stores,
22018 and not an STM as before, because single-word stores give the scheduler more
22019 freedom and can be turned into an STM by peephole optimizations. */
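/* Illustrative example (register set chosen arbitrarily, assuming the mask
   allows pairing): pushing {r4, r5, r6, r7, lr} is expected to expand to
   roughly

	strd	r4, r5, [sp, #-20]!
	strd	r6, r7, [sp, #8]
	str	lr, [sp, #16]  */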
22020 static void
22021 arm_emit_strd_push (unsigned long saved_regs_mask)
22022 {
22023 int num_regs = 0;
22024 int i, j, dwarf_index = 0;
22025 int offset = 0;
22026 rtx dwarf = NULL_RTX;
22027 rtx insn = NULL_RTX;
22028 rtx tmp, mem;
22029
22030 /* TODO: More efficient code could be emitted by changing the
22031 layout, e.g., first push all pairs that can use STRD to keep the
22032 stack aligned, and then push all other registers. */
22033 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22034 if (saved_regs_mask & (1 << i))
22035 num_regs++;
22036
22037 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22038 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22039 gcc_assert (num_regs > 0);
22040
22041 /* Create sequence for DWARF info. */
22042 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22043
22044 /* For dwarf info, we generate explicit stack update. */
22045 tmp = gen_rtx_SET (stack_pointer_rtx,
22046 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22047 RTX_FRAME_RELATED_P (tmp) = 1;
22048 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22049
22050 /* Save registers. */
22051 offset = - 4 * num_regs;
22052 j = 0;
22053 while (j <= LAST_ARM_REGNUM)
22054 if (saved_regs_mask & (1 << j))
22055 {
22056 if ((j % 2 == 0)
22057 && (saved_regs_mask & (1 << (j + 1))))
22058 {
22059 /* The current register and the next register form a register
22060 pair for which STRD can be generated. */
22061 if (offset < 0)
22062 {
22063 /* Allocate stack space for all saved registers. */
22064 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22065 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22066 mem = gen_frame_mem (DImode, tmp);
22067 offset = 0;
22068 }
22069 else if (offset > 0)
22070 mem = gen_frame_mem (DImode,
22071 plus_constant (Pmode,
22072 stack_pointer_rtx,
22073 offset));
22074 else
22075 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22076
22077 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22078 RTX_FRAME_RELATED_P (tmp) = 1;
22079 tmp = emit_insn (tmp);
22080
22081 /* Record the first store insn. */
22082 if (dwarf_index == 1)
22083 insn = tmp;
22084
22085 /* Generate dwarf info. */
22086 mem = gen_frame_mem (SImode,
22087 plus_constant (Pmode,
22088 stack_pointer_rtx,
22089 offset));
22090 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22091 RTX_FRAME_RELATED_P (tmp) = 1;
22092 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22093
22094 mem = gen_frame_mem (SImode,
22095 plus_constant (Pmode,
22096 stack_pointer_rtx,
22097 offset + 4));
22098 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22099 RTX_FRAME_RELATED_P (tmp) = 1;
22100 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22101
22102 offset += 8;
22103 j += 2;
22104 }
22105 else
22106 {
22107 /* Emit a single word store. */
22108 if (offset < 0)
22109 {
22110 /* Allocate stack space for all saved registers. */
22111 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22112 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22113 mem = gen_frame_mem (SImode, tmp);
22114 offset = 0;
22115 }
22116 else if (offset > 0)
22117 mem = gen_frame_mem (SImode,
22118 plus_constant (Pmode,
22119 stack_pointer_rtx,
22120 offset));
22121 else
22122 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22123
22124 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22125 RTX_FRAME_RELATED_P (tmp) = 1;
22126 tmp = emit_insn (tmp);
22127
22128 /* Record the first store insn. */
22129 if (dwarf_index == 1)
22130 insn = tmp;
22131
22132 /* Generate dwarf info. */
22133 mem = gen_frame_mem (SImode,
22134 plus_constant(Pmode,
22135 stack_pointer_rtx,
22136 offset));
22137 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22138 RTX_FRAME_RELATED_P (tmp) = 1;
22139 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22140
22141 offset += 4;
22142 j += 1;
22143 }
22144 }
22145 else
22146 j++;
22147
22148 /* Attach dwarf info to the first insn we generate. */
22149 gcc_assert (insn != NULL_RTX);
22150 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22151 RTX_FRAME_RELATED_P (insn) = 1;
22152 }
22153
22154 /* Generate and emit an insn that we will recognize as a push_multi.
22155 Unfortunately, since this insn does not reflect very well the actual
22156 semantics of the operation, we need to annotate the insn for the benefit
22157 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22158 MASK for registers that should be annotated for DWARF2 frame unwind
22159 information. */
22160 static rtx
22161 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22162 {
22163 int num_regs = 0;
22164 int num_dwarf_regs = 0;
22165 int i, j;
22166 rtx par;
22167 rtx dwarf;
22168 int dwarf_par_index;
22169 rtx tmp, reg;
22170
22171 /* We don't record the PC in the dwarf frame information. */
22172 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22173
22174 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22175 {
22176 if (mask & (1 << i))
22177 num_regs++;
22178 if (dwarf_regs_mask & (1 << i))
22179 num_dwarf_regs++;
22180 }
22181
22182 gcc_assert (num_regs && num_regs <= 16);
22183 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22184
22185 /* For the body of the insn we are going to generate an UNSPEC in
22186 parallel with several USEs. This allows the insn to be recognized
22187 by the push_multi pattern in the arm.md file.
22188
22189 The body of the insn looks something like this:
22190
22191 (parallel [
22192 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22193 (const_int:SI <num>)))
22194 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22195 (use (reg:SI XX))
22196 (use (reg:SI YY))
22197 ...
22198 ])
22199
22200 For the frame note however, we try to be more explicit and actually
22201 show each register being stored into the stack frame, plus a (single)
22202 decrement of the stack pointer. We do it this way in order to be
22203 friendly to the stack unwinding code, which only wants to see a single
22204 stack decrement per instruction. The RTL we generate for the note looks
22205 something like this:
22206
22207 (sequence [
22208 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22209 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22210 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22211 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22212 ...
22213 ])
22214
22215 FIXME: In an ideal world the PRE_MODIFY would not exist and
22216 instead we'd have a parallel expression detailing all
22217 the stores to the various memory addresses so that debug
22218 information is more up-to-date. Remember however while writing
22219 this to take care of the constraints with the push instruction.
22220
22221 Note also that this has to be taken care of for the VFP registers.
22222
22223 For more see PR43399. */
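/* A concrete illustration (register set chosen arbitrarily): pushing
   {r4, r5, lr} yields one insn, normally printed as

	push	{r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the
   equivalent sequence

	sp = sp - 12
	[sp]     = r4
	[sp + 4] = r5
	[sp + 8] = lr  */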
22224
22225 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22226 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22227 dwarf_par_index = 1;
22228
22229 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22230 {
22231 if (mask & (1 << i))
22232 {
22233 reg = gen_rtx_REG (SImode, i);
22234
22235 XVECEXP (par, 0, 0)
22236 = gen_rtx_SET (gen_frame_mem
22237 (BLKmode,
22238 gen_rtx_PRE_MODIFY (Pmode,
22239 stack_pointer_rtx,
22240 plus_constant
22241 (Pmode, stack_pointer_rtx,
22242 -4 * num_regs))
22243 ),
22244 gen_rtx_UNSPEC (BLKmode,
22245 gen_rtvec (1, reg),
22246 UNSPEC_PUSH_MULT));
22247
22248 if (dwarf_regs_mask & (1 << i))
22249 {
22250 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22251 reg);
22252 RTX_FRAME_RELATED_P (tmp) = 1;
22253 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22254 }
22255
22256 break;
22257 }
22258 }
22259
22260 for (j = 1, i++; j < num_regs; i++)
22261 {
22262 if (mask & (1 << i))
22263 {
22264 reg = gen_rtx_REG (SImode, i);
22265
22266 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22267
22268 if (dwarf_regs_mask & (1 << i))
22269 {
22270 tmp
22271 = gen_rtx_SET (gen_frame_mem
22272 (SImode,
22273 plus_constant (Pmode, stack_pointer_rtx,
22274 4 * j)),
22275 reg);
22276 RTX_FRAME_RELATED_P (tmp) = 1;
22277 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22278 }
22279
22280 j++;
22281 }
22282 }
22283
22284 par = emit_insn (par);
22285
22286 tmp = gen_rtx_SET (stack_pointer_rtx,
22287 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22288 RTX_FRAME_RELATED_P (tmp) = 1;
22289 XVECEXP (dwarf, 0, 0) = tmp;
22290
22291 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22292
22293 return par;
22294 }
22295
22296 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22297 SIZE is the offset to be adjusted.
22298 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22299 static void
22300 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22301 {
22302 rtx dwarf;
22303
22304 RTX_FRAME_RELATED_P (insn) = 1;
22305 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22306 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22307 }
22308
22309 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22310 SAVED_REGS_MASK shows which registers need to be restored.
22311
22312 Unfortunately, since this insn does not reflect very well the actual
22313 semantics of the operation, we need to annotate the insn for the benefit
22314 of DWARF2 frame unwind information. */
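/* For example (illustrative only), popping {r4, r5, pc} emits a single
   jump insn that is normally printed as

	pop	{r4, r5, pc}

   whereas popping just {r4} degenerates to a single load with writeback,
   roughly

	ldr	r4, [sp], #4  */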
22315 static void
22316 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22317 {
22318 int num_regs = 0;
22319 int i, j;
22320 rtx par;
22321 rtx dwarf = NULL_RTX;
22322 rtx tmp, reg;
22323 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22324 int offset_adj;
22325 int emit_update;
22326
22327 offset_adj = return_in_pc ? 1 : 0;
22328 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22329 if (saved_regs_mask & (1 << i))
22330 num_regs++;
22331
22332 gcc_assert (num_regs && num_regs <= 16);
22333
22334 /* If SP is in the register list, then we don't emit the SP update insn. */
22335 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22336
22337 /* The parallel needs to hold num_regs SETs
22338 and one SET for the stack update. */
22339 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22340
22341 if (return_in_pc)
22342 XVECEXP (par, 0, 0) = ret_rtx;
22343
22344 if (emit_update)
22345 {
22346 /* Increment the stack pointer, based on there being
22347 num_regs 4-byte registers to restore. */
22348 tmp = gen_rtx_SET (stack_pointer_rtx,
22349 plus_constant (Pmode,
22350 stack_pointer_rtx,
22351 4 * num_regs));
22352 RTX_FRAME_RELATED_P (tmp) = 1;
22353 XVECEXP (par, 0, offset_adj) = tmp;
22354 }
22355
22356 /* Now restore every reg, which may include PC. */
22357 for (j = 0, i = 0; j < num_regs; i++)
22358 if (saved_regs_mask & (1 << i))
22359 {
22360 reg = gen_rtx_REG (SImode, i);
22361 if ((num_regs == 1) && emit_update && !return_in_pc)
22362 {
22363 /* Emit single load with writeback. */
22364 tmp = gen_frame_mem (SImode,
22365 gen_rtx_POST_INC (Pmode,
22366 stack_pointer_rtx));
22367 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22368 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22369 return;
22370 }
22371
22372 tmp = gen_rtx_SET (reg,
22373 gen_frame_mem
22374 (SImode,
22375 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22376 RTX_FRAME_RELATED_P (tmp) = 1;
22377 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22378
22379 /* We need to maintain a sequence for DWARF info too. As dwarf info
22380 should not have PC, skip PC. */
22381 if (i != PC_REGNUM)
22382 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22383
22384 j++;
22385 }
22386
22387 if (return_in_pc)
22388 par = emit_jump_insn (par);
22389 else
22390 par = emit_insn (par);
22391
22392 REG_NOTES (par) = dwarf;
22393 if (!return_in_pc)
22394 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22395 stack_pointer_rtx, stack_pointer_rtx);
22396 }
22397
22398 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22399 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22400
22401 Unfortunately, since this insn does not reflect very well the actual
22402 semantics of the operation, we need to annotate the insn for the benefit
22403 of DWARF2 frame unwind information. */
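/* Illustrative sketch only: popping d8-d11 with BASE_REG being the stack
   pointer would normally be printed as something like

	vldm	sp!, {d8-d11}

   (the precise mnemonic and syntax depend on the assembly dialect).  */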
22404 static void
22405 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22406 {
22407 int i, j;
22408 rtx par;
22409 rtx dwarf = NULL_RTX;
22410 rtx tmp, reg;
22411
22412 gcc_assert (num_regs && num_regs <= 32);
22413
22414 /* Workaround ARM10 VFPr1 bug. */
22415 if (num_regs == 2 && !arm_arch6)
22416 {
22417 if (first_reg == 15)
22418 first_reg--;
22419
22420 num_regs++;
22421 }
22422
22423 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22424 there could be up to 32 D-registers to restore.
22425 If there are more than 16 D-registers, make two recursive calls,
22426 each of which emits one pop_multi instruction. */
22427 if (num_regs > 16)
22428 {
22429 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22430 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22431 return;
22432 }
22433
22434 /* The parallel needs to hold num_regs SETs
22435 and one SET for the stack update. */
22436 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22437
22438 /* Increment the stack pointer, based on there being
22439 num_regs 8-byte registers to restore. */
22440 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22441 RTX_FRAME_RELATED_P (tmp) = 1;
22442 XVECEXP (par, 0, 0) = tmp;
22443
22444 /* Now show every reg that will be restored, using a SET for each. */
22445 for (j = 0, i=first_reg; j < num_regs; i += 2)
22446 {
22447 reg = gen_rtx_REG (DFmode, i);
22448
22449 tmp = gen_rtx_SET (reg,
22450 gen_frame_mem
22451 (DFmode,
22452 plus_constant (Pmode, base_reg, 8 * j)));
22453 RTX_FRAME_RELATED_P (tmp) = 1;
22454 XVECEXP (par, 0, j + 1) = tmp;
22455
22456 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22457
22458 j++;
22459 }
22460
22461 par = emit_insn (par);
22462 REG_NOTES (par) = dwarf;
22463
22464 /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP. */
22465 if (REGNO (base_reg) == IP_REGNUM)
22466 {
22467 RTX_FRAME_RELATED_P (par) = 1;
22468 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22469 }
22470 else
22471 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22472 base_reg, base_reg);
22473 }
22474
22475 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22476 even number of registers is being popped, an LDRD pattern is created for
22477 each register pair. If an odd number of registers is popped, the last register
22478 is loaded using an LDR pattern. */
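/* Illustrative examples (register sets chosen arbitrarily): popping
   {r4, r5, r6, r7} is expected to expand to roughly

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16

   and popping {r4, r5, r6, pc} to roughly

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}  */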
22479 static void
22480 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22481 {
22482 int num_regs = 0;
22483 int i, j;
22484 rtx par = NULL_RTX;
22485 rtx dwarf = NULL_RTX;
22486 rtx tmp, reg, tmp1;
22487 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22488
22489 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22490 if (saved_regs_mask & (1 << i))
22491 num_regs++;
22492
22493 gcc_assert (num_regs && num_regs <= 16);
22494
22495 /* We cannot generate ldrd for PC, so reduce the count if PC is
22496 to be popped. If num_regs was even it now becomes odd, and a
22497 pop that includes PC is generated at the end. If num_regs was odd
22498 it is now even, and an ldr with return is generated for PC. */
22499 if (return_in_pc)
22500 num_regs--;
22501
22502 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22503
22504 /* Var j iterates over all the registers in saved_regs_mask, while
22505 var i gives the index of each saved register in the stack frame.
22506 A PARALLEL RTX holding a register pair is created here, so that the
22507 pattern for LDRD can be matched. As PC is always the last register to be
22508 popped, and we have already decremented num_regs if PC is present,
22509 we don't have to worry about PC in this loop. */
22510 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22511 if (saved_regs_mask & (1 << j))
22512 {
22513 /* Create RTX for memory load. */
22514 reg = gen_rtx_REG (SImode, j);
22515 tmp = gen_rtx_SET (reg,
22516 gen_frame_mem (SImode,
22517 plus_constant (Pmode,
22518 stack_pointer_rtx, 4 * i)));
22519 RTX_FRAME_RELATED_P (tmp) = 1;
22520
22521 if (i % 2 == 0)
22522 {
22523 /* When saved-register index (i) is even, the RTX to be emitted is
22524 yet to be created. Hence create it first. The LDRD pattern we
22525 are generating is :
22526 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22527 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22528 where target registers need not be consecutive. */
22529 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22530 dwarf = NULL_RTX;
22531 }
22532
22533 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
22534 added as the 0th element; if i is odd, reg_i is added as the 1st element
22535 of the LDRD pattern shown above. */
22536 XVECEXP (par, 0, (i % 2)) = tmp;
22537 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22538
22539 if ((i % 2) == 1)
22540 {
22541 /* When the saved-register index (i) is odd, RTXs for both the registers
22542 to be loaded have been generated in the LDRD pattern given above, and the
22543 pattern can be emitted now. */
22544 par = emit_insn (par);
22545 REG_NOTES (par) = dwarf;
22546 RTX_FRAME_RELATED_P (par) = 1;
22547 }
22548
22549 i++;
22550 }
22551
22552 /* If num_regs is odd and return_in_pc is false, or num_regs is even
22553 and return_in_pc is true, the last register is
22554 popped using LDR; it can be PC as well. Hence, adjust the stack first and
22555 then emit an LDR with post-increment. */
22556
22557 /* Increment the stack pointer, based on there being
22558 num_regs 4-byte registers to restore. */
22559 tmp = gen_rtx_SET (stack_pointer_rtx,
22560 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22561 RTX_FRAME_RELATED_P (tmp) = 1;
22562 tmp = emit_insn (tmp);
22563 if (!return_in_pc)
22564 {
22565 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22566 stack_pointer_rtx, stack_pointer_rtx);
22567 }
22568
22569 dwarf = NULL_RTX;
22570
22571 if (((num_regs % 2) == 1 && !return_in_pc)
22572 || ((num_regs % 2) == 0 && return_in_pc))
22573 {
22574 /* Scan for the single register to be popped. Skip until the saved
22575 register is found. */
22576 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22577
22578 /* Gen LDR with post increment here. */
22579 tmp1 = gen_rtx_MEM (SImode,
22580 gen_rtx_POST_INC (SImode,
22581 stack_pointer_rtx));
22582 set_mem_alias_set (tmp1, get_frame_alias_set ());
22583
22584 reg = gen_rtx_REG (SImode, j);
22585 tmp = gen_rtx_SET (reg, tmp1);
22586 RTX_FRAME_RELATED_P (tmp) = 1;
22587 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22588
22589 if (return_in_pc)
22590 {
22591 /* If return_in_pc, j must be PC_REGNUM. */
22592 gcc_assert (j == PC_REGNUM);
22593 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22594 XVECEXP (par, 0, 0) = ret_rtx;
22595 XVECEXP (par, 0, 1) = tmp;
22596 par = emit_jump_insn (par);
22597 }
22598 else
22599 {
22600 par = emit_insn (tmp);
22601 REG_NOTES (par) = dwarf;
22602 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22603 stack_pointer_rtx, stack_pointer_rtx);
22604 }
22605
22606 }
22607 else if ((num_regs % 2) == 1 && return_in_pc)
22608 {
22609 /* There are 2 registers to be popped. So, generate the pattern
22610 pop_multiple_with_stack_update_and_return to pop in PC. */
22611 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22612 }
22613
22614 return;
22615 }
22616
22617 /* LDRD in ARM mode needs consecutive registers as operands. This function
22618 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22619 offset addressing and then generates one separate stack update. This provides
22620 more scheduling freedom, compared to writeback on every load. However,
22621 if the function returns using a load into PC directly
22622 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22623 before the last load. TODO: Add a peephole optimization to recognize
22624 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22625 a peephole optimization to merge the load at stack-offset zero
22626 with the stack update instruction, using a load with writeback
22627 in post-index addressing mode. */
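/* Illustrative example (register set chosen arbitrarily): popping
   {r4, r5, r6, lr} is expected to expand to roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	ldr	lr, [sp, #12]
	add	sp, sp, #16  */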
22628 static void
22629 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22630 {
22631 int j = 0;
22632 int offset = 0;
22633 rtx par = NULL_RTX;
22634 rtx dwarf = NULL_RTX;
22635 rtx tmp, mem;
22636
22637 /* Restore saved registers. */
22638 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22639 j = 0;
22640 while (j <= LAST_ARM_REGNUM)
22641 if (saved_regs_mask & (1 << j))
22642 {
22643 if ((j % 2) == 0
22644 && (saved_regs_mask & (1 << (j + 1)))
22645 && (j + 1) != PC_REGNUM)
22646 {
22647 /* Current register and next register form register pair for which
22648 LDRD can be generated. PC is always the last register popped, and
22649 we handle it separately. */
22650 if (offset > 0)
22651 mem = gen_frame_mem (DImode,
22652 plus_constant (Pmode,
22653 stack_pointer_rtx,
22654 offset));
22655 else
22656 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22657
22658 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22659 tmp = emit_insn (tmp);
22660 RTX_FRAME_RELATED_P (tmp) = 1;
22661
22662 /* Generate dwarf info. */
22663
22664 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22665 gen_rtx_REG (SImode, j),
22666 NULL_RTX);
22667 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22668 gen_rtx_REG (SImode, j + 1),
22669 dwarf);
22670
22671 REG_NOTES (tmp) = dwarf;
22672
22673 offset += 8;
22674 j += 2;
22675 }
22676 else if (j != PC_REGNUM)
22677 {
22678 /* Emit a single word load. */
22679 if (offset > 0)
22680 mem = gen_frame_mem (SImode,
22681 plus_constant (Pmode,
22682 stack_pointer_rtx,
22683 offset));
22684 else
22685 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22686
22687 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22688 tmp = emit_insn (tmp);
22689 RTX_FRAME_RELATED_P (tmp) = 1;
22690
22691 /* Generate dwarf info. */
22692 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22693 gen_rtx_REG (SImode, j),
22694 NULL_RTX);
22695
22696 offset += 4;
22697 j += 1;
22698 }
22699 else /* j == PC_REGNUM */
22700 j++;
22701 }
22702 else
22703 j++;
22704
22705 /* Update the stack. */
22706 if (offset > 0)
22707 {
22708 tmp = gen_rtx_SET (stack_pointer_rtx,
22709 plus_constant (Pmode,
22710 stack_pointer_rtx,
22711 offset));
22712 tmp = emit_insn (tmp);
22713 arm_add_cfa_adjust_cfa_note (tmp, offset,
22714 stack_pointer_rtx, stack_pointer_rtx);
22715 offset = 0;
22716 }
22717
22718 if (saved_regs_mask & (1 << PC_REGNUM))
22719 {
22720 /* Only PC is to be popped. */
22721 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22722 XVECEXP (par, 0, 0) = ret_rtx;
22723 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22724 gen_frame_mem (SImode,
22725 gen_rtx_POST_INC (SImode,
22726 stack_pointer_rtx)));
22727 RTX_FRAME_RELATED_P (tmp) = 1;
22728 XVECEXP (par, 0, 1) = tmp;
22729 par = emit_jump_insn (par);
22730
22731 /* Generate dwarf info. */
22732 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22733 gen_rtx_REG (SImode, PC_REGNUM),
22734 NULL_RTX);
22735 REG_NOTES (par) = dwarf;
22736 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22737 stack_pointer_rtx, stack_pointer_rtx);
22738 }
22739 }
22740
22741 /* Calculate the size of the return value that is passed in registers. */
22742 static unsigned
22743 arm_size_return_regs (void)
22744 {
22745 machine_mode mode;
22746
22747 if (crtl->return_rtx != 0)
22748 mode = GET_MODE (crtl->return_rtx);
22749 else
22750 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22751
22752 return GET_MODE_SIZE (mode);
22753 }
22754
22755 /* Return true if the current function needs to save/restore LR. */
22756 static bool
22757 thumb_force_lr_save (void)
22758 {
22759 return !cfun->machine->lr_save_eliminated
22760 && (!crtl->is_leaf
22761 || thumb_far_jump_used_p ()
22762 || df_regs_ever_live_p (LR_REGNUM));
22763 }
22764
22765 /* We do not know whether r3 will be available, because
22766 there is an indirect tail call happening in this
22767 particular case. */
22768 static bool
22769 is_indirect_tailcall_p (rtx call)
22770 {
22771 rtx pat = PATTERN (call);
22772
22773 /* Indirect tail call. */
22774 pat = XVECEXP (pat, 0, 0);
22775 if (GET_CODE (pat) == SET)
22776 pat = SET_SRC (pat);
22777
22778 pat = XEXP (XEXP (pat, 0), 0);
22779 return REG_P (pat);
22780 }
22781
22782 /* Return true if r3 is used by any of the tail call insns in the
22783 current function. */
22784 static bool
22785 any_sibcall_could_use_r3 (void)
22786 {
22787 edge_iterator ei;
22788 edge e;
22789
22790 if (!crtl->tail_call_emit)
22791 return false;
22792 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22793 if (e->flags & EDGE_SIBCALL)
22794 {
22795 rtx_insn *call = BB_END (e->src);
22796 if (!CALL_P (call))
22797 call = prev_nonnote_nondebug_insn (call);
22798 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22799 if (find_regno_fusage (call, USE, 3)
22800 || is_indirect_tailcall_p (call))
22801 return true;
22802 }
22803 return false;
22804 }
22805
22806
22807 /* Compute the distance from register FROM to register TO.
22808 These can be the arg pointer (26), the soft frame pointer (25),
22809 the stack pointer (13) or the hard frame pointer (11).
22810 In thumb mode r7 is used as the soft frame pointer, if needed.
22811 Typical stack layout looks like this:
22812
22813 old stack pointer -> | |
22814 ----
22815 | | \
22816 | | saved arguments for
22817 | | vararg functions
22818 | | /
22819 --
22820 hard FP & arg pointer -> | | \
22821 | | stack
22822 | | frame
22823 | | /
22824 --
22825 | | \
22826 | | call saved
22827 | | registers
22828 soft frame pointer -> | | /
22829 --
22830 | | \
22831 | | local
22832 | | variables
22833 locals base pointer -> | | /
22834 --
22835 | | \
22836 | | outgoing
22837 | | arguments
22838 current stack pointer -> | | /
22839 --
22840
22841 For a given function some or all of these stack components
22842 may not be needed, giving rise to the possibility of
22843 eliminating some of the registers.
22844
22845 The values returned by this function must reflect the behavior
22846 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22847
22848 The sign of the number returned reflects the direction of stack
22849 growth, so the values are positive for all eliminations except
22850 from the soft frame pointer to the hard frame pointer.
22851
22852 SFP may point just inside the local variables block to ensure correct
22853 alignment. */
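/* A worked example (purely illustrative, assuming the frame pointer is in
   use and there is no static chain, no interworking slot and no pretend
   args): a function that saves {fp, lr}, has 16 bytes of locals and
   8 bytes of outgoing arguments ends up with

	saved_args = 0, frame = 4, saved_regs = 8, soft_frame = 8,
	locals_base = 24, outgoing_args = 32

   so eliminating ARG_POINTER into STACK_POINTER yields 32 - (0 + 4) = 28,
   while FRAME_POINTER into ARM_HARD_FRAME_POINTER yields 4 - 8 = -4.  */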
22854
22855
22856 /* Return cached stack offsets. */
22857
22858 static arm_stack_offsets *
22859 arm_get_frame_offsets (void)
22860 {
22861 struct arm_stack_offsets *offsets;
22862
22863 offsets = &cfun->machine->stack_offsets;
22864
22865 return offsets;
22866 }
22867
22868
22869 /* Calculate stack offsets. These are used to calculate register elimination
22870 offsets and in prologue/epilogue code. Also calculates which registers
22871 should be saved. */
22872
22873 static void
22874 arm_compute_frame_layout (void)
22875 {
22876 struct arm_stack_offsets *offsets;
22877 unsigned long func_type;
22878 int saved;
22879 int core_saved;
22880 HOST_WIDE_INT frame_size;
22881 int i;
22882
22883 offsets = &cfun->machine->stack_offsets;
22884
22885 /* Initially this is the size of the local variables. It will be translated
22886 into an offset once we have determined the size of preceding data. */
22887 frame_size = ROUND_UP_WORD (get_frame_size ());
22888
22889 /* Space for variadic functions. */
22890 offsets->saved_args = crtl->args.pretend_args_size;
22891
22892 /* In Thumb mode this is incorrect, but never used. */
22893 offsets->frame
22894 = (offsets->saved_args
22895 + arm_compute_static_chain_stack_bytes ()
22896 + (frame_pointer_needed ? 4 : 0));
22897
22898 if (TARGET_32BIT)
22899 {
22900 unsigned int regno;
22901
22902 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22903 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22904 saved = core_saved;
22905
22906 /* We know that SP will be doubleword aligned on entry, and we must
22907 preserve that condition at any subroutine call. We also require the
22908 soft frame pointer to be doubleword aligned. */
22909
22910 if (TARGET_REALLY_IWMMXT)
22911 {
22912 /* Check for the call-saved iWMMXt registers. */
22913 for (regno = FIRST_IWMMXT_REGNUM;
22914 regno <= LAST_IWMMXT_REGNUM;
22915 regno++)
22916 if (reg_needs_saving_p (regno))
22917 saved += 8;
22918 }
22919
22920 func_type = arm_current_func_type ();
22921 /* Space for saved VFP registers. */
22922 if (! IS_VOLATILE (func_type)
22923 && TARGET_VFP_BASE)
22924 saved += arm_get_vfp_saved_size ();
22925
22926 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22927 nonsecure entry functions with VSTR/VLDR. */
22928 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22929 saved += 4;
22930 }
22931 else /* TARGET_THUMB1 */
22932 {
22933 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22934 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22935 saved = core_saved;
22936 if (TARGET_BACKTRACE)
22937 saved += 16;
22938 }
22939
22940 /* Saved registers include the stack frame. */
22941 offsets->saved_regs
22942 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22943 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22944
22945 /* A leaf function does not need any stack alignment if it has nothing
22946 on the stack. */
22947 if (crtl->is_leaf && frame_size == 0
22948 /* However if it calls alloca(), we have a dynamically allocated
22949 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22950 && ! cfun->calls_alloca)
22951 {
22952 offsets->outgoing_args = offsets->soft_frame;
22953 offsets->locals_base = offsets->soft_frame;
22954 return;
22955 }
22956
22957 /* Ensure SFP has the correct alignment. */
22958 if (ARM_DOUBLEWORD_ALIGN
22959 && (offsets->soft_frame & 7))
22960 {
22961 offsets->soft_frame += 4;
22962 /* Try to align stack by pushing an extra reg. Don't bother doing this
22963 when there is a stack frame as the alignment will be rolled into
22964 the normal stack adjustment. */
22965 if (frame_size + crtl->outgoing_args_size == 0)
22966 {
22967 int reg = -1;
22968
22969 /* Register r3 is caller-saved. Normally it does not need to be
22970 saved on entry by the prologue. However if we choose to save
22971 it for padding then we may confuse the compiler into thinking
22972 a prologue sequence is required when in fact it is not. This
22973 will occur when shrink-wrapping if r3 is used as a scratch
22974 register and there are no other callee-saved writes.
22975
22976 This situation can be avoided when other callee-saved registers
22977 are available and r3 is not mandatory: in that case we choose a
22978 callee-saved register for the padding instead. */
22979 bool prefer_callee_reg_p = false;
22980
22981 /* If it is safe to use r3, then do so. This sometimes
22982 generates better code on Thumb-2 by avoiding the need to
22983 use 32-bit push/pop instructions. */
22984 if (! any_sibcall_could_use_r3 ()
22985 && arm_size_return_regs () <= 12
22986 && (offsets->saved_regs_mask & (1 << 3)) == 0
22987 && (TARGET_THUMB2
22988 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22989 {
22990 reg = 3;
22991 if (!TARGET_THUMB2)
22992 prefer_callee_reg_p = true;
22993 }
22994 if (reg == -1
22995 || prefer_callee_reg_p)
22996 {
22997 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22998 {
22999 /* Avoid fixed registers; they may be changed at
23000 arbitrary times so it's unsafe to restore them
23001 during the epilogue. */
23002 if (!fixed_regs[i]
23003 && (offsets->saved_regs_mask & (1 << i)) == 0)
23004 {
23005 reg = i;
23006 break;
23007 }
23008 }
23009 }
23010
23011 if (reg != -1)
23012 {
23013 offsets->saved_regs += 4;
23014 offsets->saved_regs_mask |= (1 << reg);
23015 }
23016 }
23017 }
23018
23019 offsets->locals_base = offsets->soft_frame + frame_size;
23020 offsets->outgoing_args = (offsets->locals_base
23021 + crtl->outgoing_args_size);
23022
23023 if (ARM_DOUBLEWORD_ALIGN)
23024 {
23025 /* Ensure SP remains doubleword aligned. */
23026 if (offsets->outgoing_args & 7)
23027 offsets->outgoing_args += 4;
23028 gcc_assert (!(offsets->outgoing_args & 7));
23029 }
23030 }
23031
23032
23033 /* Calculate the relative offsets for the different stack pointers. Positive
23034 offsets are in the direction of stack growth. */
23035
23036 HOST_WIDE_INT
23037 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23038 {
23039 arm_stack_offsets *offsets;
23040
23041 offsets = arm_get_frame_offsets ();
23042
23043 /* OK, now we have enough information to compute the distances.
23044 There must be an entry in these switch tables for each pair
23045 of registers in ELIMINABLE_REGS, even if some of the entries
23046 seem to be redundant or useless. */
23047 switch (from)
23048 {
23049 case ARG_POINTER_REGNUM:
23050 switch (to)
23051 {
23052 case THUMB_HARD_FRAME_POINTER_REGNUM:
23053 return 0;
23054
23055 case FRAME_POINTER_REGNUM:
23056 /* This is the reverse of the soft frame pointer
23057 to hard frame pointer elimination below. */
23058 return offsets->soft_frame - offsets->saved_args;
23059
23060 case ARM_HARD_FRAME_POINTER_REGNUM:
23061 /* This is only non-zero in the case where the static chain register
23062 is stored above the frame. */
23063 return offsets->frame - offsets->saved_args - 4;
23064
23065 case STACK_POINTER_REGNUM:
23066 /* If nothing has been pushed on the stack at all
23067 then this will return -4. This *is* correct! */
23068 return offsets->outgoing_args - (offsets->saved_args + 4);
23069
23070 default:
23071 gcc_unreachable ();
23072 }
23073 gcc_unreachable ();
23074
23075 case FRAME_POINTER_REGNUM:
23076 switch (to)
23077 {
23078 case THUMB_HARD_FRAME_POINTER_REGNUM:
23079 return 0;
23080
23081 case ARM_HARD_FRAME_POINTER_REGNUM:
23082 /* The hard frame pointer points to the top entry in the
23083 stack frame. The soft frame pointer points to the bottom entry
23084 in the stack frame. If there is no stack frame at all,
23085 then they are identical. */
23086
23087 return offsets->frame - offsets->soft_frame;
23088
23089 case STACK_POINTER_REGNUM:
23090 return offsets->outgoing_args - offsets->soft_frame;
23091
23092 default:
23093 gcc_unreachable ();
23094 }
23095 gcc_unreachable ();
23096
23097 default:
23098 /* You cannot eliminate from the stack pointer.
23099 In theory you could eliminate from the hard frame
23100 pointer to the stack pointer, but this will never
23101 happen, since if a stack frame is not needed the
23102 hard frame pointer will never be used. */
23103 gcc_unreachable ();
23104 }
23105 }
23106
23107 /* Given FROM and TO register numbers, say whether this elimination is
23108 allowed. Frame pointer elimination is automatically handled.
23109
23110 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23111 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23112 pointer, we must eliminate FRAME_POINTER_REGNUM into
23113 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23114 ARG_POINTER_REGNUM. */
23115
23116 bool
23117 arm_can_eliminate (const int from, const int to)
23118 {
23119 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23120 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23121 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23122 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23123 true);
23124 }
23125
23126 /* Emit RTL to save coprocessor registers on function entry. Returns the
23127 number of bytes pushed. */
23128
23129 static int
23130 arm_save_coproc_regs(void)
23131 {
23132 int saved_size = 0;
23133 unsigned reg;
23134 unsigned start_reg;
23135 rtx insn;
23136
23137 if (TARGET_REALLY_IWMMXT)
23138 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23139 if (reg_needs_saving_p (reg))
23140 {
23141 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23142 insn = gen_rtx_MEM (V2SImode, insn);
23143 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23144 RTX_FRAME_RELATED_P (insn) = 1;
23145 saved_size += 8;
23146 }
23147
23148 if (TARGET_VFP_BASE)
23149 {
23150 start_reg = FIRST_VFP_REGNUM;
23151
23152 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23153 {
23154 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23155 {
23156 if (start_reg != reg)
23157 saved_size += vfp_emit_fstmd (start_reg,
23158 (reg - start_reg) / 2);
23159 start_reg = reg + 2;
23160 }
23161 }
23162 if (start_reg != reg)
23163 saved_size += vfp_emit_fstmd (start_reg,
23164 (reg - start_reg) / 2);
23165 }
23166 return saved_size;
23167 }
23168
23169
23170 /* Set the Thumb frame pointer from the stack pointer. */
23171
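/* Illustrative note (not from the upstream sources): for a small offset the
   function below emits a single add of the form

       add     fp, sp, #amount

   while for amounts of 1024 or more it first materialises the constant and
   then adds SP, roughly

       mov     fp, #amount        @ may expand to several insns
       add     fp, sp, fp         @ Thumb-2 operand order
       (or:  add fp, fp, sp       @ Thumb-1 operand order)

   together with a REG_FRAME_RELATED_EXPR note describing fp = sp + amount
   for the unwinder.  Register names here are placeholders only.  */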
23172 static void
23173 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23174 {
23175 HOST_WIDE_INT amount;
23176 rtx insn, dwarf;
23177
23178 amount = offsets->outgoing_args - offsets->locals_base;
23179 if (amount < 1024)
23180 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23181 stack_pointer_rtx, GEN_INT (amount)));
23182 else
23183 {
23184 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23185 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23186 expects the first two operands to be the same. */
23187 if (TARGET_THUMB2)
23188 {
23189 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23190 stack_pointer_rtx,
23191 hard_frame_pointer_rtx));
23192 }
23193 else
23194 {
23195 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23196 hard_frame_pointer_rtx,
23197 stack_pointer_rtx));
23198 }
23199 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23200 plus_constant (Pmode, stack_pointer_rtx, amount));
23201 RTX_FRAME_RELATED_P (dwarf) = 1;
23202 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23203 }
23204
23205 RTX_FRAME_RELATED_P (insn) = 1;
23206 }
23207
23208 struct scratch_reg {
23209 rtx reg;
23210 bool saved;
23211 };
23212
23213 /* Return a short-lived scratch register for use as a 2nd scratch register on
23214 function entry after the registers are saved in the prologue. This register
23215 must be released by means of release_scratch_register_on_entry. IP is not
23216 considered since it is always used as the 1st scratch register if available.
23217
23218 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23219 mask of live registers. */
23220
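/* Illustrative note (not from the upstream sources): when the chosen scratch
   register is live on entry it is spilled around its use with a
   pre-decrement store and reloaded in release_scratch_register_on_entry ()
   with a post-increment load, i.e. roughly

       str     rN, [sp, #-4]!     @ save
       ...                        @ use rN as a scratch register
       ldr     rN, [sp], #4       @ restore

   where rN stands for whichever register the code below picks.  */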
23221 static void
23222 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23223 unsigned long live_regs)
23224 {
23225 int regno = -1;
23226
23227 sr->saved = false;
23228
23229 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23230 regno = LR_REGNUM;
23231 else
23232 {
23233 unsigned int i;
23234
23235 for (i = 4; i < 11; i++)
23236 if (regno1 != i && (live_regs & (1 << i)) != 0)
23237 {
23238 regno = i;
23239 break;
23240 }
23241
23242 if (regno < 0)
23243 {
23244 /* If IP is used as the 1st scratch register for a nested function,
23245 then either r3 wasn't available or is used to preserve IP. */
23246 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23247 regno1 = 3;
23248 regno = (regno1 == 3 ? 2 : 3);
23249 sr->saved
23250 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23251 regno);
23252 }
23253 }
23254
23255 sr->reg = gen_rtx_REG (SImode, regno);
23256 if (sr->saved)
23257 {
23258 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23259 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23260 rtx x = gen_rtx_SET (stack_pointer_rtx,
23261 plus_constant (Pmode, stack_pointer_rtx, -4));
23262 RTX_FRAME_RELATED_P (insn) = 1;
23263 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23264 }
23265 }
23266
23267 /* Release a scratch register obtained from the preceding function. */
23268
23269 static void
23270 release_scratch_register_on_entry (struct scratch_reg *sr)
23271 {
23272 if (sr->saved)
23273 {
23274 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23275 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23276 rtx x = gen_rtx_SET (stack_pointer_rtx,
23277 plus_constant (Pmode, stack_pointer_rtx, 4));
23278 RTX_FRAME_RELATED_P (insn) = 1;
23279 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23280 }
23281 }
23282
23283 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23284
23285 #if PROBE_INTERVAL > 4096
23286 #error Cannot use indexed addressing mode for stack probing
23287 #endif
23288
23289 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23290 inclusive. These are offsets from the current stack pointer. REGNO1
23291 is the index number of the 1st scratch register and LIVE_REGS is the
23292 mask of live registers. */
23293
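/* Worked example (illustrative, assuming PROBE_INTERVAL == 4096): with
   FIRST == 4096 and SIZE == 1000 the first branch below emits a single
   probe at SP - (FIRST + SIZE) = SP - 5096.  With SIZE == 9000 the second
   branch probes at SP - 8192 and SP - 12288 and then emits a final probe at
   SP - 13096 = SP - (FIRST + SIZE).  Larger sizes fall through to the
   run-time loop emitted via gen_probe_stack_range.  */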
23294 static void
23295 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23296 unsigned int regno1, unsigned long live_regs)
23297 {
23298 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23299
23300 /* See if we have a constant small number of probes to generate. If so,
23301 that's the easy case. */
23302 if (size <= PROBE_INTERVAL)
23303 {
23304 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23305 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23306 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23307 }
23308
23309 /* The run-time loop is made up of 10 insns in the generic case while the
23310 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
23311 else if (size <= 5 * PROBE_INTERVAL)
23312 {
23313 HOST_WIDE_INT i, rem;
23314
23315 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23316 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23317 emit_stack_probe (reg1);
23318
23319 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23320 it exceeds SIZE. If only two probes are needed, this will not
23321 generate any code. Then probe at FIRST + SIZE. */
23322 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23323 {
23324 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23325 emit_stack_probe (reg1);
23326 }
23327
23328 rem = size - (i - PROBE_INTERVAL);
23329 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23330 {
23331 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23332 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23333 }
23334 else
23335 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23336 }
23337
23338 /* Otherwise, do the same as above, but in a loop. Note that we must be
23339 extra careful with variables wrapping around because we might be at
23340 the very top (or the very bottom) of the address space and we have
23341 to be able to handle this case properly; in particular, we use an
23342 equality test for the loop condition. */
23343 else
23344 {
23345 HOST_WIDE_INT rounded_size;
23346 struct scratch_reg sr;
23347
23348 get_scratch_register_on_entry (&sr, regno1, live_regs);
23349
23350 emit_move_insn (reg1, GEN_INT (first));
23351
23352
23353 /* Step 1: round SIZE to the previous multiple of the interval. */
23354
23355 rounded_size = size & -PROBE_INTERVAL;
23356 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23357
23358
23359 /* Step 2: compute initial and final value of the loop counter. */
23360
23361 /* TEST_ADDR = SP + FIRST. */
23362 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23363
23364 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23365 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23366
23367
23368 /* Step 3: the loop
23369
23370 do
23371 {
23372 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23373 probe at TEST_ADDR
23374 }
23375 while (TEST_ADDR != LAST_ADDR)
23376
23377 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23378 until it is equal to ROUNDED_SIZE. */
23379
23380 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23381
23382
23383 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23384 that SIZE is equal to ROUNDED_SIZE. */
23385
23386 if (size != rounded_size)
23387 {
23388 HOST_WIDE_INT rem = size - rounded_size;
23389
23390 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23391 {
23392 emit_set_insn (sr.reg,
23393 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23394 emit_stack_probe (plus_constant (Pmode, sr.reg,
23395 PROBE_INTERVAL - rem));
23396 }
23397 else
23398 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23399 }
23400
23401 release_scratch_register_on_entry (&sr);
23402 }
23403
23404 /* Make sure nothing is scheduled before we are done. */
23405 emit_insn (gen_blockage ());
23406 }
23407
23408 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23409 absolute addresses. */
23410
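/* Illustrative note (not from the upstream sources): the assembly emitted
   below has the shape

   .LPSRL0:
       sub     rT, rT, #4096      @ TEST_ADDR -= PROBE_INTERVAL
       str     r0, [rT, #0]       @ probe the word at TEST_ADDR
       cmp     rT, rL             @ reached LAST_ADDR yet?
       bne     .LPSRL0

   where rT and rL are placeholders for the registers passed in as REG1 and
   REG2, and 4096 stands for PROBE_INTERVAL.  */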
23411 const char *
23412 output_probe_stack_range (rtx reg1, rtx reg2)
23413 {
23414 static int labelno = 0;
23415 char loop_lab[32];
23416 rtx xops[2];
23417
23418 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23419
23420 /* Loop. */
23421 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23422
23423 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23424 xops[0] = reg1;
23425 xops[1] = GEN_INT (PROBE_INTERVAL);
23426 output_asm_insn ("sub\t%0, %0, %1", xops);
23427
23428 /* Probe at TEST_ADDR. */
23429 output_asm_insn ("str\tr0, [%0, #0]", xops);
23430
23431 /* Test if TEST_ADDR == LAST_ADDR. */
23432 xops[1] = reg2;
23433 output_asm_insn ("cmp\t%0, %1", xops);
23434
23435 /* Branch. */
23436 fputs ("\tbne\t", asm_out_file);
23437 assemble_name_raw (asm_out_file, loop_lab);
23438 fputc ('\n', asm_out_file);
23439
23440 return "";
23441 }
23442
23443 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23444 function. */
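/* Illustrative note (not from the upstream sources): for a classic
   TARGET_APCS_FRAME function with a frame pointer, the code below produces
   a prologue of roughly this shape

       mov     ip, sp
       push    {<saved regs>, fp, ip, lr}   @ emit_multi_reg_push
       sub     fp, ip, #4                   @ create the new frame pointer
       sub     sp, sp, #<locals>            @ allocate the rest of the frame

   with the exact register list, IP handling and stack adjustment depending
   on the cases handled below (nested functions, interrupts, CMSE entry,
   stack probing, ...).  */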
23445 void
23446 arm_expand_prologue (void)
23447 {
23448 rtx amount;
23449 rtx insn;
23450 rtx ip_rtx;
23451 unsigned long live_regs_mask;
23452 unsigned long func_type;
23453 int fp_offset = 0;
23454 int saved_pretend_args = 0;
23455 int saved_regs = 0;
23456 unsigned HOST_WIDE_INT args_to_push;
23457 HOST_WIDE_INT size;
23458 arm_stack_offsets *offsets;
23459 bool clobber_ip;
23460
23461 func_type = arm_current_func_type ();
23462
23463 /* Naked functions don't have prologues. */
23464 if (IS_NAKED (func_type))
23465 {
23466 if (flag_stack_usage_info)
23467 current_function_static_stack_size = 0;
23468 return;
23469 }
23470
23471 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23472 args_to_push = crtl->args.pretend_args_size;
23473
23474 /* Compute which registers we will have to save onto the stack. */
23475 offsets = arm_get_frame_offsets ();
23476 live_regs_mask = offsets->saved_regs_mask;
23477
23478 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23479
23480 if (IS_STACKALIGN (func_type))
23481 {
23482 rtx r0, r1;
23483
23484 /* Handle a word-aligned stack pointer. We generate the following:
23485
23486 mov r0, sp
23487 bic r1, r0, #7
23488 mov sp, r1
23489 <save and restore r0 in normal prologue/epilogue>
23490 mov sp, r0
23491 bx lr
23492
23493 The unwinder doesn't need to know about the stack realignment.
23494 Just tell it we saved SP in r0. */
23495 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23496
23497 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23498 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23499
23500 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23501 RTX_FRAME_RELATED_P (insn) = 1;
23502 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23503
23504 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23505
23506 /* ??? The CFA changes here, which may cause GDB to conclude that it
23507 has entered a different function. That said, the unwind info is
23508 correct, individually, before and after this instruction because
23509 we've described the save of SP, which will override the default
23510 handling of SP as restoring from the CFA. */
23511 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23512 }
23513
23514 /* Let's compute the static_chain_stack_bytes required and store it. Right
23515 now the value must be -1 as stored by arm_init_machine_status (). */
23516 cfun->machine->static_chain_stack_bytes
23517 = arm_compute_static_chain_stack_bytes ();
23518
23519 /* The static chain register is the same as the IP register. If it is
23520 clobbered when creating the frame, we need to save and restore it. */
23521 clobber_ip = IS_NESTED (func_type)
23522 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23523 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23524 || flag_stack_clash_protection)
23525 && !df_regs_ever_live_p (LR_REGNUM)
23526 && arm_r3_live_at_start_p ()));
23527
23528 /* Find somewhere to store IP whilst the frame is being created.
23529 We try the following places in order:
23530
23531 1. The last argument register r3 if it is available.
23532 2. A slot on the stack above the frame if there are no
23533 arguments to push onto the stack.
23534 3. Register r3 again, after pushing the argument registers
23535 onto the stack, if this is a varargs function.
23536 4. The last slot on the stack created for the arguments to
23537 push, if this isn't a varargs function.
23538
23539 Note - we only need to tell the dwarf2 backend about the SP
23540 adjustment in the second variant; the static chain register
23541 doesn't need to be unwound, as it doesn't contain a value
23542 inherited from the caller. */
23543 if (clobber_ip)
23544 {
23545 if (!arm_r3_live_at_start_p ())
23546 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23547 else if (args_to_push == 0)
23548 {
23549 rtx addr, dwarf;
23550
23551 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23552 saved_regs += 4;
23553
23554 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23555 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23556 fp_offset = 4;
23557
23558 /* Just tell the dwarf backend that we adjusted SP. */
23559 dwarf = gen_rtx_SET (stack_pointer_rtx,
23560 plus_constant (Pmode, stack_pointer_rtx,
23561 -fp_offset));
23562 RTX_FRAME_RELATED_P (insn) = 1;
23563 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23564 }
23565 else
23566 {
23567 /* Store the args on the stack. */
23568 if (cfun->machine->uses_anonymous_args)
23569 {
23570 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23571 (0xf0 >> (args_to_push / 4)) & 0xf);
23572 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23573 saved_pretend_args = 1;
23574 }
23575 else
23576 {
23577 rtx addr, dwarf;
23578
23579 if (args_to_push == 4)
23580 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23581 else
23582 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23583 plus_constant (Pmode,
23584 stack_pointer_rtx,
23585 -args_to_push));
23586
23587 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23588
23589 /* Just tell the dwarf backend that we adjusted SP. */
23590 dwarf = gen_rtx_SET (stack_pointer_rtx,
23591 plus_constant (Pmode, stack_pointer_rtx,
23592 -args_to_push));
23593 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23594 }
23595
23596 RTX_FRAME_RELATED_P (insn) = 1;
23597 fp_offset = args_to_push;
23598 args_to_push = 0;
23599 }
23600 }
23601
23602 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23603 {
23604 if (IS_INTERRUPT (func_type))
23605 {
23606 /* Interrupt functions must not corrupt any registers.
23607 Creating a frame pointer, however, corrupts the IP
23608 register, so we must push it first. */
23609 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23610
23611 /* Do not set RTX_FRAME_RELATED_P on this insn.
23612 The dwarf stack unwinding code only wants to see one
23613 stack decrement per function, and this is not it. If
23614 this instruction is labeled as being part of the frame
23615 creation sequence then dwarf2out_frame_debug_expr will
23616 die when it encounters the assignment of IP to FP
23617 later on, since the use of SP here establishes SP as
23618 the CFA register and not IP.
23619
23620 Anyway this instruction is not really part of the stack
23621 frame creation although it is part of the prologue. */
23622 }
23623
23624 insn = emit_set_insn (ip_rtx,
23625 plus_constant (Pmode, stack_pointer_rtx,
23626 fp_offset));
23627 RTX_FRAME_RELATED_P (insn) = 1;
23628 }
23629
23630 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23631 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23632 {
23633 saved_regs += 4;
23634 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23635 GEN_INT (FPCXTNS_ENUM)));
23636 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23637 plus_constant (Pmode, stack_pointer_rtx, -4));
23638 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23639 RTX_FRAME_RELATED_P (insn) = 1;
23640 }
23641
23642 if (args_to_push)
23643 {
23644 /* Push the argument registers, or reserve space for them. */
23645 if (cfun->machine->uses_anonymous_args)
23646 insn = emit_multi_reg_push
23647 ((0xf0 >> (args_to_push / 4)) & 0xf,
23648 (0xf0 >> (args_to_push / 4)) & 0xf);
23649 else
23650 insn = emit_insn
23651 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23652 GEN_INT (- args_to_push)));
23653 RTX_FRAME_RELATED_P (insn) = 1;
23654 }
23655
23656 /* If this is an interrupt service routine, and the link register
23657 is going to be pushed, and we're not generating the extra
23658 push of IP (needed when a frame is needed and the frame layout is APCS),
23659 subtracting four from LR now will mean that the function return
23660 can be done with a single instruction. */
23661 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23662 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23663 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23664 && TARGET_ARM)
23665 {
23666 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23667
23668 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23669 }
23670
23671 if (live_regs_mask)
23672 {
23673 unsigned long dwarf_regs_mask = live_regs_mask;
23674
23675 saved_regs += bit_count (live_regs_mask) * 4;
23676 if (optimize_size && !frame_pointer_needed
23677 && saved_regs == offsets->saved_regs - offsets->saved_args)
23678 {
23679 /* If no coprocessor registers are being pushed and we don't have
23680 to worry about a frame pointer then push extra registers to
23681 create the stack frame. This is done in a way that does not
23682 alter the frame layout, so is independent of the epilogue. */
23683 int n;
23684 int frame;
23685 n = 0;
23686 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23687 n++;
23688 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23689 if (frame && n * 4 >= frame)
23690 {
23691 n = frame / 4;
23692 live_regs_mask |= (1 << n) - 1;
23693 saved_regs += frame;
23694 }
23695 }
23696
23697 if (TARGET_LDRD
23698 && current_tune->prefer_ldrd_strd
23699 && !optimize_function_for_size_p (cfun))
23700 {
23701 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23702 if (TARGET_THUMB2)
23703 thumb2_emit_strd_push (live_regs_mask);
23704 else if (TARGET_ARM
23705 && !TARGET_APCS_FRAME
23706 && !IS_INTERRUPT (func_type))
23707 arm_emit_strd_push (live_regs_mask);
23708 else
23709 {
23710 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23711 RTX_FRAME_RELATED_P (insn) = 1;
23712 }
23713 }
23714 else
23715 {
23716 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23717 RTX_FRAME_RELATED_P (insn) = 1;
23718 }
23719 }
23720
23721 if (! IS_VOLATILE (func_type))
23722 saved_regs += arm_save_coproc_regs ();
23723
23724 if (frame_pointer_needed && TARGET_ARM)
23725 {
23726 /* Create the new frame pointer. */
23727 if (TARGET_APCS_FRAME)
23728 {
23729 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23730 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23731 RTX_FRAME_RELATED_P (insn) = 1;
23732 }
23733 else
23734 {
23735 insn = GEN_INT (saved_regs - (4 + fp_offset));
23736 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23737 stack_pointer_rtx, insn));
23738 RTX_FRAME_RELATED_P (insn) = 1;
23739 }
23740 }
23741
23742 size = offsets->outgoing_args - offsets->saved_args;
23743 if (flag_stack_usage_info)
23744 current_function_static_stack_size = size;
23745
23746 /* If this isn't an interrupt service routine and we have a frame, then do
23747 stack checking. We use IP as the first scratch register, except for the
23748 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23749 if (!IS_INTERRUPT (func_type)
23750 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23751 || flag_stack_clash_protection))
23752 {
23753 unsigned int regno;
23754
23755 if (!IS_NESTED (func_type) || clobber_ip)
23756 regno = IP_REGNUM;
23757 else if (df_regs_ever_live_p (LR_REGNUM))
23758 regno = LR_REGNUM;
23759 else
23760 regno = 3;
23761
23762 if (crtl->is_leaf && !cfun->calls_alloca)
23763 {
23764 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23765 arm_emit_probe_stack_range (get_stack_check_protect (),
23766 size - get_stack_check_protect (),
23767 regno, live_regs_mask);
23768 }
23769 else if (size > 0)
23770 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23771 regno, live_regs_mask);
23772 }
23773
23774 /* Recover the static chain register. */
23775 if (clobber_ip)
23776 {
23777 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23778 insn = gen_rtx_REG (SImode, 3);
23779 else
23780 {
23781 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23782 insn = gen_frame_mem (SImode, insn);
23783 }
23784 emit_set_insn (ip_rtx, insn);
23785 emit_insn (gen_force_register_use (ip_rtx));
23786 }
23787
23788 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23789 {
23790 /* This add can produce multiple insns for a large constant, so we
23791 need to get tricky. */
23792 rtx_insn *last = get_last_insn ();
23793
23794 amount = GEN_INT (offsets->saved_args + saved_regs
23795 - offsets->outgoing_args);
23796
23797 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23798 amount));
23799 do
23800 {
23801 last = last ? NEXT_INSN (last) : get_insns ();
23802 RTX_FRAME_RELATED_P (last) = 1;
23803 }
23804 while (last != insn);
23805
23806 /* If the frame pointer is needed, emit a special barrier that
23807 will prevent the scheduler from moving stores to the frame
23808 before the stack adjustment. */
23809 if (frame_pointer_needed)
23810 emit_insn (gen_stack_tie (stack_pointer_rtx,
23811 hard_frame_pointer_rtx));
23812 }
23813
23814
23815 if (frame_pointer_needed && TARGET_THUMB2)
23816 thumb_set_frame_pointer (offsets);
23817
23818 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23819 {
23820 unsigned long mask;
23821
23822 mask = live_regs_mask;
23823 mask &= THUMB2_WORK_REGS;
23824 if (!IS_NESTED (func_type))
23825 mask |= (1 << IP_REGNUM);
23826 arm_load_pic_register (mask, NULL_RTX);
23827 }
23828
23829 /* If we are profiling, make sure no instructions are scheduled before
23830 the call to mcount. Similarly if the user has requested no
23831 scheduling in the prolog. Similarly if we want non-call exceptions
23832 using the EABI unwinder, to prevent faulting instructions from being
23833 swapped with a stack adjustment. */
23834 if (crtl->profile || !TARGET_SCHED_PROLOG
23835 || (arm_except_unwind_info (&global_options) == UI_TARGET
23836 && cfun->can_throw_non_call_exceptions))
23837 emit_insn (gen_blockage ());
23838
23839 /* If the link register is being kept alive, with the return address in it,
23840 then make sure that it does not get reused by the ce2 pass. */
23841 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23842 cfun->machine->lr_save_eliminated = 1;
23843 }
23844 \f
23845 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23846 static void
23847 arm_print_condition (FILE *stream)
23848 {
23849 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23850 {
23851 /* Branch conversion is not implemented for Thumb-2. */
23852 if (TARGET_THUMB)
23853 {
23854 output_operand_lossage ("predicated Thumb instruction");
23855 return;
23856 }
23857 if (current_insn_predicate != NULL)
23858 {
23859 output_operand_lossage
23860 ("predicated instruction in conditional sequence");
23861 return;
23862 }
23863
23864 fputs (arm_condition_codes[arm_current_cc], stream);
23865 }
23866 else if (current_insn_predicate)
23867 {
23868 enum arm_cond_code code;
23869
23870 if (TARGET_THUMB1)
23871 {
23872 output_operand_lossage ("predicated Thumb instruction");
23873 return;
23874 }
23875
23876 code = get_arm_condition_code (current_insn_predicate);
23877 fputs (arm_condition_codes[code], stream);
23878 }
23879 }
23880
23881
23882 /* Globally reserved letters: acln
23883 Punctuation letters currently used: @_|?().!#
23884 Lower case letters currently used: bcdefhimpqtvwxyz
23885 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23886 Letters previously used, but now deprecated/obsolete: sWXYZ.
23887
23888 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23889
23890 If CODE is 'd', then the X is a condition operand and the instruction
23891 should only be executed if the condition is true.
23892 If CODE is 'D', then the X is a condition operand and the instruction
23893 should only be executed if the condition is false: however, if the mode
23894 of the comparison is CCFPEmode, then always execute the instruction -- we
23895 do this because in these circumstances !GE does not necessarily imply LT;
23896 in these cases the instruction pattern will take care to make sure that
23897 an instruction containing %d will follow, thereby undoing the effects of
23898 doing this instruction unconditionally.
23899 If CODE is 'N' then X is a floating point operand that must be negated
23900 before output.
23901 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23902 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23903 If CODE is 'V', then the operand must be a CONST_INT representing
23904 the bits to preserve in the modified register (Rd) of a BFI or BFC
23905 instruction: print out both the width and lsb (shift) fields. */
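/* A few illustrative examples (not from the upstream sources) of the
   simpler codes handled below: for x == (const_int 42), %c prints "42"
   (no '#') and %x prints "#0x2a"; for x == (const_int 5), %B prints the
   bitwise inverse, i.e. "-6"; %L prints the low 16 bits of the value.  */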
23906 static void
23907 arm_print_operand (FILE *stream, rtx x, int code)
23908 {
23909 switch (code)
23910 {
23911 case '@':
23912 fputs (ASM_COMMENT_START, stream);
23913 return;
23914
23915 case '_':
23916 fputs (user_label_prefix, stream);
23917 return;
23918
23919 case '|':
23920 fputs (REGISTER_PREFIX, stream);
23921 return;
23922
23923 case '?':
23924 arm_print_condition (stream);
23925 return;
23926
23927 case '.':
23928 /* The current condition code for a condition code setting instruction.
23929 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23930 fputc('s', stream);
23931 arm_print_condition (stream);
23932 return;
23933
23934 case '!':
23935 /* If the instruction is conditionally executed then print
23936 the current condition code, otherwise print 's'. */
23937 gcc_assert (TARGET_THUMB2);
23938 if (current_insn_predicate)
23939 arm_print_condition (stream);
23940 else
23941 fputc('s', stream);
23942 break;
23943
23944 /* %# is a "break" sequence. It doesn't output anything, but is used to
23945 separate e.g. operand numbers from following text, if that text consists
23946 of further digits which we don't want to be part of the operand
23947 number. */
23948 case '#':
23949 return;
23950
23951 case 'N':
23952 {
23953 REAL_VALUE_TYPE r;
23954 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23955 fprintf (stream, "%s", fp_const_from_val (&r));
23956 }
23957 return;
23958
23959 /* An integer or symbol address without a preceding # sign. */
23960 case 'c':
23961 switch (GET_CODE (x))
23962 {
23963 case CONST_INT:
23964 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23965 break;
23966
23967 case SYMBOL_REF:
23968 output_addr_const (stream, x);
23969 break;
23970
23971 case CONST:
23972 if (GET_CODE (XEXP (x, 0)) == PLUS
23973 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23974 {
23975 output_addr_const (stream, x);
23976 break;
23977 }
23978 /* Fall through. */
23979
23980 default:
23981 output_operand_lossage ("Unsupported operand for code '%c'", code);
23982 }
23983 return;
23984
23985 /* An integer that we want to print in HEX. */
23986 case 'x':
23987 switch (GET_CODE (x))
23988 {
23989 case CONST_INT:
23990 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23991 break;
23992
23993 default:
23994 output_operand_lossage ("Unsupported operand for code '%c'", code);
23995 }
23996 return;
23997
23998 case 'B':
23999 if (CONST_INT_P (x))
24000 {
24001 HOST_WIDE_INT val;
24002 val = ARM_SIGN_EXTEND (~INTVAL (x));
24003 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24004 }
24005 else
24006 {
24007 putc ('~', stream);
24008 output_addr_const (stream, x);
24009 }
24010 return;
24011
24012 case 'b':
24013 /* Print the log2 of a CONST_INT. */
24014 {
24015 HOST_WIDE_INT val;
24016
24017 if (!CONST_INT_P (x)
24018 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24019 output_operand_lossage ("Unsupported operand for code '%c'", code);
24020 else
24021 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24022 }
24023 return;
24024
24025 case 'L':
24026 /* The low 16 bits of an immediate constant. */
24027 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24028 return;
24029
24030 case 'i':
24031 fprintf (stream, "%s", arithmetic_instr (x, 1));
24032 return;
24033
24034 case 'I':
24035 fprintf (stream, "%s", arithmetic_instr (x, 0));
24036 return;
24037
24038 case 'S':
24039 {
24040 HOST_WIDE_INT val;
24041 const char *shift;
24042
24043 shift = shift_op (x, &val);
24044
24045 if (shift)
24046 {
24047 fprintf (stream, ", %s ", shift);
24048 if (val == -1)
24049 arm_print_operand (stream, XEXP (x, 1), 0);
24050 else
24051 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24052 }
24053 }
24054 return;
24055
24056 /* An explanation of the 'Q', 'R' and 'H' register operands:
24057
24058 In a pair of registers containing a DI or DF value the 'Q'
24059 operand returns the register number of the register containing
24060 the least significant part of the value. The 'R' operand returns
24061 the register number of the register containing the most
24062 significant part of the value.
24063
24064 The 'H' operand returns the higher of the two register numbers.
24065 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24066 same as the 'Q' operand, since the most significant part of the
24067 value is held in the lower number register. The reverse is true
24068 on systems where WORDS_BIG_ENDIAN is false.
24069
24070 The purpose of these operands is to distinguish between cases
24071 where the endian-ness of the values is important (for example
24072 when they are added together), and cases where the endian-ness
24073 is irrelevant, but the order of register operations is important.
24074 For example when loading a value from memory into a register
24075 pair, the endian-ness does not matter. Provided that the value
24076 from the lower memory address is put into the lower numbered
24077 register, and the value from the higher address is put into the
24078 higher numbered register, the load will work regardless of whether
24079 the value being loaded is big-wordian or little-wordian. The
24080 order of the two register loads can matter however, if the address
24081 of the memory location is actually held in one of the registers
24082 being overwritten by the load.
24083
24084 The 'Q' and 'R' constraints are also available for 64-bit
24085 constants. */
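/* Illustrative example (not from the upstream sources): for a DImode value
   held in r2/r3 on a target where WORDS_BIG_ENDIAN is false, %Q prints r2
   (the least significant half), %R prints r3 (the most significant half)
   and %H prints r3 (the higher-numbered register).  With WORDS_BIG_ENDIAN
   true, %Q and %H both print r3 and %R prints r2.  */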
24086 case 'Q':
24087 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24088 {
24089 rtx part = gen_lowpart (SImode, x);
24090 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24091 return;
24092 }
24093
24094 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24095 {
24096 output_operand_lossage ("invalid operand for code '%c'", code);
24097 return;
24098 }
24099
24100 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24101 return;
24102
24103 case 'R':
24104 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24105 {
24106 machine_mode mode = GET_MODE (x);
24107 rtx part;
24108
24109 if (mode == VOIDmode)
24110 mode = DImode;
24111 part = gen_highpart_mode (SImode, mode, x);
24112 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24113 return;
24114 }
24115
24116 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24117 {
24118 output_operand_lossage ("invalid operand for code '%c'", code);
24119 return;
24120 }
24121
24122 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24123 return;
24124
24125 case 'H':
24126 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24127 {
24128 output_operand_lossage ("invalid operand for code '%c'", code);
24129 return;
24130 }
24131
24132 asm_fprintf (stream, "%r", REGNO (x) + 1);
24133 return;
24134
24135 case 'J':
24136 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24137 {
24138 output_operand_lossage ("invalid operand for code '%c'", code);
24139 return;
24140 }
24141
24142 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24143 return;
24144
24145 case 'K':
24146 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24147 {
24148 output_operand_lossage ("invalid operand for code '%c'", code);
24149 return;
24150 }
24151
24152 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24153 return;
24154
24155 case 'm':
24156 asm_fprintf (stream, "%r",
24157 REG_P (XEXP (x, 0))
24158 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24159 return;
24160
24161 case 'M':
24162 asm_fprintf (stream, "{%r-%r}",
24163 REGNO (x),
24164 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24165 return;
24166
24167 /* Like 'M', but writing doubleword vector registers, for use by Neon
24168 insns. */
24169 case 'h':
24170 {
24171 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24172 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24173 if (numregs == 1)
24174 asm_fprintf (stream, "{d%d}", regno);
24175 else
24176 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24177 }
24178 return;
24179
24180 case 'd':
24181 /* CONST_TRUE_RTX means always -- that's the default. */
24182 if (x == const_true_rtx)
24183 return;
24184
24185 if (!COMPARISON_P (x))
24186 {
24187 output_operand_lossage ("invalid operand for code '%c'", code);
24188 return;
24189 }
24190
24191 fputs (arm_condition_codes[get_arm_condition_code (x)],
24192 stream);
24193 return;
24194
24195 case 'D':
24196 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24197 want to do that. */
24198 if (x == const_true_rtx)
24199 {
24200 output_operand_lossage ("instruction never executed");
24201 return;
24202 }
24203 if (!COMPARISON_P (x))
24204 {
24205 output_operand_lossage ("invalid operand for code '%c'", code);
24206 return;
24207 }
24208
24209 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24210 (get_arm_condition_code (x))],
24211 stream);
24212 return;
24213
24214 case 'V':
24215 {
24216 /* Output the LSB (shift) and width for a bitmask instruction
24217 based on a literal mask. The LSB is printed first,
24218 followed by the width.
24219
24220 Eg. For 0b1...1110001, the result is #1, #3. */
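/* Worked trace of that example (added for illustration): with
   x == 0xfffffff1, val below becomes ~x & 0xffffffff == 0xe, so
   val & -val == 2 and lsb == 1; val + (val & -val) == 0x10, whose
   exact_log2 is 4, giving a width of 4 - 1 == 3 and the output
   "#1, #3".  */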
24221 if (!CONST_INT_P (x))
24222 {
24223 output_operand_lossage ("invalid operand for code '%c'", code);
24224 return;
24225 }
24226
24227 unsigned HOST_WIDE_INT val
24228 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24229 int lsb = exact_log2 (val & -val);
24230 asm_fprintf (stream, "#%d, #%d", lsb,
24231 (exact_log2 (val + (val & -val)) - lsb));
24232 }
24233 return;
24234
24235 case 's':
24236 case 'W':
24237 case 'X':
24238 case 'Y':
24239 case 'Z':
24240 /* Former Maverick support, removed after GCC-4.7. */
24241 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24242 return;
24243
24244 case 'U':
24245 if (!REG_P (x)
24246 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24247 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24248 /* Bad value for wCG register number. */
24249 {
24250 output_operand_lossage ("invalid operand for code '%c'", code);
24251 return;
24252 }
24253
24254 else
24255 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24256 return;
24257
24258 /* Print an iWMMXt control register name. */
24259 case 'w':
24260 if (!CONST_INT_P (x)
24261 || INTVAL (x) < 0
24262 || INTVAL (x) >= 16)
24263 /* Bad value for wC register number. */
24264 {
24265 output_operand_lossage ("invalid operand for code '%c'", code);
24266 return;
24267 }
24268
24269 else
24270 {
24271 static const char * wc_reg_names [16] =
24272 {
24273 "wCID", "wCon", "wCSSF", "wCASF",
24274 "wC4", "wC5", "wC6", "wC7",
24275 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24276 "wC12", "wC13", "wC14", "wC15"
24277 };
24278
24279 fputs (wc_reg_names [INTVAL (x)], stream);
24280 }
24281 return;
24282
24283 /* Print the high single-precision register of a VFP double-precision
24284 register. */
24285 case 'p':
24286 {
24287 machine_mode mode = GET_MODE (x);
24288 int regno;
24289
24290 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24291 {
24292 output_operand_lossage ("invalid operand for code '%c'", code);
24293 return;
24294 }
24295
24296 regno = REGNO (x);
24297 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24298 {
24299 output_operand_lossage ("invalid operand for code '%c'", code);
24300 return;
24301 }
24302
24303 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24304 }
24305 return;
24306
24307 /* Print a VFP/Neon double precision or quad precision register name. */
24308 case 'P':
24309 case 'q':
24310 {
24311 machine_mode mode = GET_MODE (x);
24312 int is_quad = (code == 'q');
24313 int regno;
24314
24315 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24316 {
24317 output_operand_lossage ("invalid operand for code '%c'", code);
24318 return;
24319 }
24320
24321 if (!REG_P (x)
24322 || !IS_VFP_REGNUM (REGNO (x)))
24323 {
24324 output_operand_lossage ("invalid operand for code '%c'", code);
24325 return;
24326 }
24327
24328 regno = REGNO (x);
24329 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24330 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24331 {
24332 output_operand_lossage ("invalid operand for code '%c'", code);
24333 return;
24334 }
24335
24336 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24337 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24338 }
24339 return;
24340
24341 /* These two codes print the low/high doubleword register of a Neon quad
24342 register, respectively. For pair-structure types, can also print
24343 low/high quadword registers. */
24344 case 'e':
24345 case 'f':
24346 {
24347 machine_mode mode = GET_MODE (x);
24348 int regno;
24349
24350 if ((GET_MODE_SIZE (mode) != 16
24351 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24352 {
24353 output_operand_lossage ("invalid operand for code '%c'", code);
24354 return;
24355 }
24356
24357 regno = REGNO (x);
24358 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24359 {
24360 output_operand_lossage ("invalid operand for code '%c'", code);
24361 return;
24362 }
24363
24364 if (GET_MODE_SIZE (mode) == 16)
24365 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24366 + (code == 'f' ? 1 : 0));
24367 else
24368 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24369 + (code == 'f' ? 1 : 0));
24370 }
24371 return;
24372
24373 /* Print a VFPv3 floating-point constant, represented as an integer
24374 index. */
24375 case 'G':
24376 {
24377 int index = vfp3_const_double_index (x);
24378 gcc_assert (index != -1);
24379 fprintf (stream, "%d", index);
24380 }
24381 return;
24382
24383 /* Print bits representing opcode features for Neon.
24384
24385 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24386 and polynomials as unsigned.
24387
24388 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24389
24390 Bit 2 is 1 for rounding functions, 0 otherwise. */
24391
24392 /* Identify the type as 's', 'u', 'p' or 'f'. */
24393 case 'T':
24394 {
24395 HOST_WIDE_INT bits = INTVAL (x);
24396 fputc ("uspf"[bits & 3], stream);
24397 }
24398 return;
24399
24400 /* Likewise, but signed and unsigned integers are both 'i'. */
24401 case 'F':
24402 {
24403 HOST_WIDE_INT bits = INTVAL (x);
24404 fputc ("iipf"[bits & 3], stream);
24405 }
24406 return;
24407
24408 /* As for 'T', but emit 'u' instead of 'p'. */
24409 case 't':
24410 {
24411 HOST_WIDE_INT bits = INTVAL (x);
24412 fputc ("usuf"[bits & 3], stream);
24413 }
24414 return;
24415
24416 /* Bit 2: rounding (vs none). */
24417 case 'O':
24418 {
24419 HOST_WIDE_INT bits = INTVAL (x);
24420 fputs ((bits & 4) != 0 ? "r" : "", stream);
24421 }
24422 return;
24423
24424 /* Memory operand for vld1/vst1 instruction. */
24425 case 'A':
24426 {
24427 rtx addr;
24428 bool postinc = FALSE;
24429 rtx postinc_reg = NULL;
24430 unsigned align, memsize, align_bits;
24431
24432 gcc_assert (MEM_P (x));
24433 addr = XEXP (x, 0);
24434 if (GET_CODE (addr) == POST_INC)
24435 {
24436 postinc = 1;
24437 addr = XEXP (addr, 0);
24438 }
24439 if (GET_CODE (addr) == POST_MODIFY)
24440 {
24441 postinc_reg = XEXP( XEXP (addr, 1), 1);
24442 addr = XEXP (addr, 0);
24443 }
24444 asm_fprintf (stream, "[%r", REGNO (addr));
24445
24446 /* We know the alignment of this access, so we can emit a hint in the
24447 instruction (for some alignments) as an aid to the memory subsystem
24448 of the target. */
24449 align = MEM_ALIGN (x) >> 3;
24450 memsize = MEM_SIZE (x);
24451
24452 /* Only certain alignment specifiers are supported by the hardware. */
24453 if (memsize == 32 && (align % 32) == 0)
24454 align_bits = 256;
24455 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24456 align_bits = 128;
24457 else if (memsize >= 8 && (align % 8) == 0)
24458 align_bits = 64;
24459 else
24460 align_bits = 0;
24461
24462 if (align_bits != 0)
24463 asm_fprintf (stream, ":%d", align_bits);
24464
24465 asm_fprintf (stream, "]");
24466
24467 if (postinc)
24468 fputs("!", stream);
24469 if (postinc_reg)
24470 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24471 }
24472 return;
24473
24474 /* Print a memory operand with the "Ux" or "Uj" constraint. Depending on the
24475 rtx_code of the address, the output takes one of the forms below; see the note after this list for the mapping.
24476 1. [Rn], #+/-<imm>
24477 2. [Rn, #+/-<imm>]!
24478 3. [Rn, #+/-<imm>]
24479 4. [Rn]. */
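/* Note (added for illustration): in the code below, POST_INC, POST_DEC and
   POST_MODIFY addresses produce form 1, PRE_INC, PRE_DEC and PRE_MODIFY
   produce form 2, a PLUS of a register and a constant produces form 3 and
   a plain register produces form 4.  */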
24480 case 'E':
24481 {
24482 rtx addr;
24483 rtx postinc_reg = NULL;
24484 unsigned inc_val = 0;
24485 enum rtx_code code;
24486
24487 gcc_assert (MEM_P (x));
24488 addr = XEXP (x, 0);
24489 code = GET_CODE (addr);
24490 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24491 || code == PRE_DEC)
24492 {
24493 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24494 inc_val = GET_MODE_SIZE (GET_MODE (x));
24495 if (code == POST_INC || code == POST_DEC)
24496 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24497 ? "": "-", inc_val);
24498 else
24499 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24500 ? "": "-", inc_val);
24501 }
24502 else if (code == POST_MODIFY || code == PRE_MODIFY)
24503 {
24504 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24505 postinc_reg = XEXP (XEXP (addr, 1), 1);
24506 if (postinc_reg && CONST_INT_P (postinc_reg))
24507 {
24508 if (code == POST_MODIFY)
24509 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24510 else
24511 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24512 }
24513 }
24514 else if (code == PLUS)
24515 {
24516 rtx base = XEXP (addr, 0);
24517 rtx index = XEXP (addr, 1);
24518
24519 gcc_assert (REG_P (base) && CONST_INT_P (index));
24520
24521 HOST_WIDE_INT offset = INTVAL (index);
24522 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24523 }
24524 else
24525 {
24526 gcc_assert (REG_P (addr));
24527 asm_fprintf (stream, "[%r]",REGNO (addr));
24528 }
24529 }
24530 return;
24531
24532 case 'C':
24533 {
24534 rtx addr;
24535
24536 gcc_assert (MEM_P (x));
24537 addr = XEXP (x, 0);
24538 gcc_assert (REG_P (addr));
24539 asm_fprintf (stream, "[%r]", REGNO (addr));
24540 }
24541 return;
24542
24543 /* Translate an S register number into a D register number and element index. */
24544 case 'y':
24545 {
24546 machine_mode mode = GET_MODE (x);
24547 int regno;
24548
24549 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24550 {
24551 output_operand_lossage ("invalid operand for code '%c'", code);
24552 return;
24553 }
24554
24555 regno = REGNO (x);
24556 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24557 {
24558 output_operand_lossage ("invalid operand for code '%c'", code);
24559 return;
24560 }
24561
24562 regno = regno - FIRST_VFP_REGNUM;
24563 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24564 }
24565 return;
24566
24567 case 'v':
24568 gcc_assert (CONST_DOUBLE_P (x));
24569 int result;
24570 result = vfp3_const_double_for_fract_bits (x);
24571 if (result == 0)
24572 result = vfp3_const_double_for_bits (x);
24573 fprintf (stream, "#%d", result);
24574 return;
24575
24576 /* Register specifier for vld1.16/vst1.16. Translate the S register
24577 number into a D register number and element index. */
24578 case 'z':
24579 {
24580 machine_mode mode = GET_MODE (x);
24581 int regno;
24582
24583 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24584 {
24585 output_operand_lossage ("invalid operand for code '%c'", code);
24586 return;
24587 }
24588
24589 regno = REGNO (x);
24590 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24591 {
24592 output_operand_lossage ("invalid operand for code '%c'", code);
24593 return;
24594 }
24595
24596 regno = regno - FIRST_VFP_REGNUM;
24597 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24598 }
24599 return;
24600
24601 default:
24602 if (x == 0)
24603 {
24604 output_operand_lossage ("missing operand");
24605 return;
24606 }
24607
24608 switch (GET_CODE (x))
24609 {
24610 case REG:
24611 asm_fprintf (stream, "%r", REGNO (x));
24612 break;
24613
24614 case MEM:
24615 output_address (GET_MODE (x), XEXP (x, 0));
24616 break;
24617
24618 case CONST_DOUBLE:
24619 {
24620 char fpstr[20];
24621 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24622 sizeof (fpstr), 0, 1);
24623 fprintf (stream, "#%s", fpstr);
24624 }
24625 break;
24626
24627 default:
24628 gcc_assert (GET_CODE (x) != NEG);
24629 fputc ('#', stream);
24630 if (GET_CODE (x) == HIGH)
24631 {
24632 fputs (":lower16:", stream);
24633 x = XEXP (x, 0);
24634 }
24635
24636 output_addr_const (stream, x);
24637 break;
24638 }
24639 }
24640 }
24641 \f
24642 /* Target hook for printing a memory address. */
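/* Illustrative outputs (not from the upstream sources) for the 32-bit path
   below: a plain register prints as "[r0]", reg-plus-constant as
   "[r0, #8]", reg-plus-reg as "[r0, r1]" (or "[r0, -r1]" for MINUS),
   a pre-decrement of an SImode access as "[r0, #-4]!" and a
   post-increment as "[r0], #4".  Register names are placeholders.  */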
24643 static void
24644 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24645 {
24646 if (TARGET_32BIT)
24647 {
24648 int is_minus = GET_CODE (x) == MINUS;
24649
24650 if (REG_P (x))
24651 asm_fprintf (stream, "[%r]", REGNO (x));
24652 else if (GET_CODE (x) == PLUS || is_minus)
24653 {
24654 rtx base = XEXP (x, 0);
24655 rtx index = XEXP (x, 1);
24656 HOST_WIDE_INT offset = 0;
24657 if (!REG_P (base)
24658 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24659 {
24660 /* Ensure that BASE is a register
24661 (one of them must be). Also ensure that SP is
24662 not used as an index register. */
24663 std::swap (base, index);
24664 }
24665 switch (GET_CODE (index))
24666 {
24667 case CONST_INT:
24668 offset = INTVAL (index);
24669 if (is_minus)
24670 offset = -offset;
24671 asm_fprintf (stream, "[%r, #%wd]",
24672 REGNO (base), offset);
24673 break;
24674
24675 case REG:
24676 asm_fprintf (stream, "[%r, %s%r]",
24677 REGNO (base), is_minus ? "-" : "",
24678 REGNO (index));
24679 break;
24680
24681 case MULT:
24682 case ASHIFTRT:
24683 case LSHIFTRT:
24684 case ASHIFT:
24685 case ROTATERT:
24686 {
24687 asm_fprintf (stream, "[%r, %s%r",
24688 REGNO (base), is_minus ? "-" : "",
24689 REGNO (XEXP (index, 0)));
24690 arm_print_operand (stream, index, 'S');
24691 fputs ("]", stream);
24692 break;
24693 }
24694
24695 default:
24696 gcc_unreachable ();
24697 }
24698 }
24699 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24700 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24701 {
24702 gcc_assert (REG_P (XEXP (x, 0)));
24703
24704 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24705 asm_fprintf (stream, "[%r, #%s%d]!",
24706 REGNO (XEXP (x, 0)),
24707 GET_CODE (x) == PRE_DEC ? "-" : "",
24708 GET_MODE_SIZE (mode));
24709 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24710 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24711 else
24712 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24713 GET_CODE (x) == POST_DEC ? "-" : "",
24714 GET_MODE_SIZE (mode));
24715 }
24716 else if (GET_CODE (x) == PRE_MODIFY)
24717 {
24718 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24719 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24720 asm_fprintf (stream, "#%wd]!",
24721 INTVAL (XEXP (XEXP (x, 1), 1)));
24722 else
24723 asm_fprintf (stream, "%r]!",
24724 REGNO (XEXP (XEXP (x, 1), 1)));
24725 }
24726 else if (GET_CODE (x) == POST_MODIFY)
24727 {
24728 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24729 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24730 asm_fprintf (stream, "#%wd",
24731 INTVAL (XEXP (XEXP (x, 1), 1)));
24732 else
24733 asm_fprintf (stream, "%r",
24734 REGNO (XEXP (XEXP (x, 1), 1)));
24735 }
24736 else output_addr_const (stream, x);
24737 }
24738 else
24739 {
24740 if (REG_P (x))
24741 asm_fprintf (stream, "[%r]", REGNO (x));
24742 else if (GET_CODE (x) == POST_INC)
24743 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24744 else if (GET_CODE (x) == PLUS)
24745 {
24746 gcc_assert (REG_P (XEXP (x, 0)));
24747 if (CONST_INT_P (XEXP (x, 1)))
24748 asm_fprintf (stream, "[%r, #%wd]",
24749 REGNO (XEXP (x, 0)),
24750 INTVAL (XEXP (x, 1)));
24751 else
24752 asm_fprintf (stream, "[%r, %r]",
24753 REGNO (XEXP (x, 0)),
24754 REGNO (XEXP (x, 1)));
24755 }
24756 else
24757 output_addr_const (stream, x);
24758 }
24759 }
24760 \f
24761 /* Target hook for indicating whether a punctuation character for
24762 TARGET_PRINT_OPERAND is valid. */
24763 static bool
24764 arm_print_operand_punct_valid_p (unsigned char code)
24765 {
24766 return (code == '@' || code == '|' || code == '.'
24767 || code == '(' || code == ')' || code == '#'
24768 || (TARGET_32BIT && (code == '?'))
24769 || (TARGET_THUMB2 && (code == '!'))
24770 || (TARGET_THUMB && (code == '_')));
24771 }
24772 \f
24773 /* Target hook for assembling integer objects. The ARM version needs to
24774 handle word-sized values specially. */
24775 static bool
24776 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24777 {
24778 machine_mode mode;
24779
24780 if (size == UNITS_PER_WORD && aligned_p)
24781 {
24782 fputs ("\t.word\t", asm_out_file);
24783 output_addr_const (asm_out_file, x);
24784
24785 /* Mark symbols as position independent. We only do this in the
24786 .text segment, not in the .data segment. */
24787 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24788 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24789 {
24790 /* See legitimize_pic_address for an explanation of the
24791 TARGET_VXWORKS_RTP check. */
24792 /* References to weak symbols cannot be resolved locally:
24793 they may be overridden by a non-weak definition at link
24794 time. */
24795 if (!arm_pic_data_is_text_relative
24796 || (SYMBOL_REF_P (x)
24797 && (!SYMBOL_REF_LOCAL_P (x)
24798 || (SYMBOL_REF_DECL (x)
24799 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24800 || (SYMBOL_REF_FUNCTION_P (x)
24801 && !arm_fdpic_local_funcdesc_p (x)))))
24802 {
24803 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24804 fputs ("(GOTFUNCDESC)", asm_out_file);
24805 else
24806 fputs ("(GOT)", asm_out_file);
24807 }
24808 else
24809 {
24810 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24811 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24812 else
24813 {
24814 bool is_readonly;
24815
24816 if (!TARGET_FDPIC
24817 || arm_is_segment_info_known (x, &is_readonly))
24818 fputs ("(GOTOFF)", asm_out_file);
24819 else
24820 fputs ("(GOT)", asm_out_file);
24821 }
24822 }
24823 }
24824
24825 /* For FDPIC we also have to mark the symbol for the .data section. */
24826 if (TARGET_FDPIC
24827 && !making_const_table
24828 && SYMBOL_REF_P (x)
24829 && SYMBOL_REF_FUNCTION_P (x))
24830 fputs ("(FUNCDESC)", asm_out_file);
24831
24832 fputc ('\n', asm_out_file);
24833 return true;
24834 }
24835
24836 mode = GET_MODE (x);
24837
24838 if (arm_vector_mode_supported_p (mode))
24839 {
24840 int i, units;
24841
24842 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24843
24844 units = CONST_VECTOR_NUNITS (x);
24845 size = GET_MODE_UNIT_SIZE (mode);
24846
24847 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24848 for (i = 0; i < units; i++)
24849 {
24850 rtx elt = CONST_VECTOR_ELT (x, i);
24851 assemble_integer
24852 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24853 }
24854 else
24855 for (i = 0; i < units; i++)
24856 {
24857 rtx elt = CONST_VECTOR_ELT (x, i);
24858 assemble_real
24859 (*CONST_DOUBLE_REAL_VALUE (elt),
24860 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24861 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24862 }
24863
24864 return true;
24865 }
24866
24867 return default_assemble_integer (x, size, aligned_p);
24868 }
24869
24870 static void
24871 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24872 {
24873 section *s;
24874
24875 if (!TARGET_AAPCS_BASED)
24876 {
24877 (is_ctor ?
24878 default_named_section_asm_out_constructor
24879 : default_named_section_asm_out_destructor) (symbol, priority);
24880 return;
24881 }
24882
24883 /* Put these in the .init_array section, using a special relocation. */
24884 if (priority != DEFAULT_INIT_PRIORITY)
24885 {
24886 char buf[18];
24887 sprintf (buf, "%s.%.5u",
24888 is_ctor ? ".init_array" : ".fini_array",
24889 priority);
24890 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24891 }
24892 else if (is_ctor)
24893 s = ctors_section;
24894 else
24895 s = dtors_section;
24896
24897 switch_to_section (s);
24898 assemble_align (POINTER_SIZE);
24899 fputs ("\t.word\t", asm_out_file);
24900 output_addr_const (asm_out_file, symbol);
24901 fputs ("(target1)\n", asm_out_file);
24902 }
24903
24904 /* Add a function to the list of static constructors. */
24905
24906 static void
24907 arm_elf_asm_constructor (rtx symbol, int priority)
24908 {
24909 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24910 }
24911
24912 /* Add a function to the list of static destructors. */
24913
24914 static void
24915 arm_elf_asm_destructor (rtx symbol, int priority)
24916 {
24917 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24918 }
24919 \f
24920 /* A finite state machine takes care of noticing whether or not instructions
24921 can be conditionally executed, and thus decreases execution time and code
24922 size by deleting branch instructions. The fsm is controlled by
24923 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24924
24925 /* The states of the fsm controlling condition codes are:
24926 0: normal, do nothing special
24927 1: make ASM_OUTPUT_OPCODE not output this instruction
24928 2: make ASM_OUTPUT_OPCODE not output this instruction
24929 3: make instructions conditional
24930 4: make instructions conditional
24931
24932 State transitions (state->state by whom under condition):
24933 0 -> 1 final_prescan_insn if the `target' is a label
24934 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24935 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24936 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24937 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24938 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24939 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24940 (the target insn is arm_target_insn).
24941
24942 If the jump clobbers the conditions then we use states 2 and 4.
24943
24944 A similar thing can be done with conditional return insns.
24945
24946 XXX In case the `target' is an unconditional branch, this conditionalising
24947 of the instructions always reduces code size, but not always execution
24948 time. But then, I want to reduce the code size to somewhere near what
24949 /bin/cc produces. */
24950
24951 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24952 instructions. When a COND_EXEC instruction is seen the subsequent
24953 instructions are scanned so that multiple conditional instructions can be
24954 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24955 specify the length and true/false mask for the IT block. These will be
24956 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
24957
24958 /* Returns the index of the ARM condition code string in
24959 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24960 COMPARISON should be an rtx like `(eq (...) (...))'. */
24961
24962 enum arm_cond_code
24963 maybe_get_arm_condition_code (rtx comparison)
24964 {
24965 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24966 enum arm_cond_code code;
24967 enum rtx_code comp_code = GET_CODE (comparison);
24968
24969 if (GET_MODE_CLASS (mode) != MODE_CC)
24970 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24971 XEXP (comparison, 1));
24972
24973 switch (mode)
24974 {
24975 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24976 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24977 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24978 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24979 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24980 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24981 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24982 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24983 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24984 case E_CC_DLTUmode: code = ARM_CC;
24985
24986 dominance:
24987 if (comp_code == EQ)
24988 return ARM_INVERSE_CONDITION_CODE (code);
24989 if (comp_code == NE)
24990 return code;
24991 return ARM_NV;
24992
24993 case E_CC_NZmode:
24994 switch (comp_code)
24995 {
24996 case NE: return ARM_NE;
24997 case EQ: return ARM_EQ;
24998 case GE: return ARM_PL;
24999 case LT: return ARM_MI;
25000 default: return ARM_NV;
25001 }
25002
25003 case E_CC_Zmode:
25004 switch (comp_code)
25005 {
25006 case NE: return ARM_NE;
25007 case EQ: return ARM_EQ;
25008 default: return ARM_NV;
25009 }
25010
25011 case E_CC_Nmode:
25012 switch (comp_code)
25013 {
25014 case NE: return ARM_MI;
25015 case EQ: return ARM_PL;
25016 default: return ARM_NV;
25017 }
25018
25019 case E_CCFPEmode:
25020 case E_CCFPmode:
25021 /* We can handle all cases except UNEQ and LTGT. */
25022 switch (comp_code)
25023 {
25024 case GE: return ARM_GE;
25025 case GT: return ARM_GT;
25026 case LE: return ARM_LS;
25027 case LT: return ARM_MI;
25028 case NE: return ARM_NE;
25029 case EQ: return ARM_EQ;
25030 case ORDERED: return ARM_VC;
25031 case UNORDERED: return ARM_VS;
25032 case UNLT: return ARM_LT;
25033 case UNLE: return ARM_LE;
25034 case UNGT: return ARM_HI;
25035 case UNGE: return ARM_PL;
25036 /* UNEQ and LTGT do not have a representation. */
25037 case UNEQ: /* Fall through. */
25038 case LTGT: /* Fall through. */
25039 default: return ARM_NV;
25040 }
25041
25042 case E_CC_SWPmode:
25043 switch (comp_code)
25044 {
25045 case NE: return ARM_NE;
25046 case EQ: return ARM_EQ;
25047 case GE: return ARM_LE;
25048 case GT: return ARM_LT;
25049 case LE: return ARM_GE;
25050 case LT: return ARM_GT;
25051 case GEU: return ARM_LS;
25052 case GTU: return ARM_CC;
25053 case LEU: return ARM_CS;
25054 case LTU: return ARM_HI;
25055 default: return ARM_NV;
25056 }
25057
25058 case E_CC_Cmode:
25059 switch (comp_code)
25060 {
25061 case LTU: return ARM_CS;
25062 case GEU: return ARM_CC;
25063 default: return ARM_NV;
25064 }
25065
25066 case E_CC_NVmode:
25067 switch (comp_code)
25068 {
25069 case GE: return ARM_GE;
25070 case LT: return ARM_LT;
25071 default: return ARM_NV;
25072 }
25073
25074 case E_CC_Bmode:
25075 switch (comp_code)
25076 {
25077 case GEU: return ARM_CS;
25078 case LTU: return ARM_CC;
25079 default: return ARM_NV;
25080 }
25081
25082 case E_CC_Vmode:
25083 switch (comp_code)
25084 {
25085 case NE: return ARM_VS;
25086 case EQ: return ARM_VC;
25087 default: return ARM_NV;
25088 }
25089
25090 case E_CC_ADCmode:
25091 switch (comp_code)
25092 {
25093 case GEU: return ARM_CS;
25094 case LTU: return ARM_CC;
25095 default: return ARM_NV;
25096 }
25097
25098 case E_CCmode:
25099 case E_CC_RSBmode:
25100 switch (comp_code)
25101 {
25102 case NE: return ARM_NE;
25103 case EQ: return ARM_EQ;
25104 case GE: return ARM_GE;
25105 case GT: return ARM_GT;
25106 case LE: return ARM_LE;
25107 case LT: return ARM_LT;
25108 case GEU: return ARM_CS;
25109 case GTU: return ARM_HI;
25110 case LEU: return ARM_LS;
25111 case LTU: return ARM_CC;
25112 default: return ARM_NV;
25113 }
25114
25115 default: gcc_unreachable ();
25116 }
25117 }
25118
25119 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25120 static enum arm_cond_code
25121 get_arm_condition_code (rtx comparison)
25122 {
25123 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25124 gcc_assert (code != ARM_NV);
25125 return code;
25126 }
25127
25128 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25129 code registers when not targeting Thumb1. The VFP condition register
25130 only exists when generating hard-float code. */
25131 static bool
25132 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25133 {
25134 if (!TARGET_32BIT)
25135 return false;
25136
25137 *p1 = CC_REGNUM;
25138 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25139 return true;
25140 }
25141
25142 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25143 instructions. */
25144 void
25145 thumb2_final_prescan_insn (rtx_insn *insn)
25146 {
25147 rtx_insn *first_insn = insn;
25148 rtx body = PATTERN (insn);
25149 rtx predicate;
25150 enum arm_cond_code code;
25151 int n;
25152 int mask;
25153 int max;
25154
25155 /* max_insns_skipped in the tune was already taken into account in the
25156 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
25157 just emit the IT blocks as best we can. It does not make sense to split
25158 the IT blocks. */
25159 max = MAX_INSN_PER_IT_BLOCK;
25160
25161 /* Remove the previous insn from the count of insns to be output. */
25162 if (arm_condexec_count)
25163 arm_condexec_count--;
25164
25165 /* Nothing to do if we are already inside a conditional block. */
25166 if (arm_condexec_count)
25167 return;
25168
25169 if (GET_CODE (body) != COND_EXEC)
25170 return;
25171
25172 /* Conditional jumps are implemented directly. */
25173 if (JUMP_P (insn))
25174 return;
25175
25176 predicate = COND_EXEC_TEST (body);
25177 arm_current_cc = get_arm_condition_code (predicate);
25178
25179 n = get_attr_ce_count (insn);
25180 arm_condexec_count = 1;
25181 arm_condexec_mask = (1 << n) - 1;
25182 arm_condexec_masklen = n;
25183 /* See if subsequent instructions can be combined into the same block. */
25184 for (;;)
25185 {
25186 insn = next_nonnote_insn (insn);
25187
25188 /* Jumping into the middle of an IT block is illegal, so a label or
25189 barrier terminates the block. */
25190 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25191 break;
25192
25193 body = PATTERN (insn);
25194 /* USE and CLOBBER aren't really insns, so just skip them. */
25195 if (GET_CODE (body) == USE
25196 || GET_CODE (body) == CLOBBER)
25197 continue;
25198
25199 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25200 if (GET_CODE (body) != COND_EXEC)
25201 break;
25202 /* Maximum number of conditionally executed instructions in a block. */
25203 n = get_attr_ce_count (insn);
25204 if (arm_condexec_masklen + n > max)
25205 break;
25206
25207 predicate = COND_EXEC_TEST (body);
25208 code = get_arm_condition_code (predicate);
25209 mask = (1 << n) - 1;
25210 if (arm_current_cc == code)
25211 arm_condexec_mask |= (mask << arm_condexec_masklen);
25212 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25213 break;
25214
25215 arm_condexec_count++;
25216 arm_condexec_masklen += n;
25217
25218 /* A jump must be the last instruction in a conditional block. */
25219 if (JUMP_P (insn))
25220 break;
25221 }
25222 /* Restore recog_data (getting the attributes of other insns can
25223 destroy this array, but final.cc assumes that it remains intact
25224 across this call). */
25225 extract_constrain_insn_cached (first_insn);
25226 }
25227
25228 void
25229 arm_final_prescan_insn (rtx_insn *insn)
25230 {
25231 /* BODY will hold the body of INSN. */
25232 rtx body = PATTERN (insn);
25233
25234 /* This will be 1 if trying to repeat the trick, and things need to be
25235 reversed if it appears to fail. */
25236 int reverse = 0;
25237
25238 /* If we start with a return insn, we only succeed if we find another one. */
25239 int seeking_return = 0;
25240 enum rtx_code return_code = UNKNOWN;
25241
25242 /* START_INSN will hold the insn from where we start looking. This is the
25243 first insn after the following code_label if REVERSE is true. */
25244 rtx_insn *start_insn = insn;
25245
25246 /* If in state 4, check if the target branch is reached, in order to
25247 change back to state 0. */
25248 if (arm_ccfsm_state == 4)
25249 {
25250 if (insn == arm_target_insn)
25251 {
25252 arm_target_insn = NULL;
25253 arm_ccfsm_state = 0;
25254 }
25255 return;
25256 }
25257
25258 /* If in state 3, it is possible to repeat the trick, if this insn is an
25259 unconditional branch to a label, and immediately following this branch
25260 is the previous target label which is only used once, and the label this
25261 branch jumps to is not too far off. */
25262 if (arm_ccfsm_state == 3)
25263 {
25264 if (simplejump_p (insn))
25265 {
25266 start_insn = next_nonnote_insn (start_insn);
25267 if (BARRIER_P (start_insn))
25268 {
25269 /* XXX Isn't this always a barrier? */
25270 start_insn = next_nonnote_insn (start_insn);
25271 }
25272 if (LABEL_P (start_insn)
25273 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25274 && LABEL_NUSES (start_insn) == 1)
25275 reverse = TRUE;
25276 else
25277 return;
25278 }
25279 else if (ANY_RETURN_P (body))
25280 {
25281 start_insn = next_nonnote_insn (start_insn);
25282 if (BARRIER_P (start_insn))
25283 start_insn = next_nonnote_insn (start_insn);
25284 if (LABEL_P (start_insn)
25285 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25286 && LABEL_NUSES (start_insn) == 1)
25287 {
25288 reverse = TRUE;
25289 seeking_return = 1;
25290 return_code = GET_CODE (body);
25291 }
25292 else
25293 return;
25294 }
25295 else
25296 return;
25297 }
25298
25299 gcc_assert (!arm_ccfsm_state || reverse);
25300 if (!JUMP_P (insn))
25301 return;
25302
25303 /* This jump might be paralleled with a clobber of the condition codes;
25304 the jump should always come first. */
25305 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25306 body = XVECEXP (body, 0, 0);
25307
25308 if (reverse
25309 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25310 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25311 {
25312 int insns_skipped;
25313 int fail = FALSE, succeed = FALSE;
25314 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25315 int then_not_else = TRUE;
25316 rtx_insn *this_insn = start_insn;
25317 rtx label = 0;
25318
25319 /* Register the insn jumped to. */
25320 if (reverse)
25321 {
25322 if (!seeking_return)
25323 label = XEXP (SET_SRC (body), 0);
25324 }
25325 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25326 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25327 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25328 {
25329 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25330 then_not_else = FALSE;
25331 }
25332 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25333 {
25334 seeking_return = 1;
25335 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25336 }
25337 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25338 {
25339 seeking_return = 1;
25340 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25341 then_not_else = FALSE;
25342 }
25343 else
25344 gcc_unreachable ();
25345
25346 /* See how many insns this branch skips, and what kind of insns. If all
25347 insns are okay, and the label or unconditional branch to the same
25348 label is not too far away, succeed. */
25349 for (insns_skipped = 0;
25350 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25351 {
25352 rtx scanbody;
25353
25354 this_insn = next_nonnote_insn (this_insn);
25355 if (!this_insn)
25356 break;
25357
25358 switch (GET_CODE (this_insn))
25359 {
25360 case CODE_LABEL:
25361 /* Succeed if it is the target label, otherwise fail since
25362 control falls in from somewhere else. */
25363 if (this_insn == label)
25364 {
25365 arm_ccfsm_state = 1;
25366 succeed = TRUE;
25367 }
25368 else
25369 fail = TRUE;
25370 break;
25371
25372 case BARRIER:
25373 /* Succeed if the following insn is the target label.
25374 Otherwise fail.
25375 If return insns are used then the last insn in a function
25376 will be a barrier. */
25377 this_insn = next_nonnote_insn (this_insn);
25378 if (this_insn && this_insn == label)
25379 {
25380 arm_ccfsm_state = 1;
25381 succeed = TRUE;
25382 }
25383 else
25384 fail = TRUE;
25385 break;
25386
25387 case CALL_INSN:
25388 /* The AAPCS says that conditional calls should not be
25389 used since they make interworking inefficient (the
25390 linker can't transform BL<cond> into BLX). That's
25391 only a problem if the machine has BLX. */
25392 if (arm_arch5t)
25393 {
25394 fail = TRUE;
25395 break;
25396 }
25397
25398 /* Succeed if the following insn is the target label, or
25399 if the following two insns are a barrier and the
25400 target label. */
25401 this_insn = next_nonnote_insn (this_insn);
25402 if (this_insn && BARRIER_P (this_insn))
25403 this_insn = next_nonnote_insn (this_insn);
25404
25405 if (this_insn && this_insn == label
25406 && insns_skipped < max_insns_skipped)
25407 {
25408 arm_ccfsm_state = 1;
25409 succeed = TRUE;
25410 }
25411 else
25412 fail = TRUE;
25413 break;
25414
25415 case JUMP_INSN:
25416 /* If this is an unconditional branch to the same label, succeed.
25417 If it is to another label, do nothing. If it is conditional,
25418 fail. */
25419 /* XXX Probably, the tests for SET and the PC are
25420 unnecessary. */
25421
25422 scanbody = PATTERN (this_insn);
25423 if (GET_CODE (scanbody) == SET
25424 && GET_CODE (SET_DEST (scanbody)) == PC)
25425 {
25426 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25427 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25428 {
25429 arm_ccfsm_state = 2;
25430 succeed = TRUE;
25431 }
25432 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25433 fail = TRUE;
25434 }
25435 /* Fail if a conditional return is undesirable (e.g. on a
25436 StrongARM), but still allow this if optimizing for size. */
25437 else if (GET_CODE (scanbody) == return_code
25438 && !use_return_insn (TRUE, NULL)
25439 && !optimize_size)
25440 fail = TRUE;
25441 else if (GET_CODE (scanbody) == return_code)
25442 {
25443 arm_ccfsm_state = 2;
25444 succeed = TRUE;
25445 }
25446 else if (GET_CODE (scanbody) == PARALLEL)
25447 {
25448 switch (get_attr_conds (this_insn))
25449 {
25450 case CONDS_NOCOND:
25451 break;
25452 default:
25453 fail = TRUE;
25454 break;
25455 }
25456 }
25457 else
25458 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25459
25460 break;
25461
25462 case INSN:
25463 /* Instructions using or affecting the condition codes make it
25464 fail. */
25465 scanbody = PATTERN (this_insn);
25466 if (!(GET_CODE (scanbody) == SET
25467 || GET_CODE (scanbody) == PARALLEL)
25468 || get_attr_conds (this_insn) != CONDS_NOCOND)
25469 fail = TRUE;
25470 break;
25471
25472 default:
25473 break;
25474 }
25475 }
25476 if (succeed)
25477 {
25478 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25479 arm_target_label = CODE_LABEL_NUMBER (label);
25480 else
25481 {
25482 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25483
25484 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25485 {
25486 this_insn = next_nonnote_insn (this_insn);
25487 gcc_assert (!this_insn
25488 || (!BARRIER_P (this_insn)
25489 && !LABEL_P (this_insn)));
25490 }
25491 if (!this_insn)
25492 {
25493 /* Oh dear!  We ran off the end; give up. */
25494 extract_constrain_insn_cached (insn);
25495 arm_ccfsm_state = 0;
25496 arm_target_insn = NULL;
25497 return;
25498 }
25499 arm_target_insn = this_insn;
25500 }
25501
25502 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25503 what it was. */
25504 if (!reverse)
25505 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25506
25507 if (reverse || then_not_else)
25508 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25509 }
25510
25511 /* Restore recog_data (getting the attributes of other insns can
25512 destroy this array, but final.cc assumes that it remains intact
25513 across this call). */
25514 extract_constrain_insn_cached (insn);
25515 }
25516 }
25517
25518 /* Output IT instructions. */
25519 void
25520 thumb2_asm_output_opcode (FILE * stream)
25521 {
25522 char buff[5];
25523 int n;
25524
25525 if (arm_condexec_mask)
25526 {
25527 for (n = 0; n < arm_condexec_masklen; n++)
25528 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25529 buff[n] = 0;
25530 asm_fprintf (stream, "i%s\t%s\n\t", buff,
25531 arm_condition_codes[arm_current_cc]);
25532 arm_condexec_mask = 0;
25533 }
25534 }
25535
25536 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM, core regs are
25537 UNITS_PER_WORD bytes wide. */
25538 static unsigned int
25539 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25540 {
25541 if (IS_VPR_REGNUM (regno))
25542 return CEIL (GET_MODE_SIZE (mode), 2);
25543
25544 if (TARGET_32BIT
25545 && regno > PC_REGNUM
25546 && regno != FRAME_POINTER_REGNUM
25547 && regno != ARG_POINTER_REGNUM
25548 && !IS_VFP_REGNUM (regno))
25549 return 1;
25550
25551 return ARM_NUM_REGS (mode);
25552 }
25553
25554 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25555 static bool
25556 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25557 {
25558 if (GET_MODE_CLASS (mode) == MODE_CC)
25559 return (regno == CC_REGNUM
25560 || (TARGET_VFP_BASE
25561 && regno == VFPCC_REGNUM));
25562
25563 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25564 return false;
25565
25566 if (IS_VPR_REGNUM (regno))
25567 return mode == HImode
25568 || mode == V16BImode
25569 || mode == V8BImode
25570 || mode == V4BImode;
25571
25572 if (TARGET_THUMB1)
25573 /* For the Thumb we only allow values bigger than SImode in
25574 registers 0 - 6, so that there is always a second low
25575 register available to hold the upper part of the value.
25576 We probably ought to ensure that the register is the
25577 start of an even numbered register pair. */
25578 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25579
25580 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25581 {
25582 if (mode == DFmode || mode == DImode)
25583 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25584
25585 if (mode == HFmode || mode == BFmode || mode == HImode
25586 || mode == SFmode || mode == SImode)
25587 return VFP_REGNO_OK_FOR_SINGLE (regno);
25588
25589 if (TARGET_NEON)
25590 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25591 || (VALID_NEON_QREG_MODE (mode)
25592 && NEON_REGNO_OK_FOR_QUAD (regno))
25593 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25594 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25595 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25596 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25597 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25598 if (TARGET_HAVE_MVE)
25599 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25600 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25601 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25602
25603 return false;
25604 }
25605
25606 if (TARGET_REALLY_IWMMXT)
25607 {
25608 if (IS_IWMMXT_GR_REGNUM (regno))
25609 return mode == SImode;
25610
25611 if (IS_IWMMXT_REGNUM (regno))
25612 return VALID_IWMMXT_REG_MODE (mode);
25613 }
25614
25615 /* We allow almost any value to be stored in the general registers.
25616 Restrict doubleword quantities to even register pairs in ARM state
25617 so that we can use ldrd. The same restriction applies for MVE
25618 in order to support Armv8.1-M Mainline instructions.
25619 Do not allow very large Neon structure opaque modes in general
25620 registers; they would use too many. */
25621 if (regno <= LAST_ARM_REGNUM)
25622 {
25623 if (ARM_NUM_REGS (mode) > 4)
25624 return false;
25625
25626 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25627 return true;
25628
25629 return !((TARGET_LDRD || TARGET_CDE)
25630 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25631 }
25632
25633 if (regno == FRAME_POINTER_REGNUM
25634 || regno == ARG_POINTER_REGNUM)
25635 /* We only allow integers in the fake hard registers. */
25636 return GET_MODE_CLASS (mode) == MODE_INT;
25637
25638 return false;
25639 }
25640
25641 /* Implement TARGET_MODES_TIEABLE_P. */
25642
25643 static bool
25644 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25645 {
25646 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25647 return true;
25648
25649 /* We specifically want to allow elements of "structure" modes to
25650 be tieable to the structure. This more general condition allows
25651 other rarer situations too. */
25652 if ((TARGET_NEON
25653 && (VALID_NEON_DREG_MODE (mode1)
25654 || VALID_NEON_QREG_MODE (mode1)
25655 || VALID_NEON_STRUCT_MODE (mode1))
25656 && (VALID_NEON_DREG_MODE (mode2)
25657 || VALID_NEON_QREG_MODE (mode2)
25658 || VALID_NEON_STRUCT_MODE (mode2)))
25659 || (TARGET_HAVE_MVE
25660 && (VALID_MVE_MODE (mode1)
25661 || VALID_MVE_STRUCT_MODE (mode1))
25662 && (VALID_MVE_MODE (mode2)
25663 || VALID_MVE_STRUCT_MODE (mode2))))
25664 return true;
25665
25666 return false;
25667 }
25668
25669 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25670 not used in arm mode. */
25671
25672 enum reg_class
25673 arm_regno_class (int regno)
25674 {
25675 if (regno == PC_REGNUM)
25676 return NO_REGS;
25677
25678 if (IS_VPR_REGNUM (regno))
25679 return VPR_REG;
25680
25681 if (TARGET_THUMB1)
25682 {
25683 if (regno == STACK_POINTER_REGNUM)
25684 return STACK_REG;
25685 if (regno == CC_REGNUM)
25686 return CC_REG;
25687 if (regno < 8)
25688 return LO_REGS;
25689 return HI_REGS;
25690 }
25691
25692 if (TARGET_THUMB2 && regno < 8)
25693 return LO_REGS;
25694
25695 if ( regno <= LAST_ARM_REGNUM
25696 || regno == FRAME_POINTER_REGNUM
25697 || regno == ARG_POINTER_REGNUM)
25698 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25699
25700 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25701 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25702
25703 if (IS_VFP_REGNUM (regno))
25704 {
25705 if (regno <= D7_VFP_REGNUM)
25706 return VFP_D0_D7_REGS;
25707 else if (regno <= LAST_LO_VFP_REGNUM)
25708 return VFP_LO_REGS;
25709 else
25710 return VFP_HI_REGS;
25711 }
25712
25713 if (IS_IWMMXT_REGNUM (regno))
25714 return IWMMXT_REGS;
25715
25716 if (IS_IWMMXT_GR_REGNUM (regno))
25717 return IWMMXT_GR_REGS;
25718
25719 return NO_REGS;
25720 }
25721
25722 /* Handle a special case when computing the offset
25723 of an argument from the frame pointer. */
25724 int
25725 arm_debugger_arg_offset (int value, rtx addr)
25726 {
25727 rtx_insn *insn;
25728
25729 /* We are only interested if dbxout_parms() failed to compute the offset. */
25730 if (value != 0)
25731 return 0;
25732
25733 /* We can only cope with the case where the address is held in a register. */
25734 if (!REG_P (addr))
25735 return 0;
25736
25737 /* If we are using the frame pointer to point at the argument, then
25738 an offset of 0 is correct. */
25739 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25740 return 0;
25741
25742 /* If we are using the stack pointer to point at the
25743 argument, then an offset of 0 is correct. */
25744 /* ??? Check this is consistent with thumb2 frame layout. */
25745 if ((TARGET_THUMB || !frame_pointer_needed)
25746 && REGNO (addr) == SP_REGNUM)
25747 return 0;
25748
25749 /* Oh dear. The argument is pointed to by a register rather
25750 than being held in a register, or being stored at a known
25751 offset from the frame pointer. Since GDB only understands
25752 those two kinds of argument we must translate the address
25753 held in the register into an offset from the frame pointer.
25754 We do this by searching through the insns for the function
25755 looking to see where this register gets its value. If the
25756 register is initialized from the frame pointer plus an offset
25757 then we are in luck and we can continue, otherwise we give up.
25758
25759 This code is exercised by producing debugging information
25760 for a function with arguments like this:
25761
25762 double func (double a, double b, int c, double d) {return d;}
25763
25764 Without this code the stab for parameter 'd' will be set to
25765 an offset of 0 from the frame pointer, rather than 8. */
25766
25767 /* The if() statement says:
25768
25769 If the insn is a normal instruction
25770 and if the insn is setting the value in a register
25771 and if the register being set is the register holding the address of the argument
25772 and if the address is computed by an addition
25773 that involves adding to a register
25774 which is the frame pointer
25775 a constant integer
25776
25777 then... */
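/* In RTL terms the loop below looks for an insn of (roughly) the shape
   shown here; the register numbers are illustrative only:

	(set (reg:SI 4)
	     (plus:SI (reg:SI 11 fp) (const_int 8)))

   in which case 8 becomes the offset handed back to the debugger.  */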
25778
25779 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25780 {
25781 if ( NONJUMP_INSN_P (insn)
25782 && GET_CODE (PATTERN (insn)) == SET
25783 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25784 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25785 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25786 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25787 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25788 )
25789 {
25790 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25791
25792 break;
25793 }
25794 }
25795
25796 if (value == 0)
25797 {
25798 debug_rtx (addr);
25799 warning (0, "unable to compute real location of stacked parameter");
25800 value = 8; /* XXX magic hack */
25801 }
25802
25803 return value;
25804 }
25805 \f
25806 /* Implement TARGET_PROMOTED_TYPE. */
25807
25808 static tree
25809 arm_promoted_type (const_tree t)
25810 {
25811 if (SCALAR_FLOAT_TYPE_P (t)
25812 && TYPE_PRECISION (t) == 16
25813 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25814 return float_type_node;
25815 return NULL_TREE;
25816 }
25817
25818 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25819 This simply adds HFmode as a supported mode; even though we don't
25820 implement arithmetic on this type directly, it's supported by
25821 optabs conversions, much the way the double-word arithmetic is
25822 special-cased in the default hook. */
25823
25824 static bool
25825 arm_scalar_mode_supported_p (scalar_mode mode)
25826 {
25827 if (mode == HFmode)
25828 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25829 else if (ALL_FIXED_POINT_MODE_P (mode))
25830 return true;
25831 else
25832 return default_scalar_mode_supported_p (mode);
25833 }
25834
25835 /* Set the value of FLT_EVAL_METHOD.
25836 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25837
25838 0: evaluate all operations and constants, whose semantic type has at
25839 most the range and precision of type float, to the range and
25840 precision of float; evaluate all other operations and constants to
25841 the range and precision of the semantic type;
25842
25843 N, where _FloatN is a supported interchange floating type:
25844 evaluate all operations and constants, whose semantic type has at
25845 most the range and precision of _FloatN type, to the range and
25846 precision of the _FloatN type; evaluate all other operations and
25847 constants to the range and precision of the semantic type;
25848
25849 If we have the ARMv8.2-A extensions then we support _Float16 in native
25850 precision, so we should set this to 16. Otherwise, we support the type,
25851 but want to evaluate expressions in float precision, so set this to
25852 0. */
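/* Illustrative example (not taken from this file): given

	_Float16 a, b, c;
	c = a * b + c;

   a value of 0 means the multiply and add are performed in float and only
   the final assignment narrows back to _Float16, whereas a value of 16
   (which requires the Armv8.2-A FP16 extension) keeps every intermediate
   operation in half precision.  */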
25853
25854 static enum flt_eval_method
25855 arm_excess_precision (enum excess_precision_type type)
25856 {
25857 switch (type)
25858 {
25859 case EXCESS_PRECISION_TYPE_FAST:
25860 case EXCESS_PRECISION_TYPE_STANDARD:
25861 /* We can calculate either in 16-bit range and precision or
25862 32-bit range and precision. Make that decision based on whether
25863 we have native support for the ARMv8.2-A 16-bit floating-point
25864 instructions or not. */
25865 return (TARGET_VFP_FP16INST
25866 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25867 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25868 case EXCESS_PRECISION_TYPE_IMPLICIT:
25869 case EXCESS_PRECISION_TYPE_FLOAT16:
25870 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25871 default:
25872 gcc_unreachable ();
25873 }
25874 return FLT_EVAL_METHOD_UNPREDICTABLE;
25875 }
25876
25877
25878 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25879 _Float16 if we are using anything other than ieee format for 16-bit
25880 floating point. Otherwise, punt to the default implementation. */
25881 static opt_scalar_float_mode
25882 arm_floatn_mode (int n, bool extended)
25883 {
25884 if (!extended && n == 16)
25885 {
25886 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25887 return HFmode;
25888 return opt_scalar_float_mode ();
25889 }
25890
25891 return default_floatn_mode (n, extended);
25892 }
25893
25894
25895 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25896 not to early-clobber SRC registers in the process.
25897
25898 We assume that the operands described by SRC and DEST represent a
25899 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25900 number of components into which the copy has been decomposed. */
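/* For example (purely illustrative): decomposing a copy of {d1, d2} into
   {d2, d3} in increasing order would overwrite d2 before it had been read
   as a source, so in that case the component moves are emitted in reverse
   order instead.  */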
25901 void
25902 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25903 {
25904 unsigned int i;
25905
25906 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25907 || REGNO (operands[0]) < REGNO (operands[1]))
25908 {
25909 for (i = 0; i < count; i++)
25910 {
25911 operands[2 * i] = dest[i];
25912 operands[2 * i + 1] = src[i];
25913 }
25914 }
25915 else
25916 {
25917 for (i = 0; i < count; i++)
25918 {
25919 operands[2 * i] = dest[count - i - 1];
25920 operands[2 * i + 1] = src[count - i - 1];
25921 }
25922 }
25923 }
25924
25925 /* Split operands into moves from op[1] + op[2] into op[0]. */
25926
25927 void
25928 neon_split_vcombine (rtx operands[3])
25929 {
25930 unsigned int dest = REGNO (operands[0]);
25931 unsigned int src1 = REGNO (operands[1]);
25932 unsigned int src2 = REGNO (operands[2]);
25933 machine_mode halfmode = GET_MODE (operands[1]);
25934 unsigned int halfregs = REG_NREGS (operands[1]);
25935 rtx destlo, desthi;
25936
25937 if (src1 == dest && src2 == dest + halfregs)
25938 {
25939 /* No-op move. Can't split to nothing; emit something. */
25940 emit_note (NOTE_INSN_DELETED);
25941 return;
25942 }
25943
25944 /* Preserve register attributes for variable tracking. */
25945 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25946 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25947 GET_MODE_SIZE (halfmode));
25948
25949 /* Special case of reversed high/low parts. Use VSWP. */
25950 if (src2 == dest && src1 == dest + halfregs)
25951 {
25952 rtx x = gen_rtx_SET (destlo, operands[1]);
25953 rtx y = gen_rtx_SET (desthi, operands[2]);
25954 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25955 return;
25956 }
25957
25958 if (!reg_overlap_mentioned_p (operands[2], destlo))
25959 {
25960 /* Try to avoid unnecessary moves if part of the result
25961 is in the right place already. */
25962 if (src1 != dest)
25963 emit_move_insn (destlo, operands[1]);
25964 if (src2 != dest + halfregs)
25965 emit_move_insn (desthi, operands[2]);
25966 }
25967 else
25968 {
25969 if (src2 != dest + halfregs)
25970 emit_move_insn (desthi, operands[2]);
25971 if (src1 != dest)
25972 emit_move_insn (destlo, operands[1]);
25973 }
25974 }
25975 \f
25976 /* Return the number (counting from 0) of
25977 the least significant set bit in MASK. */
25978
25979 inline static int
25980 number_of_first_bit_set (unsigned mask)
25981 {
25982 return ctz_hwi (mask);
25983 }
25984
25985 /* Like emit_multi_reg_push, but allowing for a different set of
25986 registers to be described as saved. MASK is the set of registers
25987 to be saved; REAL_REGS is the set of registers to be described as
25988 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25989
25990 static rtx_insn *
25991 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25992 {
25993 unsigned long regno;
25994 rtx par[10], tmp, reg;
25995 rtx_insn *insn;
25996 int i, j;
25997
25998 /* Build the parallel of the registers actually being stored. */
25999 for (i = 0; mask; ++i, mask &= mask - 1)
26000 {
26001 regno = ctz_hwi (mask);
26002 reg = gen_rtx_REG (SImode, regno);
26003
26004 if (i == 0)
26005 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26006 else
26007 tmp = gen_rtx_USE (VOIDmode, reg);
26008
26009 par[i] = tmp;
26010 }
26011
26012 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26013 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26014 tmp = gen_frame_mem (BLKmode, tmp);
26015 tmp = gen_rtx_SET (tmp, par[0]);
26016 par[0] = tmp;
26017
26018 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26019 insn = emit_insn (tmp);
26020
26021 /* Always build the stack adjustment note for unwind info. */
26022 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26023 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26024 par[0] = tmp;
26025
26026 /* Build the parallel of the registers recorded as saved for unwind. */
26027 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26028 {
26029 regno = ctz_hwi (real_regs);
26030 reg = gen_rtx_REG (SImode, regno);
26031
26032 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26033 tmp = gen_frame_mem (SImode, tmp);
26034 tmp = gen_rtx_SET (tmp, reg);
26035 RTX_FRAME_RELATED_P (tmp) = 1;
26036 par[j + 1] = tmp;
26037 }
26038
26039 if (j == 0)
26040 tmp = par[0];
26041 else
26042 {
26043 RTX_FRAME_RELATED_P (par[0]) = 1;
26044 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26045 }
26046
26047 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26048
26049 return insn;
26050 }
26051
26052 /* Emit code to pop registers from the stack. F is the
26053 assembly file. MASK is the registers to pop. */
26054 static void
26055 thumb_pop (FILE *f, unsigned long mask)
26056 {
26057 int regno;
26058 int lo_mask = mask & 0xFF;
26059
26060 gcc_assert (mask);
26061
26062 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26063 {
26064 /* Special case. Do not generate a POP PC statement here, do it in
26065 thumb_exit (). */
26066 thumb_exit (f, -1);
26067 return;
26068 }
26069
26070 fprintf (f, "\tpop\t{");
26071
26072 /* Look at the low registers first. */
26073 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26074 {
26075 if (lo_mask & 1)
26076 {
26077 asm_fprintf (f, "%r", regno);
26078
26079 if ((lo_mask & ~1) != 0)
26080 fprintf (f, ", ");
26081 }
26082 }
26083
26084 if (mask & (1 << PC_REGNUM))
26085 {
26086 /* Catch popping the PC. */
26087 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26088 || IS_CMSE_ENTRY (arm_current_func_type ()))
26089 {
26090 /* The PC is never popped directly; instead
26091 it is popped into r3 and then BX is used. */
26092 fprintf (f, "}\n");
26093
26094 thumb_exit (f, -1);
26095
26096 return;
26097 }
26098 else
26099 {
26100 if (mask & 0xFF)
26101 fprintf (f, ", ");
26102
26103 asm_fprintf (f, "%r", PC_REGNUM);
26104 }
26105 }
26106
26107 fprintf (f, "}\n");
26108 }
26109
26110 /* Generate code to return from a thumb function.
26111 If 'reg_containing_return_addr' is -1, then the return address is
26112 actually on the stack, at the stack pointer.
26113
26114 Note: do not forget to update the length attribute of the corresponding insn pattern
26115 when changing assembly output (e.g. the length attribute of epilogue_insns when
26116 updating Armv8-M Baseline Security Extensions register clearing
26117 sequences). */
26118 static void
26119 thumb_exit (FILE *f, int reg_containing_return_addr)
26120 {
26121 unsigned regs_available_for_popping;
26122 unsigned regs_to_pop;
26123 int pops_needed;
26124 unsigned available;
26125 unsigned required;
26126 machine_mode mode;
26127 int size;
26128 int restore_a4 = FALSE;
26129
26130 /* Compute the registers we need to pop. */
26131 regs_to_pop = 0;
26132 pops_needed = 0;
26133
26134 if (reg_containing_return_addr == -1)
26135 {
26136 regs_to_pop |= 1 << LR_REGNUM;
26137 ++pops_needed;
26138 }
26139
26140 if (TARGET_BACKTRACE)
26141 {
26142 /* Restore the (ARM) frame pointer and stack pointer. */
26143 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26144 pops_needed += 2;
26145 }
26146
26147 /* If there is nothing to pop then just emit the BX instruction and
26148 return. */
26149 if (pops_needed == 0)
26150 {
26151 if (crtl->calls_eh_return)
26152 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26153
26154 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26155 {
26156 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26157 emitted by cmse_nonsecure_entry_clear_before_return (). */
26158 if (!TARGET_HAVE_FPCXT_CMSE)
26159 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26160 reg_containing_return_addr);
26161 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26162 }
26163 else
26164 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26165 return;
26166 }
26167 /* Otherwise if we are not supporting interworking and we have not created
26168 a backtrace structure and the function was not entered in ARM mode then
26169 just pop the return address straight into the PC. */
26170 else if (!TARGET_INTERWORK
26171 && !TARGET_BACKTRACE
26172 && !is_called_in_ARM_mode (current_function_decl)
26173 && !crtl->calls_eh_return
26174 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26175 {
26176 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26177 return;
26178 }
26179
26180 /* Find out how many of the (return) argument registers we can corrupt. */
26181 regs_available_for_popping = 0;
26182
26183 /* If returning via __builtin_eh_return, the bottom three registers
26184 all contain information needed for the return. */
26185 if (crtl->calls_eh_return)
26186 size = 12;
26187 else
26188 {
26189 /* We can deduce the registers used from the function's
26190 return value. This is more reliable than examining
26191 df_regs_ever_live_p () because that will be set if the register is
26192 ever used in the function, not just if the register is used
26193 to hold a return value. */
26194
26195 if (crtl->return_rtx != 0)
26196 mode = GET_MODE (crtl->return_rtx);
26197 else
26198 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26199
26200 size = GET_MODE_SIZE (mode);
26201
26202 if (size == 0)
26203 {
26204 /* In a void function we can use any argument register.
26205 In a function that returns a structure on the stack
26206 we can use the second and third argument registers. */
26207 if (mode == VOIDmode)
26208 regs_available_for_popping =
26209 (1 << ARG_REGISTER (1))
26210 | (1 << ARG_REGISTER (2))
26211 | (1 << ARG_REGISTER (3));
26212 else
26213 regs_available_for_popping =
26214 (1 << ARG_REGISTER (2))
26215 | (1 << ARG_REGISTER (3));
26216 }
26217 else if (size <= 4)
26218 regs_available_for_popping =
26219 (1 << ARG_REGISTER (2))
26220 | (1 << ARG_REGISTER (3));
26221 else if (size <= 8)
26222 regs_available_for_popping =
26223 (1 << ARG_REGISTER (3));
26224 }
26225
26226 /* Match registers to be popped with registers into which we pop them. */
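/* (X & -X) isolates the lowest set bit, so each iteration below retires
   one register from each of the two sets.  */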
26227 for (available = regs_available_for_popping,
26228 required = regs_to_pop;
26229 required != 0 && available != 0;
26230 available &= ~(available & - available),
26231 required &= ~(required & - required))
26232 -- pops_needed;
26233
26234 /* If we have any popping registers left over, remove them. */
26235 if (available > 0)
26236 regs_available_for_popping &= ~available;
26237
26238 /* Otherwise if we need another popping register we can use
26239 the fourth argument register. */
26240 else if (pops_needed)
26241 {
26242 /* If we have not found any free argument registers and
26243 reg a4 contains the return address, we must move it. */
26244 if (regs_available_for_popping == 0
26245 && reg_containing_return_addr == LAST_ARG_REGNUM)
26246 {
26247 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26248 reg_containing_return_addr = LR_REGNUM;
26249 }
26250 else if (size > 12)
26251 {
26252 /* Register a4 is being used to hold part of the return value,
26253 but we have dire need of a free, low register. */
26254 restore_a4 = TRUE;
26255
26256 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26257 }
26258
26259 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26260 {
26261 /* The fourth argument register is available. */
26262 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26263
26264 --pops_needed;
26265 }
26266 }
26267
26268 /* Pop as many registers as we can. */
26269 thumb_pop (f, regs_available_for_popping);
26270
26271 /* Process the registers we popped. */
26272 if (reg_containing_return_addr == -1)
26273 {
26274 /* The return address was popped into the lowest numbered register. */
26275 regs_to_pop &= ~(1 << LR_REGNUM);
26276
26277 reg_containing_return_addr =
26278 number_of_first_bit_set (regs_available_for_popping);
26279
26280 /* Remove this register from the mask of available registers, so that
26281 the return address will not be corrupted by further pops. */
26282 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26283 }
26284
26285 /* If we popped other registers then handle them here. */
26286 if (regs_available_for_popping)
26287 {
26288 int frame_pointer;
26289
26290 /* Work out which register currently contains the frame pointer. */
26291 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26292
26293 /* Move it into the correct place. */
26294 asm_fprintf (f, "\tmov\t%r, %r\n",
26295 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26296
26297 /* (Temporarily) remove it from the mask of popped registers. */
26298 regs_available_for_popping &= ~(1 << frame_pointer);
26299 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26300
26301 if (regs_available_for_popping)
26302 {
26303 int stack_pointer;
26304
26305 /* We popped the stack pointer as well;
26306 find the register that contains it. */
26307 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26308
26309 /* Move it into the stack register. */
26310 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26311
26312 /* At this point we have popped all necessary registers, so
26313 do not worry about restoring regs_available_for_popping
26314 to its correct value:
26315
26316 assert (pops_needed == 0)
26317 assert (regs_available_for_popping == (1 << frame_pointer))
26318 assert (regs_to_pop == (1 << STACK_POINTER)) */
26319 }
26320 else
26321 {
26322 /* Since we have just moved the popped value into the frame
26323 pointer, the popping register is available for reuse, and
26324 we know that we still have the stack pointer left to pop. */
26325 regs_available_for_popping |= (1 << frame_pointer);
26326 }
26327 }
26328
26329 /* If we still have registers left on the stack, but we no longer have
26330 any registers into which we can pop them, then we must move the return
26331 address into the link register and make available the register that
26332 contained it. */
26333 if (regs_available_for_popping == 0 && pops_needed > 0)
26334 {
26335 regs_available_for_popping |= 1 << reg_containing_return_addr;
26336
26337 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26338 reg_containing_return_addr);
26339
26340 reg_containing_return_addr = LR_REGNUM;
26341 }
26342
26343 /* If we have registers left on the stack then pop some more.
26344 We know that at most we will want to pop FP and SP. */
26345 if (pops_needed > 0)
26346 {
26347 int popped_into;
26348 int move_to;
26349
26350 thumb_pop (f, regs_available_for_popping);
26351
26352 /* We have popped either FP or SP.
26353 Move whichever one it is into the correct register. */
26354 popped_into = number_of_first_bit_set (regs_available_for_popping);
26355 move_to = number_of_first_bit_set (regs_to_pop);
26356
26357 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26358 --pops_needed;
26359 }
26360
26361 /* If we still have not popped everything then we must have only
26362 had one register available to us and we are now popping the SP. */
26363 if (pops_needed > 0)
26364 {
26365 int popped_into;
26366
26367 thumb_pop (f, regs_available_for_popping);
26368
26369 popped_into = number_of_first_bit_set (regs_available_for_popping);
26370
26371 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26372 /*
26373 assert (regs_to_pop == (1 << STACK_POINTER))
26374 assert (pops_needed == 1)
26375 */
26376 }
26377
26378 /* If necessary restore the a4 register. */
26379 if (restore_a4)
26380 {
26381 if (reg_containing_return_addr != LR_REGNUM)
26382 {
26383 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26384 reg_containing_return_addr = LR_REGNUM;
26385 }
26386
26387 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26388 }
26389
26390 if (crtl->calls_eh_return)
26391 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26392
26393 /* Return to caller. */
26394 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26395 {
26396 /* This is for the cases where LR is not being used to contain the return
26397 address. It may therefore contain information that we might not want
26398 to leak, hence it must be cleared. The value in R0 will never be a
26399 secret at this point, so it is safe to use it, see the clearing code
26400 in cmse_nonsecure_entry_clear_before_return (). */
26401 if (reg_containing_return_addr != LR_REGNUM)
26402 asm_fprintf (f, "\tmov\tlr, r0\n");
26403
26404 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26405 by cmse_nonsecure_entry_clear_before_return (). */
26406 if (!TARGET_HAVE_FPCXT_CMSE)
26407 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26408 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26409 }
26410 else
26411 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26412 }
26413 \f
26414 /* Scan INSN just before assembler is output for it.
26415 For Thumb-1, we track the status of the condition codes; this
26416 information is used in the cbranchsi4_insn pattern. */
26417 void
26418 thumb1_final_prescan_insn (rtx_insn *insn)
26419 {
26420 if (flag_print_asm_name)
26421 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26422 INSN_ADDRESSES (INSN_UID (insn)));
26423 /* Don't overwrite the previous setter when we get to a cbranch. */
26424 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26425 {
26426 enum attr_conds conds;
26427
26428 if (cfun->machine->thumb1_cc_insn)
26429 {
26430 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26431 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26432 CC_STATUS_INIT;
26433 }
26434 conds = get_attr_conds (insn);
26435 if (conds == CONDS_SET)
26436 {
26437 rtx set = single_set (insn);
26438 cfun->machine->thumb1_cc_insn = insn;
26439 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26440 cfun->machine->thumb1_cc_op1 = const0_rtx;
26441 cfun->machine->thumb1_cc_mode = CC_NZmode;
26442 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26443 {
26444 rtx src1 = XEXP (SET_SRC (set), 1);
26445 if (src1 == const0_rtx)
26446 cfun->machine->thumb1_cc_mode = CCmode;
26447 }
26448 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26449 {
26450 /* Record the src register operand instead of dest because
26451 cprop_hardreg pass propagates src. */
26452 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26453 }
26454 }
26455 else if (conds != CONDS_NOCOND)
26456 cfun->machine->thumb1_cc_insn = NULL_RTX;
26457 }
26458
26459 /* Check if unexpected far jump is used. */
26460 if (cfun->machine->lr_save_eliminated
26461 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26462 internal_error("Unexpected thumb1 far jump");
26463 }
26464
26465 int
26466 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26467 {
26468 unsigned HOST_WIDE_INT mask = 0xff;
26469 int i;
26470
26471 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26472 if (val == 0) /* XXX */
26473 return 0;
26474
26475 for (i = 0; i < 25; i++)
26476 if ((val & (mask << i)) == val)
26477 return 1;
26478
26479 return 0;
26480 }
26481
26482 /* Returns nonzero if the current function contains,
26483 or might contain, a far jump. */
26484 static int
26485 thumb_far_jump_used_p (void)
26486 {
26487 rtx_insn *insn;
26488 bool far_jump = false;
26489 unsigned int func_size = 0;
26490
26491 /* If we have already decided that far jumps may be used,
26492 do not bother checking again, and always return true even if
26493 it turns out that they are not being used. Once we have made
26494 the decision that far jumps are present (and hence that the link
26495 register will be pushed onto the stack) we cannot go back on it. */
26496 if (cfun->machine->far_jump_used)
26497 return 1;
26498
26499 /* If this function is not being called from the prologue/epilogue
26500 generation code then it must be being called from the
26501 INITIAL_ELIMINATION_OFFSET macro. */
26502 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26503 {
26504 /* In this case we know that we are being asked about the elimination
26505 of the arg pointer register. If that register is not being used,
26506 then there are no arguments on the stack, and we do not have to
26507 worry that a far jump might force the prologue to push the link
26508 register, changing the stack offsets. In this case we can just
26509 return false, since the presence of far jumps in the function will
26510 not affect stack offsets.
26511
26512 If the arg pointer is live (or if it was live, but has now been
26513 eliminated and so set to dead) then we do have to test to see if
26514 the function might contain a far jump. This test can lead to some
26515 false negatives, since before reload is completed the length of
26516 branch instructions is not known, so gcc defaults to returning their
26517 longest length, which in turn sets the far jump attribute to true.
26518
26519 A false negative will not result in bad code being generated, but it
26520 will result in a needless push and pop of the link register. We
26521 hope that this does not occur too often.
26522
26523 If we need doubleword stack alignment this could affect the other
26524 elimination offsets so we can't risk getting it wrong. */
26525 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26526 cfun->machine->arg_pointer_live = 1;
26527 else if (!cfun->machine->arg_pointer_live)
26528 return 0;
26529 }
26530
26531 /* We should not change far_jump_used during or after reload, as there is
26532 no chance to change stack frame layout. */
26533 if (reload_in_progress || reload_completed)
26534 return 0;
26535
26536 /* Check to see if the function contains a branch
26537 insn with the far jump attribute set. */
26538 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26539 {
26540 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26541 {
26542 far_jump = true;
26543 }
26544 func_size += get_attr_length (insn);
26545 }
26546
26547 /* The far_jump attribute will always be true for thumb1 before the
26548 shorten_branch pass, so checking it before
26549 shorten_branch isn't very useful.
26550
26551 The following heuristic tries to estimate more accurately whether a far jump
26552 will finally be used. The heuristic is very conservative as there is
26553 no chance to roll back a decision not to use a far jump.
26554
26555 Thumb1 long branch offsets are -2048 to 2046. The worst case is that each
26556 2-byte insn is associated with a 4-byte constant pool entry. Using
26557 function size 2048/3 as the threshold is conservative enough. */
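/* Worked example (illustrative): if the insns of a function add up to
   700 bytes, the worst-case size including literal pools is 3 * 700 =
   2100 bytes, which exceeds 2048, so we conservatively assume that a far
   jump may be needed.  */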
26558 if (far_jump)
26559 {
26560 if ((func_size * 3) >= 2048)
26561 {
26562 /* Record the fact that we have decided that
26563 the function does use far jumps. */
26564 cfun->machine->far_jump_used = 1;
26565 return 1;
26566 }
26567 }
26568
26569 return 0;
26570 }
26571
26572 /* Return nonzero if FUNC must be entered in ARM mode. */
26573 static bool
26574 is_called_in_ARM_mode (tree func)
26575 {
26576 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26577
26578 /* Ignore the problem about functions whose address is taken. */
26579 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26580 return true;
26581
26582 #ifdef ARM_PE
26583 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26584 #else
26585 return false;
26586 #endif
26587 }
26588
26589 /* Given the stack offsets and register mask in OFFSETS, decide how
26590 many additional registers to push instead of subtracting a constant
26591 from SP. For epilogues the principle is the same except we use pop.
26592 FOR_PROLOGUE indicates which we're generating. */
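/* For instance (illustrative): with a 512-byte frame a Thumb-1 prologue
   would normally need more than one instruction to adjust SP, since the
   immediate in SUB SP is limited to 508.  Pushing one extra low register
   (one that did not otherwise need saving) in the existing PUSH shrinks
   the remaining adjustment to 508 bytes, so a single SUB SP suffices.  */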
26593 static int
26594 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26595 {
26596 HOST_WIDE_INT amount;
26597 unsigned long live_regs_mask = offsets->saved_regs_mask;
26598 /* Extract a mask of the ones we can give to the Thumb's push/pop
26599 instruction. */
26600 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26601 /* Then count how many other high registers will need to be pushed. */
26602 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26603 int n_free, reg_base, size;
26604
26605 if (!for_prologue && frame_pointer_needed)
26606 amount = offsets->locals_base - offsets->saved_regs;
26607 else
26608 amount = offsets->outgoing_args - offsets->saved_regs;
26609
26610 /* If the stack frame size is 512 exactly, we can save one load
26611 instruction, which should make this a win even when optimizing
26612 for speed. */
26613 if (!optimize_size && amount != 512)
26614 return 0;
26615
26616 /* Can't do this if there are high registers to push. */
26617 if (high_regs_pushed != 0)
26618 return 0;
26619
26620 /* Shouldn't do it in the prologue if no registers would normally
26621 be pushed at all. In the epilogue, also allow it if we'll have
26622 a pop insn for the PC. */
26623 if (l_mask == 0
26624 && (for_prologue
26625 || TARGET_BACKTRACE
26626 || (live_regs_mask & 1 << LR_REGNUM) == 0
26627 || TARGET_INTERWORK
26628 || crtl->args.pretend_args_size != 0))
26629 return 0;
26630
26631 /* Don't do this if thumb_expand_prologue wants to emit instructions
26632 between the push and the stack frame allocation. */
26633 if (for_prologue
26634 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26635 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26636 return 0;
26637
26638 reg_base = 0;
26639 n_free = 0;
26640 if (!for_prologue)
26641 {
26642 size = arm_size_return_regs ();
26643 reg_base = ARM_NUM_INTS (size);
26644 live_regs_mask >>= reg_base;
26645 }
26646
26647 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26648 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26649 {
26650 live_regs_mask >>= 1;
26651 n_free++;
26652 }
26653
26654 if (n_free == 0)
26655 return 0;
26656 gcc_assert (amount / 4 * 4 == amount);
26657
26658 if (amount >= 512 && (amount - n_free * 4) < 512)
26659 return (amount - 508) / 4;
26660 if (amount <= n_free * 4)
26661 return amount / 4;
26662 return 0;
26663 }
26664
26665 /* The bits which aren't usefully expanded as rtl. */
26666 const char *
26667 thumb1_unexpanded_epilogue (void)
26668 {
26669 arm_stack_offsets *offsets;
26670 int regno;
26671 unsigned long live_regs_mask = 0;
26672 int high_regs_pushed = 0;
26673 int extra_pop;
26674 int had_to_push_lr;
26675 int size;
26676
26677 if (cfun->machine->return_used_this_function != 0)
26678 return "";
26679
26680 if (IS_NAKED (arm_current_func_type ()))
26681 return "";
26682
26683 offsets = arm_get_frame_offsets ();
26684 live_regs_mask = offsets->saved_regs_mask;
26685 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26686
26687 /* We can deduce the registers used from the function's return value.
26688 This is more reliable than examining df_regs_ever_live_p () because that
26689 will be set if the register is ever used in the function, not just if
26690 the register is used to hold a return value. */
26691 size = arm_size_return_regs ();
26692
26693 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26694 if (extra_pop > 0)
26695 {
26696 unsigned long extra_mask = (1 << extra_pop) - 1;
26697 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26698 }
26699
26700 /* The prolog may have pushed some high registers to use as
26701 work registers, e.g. the testsuite file:
26702 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26703 compiles to produce:
26704 push {r4, r5, r6, r7, lr}
26705 mov r7, r9
26706 mov r6, r8
26707 push {r6, r7}
26708 as part of the prolog. We have to undo that pushing here. */
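/* The undo sequence emitted below is along the lines of (illustrative;
   the low registers chosen depend on which ones are free at this point):

	pop	{r2, r3}
	mov	r9, r3
	mov	r8, r2  */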
26709
26710 if (high_regs_pushed)
26711 {
26712 unsigned long mask = live_regs_mask & 0xff;
26713 int next_hi_reg;
26714
26715 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26716
26717 if (mask == 0)
26718 /* Oh dear! We have no low registers into which we can pop
26719 high registers! */
26720 internal_error
26721 ("no low registers available for popping high registers");
26722
26723 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26724 if (live_regs_mask & (1 << next_hi_reg))
26725 break;
26726
26727 while (high_regs_pushed)
26728 {
26729 /* Find lo register(s) into which the high register(s) can
26730 be popped. */
26731 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26732 {
26733 if (mask & (1 << regno))
26734 high_regs_pushed--;
26735 if (high_regs_pushed == 0)
26736 break;
26737 }
26738
26739 if (high_regs_pushed == 0 && regno >= 0)
26740 mask &= ~((1 << regno) - 1);
26741
26742 /* Pop the values into the low register(s). */
26743 thumb_pop (asm_out_file, mask);
26744
26745 /* Move the value(s) into the high registers. */
26746 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26747 {
26748 if (mask & (1 << regno))
26749 {
26750 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26751 regno);
26752
26753 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26754 next_hi_reg--)
26755 if (live_regs_mask & (1 << next_hi_reg))
26756 break;
26757 }
26758 }
26759 }
26760 live_regs_mask &= ~0x0f00;
26761 }
26762
26763 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26764 live_regs_mask &= 0xff;
26765
26766 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26767 {
26768 /* Pop the return address into the PC. */
26769 if (had_to_push_lr)
26770 live_regs_mask |= 1 << PC_REGNUM;
26771
26772 /* Either no argument registers were pushed or a backtrace
26773 structure was created which includes an adjusted stack
26774 pointer, so just pop everything. */
26775 if (live_regs_mask)
26776 thumb_pop (asm_out_file, live_regs_mask);
26777
26778 /* We have either just popped the return address into the
26779	    PC or it was kept in LR for the entire function.
26780 Note that thumb_pop has already called thumb_exit if the
26781 PC was in the list. */
26782 if (!had_to_push_lr)
26783 thumb_exit (asm_out_file, LR_REGNUM);
26784 }
26785 else
26786 {
26787 /* Pop everything but the return address. */
26788 if (live_regs_mask)
26789 thumb_pop (asm_out_file, live_regs_mask);
26790
26791 if (had_to_push_lr)
26792 {
26793 if (size > 12)
26794 {
26795 /* We have no free low regs, so save one. */
26796 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26797 LAST_ARG_REGNUM);
26798 }
26799
26800 /* Get the return address into a temporary register. */
26801 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26802
26803 if (size > 12)
26804 {
26805 /* Move the return address to lr. */
26806 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26807 LAST_ARG_REGNUM);
26808 /* Restore the low register. */
26809 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26810 IP_REGNUM);
26811 regno = LR_REGNUM;
26812 }
26813 else
26814 regno = LAST_ARG_REGNUM;
26815 }
26816 else
26817 regno = LR_REGNUM;
26818
26819 /* Remove the argument registers that were pushed onto the stack. */
26820 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26821 SP_REGNUM, SP_REGNUM,
26822 crtl->args.pretend_args_size);
26823
26824 thumb_exit (asm_out_file, regno);
26825 }
26826
26827 return "";
26828 }
26829
26830 /* Functions to save and restore machine-specific function data. */
26831 static struct machine_function *
26832 arm_init_machine_status (void)
26833 {
26834 struct machine_function *machine;
26835 machine = ggc_cleared_alloc<machine_function> ();
26836
26837 #if ARM_FT_UNKNOWN != 0
26838 machine->func_type = ARM_FT_UNKNOWN;
26839 #endif
26840 machine->static_chain_stack_bytes = -1;
26841 return machine;
26842 }
26843
26844 /* Return an RTX indicating where the return address to the
26845 calling function can be found. */
26846 rtx
26847 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26848 {
26849 if (count != 0)
26850 return NULL_RTX;
26851
26852 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26853 }
26854
26855 /* Do anything needed before RTL is emitted for each function. */
26856 void
26857 arm_init_expanders (void)
26858 {
26859 /* Arrange to initialize and mark the machine per-function status. */
26860 init_machine_status = arm_init_machine_status;
26861
26862 /* This is to stop the combine pass optimizing away the alignment
26863 adjustment of va_arg. */
26864 /* ??? It is claimed that this should not be necessary. */
26865 if (cfun)
26866 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26867 }
26868
26869 /* Check whether FUNC must be compiled in a different ARM/Thumb mode.  */
26870
26871 bool
26872 arm_change_mode_p (tree func)
26873 {
26874 if (TREE_CODE (func) != FUNCTION_DECL)
26875 return false;
26876
26877 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26878
26879 if (!callee_tree)
26880 callee_tree = target_option_default_node;
26881
26882 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26883 int flags = callee_opts->x_target_flags;
26884
26885 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26886 }
26887
26888 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26889 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26890 to point at the base of the local variables after static stack
26891 space for a function has been allocated. */
26892
26893 HOST_WIDE_INT
26894 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26895 {
26896 arm_stack_offsets *offsets;
26897
26898 offsets = arm_get_frame_offsets ();
26899
26900 switch (from)
26901 {
26902 case ARG_POINTER_REGNUM:
26903 switch (to)
26904 {
26905 case STACK_POINTER_REGNUM:
26906 return offsets->outgoing_args - offsets->saved_args;
26907
26908 case FRAME_POINTER_REGNUM:
26909 return offsets->soft_frame - offsets->saved_args;
26910
26911 case ARM_HARD_FRAME_POINTER_REGNUM:
26912 return offsets->saved_regs - offsets->saved_args;
26913
26914 case THUMB_HARD_FRAME_POINTER_REGNUM:
26915 return offsets->locals_base - offsets->saved_args;
26916
26917 default:
26918 gcc_unreachable ();
26919 }
26920 break;
26921
26922 case FRAME_POINTER_REGNUM:
26923 switch (to)
26924 {
26925 case STACK_POINTER_REGNUM:
26926 return offsets->outgoing_args - offsets->soft_frame;
26927
26928 case ARM_HARD_FRAME_POINTER_REGNUM:
26929 return offsets->saved_regs - offsets->soft_frame;
26930
26931 case THUMB_HARD_FRAME_POINTER_REGNUM:
26932 return offsets->locals_base - offsets->soft_frame;
26933
26934 default:
26935 gcc_unreachable ();
26936 }
26937 break;
26938
26939 default:
26940 gcc_unreachable ();
26941 }
26942 }
26943
26944 /* Generate the function's prologue. */
26945
26946 void
26947 thumb1_expand_prologue (void)
26948 {
26949 rtx_insn *insn;
26950
26951 HOST_WIDE_INT amount;
26952 HOST_WIDE_INT size;
26953 arm_stack_offsets *offsets;
26954 unsigned long func_type;
26955 int regno;
26956 unsigned long live_regs_mask;
26957 unsigned long l_mask;
26958 unsigned high_regs_pushed = 0;
26959 bool lr_needs_saving;
26960
26961 func_type = arm_current_func_type ();
26962
26963 /* Naked functions don't have prologues. */
26964 if (IS_NAKED (func_type))
26965 {
26966 if (flag_stack_usage_info)
26967 current_function_static_stack_size = 0;
26968 return;
26969 }
26970
26971 if (IS_INTERRUPT (func_type))
26972 {
26973 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26974 return;
26975 }
26976
26977 if (is_called_in_ARM_mode (current_function_decl))
26978 emit_insn (gen_prologue_thumb1_interwork ());
26979
26980 offsets = arm_get_frame_offsets ();
26981 live_regs_mask = offsets->saved_regs_mask;
26982 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26983
26984 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26985 l_mask = live_regs_mask & 0x40ff;
26986 /* Then count how many other high registers will need to be pushed. */
26987 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26988
26989 if (crtl->args.pretend_args_size)
26990 {
26991 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26992
26993 if (cfun->machine->uses_anonymous_args)
26994 {
26995 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26996 unsigned long mask;
26997
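	  /* Note: the computation below builds a mask of the highest
	     NUM_PUSHES argument registers (ending at r3), i.e. the registers
	     holding the anonymous arguments that must be spilled.  */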
26998 mask = 1ul << (LAST_ARG_REGNUM + 1);
26999 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27000
27001 insn = thumb1_emit_multi_reg_push (mask, 0);
27002 }
27003 else
27004 {
27005 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27006 stack_pointer_rtx, x));
27007 }
27008 RTX_FRAME_RELATED_P (insn) = 1;
27009 }
27010
27011 if (TARGET_BACKTRACE)
27012 {
27013 HOST_WIDE_INT offset = 0;
27014 unsigned work_register;
27015 rtx work_reg, x, arm_hfp_rtx;
27016
27017 /* We have been asked to create a stack backtrace structure.
27018 The code looks like this:
27019
27020 0 .align 2
27021 0 func:
27022 0 sub SP, #16 Reserve space for 4 registers.
27023 2 push {R7} Push low registers.
27024 4 add R7, SP, #20 Get the stack pointer before the push.
27025 6 str R7, [SP, #8] Store the stack pointer
27026 (before reserving the space).
27027 8 mov R7, PC Get hold of the start of this code + 12.
27028 10 str R7, [SP, #16] Store it.
27029 12 mov R7, FP Get hold of the current frame pointer.
27030 14 str R7, [SP, #4] Store it.
27031 16 mov R7, LR Get hold of the current return address.
27032 18 str R7, [SP, #12] Store it.
27033 20 add R7, SP, #16 Point at the start of the
27034 backtrace structure.
27035 22 mov FP, R7 Put this value into the frame pointer. */
27036
27037 work_register = thumb_find_work_register (live_regs_mask);
27038 work_reg = gen_rtx_REG (SImode, work_register);
27039 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27040
27041 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27042 stack_pointer_rtx, GEN_INT (-16)));
27043 RTX_FRAME_RELATED_P (insn) = 1;
27044
27045 if (l_mask)
27046 {
27047 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27048 RTX_FRAME_RELATED_P (insn) = 1;
27049 lr_needs_saving = false;
27050
27051 offset = bit_count (l_mask) * UNITS_PER_WORD;
27052 }
27053
27054 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27055 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27056
27057 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27058 x = gen_frame_mem (SImode, x);
27059 emit_move_insn (x, work_reg);
27060
27061 /* Make sure that the instruction fetching the PC is in the right place
27062 to calculate "start of backtrace creation code + 12". */
27063 /* ??? The stores using the common WORK_REG ought to be enough to
27064 prevent the scheduler from doing anything weird. Failing that
27065 we could always move all of the following into an UNSPEC_VOLATILE. */
27066 if (l_mask)
27067 {
27068 x = gen_rtx_REG (SImode, PC_REGNUM);
27069 emit_move_insn (work_reg, x);
27070
27071 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27072 x = gen_frame_mem (SImode, x);
27073 emit_move_insn (x, work_reg);
27074
27075 emit_move_insn (work_reg, arm_hfp_rtx);
27076
27077 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27078 x = gen_frame_mem (SImode, x);
27079 emit_move_insn (x, work_reg);
27080 }
27081 else
27082 {
27083 emit_move_insn (work_reg, arm_hfp_rtx);
27084
27085 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27086 x = gen_frame_mem (SImode, x);
27087 emit_move_insn (x, work_reg);
27088
27089 x = gen_rtx_REG (SImode, PC_REGNUM);
27090 emit_move_insn (work_reg, x);
27091
27092 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27093 x = gen_frame_mem (SImode, x);
27094 emit_move_insn (x, work_reg);
27095 }
27096
27097 x = gen_rtx_REG (SImode, LR_REGNUM);
27098 emit_move_insn (work_reg, x);
27099
27100 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27101 x = gen_frame_mem (SImode, x);
27102 emit_move_insn (x, work_reg);
27103
27104 x = GEN_INT (offset + 12);
27105 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27106
27107 emit_move_insn (arm_hfp_rtx, work_reg);
27108 }
27109 /* Optimization: If we are not pushing any low registers but we are going
27110 to push some high registers then delay our first push. This will just
27111 be a push of LR and we can combine it with the push of the first high
27112 register. */
27113 else if ((l_mask & 0xff) != 0
27114 || (high_regs_pushed == 0 && lr_needs_saving))
27115 {
27116 unsigned long mask = l_mask;
27117 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27118 insn = thumb1_emit_multi_reg_push (mask, mask);
27119 RTX_FRAME_RELATED_P (insn) = 1;
27120 lr_needs_saving = false;
27121 }
27122
27123 if (high_regs_pushed)
27124 {
27125 unsigned pushable_regs;
27126 unsigned next_hi_reg;
27127 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27128 : crtl->args.info.nregs;
27129 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27130
27131 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27132 if (live_regs_mask & (1 << next_hi_reg))
27133 break;
27134
27135       /* Here we need to mask out registers used for passing arguments,
27136	 even if they could be pushed.  This is to avoid using them to
27137	 stash the high registers; such a stash could clobber argument
27138	 values that are still needed.  */
27139 pushable_regs = l_mask & (~arg_regs_mask);
27140 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27141
27142 /* Normally, LR can be used as a scratch register once it has been
27143 saved; but if the function examines its own return address then
27144 the value is still live and we need to avoid using it. */
27145 bool return_addr_live
27146 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27147 LR_REGNUM);
27148
27149 if (lr_needs_saving || return_addr_live)
27150 pushable_regs &= ~(1 << LR_REGNUM);
27151
27152 if (pushable_regs == 0)
27153 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27154
27155 while (high_regs_pushed > 0)
27156 {
27157 unsigned long real_regs_mask = 0;
27158 unsigned long push_mask = 0;
27159
27160 for (regno = LR_REGNUM; regno >= 0; regno --)
27161 {
27162 if (pushable_regs & (1 << regno))
27163 {
27164 emit_move_insn (gen_rtx_REG (SImode, regno),
27165 gen_rtx_REG (SImode, next_hi_reg));
27166
27167 high_regs_pushed --;
27168 real_regs_mask |= (1 << next_hi_reg);
27169 push_mask |= (1 << regno);
27170
27171 if (high_regs_pushed)
27172 {
27173 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27174 next_hi_reg --)
27175 if (live_regs_mask & (1 << next_hi_reg))
27176 break;
27177 }
27178 else
27179 break;
27180 }
27181 }
27182
27183 /* If we had to find a work register and we have not yet
27184 saved the LR then add it to the list of regs to push. */
27185 if (lr_needs_saving)
27186 {
27187 push_mask |= 1 << LR_REGNUM;
27188 real_regs_mask |= 1 << LR_REGNUM;
27189 lr_needs_saving = false;
27190 /* If the return address is not live at this point, we
27191 can add LR to the list of registers that we can use
27192 for pushes. */
27193 if (!return_addr_live)
27194 pushable_regs |= 1 << LR_REGNUM;
27195 }
27196
27197 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27198 RTX_FRAME_RELATED_P (insn) = 1;
27199 }
27200 }
27201
27202 /* Load the pic register before setting the frame pointer,
27203 so we can use r7 as a temporary work register. */
27204 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27205 arm_load_pic_register (live_regs_mask, NULL_RTX);
27206
27207 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27208 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27209 stack_pointer_rtx);
27210
27211 size = offsets->outgoing_args - offsets->saved_args;
27212 if (flag_stack_usage_info)
27213 current_function_static_stack_size = size;
27214
27215 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27216 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27217 || flag_stack_clash_protection)
27218 && size)
27219 sorry ("%<-fstack-check=specific%> for Thumb-1");
27220
27221 amount = offsets->outgoing_args - offsets->saved_regs;
27222 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27223 if (amount)
27224 {
27225 if (amount < 512)
27226 {
27227 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27228 GEN_INT (- amount)));
27229 RTX_FRAME_RELATED_P (insn) = 1;
27230 }
27231 else
27232 {
27233 rtx reg, dwarf;
27234
27235 /* The stack decrement is too big for an immediate value in a single
27236 insn. In theory we could issue multiple subtracts, but after
27237 three of them it becomes more space efficient to place the full
27238 value in the constant pool and load into a register. (Also the
27239 ARM debugger really likes to see only one stack decrement per
27240 function). So instead we look for a scratch register into which
27241 we can load the decrement, and then we subtract this from the
27242 stack pointer. Unfortunately on the thumb the only available
27243 scratch registers are the argument registers, and we cannot use
27244 these as they may hold arguments to the function. Instead we
27245 attempt to locate a call preserved register which is used by this
27246 function. If we can find one, then we know that it will have
27247 been pushed at the start of the prologue and so we can corrupt
27248 it now. */
27249 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27250 if (live_regs_mask & (1 << regno))
27251 break;
27252
27253 gcc_assert(regno <= LAST_LO_REGNUM);
27254
27255 reg = gen_rtx_REG (SImode, regno);
27256
27257 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27258
27259 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27260 stack_pointer_rtx, reg));
27261
27262 dwarf = gen_rtx_SET (stack_pointer_rtx,
27263 plus_constant (Pmode, stack_pointer_rtx,
27264 -amount));
27265 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27266 RTX_FRAME_RELATED_P (insn) = 1;
27267 }
27268 }
27269
27270 if (frame_pointer_needed)
27271 thumb_set_frame_pointer (offsets);
27272
27273 /* If we are profiling, make sure no instructions are scheduled before
27274 the call to mcount. Similarly if the user has requested no
27275 scheduling in the prolog. Similarly if we want non-call exceptions
27276 using the EABI unwinder, to prevent faulting instructions from being
27277 swapped with a stack adjustment. */
27278 if (crtl->profile || !TARGET_SCHED_PROLOG
27279 || (arm_except_unwind_info (&global_options) == UI_TARGET
27280 && cfun->can_throw_non_call_exceptions))
27281 emit_insn (gen_blockage ());
27282
27283 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27284 if (live_regs_mask & 0xff)
27285 cfun->machine->lr_save_eliminated = 0;
27286 }
27287
27288 /* Before exiting a cmse_nonsecure_entry function, clear the caller-saved
27289    registers not used to pass return values, and any leaked condition flags.  */
27290
27291 void
27292 cmse_nonsecure_entry_clear_before_return (void)
27293 {
27294 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27295 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27296 uint32_t padding_bits_to_clear = 0;
27297 auto_sbitmap to_clear_bitmap (maxregno + 1);
27298 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27299 tree result_type;
27300
27301 bitmap_clear (to_clear_bitmap);
27302 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27303 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27304
27305 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27306 registers. */
27307 if (clear_vfpregs)
27308 {
27309 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27310
27311 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27312
27313 if (!TARGET_HAVE_FPCXT_CMSE)
27314 {
27315 /* Make sure we don't clear the two scratch registers used to clear
27316 the relevant FPSCR bits in output_return_instruction. */
27317 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27318 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27319 emit_use (gen_rtx_REG (SImode, 4));
27320 bitmap_clear_bit (to_clear_bitmap, 4);
27321 }
27322 }
27323
27324 /* If the user has defined registers to be caller saved, these are no longer
27325 restored by the function before returning and must thus be cleared for
27326 security purposes. */
27327 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27328 {
27329 /* We do not touch registers that can be used to pass arguments as per
27330 the AAPCS, since these should never be made callee-saved by user
27331 options. */
27332 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27333 continue;
27334 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27335 continue;
27336 if (!callee_saved_reg_p (regno)
27337 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27338 || TARGET_HARD_FLOAT))
27339 bitmap_set_bit (to_clear_bitmap, regno);
27340 }
27341
27342   /* Make sure we do not clear the registers in which the result is returned.  */
27343 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27344 if (!VOID_TYPE_P (result_type))
27345 {
27346 uint64_t to_clear_return_mask;
27347 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27348
27349 /* No need to check that we return in registers, because we don't
27350 support returning on stack yet. */
27351 gcc_assert (REG_P (result_rtl));
27352 to_clear_return_mask
27353 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27354 &padding_bits_to_clear);
27355 if (to_clear_return_mask)
27356 {
27357 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27358 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27359 {
27360 if (to_clear_return_mask & (1ULL << regno))
27361 bitmap_clear_bit (to_clear_bitmap, regno);
27362 }
27363 }
27364 }
27365
27366 if (padding_bits_to_clear != 0)
27367 {
27368 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27369 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27370
27371 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27372 returning a composite type, which only uses r0. Let's make sure that
27373	 r1-r3 are cleared too.  */
27374 bitmap_clear (to_clear_arg_regs_bitmap);
27375 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27376 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27377 }
27378
27379 /* Clear full registers that leak before returning. */
27380 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27381 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27382 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27383 clearing_reg);
27384 }
27385
27386 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27387    POP instruction can be generated.  LR should be replaced by PC.  All
27388    the checks required are already done by USE_RETURN_INSN ().  Hence,
27389    all we really need to check here is whether a single register or
27390    multiple registers are to be returned.  */
27391 void
27392 thumb2_expand_return (bool simple_return)
27393 {
27394 int i, num_regs;
27395 unsigned long saved_regs_mask;
27396 arm_stack_offsets *offsets;
27397
27398 offsets = arm_get_frame_offsets ();
27399 saved_regs_mask = offsets->saved_regs_mask;
27400
27401 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27402 if (saved_regs_mask & (1 << i))
27403 num_regs++;
27404
27405 if (!simple_return && saved_regs_mask)
27406 {
27407 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27408 functions or adapt code to handle according to ACLE. This path should
27409 not be reachable for cmse_nonsecure_entry functions though we prefer
27410 to assert it for now to ensure that future code changes do not silently
27411 change this behavior. */
27412 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27413 if (num_regs == 1)
27414 {
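	  /* Note: this builds (parallel [(return) (set pc (mem (post_inc sp)))]),
	     i.e. pop the single saved register (the return address) straight
	     into the PC and return in one insn.  */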
27415 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27416 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27417 rtx addr = gen_rtx_MEM (SImode,
27418 gen_rtx_POST_INC (SImode,
27419 stack_pointer_rtx));
27420 set_mem_alias_set (addr, get_frame_alias_set ());
27421 XVECEXP (par, 0, 0) = ret_rtx;
27422 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27423 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27424 emit_jump_insn (par);
27425 }
27426 else
27427 {
27428 saved_regs_mask &= ~ (1 << LR_REGNUM);
27429 saved_regs_mask |= (1 << PC_REGNUM);
27430 arm_emit_multi_reg_pop (saved_regs_mask);
27431 }
27432 }
27433 else
27434 {
27435 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27436 cmse_nonsecure_entry_clear_before_return ();
27437 emit_jump_insn (simple_return_rtx);
27438 }
27439 }
27440
27441 void
27442 thumb1_expand_epilogue (void)
27443 {
27444 HOST_WIDE_INT amount;
27445 arm_stack_offsets *offsets;
27446 int regno;
27447
27448   /* Naked functions don't have epilogues.  */
27449 if (IS_NAKED (arm_current_func_type ()))
27450 return;
27451
27452 offsets = arm_get_frame_offsets ();
27453 amount = offsets->outgoing_args - offsets->saved_regs;
27454
27455 if (frame_pointer_needed)
27456 {
27457 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27458 amount = offsets->locals_base - offsets->saved_regs;
27459 }
27460 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27461
27462 gcc_assert (amount >= 0);
27463 if (amount)
27464 {
27465 emit_insn (gen_blockage ());
27466
27467 if (amount < 512)
27468 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27469 GEN_INT (amount)));
27470 else
27471 {
27472 /* r3 is always free in the epilogue. */
27473 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27474
27475 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27476 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27477 }
27478 }
27479
27480 /* Emit a USE (stack_pointer_rtx), so that
27481 the stack adjustment will not be deleted. */
27482 emit_insn (gen_force_register_use (stack_pointer_rtx));
27483
27484 if (crtl->profile || !TARGET_SCHED_PROLOG)
27485 emit_insn (gen_blockage ());
27486
27487 /* Emit a clobber for each insn that will be restored in the epilogue,
27488 so that flow2 will get register lifetimes correct. */
27489 for (regno = 0; regno < 13; regno++)
27490 if (reg_needs_saving_p (regno))
27491 emit_clobber (gen_rtx_REG (SImode, regno));
27492
27493 if (! df_regs_ever_live_p (LR_REGNUM))
27494 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27495
27496 /* Clear all caller-saved regs that are not used to return. */
27497 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27498 cmse_nonsecure_entry_clear_before_return ();
27499 }
27500
27501 /* Epilogue code for APCS frame. */
27502 static void
27503 arm_expand_epilogue_apcs_frame (bool really_return)
27504 {
27505 unsigned long func_type;
27506 unsigned long saved_regs_mask;
27507 int num_regs = 0;
27508 int i;
27509 int floats_from_frame = 0;
27510 arm_stack_offsets *offsets;
27511
27512 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27513 func_type = arm_current_func_type ();
27514
27515 /* Get frame offsets for ARM. */
27516 offsets = arm_get_frame_offsets ();
27517 saved_regs_mask = offsets->saved_regs_mask;
27518
27519 /* Find the offset of the floating-point save area in the frame. */
27520 floats_from_frame
27521 = (offsets->saved_args
27522 + arm_compute_static_chain_stack_bytes ()
27523 - offsets->frame);
27524
27525   /* Compute how many core registers are saved and how far away the floats are.  */
27526 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27527 if (saved_regs_mask & (1 << i))
27528 {
27529 num_regs++;
27530 floats_from_frame += 4;
27531 }
27532
27533 if (TARGET_VFP_BASE)
27534 {
27535 int start_reg;
27536 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27537
27538 /* The offset is from IP_REGNUM. */
27539 int saved_size = arm_get_vfp_saved_size ();
27540 if (saved_size > 0)
27541 {
27542 rtx_insn *insn;
27543 floats_from_frame += saved_size;
27544 insn = emit_insn (gen_addsi3 (ip_rtx,
27545 hard_frame_pointer_rtx,
27546 GEN_INT (-floats_from_frame)));
27547 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27548 ip_rtx, hard_frame_pointer_rtx);
27549 }
27550
27551 /* Generate VFP register multi-pop. */
27552 start_reg = FIRST_VFP_REGNUM;
27553
27554 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27555 /* Look for a case where a reg does not need restoring. */
27556 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27557 {
27558 if (start_reg != i)
27559 arm_emit_vfp_multi_reg_pop (start_reg,
27560 (i - start_reg) / 2,
27561 gen_rtx_REG (SImode,
27562 IP_REGNUM));
27563 start_reg = i + 2;
27564 }
27565
27566 /* Restore the remaining regs that we have discovered (or possibly
27567 even all of them, if the conditional in the for loop never
27568 fired). */
27569 if (start_reg != i)
27570 arm_emit_vfp_multi_reg_pop (start_reg,
27571 (i - start_reg) / 2,
27572 gen_rtx_REG (SImode, IP_REGNUM));
27573 }
27574
27575 if (TARGET_IWMMXT)
27576 {
27577       /* The frame pointer is guaranteed to be non-double-word aligned, as
27578	 it is set to the double-word-aligned old stack pointer minus 4.  */
27579 rtx_insn *insn;
27580 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27581
27582 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27583 if (reg_needs_saving_p (i))
27584 {
27585 rtx addr = gen_frame_mem (V2SImode,
27586 plus_constant (Pmode, hard_frame_pointer_rtx,
27587 - lrm_count * 4));
27588 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27589 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27590 gen_rtx_REG (V2SImode, i),
27591 NULL_RTX);
27592 lrm_count += 2;
27593 }
27594 }
27595
27596   /* saved_regs_mask should contain IP, which holds the old stack pointer
27597      at the time the activation record was created.  Since SP and IP are
27598      adjacent registers, we can restore the value directly into SP.  */
27599 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27600 saved_regs_mask &= ~(1 << IP_REGNUM);
27601 saved_regs_mask |= (1 << SP_REGNUM);
27602
27603 /* There are two registers left in saved_regs_mask - LR and PC. We
27604 only need to restore LR (the return address), but to
27605 save time we can load it directly into PC, unless we need a
27606 special function exit sequence, or we are not really returning. */
27607 if (really_return
27608 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27609 && !crtl->calls_eh_return)
27610 /* Delete LR from the register mask, so that LR on
27611 the stack is loaded into the PC in the register mask. */
27612 saved_regs_mask &= ~(1 << LR_REGNUM);
27613 else
27614 saved_regs_mask &= ~(1 << PC_REGNUM);
27615
27616 num_regs = bit_count (saved_regs_mask);
27617 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27618 {
27619 rtx_insn *insn;
27620 emit_insn (gen_blockage ());
27621 /* Unwind the stack to just below the saved registers. */
27622 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27623 hard_frame_pointer_rtx,
27624 GEN_INT (- 4 * num_regs)));
27625
27626 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27627 stack_pointer_rtx, hard_frame_pointer_rtx);
27628 }
27629
27630 arm_emit_multi_reg_pop (saved_regs_mask);
27631
27632 if (IS_INTERRUPT (func_type))
27633 {
27634 /* Interrupt handlers will have pushed the
27635 IP onto the stack, so restore it now. */
27636 rtx_insn *insn;
27637 rtx addr = gen_rtx_MEM (SImode,
27638 gen_rtx_POST_INC (SImode,
27639 stack_pointer_rtx));
27640 set_mem_alias_set (addr, get_frame_alias_set ());
27641 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27642 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27643 gen_rtx_REG (SImode, IP_REGNUM),
27644 NULL_RTX);
27645 }
27646
27647 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27648 return;
27649
27650 if (crtl->calls_eh_return)
27651 emit_insn (gen_addsi3 (stack_pointer_rtx,
27652 stack_pointer_rtx,
27653 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27654
27655 if (IS_STACKALIGN (func_type))
27656     /* Restore the original stack pointer.  Before the prologue, the stack was
27657 realigned and the original stack pointer saved in r0. For details,
27658 see comment in arm_expand_prologue. */
27659 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27660
27661 emit_jump_insn (simple_return_rtx);
27662 }
27663
27664 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27665 function is not a sibcall. */
27666 void
27667 arm_expand_epilogue (bool really_return)
27668 {
27669 unsigned long func_type;
27670 unsigned long saved_regs_mask;
27671 int num_regs = 0;
27672 int i;
27673 int amount;
27674 arm_stack_offsets *offsets;
27675
27676 func_type = arm_current_func_type ();
27677
27678   /* Naked functions don't have epilogues.  Hence, generate the return pattern
27679      and let output_return_instruction take care of any instruction emission.  */
27680 if (IS_NAKED (func_type)
27681 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27682 {
27683 if (really_return)
27684 emit_jump_insn (simple_return_rtx);
27685 return;
27686 }
27687
27688 /* If we are throwing an exception, then we really must be doing a
27689 return, so we can't tail-call. */
27690 gcc_assert (!crtl->calls_eh_return || really_return);
27691
27692 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27693 {
27694 arm_expand_epilogue_apcs_frame (really_return);
27695 return;
27696 }
27697
27698 /* Get frame offsets for ARM. */
27699 offsets = arm_get_frame_offsets ();
27700 saved_regs_mask = offsets->saved_regs_mask;
27701 num_regs = bit_count (saved_regs_mask);
27702
27703 if (frame_pointer_needed)
27704 {
27705 rtx_insn *insn;
27706 /* Restore stack pointer if necessary. */
27707 if (TARGET_ARM)
27708 {
27709 /* In ARM mode, frame pointer points to first saved register.
27710 Restore stack pointer to last saved register. */
27711 amount = offsets->frame - offsets->saved_regs;
27712
27713 /* Force out any pending memory operations that reference stacked data
27714 before stack de-allocation occurs. */
27715 emit_insn (gen_blockage ());
27716 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27717 hard_frame_pointer_rtx,
27718 GEN_INT (amount)));
27719 arm_add_cfa_adjust_cfa_note (insn, amount,
27720 stack_pointer_rtx,
27721 hard_frame_pointer_rtx);
27722
27723 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27724 deleted. */
27725 emit_insn (gen_force_register_use (stack_pointer_rtx));
27726 }
27727 else
27728 {
27729 /* In Thumb-2 mode, the frame pointer points to the last saved
27730 register. */
27731 amount = offsets->locals_base - offsets->saved_regs;
27732 if (amount)
27733 {
27734 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27735 hard_frame_pointer_rtx,
27736 GEN_INT (amount)));
27737 arm_add_cfa_adjust_cfa_note (insn, amount,
27738 hard_frame_pointer_rtx,
27739 hard_frame_pointer_rtx);
27740 }
27741
27742 /* Force out any pending memory operations that reference stacked data
27743 before stack de-allocation occurs. */
27744 emit_insn (gen_blockage ());
27745 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27746 hard_frame_pointer_rtx));
27747 arm_add_cfa_adjust_cfa_note (insn, 0,
27748 stack_pointer_rtx,
27749 hard_frame_pointer_rtx);
27750 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27751 deleted. */
27752 emit_insn (gen_force_register_use (stack_pointer_rtx));
27753 }
27754 }
27755 else
27756 {
27757 /* Pop off outgoing args and local frame to adjust stack pointer to
27758 last saved register. */
27759 amount = offsets->outgoing_args - offsets->saved_regs;
27760 if (amount)
27761 {
27762 rtx_insn *tmp;
27763 /* Force out any pending memory operations that reference stacked data
27764 before stack de-allocation occurs. */
27765 emit_insn (gen_blockage ());
27766 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27767 stack_pointer_rtx,
27768 GEN_INT (amount)));
27769 arm_add_cfa_adjust_cfa_note (tmp, amount,
27770 stack_pointer_rtx, stack_pointer_rtx);
27771 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27772 not deleted. */
27773 emit_insn (gen_force_register_use (stack_pointer_rtx));
27774 }
27775 }
27776
27777 if (TARGET_VFP_BASE)
27778 {
27779 /* Generate VFP register multi-pop. */
27780 int end_reg = LAST_VFP_REGNUM + 1;
27781
27782 /* Scan the registers in reverse order. We need to match
27783 any groupings made in the prologue and generate matching
27784 vldm operations. The need to match groups is because,
27785 unlike pop, vldm can only do consecutive regs. */
27786 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27787 /* Look for a case where a reg does not need restoring. */
27788 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27789 {
27790 /* Restore the regs discovered so far (from reg+2 to
27791 end_reg). */
27792 if (end_reg > i + 2)
27793 arm_emit_vfp_multi_reg_pop (i + 2,
27794 (end_reg - (i + 2)) / 2,
27795 stack_pointer_rtx);
27796 end_reg = i;
27797 }
27798
27799 /* Restore the remaining regs that we have discovered (or possibly
27800 even all of them, if the conditional in the for loop never
27801 fired). */
27802 if (end_reg > i + 2)
27803 arm_emit_vfp_multi_reg_pop (i + 2,
27804 (end_reg - (i + 2)) / 2,
27805 stack_pointer_rtx);
27806 }
27807
27808 if (TARGET_IWMMXT)
27809 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27810 if (reg_needs_saving_p (i))
27811 {
27812 rtx_insn *insn;
27813 rtx addr = gen_rtx_MEM (V2SImode,
27814 gen_rtx_POST_INC (SImode,
27815 stack_pointer_rtx));
27816 set_mem_alias_set (addr, get_frame_alias_set ());
27817 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27818 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27819 gen_rtx_REG (V2SImode, i),
27820 NULL_RTX);
27821 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27822 stack_pointer_rtx, stack_pointer_rtx);
27823 }
27824
27825 if (saved_regs_mask)
27826 {
27827 rtx insn;
27828 bool return_in_pc = false;
27829
27830 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27831 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27832 && !IS_CMSE_ENTRY (func_type)
27833 && !IS_STACKALIGN (func_type)
27834 && really_return
27835 && crtl->args.pretend_args_size == 0
27836 && saved_regs_mask & (1 << LR_REGNUM)
27837 && !crtl->calls_eh_return)
27838 {
27839 saved_regs_mask &= ~(1 << LR_REGNUM);
27840 saved_regs_mask |= (1 << PC_REGNUM);
27841 return_in_pc = true;
27842 }
27843
27844 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27845 {
27846 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27847 if (saved_regs_mask & (1 << i))
27848 {
27849 rtx addr = gen_rtx_MEM (SImode,
27850 gen_rtx_POST_INC (SImode,
27851 stack_pointer_rtx));
27852 set_mem_alias_set (addr, get_frame_alias_set ());
27853
27854 if (i == PC_REGNUM)
27855 {
27856 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27857 XVECEXP (insn, 0, 0) = ret_rtx;
27858 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27859 addr);
27860 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27861 insn = emit_jump_insn (insn);
27862 }
27863 else
27864 {
27865 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27866 addr));
27867 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27868 gen_rtx_REG (SImode, i),
27869 NULL_RTX);
27870 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27871 stack_pointer_rtx,
27872 stack_pointer_rtx);
27873 }
27874 }
27875 }
27876 else
27877 {
27878 if (TARGET_LDRD
27879 && current_tune->prefer_ldrd_strd
27880 && !optimize_function_for_size_p (cfun))
27881 {
27882 if (TARGET_THUMB2)
27883 thumb2_emit_ldrd_pop (saved_regs_mask);
27884 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27885 arm_emit_ldrd_pop (saved_regs_mask);
27886 else
27887 arm_emit_multi_reg_pop (saved_regs_mask);
27888 }
27889 else
27890 arm_emit_multi_reg_pop (saved_regs_mask);
27891 }
27892
27893 if (return_in_pc)
27894 return;
27895 }
27896
27897 amount
27898 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27899 if (amount)
27900 {
27901 int i, j;
27902 rtx dwarf = NULL_RTX;
27903 rtx_insn *tmp =
27904 emit_insn (gen_addsi3 (stack_pointer_rtx,
27905 stack_pointer_rtx,
27906 GEN_INT (amount)));
27907
27908 RTX_FRAME_RELATED_P (tmp) = 1;
27909
27910 if (cfun->machine->uses_anonymous_args)
27911 {
27912	  /* Restore pretend args.  Refer to arm_expand_prologue for how the
27913	     pretend args are saved on the stack.  */
27914 int num_regs = crtl->args.pretend_args_size / 4;
27915 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
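	  /* Note: this selects the highest NUM_REGS argument registers, ending
	     at r3; e.g. num_regs == 2 gives a mask of 0xc, i.e. {r2, r3}.  */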
27916 for (j = 0, i = 0; j < num_regs; i++)
27917 if (saved_regs_mask & (1 << i))
27918 {
27919 rtx reg = gen_rtx_REG (SImode, i);
27920 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27921 j++;
27922 }
27923 REG_NOTES (tmp) = dwarf;
27924 }
27925 arm_add_cfa_adjust_cfa_note (tmp, amount,
27926 stack_pointer_rtx, stack_pointer_rtx);
27927 }
27928
27929 if (IS_CMSE_ENTRY (func_type))
27930 {
27931 /* CMSE_ENTRY always returns. */
27932 gcc_assert (really_return);
27933 /* Clear all caller-saved regs that are not used to return. */
27934 cmse_nonsecure_entry_clear_before_return ();
27935
27936 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27937 VLDR. */
27938 if (TARGET_HAVE_FPCXT_CMSE)
27939 {
27940 rtx_insn *insn;
27941
27942 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27943 GEN_INT (FPCXTNS_ENUM)));
27944 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27945 plus_constant (Pmode, stack_pointer_rtx, 4));
27946 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27947 RTX_FRAME_RELATED_P (insn) = 1;
27948 }
27949 }
27950
27951 if (!really_return)
27952 return;
27953
27954 if (crtl->calls_eh_return)
27955 emit_insn (gen_addsi3 (stack_pointer_rtx,
27956 stack_pointer_rtx,
27957 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27958
27959 if (IS_STACKALIGN (func_type))
27960     /* Restore the original stack pointer.  Before the prologue, the stack was
27961 realigned and the original stack pointer saved in r0. For details,
27962 see comment in arm_expand_prologue. */
27963 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27964
27965 emit_jump_insn (simple_return_rtx);
27966 }
27967
27968 /* Implementation of insn prologue_thumb1_interwork. This is the first
27969 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27970
27971 const char *
27972 thumb1_output_interwork (void)
27973 {
27974 const char * name;
27975 FILE *f = asm_out_file;
27976
27977 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27978 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27979 == SYMBOL_REF);
27980 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27981
27982 /* Generate code sequence to switch us into Thumb mode. */
27983 /* The .code 32 directive has already been emitted by
27984 ASM_DECLARE_FUNCTION_NAME. */
27985 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27986 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27987
27988 /* Generate a label, so that the debugger will notice the
27989 change in instruction sets. This label is also used by
27990 the assembler to bypass the ARM code when this function
27991 is called from a Thumb encoded function elsewhere in the
27992 same file. Hence the definition of STUB_NAME here must
27993 agree with the definition in gas/config/tc-arm.c. */
27994
27995 #define STUB_NAME ".real_start_of"
27996
27997 fprintf (f, "\t.code\t16\n");
27998 #ifdef ARM_PE
27999 if (arm_dllexport_name_p (name))
28000 name = arm_strip_name_encoding (name);
28001 #endif
28002 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28003 fprintf (f, "\t.thumb_func\n");
28004 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28005
28006 return "";
28007 }
28008
28009 /* Handle the case of a double word load into a low register from
28010 a computed memory address. The computed address may involve a
28011 register which is overwritten by the load. */
28012 const char *
28013 thumb_load_double_from_address (rtx *operands)
28014 {
28015 rtx addr;
28016 rtx base;
28017 rtx offset;
28018 rtx arg1;
28019 rtx arg2;
28020
28021 gcc_assert (REG_P (operands[0]));
28022 gcc_assert (MEM_P (operands[1]));
28023
28024 /* Get the memory address. */
28025 addr = XEXP (operands[1], 0);
28026
28027 /* Work out how the memory address is computed. */
28028 switch (GET_CODE (addr))
28029 {
28030 case REG:
28031 operands[2] = adjust_address (operands[1], SImode, 4);
28032
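      /* If the base register is also the low destination register, load the
	 high word first so the base is not clobbered before the second load.  */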
28033 if (REGNO (operands[0]) == REGNO (addr))
28034 {
28035 output_asm_insn ("ldr\t%H0, %2", operands);
28036 output_asm_insn ("ldr\t%0, %1", operands);
28037 }
28038 else
28039 {
28040 output_asm_insn ("ldr\t%0, %1", operands);
28041 output_asm_insn ("ldr\t%H0, %2", operands);
28042 }
28043 break;
28044
28045 case CONST:
28046 /* Compute <address> + 4 for the high order load. */
28047 operands[2] = adjust_address (operands[1], SImode, 4);
28048
28049 output_asm_insn ("ldr\t%0, %1", operands);
28050 output_asm_insn ("ldr\t%H0, %2", operands);
28051 break;
28052
28053 case PLUS:
28054 arg1 = XEXP (addr, 0);
28055 arg2 = XEXP (addr, 1);
28056
28057 if (CONSTANT_P (arg1))
28058 base = arg2, offset = arg1;
28059 else
28060 base = arg1, offset = arg2;
28061
28062 gcc_assert (REG_P (base));
28063
28064 /* Catch the case of <address> = <reg> + <reg> */
28065 if (REG_P (offset))
28066 {
28067 int reg_offset = REGNO (offset);
28068 int reg_base = REGNO (base);
28069 int reg_dest = REGNO (operands[0]);
28070
28071 /* Add the base and offset registers together into the
28072 higher destination register. */
28073 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28074 reg_dest + 1, reg_base, reg_offset);
28075
28076 /* Load the lower destination register from the address in
28077 the higher destination register. */
28078 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28079 reg_dest, reg_dest + 1);
28080
28081 /* Load the higher destination register from its own address
28082 plus 4. */
28083 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28084 reg_dest + 1, reg_dest + 1);
28085 }
28086 else
28087 {
28088 /* Compute <address> + 4 for the high order load. */
28089 operands[2] = adjust_address (operands[1], SImode, 4);
28090
28091 /* If the computed address is held in the low order register
28092 then load the high order register first, otherwise always
28093 load the low order register first. */
28094 if (REGNO (operands[0]) == REGNO (base))
28095 {
28096 output_asm_insn ("ldr\t%H0, %2", operands);
28097 output_asm_insn ("ldr\t%0, %1", operands);
28098 }
28099 else
28100 {
28101 output_asm_insn ("ldr\t%0, %1", operands);
28102 output_asm_insn ("ldr\t%H0, %2", operands);
28103 }
28104 }
28105 break;
28106
28107 case LABEL_REF:
28108 /* With no registers to worry about we can just load the value
28109 directly. */
28110 operands[2] = adjust_address (operands[1], SImode, 4);
28111
28112 output_asm_insn ("ldr\t%H0, %2", operands);
28113 output_asm_insn ("ldr\t%0, %1", operands);
28114 break;
28115
28116 default:
28117 gcc_unreachable ();
28118 }
28119
28120 return "";
28121 }
28122
28123 const char *
28124 thumb_output_move_mem_multiple (int n, rtx *operands)
28125 {
28126 switch (n)
28127 {
28128 case 2:
28129 if (REGNO (operands[4]) > REGNO (operands[5]))
28130 std::swap (operands[4], operands[5]);
28131
28132 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28133 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28134 break;
28135
28136 case 3:
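      /* Note: the swaps below sort the three scratch registers into ascending
	 register-number order, as required for the ldmia/stmia register lists.  */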
28137 if (REGNO (operands[4]) > REGNO (operands[5]))
28138 std::swap (operands[4], operands[5]);
28139 if (REGNO (operands[5]) > REGNO (operands[6]))
28140 std::swap (operands[5], operands[6]);
28141 if (REGNO (operands[4]) > REGNO (operands[5]))
28142 std::swap (operands[4], operands[5]);
28143
28144 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28145 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28146 break;
28147
28148 default:
28149 gcc_unreachable ();
28150 }
28151
28152 return "";
28153 }
28154
28155 /* Output a call-via instruction for thumb state. */
28156 const char *
28157 thumb_call_via_reg (rtx reg)
28158 {
28159 int regno = REGNO (reg);
28160 rtx *labelp;
28161
28162 gcc_assert (regno < LR_REGNUM);
28163
28164 /* If we are in the normal text section we can use a single instance
28165 per compilation unit. If we are doing function sections, then we need
28166 an entry per section, since we can't rely on reachability. */
28167 if (in_section == text_section)
28168 {
28169 thumb_call_reg_needed = 1;
28170
28171 if (thumb_call_via_label[regno] == NULL)
28172 thumb_call_via_label[regno] = gen_label_rtx ();
28173 labelp = thumb_call_via_label + regno;
28174 }
28175 else
28176 {
28177 if (cfun->machine->call_via[regno] == NULL)
28178 cfun->machine->call_via[regno] = gen_label_rtx ();
28179 labelp = cfun->machine->call_via + regno;
28180 }
28181
28182 output_asm_insn ("bl\t%a0", labelp);
28183 return "";
28184 }
28185
28186 /* Routines for generating rtl. */
28187 void
28188 thumb_expand_cpymemqi (rtx *operands)
28189 {
28190 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28191 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28192 HOST_WIDE_INT len = INTVAL (operands[2]);
28193 HOST_WIDE_INT offset = 0;
28194
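  /* Bulk-copy 12 and 8 bytes at a time (the cpymem patterns take OUT/IN as
     both source and destination operands, so they advance the pointers), then
     finish the tail with word, halfword and byte moves; OFFSET only tracks
     the tail.  */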
28195 while (len >= 12)
28196 {
28197 emit_insn (gen_cpymem12b (out, in, out, in));
28198 len -= 12;
28199 }
28200
28201 if (len >= 8)
28202 {
28203 emit_insn (gen_cpymem8b (out, in, out, in));
28204 len -= 8;
28205 }
28206
28207 if (len >= 4)
28208 {
28209 rtx reg = gen_reg_rtx (SImode);
28210 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28211 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28212 len -= 4;
28213 offset += 4;
28214 }
28215
28216 if (len >= 2)
28217 {
28218 rtx reg = gen_reg_rtx (HImode);
28219 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28220 plus_constant (Pmode, in,
28221 offset))));
28222 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28223 offset)),
28224 reg));
28225 len -= 2;
28226 offset += 2;
28227 }
28228
28229 if (len)
28230 {
28231 rtx reg = gen_reg_rtx (QImode);
28232 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28233 plus_constant (Pmode, in,
28234 offset))));
28235 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28236 offset)),
28237 reg));
28238 }
28239 }
28240
28241 void
28242 thumb_reload_out_hi (rtx *operands)
28243 {
28244 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28245 }
28246
28247 /* Return the length of a function name prefix
28248 that starts with the character 'c'. */
28249 static int
28250 arm_get_strip_length (int c)
28251 {
28252 switch (c)
28253 {
28254 ARM_NAME_ENCODING_LENGTHS
28255 default: return 0;
28256 }
28257 }
28258
28259 /* Return a pointer to a function's name with any
28260 and all prefix encodings stripped from it. */
28261 const char *
28262 arm_strip_name_encoding (const char *name)
28263 {
28264 int skip;
28265
28266 while ((skip = arm_get_strip_length (* name)))
28267 name += skip;
28268
28269 return name;
28270 }
28271
28272 /* If there is a '*' anywhere in the name's prefix, then
28273 emit the stripped name verbatim, otherwise prepend an
28274 underscore if leading underscores are being used. */
28275 void
28276 arm_asm_output_labelref (FILE *stream, const char *name)
28277 {
28278 int skip;
28279 int verbatim = 0;
28280
28281 while ((skip = arm_get_strip_length (* name)))
28282 {
28283 verbatim |= (*name == '*');
28284 name += skip;
28285 }
28286
28287 if (verbatim)
28288 fputs (name, stream);
28289 else
28290 asm_fprintf (stream, "%U%s", name);
28291 }
28292
28293 /* This function is used to emit an EABI tag and its associated value.
28294 We emit the numerical value of the tag in case the assembler does not
28295    support textual tags (e.g. gas prior to 2.20).  If requested we include
28296 the tag name in a comment so that anyone reading the assembler output
28297 will know which tag is being set.
28298
28299 This function is not static because arm-c.cc needs it too. */
28300
28301 void
28302 arm_emit_eabi_attribute (const char *name, int num, int val)
28303 {
28304 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28305 if (flag_verbose_asm || flag_debug_asm)
28306 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28307 asm_fprintf (asm_out_file, "\n");
28308 }
28309
28310 /* This function is used to print CPU tuning information as comment
28311 in assembler file. Pointers are not printed for now. */
28312
28313 void
28314 arm_print_tune_info (void)
28315 {
28316 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28317 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28318 current_tune->constant_limit);
28319 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28320 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28321 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28322 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28323 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28324 "prefetch.l1_cache_size:\t%d\n",
28325 current_tune->prefetch.l1_cache_size);
28326 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28327 "prefetch.l1_cache_line_size:\t%d\n",
28328 current_tune->prefetch.l1_cache_line_size);
28329 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28330 "prefer_constant_pool:\t%d\n",
28331 (int) current_tune->prefer_constant_pool);
28332 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28333 "branch_cost:\t(s:speed, p:predictable)\n");
28334 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28335 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28336 current_tune->branch_cost (false, false));
28337 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28338 current_tune->branch_cost (false, true));
28339 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28340 current_tune->branch_cost (true, false));
28341 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28342 current_tune->branch_cost (true, true));
28343 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28344 "prefer_ldrd_strd:\t%d\n",
28345 (int) current_tune->prefer_ldrd_strd);
28346 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28347 "logical_op_non_short_circuit:\t[%d,%d]\n",
28348 (int) current_tune->logical_op_non_short_circuit_thumb,
28349 (int) current_tune->logical_op_non_short_circuit_arm);
28350 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28351 "disparage_flag_setting_t16_encodings:\t%d\n",
28352 (int) current_tune->disparage_flag_setting_t16_encodings);
28353 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28354 "string_ops_prefer_neon:\t%d\n",
28355 (int) current_tune->string_ops_prefer_neon);
28356 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28357 "max_insns_inline_memset:\t%d\n",
28358 current_tune->max_insns_inline_memset);
28359 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28360 current_tune->fusible_ops);
28361 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28362 (int) current_tune->sched_autopref);
28363 }
28364
28365 /* The last set of target options used to emit .arch directives, etc. This
28366 could be a function-local static if it were not required to expose it as a
28367 root to the garbage collector. */
28368 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28369
28370 /* Print .arch and .arch_extension directives corresponding to the
28371 current architecture configuration. */
28372 static void
28373 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28374 {
28375 arm_build_target build_target;
28376 /* If the target options haven't changed since the last time we were called
28377 there is nothing to do. This should be sufficient to suppress the
28378 majority of redundant work. */
28379 if (last_asm_targ_options == targ_options)
28380 return;
28381
28382 last_asm_targ_options = targ_options;
28383
28384 build_target.isa = sbitmap_alloc (isa_num_bits);
28385 arm_configure_build_target (&build_target, targ_options, false);
28386
28387 if (build_target.core_name
28388 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28389 {
28390 const char* truncated_name
28391 = arm_rewrite_selected_cpu (build_target.core_name);
28392 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28393 }
28394
28395 const arch_option *arch
28396 = arm_parse_arch_option_name (all_architectures, "-march",
28397 build_target.arch_name);
28398 auto_sbitmap opt_bits (isa_num_bits);
28399
28400 gcc_assert (arch);
28401
28402 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28403 {
28404	  /* Keep backward compatibility for assemblers which don't support
28405 armv7ve. Fortunately, none of the following extensions are reset
28406 by a .fpu directive. */
28407 asm_fprintf (stream, "\t.arch armv7-a\n");
28408 asm_fprintf (stream, "\t.arch_extension virt\n");
28409 asm_fprintf (stream, "\t.arch_extension idiv\n");
28410 asm_fprintf (stream, "\t.arch_extension sec\n");
28411 asm_fprintf (stream, "\t.arch_extension mp\n");
28412 }
28413 else
28414 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28415
28416 /* The .fpu directive will reset any architecture extensions from the
28417 assembler that relate to the fp/vector extensions. So put this out before
28418 any .arch_extension directives. */
28419 const char *fpu_name = (TARGET_SOFT_FLOAT
28420 ? "softvfp"
28421 : arm_identify_fpu_from_isa (build_target.isa));
28422 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28423
28424 if (!arch->common.extensions)
28425 return;
28426
28427 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28428 opt->name != NULL;
28429 opt++)
28430 {
28431 if (!opt->remove)
28432 {
28433 arm_initialize_isa (opt_bits, opt->isa_bits);
28434
28435 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28436 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28437 floating point instructions are disabled. So the following check
28438 restricts the printing of ".arch_extension mve" and
28439 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28440 this special behaviour because the feature bits "mve" and
28441 "mve_float" are not part of the "fpu bits", so they are not cleared
28442 when -mfloat-abi=soft (i.e. nofp) but the macros TARGET_HAVE_MVE and
28443 TARGET_HAVE_MVE_FLOAT are disabled. */
28444 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28445 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28446 && !TARGET_HAVE_MVE_FLOAT))
28447 continue;
28448
28449 /* If every feature bit of this option is set in the target ISA
28450 specification, print out the option name. However, don't print
28451 anything if all the bits are part of the FPU specification. */
28452 if (bitmap_subset_p (opt_bits, build_target.isa)
28453 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28454 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28455 }
28456 }
28457 }
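/* Illustrative sketch of the output of the function above for a
   hypothetical -march=armv7ve soft-float build (possibly preceded by a
   .cpu line; the exact .fpu name depends on the selected float options):

	.arch armv7-a
	.arch_extension virt
	.arch_extension idiv
	.arch_extension sec
	.arch_extension mp
	.fpu softvfp

   followed by an .arch_extension line for every extension whose feature
   bits are all present in the target ISA and not purely FPU bits.  */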
28458
28459 static void
28460 arm_file_start (void)
28461 {
28462 int val;
28463
28464 arm_print_asm_arch_directives
28465 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28466
28467 if (TARGET_BPABI)
28468 {
28469 /* If we have a named cpu, but the assembler does not support that
28470 name via .cpu, put out a cpu name attribute; but don't do this if the
28471 name starts with the fictitious prefix, 'generic'. */
28472 if (arm_active_target.core_name
28473 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28474 && !startswith (arm_active_target.core_name, "generic"))
28475 {
28476 const char* truncated_name
28477 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28478 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28479 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28480 truncated_name);
28481 }
28482
28483 if (print_tune_info)
28484 arm_print_tune_info ();
28485
28486 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28487 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28488
28489 if (TARGET_HARD_FLOAT_ABI)
28490 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28491
28492 /* Some of these attributes only apply when the corresponding features
28493 are used. However we don't have any easy way of figuring this out.
28494 Conservatively record the setting that would have been used. */
28495
28496 if (flag_rounding_math)
28497 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28498
28499 if (!flag_unsafe_math_optimizations)
28500 {
28501 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28502 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28503 }
28504 if (flag_signaling_nans)
28505 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28506
28507 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28508 flag_finite_math_only ? 1 : 3);
28509
28510 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28511 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28512 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28513 flag_short_enums ? 1 : 2);
28514
28515 /* Tag_ABI_optimization_goals. */
28516 if (optimize_size)
28517 val = 4;
28518 else if (optimize >= 2)
28519 val = 2;
28520 else if (optimize)
28521 val = 1;
28522 else
28523 val = 6;
28524 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28525
28526 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28527 unaligned_access);
28528
28529 if (arm_fp16_format)
28530 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28531 (int) arm_fp16_format);
28532
28533 if (arm_lang_output_object_attributes_hook)
28534 arm_lang_output_object_attributes_hook();
28535 }
28536
28537 default_file_start ();
28538 }
28539
28540 static void
28541 arm_file_end (void)
28542 {
28543 int regno;
28544
28545 /* Just in case the last function output in the assembler had non-default
28546 architecture directives, we force the assembler state back to the default
28547 set, so that any 'calculated' build attributes are based on the default
28548 options rather than the special options for that function. */
28549 arm_print_asm_arch_directives
28550 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28551
28552 if (NEED_INDICATE_EXEC_STACK)
28553 /* Add .note.GNU-stack. */
28554 file_end_indicate_exec_stack ();
28555
28556 if (! thumb_call_reg_needed)
28557 return;
28558
28559 switch_to_section (text_section);
28560 asm_fprintf (asm_out_file, "\t.code 16\n");
28561 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28562
28563 for (regno = 0; regno < LR_REGNUM; regno++)
28564 {
28565 rtx label = thumb_call_via_label[regno];
28566
28567 if (label != 0)
28568 {
28569 targetm.asm_out.internal_label (asm_out_file, "L",
28570 CODE_LABEL_NUMBER (label));
28571 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28572 }
28573 }
28574 }
28575
28576 #ifndef ARM_PE
28577 /* Symbols in the text segment can be accessed without indirecting via the
28578 constant pool; it may take an extra binary operation, but this is still
28579 faster than indirecting via memory. Don't do this when not optimizing,
28580 since we won't be calculating all of the offsets necessary to do this
28581 simplification. */
28582
28583 static void
28584 arm_encode_section_info (tree decl, rtx rtl, int first)
28585 {
28586 if (optimize > 0 && TREE_CONSTANT (decl))
28587 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28588
28589 default_encode_section_info (decl, rtl, first);
28590 }
28591 #endif /* !ARM_PE */
28592
28593 static void
28594 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28595 {
28596 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28597 && !strcmp (prefix, "L"))
28598 {
28599 arm_ccfsm_state = 0;
28600 arm_target_insn = NULL;
28601 }
28602 default_internal_label (stream, prefix, labelno);
28603 }
28604
28605 /* Define classes to generate code as RTL or output asm to a file.
28606 Using templates then allows us to use the same code to output code
28607 sequences in the two formats. */
28608 class thumb1_const_rtl
28609 {
28610 public:
28611 thumb1_const_rtl (rtx dst) : dst (dst) {}
28612
28613 void mov (HOST_WIDE_INT val)
28614 {
28615 emit_set_insn (dst, GEN_INT (val));
28616 }
28617
28618 void add (HOST_WIDE_INT val)
28619 {
28620 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28621 }
28622
28623 void ashift (HOST_WIDE_INT shift)
28624 {
28625 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28626 }
28627
28628 void neg ()
28629 {
28630 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28631 }
28632
28633 private:
28634 rtx dst;
28635 };
28636
28637 class thumb1_const_print
28638 {
28639 public:
28640 thumb1_const_print (FILE *f, int regno)
28641 {
28642 t_file = f;
28643 dst_regname = reg_names[regno];
28644 }
28645
28646 void mov (HOST_WIDE_INT val)
28647 {
28648 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28649 dst_regname, val);
28650 }
28651
28652 void add (HOST_WIDE_INT val)
28653 {
28654 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28655 dst_regname, val);
28656 }
28657
28658 void ashift (HOST_WIDE_INT shift)
28659 {
28660 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28661 dst_regname, shift);
28662 }
28663
28664 void neg ()
28665 {
28666 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28667 }
28668
28669 private:
28670 FILE *t_file;
28671 const char *dst_regname;
28672 };
28673
28674 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28675 Avoid generating useless code when one of the bytes is zero. */
28676 template <class T>
28677 void
28678 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28679 {
28680 bool mov_done_p = false;
28681 unsigned HOST_WIDE_INT val = op1;
28682 int shift = 0;
28683 int i;
28684
28685 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28686
28687 if (val <= 255)
28688 {
28689 dst.mov (val);
28690 return;
28691 }
28692
28693 /* For negative numbers with the top nine bits set, build the
28694 opposite of OP1, then negate it; this is generally shorter and
28695 never longer. */
28696 if ((val & 0xFF800000) == 0xFF800000)
28697 {
28698 thumb1_gen_const_int_1 (dst, -op1);
28699 dst.neg ();
28700 return;
28701 }
28702
28703 /* In the general case, we need 7 instructions to build
28704 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28705 do better if VAL is small enough, or
28706 right-shiftable by a suitable amount. If the
28707 right-shift enables us to encode at least one byte less,
28708 it's worth it: we save an adds and an lsls at the
28709 expense of a final lsls. */
28710 int final_shift = number_of_first_bit_set (val);
28711
28712 int leading_zeroes = clz_hwi (val);
28713 int number_of_bytes_needed
28714 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28715 / BITS_PER_UNIT) + 1;
28716 int number_of_bytes_needed2
28717 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28718 / BITS_PER_UNIT) + 1;
28719
28720 if (number_of_bytes_needed2 < number_of_bytes_needed)
28721 val >>= final_shift;
28722 else
28723 final_shift = 0;
28724
28725 /* If we are in a very small range, we can use either a single movs
28726 or movs+adds. */
28727 if (val <= 510)
28728 {
28729 if (val > 255)
28730 {
28731 unsigned HOST_WIDE_INT high = val - 255;
28732
28733 dst.mov (high);
28734 dst.add (255);
28735 }
28736 else
28737 dst.mov (val);
28738
28739 if (final_shift > 0)
28740 dst.ashift (final_shift);
28741 }
28742 else
28743 {
28744 /* General case, emit upper 3 bytes as needed. */
28745 for (i = 0; i < 3; i++)
28746 {
28747 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28748
28749 if (byte)
28750 {
28751 /* We are about to emit new bits, stop accumulating a
28752 shift amount, and left-shift only if we have already
28753 emitted some upper bits. */
28754 if (mov_done_p)
28755 {
28756 dst.ashift (shift);
28757 dst.add (byte);
28758 }
28759 else
28760 dst.mov (byte);
28761
28762 /* Stop accumulating shift amount since we've just
28763 emitted some bits. */
28764 shift = 0;
28765
28766 mov_done_p = true;
28767 }
28768
28769 if (mov_done_p)
28770 shift += 8;
28771 }
28772
28773 /* Emit lower byte. */
28774 if (!mov_done_p)
28775 dst.mov (val & 0xff);
28776 else
28777 {
28778 dst.ashift (shift);
28779 if (val & 0xff)
28780 dst.add (val & 0xff);
28781 }
28782
28783 if (final_shift > 0)
28784 dst.ashift (final_shift);
28785 }
28786 }
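/* Worked example (illustrative): for OP1 == 0x44000 the lowest set bit
   is bit 14, and right-shifting by 14 leaves 0x11, which needs one byte
   instead of three, so the sequence emitted is

	movs	rN, #17
	lsls	rN, #14

   rather than the longer movs/lsls/adds/lsls sequence that would be
   needed without the final left shift back.  */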
28787
28788 /* Proxies for thumb1.md, since the thumb1_const_print and
28789 thumb1_const_rtl classes are not exported. */
28790 void
28791 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28792 {
28793 thumb1_const_rtl t (dst);
28794 thumb1_gen_const_int_1 (t, op1);
28795 }
28796
28797 void
28798 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28799 {
28800 thumb1_const_print t (asm_out_file, REGNO (dst));
28801 thumb1_gen_const_int_1 (t, op1);
28802 }
28803
28804 /* Output code to add DELTA to the first argument, and then jump
28805 to FUNCTION. Used for C++ multiple inheritance. */
28806
28807 static void
28808 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28809 HOST_WIDE_INT, tree function)
28810 {
28811 static int thunk_label = 0;
28812 char label[256];
28813 char labelpc[256];
28814 int mi_delta = delta;
28815 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28816 int shift = 0;
28817 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28818 ? 1 : 0);
28819 if (mi_delta < 0)
28820 mi_delta = - mi_delta;
28821
28822 final_start_function (emit_barrier (), file, 1);
28823
28824 if (TARGET_THUMB1)
28825 {
28826 int labelno = thunk_label++;
28827 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28828 /* Thunks are entered in arm mode when available. */
28829 if (TARGET_THUMB1_ONLY)
28830 {
28831 /* push r3 so we can use it as a temporary. */
28832 /* TODO: Omit this save if r3 is not used. */
28833 fputs ("\tpush {r3}\n", file);
28834
28835 /* With -mpure-code, we cannot load the address from the
28836 constant pool: we build it explicitly. */
28837 if (target_pure_code)
28838 {
28839 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28840 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28841 fputc ('\n', file);
28842 fputs ("\tlsls r3, #8\n", file);
28843 fputs ("\tadds\tr3, #:upper0_7:#", file);
28844 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28845 fputc ('\n', file);
28846 fputs ("\tlsls r3, #8\n", file);
28847 fputs ("\tadds\tr3, #:lower8_15:#", file);
28848 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28849 fputc ('\n', file);
28850 fputs ("\tlsls r3, #8\n", file);
28851 fputs ("\tadds\tr3, #:lower0_7:#", file);
28852 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28853 fputc ('\n', file);
28854 }
28855 else
28856 fputs ("\tldr\tr3, ", file);
28857 }
28858 else
28859 {
28860 fputs ("\tldr\tr12, ", file);
28861 }
28862
28863 if (!target_pure_code)
28864 {
28865 assemble_name (file, label);
28866 fputc ('\n', file);
28867 }
28868
28869 if (flag_pic)
28870 {
28871 /* If we are generating PIC, the ldr instruction below loads
28872 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28873 the address of the add + 8, so we have:
28874
28875 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28876 = target + 1.
28877
28878 Note that we have "+ 1" because some versions of GNU ld
28879 don't set the low bit of the result for R_ARM_REL32
28880 relocations against thumb function symbols.
28881 On ARMv6M this is +4, not +8. */
28882 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28883 assemble_name (file, labelpc);
28884 fputs (":\n", file);
28885 if (TARGET_THUMB1_ONLY)
28886 {
28887 /* This is 2 insns after the start of the thunk, so we know it
28888 is 4-byte aligned. */
28889 fputs ("\tadd\tr3, pc, r3\n", file);
28890 fputs ("\tmov r12, r3\n", file);
28891 }
28892 else
28893 fputs ("\tadd\tr12, pc, r12\n", file);
28894 }
28895 else if (TARGET_THUMB1_ONLY)
28896 fputs ("\tmov r12, r3\n", file);
28897 }
28898 if (TARGET_THUMB1_ONLY)
28899 {
28900 if (mi_delta > 255)
28901 {
28902 /* With -mpure-code, we cannot load MI_DELTA from the
28903 constant pool: we build it explicitly. */
28904 if (target_pure_code)
28905 {
28906 thumb1_const_print r3 (file, 3);
28907 thumb1_gen_const_int_1 (r3, mi_delta);
28908 }
28909 else
28910 {
28911 fputs ("\tldr\tr3, ", file);
28912 assemble_name (file, label);
28913 fputs ("+4\n", file);
28914 }
28915 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28916 mi_op, this_regno, this_regno);
28917 }
28918 else if (mi_delta != 0)
28919 {
28920 /* Thumb1 unified syntax requires an s suffix in the instruction name
28921 when one of the operands is an immediate. */
28922 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28923 mi_op, this_regno, this_regno,
28924 mi_delta);
28925 }
28926 }
28927 else
28928 {
28929 /* TODO: Use movw/movt for large constants when available. */
28930 while (mi_delta != 0)
28931 {
28932 if ((mi_delta & (3 << shift)) == 0)
28933 shift += 2;
28934 else
28935 {
28936 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28937 mi_op, this_regno, this_regno,
28938 mi_delta & (0xff << shift));
28939 mi_delta &= ~(0xff << shift);
28940 shift += 8;
28941 }
28942 }
28943 }
28944 if (TARGET_THUMB1)
28945 {
28946 if (TARGET_THUMB1_ONLY)
28947 fputs ("\tpop\t{r3}\n", file);
28948
28949 fprintf (file, "\tbx\tr12\n");
28950
28951 /* With -mpure-code, we don't need to emit literals for the
28952 function address and delta since we emitted code to build
28953 them. */
28954 if (!target_pure_code)
28955 {
28956 ASM_OUTPUT_ALIGN (file, 2);
28957 assemble_name (file, label);
28958 fputs (":\n", file);
28959 if (flag_pic)
28960 {
28961 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28962 rtx tem = XEXP (DECL_RTL (function), 0);
28963 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28964 pipeline offset is four rather than eight. Adjust the offset
28965 accordingly. */
28966 tem = plus_constant (GET_MODE (tem), tem,
28967 TARGET_THUMB1_ONLY ? -3 : -7);
28968 tem = gen_rtx_MINUS (GET_MODE (tem),
28969 tem,
28970 gen_rtx_SYMBOL_REF (Pmode,
28971 ggc_strdup (labelpc)));
28972 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28973 }
28974 else
28975 /* Output ".word .LTHUNKn". */
28976 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28977
28978 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28979 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
28980 }
28981 }
28982 else
28983 {
28984 fputs ("\tb\t", file);
28985 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28986 if (NEED_PLT_RELOC)
28987 fputs ("(PLT)", file);
28988 fputc ('\n', file);
28989 }
28990
28991 final_end_function ();
28992 }
28993
28994 /* MI thunk handling for TARGET_32BIT. */
28995
28996 static void
28997 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28998 HOST_WIDE_INT vcall_offset, tree function)
28999 {
29000 const bool long_call_p = arm_is_long_call_p (function);
29001
29002 /* On ARM, this_regno is R0 or R1 depending on
29003 whether the function returns an aggregate or not.
29004 */
29005 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29006 function)
29007 ? R1_REGNUM : R0_REGNUM);
29008
29009 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29010 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29011 reload_completed = 1;
29012 emit_note (NOTE_INSN_PROLOGUE_END);
29013
29014 /* Add DELTA to THIS_RTX. */
29015 if (delta != 0)
29016 arm_split_constant (PLUS, Pmode, NULL_RTX,
29017 delta, this_rtx, this_rtx, false);
29018
29019 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29020 if (vcall_offset != 0)
29021 {
29022 /* Load *THIS_RTX. */
29023 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29024 /* Compute *THIS_RTX + VCALL_OFFSET. */
29025 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29026 false);
29027 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29028 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29029 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29030 }
29031
29032 /* Generate a tail call to the target function. */
29033 if (!TREE_USED (function))
29034 {
29035 assemble_external (function);
29036 TREE_USED (function) = 1;
29037 }
29038 rtx funexp = XEXP (DECL_RTL (function), 0);
29039 if (long_call_p)
29040 {
29041 emit_move_insn (temp, funexp);
29042 funexp = temp;
29043 }
29044 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29045 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29046 SIBLING_CALL_P (insn) = 1;
29047 emit_barrier ();
29048
29049 /* Indirect calls require a bit of fixup in PIC mode. */
29050 if (long_call_p)
29051 {
29052 split_all_insns_noflow ();
29053 arm_reorg ();
29054 }
29055
29056 insn = get_insns ();
29057 shorten_branches (insn);
29058 final_start_function (insn, file, 1);
29059 final (insn, file, 1);
29060 final_end_function ();
29061
29062 /* Stop pretending this is a post-reload pass. */
29063 reload_completed = 0;
29064 }
29065
29066 /* Output code to add DELTA to the first argument, and then jump
29067 to FUNCTION. Used for C++ multiple inheritance. */
29068
29069 static void
29070 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29071 HOST_WIDE_INT vcall_offset, tree function)
29072 {
29073 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29074
29075 assemble_start_function (thunk, fnname);
29076 if (TARGET_32BIT)
29077 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29078 else
29079 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29080 assemble_end_function (thunk, fnname);
29081 }
29082
29083 int
29084 arm_emit_vector_const (FILE *file, rtx x)
29085 {
29086 int i;
29087 const char * pattern;
29088
29089 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29090
29091 switch (GET_MODE (x))
29092 {
29093 case E_V2SImode: pattern = "%08x"; break;
29094 case E_V4HImode: pattern = "%04x"; break;
29095 case E_V8QImode: pattern = "%02x"; break;
29096 default: gcc_unreachable ();
29097 }
29098
29099 fprintf (file, "0x");
29100 for (i = CONST_VECTOR_NUNITS (x); i--;)
29101 {
29102 rtx element;
29103
29104 element = CONST_VECTOR_ELT (x, i);
29105 fprintf (file, pattern, INTVAL (element));
29106 }
29107
29108 return 1;
29109 }
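/* For instance (illustrative), a V4HImode CONST_VECTOR with elements
   1, 2, 3 and 4 is printed as "0x0004000300020001": the elements are
   emitted from the highest-numbered one down, each with the %04x
   pattern chosen above.  */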
29110
29111 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29112 HFmode constant pool entries are actually loaded with ldr. */
29113 void
29114 arm_emit_fp16_const (rtx c)
29115 {
29116 long bits;
29117
29118 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29119 if (WORDS_BIG_ENDIAN)
29120 assemble_zeros (2);
29121 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29122 if (!WORDS_BIG_ENDIAN)
29123 assemble_zeros (2);
29124 }
29125
29126 const char *
29127 arm_output_load_gr (rtx *operands)
29128 {
29129 rtx reg;
29130 rtx offset;
29131 rtx wcgr;
29132 rtx sum;
29133
29134 if (!MEM_P (operands [1])
29135 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29136 || !REG_P (reg = XEXP (sum, 0))
29137 || !CONST_INT_P (offset = XEXP (sum, 1))
29138 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29139 return "wldrw%?\t%0, %1";
29140
29141 /* Fix up an out-of-range load of a GR register. */
29142 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29143 wcgr = operands[0];
29144 operands[0] = reg;
29145 output_asm_insn ("ldr%?\t%0, %1", operands);
29146
29147 operands[0] = wcgr;
29148 operands[1] = reg;
29149 output_asm_insn ("tmcr%?\t%0, %1", operands);
29150 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29151
29152 return "";
29153 }
29154
29155 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29156
29157 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29158 named arg and all anonymous args onto the stack.
29159 XXX I know the prologue shouldn't be pushing registers, but it is faster
29160 that way. */
29161
29162 static void
29163 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29164 const function_arg_info &arg,
29165 int *pretend_size,
29166 int second_time ATTRIBUTE_UNUSED)
29167 {
29168 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29169 int nregs;
29170
29171 cfun->machine->uses_anonymous_args = 1;
29172 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29173 {
29174 nregs = pcum->aapcs_ncrn;
29175 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29176 && (nregs & 1))
29177 {
29178 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29179 if (res < 0 && warn_psabi)
29180 inform (input_location, "parameter passing for argument of "
29181 "type %qT changed in GCC 7.1", arg.type);
29182 else if (res > 0)
29183 {
29184 nregs++;
29185 if (res > 1 && warn_psabi)
29186 inform (input_location,
29187 "parameter passing for argument of type "
29188 "%qT changed in GCC 9.1", arg.type);
29189 }
29190 }
29191 }
29192 else
29193 nregs = pcum->nregs;
29194
29195 if (nregs < NUM_ARG_REGS)
29196 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29197 }
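/* Example (illustrative): for a variadic AAPCS function whose named
   arguments consume two of the four core argument registers, NREGS is
   2, so *PRETEND_SIZE becomes (NUM_ARG_REGS - 2) * UNITS_PER_WORD == 8
   and the prologue pushes r2 and r3 next to the stacked anonymous
   arguments.  */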
29198
29199 /* We can't rely on the caller doing the proper promotion when
29200 using APCS or ATPCS. */
29201
29202 static bool
29203 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29204 {
29205 return !TARGET_AAPCS_BASED;
29206 }
29207
29208 static machine_mode
29209 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29210 machine_mode mode,
29211 int *punsignedp ATTRIBUTE_UNUSED,
29212 const_tree fntype ATTRIBUTE_UNUSED,
29213 int for_return ATTRIBUTE_UNUSED)
29214 {
29215 if (GET_MODE_CLASS (mode) == MODE_INT
29216 && GET_MODE_SIZE (mode) < 4)
29217 return SImode;
29218
29219 return mode;
29220 }
29221
29222
29223 static bool
29224 arm_default_short_enums (void)
29225 {
29226 return ARM_DEFAULT_SHORT_ENUMS;
29227 }
29228
29229
29230 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29231
29232 static bool
29233 arm_align_anon_bitfield (void)
29234 {
29235 return TARGET_AAPCS_BASED;
29236 }
29237
29238
29239 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29240
29241 static tree
29242 arm_cxx_guard_type (void)
29243 {
29244 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29245 }
29246
29247
29248 /* The EABI says test the least significant bit of a guard variable. */
29249
29250 static bool
29251 arm_cxx_guard_mask_bit (void)
29252 {
29253 return TARGET_AAPCS_BASED;
29254 }
29255
29256
29257 /* The EABI specifies that all array cookies are 8 bytes long. */
29258
29259 static tree
29260 arm_get_cookie_size (tree type)
29261 {
29262 tree size;
29263
29264 if (!TARGET_AAPCS_BASED)
29265 return default_cxx_get_cookie_size (type);
29266
29267 size = build_int_cst (sizetype, 8);
29268 return size;
29269 }
29270
29271
29272 /* The EABI says that array cookies should also contain the element size. */
29273
29274 static bool
29275 arm_cookie_has_size (void)
29276 {
29277 return TARGET_AAPCS_BASED;
29278 }
29279
29280
29281 /* The EABI says constructors and destructors should return a pointer to
29282 the object constructed/destroyed. */
29283
29284 static bool
29285 arm_cxx_cdtor_returns_this (void)
29286 {
29287 return TARGET_AAPCS_BASED;
29288 }
29289
29290 /* The EABI says that an inline function may never be the key
29291 method. */
29292
29293 static bool
29294 arm_cxx_key_method_may_be_inline (void)
29295 {
29296 return !TARGET_AAPCS_BASED;
29297 }
29298
29299 static void
29300 arm_cxx_determine_class_data_visibility (tree decl)
29301 {
29302 if (!TARGET_AAPCS_BASED
29303 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29304 return;
29305
29306 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29307 is exported. However, on systems without dynamic vague linkage,
29308 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29309 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29310 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29311 else
29312 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29313 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29314 }
29315
29316 static bool
29317 arm_cxx_class_data_always_comdat (void)
29318 {
29319 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29320 vague linkage if the class has no key function. */
29321 return !TARGET_AAPCS_BASED;
29322 }
29323
29324
29325 /* The EABI says __aeabi_atexit should be used to register static
29326 destructors. */
29327
29328 static bool
29329 arm_cxx_use_aeabi_atexit (void)
29330 {
29331 return TARGET_AAPCS_BASED;
29332 }
29333
29334
29335 void
29336 arm_set_return_address (rtx source, rtx scratch)
29337 {
29338 arm_stack_offsets *offsets;
29339 HOST_WIDE_INT delta;
29340 rtx addr, mem;
29341 unsigned long saved_regs;
29342
29343 offsets = arm_get_frame_offsets ();
29344 saved_regs = offsets->saved_regs_mask;
29345
29346 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29347 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29348 else
29349 {
29350 if (frame_pointer_needed)
29351 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29352 else
29353 {
29354 /* LR will be the first saved register. */
29355 delta = offsets->outgoing_args - (offsets->frame + 4);
29356
29357
29358 if (delta >= 4096)
29359 {
29360 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29361 GEN_INT (delta & ~4095)));
29362 addr = scratch;
29363 delta &= 4095;
29364 }
29365 else
29366 addr = stack_pointer_rtx;
29367
29368 addr = plus_constant (Pmode, addr, delta);
29369 }
29370
29371 /* The store needs to be marked to prevent DSE from deleting
29372 it as dead if it is based on fp. */
29373 mem = gen_frame_mem (Pmode, addr);
29374 MEM_VOLATILE_P (mem) = true;
29375 emit_move_insn (mem, source);
29376 }
29377 }
29378
29379
29380 void
29381 thumb_set_return_address (rtx source, rtx scratch)
29382 {
29383 arm_stack_offsets *offsets;
29384 HOST_WIDE_INT delta;
29385 HOST_WIDE_INT limit;
29386 int reg;
29387 rtx addr, mem;
29388 unsigned long mask;
29389
29390 emit_use (source);
29391
29392 offsets = arm_get_frame_offsets ();
29393 mask = offsets->saved_regs_mask;
29394 if (mask & (1 << LR_REGNUM))
29395 {
29396 limit = 1024;
29397 /* Find the saved regs. */
29398 if (frame_pointer_needed)
29399 {
29400 delta = offsets->soft_frame - offsets->saved_args;
29401 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29402 if (TARGET_THUMB1)
29403 limit = 128;
29404 }
29405 else
29406 {
29407 delta = offsets->outgoing_args - offsets->saved_args;
29408 reg = SP_REGNUM;
29409 }
29410 /* Allow for the stack frame. */
29411 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29412 delta -= 16;
29413 /* The link register is always the first saved register. */
29414 delta -= 4;
29415
29416 /* Construct the address. */
29417 addr = gen_rtx_REG (SImode, reg);
29418 if (delta > limit)
29419 {
29420 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29421 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29422 addr = scratch;
29423 }
29424 else
29425 addr = plus_constant (Pmode, addr, delta);
29426
29427 /* The store needs to be marked to prevent DSE from deleting
29428 it as dead if it is based on fp. */
29429 mem = gen_frame_mem (Pmode, addr);
29430 MEM_VOLATILE_P (mem) = true;
29431 emit_move_insn (mem, source);
29432 }
29433 else
29434 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29435 }
29436
29437 /* Implements target hook vector_mode_supported_p. */
29438 bool
29439 arm_vector_mode_supported_p (machine_mode mode)
29440 {
29441 /* Neon also supports V2SImode, etc. listed in the clause below. */
29442 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29443 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29444 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29445 || mode == V8BFmode))
29446 return true;
29447
29448 if ((TARGET_NEON || TARGET_IWMMXT)
29449 && ((mode == V2SImode)
29450 || (mode == V4HImode)
29451 || (mode == V8QImode)))
29452 return true;
29453
29454 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29455 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29456 || mode == V2HAmode))
29457 return true;
29458
29459 if (TARGET_HAVE_MVE
29460 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29461 || mode == V16QImode
29462 || mode == V16BImode || mode == V8BImode || mode == V4BImode))
29463 return true;
29464
29465 if (TARGET_HAVE_MVE_FLOAT
29466 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29467 return true;
29468
29469 return false;
29470 }
29471
29472 /* Implements target hook array_mode_supported_p. */
29473
29474 static bool
29475 arm_array_mode_supported_p (machine_mode mode,
29476 unsigned HOST_WIDE_INT nelems)
29477 {
29478 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29479 for now, as the lane-swapping logic needs to be extended in the expanders.
29480 See PR target/82518. */
29481 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29482 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29483 && (nelems >= 2 && nelems <= 4))
29484 return true;
29485
29486 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29487 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29488 return true;
29489
29490 return false;
29491 }
29492
29493 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29494 registers when autovectorizing for Neon, at least until multiple vector
29495 widths are supported properly by the middle-end. */
29496
29497 static machine_mode
29498 arm_preferred_simd_mode (scalar_mode mode)
29499 {
29500 if (TARGET_NEON)
29501 switch (mode)
29502 {
29503 case E_HFmode:
29504 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29505 case E_SFmode:
29506 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29507 case E_SImode:
29508 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29509 case E_HImode:
29510 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29511 case E_QImode:
29512 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29513 case E_DImode:
29514 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29515 return V2DImode;
29516 break;
29517
29518 default:;
29519 }
29520
29521 if (TARGET_REALLY_IWMMXT)
29522 switch (mode)
29523 {
29524 case E_SImode:
29525 return V2SImode;
29526 case E_HImode:
29527 return V4HImode;
29528 case E_QImode:
29529 return V8QImode;
29530
29531 default:;
29532 }
29533
29534 if (TARGET_HAVE_MVE)
29535 switch (mode)
29536 {
29537 case E_QImode:
29538 return V16QImode;
29539 case E_HImode:
29540 return V8HImode;
29541 case E_SImode:
29542 return V4SImode;
29543
29544 default:;
29545 }
29546
29547 if (TARGET_HAVE_MVE_FLOAT)
29548 switch (mode)
29549 {
29550 case E_HFmode:
29551 return V8HFmode;
29552 case E_SFmode:
29553 return V4SFmode;
29554
29555 default:;
29556 }
29557
29558 return word_mode;
29559 }
29560
29561 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29562
29563 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29564 using r0-r4 for function arguments, r7 for the stack frame and not have
29565 enough left over to do doubleword arithmetic. For Thumb-2 all the
29566 potentially problematic instructions accept high registers so this is not
29567 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29568 that require many low registers. */
29569 static bool
29570 arm_class_likely_spilled_p (reg_class_t rclass)
29571 {
29572 if ((TARGET_THUMB1 && rclass == LO_REGS)
29573 || rclass == CC_REG)
29574 return true;
29575
29576 return default_class_likely_spilled_p (rclass);
29577 }
29578
29579 /* Implements target hook small_register_classes_for_mode_p. */
29580 bool
29581 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29582 {
29583 return TARGET_THUMB1;
29584 }
29585
29586 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29587 ARM insns and therefore guarantee that the shift count is modulo 256.
29588 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29589 guarantee no particular behavior for out-of-range counts. */
29590
29591 static unsigned HOST_WIDE_INT
29592 arm_shift_truncation_mask (machine_mode mode)
29593 {
29594 return mode == SImode ? 255 : 0;
29595 }
29596
29597
29598 /* Map internal gcc register numbers to DWARF2 register numbers. */
29599
29600 unsigned int
29601 arm_debugger_regno (unsigned int regno)
29602 {
29603 if (regno < 16)
29604 return regno;
29605
29606 if (IS_VFP_REGNUM (regno))
29607 {
29608 /* See comment in arm_dwarf_register_span. */
29609 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29610 return 64 + regno - FIRST_VFP_REGNUM;
29611 else
29612 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29613 }
29614
29615 if (IS_IWMMXT_GR_REGNUM (regno))
29616 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29617
29618 if (IS_IWMMXT_REGNUM (regno))
29619 return 112 + regno - FIRST_IWMMXT_REGNUM;
29620
29621 return DWARF_FRAME_REGISTERS;
29622 }
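/* Illustrative mapping: core registers map to themselves, a VFP single
   such as s5 maps into the legacy 64-95 range as 64 + 5 == 69, and a
   register with no single-precision view such as d16 maps into the
   256+ range as 256 + 16 == 272.  */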
29623
29624 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29625 GCC models them as 64 32-bit registers, so we need to describe this to
29626 the DWARF generation code. Other registers can use the default. */
29627 static rtx
29628 arm_dwarf_register_span (rtx rtl)
29629 {
29630 machine_mode mode;
29631 unsigned regno;
29632 rtx parts[16];
29633 int nregs;
29634 int i;
29635
29636 regno = REGNO (rtl);
29637 if (!IS_VFP_REGNUM (regno))
29638 return NULL_RTX;
29639
29640 /* XXX FIXME: The EABI defines two VFP register ranges:
29641 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29642 256-287: D0-D31
29643 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29644 corresponding D register. Until GDB supports this, we shall use the
29645 legacy encodings. We also use these encodings for D0-D15 for
29646 compatibility with older debuggers. */
29647 mode = GET_MODE (rtl);
29648 if (GET_MODE_SIZE (mode) < 8)
29649 return NULL_RTX;
29650
29651 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29652 {
29653 nregs = GET_MODE_SIZE (mode) / 4;
29654 for (i = 0; i < nregs; i += 2)
29655 if (TARGET_BIG_END)
29656 {
29657 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29658 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29659 }
29660 else
29661 {
29662 parts[i] = gen_rtx_REG (SImode, regno + i);
29663 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29664 }
29665 }
29666 else
29667 {
29668 nregs = GET_MODE_SIZE (mode) / 8;
29669 for (i = 0; i < nregs; i++)
29670 parts[i] = gen_rtx_REG (DImode, regno + i);
29671 }
29672
29673 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29674 }
29675
29676 #if ARM_UNWIND_INFO
29677 /* Emit unwind directives for a store-multiple instruction or stack pointer
29678 push during alignment.
29679 These should only ever be generated by the function prologue code, so
29680 expect them to have a particular form.
29681 The store-multiple instruction sometimes pushes pc as the last register,
29682 although it should not be tracked into unwind information, or for -Os
29683 sometimes pushes some dummy registers before the first register that needs
29684 to be tracked in unwind information; such dummy registers are there just
29685 to avoid a separate stack adjustment, and will not be restored in the
29686 epilogue. */
29687
29688 static void
29689 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29690 {
29691 int i;
29692 HOST_WIDE_INT offset;
29693 HOST_WIDE_INT nregs;
29694 int reg_size;
29695 unsigned reg;
29696 unsigned lastreg;
29697 unsigned padfirst = 0, padlast = 0;
29698 rtx e;
29699
29700 e = XVECEXP (p, 0, 0);
29701 gcc_assert (GET_CODE (e) == SET);
29702
29703 /* First insn will adjust the stack pointer. */
29704 gcc_assert (GET_CODE (e) == SET
29705 && REG_P (SET_DEST (e))
29706 && REGNO (SET_DEST (e)) == SP_REGNUM
29707 && GET_CODE (SET_SRC (e)) == PLUS);
29708
29709 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29710 nregs = XVECLEN (p, 0) - 1;
29711 gcc_assert (nregs);
29712
29713 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29714 if (reg < 16)
29715 {
29716 /* For -Os dummy registers can be pushed at the beginning to
29717 avoid separate stack pointer adjustment. */
29718 e = XVECEXP (p, 0, 1);
29719 e = XEXP (SET_DEST (e), 0);
29720 if (GET_CODE (e) == PLUS)
29721 padfirst = INTVAL (XEXP (e, 1));
29722 gcc_assert (padfirst == 0 || optimize_size);
29723 /* The function prologue may also push pc, but not annotate it as it is
29724 never restored. We turn this into a stack pointer adjustment. */
29725 e = XVECEXP (p, 0, nregs);
29726 e = XEXP (SET_DEST (e), 0);
29727 if (GET_CODE (e) == PLUS)
29728 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29729 else
29730 padlast = offset - 4;
29731 gcc_assert (padlast == 0 || padlast == 4);
29732 if (padlast == 4)
29733 fprintf (out_file, "\t.pad #4\n");
29734 reg_size = 4;
29735 fprintf (out_file, "\t.save {");
29736 }
29737 else if (IS_VFP_REGNUM (reg))
29738 {
29739 reg_size = 8;
29740 fprintf (out_file, "\t.vsave {");
29741 }
29742 else
29743 /* Unknown register type. */
29744 gcc_unreachable ();
29745
29746 /* If the stack increment doesn't match the size of the saved registers,
29747 something has gone horribly wrong. */
29748 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29749
29750 offset = padfirst;
29751 lastreg = 0;
29752 /* The remaining insns will describe the stores. */
29753 for (i = 1; i <= nregs; i++)
29754 {
29755 /* Expect (set (mem <addr>) (reg)).
29756 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29757 e = XVECEXP (p, 0, i);
29758 gcc_assert (GET_CODE (e) == SET
29759 && MEM_P (SET_DEST (e))
29760 && REG_P (SET_SRC (e)));
29761
29762 reg = REGNO (SET_SRC (e));
29763 gcc_assert (reg >= lastreg);
29764
29765 if (i != 1)
29766 fprintf (out_file, ", ");
29767 /* We can't use %r for vfp because we need to use the
29768 double precision register names. */
29769 if (IS_VFP_REGNUM (reg))
29770 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29771 else
29772 asm_fprintf (out_file, "%r", reg);
29773
29774 if (flag_checking)
29775 {
29776 /* Check that the addresses are consecutive. */
29777 e = XEXP (SET_DEST (e), 0);
29778 if (GET_CODE (e) == PLUS)
29779 gcc_assert (REG_P (XEXP (e, 0))
29780 && REGNO (XEXP (e, 0)) == SP_REGNUM
29781 && CONST_INT_P (XEXP (e, 1))
29782 && offset == INTVAL (XEXP (e, 1)));
29783 else
29784 gcc_assert (i == 1
29785 && REG_P (e)
29786 && REGNO (e) == SP_REGNUM);
29787 offset += reg_size;
29788 }
29789 }
29790 fprintf (out_file, "}\n");
29791 if (padfirst)
29792 fprintf (out_file, "\t.pad #%d\n", padfirst);
29793 }
29794
29795 /* Emit unwind directives for a SET. */
29796
29797 static void
29798 arm_unwind_emit_set (FILE * out_file, rtx p)
29799 {
29800 rtx e0;
29801 rtx e1;
29802 unsigned reg;
29803
29804 e0 = XEXP (p, 0);
29805 e1 = XEXP (p, 1);
29806 switch (GET_CODE (e0))
29807 {
29808 case MEM:
29809 /* Pushing a single register. */
29810 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29811 || !REG_P (XEXP (XEXP (e0, 0), 0))
29812 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29813 abort ();
29814
29815 asm_fprintf (out_file, "\t.save ");
29816 if (IS_VFP_REGNUM (REGNO (e1)))
29817 asm_fprintf(out_file, "{d%d}\n",
29818 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29819 else
29820 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29821 break;
29822
29823 case REG:
29824 if (REGNO (e0) == SP_REGNUM)
29825 {
29826 /* A stack increment. */
29827 if (GET_CODE (e1) != PLUS
29828 || !REG_P (XEXP (e1, 0))
29829 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29830 || !CONST_INT_P (XEXP (e1, 1)))
29831 abort ();
29832
29833 asm_fprintf (out_file, "\t.pad #%wd\n",
29834 -INTVAL (XEXP (e1, 1)));
29835 }
29836 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29837 {
29838 HOST_WIDE_INT offset;
29839
29840 if (GET_CODE (e1) == PLUS)
29841 {
29842 if (!REG_P (XEXP (e1, 0))
29843 || !CONST_INT_P (XEXP (e1, 1)))
29844 abort ();
29845 reg = REGNO (XEXP (e1, 0));
29846 offset = INTVAL (XEXP (e1, 1));
29847 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29848 HARD_FRAME_POINTER_REGNUM, reg,
29849 offset);
29850 }
29851 else if (REG_P (e1))
29852 {
29853 reg = REGNO (e1);
29854 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29855 HARD_FRAME_POINTER_REGNUM, reg);
29856 }
29857 else
29858 abort ();
29859 }
29860 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29861 {
29862 /* Move from sp to reg. */
29863 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
29864 }
29865 else if (GET_CODE (e1) == PLUS
29866 && REG_P (XEXP (e1, 0))
29867 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29868 && CONST_INT_P (XEXP (e1, 1)))
29869 {
29870 /* Set reg to offset from sp. */
29871 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
29872 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29873 }
29874 else
29875 abort ();
29876 break;
29877
29878 default:
29879 abort ();
29880 }
29881 }
29882
29883
29884 /* Emit unwind directives for the given insn. */
29885
29886 static void
29887 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
29888 {
29889 rtx note, pat;
29890 bool handled_one = false;
29891
29892 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29893 return;
29894
29895 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29896 && (TREE_NOTHROW (current_function_decl)
29897 || crtl->all_throwers_are_sibcalls))
29898 return;
29899
29900 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29901 return;
29902
29903 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29904 {
29905 switch (REG_NOTE_KIND (note))
29906 {
29907 case REG_FRAME_RELATED_EXPR:
29908 pat = XEXP (note, 0);
29909 goto found;
29910
29911 case REG_CFA_REGISTER:
29912 pat = XEXP (note, 0);
29913 if (pat == NULL)
29914 {
29915 pat = PATTERN (insn);
29916 if (GET_CODE (pat) == PARALLEL)
29917 pat = XVECEXP (pat, 0, 0);
29918 }
29919
29920 /* Only emitted for IS_STACKALIGN re-alignment. */
29921 {
29922 rtx dest, src;
29923 unsigned reg;
29924
29925 src = SET_SRC (pat);
29926 dest = SET_DEST (pat);
29927
29928 gcc_assert (src == stack_pointer_rtx);
29929 reg = REGNO (dest);
29930 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29931 reg + 0x90, reg);
29932 }
29933 handled_one = true;
29934 break;
29935
29936 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29937 to get correct dwarf information for shrink-wrap. We should not
29938 emit unwind information for it because these are used either for
29939 pretend arguments or notes to adjust sp and restore registers from
29940 the stack. */
29941 case REG_CFA_DEF_CFA:
29942 case REG_CFA_ADJUST_CFA:
29943 case REG_CFA_RESTORE:
29944 return;
29945
29946 case REG_CFA_EXPRESSION:
29947 case REG_CFA_OFFSET:
29948 /* ??? Only handling here what we actually emit. */
29949 gcc_unreachable ();
29950
29951 default:
29952 break;
29953 }
29954 }
29955 if (handled_one)
29956 return;
29957 pat = PATTERN (insn);
29958 found:
29959
29960 switch (GET_CODE (pat))
29961 {
29962 case SET:
29963 arm_unwind_emit_set (out_file, pat);
29964 break;
29965
29966 case SEQUENCE:
29967 /* Store multiple. */
29968 arm_unwind_emit_sequence (out_file, pat);
29969 break;
29970
29971 default:
29972 abort();
29973 }
29974 }
29975
29976
29977 /* Output a reference from a function exception table to the type_info
29978 object X. The EABI specifies that the symbol should be relocated by
29979 an R_ARM_TARGET2 relocation. */
29980
29981 static bool
29982 arm_output_ttype (rtx x)
29983 {
29984 fputs ("\t.word\t", asm_out_file);
29985 output_addr_const (asm_out_file, x);
29986 /* Use special relocations for symbol references. */
29987 if (!CONST_INT_P (x))
29988 fputs ("(TARGET2)", asm_out_file);
29989 fputc ('\n', asm_out_file);
29990
29991 return TRUE;
29992 }
29993
29994 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29995
29996 static void
29997 arm_asm_emit_except_personality (rtx personality)
29998 {
29999 fputs ("\t.personality\t", asm_out_file);
30000 output_addr_const (asm_out_file, personality);
30001 fputc ('\n', asm_out_file);
30002 }
30003 #endif /* ARM_UNWIND_INFO */
30004
30005 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30006
30007 static void
30008 arm_asm_init_sections (void)
30009 {
30010 #if ARM_UNWIND_INFO
30011 exception_section = get_unnamed_section (0, output_section_asm_op,
30012 "\t.handlerdata");
30013 #endif /* ARM_UNWIND_INFO */
30014
30015 #ifdef OBJECT_FORMAT_ELF
30016 if (target_pure_code)
30017 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30018 #endif
30019 }
30020
30021 /* Output unwind directives for the start/end of a function. */
30022
30023 void
30024 arm_output_fn_unwind (FILE * f, bool prologue)
30025 {
30026 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30027 return;
30028
30029 if (prologue)
30030 fputs ("\t.fnstart\n", f);
30031 else
30032 {
30033 /* If this function will never be unwound, then mark it as such.
30034 The same condition is used in arm_unwind_emit to suppress
30035 the frame annotations. */
30036 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30037 && (TREE_NOTHROW (current_function_decl)
30038 || crtl->all_throwers_are_sibcalls))
30039 fputs("\t.cantunwind\n", f);
30040
30041 fputs ("\t.fnend\n", f);
30042 }
30043 }
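/* So a function that can never be unwound is bracketed as

	.fnstart
	...
	.cantunwind
	.fnend

   while an unwindable function gets only .fnstart/.fnend around the
   directives emitted by arm_unwind_emit.  */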
30044
30045 static bool
30046 arm_emit_tls_decoration (FILE *fp, rtx x)
30047 {
30048 enum tls_reloc reloc;
30049 rtx val;
30050
30051 val = XVECEXP (x, 0, 0);
30052 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30053
30054 output_addr_const (fp, val);
30055
30056 switch (reloc)
30057 {
30058 case TLS_GD32:
30059 fputs ("(tlsgd)", fp);
30060 break;
30061 case TLS_GD32_FDPIC:
30062 fputs ("(tlsgd_fdpic)", fp);
30063 break;
30064 case TLS_LDM32:
30065 fputs ("(tlsldm)", fp);
30066 break;
30067 case TLS_LDM32_FDPIC:
30068 fputs ("(tlsldm_fdpic)", fp);
30069 break;
30070 case TLS_LDO32:
30071 fputs ("(tlsldo)", fp);
30072 break;
30073 case TLS_IE32:
30074 fputs ("(gottpoff)", fp);
30075 break;
30076 case TLS_IE32_FDPIC:
30077 fputs ("(gottpoff_fdpic)", fp);
30078 break;
30079 case TLS_LE32:
30080 fputs ("(tpoff)", fp);
30081 break;
30082 case TLS_DESCSEQ:
30083 fputs ("(tlsdesc)", fp);
30084 break;
30085 default:
30086 gcc_unreachable ();
30087 }
30088
30089 switch (reloc)
30090 {
30091 case TLS_GD32:
30092 case TLS_LDM32:
30093 case TLS_IE32:
30094 case TLS_DESCSEQ:
30095 fputs (" + (. - ", fp);
30096 output_addr_const (fp, XVECEXP (x, 0, 2));
30097 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30098 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30099 output_addr_const (fp, XVECEXP (x, 0, 3));
30100 fputc (')', fp);
30101 break;
30102 default:
30103 break;
30104 }
30105
30106 return TRUE;
30107 }
30108
30109 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30110
30111 static void
30112 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30113 {
30114 gcc_assert (size == 4);
30115 fputs ("\t.word\t", file);
30116 output_addr_const (file, x);
30117 fputs ("(tlsldo)", file);
30118 }
30119
30120 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30121
30122 static bool
30123 arm_output_addr_const_extra (FILE *fp, rtx x)
30124 {
30125 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30126 return arm_emit_tls_decoration (fp, x);
30127 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30128 {
30129 char label[256];
30130 int labelno = INTVAL (XVECEXP (x, 0, 0));
30131
30132 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30133 assemble_name_raw (fp, label);
30134
30135 return TRUE;
30136 }
30137 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30138 {
30139 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30140 if (GOT_PCREL)
30141 fputs ("+.", fp);
30142 fputs ("-(", fp);
30143 output_addr_const (fp, XVECEXP (x, 0, 0));
30144 fputc (')', fp);
30145 return TRUE;
30146 }
30147 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30148 {
30149 output_addr_const (fp, XVECEXP (x, 0, 0));
30150 if (GOT_PCREL)
30151 fputs ("+.", fp);
30152 fputs ("-(", fp);
30153 output_addr_const (fp, XVECEXP (x, 0, 1));
30154 fputc (')', fp);
30155 return TRUE;
30156 }
30157 else if (GET_CODE (x) == CONST_VECTOR)
30158 return arm_emit_vector_const (fp, x);
30159
30160 return FALSE;
30161 }
30162
30163 /* Output assembly for a shift instruction.
30164 SET_FLAGS determines how the instruction modifies the condition codes.
30165 0 - Do not set condition codes.
30166 1 - Set condition codes.
30167 2 - Use smallest instruction. */
30168 const char *
30169 arm_output_shift(rtx * operands, int set_flags)
30170 {
30171 char pattern[100];
30172 static const char flag_chars[3] = {'?', '.', '!'};
30173 const char *shift;
30174 HOST_WIDE_INT val;
30175 char c;
30176
30177 c = flag_chars[set_flags];
30178 shift = shift_op(operands[3], &val);
30179 if (shift)
30180 {
30181 if (val != -1)
30182 operands[2] = GEN_INT(val);
30183 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30184 }
30185 else
30186 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30187
30188 output_asm_insn (pattern, operands);
30189 return "";
30190 }
30191
30192 /* Output assembly for a WMMX immediate shift instruction. */
30193 const char *
30194 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30195 {
30196 int shift = INTVAL (operands[2]);
30197 char templ[50];
30198 machine_mode opmode = GET_MODE (operands[0]);
30199
30200 gcc_assert (shift >= 0);
30201
30202 /* Handle the case where the shift value exceeds the maximum for the
30203 operand width: 63 (D qualifier), 31 (W qualifier) or 15 (H qualifier). */
30204 if (((opmode == V4HImode) && (shift > 15))
30205 || ((opmode == V2SImode) && (shift > 31))
30206 || ((opmode == DImode) && (shift > 63)))
30207 {
30208 if (wror_or_wsra)
30209 {
30210 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30211 output_asm_insn (templ, operands);
30212 if (opmode == DImode)
30213 {
30214 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30215 output_asm_insn (templ, operands);
30216 }
30217 }
30218 else
30219 {
30220 /* The destination register will contain all zeros. */
30221 sprintf (templ, "wzero\t%%0");
30222 output_asm_insn (templ, operands);
30223 }
30224 return "";
30225 }
30226
30227 if ((opmode == DImode) && (shift > 32))
30228 {
30229 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30230 output_asm_insn (templ, operands);
30231 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30232 output_asm_insn (templ, operands);
30233 }
30234 else
30235 {
30236 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30237 output_asm_insn (templ, operands);
30238 }
30239 return "";
30240 }
30241
30242 /* Output assembly for a WMMX tinsr instruction. */
30243 const char *
30244 arm_output_iwmmxt_tinsr (rtx *operands)
30245 {
30246 int mask = INTVAL (operands[3]);
30247 int i;
30248 char templ[50];
30249 int units = mode_nunits[GET_MODE (operands[0])];
30250 gcc_assert ((mask & (mask - 1)) == 0);
30251 for (i = 0; i < units; ++i)
30252 {
30253 if ((mask & 0x01) == 1)
30254 {
30255 break;
30256 }
30257 mask >>= 1;
30258 }
30259 gcc_assert (i < units);
30260 {
30261 switch (GET_MODE (operands[0]))
30262 {
30263 case E_V8QImode:
30264 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30265 break;
30266 case E_V4HImode:
30267 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30268 break;
30269 case E_V2SImode:
30270 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30271 break;
30272 default:
30273 gcc_unreachable ();
30274 break;
30275 }
30276 output_asm_insn (templ, operands);
30277 }
30278 return "";
30279 }
30280
30281 /* Output a Thumb-1 casesi dispatch sequence. */
30282 const char *
30283 thumb1_output_casesi (rtx *operands)
30284 {
30285 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30286
30287 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30288
30289 switch (GET_MODE(diff_vec))
30290 {
30291 case E_QImode:
30292 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30293 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30294 case E_HImode:
30295 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30296 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30297 case E_SImode:
30298 return "bl\t%___gnu_thumb1_case_si";
30299 default:
30300 gcc_unreachable ();
30301 }
30302 }
30303
30304 /* Output a Thumb-2 casesi instruction. */
30305 const char *
30306 thumb2_output_casesi (rtx *operands)
30307 {
30308 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30309
30310 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30311
30312 output_asm_insn ("cmp\t%0, %1", operands);
30313 output_asm_insn ("bhi\t%l3", operands);
30314 switch (GET_MODE(diff_vec))
30315 {
30316 case E_QImode:
30317 return "tbb\t[%|pc, %0]";
30318 case E_HImode:
30319 return "tbh\t[%|pc, %0, lsl #1]";
30320 case E_SImode:
30321 if (flag_pic)
30322 {
30323 output_asm_insn ("adr\t%4, %l2", operands);
30324 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30325 output_asm_insn ("add\t%4, %4, %5", operands);
30326 return "bx\t%4";
30327 }
30328 else
30329 {
30330 output_asm_insn ("adr\t%4, %l2", operands);
30331 return "ldr\t%|pc, [%4, %0, lsl #2]";
30332 }
30333 default:
30334 gcc_unreachable ();
30335 }
30336 }
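/* Illustrative result (register and label names hypothetical): for an
   HImode dispatch table with the index in r0, the bound in r1 and
   default label .Ldef, the sequence emitted is roughly

	cmp	r0, r1
	bhi	.Ldef
	tbh	[pc, r0, lsl #1]
 */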
30337
30338 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30339 per-core tuning structs. */
30340 static int
30341 arm_issue_rate (void)
30342 {
30343 return current_tune->issue_rate;
30344 }
30345
30346 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30347 static int
30348 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30349 {
30350 if (DEBUG_INSN_P (insn))
30351 return more;
30352
30353 rtx_code code = GET_CODE (PATTERN (insn));
30354 if (code == USE || code == CLOBBER)
30355 return more;
30356
30357 if (get_attr_type (insn) == TYPE_NO_INSN)
30358 return more;
30359
30360 return more - 1;
30361 }
30362
30363 /* Return how many instructions the scheduler should look ahead in order to
30364 choose the best one. */
30365 static int
30366 arm_first_cycle_multipass_dfa_lookahead (void)
30367 {
30368 int issue_rate = arm_issue_rate ();
30369
30370 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30371 }
30372
30373 /* Enable modeling of L2 auto-prefetcher. */
30374 static int
30375 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30376 {
30377 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30378 }
30379
30380 const char *
30381 arm_mangle_type (const_tree type)
30382 {
30383 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30384 has to be mangled as if it is in the "std" namespace. */
30385 if (TARGET_AAPCS_BASED
30386 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30387 return "St9__va_list";
30388
30389 /* Half-precision floating point types. */
30390 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30391 {
30392 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30393 return NULL;
30394 if (TYPE_MODE (type) == BFmode)
30395 return "u6__bf16";
30396 else
30397 return "Dh";
30398 }
30399
30400 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30401 builtin type. */
30402 if (TYPE_NAME (type) != NULL)
30403 return arm_mangle_builtin_type (type);
30404
30405 /* Use the default mangling. */
30406 return NULL;
30407 }
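
/* For instance, on an AAPCS target a parameter of type __fp16 is mangled
   as "Dh" and one of type __bf16 as "u6__bf16", while _Float16 falls
   through to the default mangling; these examples simply restate the
   cases handled above.  */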
30408
30409 /* Order of allocation of core registers for Thumb: this allocation is
30410 written over the corresponding initial entries of the array
30411 initialized with REG_ALLOC_ORDER. We allocate all low registers
30412 first. Saving and restoring a low register is usually cheaper than
30413 using a call-clobbered high register. */
30414
30415 static const int thumb_core_reg_alloc_order[] =
30416 {
30417 3, 2, 1, 0, 4, 5, 6, 7,
30418 12, 14, 8, 9, 10, 11
30419 };
30420
30421 /* Adjust register allocation order when compiling for Thumb. */
30422
30423 void
30424 arm_order_regs_for_local_alloc (void)
30425 {
30426 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30427 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30428 if (TARGET_THUMB)
30429 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30430 sizeof (thumb_core_reg_alloc_order));
30431 }
30432
30433 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30434
30435 bool
30436 arm_frame_pointer_required (void)
30437 {
30438 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30439 return true;
30440
30441 /* If the function receives nonlocal gotos, it needs to save the frame
30442 pointer in the nonlocal_goto_save_area object. */
30443 if (cfun->has_nonlocal_label)
30444 return true;
30445
30446 /* The frame pointer is required for non-leaf APCS frames. */
30447 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30448 return true;
30449
30450 /* If we are probing the stack in the prologue, we will have a faulting
30451 instruction prior to the stack adjustment and this requires a frame
30452 pointer if we want to catch the exception using the EABI unwinder. */
30453 if (!IS_INTERRUPT (arm_current_func_type ())
30454 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30455 || flag_stack_clash_protection)
30456 && arm_except_unwind_info (&global_options) == UI_TARGET
30457 && cfun->can_throw_non_call_exceptions)
30458 {
30459 HOST_WIDE_INT size = get_frame_size ();
30460
30461 /* That's irrelevant if there is no stack adjustment. */
30462 if (size <= 0)
30463 return false;
30464
30465 /* That's relevant only if there is a stack probe. */
30466 if (crtl->is_leaf && !cfun->calls_alloca)
30467 {
30468 /* We don't have the final size of the frame so adjust. */
30469 size += 32 * UNITS_PER_WORD;
30470 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30471 return true;
30472 }
30473 else
30474 return true;
30475 }
30476
30477 return false;
30478 }
30479
30480 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30481 All modes except THUMB1 have conditional execution.
30482 If we have conditional arithmetic, return false before reload to
30483 enable some ifcvt transformations. */
30484 static bool
30485 arm_have_conditional_execution (void)
30486 {
30487 bool has_cond_exec, enable_ifcvt_trans;
30488
30489 /* Only THUMB1 cannot support conditional execution. */
30490 has_cond_exec = !TARGET_THUMB1;
30491
30492 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30493 before reload. */
30494 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30495
30496 return has_cond_exec && !enable_ifcvt_trans;
30497 }
30498
30499 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30500 static HOST_WIDE_INT
30501 arm_vector_alignment (const_tree type)
30502 {
30503 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30504
30505 if (TARGET_AAPCS_BASED)
30506 align = MIN (align, 64);
30507
30508 return align;
30509 }
30510
30511 static unsigned int
30512 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30513 {
30514 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30515 {
30516 modes->safe_push (V16QImode);
30517 modes->safe_push (V8QImode);
30518 }
30519 return 0;
30520 }
30521
30522 static bool
30523 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30524 {
30525 /* Vectors which aren't in packed structures will not be less aligned than
30526 the natural alignment of their element type, so this is safe. */
30527 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30528 return !is_packed;
30529
30530 return default_builtin_vector_alignment_reachable (type, is_packed);
30531 }
30532
30533 static bool
30534 arm_builtin_support_vector_misalignment (machine_mode mode,
30535 const_tree type, int misalignment,
30536 bool is_packed)
30537 {
30538 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30539 {
30540 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30541
30542 if (is_packed)
30543 return align == 1;
30544
30545 /* If the misalignment is unknown, we should be able to handle the access
30546 so long as it is not to a member of a packed data structure. */
30547 if (misalignment == -1)
30548 return true;
30549
30550 /* Return true if the misalignment is a multiple of the natural alignment
30551 of the vector's element type. This is probably always going to be
30552 true in practice, since we've already established that this isn't a
30553 packed access. */
30554 return ((misalignment % align) == 0);
30555 }
30556
30557 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30558 is_packed);
30559 }
30560
30561 static void
30562 arm_conditional_register_usage (void)
30563 {
30564 int regno;
30565
30566 if (TARGET_THUMB1 && optimize_size)
30567 {
30568 /* When optimizing for size on Thumb-1, it's better not
30569 to use the HI regs, because of the overhead of
30570 stacking them. */
30571 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30572 fixed_regs[regno] = call_used_regs[regno] = 1;
30573 }
30574
30575 /* The link register can be clobbered by any branch insn,
30576 but we have no way to track that at present, so mark
30577 it as unavailable. */
30578 if (TARGET_THUMB1)
30579 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30580
30581 if (TARGET_32BIT && TARGET_VFP_BASE)
30582 {
30583 /* VFPv3 registers are disabled when earlier VFP
30584 versions are selected due to the definition of
30585 LAST_VFP_REGNUM. */
30586 for (regno = FIRST_VFP_REGNUM;
30587 regno <= LAST_VFP_REGNUM; ++ regno)
30588 {
30589 fixed_regs[regno] = 0;
30590 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30591 || regno >= FIRST_VFP_REGNUM + 32;
30592 }
30593 if (TARGET_HAVE_MVE)
30594 fixed_regs[VPR_REGNUM] = 0;
30595 }
30596
30597 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30598 {
30599 regno = FIRST_IWMMXT_GR_REGNUM;
30600 /* The 2002/10/09 revision of the XScale ABI has wCG0
30601 and wCG1 as call-preserved registers. The 2002/11/21
30602 revision changed this so that all wCG registers are
30603 scratch registers. */
30604 for (regno = FIRST_IWMMXT_GR_REGNUM;
30605 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30606 fixed_regs[regno] = 0;
30607 /* The XScale ABI has wR0 - wR9 as scratch registers,
30608 the rest as call-preserved registers. */
30609 for (regno = FIRST_IWMMXT_REGNUM;
30610 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30611 {
30612 fixed_regs[regno] = 0;
30613 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30614 }
30615 }
30616
30617 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30618 {
30619 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30620 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30621 }
30622 else if (TARGET_APCS_STACK)
30623 {
30624 fixed_regs[10] = 1;
30625 call_used_regs[10] = 1;
30626 }
30627 /* -mcaller-super-interworking reserves r11 for calls to
30628 _interwork_r11_call_via_rN(). Making the register global
30629 is an easy way of ensuring that it remains valid for all
30630 calls. */
30631 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30632 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30633 {
30634 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30635 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30636 if (TARGET_CALLER_INTERWORKING)
30637 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30638 }
30639
30640 /* The Q and GE bits are only accessed via special ACLE patterns. */
30641 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30642 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30643
30644 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30645 }
30646
30647 static reg_class_t
30648 arm_preferred_rename_class (reg_class_t rclass)
30649 {
30650 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30651 using GENERAL_REGS. During the register rename pass we therefore prefer
30652 LO_REGS, so that code size can be reduced. */
30653 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30654 return LO_REGS;
30655 else
30656 return NO_REGS;
30657 }
30658
30659 /* Compute the attribute "length" of insn "*push_multi".
30660 So this function MUST be kept in sync with that insn pattern. */
30661 int
30662 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30663 {
30664 int i, regno, hi_reg;
30665 int num_saves = XVECLEN (parallel_op, 0);
30666
30667 /* ARM mode. */
30668 if (TARGET_ARM)
30669 return 4;
30670 /* Thumb1 mode. */
30671 if (TARGET_THUMB1)
30672 return 2;
30673
30674 /* Thumb2 mode. */
30675 regno = REGNO (first_op);
30676 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30677 list fits in 8 bits. Normally this means all registers in the list must be
30678 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use a
30679 32-bit encoding. The one exception is PUSH, where LR (although in HI_REGS)
30680 can still be used with the 16-bit encoding. */
30681 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30682 for (i = 1; i < num_saves && !hi_reg; i++)
30683 {
30684 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30685 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30686 }
30687
30688 if (!hi_reg)
30689 return 2;
30690 return 4;
30691 }
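
/* As a concrete example of the length computation above (register choices
   are illustrative only):

	push	{r0-r7, lr}	@ all LO_REGS plus LR: 16-bit encoding, length 2
	push	{r4, r8, lr}	@ r8 is a high register: 32-bit encoding, length 4  */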
30692
30693 /* Compute the attribute "length" of an insn. Currently, this function is used
30694 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30695 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30696 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
30697 true if OPERANDS contains an insn that explicitly updates the base register. */
30698
30699 int
30700 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30701 {
30702 /* ARM mode. */
30703 if (TARGET_ARM)
30704 return 4;
30705 /* Thumb1 mode. */
30706 if (TARGET_THUMB1)
30707 return 2;
30708
30709 rtx parallel_op = operands[0];
30710 /* Start from the index of the last element of the PARALLEL. */
30711 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30712 /* Initialize REGNO to the base register. */
30713 unsigned regno = REGNO (operands[1]);
30714 /* Skip the return and write-back patterns;
30715 we only need the register pop patterns for the analysis below. */
30716 unsigned first_indx = 0;
30717 first_indx += return_pc ? 1 : 0;
30718 first_indx += write_back_p ? 1 : 0;
30719
30720 /* A pop operation can be done with either LDM or POP. If the base register is
30721 SP and write-back is used, then LDM is an alias of POP. */
30722 bool pop_p = (regno == SP_REGNUM && write_back_p);
30723 bool ldm_p = !pop_p;
30724
30725 /* Check base register for LDM. */
30726 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30727 return 4;
30728
30729 /* Check each register in the list. */
30730 for (; indx >= first_indx; indx--)
30731 {
30732 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30733 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30734 comment in arm_attr_length_push_multi. */
30735 if (REGNO_REG_CLASS (regno) == HI_REGS
30736 && (regno != PC_REGNUM || ldm_p))
30737 return 4;
30738 }
30739
30740 return 2;
30741 }
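
/* For example (register choices are illustrative only):

	pop	{r4-r7, pc}	@ base is SP with write-back, PC is allowed: length 2
	ldmia	r8!, {r0-r3}	@ high base register forces the LDM form: length 4  */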
30742
30743 /* Compute the number of instructions emitted by output_move_double. */
30744 int
30745 arm_count_output_move_double_insns (rtx *operands)
30746 {
30747 int count;
30748 rtx ops[2];
30749 /* output_move_double may modify the operands array, so call it
30750 here on a copy of the array. */
30751 ops[0] = operands[0];
30752 ops[1] = operands[1];
30753 output_move_double (ops, false, &count);
30754 return count;
30755 }
30756
30757 /* Same as above, but operands are a register/memory pair in SImode.
30758 Assumes operands has the base register in position 0 and memory in position
30759 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30760 int
30761 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30762 {
30763 int count;
30764 rtx ops[2];
30765 int regnum, memnum;
30766 if (load)
30767 regnum = 0, memnum = 1;
30768 else
30769 regnum = 1, memnum = 0;
30770 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30771 ops[memnum] = adjust_address (operands[2], DImode, 0);
30772 output_move_double (ops, false, &count);
30773 return count;
30774 }
30775
30776
30777 int
30778 vfp3_const_double_for_fract_bits (rtx operand)
30779 {
30780 REAL_VALUE_TYPE r0;
30781
30782 if (!CONST_DOUBLE_P (operand))
30783 return 0;
30784
30785 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30786 if (exact_real_inverse (DFmode, &r0)
30787 && !REAL_VALUE_NEGATIVE (r0))
30788 {
30789 if (exact_real_truncate (DFmode, &r0))
30790 {
30791 HOST_WIDE_INT value = real_to_integer (&r0);
30792 value = value & 0xffffffff;
30793 if ((value != 0) && ( (value & (value - 1)) == 0))
30794 {
30795 int ret = exact_log2 (value);
30796 gcc_assert (IN_RANGE (ret, 0, 31));
30797 return ret;
30798 }
30799 }
30800 }
30801 return 0;
30802 }
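
/* For example, given the constant 0.125 the function above returns 3,
   since 1/0.125 = 8 = 2^3, which is the number of fractional bits a VCVT
   fixed-point conversion would use; for a constant such as 0.3, whose
   reciprocal is not an exact power of two, it returns 0.  */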
30803
30804 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30805 log2 is in [1, 32], return that log2. Otherwise return -1.
30806 This is used in the patterns for vcvt.s32.f32 floating-point to
30807 fixed-point conversions. */
30808
30809 int
30810 vfp3_const_double_for_bits (rtx x)
30811 {
30812 const REAL_VALUE_TYPE *r;
30813
30814 if (!CONST_DOUBLE_P (x))
30815 return -1;
30816
30817 r = CONST_DOUBLE_REAL_VALUE (x);
30818
30819 if (REAL_VALUE_NEGATIVE (*r)
30820 || REAL_VALUE_ISNAN (*r)
30821 || REAL_VALUE_ISINF (*r)
30822 || !real_isinteger (r, SFmode))
30823 return -1;
30824
30825 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30826
30827 /* The exact_log2 above will have returned -1 if this is
30828 not an exact log2. */
30829 if (!IN_RANGE (hwint, 1, 32))
30830 return -1;
30831
30832 return hwint;
30833 }
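
/* For example, the constant 65536.0 yields 16 here (65536 = 2^16, which is
   within [1, 32]), whereas 3.0, 0.5 and -4.0 all yield -1 because they are
   not positive integral powers of two in that range.  */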
30834
30835 \f
30836 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30837
30838 static void
30839 arm_pre_atomic_barrier (enum memmodel model)
30840 {
30841 if (need_atomic_barrier_p (model, true))
30842 emit_insn (gen_memory_barrier ());
30843 }
30844
30845 static void
30846 arm_post_atomic_barrier (enum memmodel model)
30847 {
30848 if (need_atomic_barrier_p (model, false))
30849 emit_insn (gen_memory_barrier ());
30850 }
30851
30852 /* Emit the load-exclusive and store-exclusive instructions.
30853 Use acquire and release versions if necessary. */
30854
30855 static void
30856 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30857 {
30858 rtx (*gen) (rtx, rtx);
30859
30860 if (acq)
30861 {
30862 switch (mode)
30863 {
30864 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30865 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30866 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30867 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30868 default:
30869 gcc_unreachable ();
30870 }
30871 }
30872 else
30873 {
30874 switch (mode)
30875 {
30876 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30877 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30878 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30879 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30880 default:
30881 gcc_unreachable ();
30882 }
30883 }
30884
30885 emit_insn (gen (rval, mem));
30886 }
30887
30888 static void
30889 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30890 rtx mem, bool rel)
30891 {
30892 rtx (*gen) (rtx, rtx, rtx);
30893
30894 if (rel)
30895 {
30896 switch (mode)
30897 {
30898 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30899 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30900 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30901 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30902 default:
30903 gcc_unreachable ();
30904 }
30905 }
30906 else
30907 {
30908 switch (mode)
30909 {
30910 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30911 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30912 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30913 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30914 default:
30915 gcc_unreachable ();
30916 }
30917 }
30918
30919 emit_insn (gen (bval, rval, mem));
30920 }
30921
30922 /* Mark the previous jump instruction as unlikely. */
30923
30924 static void
30925 emit_unlikely_jump (rtx insn)
30926 {
30927 rtx_insn *jump = emit_jump_insn (insn);
30928 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30929 }
30930
30931 /* Expand a compare and swap pattern. */
30932
30933 void
30934 arm_expand_compare_and_swap (rtx operands[])
30935 {
30936 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30937 machine_mode mode, cmp_mode;
30938
30939 bval = operands[0];
30940 rval = operands[1];
30941 mem = operands[2];
30942 oldval = operands[3];
30943 newval = operands[4];
30944 is_weak = operands[5];
30945 mod_s = operands[6];
30946 mod_f = operands[7];
30947 mode = GET_MODE (mem);
30948
30949 /* Normally the succ memory model must be stronger than fail, but in the
30950 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30951 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30952
30953 if (TARGET_HAVE_LDACQ
30954 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30955 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30956 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30957
30958 switch (mode)
30959 {
30960 case E_QImode:
30961 case E_HImode:
30962 /* For narrow modes, we're going to perform the comparison in SImode,
30963 so do the zero-extension now. */
30964 rval = gen_reg_rtx (SImode);
30965 oldval = convert_modes (SImode, mode, oldval, true);
30966 /* FALLTHRU */
30967
30968 case E_SImode:
30969 /* Force the value into a register if needed. We waited until after
30970 the zero-extension above to do this properly. */
30971 if (!arm_add_operand (oldval, SImode))
30972 oldval = force_reg (SImode, oldval);
30973 break;
30974
30975 case E_DImode:
30976 if (!cmpdi_operand (oldval, mode))
30977 oldval = force_reg (mode, oldval);
30978 break;
30979
30980 default:
30981 gcc_unreachable ();
30982 }
30983
30984 if (TARGET_THUMB1)
30985 cmp_mode = E_SImode;
30986 else
30987 cmp_mode = CC_Zmode;
30988
30989 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30990 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30991 oldval, newval, is_weak, mod_s, mod_f));
30992
30993 if (mode == QImode || mode == HImode)
30994 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30995
30996 /* In all cases, we arrange for success to be signaled by Z set.
30997 This arrangement allows for the boolean result to be used directly
30998 in a subsequent branch, post optimization. For Thumb-1 targets, the
30999 boolean negation of the result is also stored in bval because the Thumb-1
31000 backend lacks dependency tracking for the CC flag, since flag-setting is
31001 not represented at the RTL level. */
31002 if (TARGET_THUMB1)
31003 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31004 else
31005 {
31006 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31007 emit_insn (gen_rtx_SET (bval, x));
31008 }
31009 }
31010
31011 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31012 another memory store between the load-exclusive and store-exclusive can
31013 reset the monitor from Exclusive to Open state. This means we must wait
31014 until after reload to split the pattern, lest we get a register spill in
31015 the middle of the atomic sequence. Success of the compare and swap is
31016 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
31017 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
31018 atomic_compare_and_swap<mode> standard pattern in operand 0). */
31019
31020 void
31021 arm_split_compare_and_swap (rtx operands[])
31022 {
31023 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31024 machine_mode mode;
31025 enum memmodel mod_s, mod_f;
31026 bool is_weak;
31027 rtx_code_label *label1, *label2;
31028 rtx x, cond;
31029
31030 rval = operands[1];
31031 mem = operands[2];
31032 oldval = operands[3];
31033 newval = operands[4];
31034 is_weak = (operands[5] != const0_rtx);
31035 mod_s_rtx = operands[6];
31036 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31037 mod_f = memmodel_from_int (INTVAL (operands[7]));
31038 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31039 mode = GET_MODE (mem);
31040
31041 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31042
31043 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31044 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31045
31046 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31047 a full barrier is emitted after the store-release. */
31048 if (is_armv8_sync)
31049 use_acquire = false;
31050
31051 /* Checks whether a barrier is needed and emits one accordingly. */
31052 if (!(use_acquire || use_release))
31053 arm_pre_atomic_barrier (mod_s);
31054
31055 label1 = NULL;
31056 if (!is_weak)
31057 {
31058 label1 = gen_label_rtx ();
31059 emit_label (label1);
31060 }
31061 label2 = gen_label_rtx ();
31062
31063 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31064
31065 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1) if oldval != rval,
31066 as required to communicate with arm_expand_compare_and_swap. */
31067 if (TARGET_32BIT)
31068 {
31069 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31070 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31071 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31072 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31073 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31074 }
31075 else
31076 {
31077 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31078 if (thumb1_cmpneg_operand (oldval, SImode))
31079 {
31080 rtx src = rval;
31081 if (!satisfies_constraint_L (oldval))
31082 {
31083 gcc_assert (satisfies_constraint_J (oldval));
31084
31085 /* For such immediates, ADDS needs the source and destination regs
31086 to be the same.
31087
31088 Normally this would be handled by RA, but this is all happening
31089 after RA. */
31090 emit_move_insn (neg_bval, rval);
31091 src = neg_bval;
31092 }
31093
31094 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31095 label2, cond));
31096 }
31097 else
31098 {
31099 emit_move_insn (neg_bval, const1_rtx);
31100 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31101 }
31102 }
31103
31104 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31105
31106 /* Weak or strong, we want EQ to be true for success, so that we
31107 match the flags that we got from the compare above. */
31108 if (TARGET_32BIT)
31109 {
31110 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31111 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31112 emit_insn (gen_rtx_SET (cond, x));
31113 }
31114
31115 if (!is_weak)
31116 {
31117 /* Z is set to boolean value of !neg_bval, as required to communicate
31118 with arm_expand_compare_and_swap. */
31119 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31120 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31121 }
31122
31123 if (!is_mm_relaxed (mod_f))
31124 emit_label (label2);
31125
31126 /* Checks whether a barrier is needed and emits one accordingly. */
31127 if (is_armv8_sync
31128 || !(use_acquire || use_release))
31129 arm_post_atomic_barrier (mod_s);
31130
31131 if (is_mm_relaxed (mod_f))
31132 emit_label (label2);
31133 }
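
/* As a sketch, a strong SImode compare-and-swap with SEQ_CST ordering on a
   target without LDAEX/STLEX splits into roughly the following (register
   names and labels are illustrative only):

	dmb	ish
   .Lretry:
	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	.Lfail
	strex	neg_bval, newval, [mem]
	cmp	neg_bval, #0
	bne	.Lretry
   .Lfail:
	dmb	ish

   with Z set on success, as arm_expand_compare_and_swap expects.  */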
31134
31135 /* Split an atomic operation pattern. Operation is given by CODE and is one
31136 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31137 operation). Operation is performed on the content at MEM and on VALUE
31138 following the memory model MODEL_RTX. The content at MEM before and after
31139 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31140 success of the operation is returned in COND. Using a scratch register or
31141 an operand register for these determines what result is returned for that
31142 pattern. */
31143
31144 void
31145 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31146 rtx value, rtx model_rtx, rtx cond)
31147 {
31148 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31149 machine_mode mode = GET_MODE (mem);
31150 machine_mode wmode = (mode == DImode ? DImode : SImode);
31151 rtx_code_label *label;
31152 bool all_low_regs, bind_old_new;
31153 rtx x;
31154
31155 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31156
31157 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31158 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31159
31160 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31161 a full barrier is emitted after the store-release. */
31162 if (is_armv8_sync)
31163 use_acquire = false;
31164
31165 /* Checks whether a barrier is needed and emits one accordingly. */
31166 if (!(use_acquire || use_release))
31167 arm_pre_atomic_barrier (model);
31168
31169 label = gen_label_rtx ();
31170 emit_label (label);
31171
31172 if (new_out)
31173 new_out = gen_lowpart (wmode, new_out);
31174 if (old_out)
31175 old_out = gen_lowpart (wmode, old_out);
31176 else
31177 old_out = new_out;
31178 value = simplify_gen_subreg (wmode, value, mode, 0);
31179
31180 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31181
31182 /* Does the operation require destination and first operand to use the same
31183 register? This is decided by register constraints of relevant insn
31184 patterns in thumb1.md. */
31185 gcc_assert (!new_out || REG_P (new_out));
31186 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31187 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31188 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31189 bind_old_new =
31190 (TARGET_THUMB1
31191 && code != SET
31192 && code != MINUS
31193 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31194
31195 /* We want to return the old value while putting the result of the operation
31196 in the same register as the old value so copy the old value over to the
31197 destination register and use that register for the operation. */
31198 if (old_out && bind_old_new)
31199 {
31200 emit_move_insn (new_out, old_out);
31201 old_out = new_out;
31202 }
31203
31204 switch (code)
31205 {
31206 case SET:
31207 new_out = value;
31208 break;
31209
31210 case NOT:
31211 x = gen_rtx_AND (wmode, old_out, value);
31212 emit_insn (gen_rtx_SET (new_out, x));
31213 x = gen_rtx_NOT (wmode, new_out);
31214 emit_insn (gen_rtx_SET (new_out, x));
31215 break;
31216
31217 case MINUS:
31218 if (CONST_INT_P (value))
31219 {
31220 value = gen_int_mode (-INTVAL (value), wmode);
31221 code = PLUS;
31222 }
31223 /* FALLTHRU */
31224
31225 case PLUS:
31226 if (mode == DImode)
31227 {
31228 /* DImode plus/minus need to clobber flags. */
31229 /* The adddi3 and subdi3 patterns are incorrectly written so that
31230 they require matching operands, even when we could easily support
31231 three operands. Thankfully, this can be fixed up post-splitting,
31232 as the individual add+adc patterns do accept three operands and
31233 post-reload cprop can make these moves go away. */
31234 emit_move_insn (new_out, old_out);
31235 if (code == PLUS)
31236 x = gen_adddi3 (new_out, new_out, value);
31237 else
31238 x = gen_subdi3 (new_out, new_out, value);
31239 emit_insn (x);
31240 break;
31241 }
31242 /* FALLTHRU */
31243
31244 default:
31245 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31246 emit_insn (gen_rtx_SET (new_out, x));
31247 break;
31248 }
31249
31250 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31251 use_release);
31252
31253 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31254 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31255
31256 /* Checks whether a barrier is needed and emits one accordingly. */
31257 if (is_armv8_sync
31258 || !(use_acquire || use_release))
31259 arm_post_atomic_barrier (model);
31260 }
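
/* For instance, a relaxed SImode fetch-and-add (CODE == PLUS) splits into a
   loop of roughly this shape, with no barriers because the relaxed model
   needs none (names are illustrative only):

   .Lretry:
	ldrex	old_out, [mem]
	add	new_out, old_out, value
	strex	cond, new_out, [mem]
	cmp	cond, #0
	bne	.Lretry  */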
31261 \f
31262 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31263 opt_machine_mode
31264 arm_mode_to_pred_mode (machine_mode mode)
31265 {
31266 switch (GET_MODE_NUNITS (mode))
31267 {
31268 case 16: return V16BImode;
31269 case 8: return V8BImode;
31270 case 4: return V4BImode;
31271 }
31272 return opt_machine_mode ();
31273 }
31274
31275 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31276 If CAN_INVERT, store either the result or its inverse in TARGET
31277 and return true if TARGET contains the inverse. If !CAN_INVERT,
31278 always store the result in TARGET, never its inverse.
31279
31280 Note that the handling of floating-point comparisons is not
31281 IEEE compliant. */
31282
31283 bool
31284 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31285 bool can_invert)
31286 {
31287 machine_mode cmp_result_mode = GET_MODE (target);
31288 machine_mode cmp_mode = GET_MODE (op0);
31289
31290 bool inverted;
31291
31292 /* MVE supports more comparisons than Neon. */
31293 if (TARGET_HAVE_MVE)
31294 inverted = false;
31295 else
31296 switch (code)
31297 {
31298 /* For these we need to compute the inverse of the requested
31299 comparison. */
31300 case UNORDERED:
31301 case UNLT:
31302 case UNLE:
31303 case UNGT:
31304 case UNGE:
31305 case UNEQ:
31306 case NE:
31307 code = reverse_condition_maybe_unordered (code);
31308 if (!can_invert)
31309 {
31310 /* Recursively emit the inverted comparison into a temporary
31311 and then store its inverse in TARGET. This avoids reusing
31312 TARGET (which for integer NE could be one of the inputs). */
31313 rtx tmp = gen_reg_rtx (cmp_result_mode);
31314 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31315 gcc_unreachable ();
31316 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31317 return false;
31318 }
31319 inverted = true;
31320 break;
31321
31322 default:
31323 inverted = false;
31324 break;
31325 }
31326
31327 switch (code)
31328 {
31329 /* These are natively supported by Neon for zero comparisons, but otherwise
31330 require the operands to be swapped. For MVE, we can only compare
31331 registers. */
31332 case LE:
31333 case LT:
31334 if (!TARGET_HAVE_MVE)
31335 if (op1 != CONST0_RTX (cmp_mode))
31336 {
31337 code = swap_condition (code);
31338 std::swap (op0, op1);
31339 }
31340 /* Fall through. */
31341
31342 /* These are natively supported by Neon for both register and zero
31343 operands. MVE supports registers only. */
31344 case EQ:
31345 case GE:
31346 case GT:
31347 case NE:
31348 if (TARGET_HAVE_MVE)
31349 {
31350 switch (GET_MODE_CLASS (cmp_mode))
31351 {
31352 case MODE_VECTOR_INT:
31353 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31354 op0, force_reg (cmp_mode, op1)));
31355 break;
31356 case MODE_VECTOR_FLOAT:
31357 if (TARGET_HAVE_MVE_FLOAT)
31358 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31359 op0, force_reg (cmp_mode, op1)));
31360 else
31361 gcc_unreachable ();
31362 break;
31363 default:
31364 gcc_unreachable ();
31365 }
31366 }
31367 else
31368 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31369 return inverted;
31370
31371 /* These are natively supported for register operands only.
31372 Comparisons with zero aren't useful and should be folded
31373 or canonicalized by target-independent code. */
31374 case GEU:
31375 case GTU:
31376 if (TARGET_HAVE_MVE)
31377 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31378 op0, force_reg (cmp_mode, op1)));
31379 else
31380 emit_insn (gen_neon_vc (code, cmp_mode, target,
31381 op0, force_reg (cmp_mode, op1)));
31382 return inverted;
31383
31384 /* These require the operands to be swapped and likewise do not
31385 support comparisons with zero. */
31386 case LEU:
31387 case LTU:
31388 if (TARGET_HAVE_MVE)
31389 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31390 force_reg (cmp_mode, op1), op0));
31391 else
31392 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31393 target, force_reg (cmp_mode, op1), op0));
31394 return inverted;
31395
31396 /* These need a combination of two comparisons. */
31397 case LTGT:
31398 case ORDERED:
31399 {
31400 /* Operands are LTGT iff (a > b || a < b).
31401 Operands are ORDERED iff (a > b || a <= b). */
31402 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31403 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31404 rtx_code alt_code = (code == LTGT ? LT : LE);
31405 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31406 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31407 gcc_unreachable ();
31408 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31409 gt_res, alt_res)));
31410 return inverted;
31411 }
31412
31413 default:
31414 gcc_unreachable ();
31415 }
31416 }
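
/* For example, on Neon an UNLT comparison is handled above via its inverse
   GE: with CAN_INVERT the caller is merely told that TARGET holds the
   inverted result (arm_expand_vcond then swaps the value operands), and
   without CAN_INVERT the GE mask is built in a temporary and negated into
   TARGET.  */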
31417
31418 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31419 CMP_RESULT_MODE is the mode of the comparison result. */
31420
31421 void
31422 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31423 {
31424 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31425 arm_expand_vector_compare, and another one here. */
31426 rtx mask;
31427
31428 if (TARGET_HAVE_MVE)
31429 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31430 else
31431 mask = gen_reg_rtx (cmp_result_mode);
31432
31433 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31434 operands[4], operands[5], true);
31435 if (inverted)
31436 std::swap (operands[1], operands[2]);
31437 if (TARGET_NEON)
31438 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31439 mask, operands[1], operands[2]));
31440 else
31441 {
31442 machine_mode cmp_mode = GET_MODE (operands[0]);
31443
31444 switch (GET_MODE_CLASS (cmp_mode))
31445 {
31446 case MODE_VECTOR_INT:
31447 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31448 operands[1], operands[2], mask));
31449 break;
31450 case MODE_VECTOR_FLOAT:
31451 if (TARGET_HAVE_MVE_FLOAT)
31452 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31453 operands[1], operands[2], mask));
31454 else
31455 gcc_unreachable ();
31456 break;
31457 default:
31458 gcc_unreachable ();
31459 }
31460 }
31461 }
31462 \f
31463 #define MAX_VECT_LEN 16
31464
31465 struct expand_vec_perm_d
31466 {
31467 rtx target, op0, op1;
31468 vec_perm_indices perm;
31469 machine_mode vmode;
31470 bool one_vector_p;
31471 bool testing_p;
31472 };
31473
31474 /* Generate a variable permutation. */
31475
31476 static void
31477 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31478 {
31479 machine_mode vmode = GET_MODE (target);
31480 bool one_vector_p = rtx_equal_p (op0, op1);
31481
31482 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31483 gcc_checking_assert (GET_MODE (op0) == vmode);
31484 gcc_checking_assert (GET_MODE (op1) == vmode);
31485 gcc_checking_assert (GET_MODE (sel) == vmode);
31486 gcc_checking_assert (TARGET_NEON);
31487
31488 if (one_vector_p)
31489 {
31490 if (vmode == V8QImode)
31491 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31492 else
31493 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31494 }
31495 else
31496 {
31497 rtx pair;
31498
31499 if (vmode == V8QImode)
31500 {
31501 pair = gen_reg_rtx (V16QImode);
31502 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31503 pair = gen_lowpart (TImode, pair);
31504 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31505 }
31506 else
31507 {
31508 pair = gen_reg_rtx (OImode);
31509 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31510 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31511 }
31512 }
31513 }
31514
31515 void
31516 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31517 {
31518 machine_mode vmode = GET_MODE (target);
31519 unsigned int nelt = GET_MODE_NUNITS (vmode);
31520 bool one_vector_p = rtx_equal_p (op0, op1);
31521 rtx mask;
31522
31523 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31524 numbering of elements for big-endian, we must reverse the order. */
31525 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31526
31527 /* The VTBL instruction does not use a modulo index, so we must take care
31528 of that ourselves. */
31529 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31530 mask = gen_const_vec_duplicate (vmode, mask);
31531 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31532
31533 arm_expand_vec_perm_1 (target, op0, op1, sel);
31534 }
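
/* For example, when permuting two V8QI inputs, a selector lane equal to 17
   is first ANDed with 15 (2 * nelt - 1), reducing it to 1, i.e. element 1
   of the concatenated { op0, op1 } pair; this matches the modulo semantics
   VEC_PERM_EXPR requires before the VTBL expansion runs.  */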
31535
31536 /* Map lane ordering between architectural lane order, and GCC lane order,
31537 taking into account ABI. See comment above output_move_neon for details. */
31538
31539 static int
31540 neon_endian_lane_map (machine_mode mode, int lane)
31541 {
31542 if (BYTES_BIG_ENDIAN)
31543 {
31544 int nelems = GET_MODE_NUNITS (mode);
31545 /* Reverse lane order. */
31546 lane = (nelems - 1 - lane);
31547 /* Reverse D register order, to match ABI. */
31548 if (GET_MODE_SIZE (mode) == 16)
31549 lane = lane ^ (nelems / 2);
31550 }
31551 return lane;
31552 }
31553
31554 /* Some permutations index into pairs of vectors; this is a helper function
31555 to map indexes into those pairs of vectors. */
31556
31557 static int
31558 neon_pair_endian_lane_map (machine_mode mode, int lane)
31559 {
31560 int nelem = GET_MODE_NUNITS (mode);
31561 if (BYTES_BIG_ENDIAN)
31562 lane =
31563 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31564 return lane;
31565 }
31566
31567 /* Generate or test for an insn that supports a constant permutation. */
31568
31569 /* Recognize patterns for the VUZP insns. */
31570
31571 static bool
31572 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31573 {
31574 unsigned int i, odd, mask, nelt = d->perm.length ();
31575 rtx out0, out1, in0, in1;
31576 int first_elem;
31577 int swap_nelt;
31578
31579 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31580 return false;
31581
31582 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31583 big endian pattern on 64 bit vectors, so we correct for that. */
31584 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31585 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31586
31587 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31588
31589 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31590 odd = 0;
31591 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31592 odd = 1;
31593 else
31594 return false;
31595 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31596
31597 for (i = 0; i < nelt; i++)
31598 {
31599 unsigned elt =
31600 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31601 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31602 return false;
31603 }
31604
31605 /* Success! */
31606 if (d->testing_p)
31607 return true;
31608
31609 in0 = d->op0;
31610 in1 = d->op1;
31611 if (swap_nelt != 0)
31612 std::swap (in0, in1);
31613
31614 out0 = d->target;
31615 out1 = gen_reg_rtx (d->vmode);
31616 if (odd)
31617 std::swap (out0, out1);
31618
31619 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31620 return true;
31621 }
31622
31623 /* Recognize patterns for the VZIP insns. */
31624
31625 static bool
31626 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31627 {
31628 unsigned int i, high, mask, nelt = d->perm.length ();
31629 rtx out0, out1, in0, in1;
31630 int first_elem;
31631 bool is_swapped;
31632
31633 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31634 return false;
31635
31636 is_swapped = BYTES_BIG_ENDIAN;
31637
31638 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31639
31640 high = nelt / 2;
31641 if (first_elem == neon_endian_lane_map (d->vmode, high))
31642 ;
31643 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31644 high = 0;
31645 else
31646 return false;
31647 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31648
31649 for (i = 0; i < nelt / 2; i++)
31650 {
31651 unsigned elt =
31652 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31653 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31654 != elt)
31655 return false;
31656 elt =
31657 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31658 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31659 != elt)
31660 return false;
31661 }
31662
31663 /* Success! */
31664 if (d->testing_p)
31665 return true;
31666
31667 in0 = d->op0;
31668 in1 = d->op1;
31669 if (is_swapped)
31670 std::swap (in0, in1);
31671
31672 out0 = d->target;
31673 out1 = gen_reg_rtx (d->vmode);
31674 if (high)
31675 std::swap (out0, out1);
31676
31677 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31678 return true;
31679 }
31680
31681 /* Recognize patterns for the VREV insns. */
31682 static bool
31683 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31684 {
31685 unsigned int i, j, diff, nelt = d->perm.length ();
31686 rtx (*gen) (machine_mode, rtx, rtx);
31687
31688 if (!d->one_vector_p)
31689 return false;
31690
31691 diff = d->perm[0];
31692 switch (diff)
31693 {
31694 case 7:
31695 switch (d->vmode)
31696 {
31697 case E_V16QImode:
31698 case E_V8QImode:
31699 gen = gen_neon_vrev64;
31700 break;
31701 default:
31702 return false;
31703 }
31704 break;
31705 case 3:
31706 switch (d->vmode)
31707 {
31708 case E_V16QImode:
31709 case E_V8QImode:
31710 gen = gen_neon_vrev32;
31711 break;
31712 case E_V8HImode:
31713 case E_V4HImode:
31714 case E_V8HFmode:
31715 case E_V4HFmode:
31716 gen = gen_neon_vrev64;
31717 break;
31718 default:
31719 return false;
31720 }
31721 break;
31722 case 1:
31723 switch (d->vmode)
31724 {
31725 case E_V16QImode:
31726 case E_V8QImode:
31727 gen = gen_neon_vrev16;
31728 break;
31729 case E_V8HImode:
31730 case E_V4HImode:
31731 gen = gen_neon_vrev32;
31732 break;
31733 case E_V4SImode:
31734 case E_V2SImode:
31735 case E_V4SFmode:
31736 case E_V2SFmode:
31737 gen = gen_neon_vrev64;
31738 break;
31739 default:
31740 return false;
31741 }
31742 break;
31743 default:
31744 return false;
31745 }
31746
31747 for (i = 0; i < nelt ; i += diff + 1)
31748 for (j = 0; j <= diff; j += 1)
31749 {
31750 /* This is guaranteed to be true as the value of diff
31751 is 7, 3, 1 and we should have enough elements in the
31752 queue to generate this. Getting a vector mask with a
31753 value of diff other than these values implies that
31754 something is wrong by the time we get here. */
31755 gcc_assert (i + j < nelt);
31756 if (d->perm[i + j] != i + diff - j)
31757 return false;
31758 }
31759
31760 /* Success! */
31761 if (d->testing_p)
31762 return true;
31763
31764 emit_insn (gen (d->vmode, d->target, d->op0));
31765 return true;
31766 }
31767
31768 /* Recognize patterns for the VTRN insns. */
31769
31770 static bool
31771 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31772 {
31773 unsigned int i, odd, mask, nelt = d->perm.length ();
31774 rtx out0, out1, in0, in1;
31775
31776 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31777 return false;
31778
31779 /* Note that these are little-endian tests. Adjust for big-endian later. */
31780 if (d->perm[0] == 0)
31781 odd = 0;
31782 else if (d->perm[0] == 1)
31783 odd = 1;
31784 else
31785 return false;
31786 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31787
31788 for (i = 0; i < nelt; i += 2)
31789 {
31790 if (d->perm[i] != i + odd)
31791 return false;
31792 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31793 return false;
31794 }
31795
31796 /* Success! */
31797 if (d->testing_p)
31798 return true;
31799
31800 in0 = d->op0;
31801 in1 = d->op1;
31802 if (BYTES_BIG_ENDIAN)
31803 {
31804 std::swap (in0, in1);
31805 odd = !odd;
31806 }
31807
31808 out0 = d->target;
31809 out1 = gen_reg_rtx (d->vmode);
31810 if (odd)
31811 std::swap (out0, out1);
31812
31813 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31814 return true;
31815 }
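
/* For example, on a little-endian target with two V8QI inputs, the selector
   { 0, 8, 2, 10, 4, 12, 6, 14 } is accepted here with ODD == 0 and becomes
   a single VTRN.8.  */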
31816
31817 /* Recognize patterns for the VEXT insns. */
31818
31819 static bool
31820 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31821 {
31822 unsigned int i, nelt = d->perm.length ();
31823 rtx offset;
31824
31825 unsigned int location;
31826
31827 unsigned int next = d->perm[0] + 1;
31828
31829 /* TODO: Handle GCC's numbering of elements for big-endian. */
31830 if (BYTES_BIG_ENDIAN)
31831 return false;
31832
31833 /* Check if the extracted indexes are increasing by one. */
31834 for (i = 1; i < nelt; next++, i++)
31835 {
31836 /* If we hit the most significant element of the 2nd vector in
31837 the previous iteration, no need to test further. */
31838 if (next == 2 * nelt)
31839 return false;
31840
31841 /* If we are operating on only one vector: it could be a
31842 rotation. If there are only two elements of size < 64, let
31843 arm_evpc_neon_vrev catch it. */
31844 if (d->one_vector_p && (next == nelt))
31845 {
31846 if ((nelt == 2) && (d->vmode != V2DImode))
31847 return false;
31848 else
31849 next = 0;
31850 }
31851
31852 if (d->perm[i] != next)
31853 return false;
31854 }
31855
31856 location = d->perm[0];
31857
31858 /* Success! */
31859 if (d->testing_p)
31860 return true;
31861
31862 offset = GEN_INT (location);
31863
31864 if (d->vmode == E_DImode)
31865 return false;
31866
31867 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31868 return true;
31869 }
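
/* For example, with two V8QI inputs the selector { 1, 2, 3, 4, 5, 6, 7, 8 }
   passes the consecutive-index check above with LOCATION == 1 and is
   emitted as a single VEXT.8 with an immediate offset of 1 (little-endian
   only, as noted above).  */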
31870
31871 /* The NEON VTBL instruction is a fully variable permutation that's even
31872 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31873 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31874 can do slightly better by expanding this as a constant where we don't
31875 have to apply a mask. */
31876
31877 static bool
31878 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31879 {
31880 rtx rperm[MAX_VECT_LEN], sel;
31881 machine_mode vmode = d->vmode;
31882 unsigned int i, nelt = d->perm.length ();
31883
31884 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31885 numbering of elements for big-endian, we must reverse the order. */
31886 if (BYTES_BIG_ENDIAN)
31887 return false;
31888
31889 if (d->testing_p)
31890 return true;
31891
31892 /* Generic code will try constant permutation twice. Once with the
31893 original mode and again with the elements lowered to QImode.
31894 So wait and don't do the selector expansion ourselves. */
31895 if (vmode != V8QImode && vmode != V16QImode)
31896 return false;
31897
31898 for (i = 0; i < nelt; ++i)
31899 rperm[i] = GEN_INT (d->perm[i]);
31900 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31901 sel = force_reg (vmode, sel);
31902
31903 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31904 return true;
31905 }
31906
31907 static bool
31908 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31909 {
31910 /* Check if the input mask matches vext before reordering the
31911 operands. */
31912 if (TARGET_NEON)
31913 if (arm_evpc_neon_vext (d))
31914 return true;
31915
31916 /* The pattern matching functions above are written to look for a small
31917 number to begin the sequence (0, 1, N/2). If we begin with an index
31918 from the second operand, we can swap the operands. */
31919 unsigned int nelt = d->perm.length ();
31920 if (d->perm[0] >= nelt)
31921 {
31922 d->perm.rotate_inputs (1);
31923 std::swap (d->op0, d->op1);
31924 }
31925
31926 if (TARGET_NEON)
31927 {
31928 if (arm_evpc_neon_vuzp (d))
31929 return true;
31930 if (arm_evpc_neon_vzip (d))
31931 return true;
31932 if (arm_evpc_neon_vrev (d))
31933 return true;
31934 if (arm_evpc_neon_vtrn (d))
31935 return true;
31936 return arm_evpc_neon_vtbl (d);
31937 }
31938 return false;
31939 }
31940
31941 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31942
31943 static bool
31944 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
31945 rtx target, rtx op0, rtx op1,
31946 const vec_perm_indices &sel)
31947 {
31948 if (vmode != op_mode)
31949 return false;
31950
31951 struct expand_vec_perm_d d;
31952 int i, nelt, which;
31953
31954 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31955 return false;
31956
31957 d.target = target;
31958 if (op0)
31959 {
31960 rtx nop0 = force_reg (vmode, op0);
31961 if (op0 == op1)
31962 op1 = nop0;
31963 op0 = nop0;
31964 }
31965 if (op1)
31966 op1 = force_reg (vmode, op1);
31967 d.op0 = op0;
31968 d.op1 = op1;
31969
31970 d.vmode = vmode;
31971 gcc_assert (VECTOR_MODE_P (d.vmode));
31972 d.testing_p = !target;
31973
31974 nelt = GET_MODE_NUNITS (d.vmode);
31975 for (i = which = 0; i < nelt; ++i)
31976 {
31977 int ei = sel[i] & (2 * nelt - 1);
31978 which |= (ei < nelt ? 1 : 2);
31979 }
31980
31981 switch (which)
31982 {
31983 default:
31984 gcc_unreachable();
31985
31986 case 3:
31987 d.one_vector_p = false;
31988 if (d.testing_p || !rtx_equal_p (op0, op1))
31989 break;
31990
31991 /* The elements of PERM do not suggest that only the first operand
31992 is used, but both operands are identical. Allow easier matching
31993 of the permutation by folding the permutation into the single
31994 input vector. */
31995 /* FALLTHRU */
31996 case 2:
31997 d.op0 = op1;
31998 d.one_vector_p = true;
31999 break;
32000
32001 case 1:
32002 d.op1 = op0;
32003 d.one_vector_p = true;
32004 break;
32005 }
32006
32007 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32008
32009 if (!d.testing_p)
32010 return arm_expand_vec_perm_const_1 (&d);
32011
32012 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32013 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32014 if (!d.one_vector_p)
32015 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32016
32017 start_sequence ();
32018 bool ret = arm_expand_vec_perm_const_1 (&d);
32019 end_sequence ();
32020
32021 return ret;
32022 }
32023
32024 bool
32025 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32026 {
32027 /* If we are soft float and either have LDRD or the access fits in a single
32028 word, then all auto-increment forms are ok. */
32029 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32030 return true;
32031
32032 switch (code)
32033 {
32034 /* Post-increment and pre-decrement are supported for all instruction
32035 forms, except that pre-decrement is not available for vector modes. */
32036 case ARM_POST_INC:
32037 case ARM_PRE_DEC:
32038 if (VECTOR_MODE_P (mode))
32039 {
32040 if (code != ARM_PRE_DEC)
32041 return true;
32042 else
32043 return false;
32044 }
32045
32046 return true;
32047
32048 case ARM_POST_DEC:
32049 case ARM_PRE_INC:
32050 /* Without LDRD, and with a mode size greater than the
32051 word size, there is no point in auto-incrementing
32052 because ldm and stm do not have these forms. */
32053 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32054 return false;
32055
32056 /* Vector and floating point modes do not support
32057 these auto increment forms. */
32058 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32059 return false;
32060
32061 return true;
32062
32063 default:
32064 return false;
32065
32066 }
32067
32068 return false;
32069 }
32070
32071 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32072 on ARM, since we know that shifts by negative amounts are no-ops.
32073 Additionally, the default expansion code is not available or suitable
32074 for post-reload insn splits (this can occur when the register allocator
32075 chooses not to do a shift in NEON).
32076
32077 This function is used in both initial expand and post-reload splits, and
32078 handles all kinds of 64-bit shifts.
32079
32080 Input requirements:
32081 - It is safe for the input and output to be the same register, but
32082 early-clobber rules apply for the shift amount and scratch registers.
32083 - Shift by register requires both scratch registers. In all other cases
32084 the scratch registers may be NULL.
32085 - Ashiftrt by a register also clobbers the CC register. */
32086 void
32087 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32088 rtx amount, rtx scratch1, rtx scratch2)
32089 {
32090 rtx out_high = gen_highpart (SImode, out);
32091 rtx out_low = gen_lowpart (SImode, out);
32092 rtx in_high = gen_highpart (SImode, in);
32093 rtx in_low = gen_lowpart (SImode, in);
32094
32095 /* Terminology:
32096 in = the register pair containing the input value.
32097 out = the destination register pair.
32098 up = the high- or low-part of each pair.
32099 down = the opposite part to "up".
32100 In a shift, we can consider bits to shift from "up"-stream to
32101 "down"-stream, so in a left-shift "up" is the low-part and "down"
32102 is the high-part of each register pair. */
32103
32104 rtx out_up = code == ASHIFT ? out_low : out_high;
32105 rtx out_down = code == ASHIFT ? out_high : out_low;
32106 rtx in_up = code == ASHIFT ? in_low : in_high;
32107 rtx in_down = code == ASHIFT ? in_high : in_low;
32108
32109 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32110 gcc_assert (out
32111 && (REG_P (out) || SUBREG_P (out))
32112 && GET_MODE (out) == DImode);
32113 gcc_assert (in
32114 && (REG_P (in) || SUBREG_P (in))
32115 && GET_MODE (in) == DImode);
32116 gcc_assert (amount
32117 && (((REG_P (amount) || SUBREG_P (amount))
32118 && GET_MODE (amount) == SImode)
32119 || CONST_INT_P (amount)));
32120 gcc_assert (scratch1 == NULL
32121 || (GET_CODE (scratch1) == SCRATCH)
32122 || (GET_MODE (scratch1) == SImode
32123 && REG_P (scratch1)));
32124 gcc_assert (scratch2 == NULL
32125 || (GET_CODE (scratch2) == SCRATCH)
32126 || (GET_MODE (scratch2) == SImode
32127 && REG_P (scratch2)));
32128 gcc_assert (!REG_P (out) || !REG_P (amount)
32129 || !HARD_REGISTER_P (out)
32130 || (REGNO (out) != REGNO (amount)
32131 && REGNO (out) + 1 != REGNO (amount)));
32132
32133 /* Macros to make following code more readable. */
32134 #define SUB_32(DEST,SRC) \
32135 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32136 #define RSB_32(DEST,SRC) \
32137 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32138 #define SUB_S_32(DEST,SRC) \
32139 gen_addsi3_compare0 ((DEST), (SRC), \
32140 GEN_INT (-32))
32141 #define SET(DEST,SRC) \
32142 gen_rtx_SET ((DEST), (SRC))
32143 #define SHIFT(CODE,SRC,AMOUNT) \
32144 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32145 #define LSHIFT(CODE,SRC,AMOUNT) \
32146 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32147 SImode, (SRC), (AMOUNT))
32148 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32149 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32150 SImode, (SRC), (AMOUNT))
32151 #define ORR(A,B) \
32152 gen_rtx_IOR (SImode, (A), (B))
32153 #define BRANCH(COND,LABEL) \
32154 gen_arm_cond_branch ((LABEL), \
32155 gen_rtx_ ## COND (CCmode, cc_reg, \
32156 const0_rtx), \
32157 cc_reg)
32158
32159 /* Shifts by register and shifts by constant are handled separately. */
32160 if (CONST_INT_P (amount))
32161 {
32162 /* We have a shift-by-constant. */
32163
32164 /* First, handle out-of-range shift amounts.
32165 In both cases we try to match the result that an ARM shift-by-register
32166 instruction would give. This helps reduce execution
32167 differences between optimization levels, but it won't stop other
32168 parts of the compiler doing different things. This is "undefined
32169 behavior" in any case. */
32170 if (INTVAL (amount) <= 0)
32171 emit_insn (gen_movdi (out, in));
32172 else if (INTVAL (amount) >= 64)
32173 {
32174 if (code == ASHIFTRT)
32175 {
32176 rtx const31_rtx = GEN_INT (31);
32177 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32178 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32179 }
32180 else
32181 emit_insn (gen_movdi (out, const0_rtx));
32182 }
32183
32184 /* Now handle valid shifts. */
32185 else if (INTVAL (amount) < 32)
32186 {
32187 /* Shifts by a constant less than 32. */
32188 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32189
32190 /* Clearing the out register in DImode first avoids lots
32191 of spilling and results in less stack usage.
32192 Later this redundant insn is completely removed.
32193 Do that only if "in" and "out" are different registers. */
32194 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32195 emit_insn (SET (out, const0_rtx));
32196 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32197 emit_insn (SET (out_down,
32198 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32199 out_down)));
32200 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32201 }
32202 else
32203 {
32204 /* Shifts by a constant greater than 31. */
32205 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32206
32207 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32208 emit_insn (SET (out, const0_rtx));
32209 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32210 if (code == ASHIFTRT)
32211 emit_insn (gen_ashrsi3 (out_up, in_up,
32212 GEN_INT (31)));
32213 else
32214 emit_insn (SET (out_up, const0_rtx));
32215 }
32216 }
32217 else
32218 {
32219 /* We have a shift-by-register. */
32220 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32221
32222 /* This alternative requires the scratch registers. */
32223 gcc_assert (scratch1 && REG_P (scratch1));
32224 gcc_assert (scratch2 && REG_P (scratch2));
32225
32226 /* We will need the values "amount-32" and "32-amount" later.
32227 Swapping them around now allows the later code to be more general. */
32228 switch (code)
32229 {
32230 case ASHIFT:
32231 emit_insn (SUB_32 (scratch1, amount));
32232 emit_insn (RSB_32 (scratch2, amount));
32233 break;
32234 case ASHIFTRT:
32235 emit_insn (RSB_32 (scratch1, amount));
32236 /* Also set CC from amount - 32, so that amount >= 32 can be tested below. */
32237 emit_insn (SUB_S_32 (scratch2, amount));
32238 break;
32239 case LSHIFTRT:
32240 emit_insn (RSB_32 (scratch1, amount));
32241 emit_insn (SUB_32 (scratch2, amount));
32242 break;
32243 default:
32244 gcc_unreachable ();
32245 }
32246
32247 /* Emit code like this:
32248
32249 arithmetic-left:
32250 out_down = in_down << amount;
32251 out_down = (in_up << (amount - 32)) | out_down;
32252 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32253 out_up = in_up << amount;
32254
32255 arithmetic-right:
32256 out_down = in_down >> amount;
32257 out_down = (in_up << (32 - amount)) | out_down;
32258 if (amount >= 32)
32259 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32260 out_up = in_up >> amount;
32261
32262 logical-right:
32263 out_down = in_down >> amount;
32264 out_down = (in_up << (32 - amount)) | out_down;
32265 if (amount >= 32)
32266 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32267 out_up = in_up >> amount;
32268
32269 The ARM and Thumb2 variants are the same but implemented slightly
32270 differently. If this were only called during expand we could just
32271 use the Thumb2 case and let combine do the right thing, but this
32272 can also be called from post-reload splitters. */
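/* For illustration only (hypothetical register assignment: in_low == r2,
   in_high == r3, out_low == r0, out_high == r1, amount in r4,
   scratch1 == r5, scratch2 == r6): the ARM-mode expansion of a 64-bit
   logical right shift by register is roughly

     rsb r5, r4, #32        @ scratch1 = 32 - amount
     sub r6, r4, #32        @ scratch2 = amount - 32
     lsr r0, r2, r4         @ out_low  = in_low >> amount
     orr r0, r0, r3, lsl r5
     orr r0, r0, r3, lsr r6
     lsr r1, r3, r4         @ out_high = in_high >> amount

   which relies on register-specified shifts by 32 or more producing
   zero.  */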
32273
32274 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32275
32276 if (!TARGET_THUMB2)
32277 {
32278 /* Emit code for ARM mode. */
32279 emit_insn (SET (out_down,
32280 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32281 if (code == ASHIFTRT)
32282 {
32283 rtx_code_label *done_label = gen_label_rtx ();
32284 emit_jump_insn (BRANCH (LT, done_label));
32285 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32286 out_down)));
32287 emit_label (done_label);
32288 }
32289 else
32290 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32291 out_down)));
32292 }
32293 else
32294 {
32295 /* Emit code for Thumb2 mode.
32296 Thumb2 can't do shift and or in one insn. */
32297 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32298 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32299
32300 if (code == ASHIFTRT)
32301 {
32302 rtx_code_label *done_label = gen_label_rtx ();
32303 emit_jump_insn (BRANCH (LT, done_label));
32304 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32305 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32306 emit_label (done_label);
32307 }
32308 else
32309 {
32310 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32311 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32312 }
32313 }
32314
32315 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32316 }
32317
32318 #undef SUB_32
32319 #undef RSB_32
32320 #undef SUB_S_32
32321 #undef SET
32322 #undef SHIFT
32323 #undef LSHIFT
32324 #undef REV_LSHIFT
32325 #undef ORR
32326 #undef BRANCH
32327 }
32328
32329 /* Returns true if the pattern is a valid symbolic address, which is either a
32330 symbol_ref or (symbol_ref + addend).
32331
32332 According to the ARM ELF ABI, the initial addend of REL-type relocations
32333 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32334 literal field of the instruction as a 16-bit signed value in the range
32335 -32768 <= A < 32768.
32336
32337 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32338 unsigned range of 0 <= A < 256 as described in the AAELF32
32339 relocation handling documentation: REL-type relocations are encoded
32340 as unsigned in this case. */
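/* Illustrative examples (assumed RTL, not from a testcase), with
   -mword-relocations not in effect and MOVW/MOVT available:
     (symbol_ref "x")                                   -> valid
     (const (plus (symbol_ref "x") (const_int 100)))    -> valid
     (const (plus (symbol_ref "x") (const_int 65536)))  -> invalid
   On Thumb-1 without MOVT only addends in [0, 255] are accepted.  */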
32341
32342 bool
32343 arm_valid_symbolic_address_p (rtx addr)
32344 {
32345 rtx xop0, xop1 = NULL_RTX;
32346 rtx tmp = addr;
32347
32348 if (target_word_relocations)
32349 return false;
32350
32351 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32352 return true;
32353
32354 /* (const (plus: symbol_ref const_int)) */
32355 if (GET_CODE (addr) == CONST)
32356 tmp = XEXP (addr, 0);
32357
32358 if (GET_CODE (tmp) == PLUS)
32359 {
32360 xop0 = XEXP (tmp, 0);
32361 xop1 = XEXP (tmp, 1);
32362
32363 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32364 {
32365 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32366 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32367 else
32368 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32369 }
32370 }
32371
32372 return false;
32373 }
32374
32375 /* Returns true if *COMPARISON is a valid comparison operation, and puts
32376 the operands into a form that is valid. */
32377 bool
32378 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32379 {
32380 enum rtx_code code = GET_CODE (*comparison);
32381 int code_int;
32382 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32383 ? GET_MODE (*op2) : GET_MODE (*op1);
32384
32385 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32386
32387 if (code == UNEQ || code == LTGT)
32388 return false;
32389
32390 code_int = (int)code;
32391 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32392 PUT_CODE (*comparison, (enum rtx_code)code_int);
32393
32394 switch (mode)
32395 {
32396 case E_SImode:
32397 if (!arm_add_operand (*op1, mode))
32398 *op1 = force_reg (mode, *op1);
32399 if (!arm_add_operand (*op2, mode))
32400 *op2 = force_reg (mode, *op2);
32401 return true;
32402
32403 case E_DImode:
32404 /* gen_compare_reg() will sort out any invalid operands. */
32405 return true;
32406
32407 case E_HFmode:
32408 if (!TARGET_VFP_FP16INST)
32409 break;
32410 /* FP16 comparisons are done in SF mode. */
32411 mode = SFmode;
32412 *op1 = convert_to_mode (mode, *op1, 1);
32413 *op2 = convert_to_mode (mode, *op2, 1);
32414 /* Fall through. */
32415 case E_SFmode:
32416 case E_DFmode:
32417 if (!vfp_compare_operand (*op1, mode))
32418 *op1 = force_reg (mode, *op1);
32419 if (!vfp_compare_operand (*op2, mode))
32420 *op2 = force_reg (mode, *op2);
32421 return true;
32422 default:
32423 break;
32424 }
32425
32426 return false;
32427
32428 }
32429
32430 /* Maximum number of instructions to use when setting a block of memory. */
32431 static int
32432 arm_block_set_max_insns (void)
32433 {
32434 if (optimize_function_for_size_p (cfun))
32435 return 4;
32436 else
32437 return current_tune->max_insns_inline_memset;
32438 }
32439
32440 /* Return TRUE if it's profitable to set a block of memory for the
32441 non-vectorized case. VAL is the value to set the memory
32442 with. LENGTH is the number of bytes to set. ALIGN is the
32443 alignment of the destination memory in bytes. UNALIGNED_P
32444 is TRUE if we can only set the memory with instructions that
32445 meet the alignment requirements. USE_STRD_P is TRUE if we
32446 can use strd to set the memory. */
32447 static bool
32448 arm_block_set_non_vect_profit_p (rtx val,
32449 unsigned HOST_WIDE_INT length,
32450 unsigned HOST_WIDE_INT align,
32451 bool unaligned_p, bool use_strd_p)
32452 {
32453 int num = 0;
32454 /* For a leftover of 0-7 bytes, we can set the memory block using
32455 strb/strh/str with the minimum number of instructions. */
32456 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
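/* Worked example (illustrative): with LENGTH == 14, word alignment and
   no strd, the word loop needs 14 >> 2 == 3 stores plus
   leftover[14 & 3] == leftover[2] == 1 strh, so num is the cost of
   loading the constant plus 4.  */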
32457
32458 if (unaligned_p)
32459 {
32460 num = arm_const_inline_cost (SET, val);
32461 num += length / align + length % align;
32462 }
32463 else if (use_strd_p)
32464 {
32465 num = arm_const_double_inline_cost (val);
32466 num += (length >> 3) + leftover[length & 7];
32467 }
32468 else
32469 {
32470 num = arm_const_inline_cost (SET, val);
32471 num += (length >> 2) + leftover[length & 3];
32472 }
32473
32474 /* We may be able to combine last pair STRH/STRB into a single STR
32475 by shifting one byte back. */
32476 if (unaligned_access && length > 3 && (length & 3) == 3)
32477 num--;
32478
32479 return (num <= arm_block_set_max_insns ());
32480 }
32481
32482 /* Return TRUE if it's profitable to set a block of memory for the
32483 vectorized case. LENGTH is the number of bytes to set.
32484 ALIGN is the alignment of the destination memory in bytes.
32485 MODE is the vector mode used to set the memory. */
32486 static bool
32487 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32488 unsigned HOST_WIDE_INT align,
32489 machine_mode mode)
32490 {
32491 int num;
32492 bool unaligned_p = ((align & 3) != 0);
32493 unsigned int nelt = GET_MODE_NUNITS (mode);
32494
32495 /* Instruction loading constant value. */
32496 num = 1;
32497 /* Instructions storing the memory. */
32498 num += (length + nelt - 1) / nelt;
32499 /* Instructions adjusting the address expression. We only need to
32500 adjust the address expression if it's 4-byte aligned and the leftover
32501 bytes can only be stored by a misaligned store instruction. */
32502 if (!unaligned_p && (length & 3) != 0)
32503 num++;
32504
32505 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32506 if (!unaligned_p && mode == V16QImode)
32507 num--;
32508
32509 return (num <= arm_block_set_max_insns ());
32510 }
32511
32512 /* Set a block of memory using vectorization instructions for the
32513 unaligned case. We fill the first LENGTH bytes of the memory
32514 area starting from DSTBASE with byte constant VALUE. ALIGN is
32515 the alignment requirement of memory. Return TRUE if succeeded. */
32516 static bool
32517 arm_block_set_unaligned_vect (rtx dstbase,
32518 unsigned HOST_WIDE_INT length,
32519 unsigned HOST_WIDE_INT value,
32520 unsigned HOST_WIDE_INT align)
32521 {
32522 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32523 rtx dst, mem;
32524 rtx val_vec, reg;
32525 rtx (*gen_func) (rtx, rtx);
32526 machine_mode mode;
32527 unsigned HOST_WIDE_INT v = value;
32528 unsigned int offset = 0;
32529 gcc_assert ((align & 0x3) != 0);
32530 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32531 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32532 if (length >= nelt_v16)
32533 {
32534 mode = V16QImode;
32535 gen_func = gen_movmisalignv16qi;
32536 }
32537 else
32538 {
32539 mode = V8QImode;
32540 gen_func = gen_movmisalignv8qi;
32541 }
32542 nelt_mode = GET_MODE_NUNITS (mode);
32543 gcc_assert (length >= nelt_mode);
32544 /* Skip if it isn't profitable. */
32545 if (!arm_block_set_vect_profit_p (length, align, mode))
32546 return false;
32547
32548 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32549 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32550
32551 v = sext_hwi (v, BITS_PER_WORD);
32552
32553 reg = gen_reg_rtx (mode);
32554 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32555 /* Emit instruction loading the constant value. */
32556 emit_move_insn (reg, val_vec);
32557
32558 /* Handle nelt_mode bytes in a vector. */
32559 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32560 {
32561 emit_insn ((*gen_func) (mem, reg));
32562 if (i + 2 * nelt_mode <= length)
32563 {
32564 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32565 offset += nelt_mode;
32566 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32567 }
32568 }
32569
32570 /* If at least nelt_v8 bytes are left over, we must be in
32571 V16QI mode. */
32572 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32573
32574 /* Handle (8, 16) bytes leftover. */
32575 if (i + nelt_v8 < length)
32576 {
32577 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32578 offset += length - i;
32579 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32580
32581 /* We are shifting bytes back, set the alignment accordingly. */
32582 if ((length & 1) != 0 && align >= 2)
32583 set_mem_align (mem, BITS_PER_UNIT);
32584
32585 emit_insn (gen_movmisalignv16qi (mem, reg));
32586 }
32587 /* Handle (0, 8] bytes leftover. */
32588 else if (i < length && i + nelt_v8 >= length)
32589 {
32590 if (mode == V16QImode)
32591 reg = gen_lowpart (V8QImode, reg);
32592
32593 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32594 + (nelt_mode - nelt_v8))));
32595 offset += (length - i) + (nelt_mode - nelt_v8);
32596 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32597
32598 /* We are shifting bytes back, set the alignment accordingly. */
32599 if ((length & 1) != 0 && align >= 2)
32600 set_mem_align (mem, BITS_PER_UNIT);
32601
32602 emit_insn (gen_movmisalignv8qi (mem, reg));
32603 }
32604
32605 return true;
32606 }
32607
32608 /* Set a block of memory using vectorization instructions for the
32609 aligned case. We fill the first LENGTH bytes of the memory area
32610 starting from DSTBASE with byte constant VALUE. ALIGN is the
32611 alignment requirement of memory. Return TRUE if succeeded. */
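/* Worked example (illustrative): for LENGTH == 20 with 4-byte alignment
   and unaligned access available, the code below emits roughly one
   constant load, a 16-byte vst1 at offset 0 and an 8-byte store at
   offset 12, covering bytes 12-19 by shifting the final store back.  */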
32612 static bool
32613 arm_block_set_aligned_vect (rtx dstbase,
32614 unsigned HOST_WIDE_INT length,
32615 unsigned HOST_WIDE_INT value,
32616 unsigned HOST_WIDE_INT align)
32617 {
32618 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32619 rtx dst, addr, mem;
32620 rtx val_vec, reg;
32621 machine_mode mode;
32622 unsigned int offset = 0;
32623
32624 gcc_assert ((align & 0x3) == 0);
32625 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32626 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32627 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32628 mode = V16QImode;
32629 else
32630 mode = V8QImode;
32631
32632 nelt_mode = GET_MODE_NUNITS (mode);
32633 gcc_assert (length >= nelt_mode);
32634 /* Skip if it isn't profitable. */
32635 if (!arm_block_set_vect_profit_p (length, align, mode))
32636 return false;
32637
32638 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32639
32640 reg = gen_reg_rtx (mode);
32641 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32642 /* Emit instruction loading the constant value. */
32643 emit_move_insn (reg, val_vec);
32644
32645 i = 0;
32646 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32647 if (mode == V16QImode)
32648 {
32649 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32650 emit_insn (gen_movmisalignv16qi (mem, reg));
32651 i += nelt_mode;
32652 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32653 if (i + nelt_v8 < length && i + nelt_v16 > length)
32654 {
32655 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32656 offset += length - nelt_mode;
32657 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32658 /* We are shifting bytes back, set the alignment accordingly. */
32659 if ((length & 0x3) == 0)
32660 set_mem_align (mem, BITS_PER_UNIT * 4);
32661 else if ((length & 0x1) == 0)
32662 set_mem_align (mem, BITS_PER_UNIT * 2);
32663 else
32664 set_mem_align (mem, BITS_PER_UNIT);
32665
32666 emit_insn (gen_movmisalignv16qi (mem, reg));
32667 return true;
32668 }
32669 /* Fall through for bytes leftover. */
32670 mode = V8QImode;
32671 nelt_mode = GET_MODE_NUNITS (mode);
32672 reg = gen_lowpart (V8QImode, reg);
32673 }
32674
32675 /* Handle 8 bytes in a vector. */
32676 for (; (i + nelt_mode <= length); i += nelt_mode)
32677 {
32678 addr = plus_constant (Pmode, dst, i);
32679 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32680 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32681 emit_move_insn (mem, reg);
32682 else
32683 emit_insn (gen_unaligned_storev8qi (mem, reg));
32684 }
32685
32686 /* Handle single word leftover by shifting 4 bytes back. We can
32687 use aligned access for this case. */
32688 if (i + UNITS_PER_WORD == length)
32689 {
32690 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32691 offset += i - UNITS_PER_WORD;
32692 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32693 /* We are shifting 4 bytes back, set the alignment accordingly. */
32694 if (align > UNITS_PER_WORD)
32695 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32696
32697 emit_insn (gen_unaligned_storev8qi (mem, reg));
32698 }
32699 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32700 We have to use unaligned access for this case. */
32701 else if (i < length)
32702 {
32703 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32704 offset += length - nelt_mode;
32705 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32706 /* We are shifting bytes back, set the alignment accordingly. */
32707 if ((length & 1) == 0)
32708 set_mem_align (mem, BITS_PER_UNIT * 2);
32709 else
32710 set_mem_align (mem, BITS_PER_UNIT);
32711
32712 emit_insn (gen_movmisalignv8qi (mem, reg));
32713 }
32714
32715 return true;
32716 }
32717
32718 /* Set a block of memory using plain strh/strb instructions, using
32719 only the instructions allowed by the alignment ALIGN on the processor.
32720 We fill the first LENGTH bytes of the memory area starting from DSTBASE
32721 with byte constant VALUE. ALIGN is the alignment requirement
32722 of memory. */
32723 static bool
32724 arm_block_set_unaligned_non_vect (rtx dstbase,
32725 unsigned HOST_WIDE_INT length,
32726 unsigned HOST_WIDE_INT value,
32727 unsigned HOST_WIDE_INT align)
32728 {
32729 unsigned int i;
32730 rtx dst, addr, mem;
32731 rtx val_exp, val_reg, reg;
32732 machine_mode mode;
32733 HOST_WIDE_INT v = value;
32734
32735 gcc_assert (align == 1 || align == 2);
32736
32737 if (align == 2)
32738 v |= (value << BITS_PER_UNIT);
32739
32740 v = sext_hwi (v, BITS_PER_WORD);
32741 val_exp = GEN_INT (v);
32742 /* Skip if it isn't profitable. */
32743 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32744 align, true, false))
32745 return false;
32746
32747 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32748 mode = (align == 2 ? HImode : QImode);
32749 val_reg = force_reg (SImode, val_exp);
32750 reg = gen_lowpart (mode, val_reg);
32751
32752 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32753 {
32754 addr = plus_constant (Pmode, dst, i);
32755 mem = adjust_automodify_address (dstbase, mode, addr, i);
32756 emit_move_insn (mem, reg);
32757 }
32758
32759 /* Handle single byte leftover. */
32760 if (i + 1 == length)
32761 {
32762 reg = gen_lowpart (QImode, val_reg);
32763 addr = plus_constant (Pmode, dst, i);
32764 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32765 emit_move_insn (mem, reg);
32766 i++;
32767 }
32768
32769 gcc_assert (i == length);
32770 return true;
32771 }
32772
32773 /* Set a block of memory using plain strd/str/strh/strb instructions,
32774 to permit unaligned stores on processors which support unaligned
32775 semantics for those instructions. We fill the first LENGTH bytes
32776 of the memory area starting from DSTBASE with byte constant VALUE.
32777 ALIGN is the alignment requirement of memory. */
32778 static bool
32779 arm_block_set_aligned_non_vect (rtx dstbase,
32780 unsigned HOST_WIDE_INT length,
32781 unsigned HOST_WIDE_INT value,
32782 unsigned HOST_WIDE_INT align)
32783 {
32784 unsigned int i;
32785 rtx dst, addr, mem;
32786 rtx val_exp, val_reg, reg;
32787 unsigned HOST_WIDE_INT v;
32788 bool use_strd_p;
32789
32790 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32791 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32792
32793 v = (value | (value << 8) | (value << 16) | (value << 24));
32794 if (length < UNITS_PER_WORD)
32795 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32796
32797 if (use_strd_p)
32798 v |= (v << BITS_PER_WORD);
32799 else
32800 v = sext_hwi (v, BITS_PER_WORD);
32801
32802 val_exp = GEN_INT (v);
32803 /* Skip if it isn't profitable. */
32804 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32805 align, false, use_strd_p))
32806 {
32807 if (!use_strd_p)
32808 return false;
32809
32810 /* Try without strd. */
32811 v = (v >> BITS_PER_WORD);
32812 v = sext_hwi (v, BITS_PER_WORD);
32813 val_exp = GEN_INT (v);
32814 use_strd_p = false;
32815 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32816 align, false, use_strd_p))
32817 return false;
32818 }
32819
32820 i = 0;
32821 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32822 /* Handle double words using strd if possible. */
32823 if (use_strd_p)
32824 {
32825 val_reg = force_reg (DImode, val_exp);
32826 reg = val_reg;
32827 for (; (i + 8 <= length); i += 8)
32828 {
32829 addr = plus_constant (Pmode, dst, i);
32830 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32831 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32832 emit_move_insn (mem, reg);
32833 else
32834 emit_insn (gen_unaligned_storedi (mem, reg));
32835 }
32836 }
32837 else
32838 val_reg = force_reg (SImode, val_exp);
32839
32840 /* Handle words. */
32841 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32842 for (; (i + 4 <= length); i += 4)
32843 {
32844 addr = plus_constant (Pmode, dst, i);
32845 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32846 if ((align & 3) == 0)
32847 emit_move_insn (mem, reg);
32848 else
32849 emit_insn (gen_unaligned_storesi (mem, reg));
32850 }
32851
32852 /* Merge last pair of STRH and STRB into a STR if possible. */
32853 if (unaligned_access && i > 0 && (i + 3) == length)
32854 {
32855 addr = plus_constant (Pmode, dst, i - 1);
32856 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32857 /* We are shifting one byte back, set the alignment accordingly. */
32858 if ((align & 1) == 0)
32859 set_mem_align (mem, BITS_PER_UNIT);
32860
32861 /* Most likely this is an unaligned access, and we can't tell at
32862 compilation time. */
32863 emit_insn (gen_unaligned_storesi (mem, reg));
32864 return true;
32865 }
32866
32867 /* Handle half word leftover. */
32868 if (i + 2 <= length)
32869 {
32870 reg = gen_lowpart (HImode, val_reg);
32871 addr = plus_constant (Pmode, dst, i);
32872 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32873 if ((align & 1) == 0)
32874 emit_move_insn (mem, reg);
32875 else
32876 emit_insn (gen_unaligned_storehi (mem, reg));
32877
32878 i += 2;
32879 }
32880
32881 /* Handle single byte leftover. */
32882 if (i + 1 == length)
32883 {
32884 reg = gen_lowpart (QImode, val_reg);
32885 addr = plus_constant (Pmode, dst, i);
32886 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32887 emit_move_insn (mem, reg);
32888 }
32889
32890 return true;
32891 }
32892
32893 /* Set a block of memory using vectorization instructions for both
32894 aligned and unaligned cases. We fill the first LENGTH bytes of
32895 the memory area starting from DSTBASE with byte constant VALUE.
32896 ALIGN is the alignment requirement of memory. */
32897 static bool
32898 arm_block_set_vect (rtx dstbase,
32899 unsigned HOST_WIDE_INT length,
32900 unsigned HOST_WIDE_INT value,
32901 unsigned HOST_WIDE_INT align)
32902 {
32903 /* Check whether we need to use unaligned store instruction. */
32904 if (((align & 3) != 0 || (length & 3) != 0)
32905 /* Check whether unaligned store instruction is available. */
32906 && (!unaligned_access || BYTES_BIG_ENDIAN))
32907 return false;
32908
32909 if ((align & 3) == 0)
32910 return arm_block_set_aligned_vect (dstbase, length, value, align);
32911 else
32912 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32913 }
32914
32915 /* Expand a block-set (memset) operation. First we try to do that by using
32916 vectorization instructions, then try ARM unaligned access and
32917 double-word stores if profitable. OPERANDS[0] is the destination,
32918 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32919 initialize the memory with, OPERANDS[3] is the known alignment of the
32920 destination. */
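/* Illustrative example (assumed operands, not from a testcase): a
   15-byte memset of the value 0xAB with 4-byte alignment tries
   arm_block_set_vect first on a NEON target whose tuning prefers NEON
   string operations; otherwise, since the alignment is a multiple of
   four, it falls through to arm_block_set_aligned_non_vect, which emits
   str/strh/strb stores.  */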
32921 bool
32922 arm_gen_setmem (rtx *operands)
32923 {
32924 rtx dstbase = operands[0];
32925 unsigned HOST_WIDE_INT length;
32926 unsigned HOST_WIDE_INT value;
32927 unsigned HOST_WIDE_INT align;
32928
32929 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32930 return false;
32931
32932 length = UINTVAL (operands[1]);
32933 if (length > 64)
32934 return false;
32935
32936 value = (UINTVAL (operands[2]) & 0xFF);
32937 align = UINTVAL (operands[3]);
32938 if (TARGET_NEON && length >= 8
32939 && current_tune->string_ops_prefer_neon
32940 && arm_block_set_vect (dstbase, length, value, align))
32941 return true;
32942
32943 if (!unaligned_access && (align & 3) != 0)
32944 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32945
32946 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32947 }
32948
32949
32950 static bool
32951 arm_macro_fusion_p (void)
32952 {
32953 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32954 }
32955
32956 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32957 for MOVW / MOVT macro fusion. */
32958
32959 static bool
32960 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32961 {
32962 /* We are trying to fuse
32963 movw imm / movt imm
32964 instructions as a group that gets scheduled together. */
32965
32966 rtx set_dest = SET_DEST (curr_set);
32967
32968 if (GET_MODE (set_dest) != SImode)
32969 return false;
32970
32971 /* We are trying to match:
32972 prev (movw) == (set (reg r0) (const_int imm16))
32973 curr (movt) == (set (zero_extract (reg r0)
32974 (const_int 16)
32975 (const_int 16))
32976 (const_int imm16_1))
32977 or
32978 prev (movw) == (set (reg r1)
32979 (high (symbol_ref ("SYM"))))
32980 curr (movt) == (set (reg r0)
32981 (lo_sum (reg r1)
32982 (symbol_ref ("SYM")))) */
32983
32984 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32985 {
32986 if (CONST_INT_P (SET_SRC (curr_set))
32987 && CONST_INT_P (SET_SRC (prev_set))
32988 && REG_P (XEXP (set_dest, 0))
32989 && REG_P (SET_DEST (prev_set))
32990 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32991 return true;
32992
32993 }
32994 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32995 && REG_P (SET_DEST (curr_set))
32996 && REG_P (SET_DEST (prev_set))
32997 && GET_CODE (SET_SRC (prev_set)) == HIGH
32998 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32999 return true;
33000
33001 return false;
33002 }
33003
33004 static bool
33005 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33006 {
33007 rtx prev_set = single_set (prev);
33008 rtx curr_set = single_set (curr);
33009
33010 if (!prev_set
33011 || !curr_set)
33012 return false;
33013
33014 if (any_condjump_p (curr))
33015 return false;
33016
33017 if (!arm_macro_fusion_p ())
33018 return false;
33019
33020 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33021 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33022 return true;
33023
33024 return false;
33025 }
33026
33027 /* Return true iff the instruction fusion described by OP is enabled. */
33028 bool
33029 arm_fusion_enabled_p (tune_params::fuse_ops op)
33030 {
33031 return current_tune->fusible_ops & op;
33032 }
33033
33034 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33035 scheduled for speculative execution. Reject the long-running division
33036 and square-root instructions. */
33037
33038 static bool
33039 arm_sched_can_speculate_insn (rtx_insn *insn)
33040 {
33041 switch (get_attr_type (insn))
33042 {
33043 case TYPE_SDIV:
33044 case TYPE_UDIV:
33045 case TYPE_FDIVS:
33046 case TYPE_FDIVD:
33047 case TYPE_FSQRTS:
33048 case TYPE_FSQRTD:
33049 case TYPE_NEON_FP_SQRT_S:
33050 case TYPE_NEON_FP_SQRT_D:
33051 case TYPE_NEON_FP_SQRT_S_Q:
33052 case TYPE_NEON_FP_SQRT_D_Q:
33053 case TYPE_NEON_FP_DIV_S:
33054 case TYPE_NEON_FP_DIV_D:
33055 case TYPE_NEON_FP_DIV_S_Q:
33056 case TYPE_NEON_FP_DIV_D_Q:
33057 return false;
33058 default:
33059 return true;
33060 }
33061 }
33062
33063 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33064
33065 static unsigned HOST_WIDE_INT
33066 arm_asan_shadow_offset (void)
33067 {
33068 return HOST_WIDE_INT_1U << 29;
33069 }
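/* A minimal illustration (assuming the generic ASan mapping
   shadow = (addr >> 3) + offset applies): with this offset, the shadow
   byte for address 0x12345678 is at (0x12345678 >> 3) + 0x20000000
   == 0x22468acf.  */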
33070
33071
33072 /* This is a temporary fix for PR60655. Ideally we need
33073 to handle most of these cases in the generic part but
33074 currently we reject minus (..) (sym_ref). We try to
33075 ameliorate the case with minus (sym_ref1) (sym_ref2)
33076 where they are in the same section. */
33077
33078 static bool
33079 arm_const_not_ok_for_debug_p (rtx p)
33080 {
33081 tree decl_op0 = NULL;
33082 tree decl_op1 = NULL;
33083
33084 if (GET_CODE (p) == UNSPEC)
33085 return true;
33086 if (GET_CODE (p) == MINUS)
33087 {
33088 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33089 {
33090 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33091 if (decl_op1
33092 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33093 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33094 {
33095 if ((VAR_P (decl_op1)
33096 || TREE_CODE (decl_op1) == CONST_DECL)
33097 && (VAR_P (decl_op0)
33098 || TREE_CODE (decl_op0) == CONST_DECL))
33099 return (get_variable_section (decl_op1, false)
33100 != get_variable_section (decl_op0, false));
33101
33102 if (TREE_CODE (decl_op1) == LABEL_DECL
33103 && TREE_CODE (decl_op0) == LABEL_DECL)
33104 return (DECL_CONTEXT (decl_op1)
33105 != DECL_CONTEXT (decl_op0));
33106 }
33107
33108 return true;
33109 }
33110 }
33111
33112 return false;
33113 }
33114
33115 /* Return TRUE if X is a reference to a value in a constant pool. */
33116 extern bool
33117 arm_is_constant_pool_ref (rtx x)
33118 {
33119 return (MEM_P (x)
33120 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33121 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33122 }
33123
33124 /* Remember the last target of arm_set_current_function. */
33125 static GTY(()) tree arm_previous_fndecl;
33126
33127 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33128
33129 void
33130 save_restore_target_globals (tree new_tree)
33131 {
33132 /* If we have a previous state, use it. */
33133 if (TREE_TARGET_GLOBALS (new_tree))
33134 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33135 else if (new_tree == target_option_default_node)
33136 restore_target_globals (&default_target_globals);
33137 else
33138 {
33139 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33140 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33141 }
33142
33143 arm_option_params_internal ();
33144 }
33145
33146 /* Invalidate arm_previous_fndecl. */
33147
33148 void
33149 arm_reset_previous_fndecl (void)
33150 {
33151 arm_previous_fndecl = NULL_TREE;
33152 }
33153
33154 /* Establish appropriate back-end context for processing the function
33155 FNDECL. The argument might be NULL to indicate processing at top
33156 level, outside of any function scope. */
33157
33158 static void
33159 arm_set_current_function (tree fndecl)
33160 {
33161 if (!fndecl || fndecl == arm_previous_fndecl)
33162 return;
33163
33164 tree old_tree = (arm_previous_fndecl
33165 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33166 : NULL_TREE);
33167
33168 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33169
33170 /* If the current function has no attributes but the previous one did,
33171 use the default node. */
33172 if (! new_tree && old_tree)
33173 new_tree = target_option_default_node;
33174
33175 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
33176 the default have been handled by save_restore_target_globals from
33177 arm_pragma_target_parse. */
33178 if (old_tree == new_tree)
33179 return;
33180
33181 arm_previous_fndecl = fndecl;
33182
33183 /* First set the target options. */
33184 cl_target_option_restore (&global_options, &global_options_set,
33185 TREE_TARGET_OPTION (new_tree));
33186
33187 save_restore_target_globals (new_tree);
33188
33189 arm_override_options_after_change_1 (&global_options, &global_options_set);
33190 }
33191
33192 /* Implement TARGET_OPTION_PRINT. */
33193
33194 static void
33195 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33196 {
33197 int flags = ptr->x_target_flags;
33198 const char *fpu_name;
33199
33200 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33201 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33202
33203 fprintf (file, "%*sselected isa %s\n", indent, "",
33204 TARGET_THUMB2_P (flags) ? "thumb2" :
33205 TARGET_THUMB_P (flags) ? "thumb1" :
33206 "arm");
33207
33208 if (ptr->x_arm_arch_string)
33209 fprintf (file, "%*sselected architecture %s\n", indent, "",
33210 ptr->x_arm_arch_string);
33211
33212 if (ptr->x_arm_cpu_string)
33213 fprintf (file, "%*sselected CPU %s\n", indent, "",
33214 ptr->x_arm_cpu_string);
33215
33216 if (ptr->x_arm_tune_string)
33217 fprintf (file, "%*sselected tune %s\n", indent, "",
33218 ptr->x_arm_tune_string);
33219
33220 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33221 }
33222
33223 /* Hook to determine if one function can safely inline another. */
33224
33225 static bool
33226 arm_can_inline_p (tree caller, tree callee)
33227 {
33228 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33229 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33230 bool can_inline = true;
33231
33232 struct cl_target_option *caller_opts
33233 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33234 : target_option_default_node);
33235
33236 struct cl_target_option *callee_opts
33237 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33238 : target_option_default_node);
33239
33240 if (callee_opts == caller_opts)
33241 return true;
33242
33243 /* Callee's ISA features should be a subset of the caller's. */
33244 struct arm_build_target caller_target;
33245 struct arm_build_target callee_target;
33246 caller_target.isa = sbitmap_alloc (isa_num_bits);
33247 callee_target.isa = sbitmap_alloc (isa_num_bits);
33248
33249 arm_configure_build_target (&caller_target, caller_opts, false);
33250 arm_configure_build_target (&callee_target, callee_opts, false);
33251 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33252 can_inline = false;
33253
33254 sbitmap_free (caller_target.isa);
33255 sbitmap_free (callee_target.isa);
33256
33257 /* It is OK to inline between different modes.
33258 A function with mode-specific instructions, e.g. using asm,
33259 must be explicitly protected with noinline. */
33260 return can_inline;
33261 }
33262
33263 /* Hook to fix a function's alignment when it is affected by the target attribute. */
33264
33265 static void
33266 arm_relayout_function (tree fndecl)
33267 {
33268 if (DECL_USER_ALIGN (fndecl))
33269 return;
33270
33271 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33272
33273 if (!callee_tree)
33274 callee_tree = target_option_default_node;
33275
33276 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33277 SET_DECL_ALIGN
33278 (fndecl,
33279 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33280 }
33281
33282 /* Inner function to process the attribute((target(...))): take an argument and
33283 set the current options from that argument. If we have a list, recursively
33284 go over the list. */
33285
33286 static bool
33287 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33288 {
33289 if (TREE_CODE (args) == TREE_LIST)
33290 {
33291 bool ret = true;
33292
33293 for (; args; args = TREE_CHAIN (args))
33294 if (TREE_VALUE (args)
33295 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33296 ret = false;
33297 return ret;
33298 }
33299
33300 else if (TREE_CODE (args) != STRING_CST)
33301 {
33302 error ("attribute %<target%> argument not a string");
33303 return false;
33304 }
33305
33306 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33307 char *q;
33308
33309 while ((q = strtok (argstr, ",")) != NULL)
33310 {
33311 argstr = NULL;
33312 if (!strcmp (q, "thumb"))
33313 {
33314 opts->x_target_flags |= MASK_THUMB;
33315 if (TARGET_FDPIC && !arm_arch_thumb2)
33316 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33317 }
33318
33319 else if (!strcmp (q, "arm"))
33320 opts->x_target_flags &= ~MASK_THUMB;
33321
33322 else if (!strcmp (q, "general-regs-only"))
33323 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33324
33325 else if (startswith (q, "fpu="))
33326 {
33327 int fpu_index;
33328 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33329 &fpu_index, CL_TARGET))
33330 {
33331 error ("invalid fpu for target attribute or pragma %qs", q);
33332 return false;
33333 }
33334 if (fpu_index == TARGET_FPU_auto)
33335 {
33336 /* This doesn't really make sense until we support
33337 general dynamic selection of the architecture and all
33338 sub-features. */
33339 sorry ("auto fpu selection not currently permitted here");
33340 return false;
33341 }
33342 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33343 }
33344 else if (startswith (q, "arch="))
33345 {
33346 char *arch = q + 5;
33347 const arch_option *arm_selected_arch
33348 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33349
33350 if (!arm_selected_arch)
33351 {
33352 error ("invalid architecture for target attribute or pragma %qs",
33353 q);
33354 return false;
33355 }
33356
33357 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33358 }
33359 else if (q[0] == '+')
33360 {
33361 opts->x_arm_arch_string
33362 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33363 }
33364 else
33365 {
33366 error ("unknown target attribute or pragma %qs", q);
33367 return false;
33368 }
33369 }
33370
33371 return true;
33372 }
33373
33374 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33375
33376 tree
33377 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33378 struct gcc_options *opts_set)
33379 {
33380 struct cl_target_option cl_opts;
33381
33382 if (!arm_valid_target_attribute_rec (args, opts))
33383 return NULL_TREE;
33384
33385 cl_target_option_save (&cl_opts, opts, opts_set);
33386 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33387 arm_option_check_internal (opts);
33388 /* Do any overrides, such as global options arch=xxx.
33389 We do this since arm_active_target was overridden. */
33390 arm_option_reconfigure_globals ();
33391 arm_options_perform_arch_sanity_checks ();
33392 arm_option_override_internal (opts, opts_set);
33393
33394 return build_target_option_node (opts, opts_set);
33395 }
33396
33397 static void
33398 add_attribute (const char * mode, tree *attributes)
33399 {
33400 size_t len = strlen (mode);
33401 tree value = build_string (len, mode);
33402
33403 TREE_TYPE (value) = build_array_type (char_type_node,
33404 build_index_type (size_int (len)));
33405
33406 *attributes = tree_cons (get_identifier ("target"),
33407 build_tree_list (NULL_TREE, value),
33408 *attributes);
33409 }
33410
33411 /* For testing. Insert thumb or arm modes alternately on functions. */
33412
33413 static void
33414 arm_insert_attributes (tree fndecl, tree * attributes)
33415 {
33416 const char *mode;
33417
33418 if (! TARGET_FLIP_THUMB)
33419 return;
33420
33421 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33422 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33423 return;
33424
33425 /* Nested definitions must inherit mode. */
33426 if (current_function_decl)
33427 {
33428 mode = TARGET_THUMB ? "thumb" : "arm";
33429 add_attribute (mode, attributes);
33430 return;
33431 }
33432
33433 /* If there is already a setting don't change it. */
33434 if (lookup_attribute ("target", *attributes) != NULL)
33435 return;
33436
33437 mode = thumb_flipper ? "thumb" : "arm";
33438 add_attribute (mode, attributes);
33439
33440 thumb_flipper = !thumb_flipper;
33441 }
33442
33443 /* Hook to validate attribute((target("string"))). */
33444
33445 static bool
33446 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33447 tree args, int ARG_UNUSED (flags))
33448 {
33449 bool ret = true;
33450 struct gcc_options func_options, func_options_set;
33451 tree cur_tree, new_optimize;
33452 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33453
33454 /* Get the optimization options of the current function. */
33455 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33456
33457 /* If the function changed the optimization levels as well as setting target
33458 options, start with the optimizations specified. */
33459 if (!func_optimize)
33460 func_optimize = optimization_default_node;
33461
33462 /* Init func_options. */
33463 memset (&func_options, 0, sizeof (func_options));
33464 init_options_struct (&func_options, NULL);
33465 lang_hooks.init_options_struct (&func_options);
33466 memset (&func_options_set, 0, sizeof (func_options_set));
33467
33468 /* Initialize func_options to the defaults. */
33469 cl_optimization_restore (&func_options, &func_options_set,
33470 TREE_OPTIMIZATION (func_optimize));
33471
33472 cl_target_option_restore (&func_options, &func_options_set,
33473 TREE_TARGET_OPTION (target_option_default_node));
33474
33475 /* Set func_options flags with new target mode. */
33476 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33477 &func_options_set);
33478
33479 if (cur_tree == NULL_TREE)
33480 ret = false;
33481
33482 new_optimize = build_optimization_node (&func_options, &func_options_set);
33483
33484 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33485
33486 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33487
33488 return ret;
33489 }
33490
33491 /* Match an ISA feature bitmap to a named FPU. We always use the
33492 first entry that exactly matches the feature set, so that we
33493 effectively canonicalize the FPU name for the assembler. */
33494 static const char*
33495 arm_identify_fpu_from_isa (sbitmap isa)
33496 {
33497 auto_sbitmap fpubits (isa_num_bits);
33498 auto_sbitmap cand_fpubits (isa_num_bits);
33499
33500 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33501
33502 /* If there are no ISA feature bits relating to the FPU, we must be
33503 doing soft-float. */
33504 if (bitmap_empty_p (fpubits))
33505 return "softvfp";
33506
33507 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33508 {
33509 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33510 if (bitmap_equal_p (fpubits, cand_fpubits))
33511 return all_fpus[i].name;
33512 }
33513 /* We must find an entry, or things have gone wrong. */
33514 gcc_unreachable ();
33515 }
33516
33517 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33518 by the function DECL. */
33519 void
33520 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33521 {
33522 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33523
33524 struct cl_target_option *targ_options;
33525 if (target_parts)
33526 targ_options = TREE_TARGET_OPTION (target_parts);
33527 else
33528 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33529 gcc_assert (targ_options);
33530
33531 arm_print_asm_arch_directives (stream, targ_options);
33532
33533 fprintf (stream, "\t.syntax unified\n");
33534
33535 if (TARGET_THUMB)
33536 {
33537 if (is_called_in_ARM_mode (decl)
33538 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33539 && cfun->is_thunk))
33540 fprintf (stream, "\t.code 32\n");
33541 else if (TARGET_THUMB1)
33542 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33543 else
33544 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33545 }
33546 else
33547 fprintf (stream, "\t.arm\n");
33548
33549 if (TARGET_POKE_FUNCTION_NAME)
33550 arm_poke_function_name (stream, (const char *) name);
33551 }
33552
33553 /* If MEM's address is in the form [base+offset], extract the two parts
33554 of the address into BASE and OFFSET; otherwise return false
33555 after clearing BASE and OFFSET. */
33556
33557 static bool
33558 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33559 {
33560 rtx addr;
33561
33562 gcc_assert (MEM_P (mem));
33563
33564 addr = XEXP (mem, 0);
33565
33566 /* Strip off const from addresses like (const (addr)). */
33567 if (GET_CODE (addr) == CONST)
33568 addr = XEXP (addr, 0);
33569
33570 if (REG_P (addr))
33571 {
33572 *base = addr;
33573 *offset = const0_rtx;
33574 return true;
33575 }
33576
33577 if (GET_CODE (addr) == PLUS
33578 && GET_CODE (XEXP (addr, 0)) == REG
33579 && CONST_INT_P (XEXP (addr, 1)))
33580 {
33581 *base = XEXP (addr, 0);
33582 *offset = XEXP (addr, 1);
33583 return true;
33584 }
33585
33586 *base = NULL_RTX;
33587 *offset = NULL_RTX;
33588
33589 return false;
33590 }
33591
33592 /* If INSN is a load or store whose address is in the form [base+offset],
33593 extract the two parts into BASE and OFFSET. IS_LOAD is set
33594 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33595 otherwise return FALSE. */
33596
33597 static bool
33598 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33599 {
33600 rtx x, dest, src;
33601
33602 gcc_assert (INSN_P (insn));
33603 x = PATTERN (insn);
33604 if (GET_CODE (x) != SET)
33605 return false;
33606
33607 src = SET_SRC (x);
33608 dest = SET_DEST (x);
33609 if (REG_P (src) && MEM_P (dest))
33610 {
33611 *is_load = false;
33612 extract_base_offset_in_addr (dest, base, offset);
33613 }
33614 else if (MEM_P (src) && REG_P (dest))
33615 {
33616 *is_load = true;
33617 extract_base_offset_in_addr (src, base, offset);
33618 }
33619 else
33620 return false;
33621
33622 return (*base != NULL_RTX && *offset != NULL_RTX);
33623 }
33624
33625 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33626
33627 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33628 and PRI are only calculated for these instructions. For other instructions,
33629 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33630 instruction fusion can be supported by returning different priorities.
33631
33632 It's important that irrelevant instructions get the largest FUSION_PRI. */
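/* Worked example (illustrative register numbers): for a load such as
   ldr r0, [r4, #8] with MAX_PRI == m, the code below yields
   FUSION_PRI == m - 2 and PRI == (m - 1) / 2 - (4 << 20) - 8, so loads
   from the same base register sort together and in ascending offset
   order.  */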
33633
33634 static void
33635 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33636 int *fusion_pri, int *pri)
33637 {
33638 int tmp, off_val;
33639 bool is_load;
33640 rtx base, offset;
33641
33642 gcc_assert (INSN_P (insn));
33643
33644 tmp = max_pri - 1;
33645 if (!fusion_load_store (insn, &base, &offset, &is_load))
33646 {
33647 *pri = tmp;
33648 *fusion_pri = tmp;
33649 return;
33650 }
33651
33652 /* Load goes first. */
33653 if (is_load)
33654 *fusion_pri = tmp - 1;
33655 else
33656 *fusion_pri = tmp - 2;
33657
33658 tmp /= 2;
33659
33660 /* INSN with smaller base register goes first. */
33661 tmp -= ((REGNO (base) & 0xff) << 20);
33662
33663 /* INSN with smaller offset goes first. */
33664 off_val = (int)(INTVAL (offset));
33665 if (off_val >= 0)
33666 tmp -= (off_val & 0xfffff);
33667 else
33668 tmp += ((- off_val) & 0xfffff);
33669
33670 *pri = tmp;
33671 return;
33672 }
33673
33674
33675 /* Construct and return a PARALLEL RTX vector with elements numbering the
33676 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33677 the vector - from the perspective of the architecture. This does not
33678 line up with GCC's perspective on lane numbers, so we end up with
33679 different masks depending on our target endian-ness. The diagram
33680 below may help. We must draw the distinction when building masks
33681 which select one half of the vector. An instruction selecting
33682 architectural low-lanes for a big-endian target must be described using
33683 a mask selecting GCC high-lanes.
33684
33685 Big-Endian Little-Endian
33686
33687 GCC 0 1 2 3 3 2 1 0
33688 | x | x | x | x | | x | x | x | x |
33689 Architecture 3 2 1 0 3 2 1 0
33690
33691 Low Mask: { 2, 3 } { 0, 1 }
33692 High Mask: { 0, 1 } { 2, 3 }
33693 */
33694
33695 rtx
33696 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33697 {
33698 int nunits = GET_MODE_NUNITS (mode);
33699 rtvec v = rtvec_alloc (nunits / 2);
33700 int high_base = nunits / 2;
33701 int low_base = 0;
33702 int base;
33703 rtx t1;
33704 int i;
33705
33706 if (BYTES_BIG_ENDIAN)
33707 base = high ? low_base : high_base;
33708 else
33709 base = high ? high_base : low_base;
33710
33711 for (i = 0; i < nunits / 2; i++)
33712 RTVEC_ELT (v, i) = GEN_INT (base + i);
33713
33714 t1 = gen_rtx_PARALLEL (mode, v);
33715 return t1;
33716 }
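/* For example (V4SImode): on little-endian the low half is
   (parallel [(const_int 0) (const_int 1)]) and the high half is
   (parallel [(const_int 2) (const_int 3)]); on big-endian the two
   masks are swapped, matching the diagram above.  */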
33717
33718 /* Check OP for validity as a PARALLEL RTX vector with elements
33719 numbering the lanes of either the high (HIGH == TRUE) or low
33720 (HIGH == FALSE) half of the vector, from the perspective of the
33721 architecture. See the diagram above arm_simd_vect_par_cnst_half for more details. */
33722
33723 bool
33724 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33725 bool high)
33726 {
33727 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33728 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33729 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33730 int i = 0;
33731
33732 if (!VECTOR_MODE_P (mode))
33733 return false;
33734
33735 if (count_op != count_ideal)
33736 return false;
33737
33738 for (i = 0; i < count_ideal; i++)
33739 {
33740 rtx elt_op = XVECEXP (op, 0, i);
33741 rtx elt_ideal = XVECEXP (ideal, 0, i);
33742
33743 if (!CONST_INT_P (elt_op)
33744 || INTVAL (elt_ideal) != INTVAL (elt_op))
33745 return false;
33746 }
33747 return true;
33748 }
33749
33750 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33751 in Thumb1. */
33752 static bool
33753 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33754 const_tree)
33755 {
33756 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33757 if (vcall_offset && TARGET_THUMB1)
33758 return false;
33759
33760 /* Otherwise ok. */
33761 return true;
33762 }
33763
33764 /* Generate RTL for a conditional branch with rtx comparison CODE in
33765 mode CC_MODE. The destination of the unlikely conditional branch
33766 is LABEL_REF. */
33767
33768 void
33769 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33770 rtx label_ref)
33771 {
33772 rtx x;
33773 x = gen_rtx_fmt_ee (code, VOIDmode,
33774 gen_rtx_REG (cc_mode, CC_REGNUM),
33775 const0_rtx);
33776
33777 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33778 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33779 pc_rtx);
33780 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33781 }
33782
33783 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33784
33785 For pure-code sections there is no letter code for this attribute, so
33786 output all the section flags numerically when this is needed. */
33787
33788 static bool
33789 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33790 {
33791
33792 if (flags & SECTION_ARM_PURECODE)
33793 {
33794 *num = 0x20000000;
33795
33796 if (!(flags & SECTION_DEBUG))
33797 *num |= 0x2;
33798 if (flags & SECTION_EXCLUDE)
33799 *num |= 0x80000000;
33800 if (flags & SECTION_WRITE)
33801 *num |= 0x1;
33802 if (flags & SECTION_CODE)
33803 *num |= 0x4;
33804 if (flags & SECTION_MERGE)
33805 *num |= 0x10;
33806 if (flags & SECTION_STRINGS)
33807 *num |= 0x20;
33808 if (flags & SECTION_TLS)
33809 *num |= 0x400;
33810 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33811 *num |= 0x200;
33812
33813 return true;
33814 }
33815
33816 return false;
33817 }
33818
33819 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33820
33821 If pure-code is passed as an option, make sure all functions are in
33822 sections that have the SHF_ARM_PURECODE attribute. */
33823
33824 static section *
33825 arm_function_section (tree decl, enum node_frequency freq,
33826 bool startup, bool exit)
33827 {
33828 const char * section_name;
33829 section * sec;
33830
33831 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33832 return default_function_section (decl, freq, startup, exit);
33833
33834 if (!target_pure_code)
33835 return default_function_section (decl, freq, startup, exit);
33836
33837
33838 section_name = DECL_SECTION_NAME (decl);
33839
33840 /* If a function is not in a named section then it falls under the 'default'
33841 text section, also known as '.text'. We can preserve previous behavior as
33842 the default text section already has the SHF_ARM_PURECODE section
33843 attribute. */
33844 if (!section_name)
33845 {
33846 section *default_sec = default_function_section (decl, freq, startup,
33847 exit);
33848
33849 /* If default_sec is not null, then it must be a special section like for
33850 example .text.startup. We set the pure-code attribute and return the
33851 same section to preserve existing behavior. */
33852 if (default_sec)
33853 default_sec->common.flags |= SECTION_ARM_PURECODE;
33854 return default_sec;
33855 }
33856
33857 /* Otherwise look whether a section has already been created with
33858 'section_name'. */
33859 sec = get_named_section (decl, section_name, 0);
33860 if (!sec)
33861 /* If that is not the case, passing NULL as the section's name to
33862 'get_named_section' will create a section with the declaration's
33863 section name. */
33864 sec = get_named_section (decl, NULL, 0);
33865
33866 /* Set the SHF_ARM_PURECODE attribute. */
33867 sec->common.flags |= SECTION_ARM_PURECODE;
33868
33869 return sec;
33870 }
33871
33872 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
33873
33874 If DECL is a function declaration and pure-code is passed as an option
33875 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33876 section's name and RELOC indicates whether the declaration's initializer may
33877 contain runtime relocations. */
33878
33879 static unsigned int
33880 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33881 {
33882 unsigned int flags = default_section_type_flags (decl, name, reloc);
33883
33884 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33885 flags |= SECTION_ARM_PURECODE;
33886
33887 return flags;
33888 }
33889
33890 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33891
33892 static void
33893 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33894 rtx op0, rtx op1,
33895 rtx *quot_p, rtx *rem_p)
33896 {
33897 if (mode == SImode)
33898 gcc_assert (!TARGET_IDIV);
33899
33900 scalar_int_mode libval_mode
33901 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33902
33903 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33904 libval_mode, op0, mode, op1, mode);
33905
33906 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33907 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33908 GET_MODE_SIZE (mode));
33909
33910 gcc_assert (quotient);
33911 gcc_assert (remainder);
33912
33913 *quot_p = quotient;
33914 *rem_p = remainder;
33915 }
33916
33917 /* This function checks for the availability of the coprocessor builtin passed
33918 in BUILTIN for the current target. Returns true if it is available and
33919 false otherwise. If a BUILTIN is passed for which this function has not
33920 been implemented, it will cause an internal compiler error. */
33921
33922 bool
33923 arm_coproc_builtin_available (enum unspecv builtin)
33924 {
33925 /* None of these builtins are available in Thumb mode if the target only
33926 supports Thumb-1. */
33927 if (TARGET_THUMB1)
33928 return false;
33929
33930 switch (builtin)
33931 {
33932 case VUNSPEC_CDP:
33933 case VUNSPEC_LDC:
33934 case VUNSPEC_LDCL:
33935 case VUNSPEC_STC:
33936 case VUNSPEC_STCL:
33937 case VUNSPEC_MCR:
33938 case VUNSPEC_MRC:
33939 if (arm_arch4)
33940 return true;
33941 break;
33942 case VUNSPEC_CDP2:
33943 case VUNSPEC_LDC2:
33944 case VUNSPEC_LDC2L:
33945 case VUNSPEC_STC2:
33946 case VUNSPEC_STC2L:
33947 case VUNSPEC_MCR2:
33948 case VUNSPEC_MRC2:
33949 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33950 ARMv8-{A,M}. */
33951 if (arm_arch5t)
33952 return true;
33953 break;
33954 case VUNSPEC_MCRR:
33955 case VUNSPEC_MRRC:
33956 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33957 ARMv8-{A,M}. */
33958 if (arm_arch6 || arm_arch5te)
33959 return true;
33960 break;
33961 case VUNSPEC_MCRR2:
33962 case VUNSPEC_MRRC2:
33963 if (arm_arch6)
33964 return true;
33965 break;
33966 default:
33967 gcc_unreachable ();
33968 }
33969 return false;
33970 }
33971
33972 /* This function returns true if OP is a valid memory operand for the ldc and
33973 stc coprocessor instructions and false otherwise. */
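/* Editorial illustration (restating the checks made below): addresses such as

     (mem (reg r0))
     (mem (plus (reg r0) (const_int -1020)))
     (mem (post_inc (reg r0)))

   are accepted, while an offset that is not a multiple of 4 or lies outside
   [-1020, 1020], e.g. (plus (reg r0) (const_int 1022)), is rejected.  */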
33974
33975 bool
33976 arm_coproc_ldc_stc_legitimate_address (rtx op)
33977 {
33978 HOST_WIDE_INT range;
33979 /* Has to be a memory operand. */
33980 if (!MEM_P (op))
33981 return false;
33982
33983 op = XEXP (op, 0);
33984
33985 /* We accept registers. */
33986 if (REG_P (op))
33987 return true;
33988
33989 switch (GET_CODE (op))
33990 {
33991 case PLUS:
33992 {
33993 /* Or registers with an offset. */
33994 if (!REG_P (XEXP (op, 0)))
33995 return false;
33996
33997 op = XEXP (op, 1);
33998
33999 /* The offset must be an immediate though. */
34000 if (!CONST_INT_P (op))
34001 return false;
34002
34003 range = INTVAL (op);
34004
34005 /* Within the range of [-1020,1020]. */
34006 if (!IN_RANGE (range, -1020, 1020))
34007 return false;
34008
34009 /* And a multiple of 4. */
34010 return (range % 4) == 0;
34011 }
34012 case PRE_INC:
34013 case POST_INC:
34014 case PRE_DEC:
34015 case POST_DEC:
34016 return REG_P (XEXP (op, 0));
34017 default:
34018 gcc_unreachable ();
34019 }
34020 return false;
34021 }
34022
34023 /* Return the diagnostic message string if conversion from FROMTYPE to
34024 TOTYPE is not allowed, NULL otherwise. */
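/* Editorial example (user-level view of the check below): with the ACLE
   bfloat16_t type,

     bfloat16_t b;
     float f = (float) b;   // rejected: invalid conversion from bfloat16_t

   is diagnosed, since bfloat16_t values can be moved but not converted
   through ordinary arithmetic conversions.  */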
34025
34026 static const char *
34027 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34028 {
34029 if (element_mode (fromtype) != element_mode (totype))
34030 {
34031 /* Do not allow conversions to/from BFmode scalar types. */
34032 if (TYPE_MODE (fromtype) == BFmode)
34033 return N_("invalid conversion from type %<bfloat16_t%>");
34034 if (TYPE_MODE (totype) == BFmode)
34035 return N_("invalid conversion to type %<bfloat16_t%>");
34036 }
34037
34038 /* Conversion allowed. */
34039 return NULL;
34040 }
34041
34042 /* Return the diagnostic message string if the unary operation OP is
34043 not permitted on TYPE, NULL otherwise. */
34044
34045 static const char *
34046 arm_invalid_unary_op (int op, const_tree type)
34047 {
34048 /* Reject all single-operand operations on BFmode except for &. */
34049 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34050 return N_("operation not permitted on type %<bfloat16_t%>");
34051
34052 /* Operation allowed. */
34053 return NULL;
34054 }
34055
34056 /* Return the diagnostic message string if the binary operation OP is
34057 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34058
34059 static const char *
34060 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34061 const_tree type2)
34062 {
34063 /* Reject all 2-operand operations on BFmode. */
34064 if (element_mode (type1) == BFmode
34065 || element_mode (type2) == BFmode)
34066 return N_("operation not permitted on type %<bfloat16_t%>");
34067
34068 /* Operation allowed. */
34069 return NULL;
34070 }
34071
34072 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34073
34074 In VFPv1, VFP registers could only be accessed in the mode they were
34075 set, so subregs would be invalid there. However, we don't support
34076 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34077
34078 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34079 VFP registers in little-endian order. We can't describe that accurately to
34080 GCC, so avoid taking subregs of such values.
34081
34082 The only exception is going from a 128-bit to a 64-bit type. In that
34083 case the data layout happens to be consistent for big-endian, so we
34084 explicitly allow that case. */
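/* Editorial illustration (restating the rule below): in big-endian mode a
   subreg such as

     (subreg:SI (reg:DF d0) 0)

   is not allowed, because the 64-bit value sits in the VFP register in
   little-endian order; going from a 128-bit to a 64-bit mode, e.g. V2DImode
   to DImode, is the one permitted exception.  */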
34085
34086 static bool
34087 arm_can_change_mode_class (machine_mode from, machine_mode to,
34088 reg_class_t rclass)
34089 {
34090 if (TARGET_BIG_END
34091 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34092 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34093 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34094 && reg_classes_intersect_p (VFP_REGS, rclass))
34095 return false;
34096 return true;
34097 }
34098
34099 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34100 strcpy from constants will be faster. */
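/* Editorial example (consequence of the code below): a STRING_CST constant
   compiled without -Os is aligned to at least BITS_PER_WORD (32 bits), or
   twice that when tuning for XScale in ARM mode.  */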
34101
34102 static HOST_WIDE_INT
34103 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34104 {
34105 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34106 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34107 return MAX (align, BITS_PER_WORD * factor);
34108 return align;
34109 }
34110
34111 /* Emit a speculation barrier on target architectures that do not have
34112 DSB/ISB directly. Such systems probably don't need a barrier
34113 themselves, but if the code is ever run on a later architecture, it
34114 might become a problem. */
34115 void
34116 arm_emit_speculation_barrier_function ()
34117 {
34118 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34119 }
34120
34121 /* Have we recorded an explicit access to the Q bit of APSR? */
34122 bool
34123 arm_q_bit_access (void)
34124 {
34125 if (cfun && cfun->decl)
34126 return lookup_attribute ("acle qbit",
34127 DECL_ATTRIBUTES (cfun->decl));
34128 return true;
34129 }
34130
34131 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34132 bool
34133 arm_ge_bits_access (void)
34134 {
34135 if (cfun && cfun->decl)
34136 return lookup_attribute ("acle gebits",
34137 DECL_ATTRIBUTES (cfun->decl));
34138 return true;
34139 }
34140
34141 /* Return NULL if INSN is valid within a low-overhead loop.
34142 Otherwise return a string explaining why doloop cannot be applied. */
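/* Editorial example (restating the rejections below): on a target with LOB
   support, a loop body such as

     for (int i = 0; i < n; i++)
       f (i);   // contains a call, so the doloop transformation is refused

   is rejected, as is any body that mentions LR, the register used as the
   low-overhead-loop counter.  */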
34143
34144 static const char *
34145 arm_invalid_within_doloop (const rtx_insn *insn)
34146 {
34147 if (!TARGET_HAVE_LOB)
34148 return default_invalid_within_doloop (insn);
34149
34150 if (CALL_P (insn))
34151 return "Function call in the loop.";
34152
34153 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34154 return "LR is used inside loop.";
34155
34156 return NULL;
34157 }
34158
34159 bool
34160 arm_target_insn_ok_for_lob (rtx insn)
34161 {
34162 basic_block bb = BLOCK_FOR_INSN (insn);
34163 /* Make sure the basic block of the target insn is a simple latch
34164 whose single predecessor and single successor are both the body of
34165 the loop itself. Only simple loops with a single basic block as body
34166 are supported for low-overhead loops; this ensures that the LE target
34167 is above the LE instruction itself in the generated code. */
34168
34169 return single_succ_p (bb)
34170 && single_pred_p (bb)
34171 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34172 && contains_no_active_insn_p (bb);
34173 }
34174
34175 #if CHECKING_P
34176 namespace selftest {
34177
34178 /* Scan the static data tables generated by parsecpu.awk looking for
34179 potential issues with the data. We primarily check for
34180 inconsistencies in the option extensions at present (extensions
34181 that duplicate others but aren't marked as aliases). Furthermore,
34182 for correct canonicalization, a later option must never be a subset
34183 of an earlier one. Any extension should also only specify
34184 feature bits and never an architecture bit. The architecture is inferred
34185 from the declaration of the extension. */
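/* Editorial example (hypothetical arm-cpus.in entries, for illustration only):
   if an architecture declared

     option foo add bit_a bit_b
     option bar add bit_a

   then "bar" would be a subset of the earlier "foo" and the first assertion
   below would fire; an option whose bits overlap the architecture's own ISA
   bits would trip the second assertion.  */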
34186 static void
34187 arm_test_cpu_arch_data (void)
34188 {
34189 const arch_option *arch;
34190 const cpu_option *cpu;
34191 auto_sbitmap target_isa (isa_num_bits);
34192 auto_sbitmap isa1 (isa_num_bits);
34193 auto_sbitmap isa2 (isa_num_bits);
34194
34195 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34196 {
34197 const cpu_arch_extension *ext1, *ext2;
34198
34199 if (arch->common.extensions == NULL)
34200 continue;
34201
34202 arm_initialize_isa (target_isa, arch->common.isa_bits);
34203
34204 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34205 {
34206 if (ext1->alias)
34207 continue;
34208
34209 arm_initialize_isa (isa1, ext1->isa_bits);
34210 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34211 {
34212 if (ext2->alias || ext1->remove != ext2->remove)
34213 continue;
34214
34215 arm_initialize_isa (isa2, ext2->isa_bits);
34216 /* If the option is a subset of the parent option, it doesn't
34217 add anything and so isn't useful. */
34218 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34219
34220 /* If the extension specifies any architectural bits then
34221 disallow it. Extensions should only specify feature bits. */
34222 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34223 }
34224 }
34225 }
34226
34227 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34228 {
34229 const cpu_arch_extension *ext1, *ext2;
34230
34231 if (cpu->common.extensions == NULL)
34232 continue;
34233
34234 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34235
34236 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34237 {
34238 if (ext1->alias)
34239 continue;
34240
34241 arm_initialize_isa (isa1, ext1->isa_bits);
34242 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34243 {
34244 if (ext2->alias || ext1->remove != ext2->remove)
34245 continue;
34246
34247 arm_initialize_isa (isa2, ext2->isa_bits);
34248 /* If the option is a subset of the parent option, it doesn't
34249 add anything and so isn't useful. */
34250 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34251
34252 /* If the extension specifies any architectural bits then
34253 disallow it. Extensions should only specify feature bits. */
34254 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34255 }
34256 }
34257 }
34258 }
34259
34260 /* Scan the static data tables generated by parsecpu.awk looking for
34261 potential issues with the data. Here we check for consistency between the
34262 fpu bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34263 a feature bit that is not defined by any FPU flag. */
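/* Editorial note on the method used below: the loop repeatedly removes from
   isa_all_fpubits_internal every bit provided by some entry in all_fpus;
   whatever survives all iterations is a bit in ISA_ALL_FPU_INTERNAL that no
   FPU defines, which is then reported as an error.  */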
34264 static void
34265 arm_test_fpu_data (void)
34266 {
34267 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34268 auto_sbitmap fpubits (isa_num_bits);
34269 auto_sbitmap tmpset (isa_num_bits);
34270
34271 static const enum isa_feature fpu_bitlist_internal[]
34272 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34273 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34274
34275 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34276 {
34277 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34278 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34279 bitmap_clear (isa_all_fpubits_internal);
34280 bitmap_copy (isa_all_fpubits_internal, tmpset);
34281 }
34282
34283 if (!bitmap_empty_p (isa_all_fpubits_internal))
34284 {
34285 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34286 " group that are not defined by any FPU.\n"
34287 " Check your arm-cpus.in.\n");
34288 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34289 }
34290 }
34291
34292 static void
34293 arm_run_selftests (void)
34294 {
34295 arm_test_cpu_arch_data ();
34296 arm_test_fpu_data ();
34297 }
34298 } /* Namespace selftest. */
34299
34300 #undef TARGET_RUN_TARGET_SELFTESTS
34301 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34302 #endif /* CHECKING_P */
34303
34304 /* Implement TARGET_STACK_PROTECT_GUARD. In the case of a
34305 global-variable-based guard, use the default; otherwise
34306 return a null tree. */
34307 static tree
34308 arm_stack_protect_guard (void)
34309 {
34310 if (arm_stack_protector_guard == SSP_GLOBAL)
34311 return default_stack_protect_guard ();
34312
34313 return NULL_TREE;
34314 }
34315
34316 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34317 Unlike the arm version, we do NOT implement asm flag outputs. */
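/* Editorial example (user-level view): an asm with a flag output constraint,
   e.g.

     int eq;
     asm ("cmp %1, %2" : "=@cceq" (eq) : "r" (a), "r" (b));

   compiles for arm/thumb2 but is rejected here with a "sorry" when the
   function ends up in thumb1 mode.  */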
34318
34319 rtx_insn *
34320 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34321 vec<machine_mode> & /*input_modes*/,
34322 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34323 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34324 {
34325 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34326 if (startswith (constraints[i], "=@cc"))
34327 {
34328 sorry ("%<asm%> flags not supported in thumb1 mode");
34329 break;
34330 }
34331 return NULL;
34332 }
34333
34334 /* Generate code to enable conditional branches in functions over 1 MiB.
34335 Parameters are:
34336 operands: the operands list of the asm insn (see arm_cond_branch or
34337 arm_cond_branch_reversed).
34338 pos_label: an index into the operands array; operands[pos_label] is
34339 the asm label of the final jump destination.
34340 dest: a string used to generate the asm label of the intermediate
34341 destination.
34342 branch_format: a string denoting the intermediate branch format, e.g.
34343 "beq", "bne", etc. */
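/* Editorial sketch of the emitted sequence: for BRANCH_FORMAT "bne" and a
   destination more than 1 MiB away this produces, roughly,

     bne   .L<dest><n>      @ short conditional branch over the island
     b     <far target>     @ unconditional branch with a much larger range
     .L<dest><n>:

   so only the unconditional branch needs to reach the distant label.  The
   caller is expected to pass the inverse of the original condition so the
   overall semantics are preserved.  */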
34344
34345 const char *
34346 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34347 const char * branch_format)
34348 {
34349 rtx_code_label * tmp_label = gen_label_rtx ();
34350 char label_buf[256];
34351 char buffer[128];
34352 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34353 CODE_LABEL_NUMBER (tmp_label));
34354 const char *label_ptr = arm_strip_name_encoding (label_buf);
34355 rtx dest_label = operands[pos_label];
34356 operands[pos_label] = tmp_label;
34357
34358 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34359 output_asm_insn (buffer, operands);
34360
34361 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34362 operands[pos_label] = dest_label;
34363 output_asm_insn (buffer, operands);
34364 return "";
34365 }
34366
34367 /* If the given mode matches, restrict memory base registers to LO_REGS
34368 (i.e. [Rn] with Rn in LO_REGS). */
34369 enum reg_class
34370 arm_mode_base_reg_class (machine_mode mode)
34371 {
34372 if (TARGET_HAVE_MVE
34373 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34374 return LO_REGS;
34375
34376 return MODE_BASE_REG_REG_CLASS (mode);
34377 }
34378
34379 struct gcc_target targetm = TARGET_INITIALIZER;
34380
34381 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
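/* Editorial example (assumed mapping, based on MVE's predicate register):
   for MVE, a vector mode such as V4SImode is expected to map to a predicate
   mode with one boolean per lane (V4BImode here), while non-MVE targets
   fall back to the default mask mode.  */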
34382
34383 opt_machine_mode
34384 arm_get_mask_mode (machine_mode mode)
34385 {
34386 if (TARGET_HAVE_MVE)
34387 return arm_mode_to_pred_mode (mode);
34388
34389 return default_get_mask_mode (mode);
34390 }
34391
34392 #include "gt-arm.h"