1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75
76 /* This file should be included last. */
77 #include "target-def.h"
78
79 /* Forward definitions of types. */
80 typedef struct minipool_node Mnode;
81 typedef struct minipool_fixup Mfix;
82
83 void (*arm_lang_output_object_attributes_hook)(void);
84
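/* A quadruple of integers; used by optimal_immediate_sequence below to
   return a sequence of up to four immediate values.  */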
85 struct four_ints
86 {
87 int i[4];
88 };
89
90 /* Forward function declarations. */
91 static bool arm_const_not_ok_for_debug_p (rtx);
92 static int arm_needs_doubleword_align (machine_mode, const_tree);
93 static int arm_compute_static_chain_stack_bytes (void);
94 static arm_stack_offsets *arm_get_frame_offsets (void);
95 static void arm_compute_frame_layout (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
98 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
99 static unsigned bit_count (unsigned long);
100 static unsigned bitmap_popcount (const sbitmap);
101 static int arm_address_register_rtx_p (rtx, int);
102 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
103 static bool is_called_in_ARM_mode (tree);
104 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
105 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
106 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
107 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
108 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
109 inline static int thumb1_index_register_rtx_p (rtx, int);
110 static int thumb_far_jump_used_p (void);
111 static bool thumb_force_lr_save (void);
112 static unsigned arm_size_return_regs (void);
113 static bool arm_assemble_integer (rtx, unsigned int, int);
114 static void arm_print_operand (FILE *, rtx, int);
115 static void arm_print_operand_address (FILE *, machine_mode, rtx);
116 static bool arm_print_operand_punct_valid_p (unsigned char code);
117 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
118 static arm_cc get_arm_condition_code (rtx);
119 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
120 static const char *output_multi_immediate (rtx *, const char *, const char *,
121 int, HOST_WIDE_INT);
122 static const char *shift_op (rtx, HOST_WIDE_INT *);
123 static struct machine_function *arm_init_machine_status (void);
124 static void thumb_exit (FILE *, int);
125 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
126 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
127 static Mnode *add_minipool_forward_ref (Mfix *);
128 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_backward_ref (Mfix *);
130 static void assign_minipool_offsets (Mfix *);
131 static void arm_print_value (FILE *, rtx);
132 static void dump_minipool (rtx_insn *);
133 static int arm_barrier_cost (rtx_insn *);
134 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
135 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
136 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
137 machine_mode, rtx);
138 static void arm_reorg (void);
139 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
140 static unsigned long arm_compute_save_reg0_reg12_mask (void);
141 static unsigned long arm_compute_save_core_reg_mask (void);
142 static unsigned long arm_isr_value (tree);
143 static unsigned long arm_compute_func_type (void);
144 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
145 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
147 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
148 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
149 #endif
150 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
151 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
152 static void arm_output_function_epilogue (FILE *);
153 static void arm_output_function_prologue (FILE *);
154 static int arm_comp_type_attributes (const_tree, const_tree);
155 static void arm_set_default_type_attributes (tree);
156 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
157 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
158 static int optimal_immediate_sequence (enum rtx_code code,
159 unsigned HOST_WIDE_INT val,
160 struct four_ints *return_sequence);
161 static int optimal_immediate_sequence_1 (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence,
164 int i);
165 static int arm_get_strip_length (int);
166 static bool arm_function_ok_for_sibcall (tree, tree);
167 static machine_mode arm_promote_function_mode (const_tree,
168 machine_mode, int *,
169 const_tree, int);
170 static bool arm_return_in_memory (const_tree, const_tree);
171 static rtx arm_function_value (const_tree, const_tree, bool);
172 static rtx arm_libcall_value_1 (machine_mode);
173 static rtx arm_libcall_value (machine_mode, const_rtx);
174 static bool arm_function_value_regno_p (const unsigned int);
175 static void arm_internal_label (FILE *, const char *, unsigned long);
176 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
177 tree);
178 static bool arm_have_conditional_execution (void);
179 static bool arm_cannot_force_const_mem (machine_mode, rtx);
180 static bool arm_legitimate_constant_p (machine_mode, rtx);
181 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
182 static int arm_insn_cost (rtx_insn *, bool);
183 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
184 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
185 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
186 static void emit_constant_insn (rtx cond, rtx pattern);
187 static rtx_insn *emit_set_insn (rtx, rtx);
188 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static void arm_emit_multi_reg_pop (unsigned long);
191 static int vfp_emit_fstmd (int, int);
192 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
193 static int arm_arg_partial_bytes (cumulative_args_t,
194 const function_arg_info &);
195 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
196 static void arm_function_arg_advance (cumulative_args_t,
197 const function_arg_info &);
198 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
199 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
200 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
201 const_tree);
202 static rtx aapcs_libcall_value (machine_mode);
203 static int aapcs_select_return_coproc (const_tree, const_tree);
204
205 #ifdef OBJECT_FORMAT_ELF
206 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
207 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
208 #endif
209 #ifndef ARM_PE
210 static void arm_encode_section_info (tree, rtx, int);
211 #endif
212
213 static void arm_file_end (void);
214 static void arm_file_start (void);
215 static void arm_insert_attributes (tree, tree *);
216
217 static void arm_setup_incoming_varargs (cumulative_args_t,
218 const function_arg_info &, int *, int);
219 static bool arm_pass_by_reference (cumulative_args_t,
220 const function_arg_info &);
221 static bool arm_promote_prototypes (const_tree);
222 static bool arm_default_short_enums (void);
223 static bool arm_align_anon_bitfield (void);
224 static bool arm_return_in_msb (const_tree);
225 static bool arm_must_pass_in_stack (const function_arg_info &);
226 static bool arm_return_in_memory (const_tree, const_tree);
227 #if ARM_UNWIND_INFO
228 static void arm_unwind_emit (FILE *, rtx_insn *);
229 static bool arm_output_ttype (rtx);
230 static void arm_asm_emit_except_personality (rtx);
231 #endif
232 static void arm_asm_init_sections (void);
233 static rtx arm_dwarf_register_span (rtx);
234
235 static tree arm_cxx_guard_type (void);
236 static bool arm_cxx_guard_mask_bit (void);
237 static tree arm_get_cookie_size (tree);
238 static bool arm_cookie_has_size (void);
239 static bool arm_cxx_cdtor_returns_this (void);
240 static bool arm_cxx_key_method_may_be_inline (void);
241 static void arm_cxx_determine_class_data_visibility (tree);
242 static bool arm_cxx_class_data_always_comdat (void);
243 static bool arm_cxx_use_aeabi_atexit (void);
244 static void arm_init_libfuncs (void);
245 static tree arm_build_builtin_va_list (void);
246 static void arm_expand_builtin_va_start (tree, rtx);
247 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
248 static void arm_option_override (void);
249 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
250 struct cl_target_option *);
251 static void arm_override_options_after_change (void);
252 static void arm_option_print (FILE *, int, struct cl_target_option *);
253 static void arm_set_current_function (tree);
254 static bool arm_can_inline_p (tree, tree);
255 static void arm_relayout_function (tree);
256 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
257 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
258 static bool arm_sched_can_speculate_insn (rtx_insn *);
259 static bool arm_macro_fusion_p (void);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
263 static int arm_first_cycle_multipass_dfa_lookahead (void);
264 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
265 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
266 static bool arm_output_addr_const_extra (FILE *, rtx);
267 static bool arm_allocate_stack_slots_for_args (void);
268 static bool arm_warn_func_return (tree);
269 static tree arm_promoted_type (const_tree t);
270 static bool arm_scalar_mode_supported_p (scalar_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (scalar_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
292 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
293 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
294 static int arm_default_branch_cost (bool, bool);
295 static int arm_cortex_a5_branch_cost (bool, bool);
296 static int arm_cortex_m_branch_cost (bool, bool);
297 static int arm_cortex_m7_branch_cost (bool, bool);
298
299 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
300 rtx, const vec_perm_indices &);
301
302 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
303
304 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
305 tree vectype,
306 int misalign ATTRIBUTE_UNUSED);
307
308 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
309 bool op0_preserve_value);
310 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
311
312 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
313 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
314 const_tree);
315 static section *arm_function_section (tree, enum node_frequency, bool, bool);
316 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
317 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
318 int reloc);
319 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
320 static opt_scalar_float_mode arm_floatn_mode (int, bool);
321 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
322 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
323 static bool arm_modes_tieable_p (machine_mode, machine_mode);
324 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
325 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
326 vec<machine_mode> &,
327 vec<const char *> &, vec<rtx> &,
328 HARD_REG_SET &, location_t);
329 static const char *arm_identify_fpu_from_isa (sbitmap);
330 \f
331 /* Table of machine attributes. */
332 static const struct attribute_spec arm_attribute_table[] =
333 {
334 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
335 affects_type_identity, handler, exclude } */
336 /* Function calls made to this symbol must be done indirectly, because
337 it may lie outside of the 26 bit addressing range of a normal function
338 call. */
339 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
340 /* Whereas these functions are always known to reside within the 26 bit
341 addressing range. */
342 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Specify the procedure call conventions for a function. */
344 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
345 NULL },
346 /* Interrupt Service Routines have special prologue and epilogue requirements. */
347 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "naked", 0, 0, true, false, false, false,
352 arm_handle_fndecl_attribute, NULL },
353 #ifdef ARM_PE
354 /* ARM/PE has three new attributes:
355 interfacearm - ?
356 dllexport - for exporting a function/variable that will live in a dll
357 dllimport - for importing a function/variable from a dll
358
359 Microsoft allows multiple declspecs in one __declspec, separating
360 them with spaces. We do NOT support this. Instead, use __declspec
361 multiple times.
362 */
363 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
364 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
365 { "interfacearm", 0, 0, true, false, false, false,
366 arm_handle_fndecl_attribute, NULL },
367 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
368 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "notshared", 0, 0, false, true, false, false,
373 arm_handle_notshared_attribute, NULL },
374 #endif
375 /* ARMv8-M Security Extensions support. */
376 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
377 arm_handle_cmse_nonsecure_entry, NULL },
378 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
379 arm_handle_cmse_nonsecure_call, NULL },
380 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
381 { NULL, 0, 0, false, false, false, false, NULL, NULL }
382 };
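/* For reference, these target attributes appear in user code as, e.g.:
     void far_fn (void) __attribute__ ((long_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     int entry_fn (int) __attribute__ ((cmse_nonsecure_entry));
   (illustrative declarations only; the function names are arbitrary).  */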
383 \f
384 /* Initialize the GCC target structure. */
385 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
386 #undef TARGET_MERGE_DECL_ATTRIBUTES
387 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
388 #endif
389
390 #undef TARGET_CHECK_BUILTIN_CALL
391 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
392
393 #undef TARGET_LEGITIMIZE_ADDRESS
394 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
395
396 #undef TARGET_ATTRIBUTE_TABLE
397 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
398
399 #undef TARGET_INSERT_ATTRIBUTES
400 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
401
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START arm_file_start
404 #undef TARGET_ASM_FILE_END
405 #define TARGET_ASM_FILE_END arm_file_end
406
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP NULL
409 #undef TARGET_ASM_INTEGER
410 #define TARGET_ASM_INTEGER arm_assemble_integer
411
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND arm_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
418
419 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
420 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
421
422 #undef TARGET_ASM_FUNCTION_PROLOGUE
423 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
424
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
427
428 #undef TARGET_CAN_INLINE_P
429 #define TARGET_CAN_INLINE_P arm_can_inline_p
430
431 #undef TARGET_RELAYOUT_FUNCTION
432 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
433
434 #undef TARGET_OPTION_OVERRIDE
435 #define TARGET_OPTION_OVERRIDE arm_option_override
436
437 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
438 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
439
440 #undef TARGET_OPTION_RESTORE
441 #define TARGET_OPTION_RESTORE arm_option_restore
442
443 #undef TARGET_OPTION_PRINT
444 #define TARGET_OPTION_PRINT arm_option_print
445
446 #undef TARGET_COMP_TYPE_ATTRIBUTES
447 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
448
449 #undef TARGET_SCHED_CAN_SPECULATE_INSN
450 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
451
452 #undef TARGET_SCHED_MACRO_FUSION_P
453 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
454
455 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
456 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
457
458 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
459 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
460
461 #undef TARGET_SCHED_ADJUST_COST
462 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
463
464 #undef TARGET_SET_CURRENT_FUNCTION
465 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
466
467 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
468 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
469
470 #undef TARGET_SCHED_REORDER
471 #define TARGET_SCHED_REORDER arm_sched_reorder
472
473 #undef TARGET_REGISTER_MOVE_COST
474 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
475
476 #undef TARGET_MEMORY_MOVE_COST
477 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
478
479 #undef TARGET_ENCODE_SECTION_INFO
480 #ifdef ARM_PE
481 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
482 #else
483 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
484 #endif
485
486 #undef TARGET_STRIP_NAME_ENCODING
487 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
488
489 #undef TARGET_ASM_INTERNAL_LABEL
490 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
491
492 #undef TARGET_FLOATN_MODE
493 #define TARGET_FLOATN_MODE arm_floatn_mode
494
495 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
496 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
497
498 #undef TARGET_FUNCTION_VALUE
499 #define TARGET_FUNCTION_VALUE arm_function_value
500
501 #undef TARGET_LIBCALL_VALUE
502 #define TARGET_LIBCALL_VALUE arm_libcall_value
503
504 #undef TARGET_FUNCTION_VALUE_REGNO_P
505 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
506
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
511
512 #undef TARGET_RTX_COSTS
513 #define TARGET_RTX_COSTS arm_rtx_costs
514 #undef TARGET_ADDRESS_COST
515 #define TARGET_ADDRESS_COST arm_address_cost
516 #undef TARGET_INSN_COST
517 #define TARGET_INSN_COST arm_insn_cost
518
519 #undef TARGET_SHIFT_TRUNCATION_MASK
520 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
521 #undef TARGET_VECTOR_MODE_SUPPORTED_P
522 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
523 #undef TARGET_ARRAY_MODE_SUPPORTED_P
524 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
525 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
526 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
527 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
528 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
529 arm_autovectorize_vector_modes
530
531 #undef TARGET_MACHINE_DEPENDENT_REORG
532 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
533
534 #undef TARGET_INIT_BUILTINS
535 #define TARGET_INIT_BUILTINS arm_init_builtins
536 #undef TARGET_EXPAND_BUILTIN
537 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
538 #undef TARGET_BUILTIN_DECL
539 #define TARGET_BUILTIN_DECL arm_builtin_decl
540
541 #undef TARGET_INIT_LIBFUNCS
542 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
543
544 #undef TARGET_PROMOTE_FUNCTION_MODE
545 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
548 #undef TARGET_PASS_BY_REFERENCE
549 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
550 #undef TARGET_ARG_PARTIAL_BYTES
551 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
552 #undef TARGET_FUNCTION_ARG
553 #define TARGET_FUNCTION_ARG arm_function_arg
554 #undef TARGET_FUNCTION_ARG_ADVANCE
555 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
556 #undef TARGET_FUNCTION_ARG_PADDING
557 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
558 #undef TARGET_FUNCTION_ARG_BOUNDARY
559 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
560
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
563
564 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
565 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
566
567 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
568 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
569 #undef TARGET_TRAMPOLINE_INIT
570 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
571 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
572 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
573
574 #undef TARGET_WARN_FUNC_RETURN
575 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
576
577 #undef TARGET_DEFAULT_SHORT_ENUMS
578 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
579
580 #undef TARGET_ALIGN_ANON_BITFIELD
581 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
582
583 #undef TARGET_NARROW_VOLATILE_BITFIELD
584 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
585
586 #undef TARGET_CXX_GUARD_TYPE
587 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
588
589 #undef TARGET_CXX_GUARD_MASK_BIT
590 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
591
592 #undef TARGET_CXX_GET_COOKIE_SIZE
593 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
594
595 #undef TARGET_CXX_COOKIE_HAS_SIZE
596 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
597
598 #undef TARGET_CXX_CDTOR_RETURNS_THIS
599 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
600
601 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
602 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
603
604 #undef TARGET_CXX_USE_AEABI_ATEXIT
605 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
606
607 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
608 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
609 arm_cxx_determine_class_data_visibility
610
611 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
612 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
613
614 #undef TARGET_RETURN_IN_MSB
615 #define TARGET_RETURN_IN_MSB arm_return_in_msb
616
617 #undef TARGET_RETURN_IN_MEMORY
618 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
619
620 #undef TARGET_MUST_PASS_IN_STACK
621 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
622
623 #if ARM_UNWIND_INFO
624 #undef TARGET_ASM_UNWIND_EMIT
625 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
626
627 /* EABI unwinding tables use a different format for the typeinfo tables. */
628 #undef TARGET_ASM_TTYPE
629 #define TARGET_ASM_TTYPE arm_output_ttype
630
631 #undef TARGET_ARM_EABI_UNWINDER
632 #define TARGET_ARM_EABI_UNWINDER true
633
634 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
635 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
636
637 #endif /* ARM_UNWIND_INFO */
638
639 #undef TARGET_ASM_INIT_SECTIONS
640 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
641
642 #undef TARGET_DWARF_REGISTER_SPAN
643 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
644
645 #undef TARGET_CANNOT_COPY_INSN_P
646 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
647
648 #ifdef HAVE_AS_TLS
649 #undef TARGET_HAVE_TLS
650 #define TARGET_HAVE_TLS true
651 #endif
652
653 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
654 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
655
656 #undef TARGET_LEGITIMATE_CONSTANT_P
657 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
658
659 #undef TARGET_CANNOT_FORCE_CONST_MEM
660 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
661
662 #undef TARGET_MAX_ANCHOR_OFFSET
663 #define TARGET_MAX_ANCHOR_OFFSET 4095
664
665 /* The minimum is set such that the total size of the block
666 for a particular anchor is -4088 + 1 + 4095 bytes, which is
667 divisible by eight, ensuring natural spacing of anchors. */
668 #undef TARGET_MIN_ANCHOR_OFFSET
669 #define TARGET_MIN_ANCHOR_OFFSET -4088
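/* That is, an anchor can reach offsets in [-4088, +4095], a span of
   4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of eight.  */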
670
671 #undef TARGET_SCHED_ISSUE_RATE
672 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
673
674 #undef TARGET_SCHED_VARIABLE_ISSUE
675 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
676
677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
679 arm_first_cycle_multipass_dfa_lookahead
680
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
683 arm_first_cycle_multipass_dfa_lookahead_guard
684
685 #undef TARGET_MANGLE_TYPE
686 #define TARGET_MANGLE_TYPE arm_mangle_type
687
688 #undef TARGET_INVALID_CONVERSION
689 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
690
691 #undef TARGET_INVALID_UNARY_OP
692 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
693
694 #undef TARGET_INVALID_BINARY_OP
695 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
696
697 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
698 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
699
700 #undef TARGET_BUILD_BUILTIN_VA_LIST
701 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
702 #undef TARGET_EXPAND_BUILTIN_VA_START
703 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
704 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
705 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
706
707 #ifdef HAVE_AS_TLS
708 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
709 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
710 #endif
711
712 #undef TARGET_LEGITIMATE_ADDRESS_P
713 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
714
715 #undef TARGET_PREFERRED_RELOAD_CLASS
716 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
717
718 #undef TARGET_PROMOTED_TYPE
719 #define TARGET_PROMOTED_TYPE arm_promoted_type
720
721 #undef TARGET_SCALAR_MODE_SUPPORTED_P
722 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
723
724 #undef TARGET_COMPUTE_FRAME_LAYOUT
725 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
726
727 #undef TARGET_FRAME_POINTER_REQUIRED
728 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
729
730 #undef TARGET_CAN_ELIMINATE
731 #define TARGET_CAN_ELIMINATE arm_can_eliminate
732
733 #undef TARGET_CONDITIONAL_REGISTER_USAGE
734 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
735
736 #undef TARGET_CLASS_LIKELY_SPILLED_P
737 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
738
739 #undef TARGET_VECTORIZE_BUILTINS
740 #define TARGET_VECTORIZE_BUILTINS
741
742 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
743 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
744 arm_builtin_vectorized_function
745
746 #undef TARGET_VECTOR_ALIGNMENT
747 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
748
749 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
750 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
751 arm_vector_alignment_reachable
752
753 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
754 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
755 arm_builtin_support_vector_misalignment
756
757 #undef TARGET_PREFERRED_RENAME_CLASS
758 #define TARGET_PREFERRED_RENAME_CLASS \
759 arm_preferred_rename_class
760
761 #undef TARGET_VECTORIZE_VEC_PERM_CONST
762 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
763
764 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
765 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
766 arm_builtin_vectorization_cost
767
768 #undef TARGET_CANONICALIZE_COMPARISON
769 #define TARGET_CANONICALIZE_COMPARISON \
770 arm_canonicalize_comparison
771
772 #undef TARGET_ASAN_SHADOW_OFFSET
773 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
774
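/* Maximum number of conditional instructions permitted in an IT block:
   one when -mrestrict-it is in effect, otherwise four.  */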
775 #undef MAX_INSN_PER_IT_BLOCK
776 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
777
778 #undef TARGET_CAN_USE_DOLOOP_P
779 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
780
781 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
782 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
783
784 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
785 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
786
787 #undef TARGET_SCHED_FUSION_PRIORITY
788 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
789
790 #undef TARGET_ASM_FUNCTION_SECTION
791 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
792
793 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
794 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
795
796 #undef TARGET_SECTION_TYPE_FLAGS
797 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
798
799 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
800 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
801
802 #undef TARGET_C_EXCESS_PRECISION
803 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
804
805 /* Although the architecture reserves bits 0 and 1, only the former is
806 used for ARM/Thumb ISA selection in v7 and earlier versions. */
807 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
808 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
809
810 #undef TARGET_FIXED_CONDITION_CODE_REGS
811 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
812
813 #undef TARGET_HARD_REGNO_NREGS
814 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
815 #undef TARGET_HARD_REGNO_MODE_OK
816 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
817
818 #undef TARGET_MODES_TIEABLE_P
819 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
820
821 #undef TARGET_CAN_CHANGE_MODE_CLASS
822 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
823
824 #undef TARGET_CONSTANT_ALIGNMENT
825 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
826
827 #undef TARGET_INVALID_WITHIN_DOLOOP
828 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
829
830 #undef TARGET_MD_ASM_ADJUST
831 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
832
833 #undef TARGET_STACK_PROTECT_GUARD
834 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
835
836 #undef TARGET_VECTORIZE_GET_MASK_MODE
837 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
838 \f
839 /* Obstack for minipool constant handling. */
840 static struct obstack minipool_obstack;
841 static char * minipool_startobj;
842
843 /* The maximum number of insns skipped which
844 will be conditionalised if possible. */
845 static int max_insns_skipped = 5;
846
847 /* True if we are currently building a constant table. */
848 int making_const_table;
849
850 /* The processor for which instructions should be scheduled. */
851 enum processor_type arm_tune = TARGET_CPU_arm_none;
852
853 /* The current tuning set. */
854 const struct tune_params *current_tune;
855
856 /* Which floating point hardware to schedule for. */
857 int arm_fpu_attr;
858
859 /* Used for Thumb call_via trampolines. */
860 rtx thumb_call_via_label[14];
861 static int thumb_call_reg_needed;
862
863 /* The bits in this mask specify which instruction scheduling options should
864 be used. */
865 unsigned int tune_flags = 0;
866
867 /* The highest ARM architecture version supported by the
868 target. */
869 enum base_architecture arm_base_arch = BASE_ARCH_0;
870
871 /* Active target architecture and tuning. */
872
873 struct arm_build_target arm_active_target;
874
875 /* The following are used in the arm.md file as equivalents to bits
876 in the above two flag variables. */
877
878 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
879 int arm_arch4 = 0;
880
881 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
882 int arm_arch4t = 0;
883
884 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
885 int arm_arch5t = 0;
886
887 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
888 int arm_arch5te = 0;
889
890 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
891 int arm_arch6 = 0;
892
893 /* Nonzero if this chip supports the ARM 6K extensions. */
894 int arm_arch6k = 0;
895
896 /* Nonzero if this chip supports the ARM 6KZ extensions. */
897 int arm_arch6kz = 0;
898
899 /* Nonzero if instructions present in ARMv6-M can be used. */
900 int arm_arch6m = 0;
901
902 /* Nonzero if this chip supports the ARM 7 extensions. */
903 int arm_arch7 = 0;
904
905 /* Nonzero if this chip supports the Large Physical Address Extension. */
906 int arm_arch_lpae = 0;
907
908 /* Nonzero if instructions not present in the 'M' profile can be used. */
909 int arm_arch_notm = 0;
910
911 /* Nonzero if instructions present in ARMv7E-M can be used. */
912 int arm_arch7em = 0;
913
914 /* Nonzero if instructions present in ARMv8 can be used. */
915 int arm_arch8 = 0;
916
917 /* Nonzero if this chip supports the ARMv8.1 extensions. */
918 int arm_arch8_1 = 0;
919
920 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
921 int arm_arch8_2 = 0;
922
923 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
924 int arm_arch8_3 = 0;
925
926 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
927 int arm_arch8_4 = 0;
928 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
929 extensions. */
930 int arm_arch8_1m_main = 0;
931
932 /* Nonzero if this chip supports the FP16 instructions extension of ARM
933 Architecture 8.2. */
934 int arm_fp16_inst = 0;
935
936 /* Nonzero if this chip can benefit from load scheduling. */
937 int arm_ld_sched = 0;
938
939 /* Nonzero if this chip is a StrongARM. */
940 int arm_tune_strongarm = 0;
941
942 /* Nonzero if this chip supports Intel Wireless MMX technology. */
943 int arm_arch_iwmmxt = 0;
944
945 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
946 int arm_arch_iwmmxt2 = 0;
947
948 /* Nonzero if this chip is an XScale. */
949 int arm_arch_xscale = 0;
950
951 /* Nonzero if tuning for XScale. */
952 int arm_tune_xscale = 0;
953
954 /* Nonzero if we want to tune for stores that access the write-buffer.
955 This typically means an ARM6 or ARM7 with MMU or MPU. */
956 int arm_tune_wbuf = 0;
957
958 /* Nonzero if tuning for Cortex-A9. */
959 int arm_tune_cortex_a9 = 0;
960
961 /* Nonzero if we should define __THUMB_INTERWORK__ in the
962 preprocessor.
963 XXX This is a bit of a hack; it's intended to help work around
964 problems in GLD, which doesn't understand that armv5t code is
965 interworking clean. */
966 int arm_cpp_interwork = 0;
967
968 /* Nonzero if chip supports Thumb 1. */
969 int arm_arch_thumb1;
970
971 /* Nonzero if chip supports Thumb 2. */
972 int arm_arch_thumb2;
973
974 /* Nonzero if chip supports integer division instruction. */
975 int arm_arch_arm_hwdiv;
976 int arm_arch_thumb_hwdiv;
977
978 /* Nonzero if chip disallows volatile memory access in IT block. */
979 int arm_arch_no_volatile_ce;
980
981 /* Nonzero if we shouldn't use literal pools. */
982 bool arm_disable_literal_pool = false;
983
984 /* The register number to be used for the PIC offset register. */
985 unsigned arm_pic_register = INVALID_REGNUM;
986
987 enum arm_pcs arm_pcs_default;
988
989 /* For an explanation of these variables, see final_prescan_insn below. */
990 int arm_ccfsm_state;
991 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
992 enum arm_cond_code arm_current_cc;
993
994 rtx arm_target_insn;
995 int arm_target_label;
996 /* The number of conditionally executed insns, including the current insn. */
997 int arm_condexec_count = 0;
998 /* A bitmask specifying the patterns for the IT block.
999 Zero means do not output an IT block before this insn. */
1000 int arm_condexec_mask = 0;
1001 /* The number of bits used in arm_condexec_mask. */
1002 int arm_condexec_masklen = 0;
1003
1004 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1005 int arm_arch_crc = 0;
1006
1007 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1008 int arm_arch_dotprod = 0;
1009
1010 /* Nonzero if chip supports the ARMv8-M security extensions. */
1011 int arm_arch_cmse = 0;
1012
1013 /* Nonzero if the core has a very small, high-latency multiply unit. */
1014 int arm_m_profile_small_mul = 0;
1015
1016 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1017 int arm_arch_i8mm = 0;
1018
1019 /* Nonzero if chip supports the BFloat16 instructions. */
1020 int arm_arch_bf16 = 0;
1021
1022 /* Nonzero if chip supports the Custom Datapath Extension. */
1023 int arm_arch_cde = 0;
1024 int arm_arch_cde_coproc = 0;
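/* Bit masks for the eight possible CDE coprocessors, indexed by
   coprocessor number.  */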
1025 const int arm_arch_cde_coproc_bits[] = {
1026 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1027 };
1028
1029 /* The condition codes of the ARM, and the inverse function. */
1030 static const char * const arm_condition_codes[] =
1031 {
1032 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1033 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1034 };
1035
1036 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1037 int arm_regs_in_sequence[] =
1038 {
1039 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1040 };
1041
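/* Expand the FP_SYSREGS list into a table of FP system register name
   strings, one entry per DEF_FP_SYSREG.  */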
1042 #define DEF_FP_SYSREG(reg) #reg,
1043 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1044 FP_SYSREGS
1045 };
1046 #undef DEF_FP_SYSREG
1047
1048 #define ARM_LSL_NAME "lsl"
1049 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1050
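/* Low registers (r0-r7) that can be used as work registers in Thumb-2
   code, excluding the hard frame pointer, SP, PC and (when in use) the
   PIC register.  */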
1051 #define THUMB2_WORK_REGS \
1052 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1053 | (1 << SP_REGNUM) \
1054 | (1 << PC_REGNUM) \
1055 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1056 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1057 : 0)))
1058 \f
1059 /* Initialization code. */
1060
1061 struct cpu_tune
1062 {
1063 enum processor_type scheduler;
1064 unsigned int tune_flags;
1065 const struct tune_params *tune;
1066 };
1067
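/* Initializers for the prefetch tuning parameters: number of prefetch
   slots, L1 cache size and L1 cache line size; -1 means the value is
   unknown or that prefetching is not beneficial.  */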
1068 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1069 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1070 { \
1071 num_slots, \
1072 l1_size, \
1073 l1_line_size \
1074 }
1075
1076 /* arm generic vectorizer costs. */
1077 static const
1078 struct cpu_vec_costs arm_default_vec_cost = {
1079 1, /* scalar_stmt_cost. */
1080 1, /* scalar load_cost. */
1081 1, /* scalar_store_cost. */
1082 1, /* vec_stmt_cost. */
1083 1, /* vec_to_scalar_cost. */
1084 1, /* scalar_to_vec_cost. */
1085 1, /* vec_align_load_cost. */
1086 1, /* vec_unalign_load_cost. */
1087 1, /* vec_unalign_store_cost. */
1088 1, /* vec_store_cost. */
1089 3, /* cond_taken_branch_cost. */
1090 1, /* cond_not_taken_branch_cost. */
1091 };
1092
1093 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1094 #include "aarch-cost-tables.h"
1095
1096
1097
1098 const struct cpu_cost_table cortexa9_extra_costs =
1099 {
1100 /* ALU */
1101 {
1102 0, /* arith. */
1103 0, /* logical. */
1104 0, /* shift. */
1105 COSTS_N_INSNS (1), /* shift_reg. */
1106 COSTS_N_INSNS (1), /* arith_shift. */
1107 COSTS_N_INSNS (2), /* arith_shift_reg. */
1108 0, /* log_shift. */
1109 COSTS_N_INSNS (1), /* log_shift_reg. */
1110 COSTS_N_INSNS (1), /* extend. */
1111 COSTS_N_INSNS (2), /* extend_arith. */
1112 COSTS_N_INSNS (1), /* bfi. */
1113 COSTS_N_INSNS (1), /* bfx. */
1114 0, /* clz. */
1115 0, /* rev. */
1116 0, /* non_exec. */
1117 true /* non_exec_costs_exec. */
1118 },
1119 {
1120 /* MULT SImode */
1121 {
1122 COSTS_N_INSNS (3), /* simple. */
1123 COSTS_N_INSNS (3), /* flag_setting. */
1124 COSTS_N_INSNS (2), /* extend. */
1125 COSTS_N_INSNS (3), /* add. */
1126 COSTS_N_INSNS (2), /* extend_add. */
1127 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1128 },
1129 /* MULT DImode */
1130 {
1131 0, /* simple (N/A). */
1132 0, /* flag_setting (N/A). */
1133 COSTS_N_INSNS (4), /* extend. */
1134 0, /* add (N/A). */
1135 COSTS_N_INSNS (4), /* extend_add. */
1136 0 /* idiv (N/A). */
1137 }
1138 },
1139 /* LD/ST */
1140 {
1141 COSTS_N_INSNS (2), /* load. */
1142 COSTS_N_INSNS (2), /* load_sign_extend. */
1143 COSTS_N_INSNS (2), /* ldrd. */
1144 COSTS_N_INSNS (2), /* ldm_1st. */
1145 1, /* ldm_regs_per_insn_1st. */
1146 2, /* ldm_regs_per_insn_subsequent. */
1147 COSTS_N_INSNS (5), /* loadf. */
1148 COSTS_N_INSNS (5), /* loadd. */
1149 COSTS_N_INSNS (1), /* load_unaligned. */
1150 COSTS_N_INSNS (2), /* store. */
1151 COSTS_N_INSNS (2), /* strd. */
1152 COSTS_N_INSNS (2), /* stm_1st. */
1153 1, /* stm_regs_per_insn_1st. */
1154 2, /* stm_regs_per_insn_subsequent. */
1155 COSTS_N_INSNS (1), /* storef. */
1156 COSTS_N_INSNS (1), /* stored. */
1157 COSTS_N_INSNS (1), /* store_unaligned. */
1158 COSTS_N_INSNS (1), /* loadv. */
1159 COSTS_N_INSNS (1) /* storev. */
1160 },
1161 {
1162 /* FP SFmode */
1163 {
1164 COSTS_N_INSNS (14), /* div. */
1165 COSTS_N_INSNS (4), /* mult. */
1166 COSTS_N_INSNS (7), /* mult_addsub. */
1167 COSTS_N_INSNS (30), /* fma. */
1168 COSTS_N_INSNS (3), /* addsub. */
1169 COSTS_N_INSNS (1), /* fpconst. */
1170 COSTS_N_INSNS (1), /* neg. */
1171 COSTS_N_INSNS (3), /* compare. */
1172 COSTS_N_INSNS (3), /* widen. */
1173 COSTS_N_INSNS (3), /* narrow. */
1174 COSTS_N_INSNS (3), /* toint. */
1175 COSTS_N_INSNS (3), /* fromint. */
1176 COSTS_N_INSNS (3) /* roundint. */
1177 },
1178 /* FP DFmode */
1179 {
1180 COSTS_N_INSNS (24), /* div. */
1181 COSTS_N_INSNS (5), /* mult. */
1182 COSTS_N_INSNS (8), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1193 }
1194 },
1195 /* Vector */
1196 {
1197 COSTS_N_INSNS (1), /* alu. */
1198 COSTS_N_INSNS (4), /* mult. */
1199 COSTS_N_INSNS (1), /* movi. */
1200 COSTS_N_INSNS (2), /* dup. */
1201 COSTS_N_INSNS (2) /* extract. */
1202 }
1203 };
1204
1205 const struct cpu_cost_table cortexa8_extra_costs =
1206 {
1207 /* ALU */
1208 {
1209 0, /* arith. */
1210 0, /* logical. */
1211 COSTS_N_INSNS (1), /* shift. */
1212 0, /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 0, /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 0, /* log_shift_reg. */
1217 0, /* extend. */
1218 0, /* extend_arith. */
1219 0, /* bfi. */
1220 0, /* bfx. */
1221 0, /* clz. */
1222 0, /* rev. */
1223 0, /* non_exec. */
1224 true /* non_exec_costs_exec. */
1225 },
1226 {
1227 /* MULT SImode */
1228 {
1229 COSTS_N_INSNS (1), /* simple. */
1230 COSTS_N_INSNS (1), /* flag_setting. */
1231 COSTS_N_INSNS (1), /* extend. */
1232 COSTS_N_INSNS (1), /* add. */
1233 COSTS_N_INSNS (1), /* extend_add. */
1234 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1235 },
1236 /* MULT DImode */
1237 {
1238 0, /* simple (N/A). */
1239 0, /* flag_setting (N/A). */
1240 COSTS_N_INSNS (2), /* extend. */
1241 0, /* add (N/A). */
1242 COSTS_N_INSNS (2), /* extend_add. */
1243 0 /* idiv (N/A). */
1244 }
1245 },
1246 /* LD/ST */
1247 {
1248 COSTS_N_INSNS (1), /* load. */
1249 COSTS_N_INSNS (1), /* load_sign_extend. */
1250 COSTS_N_INSNS (1), /* ldrd. */
1251 COSTS_N_INSNS (1), /* ldm_1st. */
1252 1, /* ldm_regs_per_insn_1st. */
1253 2, /* ldm_regs_per_insn_subsequent. */
1254 COSTS_N_INSNS (1), /* loadf. */
1255 COSTS_N_INSNS (1), /* loadd. */
1256 COSTS_N_INSNS (1), /* load_unaligned. */
1257 COSTS_N_INSNS (1), /* store. */
1258 COSTS_N_INSNS (1), /* strd. */
1259 COSTS_N_INSNS (1), /* stm_1st. */
1260 1, /* stm_regs_per_insn_1st. */
1261 2, /* stm_regs_per_insn_subsequent. */
1262 COSTS_N_INSNS (1), /* storef. */
1263 COSTS_N_INSNS (1), /* stored. */
1264 COSTS_N_INSNS (1), /* store_unaligned. */
1265 COSTS_N_INSNS (1), /* loadv. */
1266 COSTS_N_INSNS (1) /* storev. */
1267 },
1268 {
1269 /* FP SFmode */
1270 {
1271 COSTS_N_INSNS (36), /* div. */
1272 COSTS_N_INSNS (11), /* mult. */
1273 COSTS_N_INSNS (20), /* mult_addsub. */
1274 COSTS_N_INSNS (30), /* fma. */
1275 COSTS_N_INSNS (9), /* addsub. */
1276 COSTS_N_INSNS (3), /* fpconst. */
1277 COSTS_N_INSNS (3), /* neg. */
1278 COSTS_N_INSNS (6), /* compare. */
1279 COSTS_N_INSNS (4), /* widen. */
1280 COSTS_N_INSNS (4), /* narrow. */
1281 COSTS_N_INSNS (8), /* toint. */
1282 COSTS_N_INSNS (8), /* fromint. */
1283 COSTS_N_INSNS (8) /* roundint. */
1284 },
1285 /* FP DFmode */
1286 {
1287 COSTS_N_INSNS (64), /* div. */
1288 COSTS_N_INSNS (16), /* mult. */
1289 COSTS_N_INSNS (25), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (6), /* widen. */
1296 COSTS_N_INSNS (6), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 }
1301 },
1302 /* Vector */
1303 {
1304 COSTS_N_INSNS (1), /* alu. */
1305 COSTS_N_INSNS (4), /* mult. */
1306 COSTS_N_INSNS (1), /* movi. */
1307 COSTS_N_INSNS (2), /* dup. */
1308 COSTS_N_INSNS (2) /* extract. */
1309 }
1310 };
1311
1312 const struct cpu_cost_table cortexa5_extra_costs =
1313 {
1314 /* ALU */
1315 {
1316 0, /* arith. */
1317 0, /* logical. */
1318 COSTS_N_INSNS (1), /* shift. */
1319 COSTS_N_INSNS (1), /* shift_reg. */
1320 COSTS_N_INSNS (1), /* arith_shift. */
1321 COSTS_N_INSNS (1), /* arith_shift_reg. */
1322 COSTS_N_INSNS (1), /* log_shift. */
1323 COSTS_N_INSNS (1), /* log_shift_reg. */
1324 COSTS_N_INSNS (1), /* extend. */
1325 COSTS_N_INSNS (1), /* extend_arith. */
1326 COSTS_N_INSNS (1), /* bfi. */
1327 COSTS_N_INSNS (1), /* bfx. */
1328 COSTS_N_INSNS (1), /* clz. */
1329 COSTS_N_INSNS (1), /* rev. */
1330 0, /* non_exec. */
1331 true /* non_exec_costs_exec. */
1332 },
1333
1334 {
1335 /* MULT SImode */
1336 {
1337 0, /* simple. */
1338 COSTS_N_INSNS (1), /* flag_setting. */
1339 COSTS_N_INSNS (1), /* extend. */
1340 COSTS_N_INSNS (1), /* add. */
1341 COSTS_N_INSNS (1), /* extend_add. */
1342 COSTS_N_INSNS (7) /* idiv. */
1343 },
1344 /* MULT DImode */
1345 {
1346 0, /* simple (N/A). */
1347 0, /* flag_setting (N/A). */
1348 COSTS_N_INSNS (1), /* extend. */
1349 0, /* add. */
1350 COSTS_N_INSNS (2), /* extend_add. */
1351 0 /* idiv (N/A). */
1352 }
1353 },
1354 /* LD/ST */
1355 {
1356 COSTS_N_INSNS (1), /* load. */
1357 COSTS_N_INSNS (1), /* load_sign_extend. */
1358 COSTS_N_INSNS (6), /* ldrd. */
1359 COSTS_N_INSNS (1), /* ldm_1st. */
1360 1, /* ldm_regs_per_insn_1st. */
1361 2, /* ldm_regs_per_insn_subsequent. */
1362 COSTS_N_INSNS (2), /* loadf. */
1363 COSTS_N_INSNS (4), /* loadd. */
1364 COSTS_N_INSNS (1), /* load_unaligned. */
1365 COSTS_N_INSNS (1), /* store. */
1366 COSTS_N_INSNS (3), /* strd. */
1367 COSTS_N_INSNS (1), /* stm_1st. */
1368 1, /* stm_regs_per_insn_1st. */
1369 2, /* stm_regs_per_insn_subsequent. */
1370 COSTS_N_INSNS (2), /* storef. */
1371 COSTS_N_INSNS (2), /* stored. */
1372 COSTS_N_INSNS (1), /* store_unaligned. */
1373 COSTS_N_INSNS (1), /* loadv. */
1374 COSTS_N_INSNS (1) /* storev. */
1375 },
1376 {
1377 /* FP SFmode */
1378 {
1379 COSTS_N_INSNS (15), /* div. */
1380 COSTS_N_INSNS (3), /* mult. */
1381 COSTS_N_INSNS (7), /* mult_addsub. */
1382 COSTS_N_INSNS (7), /* fma. */
1383 COSTS_N_INSNS (3), /* addsub. */
1384 COSTS_N_INSNS (3), /* fpconst. */
1385 COSTS_N_INSNS (3), /* neg. */
1386 COSTS_N_INSNS (3), /* compare. */
1387 COSTS_N_INSNS (3), /* widen. */
1388 COSTS_N_INSNS (3), /* narrow. */
1389 COSTS_N_INSNS (3), /* toint. */
1390 COSTS_N_INSNS (3), /* fromint. */
1391 COSTS_N_INSNS (3) /* roundint. */
1392 },
1393 /* FP DFmode */
1394 {
1395 COSTS_N_INSNS (30), /* div. */
1396 COSTS_N_INSNS (6), /* mult. */
1397 COSTS_N_INSNS (10), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1408 }
1409 },
1410 /* Vector */
1411 {
1412 COSTS_N_INSNS (1), /* alu. */
1413 COSTS_N_INSNS (4), /* mult. */
1414 COSTS_N_INSNS (1), /* movi. */
1415 COSTS_N_INSNS (2), /* dup. */
1416 COSTS_N_INSNS (2) /* extract. */
1417 }
1418 };
1419
1420
1421 const struct cpu_cost_table cortexa7_extra_costs =
1422 {
1423 /* ALU */
1424 {
1425 0, /* arith. */
1426 0, /* logical. */
1427 COSTS_N_INSNS (1), /* shift. */
1428 COSTS_N_INSNS (1), /* shift_reg. */
1429 COSTS_N_INSNS (1), /* arith_shift. */
1430 COSTS_N_INSNS (1), /* arith_shift_reg. */
1431 COSTS_N_INSNS (1), /* log_shift. */
1432 COSTS_N_INSNS (1), /* log_shift_reg. */
1433 COSTS_N_INSNS (1), /* extend. */
1434 COSTS_N_INSNS (1), /* extend_arith. */
1435 COSTS_N_INSNS (1), /* bfi. */
1436 COSTS_N_INSNS (1), /* bfx. */
1437 COSTS_N_INSNS (1), /* clz. */
1438 COSTS_N_INSNS (1), /* rev. */
1439 0, /* non_exec. */
1440 true /* non_exec_costs_exec. */
1441 },
1442
1443 {
1444 /* MULT SImode */
1445 {
1446 0, /* simple. */
1447 COSTS_N_INSNS (1), /* flag_setting. */
1448 COSTS_N_INSNS (1), /* extend. */
1449 COSTS_N_INSNS (1), /* add. */
1450 COSTS_N_INSNS (1), /* extend_add. */
1451 COSTS_N_INSNS (7) /* idiv. */
1452 },
1453 /* MULT DImode */
1454 {
1455 0, /* simple (N/A). */
1456 0, /* flag_setting (N/A). */
1457 COSTS_N_INSNS (1), /* extend. */
1458 0, /* add. */
1459 COSTS_N_INSNS (2), /* extend_add. */
1460 0 /* idiv (N/A). */
1461 }
1462 },
1463 /* LD/ST */
1464 {
1465 COSTS_N_INSNS (1), /* load. */
1466 COSTS_N_INSNS (1), /* load_sign_extend. */
1467 COSTS_N_INSNS (3), /* ldrd. */
1468 COSTS_N_INSNS (1), /* ldm_1st. */
1469 1, /* ldm_regs_per_insn_1st. */
1470 2, /* ldm_regs_per_insn_subsequent. */
1471 COSTS_N_INSNS (2), /* loadf. */
1472 COSTS_N_INSNS (2), /* loadd. */
1473 COSTS_N_INSNS (1), /* load_unaligned. */
1474 COSTS_N_INSNS (1), /* store. */
1475 COSTS_N_INSNS (3), /* strd. */
1476 COSTS_N_INSNS (1), /* stm_1st. */
1477 1, /* stm_regs_per_insn_1st. */
1478 2, /* stm_regs_per_insn_subsequent. */
1479 COSTS_N_INSNS (2), /* storef. */
1480 COSTS_N_INSNS (2), /* stored. */
1481 COSTS_N_INSNS (1), /* store_unaligned. */
1482 COSTS_N_INSNS (1), /* loadv. */
1483 COSTS_N_INSNS (1) /* storev. */
1484 },
1485 {
1486 /* FP SFmode */
1487 {
1488 COSTS_N_INSNS (15), /* div. */
1489 COSTS_N_INSNS (3), /* mult. */
1490 COSTS_N_INSNS (7), /* mult_addsub. */
1491 COSTS_N_INSNS (7), /* fma. */
1492 COSTS_N_INSNS (3), /* addsub. */
1493 COSTS_N_INSNS (3), /* fpconst. */
1494 COSTS_N_INSNS (3), /* neg. */
1495 COSTS_N_INSNS (3), /* compare. */
1496 COSTS_N_INSNS (3), /* widen. */
1497 COSTS_N_INSNS (3), /* narrow. */
1498 COSTS_N_INSNS (3), /* toint. */
1499 COSTS_N_INSNS (3), /* fromint. */
1500 COSTS_N_INSNS (3) /* roundint. */
1501 },
1502 /* FP DFmode */
1503 {
1504 COSTS_N_INSNS (30), /* div. */
1505 COSTS_N_INSNS (6), /* mult. */
1506 COSTS_N_INSNS (10), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1517 }
1518 },
1519 /* Vector */
1520 {
1521 COSTS_N_INSNS (1), /* alu. */
1522 COSTS_N_INSNS (4), /* mult. */
1523 COSTS_N_INSNS (1), /* movi. */
1524 COSTS_N_INSNS (2), /* dup. */
1525 COSTS_N_INSNS (2) /* extract. */
1526 }
1527 };
1528
1529 const struct cpu_cost_table cortexa12_extra_costs =
1530 {
1531 /* ALU */
1532 {
1533 0, /* arith. */
1534 0, /* logical. */
1535 0, /* shift. */
1536 COSTS_N_INSNS (1), /* shift_reg. */
1537 COSTS_N_INSNS (1), /* arith_shift. */
1538 COSTS_N_INSNS (1), /* arith_shift_reg. */
1539 COSTS_N_INSNS (1), /* log_shift. */
1540 COSTS_N_INSNS (1), /* log_shift_reg. */
1541 0, /* extend. */
1542 COSTS_N_INSNS (1), /* extend_arith. */
1543 0, /* bfi. */
1544 COSTS_N_INSNS (1), /* bfx. */
1545 COSTS_N_INSNS (1), /* clz. */
1546 COSTS_N_INSNS (1), /* rev. */
1547 0, /* non_exec. */
1548 true /* non_exec_costs_exec. */
1549 },
1550 /* MULT SImode */
1551 {
1552 {
1553 COSTS_N_INSNS (2), /* simple. */
1554 COSTS_N_INSNS (3), /* flag_setting. */
1555 COSTS_N_INSNS (2), /* extend. */
1556 COSTS_N_INSNS (3), /* add. */
1557 COSTS_N_INSNS (2), /* extend_add. */
1558 COSTS_N_INSNS (18) /* idiv. */
1559 },
1560 /* MULT DImode */
1561 {
1562 0, /* simple (N/A). */
1563 0, /* flag_setting (N/A). */
1564 COSTS_N_INSNS (3), /* extend. */
1565 0, /* add (N/A). */
1566 COSTS_N_INSNS (3), /* extend_add. */
1567 0 /* idiv (N/A). */
1568 }
1569 },
1570 /* LD/ST */
1571 {
1572 COSTS_N_INSNS (3), /* load. */
1573 COSTS_N_INSNS (3), /* load_sign_extend. */
1574 COSTS_N_INSNS (3), /* ldrd. */
1575 COSTS_N_INSNS (3), /* ldm_1st. */
1576 1, /* ldm_regs_per_insn_1st. */
1577 2, /* ldm_regs_per_insn_subsequent. */
1578 COSTS_N_INSNS (3), /* loadf. */
1579 COSTS_N_INSNS (3), /* loadd. */
1580 0, /* load_unaligned. */
1581 0, /* store. */
1582 0, /* strd. */
1583 0, /* stm_1st. */
1584 1, /* stm_regs_per_insn_1st. */
1585 2, /* stm_regs_per_insn_subsequent. */
1586 COSTS_N_INSNS (2), /* storef. */
1587 COSTS_N_INSNS (2), /* stored. */
1588 0, /* store_unaligned. */
1589 COSTS_N_INSNS (1), /* loadv. */
1590 COSTS_N_INSNS (1) /* storev. */
1591 },
1592 {
1593 /* FP SFmode */
1594 {
1595 COSTS_N_INSNS (17), /* div. */
1596 COSTS_N_INSNS (4), /* mult. */
1597 COSTS_N_INSNS (8), /* mult_addsub. */
1598 COSTS_N_INSNS (8), /* fma. */
1599 COSTS_N_INSNS (4), /* addsub. */
1600 COSTS_N_INSNS (2), /* fpconst. */
1601 COSTS_N_INSNS (2), /* neg. */
1602 COSTS_N_INSNS (2), /* compare. */
1603 COSTS_N_INSNS (4), /* widen. */
1604 COSTS_N_INSNS (4), /* narrow. */
1605 COSTS_N_INSNS (4), /* toint. */
1606 COSTS_N_INSNS (4), /* fromint. */
1607 COSTS_N_INSNS (4) /* roundint. */
1608 },
1609 /* FP DFmode */
1610 {
1611 COSTS_N_INSNS (31), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1624 }
1625 },
1626 /* Vector */
1627 {
1628 COSTS_N_INSNS (1), /* alu. */
1629 COSTS_N_INSNS (4), /* mult. */
1630 COSTS_N_INSNS (1), /* movi. */
1631 COSTS_N_INSNS (2), /* dup. */
1632 COSTS_N_INSNS (2) /* extract. */
1633 }
1634 };
1635
1636 const struct cpu_cost_table cortexa15_extra_costs =
1637 {
1638 /* ALU */
1639 {
1640 0, /* arith. */
1641 0, /* logical. */
1642 0, /* shift. */
1643 0, /* shift_reg. */
1644 COSTS_N_INSNS (1), /* arith_shift. */
1645 COSTS_N_INSNS (1), /* arith_shift_reg. */
1646 COSTS_N_INSNS (1), /* log_shift. */
1647 COSTS_N_INSNS (1), /* log_shift_reg. */
1648 0, /* extend. */
1649 COSTS_N_INSNS (1), /* extend_arith. */
1650 COSTS_N_INSNS (1), /* bfi. */
1651 0, /* bfx. */
1652 0, /* clz. */
1653 0, /* rev. */
1654 0, /* non_exec. */
1655 true /* non_exec_costs_exec. */
1656 },
1657 /* MULT SImode */
1658 {
1659 {
1660 COSTS_N_INSNS (2), /* simple. */
1661 COSTS_N_INSNS (3), /* flag_setting. */
1662 COSTS_N_INSNS (2), /* extend. */
1663 COSTS_N_INSNS (2), /* add. */
1664 COSTS_N_INSNS (2), /* extend_add. */
1665 COSTS_N_INSNS (18) /* idiv. */
1666 },
1667 /* MULT DImode */
1668 {
1669 0, /* simple (N/A). */
1670 0, /* flag_setting (N/A). */
1671 COSTS_N_INSNS (3), /* extend. */
1672 0, /* add (N/A). */
1673 COSTS_N_INSNS (3), /* extend_add. */
1674 0 /* idiv (N/A). */
1675 }
1676 },
1677 /* LD/ST */
1678 {
1679 COSTS_N_INSNS (3), /* load. */
1680 COSTS_N_INSNS (3), /* load_sign_extend. */
1681 COSTS_N_INSNS (3), /* ldrd. */
1682 COSTS_N_INSNS (4), /* ldm_1st. */
1683 1, /* ldm_regs_per_insn_1st. */
1684 2, /* ldm_regs_per_insn_subsequent. */
1685 COSTS_N_INSNS (4), /* loadf. */
1686 COSTS_N_INSNS (4), /* loadd. */
1687 0, /* load_unaligned. */
1688 0, /* store. */
1689 0, /* strd. */
1690 COSTS_N_INSNS (1), /* stm_1st. */
1691 1, /* stm_regs_per_insn_1st. */
1692 2, /* stm_regs_per_insn_subsequent. */
1693 0, /* storef. */
1694 0, /* stored. */
1695 0, /* store_unaligned. */
1696 COSTS_N_INSNS (1), /* loadv. */
1697 COSTS_N_INSNS (1) /* storev. */
1698 },
1699 {
1700 /* FP SFmode */
1701 {
1702 COSTS_N_INSNS (17), /* div. */
1703 COSTS_N_INSNS (4), /* mult. */
1704 COSTS_N_INSNS (8), /* mult_addsub. */
1705 COSTS_N_INSNS (8), /* fma. */
1706 COSTS_N_INSNS (4), /* addsub. */
1707 COSTS_N_INSNS (2), /* fpconst. */
1708 COSTS_N_INSNS (2), /* neg. */
1709 COSTS_N_INSNS (5), /* compare. */
1710 COSTS_N_INSNS (4), /* widen. */
1711 COSTS_N_INSNS (4), /* narrow. */
1712 COSTS_N_INSNS (4), /* toint. */
1713 COSTS_N_INSNS (4), /* fromint. */
1714 COSTS_N_INSNS (4) /* roundint. */
1715 },
1716 /* FP DFmode */
1717 {
1718 COSTS_N_INSNS (31), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (2), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1731 }
1732 },
1733 /* Vector */
1734 {
1735 COSTS_N_INSNS (1), /* alu. */
1736 COSTS_N_INSNS (4), /* mult. */
1737 COSTS_N_INSNS (1), /* movi. */
1738 COSTS_N_INSNS (2), /* dup. */
1739 COSTS_N_INSNS (2) /* extract. */
1740 }
1741 };
1742
1743 const struct cpu_cost_table v7m_extra_costs =
1744 {
1745 /* ALU */
1746 {
1747 0, /* arith. */
1748 0, /* logical. */
1749 0, /* shift. */
1750 0, /* shift_reg. */
1751 0, /* arith_shift. */
1752 COSTS_N_INSNS (1), /* arith_shift_reg. */
1753 0, /* log_shift. */
1754 COSTS_N_INSNS (1), /* log_shift_reg. */
1755 0, /* extend. */
1756 COSTS_N_INSNS (1), /* extend_arith. */
1757 0, /* bfi. */
1758 0, /* bfx. */
1759 0, /* clz. */
1760 0, /* rev. */
1761 COSTS_N_INSNS (1), /* non_exec. */
1762 false /* non_exec_costs_exec. */
1763 },
1764 {
1765 /* MULT SImode */
1766 {
1767 COSTS_N_INSNS (1), /* simple. */
1768 COSTS_N_INSNS (1), /* flag_setting. */
1769 COSTS_N_INSNS (2), /* extend. */
1770 COSTS_N_INSNS (1), /* add. */
1771 COSTS_N_INSNS (3), /* extend_add. */
1772 COSTS_N_INSNS (8) /* idiv. */
1773 },
1774 /* MULT DImode */
1775 {
1776 0, /* simple (N/A). */
1777 0, /* flag_setting (N/A). */
1778 COSTS_N_INSNS (2), /* extend. */
1779 0, /* add (N/A). */
1780 COSTS_N_INSNS (3), /* extend_add. */
1781 0 /* idiv (N/A). */
1782 }
1783 },
1784 /* LD/ST */
1785 {
1786 COSTS_N_INSNS (2), /* load. */
1787 0, /* load_sign_extend. */
1788 COSTS_N_INSNS (3), /* ldrd. */
1789 COSTS_N_INSNS (2), /* ldm_1st. */
1790 1, /* ldm_regs_per_insn_1st. */
1791 1, /* ldm_regs_per_insn_subsequent. */
1792 COSTS_N_INSNS (2), /* loadf. */
1793 COSTS_N_INSNS (3), /* loadd. */
1794 COSTS_N_INSNS (1), /* load_unaligned. */
1795 COSTS_N_INSNS (2), /* store. */
1796 COSTS_N_INSNS (3), /* strd. */
1797 COSTS_N_INSNS (2), /* stm_1st. */
1798 1, /* stm_regs_per_insn_1st. */
1799 1, /* stm_regs_per_insn_subsequent. */
1800 COSTS_N_INSNS (2), /* storef. */
1801 COSTS_N_INSNS (3), /* stored. */
1802 COSTS_N_INSNS (1), /* store_unaligned. */
1803 COSTS_N_INSNS (1), /* loadv. */
1804 COSTS_N_INSNS (1) /* storev. */
1805 },
1806 {
1807 /* FP SFmode */
1808 {
1809 COSTS_N_INSNS (7), /* div. */
1810 COSTS_N_INSNS (2), /* mult. */
1811 COSTS_N_INSNS (5), /* mult_addsub. */
1812 COSTS_N_INSNS (3), /* fma. */
1813 COSTS_N_INSNS (1), /* addsub. */
1814 0, /* fpconst. */
1815 0, /* neg. */
1816 0, /* compare. */
1817 0, /* widen. */
1818 0, /* narrow. */
1819 0, /* toint. */
1820 0, /* fromint. */
1821 0 /* roundint. */
1822 },
1823 /* FP DFmode */
1824 {
1825 COSTS_N_INSNS (15), /* div. */
1826 COSTS_N_INSNS (5), /* mult. */
1827 COSTS_N_INSNS (7), /* mult_addsub. */
1828 COSTS_N_INSNS (7), /* fma. */
1829 COSTS_N_INSNS (3), /* addsub. */
1830 0, /* fpconst. */
1831 0, /* neg. */
1832 0, /* compare. */
1833 0, /* widen. */
1834 0, /* narrow. */
1835 0, /* toint. */
1836 0, /* fromint. */
1837 0 /* roundint. */
1838 }
1839 },
1840 /* Vector */
1841 {
1842 COSTS_N_INSNS (1), /* alu. */
1843 COSTS_N_INSNS (4), /* mult. */
1844 COSTS_N_INSNS (1), /* movi. */
1845 COSTS_N_INSNS (2), /* dup. */
1846 COSTS_N_INSNS (2) /* extract. */
1847 }
1848 };
1849
1850 const struct addr_mode_cost_table generic_addr_mode_costs =
1851 {
1852 /* int. */
1853 {
1854 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1855 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1856 COSTS_N_INSNS (0) /* AMO_WB. */
1857 },
1858 /* float. */
1859 {
1860 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1861 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1862 COSTS_N_INSNS (0) /* AMO_WB. */
1863 },
1864 /* vector. */
1865 {
1866 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1867 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1868 COSTS_N_INSNS (0) /* AMO_WB. */
1869 }
1870 };
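/* A note on units: COSTS_N_INSNS (N) is defined in rtl.h as N * 4, so
   COSTS_N_INSNS (1) == 4.  Broadly, the "extra cost" tables above record cost
   over and above the baseline cost of a single instruction, which is why the
   common cheap operations are simply 0.  */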
1871
1872 const struct tune_params arm_slowmul_tune =
1873 {
1874 &generic_extra_costs, /* Insn extra costs. */
1875 &generic_addr_mode_costs, /* Addressing mode costs. */
1876 NULL, /* Sched adj cost. */
1877 arm_default_branch_cost,
1878 &arm_default_vec_cost,
1879 3, /* Constant limit. */
1880 5, /* Max cond insns. */
1881 8, /* Memset max inline. */
1882 1, /* Issue rate. */
1883 ARM_PREFETCH_NOT_BENEFICIAL,
1884 tune_params::PREF_CONST_POOL_TRUE,
1885 tune_params::PREF_LDRD_FALSE,
1886 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1887 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1888 tune_params::DISPARAGE_FLAGS_NEITHER,
1889 tune_params::PREF_NEON_STRINGOPS_FALSE,
1890 tune_params::FUSE_NOTHING,
1891 tune_params::SCHED_AUTOPREF_OFF
1892 };
1893
1894 const struct tune_params arm_fastmul_tune =
1895 {
1896 &generic_extra_costs, /* Insn extra costs. */
1897 &generic_addr_mode_costs, /* Addressing mode costs. */
1898 NULL, /* Sched adj cost. */
1899 arm_default_branch_cost,
1900 &arm_default_vec_cost,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 1, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 tune_params::PREF_CONST_POOL_TRUE,
1907 tune_params::PREF_LDRD_FALSE,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE,
1912 tune_params::FUSE_NOTHING,
1913 tune_params::SCHED_AUTOPREF_OFF
1914 };
1915
1916 /* StrongARM has early execution of branches, so a sequence that is worth
1917 skipping is shorter. Set max_insns_skipped to a lower value. */
1918
1919 const struct tune_params arm_strongarm_tune =
1920 {
1921 &generic_extra_costs, /* Insn extra costs. */
1922 &generic_addr_mode_costs, /* Addressing mode costs. */
1923 NULL, /* Sched adj cost. */
1924 arm_default_branch_cost,
1925 &arm_default_vec_cost,
1926 1, /* Constant limit. */
1927 3, /* Max cond insns. */
1928 8, /* Memset max inline. */
1929 1, /* Issue rate. */
1930 ARM_PREFETCH_NOT_BENEFICIAL,
1931 tune_params::PREF_CONST_POOL_TRUE,
1932 tune_params::PREF_LDRD_FALSE,
1933 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1934 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1935 tune_params::DISPARAGE_FLAGS_NEITHER,
1936 tune_params::PREF_NEON_STRINGOPS_FALSE,
1937 tune_params::FUSE_NOTHING,
1938 tune_params::SCHED_AUTOPREF_OFF
1939 };
1940
1941 const struct tune_params arm_xscale_tune =
1942 {
1943 &generic_extra_costs, /* Insn extra costs. */
1944 &generic_addr_mode_costs, /* Addressing mode costs. */
1945 xscale_sched_adjust_cost,
1946 arm_default_branch_cost,
1947 &arm_default_vec_cost,
1948 2, /* Constant limit. */
1949 3, /* Max cond insns. */
1950 8, /* Memset max inline. */
1951 1, /* Issue rate. */
1952 ARM_PREFETCH_NOT_BENEFICIAL,
1953 tune_params::PREF_CONST_POOL_TRUE,
1954 tune_params::PREF_LDRD_FALSE,
1955 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1956 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1957 tune_params::DISPARAGE_FLAGS_NEITHER,
1958 tune_params::PREF_NEON_STRINGOPS_FALSE,
1959 tune_params::FUSE_NOTHING,
1960 tune_params::SCHED_AUTOPREF_OFF
1961 };
1962
1963 const struct tune_params arm_9e_tune =
1964 {
1965 &generic_extra_costs, /* Insn extra costs. */
1966 &generic_addr_mode_costs, /* Addressing mode costs. */
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 1, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_TRUE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_STRINGOPS_FALSE,
1981 tune_params::FUSE_NOTHING,
1982 tune_params::SCHED_AUTOPREF_OFF
1983 };
1984
1985 const struct tune_params arm_marvell_pj4_tune =
1986 {
1987 &generic_extra_costs, /* Insn extra costs. */
1988 &generic_addr_mode_costs, /* Addressing mode costs. */
1989 NULL, /* Sched adj cost. */
1990 arm_default_branch_cost,
1991 &arm_default_vec_cost,
1992 1, /* Constant limit. */
1993 5, /* Max cond insns. */
1994 8, /* Memset max inline. */
1995 2, /* Issue rate. */
1996 ARM_PREFETCH_NOT_BENEFICIAL,
1997 tune_params::PREF_CONST_POOL_TRUE,
1998 tune_params::PREF_LDRD_FALSE,
1999 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2001 tune_params::DISPARAGE_FLAGS_NEITHER,
2002 tune_params::PREF_NEON_STRINGOPS_FALSE,
2003 tune_params::FUSE_NOTHING,
2004 tune_params::SCHED_AUTOPREF_OFF
2005 };
2006
2007 const struct tune_params arm_v6t2_tune =
2008 {
2009 &generic_extra_costs, /* Insn extra costs. */
2010 &generic_addr_mode_costs, /* Addressing mode costs. */
2011 NULL, /* Sched adj cost. */
2012 arm_default_branch_cost,
2013 &arm_default_vec_cost,
2014 1, /* Constant limit. */
2015 5, /* Max cond insns. */
2016 8, /* Memset max inline. */
2017 1, /* Issue rate. */
2018 ARM_PREFETCH_NOT_BENEFICIAL,
2019 tune_params::PREF_CONST_POOL_FALSE,
2020 tune_params::PREF_LDRD_FALSE,
2021 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2022 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2023 tune_params::DISPARAGE_FLAGS_NEITHER,
2024 tune_params::PREF_NEON_STRINGOPS_FALSE,
2025 tune_params::FUSE_NOTHING,
2026 tune_params::SCHED_AUTOPREF_OFF
2027 };
2028
2029
2030 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2031 const struct tune_params arm_cortex_tune =
2032 {
2033 &generic_extra_costs,
2034 &generic_addr_mode_costs, /* Addressing mode costs. */
2035 NULL, /* Sched adj cost. */
2036 arm_default_branch_cost,
2037 &arm_default_vec_cost,
2038 1, /* Constant limit. */
2039 5, /* Max cond insns. */
2040 8, /* Memset max inline. */
2041 2, /* Issue rate. */
2042 ARM_PREFETCH_NOT_BENEFICIAL,
2043 tune_params::PREF_CONST_POOL_FALSE,
2044 tune_params::PREF_LDRD_FALSE,
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2047 tune_params::DISPARAGE_FLAGS_NEITHER,
2048 tune_params::PREF_NEON_STRINGOPS_FALSE,
2049 tune_params::FUSE_NOTHING,
2050 tune_params::SCHED_AUTOPREF_OFF
2051 };
2052
2053 const struct tune_params arm_cortex_a8_tune =
2054 {
2055 &cortexa8_extra_costs,
2056 &generic_addr_mode_costs, /* Addressing mode costs. */
2057 NULL, /* Sched adj cost. */
2058 arm_default_branch_cost,
2059 &arm_default_vec_cost,
2060 1, /* Constant limit. */
2061 5, /* Max cond insns. */
2062 8, /* Memset max inline. */
2063 2, /* Issue rate. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 tune_params::PREF_CONST_POOL_FALSE,
2066 tune_params::PREF_LDRD_FALSE,
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2069 tune_params::DISPARAGE_FLAGS_NEITHER,
2070 tune_params::PREF_NEON_STRINGOPS_TRUE,
2071 tune_params::FUSE_NOTHING,
2072 tune_params::SCHED_AUTOPREF_OFF
2073 };
2074
2075 const struct tune_params arm_cortex_a7_tune =
2076 {
2077 &cortexa7_extra_costs,
2078 &generic_addr_mode_costs, /* Addressing mode costs. */
2079 NULL, /* Sched adj cost. */
2080 arm_default_branch_cost,
2081 &arm_default_vec_cost,
2082 1, /* Constant limit. */
2083 5, /* Max cond insns. */
2084 8, /* Memset max inline. */
2085 2, /* Issue rate. */
2086 ARM_PREFETCH_NOT_BENEFICIAL,
2087 tune_params::PREF_CONST_POOL_FALSE,
2088 tune_params::PREF_LDRD_FALSE,
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2090 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2091 tune_params::DISPARAGE_FLAGS_NEITHER,
2092 tune_params::PREF_NEON_STRINGOPS_TRUE,
2093 tune_params::FUSE_NOTHING,
2094 tune_params::SCHED_AUTOPREF_OFF
2095 };
2096
2097 const struct tune_params arm_cortex_a15_tune =
2098 {
2099 &cortexa15_extra_costs,
2100 &generic_addr_mode_costs, /* Addressing mode costs. */
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 8, /* Memset max inline. */
2107 3, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_STRINGOPS_TRUE,
2115 tune_params::FUSE_NOTHING,
2116 tune_params::SCHED_AUTOPREF_FULL
2117 };
2118
2119 const struct tune_params arm_cortex_a35_tune =
2120 {
2121 &cortexa53_extra_costs,
2122 &generic_addr_mode_costs, /* Addressing mode costs. */
2123 NULL, /* Sched adj cost. */
2124 arm_default_branch_cost,
2125 &arm_default_vec_cost,
2126 1, /* Constant limit. */
2127 5, /* Max cond insns. */
2128 8, /* Memset max inline. */
2129 1, /* Issue rate. */
2130 ARM_PREFETCH_NOT_BENEFICIAL,
2131 tune_params::PREF_CONST_POOL_FALSE,
2132 tune_params::PREF_LDRD_FALSE,
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2134 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2135 tune_params::DISPARAGE_FLAGS_NEITHER,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_cortex_a53_tune =
2142 {
2143 &cortexa53_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 5, /* Max cond insns. */
2150 8, /* Memset max inline. */
2151 2, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_FALSE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_NEITHER,
2158 tune_params::PREF_NEON_STRINGOPS_TRUE,
2159 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2160 tune_params::SCHED_AUTOPREF_OFF
2161 };
2162
2163 const struct tune_params arm_cortex_a57_tune =
2164 {
2165 &cortexa57_extra_costs,
2166 &generic_addr_mode_costs, /* Addressing mode costs. */
2167 NULL, /* Sched adj cost. */
2168 arm_default_branch_cost,
2169 &arm_default_vec_cost,
2170 1, /* Constant limit. */
2171 2, /* Max cond insns. */
2172 8, /* Memset max inline. */
2173 3, /* Issue rate. */
2174 ARM_PREFETCH_NOT_BENEFICIAL,
2175 tune_params::PREF_CONST_POOL_FALSE,
2176 tune_params::PREF_LDRD_TRUE,
2177 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2178 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2179 tune_params::DISPARAGE_FLAGS_ALL,
2180 tune_params::PREF_NEON_STRINGOPS_TRUE,
2181 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2182 tune_params::SCHED_AUTOPREF_FULL
2183 };
2184
2185 const struct tune_params arm_exynosm1_tune =
2186 {
2187 &exynosm1_extra_costs,
2188 &generic_addr_mode_costs, /* Addressing mode costs. */
2189 NULL, /* Sched adj cost. */
2190 arm_default_branch_cost,
2191 &arm_default_vec_cost,
2192 1, /* Constant limit. */
2193 2, /* Max cond insns. */
2194 8, /* Memset max inline. */
2195 3, /* Issue rate. */
2196 ARM_PREFETCH_NOT_BENEFICIAL,
2197 tune_params::PREF_CONST_POOL_FALSE,
2198 tune_params::PREF_LDRD_TRUE,
2199 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2200 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2201 tune_params::DISPARAGE_FLAGS_ALL,
2202 tune_params::PREF_NEON_STRINGOPS_TRUE,
2203 tune_params::FUSE_NOTHING,
2204 tune_params::SCHED_AUTOPREF_OFF
2205 };
2206
2207 const struct tune_params arm_xgene1_tune =
2208 {
2209 &xgene1_extra_costs,
2210 &generic_addr_mode_costs, /* Addressing mode costs. */
2211 NULL, /* Sched adj cost. */
2212 arm_default_branch_cost,
2213 &arm_default_vec_cost,
2214 1, /* Constant limit. */
2215 2, /* Max cond insns. */
2216 32, /* Memset max inline. */
2217 4, /* Issue rate. */
2218 ARM_PREFETCH_NOT_BENEFICIAL,
2219 tune_params::PREF_CONST_POOL_FALSE,
2220 tune_params::PREF_LDRD_TRUE,
2221 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2222 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2223 tune_params::DISPARAGE_FLAGS_ALL,
2224 tune_params::PREF_NEON_STRINGOPS_FALSE,
2225 tune_params::FUSE_NOTHING,
2226 tune_params::SCHED_AUTOPREF_OFF
2227 };
2228
2229 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2230 less appealing. Set max_insns_skipped to a low value. */
2231
2232 const struct tune_params arm_cortex_a5_tune =
2233 {
2234 &cortexa5_extra_costs,
2235 &generic_addr_mode_costs, /* Addressing mode costs. */
2236 NULL, /* Sched adj cost. */
2237 arm_cortex_a5_branch_cost,
2238 &arm_default_vec_cost,
2239 1, /* Constant limit. */
2240 1, /* Max cond insns. */
2241 8, /* Memset max inline. */
2242 2, /* Issue rate. */
2243 ARM_PREFETCH_NOT_BENEFICIAL,
2244 tune_params::PREF_CONST_POOL_FALSE,
2245 tune_params::PREF_LDRD_FALSE,
2246 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2247 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2248 tune_params::DISPARAGE_FLAGS_NEITHER,
2249 tune_params::PREF_NEON_STRINGOPS_TRUE,
2250 tune_params::FUSE_NOTHING,
2251 tune_params::SCHED_AUTOPREF_OFF
2252 };
2253
2254 const struct tune_params arm_cortex_a9_tune =
2255 {
2256 &cortexa9_extra_costs,
2257 &generic_addr_mode_costs, /* Addressing mode costs. */
2258 cortex_a9_sched_adjust_cost,
2259 arm_default_branch_cost,
2260 &arm_default_vec_cost,
2261 1, /* Constant limit. */
2262 5, /* Max cond insns. */
2263 8, /* Memset max inline. */
2264 2, /* Issue rate. */
2265 ARM_PREFETCH_BENEFICIAL(4,32,32),
2266 tune_params::PREF_CONST_POOL_FALSE,
2267 tune_params::PREF_LDRD_FALSE,
2268 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2269 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2270 tune_params::DISPARAGE_FLAGS_NEITHER,
2271 tune_params::PREF_NEON_STRINGOPS_FALSE,
2272 tune_params::FUSE_NOTHING,
2273 tune_params::SCHED_AUTOPREF_OFF
2274 };
2275
2276 const struct tune_params arm_cortex_a12_tune =
2277 {
2278 &cortexa12_extra_costs,
2279 &generic_addr_mode_costs, /* Addressing mode costs. */
2280 NULL, /* Sched adj cost. */
2281 arm_default_branch_cost,
2282 &arm_default_vec_cost, /* Vectorizer costs. */
2283 1, /* Constant limit. */
2284 2, /* Max cond insns. */
2285 8, /* Memset max inline. */
2286 2, /* Issue rate. */
2287 ARM_PREFETCH_NOT_BENEFICIAL,
2288 tune_params::PREF_CONST_POOL_FALSE,
2289 tune_params::PREF_LDRD_TRUE,
2290 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2291 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2292 tune_params::DISPARAGE_FLAGS_ALL,
2293 tune_params::PREF_NEON_STRINGOPS_TRUE,
2294 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2295 tune_params::SCHED_AUTOPREF_OFF
2296 };
2297
2298 const struct tune_params arm_cortex_a73_tune =
2299 {
2300 &cortexa57_extra_costs,
2301 &generic_addr_mode_costs, /* Addressing mode costs. */
2302 NULL, /* Sched adj cost. */
2303 arm_default_branch_cost,
2304 &arm_default_vec_cost, /* Vectorizer costs. */
2305 1, /* Constant limit. */
2306 2, /* Max cond insns. */
2307 8, /* Memset max inline. */
2308 2, /* Issue rate. */
2309 ARM_PREFETCH_NOT_BENEFICIAL,
2310 tune_params::PREF_CONST_POOL_FALSE,
2311 tune_params::PREF_LDRD_TRUE,
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2313 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2314 tune_params::DISPARAGE_FLAGS_ALL,
2315 tune_params::PREF_NEON_STRINGOPS_TRUE,
2316 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2317 tune_params::SCHED_AUTOPREF_FULL
2318 };
2319
2320 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2321 single cycle, so materialising a 32-bit constant that way costs two cycles.
2322 An LDR from the constant pool likewise takes two cycles, but mildly
2323 increases pipelining opportunity (consecutive loads/stores can be pipelined
2324 together, saving one cycle), and may also improve icache utilisation.
2325 Hence we prefer the constant pool for such processors. */
2326
2327 const struct tune_params arm_v7m_tune =
2328 {
2329 &v7m_extra_costs,
2330 &generic_addr_mode_costs, /* Addressing mode costs. */
2331 NULL, /* Sched adj cost. */
2332 arm_cortex_m_branch_cost,
2333 &arm_default_vec_cost,
2334 1, /* Constant limit. */
2335 2, /* Max cond insns. */
2336 8, /* Memset max inline. */
2337 1, /* Issue rate. */
2338 ARM_PREFETCH_NOT_BENEFICIAL,
2339 tune_params::PREF_CONST_POOL_TRUE,
2340 tune_params::PREF_LDRD_FALSE,
2341 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2342 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2343 tune_params::DISPARAGE_FLAGS_NEITHER,
2344 tune_params::PREF_NEON_STRINGOPS_FALSE,
2345 tune_params::FUSE_NOTHING,
2346 tune_params::SCHED_AUTOPREF_OFF
2347 };
2348
2349 /* Cortex-M7 tuning. */
2350
2351 const struct tune_params arm_cortex_m7_tune =
2352 {
2353 &v7m_extra_costs,
2354 &generic_addr_mode_costs, /* Addressing mode costs. */
2355 NULL, /* Sched adj cost. */
2356 arm_cortex_m7_branch_cost,
2357 &arm_default_vec_cost,
2358 0, /* Constant limit. */
2359 1, /* Max cond insns. */
2360 8, /* Memset max inline. */
2361 2, /* Issue rate. */
2362 ARM_PREFETCH_NOT_BENEFICIAL,
2363 tune_params::PREF_CONST_POOL_TRUE,
2364 tune_params::PREF_LDRD_FALSE,
2365 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2366 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2367 tune_params::DISPARAGE_FLAGS_NEITHER,
2368 tune_params::PREF_NEON_STRINGOPS_FALSE,
2369 tune_params::FUSE_NOTHING,
2370 tune_params::SCHED_AUTOPREF_OFF
2371 };
2372
2373 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2374 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2375 cortex-m23. */
2376 const struct tune_params arm_v6m_tune =
2377 {
2378 &generic_extra_costs, /* Insn extra costs. */
2379 &generic_addr_mode_costs, /* Addressing mode costs. */
2380 NULL, /* Sched adj cost. */
2381 arm_default_branch_cost,
2382 &arm_default_vec_cost, /* Vectorizer costs. */
2383 1, /* Constant limit. */
2384 5, /* Max cond insns. */
2385 8, /* Memset max inline. */
2386 1, /* Issue rate. */
2387 ARM_PREFETCH_NOT_BENEFICIAL,
2388 tune_params::PREF_CONST_POOL_FALSE,
2389 tune_params::PREF_LDRD_FALSE,
2390 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2391 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2392 tune_params::DISPARAGE_FLAGS_NEITHER,
2393 tune_params::PREF_NEON_STRINGOPS_FALSE,
2394 tune_params::FUSE_NOTHING,
2395 tune_params::SCHED_AUTOPREF_OFF
2396 };
2397
2398 const struct tune_params arm_fa726te_tune =
2399 {
2400 &generic_extra_costs, /* Insn extra costs. */
2401 &generic_addr_mode_costs, /* Addressing mode costs. */
2402 fa726te_sched_adjust_cost,
2403 arm_default_branch_cost,
2404 &arm_default_vec_cost,
2405 1, /* Constant limit. */
2406 5, /* Max cond insns. */
2407 8, /* Memset max inline. */
2408 2, /* Issue rate. */
2409 ARM_PREFETCH_NOT_BENEFICIAL,
2410 tune_params::PREF_CONST_POOL_TRUE,
2411 tune_params::PREF_LDRD_FALSE,
2412 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2413 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2414 tune_params::DISPARAGE_FLAGS_NEITHER,
2415 tune_params::PREF_NEON_STRINGOPS_FALSE,
2416 tune_params::FUSE_NOTHING,
2417 tune_params::SCHED_AUTOPREF_OFF
2418 };
2419
2420 /* Auto-generated CPU, FPU and architecture tables. */
2421 #include "arm-cpu-data.h"
2422
2423 /* The name of the preprocessor macro to define for this architecture. PROFILE
2424 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2425 is thus chosen to be big enough to hold the longest architecture name. */
2426
2427 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2428
2429 /* Supported TLS relocations. */
2430
2431 enum tls_reloc {
2432 TLS_GD32,
2433 TLS_GD32_FDPIC,
2434 TLS_LDM32,
2435 TLS_LDM32_FDPIC,
2436 TLS_LDO32,
2437 TLS_IE32,
2438 TLS_IE32_FDPIC,
2439 TLS_LE32,
2440 TLS_DESCSEQ /* GNU scheme */
2441 };
2442
2443 /* The maximum number of insns to be used when loading a constant. */
2444 inline static int
2445 arm_constant_limit (bool size_p)
2446 {
2447 return size_p ? 1 : current_tune->constant_limit;
2448 }
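/* For example, when optimizing for size the limit is always a single insn;
   otherwise it comes from the tuning tables above (e.g. 3 for
   arm_slowmul_tune, 2 for arm_xscale_tune, 1 for most Cortex tunings).  */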
2449
2450 /* Emit an insn that's a simple single-set. Both the operands must be known
2451 to be valid. */
2452 inline static rtx_insn *
2453 emit_set_insn (rtx x, rtx y)
2454 {
2455 return emit_insn (gen_rtx_SET (x, y));
2456 }
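/* A typical use is to adjust a register by a constant in a single SET, e.g.
   emit_set_insn (stack_pointer_rtx,
		  plus_constant (Pmode, stack_pointer_rtx, -16));
   which emits (set (reg sp) (plus (reg sp) (const_int -16))).  */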
2457
2458 /* Return the number of bits set in VALUE. */
2459 static unsigned
2460 bit_count (unsigned long value)
2461 {
2462 unsigned long count = 0;
2463
2464 while (value)
2465 {
2466 count++;
2467 value &= value - 1; /* Clear the least-significant set bit. */
2468 }
2469
2470 return count;
2471 }
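/* For example, bit_count (0x29) clears one set bit per iteration:
   0x29 -> 0x28 -> 0x20 -> 0, so it returns 3.  */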
2472
2473 /* Return the number of bits set in BMAP. */
2474 static unsigned
2475 bitmap_popcount (const sbitmap bmap)
2476 {
2477 unsigned int count = 0;
2478 unsigned int n = 0;
2479 sbitmap_iterator sbi;
2480
2481 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2482 count++;
2483 return count;
2484 }
2485
2486 typedef struct
2487 {
2488 machine_mode mode;
2489 const char *name;
2490 } arm_fixed_mode_set;
2491
2492 /* A small helper for setting fixed-point libfuncs. */
2493
2494 static void
2495 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2496 const char *funcname, const char *modename,
2497 int num_suffix)
2498 {
2499 char buffer[50];
2500
2501 if (num_suffix == 0)
2502 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2503 else
2504 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2505
2506 set_optab_libfunc (optable, mode, buffer);
2507 }
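/* For example, arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3)
   registers "__gnu_addsq3" as the SQmode addition libcall; a NUM_SUFFIX of 0
   omits the trailing digit.  */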
2508
2509 static void
2510 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2511 machine_mode from, const char *funcname,
2512 const char *toname, const char *fromname)
2513 {
2514 char buffer[50];
2515 const char *maybe_suffix_2 = "";
2516
2517 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2518 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2519 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2520 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2521 maybe_suffix_2 = "2";
2522
2523 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2524 maybe_suffix_2);
2525
2526 set_conv_libfunc (optable, to, from, buffer);
2527 }
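/* For example, a conversion from QQmode to HQmode (both signed fract modes)
   registers "__gnu_fractqqhq2", whereas a conversion from SQmode to DFmode
   registers "__gnu_fractsqdf": the "2" suffix is added only when both modes
   are fixed-point with the same signedness and the same fract/accum class.  */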
2528
2529 static GTY(()) rtx speculation_barrier_libfunc;
2530
2531 /* Record that we have no arithmetic or comparison libfuncs for
2532 machine mode MODE. */
2533
2534 static void
2535 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2536 {
2537 /* Arithmetic. */
2538 set_optab_libfunc (add_optab, mode, NULL);
2539 set_optab_libfunc (sdiv_optab, mode, NULL);
2540 set_optab_libfunc (smul_optab, mode, NULL);
2541 set_optab_libfunc (neg_optab, mode, NULL);
2542 set_optab_libfunc (sub_optab, mode, NULL);
2543
2544 /* Comparisons. */
2545 set_optab_libfunc (eq_optab, mode, NULL);
2546 set_optab_libfunc (ne_optab, mode, NULL);
2547 set_optab_libfunc (lt_optab, mode, NULL);
2548 set_optab_libfunc (le_optab, mode, NULL);
2549 set_optab_libfunc (ge_optab, mode, NULL);
2550 set_optab_libfunc (gt_optab, mode, NULL);
2551 set_optab_libfunc (unord_optab, mode, NULL);
2552 }
2553
2554 /* Set up library functions unique to ARM. */
2555 static void
2556 arm_init_libfuncs (void)
2557 {
2558 machine_mode mode_iter;
2559
2560 /* For Linux, we have access to kernel support for atomic operations. */
2561 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2562 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2563
2564 /* There are no special library functions unless we are using the
2565 ARM BPABI. */
2566 if (!TARGET_BPABI)
2567 return;
2568
2569 /* The functions below are described in Section 4 of the "Run-Time
2570 ABI for the ARM architecture", Version 1.0. */
2571
2572 /* Double-precision floating-point arithmetic. Table 2. */
2573 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2574 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2575 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2576 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2577 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2578
2579 /* Double-precision comparisons. Table 3. */
2580 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2581 set_optab_libfunc (ne_optab, DFmode, NULL);
2582 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2583 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2584 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2585 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2586 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2587
2588 /* Single-precision floating-point arithmetic. Table 4. */
2589 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2590 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2591 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2592 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2593 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2594
2595 /* Single-precision comparisons. Table 5. */
2596 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2597 set_optab_libfunc (ne_optab, SFmode, NULL);
2598 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2599 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2600 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2601 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2602 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2603
2604 /* Floating-point to integer conversions. Table 6. */
2605 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2606 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2607 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2608 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2609 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2610 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2611 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2612 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2613
2614 /* Conversions between floating types. Table 7. */
2615 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2616 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2617
2618 /* Integer to floating-point conversions. Table 8. */
2619 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2620 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2621 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2622 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2623 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2624 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2625 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2626 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2627
2628 /* Long long. Table 9. */
2629 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2630 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2631 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2632 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2633 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2634 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2635 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2636 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2637
2638 /* Integer (32/32->32) division. \S 4.3.1. */
2639 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2640 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2641
2642 /* The divmod functions are designed so that they can be used for
2643 plain division, even though they return both the quotient and the
2644 remainder. The quotient is returned in the usual location (i.e.,
2645 r0 for SImode, {r0, r1} for DImode), just as would be expected
2646 for an ordinary division routine. Because the AAPCS calling
2647 conventions specify that all of { r0, r1, r2, r3 } are
2648 call-clobbered registers, the compiler already assumes that any
2649 libcall may clobber them, so there is no need to state explicitly
2650 that these routines clobber those registers. */
2651 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2652 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2653
2654 /* For SImode division the ABI provides div-without-mod routines,
2655 which are faster. */
2656 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2657 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2658
2659 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2660 divmod libcalls instead. */
2661 set_optab_libfunc (smod_optab, DImode, NULL);
2662 set_optab_libfunc (umod_optab, DImode, NULL);
2663 set_optab_libfunc (smod_optab, SImode, NULL);
2664 set_optab_libfunc (umod_optab, SImode, NULL);
2665
2666 /* Half-precision float operations. The compiler handles all operations
2667 with NULL libfuncs by converting to SFmode. */
2668 switch (arm_fp16_format)
2669 {
2670 case ARM_FP16_FORMAT_IEEE:
2671 case ARM_FP16_FORMAT_ALTERNATIVE:
2672
2673 /* Conversions. */
2674 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2675 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2676 ? "__gnu_f2h_ieee"
2677 : "__gnu_f2h_alternative"));
2678 set_conv_libfunc (sext_optab, SFmode, HFmode,
2679 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2680 ? "__gnu_h2f_ieee"
2681 : "__gnu_h2f_alternative"));
2682
2683 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2684 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2685 ? "__gnu_d2h_ieee"
2686 : "__gnu_d2h_alternative"));
2687
2688 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2689 break;
2690
2691 default:
2692 break;
2693 }
2694
2695 /* For all possible libcalls in BFmode, record NULL. */
2696 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2697 {
2698 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2699 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2700 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2701 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2702 }
2703 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2704
2705 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2706 {
2707 const arm_fixed_mode_set fixed_arith_modes[] =
2708 {
2709 { E_QQmode, "qq" },
2710 { E_UQQmode, "uqq" },
2711 { E_HQmode, "hq" },
2712 { E_UHQmode, "uhq" },
2713 { E_SQmode, "sq" },
2714 { E_USQmode, "usq" },
2715 { E_DQmode, "dq" },
2716 { E_UDQmode, "udq" },
2717 { E_TQmode, "tq" },
2718 { E_UTQmode, "utq" },
2719 { E_HAmode, "ha" },
2720 { E_UHAmode, "uha" },
2721 { E_SAmode, "sa" },
2722 { E_USAmode, "usa" },
2723 { E_DAmode, "da" },
2724 { E_UDAmode, "uda" },
2725 { E_TAmode, "ta" },
2726 { E_UTAmode, "uta" }
2727 };
2728 const arm_fixed_mode_set fixed_conv_modes[] =
2729 {
2730 { E_QQmode, "qq" },
2731 { E_UQQmode, "uqq" },
2732 { E_HQmode, "hq" },
2733 { E_UHQmode, "uhq" },
2734 { E_SQmode, "sq" },
2735 { E_USQmode, "usq" },
2736 { E_DQmode, "dq" },
2737 { E_UDQmode, "udq" },
2738 { E_TQmode, "tq" },
2739 { E_UTQmode, "utq" },
2740 { E_HAmode, "ha" },
2741 { E_UHAmode, "uha" },
2742 { E_SAmode, "sa" },
2743 { E_USAmode, "usa" },
2744 { E_DAmode, "da" },
2745 { E_UDAmode, "uda" },
2746 { E_TAmode, "ta" },
2747 { E_UTAmode, "uta" },
2748 { E_QImode, "qi" },
2749 { E_HImode, "hi" },
2750 { E_SImode, "si" },
2751 { E_DImode, "di" },
2752 { E_TImode, "ti" },
2753 { E_SFmode, "sf" },
2754 { E_DFmode, "df" }
2755 };
2756 unsigned int i, j;
2757
2758 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2759 {
2760 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2761 "add", fixed_arith_modes[i].name, 3);
2762 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2763 "ssadd", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2765 "usadd", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2767 "sub", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2769 "sssub", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2771 "ussub", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2773 "mul", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2775 "ssmul", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2777 "usmul", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2779 "div", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2781 "udiv", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2783 "ssdiv", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2785 "usdiv", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2787 "neg", fixed_arith_modes[i].name, 2);
2788 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2789 "ssneg", fixed_arith_modes[i].name, 2);
2790 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2791 "usneg", fixed_arith_modes[i].name, 2);
2792 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2793 "ashl", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2795 "ashr", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2797 "lshr", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2799 "ssashl", fixed_arith_modes[i].name, 3);
2800 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2801 "usashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2803 "cmp", fixed_arith_modes[i].name, 2);
2804 }
2805
2806 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2807 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2808 {
2809 if (i == j
2810 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2811 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2812 continue;
2813
2814 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2815 fixed_conv_modes[j].mode, "fract",
2816 fixed_conv_modes[i].name,
2817 fixed_conv_modes[j].name);
2818 arm_set_fixed_conv_libfunc (satfract_optab,
2819 fixed_conv_modes[i].mode,
2820 fixed_conv_modes[j].mode, "satfract",
2821 fixed_conv_modes[i].name,
2822 fixed_conv_modes[j].name);
2823 arm_set_fixed_conv_libfunc (fractuns_optab,
2824 fixed_conv_modes[i].mode,
2825 fixed_conv_modes[j].mode, "fractuns",
2826 fixed_conv_modes[i].name,
2827 fixed_conv_modes[j].name);
2828 arm_set_fixed_conv_libfunc (satfractuns_optab,
2829 fixed_conv_modes[i].mode,
2830 fixed_conv_modes[j].mode, "satfractuns",
2831 fixed_conv_modes[i].name,
2832 fixed_conv_modes[j].name);
2833 }
2834 }
2835
2836 if (TARGET_AAPCS_BASED)
2837 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2838
2839 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2840 }
2841
2842 /* On AAPCS systems, this is the "struct __va_list". */
2843 static GTY(()) tree va_list_type;
2844
2845 /* Return the type to use as __builtin_va_list. */
2846 static tree
2847 arm_build_builtin_va_list (void)
2848 {
2849 tree va_list_name;
2850 tree ap_field;
2851
2852 if (!TARGET_AAPCS_BASED)
2853 return std_build_builtin_va_list ();
2854
2855 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2856 defined as:
2857
2858 struct __va_list
2859 {
2860 void *__ap;
2861 };
2862
2863 The C Library ABI further reinforces this definition in \S
2864 4.1.
2865
2866 We must follow this definition exactly. The structure tag
2867 name is visible in C++ mangled names, and thus forms a part
2868 of the ABI. The field name may be used by people who
2869 #include <stdarg.h>. */
2870 /* Create the type. */
2871 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2872 /* Give it the required name. */
2873 va_list_name = build_decl (BUILTINS_LOCATION,
2874 TYPE_DECL,
2875 get_identifier ("__va_list"),
2876 va_list_type);
2877 DECL_ARTIFICIAL (va_list_name) = 1;
2878 TYPE_NAME (va_list_type) = va_list_name;
2879 TYPE_STUB_DECL (va_list_type) = va_list_name;
2880 /* Create the __ap field. */
2881 ap_field = build_decl (BUILTINS_LOCATION,
2882 FIELD_DECL,
2883 get_identifier ("__ap"),
2884 ptr_type_node);
2885 DECL_ARTIFICIAL (ap_field) = 1;
2886 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2887 TYPE_FIELDS (va_list_type) = ap_field;
2888 /* Compute its layout. */
2889 layout_type (va_list_type);
2890
2891 return va_list_type;
2892 }
2893
2894 /* Return an expression of type "void *" pointing to the next
2895 available argument in a variable-argument list. VALIST is the
2896 user-level va_list object, of type __builtin_va_list. */
2897 static tree
2898 arm_extract_valist_ptr (tree valist)
2899 {
2900 if (TREE_TYPE (valist) == error_mark_node)
2901 return error_mark_node;
2902
2903 /* On an AAPCS target, the pointer is stored within "struct
2904 va_list". */
2905 if (TARGET_AAPCS_BASED)
2906 {
2907 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2908 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2909 valist, ap_field, NULL_TREE);
2910 }
2911
2912 return valist;
2913 }
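/* On an AAPCS target the result is therefore the component reference
   VALIST.__ap (of type void *); otherwise VALIST is returned unchanged.  */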
2914
2915 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2916 static void
2917 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2918 {
2919 valist = arm_extract_valist_ptr (valist);
2920 std_expand_builtin_va_start (valist, nextarg);
2921 }
2922
2923 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2924 static tree
2925 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2926 gimple_seq *post_p)
2927 {
2928 valist = arm_extract_valist_ptr (valist);
2929 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2930 }
2931
2932 /* Check any incompatible options that the user has specified. */
2933 static void
2934 arm_option_check_internal (struct gcc_options *opts)
2935 {
2936 int flags = opts->x_target_flags;
2937
2938 /* iWMMXt and NEON are incompatible. */
2939 if (TARGET_IWMMXT
2940 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2941 error ("iWMMXt and NEON are incompatible");
2942
2943 /* Make sure that the processor choice does not conflict with any of the
2944 other command line choices. */
2945 if (TARGET_ARM_P (flags)
2946 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2947 error ("target CPU does not support ARM mode");
2948
2949 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2950 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2951 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2952
2953 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2954 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2955
2956 /* If this target is normally configured to use APCS frames, warn if they
2957 are turned off and debugging is turned on. */
2958 if (TARGET_ARM_P (flags)
2959 && write_symbols != NO_DEBUG
2960 && !TARGET_APCS_FRAME
2961 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2962 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2963 "debugging");
2964
2965 /* iWMMXt unsupported under Thumb mode. */
2966 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2967 error ("iWMMXt unsupported under Thumb mode");
2968
2969 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2970 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2971
2972 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2973 {
2974 error ("RTP PIC is incompatible with Thumb");
2975 flag_pic = 0;
2976 }
2977
2978 if (target_pure_code || target_slow_flash_data)
2979 {
2980 const char *flag = (target_pure_code ? "-mpure-code" :
2981 "-mslow-flash-data");
2982 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2983
2984 /* We only support -mslow-flash-data on M-profile targets with
2985 MOVT. */
2986 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2987 error ("%s only supports non-pic code on M-profile targets with the "
2988 "MOVT instruction", flag);
2989
2990 /* We only support -mpure-code on M-profile targets. */
2991 if (target_pure_code && common_unsupported_modes)
2992 error ("%s only supports non-pic code on M-profile targets", flag);
2993
2994 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2995 -mword-relocations forbids relocation of MOVT/MOVW. */
2996 if (target_word_relocations)
2997 error ("%s incompatible with %<-mword-relocations%>", flag);
2998 }
2999 }
3000
3001 /* Recompute the global settings depending on target attribute options. */
3002
3003 static void
3004 arm_option_params_internal (void)
3005 {
3006 /* If we are not using the default (ARM mode) section anchor offset
3007 ranges, then set the correct ranges now. */
3008 if (TARGET_THUMB1)
3009 {
3010 /* Thumb-1 LDR instructions cannot have negative offsets.
3011 Permissible positive offset ranges are 5-bit (for byte loads),
3012 6-bit (for halfword loads), or 7-bit (for word loads).
3013 Empirical results suggest a 7-bit anchor range gives the best
3014 overall code size. */
3015 targetm.min_anchor_offset = 0;
3016 targetm.max_anchor_offset = 127;
3017 }
3018 else if (TARGET_THUMB2)
3019 {
3020 /* The minimum is set such that the total size of the block
3021 for a particular anchor is 248 + 1 + 4095 bytes, which is
3022 divisible by eight, ensuring natural spacing of anchors. */
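/* (248 + 1 + 4095 == 4344 == 8 * 543.)  */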
3023 targetm.min_anchor_offset = -248;
3024 targetm.max_anchor_offset = 4095;
3025 }
3026 else
3027 {
3028 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3029 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3030 }
3031
3032 /* With -Os, use a fixed limit for conditional instructions instead of the tuning default. */
3033 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3034
3035 /* For THUMB2, we limit the conditional sequence to one IT block. */
3036 if (TARGET_THUMB2)
3037 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3038
3039 if (TARGET_THUMB1)
3040 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3041 else
3042 targetm.md_asm_adjust = arm_md_asm_adjust;
3043 }
3044
3045 /* True if -mflip-thumb should next add an attribute for the default
3046 mode, false if it should next add an attribute for the opposite mode. */
3047 static GTY(()) bool thumb_flipper;
3048
3049 /* Options after initial target override. */
3050 static GTY(()) tree init_optimize;
3051
3052 static void
3053 arm_override_options_after_change_1 (struct gcc_options *opts,
3054 struct gcc_options *opts_set)
3055 {
3056 /* -falign-functions without argument: supply one. */
3057 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3058 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3059 && opts->x_optimize_size ? "2" : "4";
3060 }
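/* That is, a bare -falign-functions defaults to 2-byte alignment for Thumb
   code when optimizing for size, and to 4-byte alignment otherwise.  */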
3061
3062 /* Implement targetm.override_options_after_change. */
3063
3064 static void
3065 arm_override_options_after_change (void)
3066 {
3067 arm_override_options_after_change_1 (&global_options, &global_options_set);
3068 }
3069
3070 /* Implement TARGET_OPTION_RESTORE. */
3071 static void
3072 arm_option_restore (struct gcc_options */* opts */,
3073 struct gcc_options */* opts_set */,
3074 struct cl_target_option *ptr)
3075 {
3076 arm_configure_build_target (&arm_active_target, ptr, false);
3077 arm_option_reconfigure_globals ();
3078 }
3079
3080 /* Reset options between modes that the user has specified. */
3081 static void
3082 arm_option_override_internal (struct gcc_options *opts,
3083 struct gcc_options *opts_set)
3084 {
3085 arm_override_options_after_change_1 (opts, opts_set);
3086
3087 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3088 {
3089 /* The default is to enable interworking, so this warning message would
3090 be confusing to users who have just compiled with
3091 e.g. -march=armv4. */
3092 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3093 opts->x_target_flags &= ~MASK_INTERWORK;
3094 }
3095
3096 if (TARGET_THUMB_P (opts->x_target_flags)
3097 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3098 {
3099 warning (0, "target CPU does not support THUMB instructions");
3100 opts->x_target_flags &= ~MASK_THUMB;
3101 }
3102
3103 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3104 {
3105 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3106 opts->x_target_flags &= ~MASK_APCS_FRAME;
3107 }
3108
3109 /* Callee super interworking implies thumb interworking. Adding
3110 this to the flags here simplifies the logic elsewhere. */
3111 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3112 opts->x_target_flags |= MASK_INTERWORK;
3113
3114 /* We need to remember initial values so that combinations of options like
3115 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3116 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3117
3118 if (! opts_set->x_arm_restrict_it)
3119 opts->x_arm_restrict_it = arm_arch8;
3120
3121 /* ARM execution state and M profile don't have [restrict] IT. */
3122 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3123 opts->x_arm_restrict_it = 0;
3124
3125 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3126 if (!opts_set->x_arm_restrict_it
3127 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3128 opts->x_arm_restrict_it = 0;
3129
3130 /* Enable -munaligned-access by default for
3131 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3132 i.e. Thumb2 and ARM state only.
3133 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3134 - ARMv8 architecture-based processors.
3135
3136 Disable -munaligned-access by default for
3137 - all pre-ARMv6 architecture-based processors
3138 - ARMv6-M architecture-based processors
3139 - ARMv8-M Baseline processors. */
3140
3141 if (! opts_set->x_unaligned_access)
3142 {
3143 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3144 && arm_arch6 && (arm_arch_notm || arm_arch7));
3145 }
3146 else if (opts->x_unaligned_access == 1
3147 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3148 {
3149 warning (0, "target CPU does not support unaligned accesses");
3150 opts->x_unaligned_access = 0;
3151 }
3152
3153 /* Don't warn since it's on by default in -O2. */
3154 if (TARGET_THUMB1_P (opts->x_target_flags))
3155 opts->x_flag_schedule_insns = 0;
3156 else
3157 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3158
3159 /* Disable shrink-wrap when optimizing function for size, since it tends to
3160 generate additional returns. */
3161 if (optimize_function_for_size_p (cfun)
3162 && TARGET_THUMB2_P (opts->x_target_flags))
3163 opts->x_flag_shrink_wrap = false;
3164 else
3165 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3166
3167 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3168 - epilogue_insns - does not accurately model the corresponding insns
3169 emitted in the asm file. In particular, see the comment in thumb_exit
3170 'Find out how many of the (return) argument registers we can corrupt'.
3171 As a consequence, the epilogue may clobber registers without fipa-ra
3172 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3173 TODO: Accurately model clobbers for epilogue_insns and reenable
3174 fipa-ra. */
3175 if (TARGET_THUMB1_P (opts->x_target_flags))
3176 opts->x_flag_ipa_ra = 0;
3177 else
3178 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3179
3180 /* Thumb2 inline assembly code should always use unified syntax.
3181 This will apply to ARM and Thumb1 eventually. */
3182 if (TARGET_THUMB2_P (opts->x_target_flags))
3183 opts->x_inline_asm_unified = true;
3184
3185 if (arm_stack_protector_guard == SSP_GLOBAL
3186 && opts->x_arm_stack_protector_guard_offset_str)
3187 {
3188 error ("incompatible options %<-mstack-protector-guard=global%> and "
3189 "%<-mstack-protector-guard-offset=%s%>",
3190 arm_stack_protector_guard_offset_str);
3191 }
3192
3193 if (opts->x_arm_stack_protector_guard_offset_str)
3194 {
3195 char *end;
3196 const char *str = arm_stack_protector_guard_offset_str;
3197 errno = 0;
3198 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3199 if (!*str || *end || errno)
3200 error ("%qs is not a valid offset in %qs", str,
3201 "-mstack-protector-guard-offset=");
3202 arm_stack_protector_guard_offset = offs;
3203 }
3204
3205 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3206 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3207 #endif
3208 }
3209
3210 static sbitmap isa_all_fpubits_internal;
3211 static sbitmap isa_all_fpbits;
3212 static sbitmap isa_quirkbits;
3213
3214 /* Configure a build target TARGET from the user-specified options OPTS.
3215    If WARN_COMPATIBLE, emit a diagnostic if both a CPU and an architecture
3216    have been specified, but the two are not compatible.  */
3217 void
3218 arm_configure_build_target (struct arm_build_target *target,
3219 struct cl_target_option *opts,
3220 bool warn_compatible)
3221 {
3222 const cpu_option *arm_selected_tune = NULL;
3223 const arch_option *arm_selected_arch = NULL;
3224 const cpu_option *arm_selected_cpu = NULL;
3225 const arm_fpu_desc *arm_selected_fpu = NULL;
3226 const char *tune_opts = NULL;
3227 const char *arch_opts = NULL;
3228 const char *cpu_opts = NULL;
3229
3230 bitmap_clear (target->isa);
3231 target->core_name = NULL;
3232 target->arch_name = NULL;
3233
3234 if (opts->x_arm_arch_string)
3235 {
3236 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3237 "-march",
3238 opts->x_arm_arch_string);
3239 arch_opts = strchr (opts->x_arm_arch_string, '+');
3240 }
3241
3242 if (opts->x_arm_cpu_string)
3243 {
3244 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3245 opts->x_arm_cpu_string);
3246 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3247 arm_selected_tune = arm_selected_cpu;
3248 /* If taking the tuning from -mcpu, we don't need to rescan the
3249 options for tuning. */
3250 }
3251
3252 if (opts->x_arm_tune_string)
3253 {
3254 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3255 opts->x_arm_tune_string);
3256 tune_opts = strchr (opts->x_arm_tune_string, '+');
3257 }
3258
3259 if (arm_selected_arch)
3260 {
3261 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3262 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3263 arch_opts);
3264
3265 if (arm_selected_cpu)
3266 {
3267 auto_sbitmap cpu_isa (isa_num_bits);
3268 auto_sbitmap isa_delta (isa_num_bits);
3269
3270 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3271 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3272 cpu_opts);
3273 bitmap_xor (isa_delta, cpu_isa, target->isa);
3274 /* Ignore any bits that are quirk bits. */
3275 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3276 /* If the user (or the default configuration) has specified a
3277 specific FPU, then ignore any bits that depend on the FPU
3278 configuration. Do similarly if using the soft-float
3279 ABI. */
3280 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3281 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3282 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3283
3284 if (!bitmap_empty_p (isa_delta))
3285 {
3286 if (warn_compatible)
3287 warning (0, "switch %<-mcpu=%s%> conflicts "
3288 "with switch %<-march=%s%>",
3289 opts->x_arm_cpu_string,
3290 opts->x_arm_arch_string);
3291
3292 /* -march wins for code generation.
3293 -mcpu wins for default tuning. */
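	      /* Editorial illustration (not part of the original sources):
		 for example, "-march=armv8-a -mcpu=cortex-a5" warns that the
		 two switches conflict (Cortex-A5 implements Armv7-A), then
		 generates Armv8-A code while taking the default tuning from
		 Cortex-A5, as described above.  */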
3294 if (!arm_selected_tune)
3295 arm_selected_tune = arm_selected_cpu;
3296
3297 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3298 target->arch_name = arm_selected_arch->common.name;
3299 }
3300 else
3301 {
3302 /* Architecture and CPU are essentially the same.
3303 Prefer the CPU setting. */
3304 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3305 target->core_name = arm_selected_cpu->common.name;
3306 /* Copy the CPU's capabilities, so that we inherit the
3307 appropriate extensions and quirks. */
3308 bitmap_copy (target->isa, cpu_isa);
3309 }
3310 }
3311 else
3312 {
3313 /* Pick a CPU based on the architecture. */
3314 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3315 target->arch_name = arm_selected_arch->common.name;
3316 /* Note: target->core_name is left unset in this path. */
3317 }
3318 }
3319 else if (arm_selected_cpu)
3320 {
3321 target->core_name = arm_selected_cpu->common.name;
3322 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3323 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3324 cpu_opts);
3325 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3326 }
3327 /* If the user did not specify a processor or architecture, choose
3328 one for them. */
3329 else
3330 {
3331 const cpu_option *sel;
3332 auto_sbitmap sought_isa (isa_num_bits);
3333 bitmap_clear (sought_isa);
3334 auto_sbitmap default_isa (isa_num_bits);
3335
3336 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3337 TARGET_CPU_DEFAULT);
3338 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3339 gcc_assert (arm_selected_cpu->common.name);
3340
3341 /* RWE: All of the selection logic below (to the end of this
3342 'if' clause) looks somewhat suspect. It appears to be mostly
3343 there to support forcing thumb support when the default CPU
3344 does not have thumb (somewhat dubious in terms of what the
3345 user might be expecting). I think it should be removed once
3346 support for the pre-thumb era cores is removed. */
3347 sel = arm_selected_cpu;
3348 arm_initialize_isa (default_isa, sel->common.isa_bits);
3349 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3350 cpu_opts);
3351
3352 /* Now check to see if the user has specified any command line
3353 switches that require certain abilities from the cpu. */
3354
3355 if (TARGET_INTERWORK || TARGET_THUMB)
3356 bitmap_set_bit (sought_isa, isa_bit_thumb);
3357
3358 /* If there are such requirements and the default CPU does not
3359 satisfy them, we need to run over the complete list of
3360 cores looking for one that is satisfactory. */
3361 if (!bitmap_empty_p (sought_isa)
3362 && !bitmap_subset_p (sought_isa, default_isa))
3363 {
3364 auto_sbitmap candidate_isa (isa_num_bits);
3365 /* We're only interested in a CPU with at least the
3366 capabilities of the default CPU and the required
3367 additional features. */
3368 bitmap_ior (default_isa, default_isa, sought_isa);
3369
3370 /* Try to locate a CPU type that supports all of the abilities
3371 of the default CPU, plus the extra abilities requested by
3372 the user. */
3373 for (sel = all_cores; sel->common.name != NULL; sel++)
3374 {
3375 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3376 /* An exact match? */
3377 if (bitmap_equal_p (default_isa, candidate_isa))
3378 break;
3379 }
3380
3381 if (sel->common.name == NULL)
3382 {
3383 unsigned current_bit_count = isa_num_bits;
3384 const cpu_option *best_fit = NULL;
3385
3386 /* Ideally we would like to issue an error message here
3387 saying that it was not possible to find a CPU compatible
3388 with the default CPU, but which also supports the command
3389 line options specified by the programmer, and so they
3390 ought to use the -mcpu=<name> command line option to
3391 override the default CPU type.
3392
3393 If we cannot find a CPU that has exactly the
3394 characteristics of the default CPU and the given
3395 command line options we scan the array again looking
3396 for a best match. The best match must have at least
3397 the capabilities of the perfect match. */
3398 for (sel = all_cores; sel->common.name != NULL; sel++)
3399 {
3400 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3401
3402 if (bitmap_subset_p (default_isa, candidate_isa))
3403 {
3404 unsigned count;
3405
3406 bitmap_and_compl (candidate_isa, candidate_isa,
3407 default_isa);
3408 count = bitmap_popcount (candidate_isa);
3409
3410 if (count < current_bit_count)
3411 {
3412 best_fit = sel;
3413 current_bit_count = count;
3414 }
3415 }
3416
3417 gcc_assert (best_fit);
3418 sel = best_fit;
3419 }
3420 }
3421 arm_selected_cpu = sel;
3422 }
3423
3424 /* Now we know the CPU, we can finally initialize the target
3425 structure. */
3426 target->core_name = arm_selected_cpu->common.name;
3427 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3428 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3429 cpu_opts);
3430 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3431 }
3432
3433 gcc_assert (arm_selected_cpu);
3434 gcc_assert (arm_selected_arch);
3435
3436 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3437 {
3438 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3439 auto_sbitmap fpu_bits (isa_num_bits);
3440
3441 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3442 /* This should clear out ALL bits relating to the FPU/simd
3443 extensions, to avoid potentially invalid combinations later on
3444 that we can't match. At present we only clear out those bits
3445 that can be set by -mfpu. This should be fixed in GCC-12. */
3446 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3447 bitmap_ior (target->isa, target->isa, fpu_bits);
3448 }
3449
3450 /* If we have the soft-float ABI, clear any feature bits relating to use of
3451 floating-point operations. They'll just confuse things later on. */
3452 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3453 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3454
3455 /* There may be implied bits which we still need to enable. These are
3456 non-named features which are needed to complete other sets of features,
3457 but cannot be enabled from arm-cpus.in due to being shared between
3458 multiple fgroups. Each entry in all_implied_fbits is of the form
3459 ante -> cons, meaning that if the feature "ante" is enabled, we should
3460 implicitly enable "cons". */
3461 const struct fbit_implication *impl = all_implied_fbits;
3462 while (impl->ante)
3463 {
3464 if (bitmap_bit_p (target->isa, impl->ante))
3465 bitmap_set_bit (target->isa, impl->cons);
3466 impl++;
3467 }
3468
3469 if (!arm_selected_tune)
3470 arm_selected_tune = arm_selected_cpu;
3471 else /* Validate the features passed to -mtune. */
3472 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3473
3474 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3475
3476 /* Finish initializing the target structure. */
3477 if (!target->arch_name)
3478 target->arch_name = arm_selected_arch->common.name;
3479 target->arch_pp_name = arm_selected_arch->arch;
3480 target->base_arch = arm_selected_arch->base_arch;
3481 target->profile = arm_selected_arch->profile;
3482
3483 target->tune_flags = tune_data->tune_flags;
3484 target->tune = tune_data->tune;
3485 target->tune_core = tune_data->scheduler;
3486 }
3487
3488 /* Fix up any incompatible options that the user has specified. */
3489 static void
3490 arm_option_override (void)
3491 {
3492 static const enum isa_feature fpu_bitlist_internal[]
3493 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3494 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3495 static const enum isa_feature fp_bitlist[]
3496 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3497 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3498 cl_target_option opts;
3499
3500 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3501 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3502
3503 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3504 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3505 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3506 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3507
3508 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3509
3510 if (!OPTION_SET_P (arm_fpu_index))
3511 {
3512 bool ok;
3513 int fpu_index;
3514
3515 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3516 CL_TARGET);
3517 gcc_assert (ok);
3518 arm_fpu_index = (enum fpu_type) fpu_index;
3519 }
3520
3521 cl_target_option_save (&opts, &global_options, &global_options_set);
3522 arm_configure_build_target (&arm_active_target, &opts, true);
3523
3524 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3525 SUBTARGET_OVERRIDE_OPTIONS;
3526 #endif
3527
3528 /* Initialize boolean versions of the architectural flags, for use
3529 in the arm.md file and for enabling feature flags. */
3530 arm_option_reconfigure_globals ();
3531
3532 arm_tune = arm_active_target.tune_core;
3533 tune_flags = arm_active_target.tune_flags;
3534 current_tune = arm_active_target.tune;
3535
3536 /* TBD: Dwarf info for apcs frame is not handled yet. */
3537 if (TARGET_APCS_FRAME)
3538 flag_shrink_wrap = false;
3539
3540 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3541 {
3542 warning (0, "%<-mapcs-stack-check%> incompatible with "
3543 "%<-mno-apcs-frame%>");
3544 target_flags |= MASK_APCS_FRAME;
3545 }
3546
3547 if (TARGET_POKE_FUNCTION_NAME)
3548 target_flags |= MASK_APCS_FRAME;
3549
3550 if (TARGET_APCS_REENT && flag_pic)
3551 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3552
3553 if (TARGET_APCS_REENT)
3554 warning (0, "APCS reentrant code not supported. Ignored");
3555
3556 /* Set up some tuning parameters. */
3557 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3558 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3559 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3560 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3561 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3562 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3563
3564 /* For arm2/3 there is no need to do any scheduling if we are doing
3565 software floating-point. */
3566 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3567 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3568
3569 /* Override the default structure alignment for AAPCS ABI. */
3570 if (!OPTION_SET_P (arm_structure_size_boundary))
3571 {
3572 if (TARGET_AAPCS_BASED)
3573 arm_structure_size_boundary = 8;
3574 }
3575 else
3576 {
3577 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3578
3579 if (arm_structure_size_boundary != 8
3580 && arm_structure_size_boundary != 32
3581 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3582 {
3583 if (ARM_DOUBLEWORD_ALIGN)
3584 warning (0,
3585 "structure size boundary can only be set to 8, 32 or 64");
3586 else
3587 warning (0, "structure size boundary can only be set to 8 or 32");
3588 arm_structure_size_boundary
3589 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3590 }
3591 }
3592
3593 if (TARGET_VXWORKS_RTP)
3594 {
3595 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3596 arm_pic_data_is_text_relative = 0;
3597 }
3598 else if (flag_pic
3599 && !arm_pic_data_is_text_relative
3600 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3601 /* When text & data segments don't have a fixed displacement, the
3602 intended use is with a single, read only, pic base register.
3603 Unless the user explicitly requested not to do that, set
3604 it. */
3605 target_flags |= MASK_SINGLE_PIC_BASE;
3606
3607 /* If stack checking is disabled, we can use r10 as the PIC register,
3608 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3609 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3610 {
3611 if (TARGET_VXWORKS_RTP)
3612 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3613 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3614 }
3615
3616 if (flag_pic && TARGET_VXWORKS_RTP)
3617 arm_pic_register = 9;
3618
3619 /* If in FDPIC mode then force arm_pic_register to be r9. */
3620 if (TARGET_FDPIC)
3621 {
3622 arm_pic_register = FDPIC_REGNUM;
3623 if (TARGET_THUMB1)
3624 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3625 }
3626
3627 if (arm_pic_register_string != NULL)
3628 {
3629 int pic_register = decode_reg_name (arm_pic_register_string);
3630
3631 if (!flag_pic)
3632 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3633
3634 /* Prevent the user from choosing an obviously stupid PIC register. */
3635 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3636 || pic_register == HARD_FRAME_POINTER_REGNUM
3637 || pic_register == STACK_POINTER_REGNUM
3638 || pic_register >= PC_REGNUM
3639 || (TARGET_VXWORKS_RTP
3640 && (unsigned int) pic_register != arm_pic_register))
3641 error ("unable to use %qs for PIC register", arm_pic_register_string);
3642 else
3643 arm_pic_register = pic_register;
3644 }
3645
3646 if (flag_pic)
3647 target_word_relocations = 1;
3648
3649 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3650 if (fix_cm3_ldrd == 2)
3651 {
3652 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3653 fix_cm3_ldrd = 1;
3654 else
3655 fix_cm3_ldrd = 0;
3656 }
3657
3658 /* Enable fix_vlldm by default if required. */
3659 if (fix_vlldm == 2)
3660 {
3661 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3662 fix_vlldm = 1;
3663 else
3664 fix_vlldm = 0;
3665 }
3666
3667 /* Enable fix_aes by default if required. */
3668 if (fix_aes_erratum_1742098 == 2)
3669 {
3670 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3671 fix_aes_erratum_1742098 = 1;
3672 else
3673 fix_aes_erratum_1742098 = 0;
3674 }
3675
3676 /* Hot/Cold partitioning is not currently supported, since we can't
3677 handle literal pool placement in that case. */
3678 if (flag_reorder_blocks_and_partition)
3679 {
3680 inform (input_location,
3681 "%<-freorder-blocks-and-partition%> not supported "
3682 "on this architecture");
3683 flag_reorder_blocks_and_partition = 0;
3684 flag_reorder_blocks = 1;
3685 }
3686
3687 if (flag_pic)
3688 /* Hoisting PIC address calculations more aggressively provides a small,
3689 but measurable, size reduction for PIC code. Therefore, we decrease
3690 the bar for unrestricted expression hoisting to the cost of PIC address
3691 calculation, which is 2 instructions. */
3692 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3693 param_gcse_unrestricted_cost, 2);
3694
3695 /* ARM EABI defaults to strict volatile bitfields. */
3696 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3697 && abi_version_at_least(2))
3698 flag_strict_volatile_bitfields = 1;
3699
3700   /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3701      for which we have deemed it beneficial (signified by setting
3702      prefetch.num_slots to 1 or more).  */
3703 if (flag_prefetch_loop_arrays < 0
3704 && HAVE_prefetch
3705 && optimize >= 3
3706 && current_tune->prefetch.num_slots > 0)
3707 flag_prefetch_loop_arrays = 1;
3708
3709 /* Set up parameters to be used in prefetching algorithm. Do not
3710 override the defaults unless we are tuning for a core we have
3711 researched values for. */
3712 if (current_tune->prefetch.num_slots > 0)
3713 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3714 param_simultaneous_prefetches,
3715 current_tune->prefetch.num_slots);
3716 if (current_tune->prefetch.l1_cache_line_size >= 0)
3717 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3718 param_l1_cache_line_size,
3719 current_tune->prefetch.l1_cache_line_size);
3720 if (current_tune->prefetch.l1_cache_line_size >= 0)
3721 {
3722 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3723 param_destruct_interfere_size,
3724 current_tune->prefetch.l1_cache_line_size);
3725 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3726 param_construct_interfere_size,
3727 current_tune->prefetch.l1_cache_line_size);
3728 }
3729 else
3730 {
3731 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3732 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3733 constructive? */
3734 /* More recent Cortex chips have a 64-byte cache line, but are marked
3735 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3736 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3737 param_destruct_interfere_size, 64);
3738 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3739 param_construct_interfere_size, 64);
3740 }
3741
3742 if (current_tune->prefetch.l1_cache_size >= 0)
3743 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3744 param_l1_cache_size,
3745 current_tune->prefetch.l1_cache_size);
3746
3747   /* Look through the ready list and all of the queue for instructions
3748      relevant to the L2 auto-prefetcher.  */
3749 int sched_autopref_queue_depth;
3750
3751 switch (current_tune->sched_autopref)
3752 {
3753 case tune_params::SCHED_AUTOPREF_OFF:
3754 sched_autopref_queue_depth = -1;
3755 break;
3756
3757 case tune_params::SCHED_AUTOPREF_RANK:
3758 sched_autopref_queue_depth = 0;
3759 break;
3760
3761 case tune_params::SCHED_AUTOPREF_FULL:
3762 sched_autopref_queue_depth = max_insn_queue_index + 1;
3763 break;
3764
3765 default:
3766 gcc_unreachable ();
3767 }
3768
3769 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3770 param_sched_autopref_queue_depth,
3771 sched_autopref_queue_depth);
3772
3773 /* Currently, for slow flash data, we just disable literal pools. We also
3774 disable it for pure-code. */
3775 if (target_slow_flash_data || target_pure_code)
3776 arm_disable_literal_pool = true;
3777
3778   /* Disable scheduling fusion by default unless the target is an ARMv7
3779      (or later) processor whose tuning prefers ldrd/strd.  */
3780 if (flag_schedule_fusion == 2
3781 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3782 flag_schedule_fusion = 0;
3783
3784   /* Need to remember the initial options before they are overridden.  */
3785 init_optimize = build_optimization_node (&global_options,
3786 &global_options_set);
3787
3788 arm_options_perform_arch_sanity_checks ();
3789 arm_option_override_internal (&global_options, &global_options_set);
3790 arm_option_check_internal (&global_options);
3791 arm_option_params_internal ();
3792
3793 /* Create the default target_options structure. */
3794 target_option_default_node = target_option_current_node
3795 = build_target_option_node (&global_options, &global_options_set);
3796
3797 /* Register global variables with the garbage collector. */
3798 arm_add_gc_roots ();
3799
3800 /* Init initial mode for testing. */
3801 thumb_flipper = TARGET_THUMB;
3802 }
3803
3804
3805 /* Reconfigure global status flags from the active_target.isa. */
3806 void
3807 arm_option_reconfigure_globals (void)
3808 {
3809 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3810 arm_base_arch = arm_active_target.base_arch;
3811
3812 /* Initialize boolean versions of the architectural flags, for use
3813 in the arm.md file. */
3814 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3815 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3816 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3817 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3818 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3819 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3820 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3821 arm_arch6m = arm_arch6 && !arm_arch_notm;
3822 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3823 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3824 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3825 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3826 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3827 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3828 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3829 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3830 isa_bit_armv8_1m_main);
3831 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3832 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3833 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3834 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3835 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3836 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3837 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3838 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3839 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3840 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3841 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3842 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3843
3844 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3845 if (arm_fp16_inst)
3846 {
3847 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3848 error ("selected fp16 options are incompatible");
3849 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3850 }
3851
3852 arm_arch_cde = 0;
3853 arm_arch_cde_coproc = 0;
3854 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3855 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3856 isa_bit_cdecp6, isa_bit_cdecp7};
3857 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3858 {
3859 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3860 if (cde_bit)
3861 {
3862 arm_arch_cde |= cde_bit;
3863 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3864 }
3865 }
3866
3867 /* And finally, set up some quirks. */
3868 arm_arch_no_volatile_ce
3869 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3870 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3871 isa_bit_quirk_armv6kz);
3872
3873 /* Use the cp15 method if it is available. */
3874 if (target_thread_pointer == TP_AUTO)
3875 {
3876 if (arm_arch6k && !TARGET_THUMB1)
3877 target_thread_pointer = TP_CP15;
3878 else
3879 target_thread_pointer = TP_SOFT;
3880 }
3881
3882 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3883     error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3884 }
3885
3886 /* Perform some validation of the desired architecture against the rest of
3887    the options.  */
3888 void
3889 arm_options_perform_arch_sanity_checks (void)
3890 {
3891 /* V5T code we generate is completely interworking capable, so we turn off
3892 TARGET_INTERWORK here to avoid many tests later on. */
3893
3894 /* XXX However, we must pass the right pre-processor defines to CPP
3895 or GLD can get confused. This is a hack. */
3896 if (TARGET_INTERWORK)
3897 arm_cpp_interwork = 1;
3898
3899 if (arm_arch5t)
3900 target_flags &= ~MASK_INTERWORK;
3901
3902 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3903 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3904
3905 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3906 error ("iwmmxt abi requires an iwmmxt capable cpu");
3907
3908 /* BPABI targets use linker tricks to allow interworking on cores
3909 without thumb support. */
3910 if (TARGET_INTERWORK
3911 && !TARGET_BPABI
3912 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3913 {
3914 warning (0, "target CPU does not support interworking" );
3915 target_flags &= ~MASK_INTERWORK;
3916 }
3917
3918 /* If soft-float is specified then don't use FPU. */
3919 if (TARGET_SOFT_FLOAT)
3920 arm_fpu_attr = FPU_NONE;
3921 else
3922 arm_fpu_attr = FPU_VFP;
3923
3924 if (TARGET_AAPCS_BASED)
3925 {
3926 if (TARGET_CALLER_INTERWORKING)
3927 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3928 else
3929 if (TARGET_CALLEE_INTERWORKING)
3930 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3931 }
3932
3933 /* __fp16 support currently assumes the core has ldrh. */
3934 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3935 sorry ("%<__fp16%> and no ldrh");
3936
3937 if (use_cmse && !arm_arch_cmse)
3938 error ("target CPU does not support ARMv8-M Security Extensions");
3939
3940 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3941 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3942 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3943 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3944
3945
3946 if (TARGET_AAPCS_BASED)
3947 {
3948 if (arm_abi == ARM_ABI_IWMMXT)
3949 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3950 else if (TARGET_HARD_FLOAT_ABI)
3951 {
3952 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3953 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3954 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3955 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3956 }
3957 else
3958 arm_pcs_default = ARM_PCS_AAPCS;
3959 }
3960 else
3961 {
3962 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3963 sorry ("%<-mfloat-abi=hard%> and VFP");
3964
3965 if (arm_abi == ARM_ABI_APCS)
3966 arm_pcs_default = ARM_PCS_APCS;
3967 else
3968 arm_pcs_default = ARM_PCS_ATPCS;
3969 }
3970 }
3971
3972 /* Test whether a local function descriptor is canonical, i.e.,
3973 whether we can use GOTOFFFUNCDESC to compute the address of the
3974 function. */
3975 static bool
3976 arm_fdpic_local_funcdesc_p (rtx fnx)
3977 {
3978 tree fn;
3979 enum symbol_visibility vis;
3980 bool ret;
3981
3982 if (!TARGET_FDPIC)
3983 return true;
3984
3985 if (! SYMBOL_REF_LOCAL_P (fnx))
3986 return false;
3987
3988 fn = SYMBOL_REF_DECL (fnx);
3989
3990 if (! fn)
3991 return false;
3992
3993 vis = DECL_VISIBILITY (fn);
3994
3995 if (vis == VISIBILITY_PROTECTED)
3996 /* Private function descriptors for protected functions are not
3997 canonical. Temporarily change the visibility to global so that
3998 we can ensure uniqueness of funcdesc pointers. */
3999 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4000
4001 ret = default_binds_local_p_1 (fn, flag_pic);
4002
4003 DECL_VISIBILITY (fn) = vis;
4004
4005 return ret;
4006 }
4007
4008 static void
4009 arm_add_gc_roots (void)
4010 {
4011 gcc_obstack_init(&minipool_obstack);
4012 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4013 }
4014 \f
4015 /* A table of known ARM exception types.
4016 For use with the interrupt function attribute. */
4017
4018 typedef struct
4019 {
4020 const char *const arg;
4021 const unsigned long return_value;
4022 }
4023 isr_attribute_arg;
4024
4025 static const isr_attribute_arg isr_attribute_args [] =
4026 {
4027 { "IRQ", ARM_FT_ISR },
4028 { "irq", ARM_FT_ISR },
4029 { "FIQ", ARM_FT_FIQ },
4030 { "fiq", ARM_FT_FIQ },
4031 { "ABORT", ARM_FT_ISR },
4032 { "abort", ARM_FT_ISR },
4033 { "UNDEF", ARM_FT_EXCEPTION },
4034 { "undef", ARM_FT_EXCEPTION },
4035 { "SWI", ARM_FT_EXCEPTION },
4036 { "swi", ARM_FT_EXCEPTION },
4037 { NULL, ARM_FT_NORMAL }
4038 };
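/* Editorial example (not part of the original sources): a handler written as

	void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   has its "IRQ" argument looked up in the table above by arm_isr_value
   below, yielding ARM_FT_ISR and hence the IRQ prologue/epilogue handling;
   an unrecognized string yields ARM_FT_UNKNOWN.  */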
4039
4040 /* Returns the (interrupt) function type of the current
4041 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4042
4043 static unsigned long
4044 arm_isr_value (tree argument)
4045 {
4046 const isr_attribute_arg * ptr;
4047 const char * arg;
4048
4049 if (!arm_arch_notm)
4050 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4051
4052 /* No argument - default to IRQ. */
4053 if (argument == NULL_TREE)
4054 return ARM_FT_ISR;
4055
4056 /* Get the value of the argument. */
4057 if (TREE_VALUE (argument) == NULL_TREE
4058 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4059 return ARM_FT_UNKNOWN;
4060
4061 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4062
4063 /* Check it against the list of known arguments. */
4064 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4065 if (streq (arg, ptr->arg))
4066 return ptr->return_value;
4067
4068 /* An unrecognized interrupt type. */
4069 return ARM_FT_UNKNOWN;
4070 }
4071
4072 /* Computes the type of the current function. */
4073
4074 static unsigned long
4075 arm_compute_func_type (void)
4076 {
4077 unsigned long type = ARM_FT_UNKNOWN;
4078 tree a;
4079 tree attr;
4080
4081 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4082
4083 /* Decide if the current function is volatile. Such functions
4084 never return, and many memory cycles can be saved by not storing
4085 register values that will never be needed again. This optimization
4086 was added to speed up context switching in a kernel application. */
4087 if (optimize > 0
4088 && (TREE_NOTHROW (current_function_decl)
4089 || !(flag_unwind_tables
4090 || (flag_exceptions
4091 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4092 && TREE_THIS_VOLATILE (current_function_decl))
4093 type |= ARM_FT_VOLATILE;
4094
4095 if (cfun->static_chain_decl != NULL)
4096 type |= ARM_FT_NESTED;
4097
4098 attr = DECL_ATTRIBUTES (current_function_decl);
4099
4100 a = lookup_attribute ("naked", attr);
4101 if (a != NULL_TREE)
4102 type |= ARM_FT_NAKED;
4103
4104 a = lookup_attribute ("isr", attr);
4105 if (a == NULL_TREE)
4106 a = lookup_attribute ("interrupt", attr);
4107
4108 if (a == NULL_TREE)
4109 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4110 else
4111 type |= arm_isr_value (TREE_VALUE (a));
4112
4113 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4114 type |= ARM_FT_CMSE_ENTRY;
4115
4116 return type;
4117 }
4118
4119 /* Returns the type of the current function. */
4120
4121 unsigned long
4122 arm_current_func_type (void)
4123 {
4124 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4125 cfun->machine->func_type = arm_compute_func_type ();
4126
4127 return cfun->machine->func_type;
4128 }
4129
4130 bool
4131 arm_allocate_stack_slots_for_args (void)
4132 {
4133 /* Naked functions should not allocate stack slots for arguments. */
4134 return !IS_NAKED (arm_current_func_type ());
4135 }
4136
4137 static bool
4138 arm_warn_func_return (tree decl)
4139 {
4140 /* Naked functions are implemented entirely in assembly, including the
4141 return sequence, so suppress warnings about this. */
4142 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4143 }
4144
4145 \f
4146 /* Output assembler code for a block containing the constant parts
4147 of a trampoline, leaving space for the variable parts.
4148
4149 On the ARM, (if r8 is the static chain regnum, and remembering that
4150 referencing pc adds an offset of 8) the trampoline looks like:
4151 ldr r8, [pc, #0]
4152 ldr pc, [pc]
4153 .word static chain value
4154 .word function's address
4155 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4156
4157 In FDPIC mode, the trampoline looks like:
4158 .word trampoline address
4159 .word trampoline GOT address
4160 ldr r12, [pc, #8] ; #4 for Arm mode
4161 ldr r9, [pc, #8] ; #4 for Arm mode
4162 ldr pc, [pc, #8] ; #4 for Arm mode
4163 .word static chain value
4164 .word GOT address
4165 .word function's address
4166 */
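/* Editorial worked example for the standard 32-bit Arm layout above
   (derived from arm_trampoline_init below, not part of the original
   sources): the "ldr r8, [pc, #0]" at offset 0 reads pc as 0 + 8 and so
   loads the static chain word at offset 8, while the "ldr pc, [pc]" at
   offset 4 reads pc as 4 + 8 and loads the function address at offset 12;
   arm_trampoline_init stores the chain value and the function address at
   exactly those offsets (12 and 16 for the longer Thumb-1 stub).  */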
4167
4168 static void
4169 arm_asm_trampoline_template (FILE *f)
4170 {
4171 fprintf (f, "\t.syntax unified\n");
4172
4173 if (TARGET_FDPIC)
4174 {
4175 /* The first two words are a function descriptor pointing to the
4176 trampoline code just below. */
4177 if (TARGET_ARM)
4178 fprintf (f, "\t.arm\n");
4179 else if (TARGET_THUMB2)
4180 fprintf (f, "\t.thumb\n");
4181 else
4182 /* Only ARM and Thumb-2 are supported. */
4183 gcc_unreachable ();
4184
4185 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4186 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4187 /* Trampoline code which sets the static chain register but also
4188 PIC register before jumping into real code. */
4189 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4190 STATIC_CHAIN_REGNUM, PC_REGNUM,
4191 TARGET_THUMB2 ? 8 : 4);
4192 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4193 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4194 TARGET_THUMB2 ? 8 : 4);
4195 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4196 PC_REGNUM, PC_REGNUM,
4197 TARGET_THUMB2 ? 8 : 4);
4198 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4199 }
4200 else if (TARGET_ARM)
4201 {
4202 fprintf (f, "\t.arm\n");
4203 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4204 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4205 }
4206 else if (TARGET_THUMB2)
4207 {
4208 fprintf (f, "\t.thumb\n");
4209 /* The Thumb-2 trampoline is similar to the arm implementation.
4210 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4211 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4212 STATIC_CHAIN_REGNUM, PC_REGNUM);
4213 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4214 }
4215 else
4216 {
4217 ASM_OUTPUT_ALIGN (f, 2);
4218 fprintf (f, "\t.code\t16\n");
4219 fprintf (f, ".Ltrampoline_start:\n");
4220 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4221 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4222 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4223 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4224 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4225 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4226 }
4227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4228 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4229 }
4230
4231 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4232
4233 static void
4234 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4235 {
4236 rtx fnaddr, mem, a_tramp;
4237
4238 emit_block_move (m_tramp, assemble_trampoline_template (),
4239 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4240
4241 if (TARGET_FDPIC)
4242 {
4243 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4244 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4245 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4246 /* The function start address is at offset 8, but in Thumb mode
4247 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4248 below. */
4249 rtx trampoline_code_start
4250 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4251
4252 /* Write initial funcdesc which points to the trampoline. */
4253 mem = adjust_address (m_tramp, SImode, 0);
4254 emit_move_insn (mem, trampoline_code_start);
4255 mem = adjust_address (m_tramp, SImode, 4);
4256 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4257 /* Setup static chain. */
4258 mem = adjust_address (m_tramp, SImode, 20);
4259 emit_move_insn (mem, chain_value);
4260 /* GOT + real function entry point. */
4261 mem = adjust_address (m_tramp, SImode, 24);
4262 emit_move_insn (mem, gotaddr);
4263 mem = adjust_address (m_tramp, SImode, 28);
4264 emit_move_insn (mem, fnaddr);
4265 }
4266 else
4267 {
4268 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4269 emit_move_insn (mem, chain_value);
4270
4271 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4272 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4273 emit_move_insn (mem, fnaddr);
4274 }
4275
4276 a_tramp = XEXP (m_tramp, 0);
4277 maybe_emit_call_builtin___clear_cache (a_tramp,
4278 plus_constant (ptr_mode,
4279 a_tramp,
4280 TRAMPOLINE_SIZE));
4281 }
4282
4283 /* Thumb trampolines should be entered in thumb mode, so set
4284 the bottom bit of the address. */
4285
4286 static rtx
4287 arm_trampoline_adjust_address (rtx addr)
4288 {
4289 /* For FDPIC don't fix trampoline address since it's a function
4290 descriptor and not a function address. */
4291 if (TARGET_THUMB && !TARGET_FDPIC)
4292 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4293 NULL, 0, OPTAB_LIB_WIDEN);
4294 return addr;
4295 }
4296 \f
4297 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4298 includes call-clobbered registers too. If this is a leaf function
4299 we can just examine the registers used by the RTL, but otherwise we
4300 have to assume that whatever function is called might clobber
4301 anything, and so we have to save all the call-clobbered registers
4302 as well. */
4303 static inline bool reg_needs_saving_p (unsigned reg)
4304 {
4305 unsigned long func_type = arm_current_func_type ();
4306
4307 if (IS_INTERRUPT (func_type))
4308 if (df_regs_ever_live_p (reg)
4309 /* Save call-clobbered core registers. */
4310 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4311 return true;
4312 else
4313 return false;
4314 else
4315 if (!df_regs_ever_live_p (reg)
4316 || call_used_or_fixed_reg_p (reg))
4317 return false;
4318 else
4319 return true;
4320 }
4321
4322 /* Return 1 if it is possible to return using a single instruction.
4323 If SIBLING is non-null, this is a test for a return before a sibling
4324 call. SIBLING is the call insn, so we can examine its register usage. */
4325
4326 int
4327 use_return_insn (int iscond, rtx sibling)
4328 {
4329 int regno;
4330 unsigned int func_type;
4331 unsigned long saved_int_regs;
4332 unsigned HOST_WIDE_INT stack_adjust;
4333 arm_stack_offsets *offsets;
4334
4335 /* Never use a return instruction before reload has run. */
4336 if (!reload_completed)
4337 return 0;
4338
4339 func_type = arm_current_func_type ();
4340
4341 /* Naked, volatile and stack alignment functions need special
4342 consideration. */
4343 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4344 return 0;
4345
4346 /* So do interrupt functions that use the frame pointer and Thumb
4347 interrupt functions. */
4348 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4349 return 0;
4350
4351 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4352 && !optimize_function_for_size_p (cfun))
4353 return 0;
4354
4355 offsets = arm_get_frame_offsets ();
4356 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4357
4358 /* As do variadic functions. */
4359 if (crtl->args.pretend_args_size
4360 || cfun->machine->uses_anonymous_args
4361 /* Or if the function calls __builtin_eh_return () */
4362 || crtl->calls_eh_return
4363 /* Or if the function calls alloca */
4364 || cfun->calls_alloca
4365 /* Or if there is a stack adjustment. However, if the stack pointer
4366 is saved on the stack, we can use a pre-incrementing stack load. */
4367 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4368 && stack_adjust == 4))
4369 /* Or if the static chain register was saved above the frame, under the
4370 assumption that the stack pointer isn't saved on the stack. */
4371 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4372 && arm_compute_static_chain_stack_bytes() != 0))
4373 return 0;
4374
4375 saved_int_regs = offsets->saved_regs_mask;
4376
4377 /* Unfortunately, the insn
4378
4379 ldmib sp, {..., sp, ...}
4380
4381 triggers a bug on most SA-110 based devices, such that the stack
4382 pointer won't be correctly restored if the instruction takes a
4383 page fault. We work around this problem by popping r3 along with
4384 the other registers, since that is never slower than executing
4385 another instruction.
4386
4387 We test for !arm_arch5t here, because code for any architecture
4388 less than this could potentially be run on one of the buggy
4389 chips. */
4390 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4391 {
4392 /* Validate that r3 is a call-clobbered register (always true in
4393 the default abi) ... */
4394 if (!call_used_or_fixed_reg_p (3))
4395 return 0;
4396
4397 /* ... that it isn't being used for a return value ... */
4398 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4399 return 0;
4400
4401 /* ... or for a tail-call argument ... */
4402 if (sibling)
4403 {
4404 gcc_assert (CALL_P (sibling));
4405
4406 if (find_regno_fusage (sibling, USE, 3))
4407 return 0;
4408 }
4409
4410 /* ... and that there are no call-saved registers in r0-r2
4411 (always true in the default ABI). */
4412 if (saved_int_regs & 0x7)
4413 return 0;
4414 }
4415
4416 /* Can't be done if interworking with Thumb, and any registers have been
4417 stacked. */
4418 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4419 return 0;
4420
4421 /* On StrongARM, conditional returns are expensive if they aren't
4422 taken and multiple registers have been stacked. */
4423 if (iscond && arm_tune_strongarm)
4424 {
4425 /* Conditional return when just the LR is stored is a simple
4426 conditional-load instruction, that's not expensive. */
4427 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4428 return 0;
4429
4430 if (flag_pic
4431 && arm_pic_register != INVALID_REGNUM
4432 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4433 return 0;
4434 }
4435
4436   /* ARMv8-M non-secure entry functions need to use BXNS to return, and thus
4437      need several instructions if anything must be popped.  Armv8.1-M Mainline
4438      also needs several instructions to save and restore the FP context.  */
4439 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4440 return 0;
4441
4442 /* If there are saved registers but the LR isn't saved, then we need
4443 two instructions for the return. */
4444 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4445 return 0;
4446
4447 /* Can't be done if any of the VFP regs are pushed,
4448 since this also requires an insn. */
4449 if (TARGET_VFP_BASE)
4450 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4451 if (reg_needs_saving_p (regno))
4452 return 0;
4453
4454 if (TARGET_REALLY_IWMMXT)
4455 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4456 if (reg_needs_saving_p (regno))
4457 return 0;
4458
4459 return 1;
4460 }
4461
4462 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4463 shrink-wrapping if possible. This is the case if we need to emit a
4464 prologue, which we can test by looking at the offsets. */
4465 bool
4466 use_simple_return_p (void)
4467 {
4468 arm_stack_offsets *offsets;
4469
4470 /* Note this function can be called before or after reload. */
4471 if (!reload_completed)
4472 arm_compute_frame_layout ();
4473
4474 offsets = arm_get_frame_offsets ();
4475 return offsets->outgoing_args != 0;
4476 }
4477
4478 /* Return TRUE if int I is a valid immediate ARM constant. */
4479
4480 int
4481 const_ok_for_arm (HOST_WIDE_INT i)
4482 {
4483 int lowbit;
4484
4485 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4486 be all zero, or all one. */
4487 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4488 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4489 != ((~(unsigned HOST_WIDE_INT) 0)
4490 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4491 return FALSE;
4492
4493 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4494
4495 /* Fast return for 0 and small values. We must do this for zero, since
4496 the code below can't handle that one case. */
4497 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4498 return TRUE;
4499
4500 /* Get the number of trailing zeros. */
4501 lowbit = ffs((int) i) - 1;
4502
4503 /* Only even shifts are allowed in ARM mode so round down to the
4504 nearest even number. */
4505 if (TARGET_ARM)
4506 lowbit &= ~1;
4507
4508 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4509 return TRUE;
4510
4511 if (TARGET_ARM)
4512 {
4513 /* Allow rotated constants in ARM mode. */
4514 if (lowbit <= 4
4515 && ((i & ~0xc000003f) == 0
4516 || (i & ~0xf000000f) == 0
4517 || (i & ~0xfc000003) == 0))
4518 return TRUE;
4519 }
4520 else if (TARGET_THUMB2)
4521 {
4522 HOST_WIDE_INT v;
4523
4524 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4525 v = i & 0xff;
4526 v |= v << 16;
4527 if (i == v || i == (v | (v << 8)))
4528 return TRUE;
4529
4530 /* Allow repeated pattern 0xXY00XY00. */
4531 v = i & 0xff00;
4532 v |= v << 16;
4533 if (i == v)
4534 return TRUE;
4535 }
4536 else if (TARGET_HAVE_MOVT)
4537 {
4538 /* Thumb-1 Targets with MOVT. */
4539 if (i > 0xffff)
4540 return FALSE;
4541 else
4542 return TRUE;
4543 }
4544
4545 return FALSE;
4546 }
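/* Editorial worked examples for the checks above (not part of the original
   sources):
     0x000000ff   valid everywhere: a plain 8-bit immediate.
     0xff000000   valid: 0xff rotated to an even bit position.
     0x00ffff00   invalid: sixteen contiguous set bits cannot fit in one
                  rotated 8-bit field, so the value must be synthesized
                  from two immediates.
     0x01010101   invalid in ARM state, but valid in Thumb-2 as the
                  replicated byte pattern 0xXYXYXYXY.  */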
4547
4548 /* Return true if I is a valid constant for the operation CODE. */
4549 int
4550 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4551 {
4552 if (const_ok_for_arm (i))
4553 return 1;
4554
4555 switch (code)
4556 {
4557 case SET:
4558 /* See if we can use movw. */
4559 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4560 return 1;
4561 else
4562 /* Otherwise, try mvn. */
4563 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4564
4565 case PLUS:
4566 /* See if we can use addw or subw. */
4567 if (TARGET_THUMB2
4568 && ((i & 0xfffff000) == 0
4569 || ((-i) & 0xfffff000) == 0))
4570 return 1;
4571 /* Fall through. */
4572 case COMPARE:
4573 case EQ:
4574 case NE:
4575 case GT:
4576 case LE:
4577 case LT:
4578 case GE:
4579 case GEU:
4580 case LTU:
4581 case GTU:
4582 case LEU:
4583 case UNORDERED:
4584 case ORDERED:
4585 case UNEQ:
4586 case UNGE:
4587 case UNLT:
4588 case UNGT:
4589 case UNLE:
4590 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4591
4592 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4593 case XOR:
4594 return 0;
4595
4596 case IOR:
4597 if (TARGET_THUMB2)
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4599 return 0;
4600
4601 case AND:
4602 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4603
4604 default:
4605 gcc_unreachable ();
4606 }
4607 }
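/* Editorial example (not part of the original sources): 0xffffff00 is not
   a valid immediate by itself, but for CODE == AND it is accepted above
   because its complement, 0xff, is valid; the operation can then be emitted
   as a single BIC with #0xff rather than synthesizing the mask.  Similarly,
   a SET of 0x00001234 is accepted when MOVW is available because the value
   fits in the low 16 bits.  */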
4608
4609 /* Return true if I is a valid di mode constant for the operation CODE. */
4610 int
4611 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4612 {
4613 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4614 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4615 rtx hi = GEN_INT (hi_val);
4616 rtx lo = GEN_INT (lo_val);
4617
4618 if (TARGET_THUMB1)
4619 return 0;
4620
4621 switch (code)
4622 {
4623 case AND:
4624 case IOR:
4625 case XOR:
4626 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4627 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4628 case PLUS:
4629 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4630
4631 default:
4632 return 0;
4633 }
4634 }
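/* Editorial example (not part of the original sources): a DImode AND with
   0xffffffff000000ff splits into an all-ones high word, which leaves that
   half unchanged and so needs no instruction, and a low word of 0xff, which
   is itself a valid immediate; the test above therefore accepts the
   constant.  */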
4635
4636 /* Emit a sequence of insns to handle a large constant.
4637 CODE is the code of the operation required, it can be any of SET, PLUS,
4638 IOR, AND, XOR, MINUS;
4639 MODE is the mode in which the operation is being performed;
4640 VAL is the integer to operate on;
4641 SOURCE is the other operand (a register, or a null-pointer for SET);
4642 SUBTARGETS means it is safe to create scratch registers if that will
4643 either produce a simpler sequence, or we will want to cse the values.
4644 Return value is the number of insns emitted. */
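/* Editorial example (not part of the original sources): on a target with
   MOVW/MOVT, a SET of 0x12345678 would typically be emitted by the code
   below as a MOVW of 0x5678 followed by a MOVT of 0x1234 (see
   arm_emit_movpair), rather than as a load from the literal pool.  */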
4645
4646 /* ??? Tweak this for thumb2. */
4647 int
4648 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4649 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4650 {
4651 rtx cond;
4652
4653 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4654 cond = COND_EXEC_TEST (PATTERN (insn));
4655 else
4656 cond = NULL_RTX;
4657
4658 if (subtargets || code == SET
4659 || (REG_P (target) && REG_P (source)
4660 && REGNO (target) != REGNO (source)))
4661 {
4662 /* After arm_reorg has been called, we can't fix up expensive
4663 constants by pushing them into memory so we must synthesize
4664 them in-line, regardless of the cost. This is only likely to
4665 be more costly on chips that have load delay slots and we are
4666 compiling without running the scheduler (so no splitting
4667 occurred before the final instruction emission).
4668
4669 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4670 */
4671 if (!cfun->machine->after_arm_reorg
4672 && !cond
4673 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4674 1, 0)
4675 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4676 + (code != SET))))
4677 {
4678 if (code == SET)
4679 {
4680 		  /* Currently SET is the only monadic value for CODE; all
4681 		     the rest are dyadic.  */
4682 if (TARGET_USE_MOVT)
4683 arm_emit_movpair (target, GEN_INT (val));
4684 else
4685 emit_set_insn (target, GEN_INT (val));
4686
4687 return 1;
4688 }
4689 else
4690 {
4691 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4692
4693 if (TARGET_USE_MOVT)
4694 arm_emit_movpair (temp, GEN_INT (val));
4695 else
4696 emit_set_insn (temp, GEN_INT (val));
4697
4698 /* For MINUS, the value is subtracted from, since we never
4699 have subtraction of a constant. */
4700 if (code == MINUS)
4701 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4702 else
4703 emit_set_insn (target,
4704 gen_rtx_fmt_ee (code, mode, source, temp));
4705 return 2;
4706 }
4707 }
4708 }
4709
4710 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4711 1);
4712 }
4713
4714 /* Return, in RETURN_SEQUENCE, a sequence of integers that each fit into an
4715    ARM/Thumb-2 immediate and that add up to VAL.
4716    The function's return value gives the number of insns required.  */
4717 static int
4718 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4719 struct four_ints *return_sequence)
4720 {
4721 int best_consecutive_zeros = 0;
4722 int i;
4723 int best_start = 0;
4724 int insns1, insns2;
4725 struct four_ints tmp_sequence;
4726
4727 /* If we aren't targeting ARM, the best place to start is always at
4728 the bottom, otherwise look more closely. */
4729 if (TARGET_ARM)
4730 {
4731 for (i = 0; i < 32; i += 2)
4732 {
4733 int consecutive_zeros = 0;
4734
4735 if (!(val & (3 << i)))
4736 {
4737 while ((i < 32) && !(val & (3 << i)))
4738 {
4739 consecutive_zeros += 2;
4740 i += 2;
4741 }
4742 if (consecutive_zeros > best_consecutive_zeros)
4743 {
4744 best_consecutive_zeros = consecutive_zeros;
4745 best_start = i - consecutive_zeros;
4746 }
4747 i -= 2;
4748 }
4749 }
4750 }
4751
4752 /* So long as it won't require any more insns to do so, it's
4753 desirable to emit a small constant (in bits 0...9) in the last
4754 insn. This way there is more chance that it can be combined with
4755 a later addressing insn to form a pre-indexed load or store
4756 operation. Consider:
4757
4758 *((volatile int *)0xe0000100) = 1;
4759 *((volatile int *)0xe0000110) = 2;
4760
4761 We want this to wind up as:
4762
4763 mov rA, #0xe0000000
4764 mov rB, #1
4765 str rB, [rA, #0x100]
4766 mov rB, #2
4767 str rB, [rA, #0x110]
4768
4769 rather than having to synthesize both large constants from scratch.
4770
4771 Therefore, we calculate how many insns would be required to emit
4772 the constant starting from `best_start', and also starting from
4773 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4774 yield a shorter sequence, we may as well use zero. */
4775 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4776 if (best_start != 0
4777 && ((HOST_WIDE_INT_1U << best_start) < val))
4778 {
4779 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4780 if (insns2 <= insns1)
4781 {
4782 *return_sequence = tmp_sequence;
4783 insns1 = insns2;
4784 }
4785 }
4786
4787 return insns1;
4788 }
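/* Editorial worked example (not part of the original sources): for
   VAL == 0x00ffff00 in ARM state, no single rotated 8-bit immediate can
   cover the sixteen set bits, so the routine above returns the two-element
   sequence 0x00ff0000, 0x0000ff00; a SET of that value is then typically
   synthesized as a MOV of the first chunk followed by an ORR of the
   second.  */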
4789
4790 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4791 static int
4792 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4793 struct four_ints *return_sequence, int i)
4794 {
4795 int remainder = val & 0xffffffff;
4796 int insns = 0;
4797
4798 /* Try and find a way of doing the job in either two or three
4799 instructions.
4800
4801 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4802 location. We start at position I. This may be the MSB, or
4803      optimal_immediate_sequence may have positioned it at the largest block
4804 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4805 wrapping around to the top of the word when we drop off the bottom.
4806 In the worst case this code should produce no more than four insns.
4807
4808 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4809 constants, shifted to any arbitrary location. We should always start
4810 at the MSB. */
4811 do
4812 {
4813 int end;
4814 unsigned int b1, b2, b3, b4;
4815 unsigned HOST_WIDE_INT result;
4816 int loc;
4817
4818 gcc_assert (insns < 4);
4819
4820 if (i <= 0)
4821 i += 32;
4822
4823 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4824 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4825 {
4826 loc = i;
4827 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4828 /* We can use addw/subw for the last 12 bits. */
4829 result = remainder;
4830 else
4831 {
4832 /* Use an 8-bit shifted/rotated immediate. */
4833 end = i - 8;
4834 if (end < 0)
4835 end += 32;
4836 result = remainder & ((0x0ff << end)
4837 | ((i < end) ? (0xff >> (32 - end))
4838 : 0));
4839 i -= 8;
4840 }
4841 }
4842 else
4843 {
4844 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4845 arbitrary shifts. */
4846 i -= TARGET_ARM ? 2 : 1;
4847 continue;
4848 }
4849
4850 /* Next, see if we can do a better job with a thumb2 replicated
4851 constant.
4852
4853 We do it this way around to catch the cases like 0x01F001E0 where
4854 two 8-bit immediates would work, but a replicated constant would
4855 make it worse.
4856
4857 TODO: 16-bit constants that don't clear all the bits, but still win.
4858 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4859 if (TARGET_THUMB2)
4860 {
4861 b1 = (remainder & 0xff000000) >> 24;
4862 b2 = (remainder & 0x00ff0000) >> 16;
4863 b3 = (remainder & 0x0000ff00) >> 8;
4864 b4 = remainder & 0xff;
4865
4866 if (loc > 24)
4867 {
4868 /* The 8-bit immediate already found clears b1 (and maybe b2),
4869 but must leave b3 and b4 alone. */
4870
4871 /* First try to find a 32-bit replicated constant that clears
4872 almost everything. We can assume that we can't do it in one,
4873 or else we wouldn't be here. */
4874 unsigned int tmp = b1 & b2 & b3 & b4;
4875 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4876 + (tmp << 24);
4877 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4878 + (tmp == b3) + (tmp == b4);
4879 if (tmp
4880 && (matching_bytes >= 3
4881 || (matching_bytes == 2
4882 && const_ok_for_op (remainder & ~tmp2, code))))
4883 {
4884 /* At least 3 of the bytes match, and the fourth has at
4885 least as many bits set, or two of the bytes match
4886 and it will only require one more insn to finish. */
4887 result = tmp2;
4888 i = tmp != b1 ? 32
4889 : tmp != b2 ? 24
4890 : tmp != b3 ? 16
4891 : 8;
4892 }
4893
4894 /* Second, try to find a 16-bit replicated constant that can
4895 leave three of the bytes clear. If b2 or b4 is already
4896 zero, then we can. If the 8-bit from above would not
4897 clear b2 anyway, then we still win. */
4898 else if (b1 == b3 && (!b2 || !b4
4899 || (remainder & 0x00ff0000 & ~result)))
4900 {
4901 result = remainder & 0xff00ff00;
4902 i = 24;
4903 }
4904 }
4905 else if (loc > 16)
4906 {
4907 /* The 8-bit immediate already found clears b2 (and maybe b3)
4908              and we don't get here unless b1 is already clear, but it will
4909 leave b4 unchanged. */
4910
4911 /* If we can clear b2 and b4 at once, then we win, since the
4912 8-bits couldn't possibly reach that far. */
4913 if (b2 == b4)
4914 {
4915 result = remainder & 0x00ff00ff;
4916 i = 16;
4917 }
4918 }
4919 }
4920
4921 return_sequence->i[insns++] = result;
4922 remainder &= ~result;
4923
4924 if (code == SET || code == MINUS)
4925 code = PLUS;
4926 }
4927 while (remainder);
4928
4929 return insns;
4930 }
4931
4932 /* Emit an instruction with the indicated PATTERN. If COND is
4933 non-NULL, conditionalize the execution of the instruction on COND
4934 being true. */
4935
4936 static void
4937 emit_constant_insn (rtx cond, rtx pattern)
4938 {
4939 if (cond)
4940 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4941 emit_insn (pattern);
4942 }
4943
4944 /* As above, but extra parameter GENERATE which, if clear, suppresses
4945 RTL generation. */
4946
4947 static int
4948 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4949 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4950 int subtargets, int generate)
4951 {
4952 int can_invert = 0;
4953 int can_negate = 0;
4954 int final_invert = 0;
4955 int i;
4956 int set_sign_bit_copies = 0;
4957 int clear_sign_bit_copies = 0;
4958 int clear_zero_bit_copies = 0;
4959 int set_zero_bit_copies = 0;
4960 int insns = 0, neg_insns, inv_insns;
4961 unsigned HOST_WIDE_INT temp1, temp2;
4962 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4963 struct four_ints *immediates;
4964 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4965
4966 /* Find out which operations are safe for a given CODE. Also do a quick
4967 check for degenerate cases; these can occur when DImode operations
4968 are split. */
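  /* E.g. a SET may equally well be built from the bitwise inverse of VAL
     (starting with an MVN of an inverted immediate), a PLUS can instead
     add the negated value, and AND is handled further down by inverting
     the mask and using BIC.  */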
4969 switch (code)
4970 {
4971 case SET:
4972 can_invert = 1;
4973 break;
4974
4975 case PLUS:
4976 can_negate = 1;
4977 break;
4978
4979 case IOR:
4980 if (remainder == 0xffffffff)
4981 {
4982 if (generate)
4983 emit_constant_insn (cond,
4984 gen_rtx_SET (target,
4985 GEN_INT (ARM_SIGN_EXTEND (val))));
4986 return 1;
4987 }
4988
4989 if (remainder == 0)
4990 {
4991 if (reload_completed && rtx_equal_p (target, source))
4992 return 0;
4993
4994 if (generate)
4995 emit_constant_insn (cond, gen_rtx_SET (target, source));
4996 return 1;
4997 }
4998 break;
4999
5000 case AND:
5001 if (remainder == 0)
5002 {
5003 if (generate)
5004 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5005 return 1;
5006 }
5007 if (remainder == 0xffffffff)
5008 {
5009 if (reload_completed && rtx_equal_p (target, source))
5010 return 0;
5011 if (generate)
5012 emit_constant_insn (cond, gen_rtx_SET (target, source));
5013 return 1;
5014 }
5015 can_invert = 1;
5016 break;
5017
5018 case XOR:
5019 if (remainder == 0)
5020 {
5021 if (reload_completed && rtx_equal_p (target, source))
5022 return 0;
5023 if (generate)
5024 emit_constant_insn (cond, gen_rtx_SET (target, source));
5025 return 1;
5026 }
5027
5028 if (remainder == 0xffffffff)
5029 {
5030 if (generate)
5031 emit_constant_insn (cond,
5032 gen_rtx_SET (target,
5033 gen_rtx_NOT (mode, source)));
5034 return 1;
5035 }
5036 final_invert = 1;
5037 break;
5038
5039 case MINUS:
5040 /* We treat MINUS as (val - source), since (source - val) is always
5041 passed as (source + (-val)). */
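      /* E.g. 10 - x reaches here as MINUS with VAL == 10, whereas
	 x - 10 is instead passed in as PLUS with VAL == -10.  */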
5042 if (remainder == 0)
5043 {
5044 if (generate)
5045 emit_constant_insn (cond,
5046 gen_rtx_SET (target,
5047 gen_rtx_NEG (mode, source)));
5048 return 1;
5049 }
5050 if (const_ok_for_arm (val))
5051 {
5052 if (generate)
5053 emit_constant_insn (cond,
5054 gen_rtx_SET (target,
5055 gen_rtx_MINUS (mode, GEN_INT (val),
5056 source)));
5057 return 1;
5058 }
5059
5060 break;
5061
5062 default:
5063 gcc_unreachable ();
5064 }
5065
5066 /* If we can do it in one insn get out quickly. */
5067 if (const_ok_for_op (val, code))
5068 {
5069 if (generate)
5070 emit_constant_insn (cond,
5071 gen_rtx_SET (target,
5072 (source
5073 ? gen_rtx_fmt_ee (code, mode, source,
5074 GEN_INT (val))
5075 : GEN_INT (val))));
5076 return 1;
5077 }
5078
5079 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5080 insn. */
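  /* E.g. on ARMv6 and later, x & 0xffff becomes a single UXTH, and with
     Thumb-2, x & 0x1fffff becomes a single UBFX of the low 21 bits.  */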
5081 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5082 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5083 {
5084 if (generate)
5085 {
5086 if (mode == SImode && i == 16)
5087 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5088 smaller insn. */
5089 emit_constant_insn (cond,
5090 gen_zero_extendhisi2
5091 (target, gen_lowpart (HImode, source)));
5092 else
5093 /* Extz only supports SImode, but we can coerce the operands
5094 into that mode. */
5095 emit_constant_insn (cond,
5096 gen_extzv_t2 (gen_lowpart (SImode, target),
5097 gen_lowpart (SImode, source),
5098 GEN_INT (i), const0_rtx));
5099 }
5100
5101 return 1;
5102 }
5103
5104 /* Calculate a few attributes that may be useful for specific
5105 optimizations. */
5106 /* Count number of leading zeros. */
5107 for (i = 31; i >= 0; i--)
5108 {
5109 if ((remainder & (1 << i)) == 0)
5110 clear_sign_bit_copies++;
5111 else
5112 break;
5113 }
5114
5115 /* Count number of leading 1's. */
5116 for (i = 31; i >= 0; i--)
5117 {
5118 if ((remainder & (1 << i)) != 0)
5119 set_sign_bit_copies++;
5120 else
5121 break;
5122 }
5123
5124 /* Count number of trailing zero's. */
5125 for (i = 0; i <= 31; i++)
5126 {
5127 if ((remainder & (1 << i)) == 0)
5128 clear_zero_bit_copies++;
5129 else
5130 break;
5131 }
5132
5133 /* Count number of trailing 1's. */
5134 for (i = 0; i <= 31; i++)
5135 {
5136 if ((remainder & (1 << i)) != 0)
5137 set_zero_bit_copies++;
5138 else
5139 break;
5140 }
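  /* E.g. remainder == 0x0003fc00 gives 14 leading zeros, 10 trailing
     zeros and no leading or trailing ones.  */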
5141
5142 switch (code)
5143 {
5144 case SET:
5145 /* See if we can do this by sign_extending a constant that is known
5146          to be negative.  This is a good way of doing it, since the shift
5147 may well merge into a subsequent insn. */
5148 if (set_sign_bit_copies > 1)
5149 {
5150 if (const_ok_for_arm
5151 (temp1 = ARM_SIGN_EXTEND (remainder
5152 << (set_sign_bit_copies - 1))))
5153 {
5154 if (generate)
5155 {
5156 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5157 emit_constant_insn (cond,
5158 gen_rtx_SET (new_src, GEN_INT (temp1)));
5159 emit_constant_insn (cond,
5160 gen_ashrsi3 (target, new_src,
5161 GEN_INT (set_sign_bit_copies - 1)));
5162 }
5163 return 2;
5164 }
5165 /* For an inverted constant, we will need to set the low bits,
5166 these will be shifted out of harm's way. */
5167 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5168 if (const_ok_for_arm (~temp1))
5169 {
5170 if (generate)
5171 {
5172 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5173 emit_constant_insn (cond,
5174 gen_rtx_SET (new_src, GEN_INT (temp1)));
5175 emit_constant_insn (cond,
5176 gen_ashrsi3 (target, new_src,
5177 GEN_INT (set_sign_bit_copies - 1)));
5178 }
5179 return 2;
5180 }
5181 }
5182
5183 /* See if we can calculate the value as the difference between two
5184 valid immediates. */
5185 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5186 {
5187 int topshift = clear_sign_bit_copies & ~1;
5188
5189 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5190 & (0xff000000 >> topshift));
5191
5192 /* If temp1 is zero, then that means the 9 most significant
5193 bits of remainder were 1 and we've caused it to overflow.
5194 When topshift is 0 we don't need to do anything since we
5195 can borrow from 'bit 32'. */
5196 if (temp1 == 0 && topshift != 0)
5197 temp1 = 0x80000000 >> (topshift - 1);
5198
5199 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5200
5201 if (const_ok_for_arm (temp2))
5202 {
5203 if (generate)
5204 {
5205 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5206 emit_constant_insn (cond,
5207 gen_rtx_SET (new_src, GEN_INT (temp1)));
5208 emit_constant_insn (cond,
5209 gen_addsi3 (target, new_src,
5210 GEN_INT (-temp2)));
5211 }
5212
5213 return 2;
5214 }
5215 }
5216
5217 /* See if we can generate this by setting the bottom (or the top)
5218 16 bits, and then shifting these into the other half of the
5219 word. We only look for the simplest cases, to do more would cost
5220 too much. Be careful, however, not to generate this when the
5221 alternative would take fewer insns. */
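      /* E.g. 0x01230123 is built by first synthesizing 0x0123 and then
	 ORRing it with itself shifted left by 16, which is normally
	 cheaper than splitting the full value into rotated 8-bit
	 immediates.  */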
5222 if (val & 0xffff0000)
5223 {
5224 temp1 = remainder & 0xffff0000;
5225 temp2 = remainder & 0x0000ffff;
5226
5227 /* Overlaps outside this range are best done using other methods. */
5228 for (i = 9; i < 24; i++)
5229 {
5230 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5231 && !const_ok_for_arm (temp2))
5232 {
5233 rtx new_src = (subtargets
5234 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5235 : target);
5236 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5237 source, subtargets, generate);
5238 source = new_src;
5239 if (generate)
5240 emit_constant_insn
5241 (cond,
5242 gen_rtx_SET
5243 (target,
5244 gen_rtx_IOR (mode,
5245 gen_rtx_ASHIFT (mode, source,
5246 GEN_INT (i)),
5247 source)));
5248 return insns + 1;
5249 }
5250 }
5251
5252 /* Don't duplicate cases already considered. */
5253 for (i = 17; i < 24; i++)
5254 {
5255 if (((temp1 | (temp1 >> i)) == remainder)
5256 && !const_ok_for_arm (temp1))
5257 {
5258 rtx new_src = (subtargets
5259 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5260 : target);
5261 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5262 source, subtargets, generate);
5263 source = new_src;
5264 if (generate)
5265 emit_constant_insn
5266 (cond,
5267 gen_rtx_SET (target,
5268 gen_rtx_IOR
5269 (mode,
5270 gen_rtx_LSHIFTRT (mode, source,
5271 GEN_INT (i)),
5272 source)));
5273 return insns + 1;
5274 }
5275 }
5276 }
5277 break;
5278
5279 case IOR:
5280 case XOR:
5281 /* If we have IOR or XOR, and the constant can be loaded in a
5282 single instruction, and we can find a temporary to put it in,
5283 then this can be done in two instructions instead of 3-4. */
5284 if (subtargets
5285          /* TARGET can't be NULL if SUBTARGETS is 0.  */
5286 || (reload_completed && !reg_mentioned_p (target, source)))
5287 {
5288 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5289 {
5290 if (generate)
5291 {
5292 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5293
5294 emit_constant_insn (cond,
5295 gen_rtx_SET (sub, GEN_INT (val)));
5296 emit_constant_insn (cond,
5297 gen_rtx_SET (target,
5298 gen_rtx_fmt_ee (code, mode,
5299 source, sub)));
5300 }
5301 return 2;
5302 }
5303 }
5304
5305 if (code == XOR)
5306 break;
5307
5308 /* Convert.
5309 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
5310 and the remainder 0s for e.g. 0xfff00000)
5311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5312
5313 This can be done in 2 instructions by using shifts with mov or mvn.
5314 e.g. for
5315 x = x | 0xfff00000;
5316 we generate.
5317 mvn r0, r0, asl #12
5318 mvn r0, r0, lsr #12 */
5319 if (set_sign_bit_copies > 8
5320 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5321 {
5322 if (generate)
5323 {
5324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5325 rtx shift = GEN_INT (set_sign_bit_copies);
5326
5327 emit_constant_insn
5328 (cond,
5329 gen_rtx_SET (sub,
5330 gen_rtx_NOT (mode,
5331 gen_rtx_ASHIFT (mode,
5332 source,
5333 shift))));
5334 emit_constant_insn
5335 (cond,
5336 gen_rtx_SET (target,
5337 gen_rtx_NOT (mode,
5338 gen_rtx_LSHIFTRT (mode, sub,
5339 shift))));
5340 }
5341 return 2;
5342 }
5343
5344 /* Convert
5345 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5346 to
5347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5348
5349          E.g. for r0 = r0 | 0xfff
5350 mvn r0, r0, lsr #12
5351 mvn r0, r0, asl #12
5352
5353 */
5354 if (set_zero_bit_copies > 8
5355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360 rtx shift = GEN_INT (set_zero_bit_copies);
5361
5362 emit_constant_insn
5363 (cond,
5364 gen_rtx_SET (sub,
5365 gen_rtx_NOT (mode,
5366 gen_rtx_LSHIFTRT (mode,
5367 source,
5368 shift))));
5369 emit_constant_insn
5370 (cond,
5371 gen_rtx_SET (target,
5372 gen_rtx_NOT (mode,
5373 gen_rtx_ASHIFT (mode, sub,
5374 shift))));
5375 }
5376 return 2;
5377 }
5378
5379 /* This will never be reached for Thumb2 because orn is a valid
5380 instruction. This is for Thumb1 and the ARM 32 bit cases.
5381
5382 x = y | constant (such that ~constant is a valid constant)
5383 Transform this to
5384 x = ~(~y & ~constant).
5385 */
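      /* For instance, when no spare register is available,
	   r0 = r0 | 0xffffff01   (~0xffffff01 == 0xfe is a valid immediate)
	 becomes
	   mvn	r0, r0
	   and	r0, r0, #254
	   mvn	r0, r0  */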
5386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5387 {
5388 if (generate)
5389 {
5390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5391 emit_constant_insn (cond,
5392 gen_rtx_SET (sub,
5393 gen_rtx_NOT (mode, source)));
5394 source = sub;
5395 if (subtargets)
5396 sub = gen_reg_rtx (mode);
5397 emit_constant_insn (cond,
5398 gen_rtx_SET (sub,
5399 gen_rtx_AND (mode, source,
5400 GEN_INT (temp1))));
5401 emit_constant_insn (cond,
5402 gen_rtx_SET (target,
5403 gen_rtx_NOT (mode, sub)));
5404 }
5405 return 3;
5406 }
5407 break;
5408
5409 case AND:
5410 /* See if two shifts will do 2 or more insn's worth of work. */
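      /* E.g. on cores without UBFX, x & 0x00007fff (17 leading zero bits)
	 is done with a left shift by 17 followed by a logical right shift
	 by 17.  */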
5411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5412 {
5413 HOST_WIDE_INT shift_mask = ((0xffffffff
5414 << (32 - clear_sign_bit_copies))
5415 & 0xffffffff);
5416
5417 if ((remainder | shift_mask) != 0xffffffff)
5418 {
5419 HOST_WIDE_INT new_val
5420 = ARM_SIGN_EXTEND (remainder | shift_mask);
5421
5422 if (generate)
5423 {
5424 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5425 insns = arm_gen_constant (AND, SImode, cond, new_val,
5426 new_src, source, subtargets, 1);
5427 source = new_src;
5428 }
5429 else
5430 {
5431 rtx targ = subtargets ? NULL_RTX : target;
5432 insns = arm_gen_constant (AND, mode, cond, new_val,
5433 targ, source, subtargets, 0);
5434 }
5435 }
5436
5437 if (generate)
5438 {
5439 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5440 rtx shift = GEN_INT (clear_sign_bit_copies);
5441
5442 emit_insn (gen_ashlsi3 (new_src, source, shift));
5443 emit_insn (gen_lshrsi3 (target, new_src, shift));
5444 }
5445
5446 return insns + 2;
5447 }
5448
5449 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5450 {
5451 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5452
5453 if ((remainder | shift_mask) != 0xffffffff)
5454 {
5455 HOST_WIDE_INT new_val
5456 = ARM_SIGN_EXTEND (remainder | shift_mask);
5457 if (generate)
5458 {
5459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5460
5461 insns = arm_gen_constant (AND, mode, cond, new_val,
5462 new_src, source, subtargets, 1);
5463 source = new_src;
5464 }
5465 else
5466 {
5467 rtx targ = subtargets ? NULL_RTX : target;
5468
5469 insns = arm_gen_constant (AND, mode, cond, new_val,
5470 targ, source, subtargets, 0);
5471 }
5472 }
5473
5474 if (generate)
5475 {
5476 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5477 rtx shift = GEN_INT (clear_zero_bit_copies);
5478
5479 emit_insn (gen_lshrsi3 (new_src, source, shift));
5480 emit_insn (gen_ashlsi3 (target, new_src, shift));
5481 }
5482
5483 return insns + 2;
5484 }
5485
5486 break;
5487
5488 default:
5489 break;
5490 }
5491
5492 /* Calculate what the instruction sequences would be if we generated it
5493 normally, negated, or inverted. */
5494 if (code == AND)
5495 /* AND cannot be split into multiple insns, so invert and use BIC. */
5496 insns = 99;
5497 else
5498 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5499
5500 if (can_negate)
5501 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5502 &neg_immediates);
5503 else
5504 neg_insns = 99;
5505
5506 if (can_invert || final_invert)
5507 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5508 &inv_immediates);
5509 else
5510 inv_insns = 99;
5511
5512 immediates = &pos_immediates;
5513
5514 /* Is the negated immediate sequence more efficient? */
5515 if (neg_insns < insns && neg_insns <= inv_insns)
5516 {
5517 insns = neg_insns;
5518 immediates = &neg_immediates;
5519 }
5520 else
5521 can_negate = 0;
5522
5523 /* Is the inverted immediate sequence more efficient?
5524 We must allow for an extra NOT instruction for XOR operations, although
5525 there is some chance that the final 'mvn' will get optimized later. */
5526 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5527 {
5528 insns = inv_insns;
5529 immediates = &inv_immediates;
5530 }
5531 else
5532 {
5533 can_invert = 0;
5534 final_invert = 0;
5535 }
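  /* E.g. for a SET of 0xfffff5f5 the positive form needs four immediates,
     but the inverse 0x00000a0a needs only two (0xa00 and 0xa), so the
     inverted sequence is chosen and comes out roughly as
	mvn	rX, #0xa00
	sub	rX, rX, #10  */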
5536
5537 /* Now output the chosen sequence as instructions. */
5538 if (generate)
5539 {
5540 for (i = 0; i < insns; i++)
5541 {
5542 rtx new_src, temp1_rtx;
5543
5544 temp1 = immediates->i[i];
5545
5546 if (code == SET || code == MINUS)
5547 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5548 else if ((final_invert || i < (insns - 1)) && subtargets)
5549 new_src = gen_reg_rtx (mode);
5550 else
5551 new_src = target;
5552
5553 if (can_invert)
5554 temp1 = ~temp1;
5555 else if (can_negate)
5556 temp1 = -temp1;
5557
5558 temp1 = trunc_int_for_mode (temp1, mode);
5559 temp1_rtx = GEN_INT (temp1);
5560
5561 if (code == SET)
5562 ;
5563 else if (code == MINUS)
5564 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5565 else
5566 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5567
5568 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5569 source = new_src;
5570
5571 if (code == SET)
5572 {
5573 can_negate = can_invert;
5574 can_invert = 0;
5575 code = PLUS;
5576 }
5577 else if (code == MINUS)
5578 code = PLUS;
5579 }
5580 }
5581
5582 if (final_invert)
5583 {
5584 if (generate)
5585 emit_constant_insn (cond, gen_rtx_SET (target,
5586 gen_rtx_NOT (mode, source)));
5587 insns++;
5588 }
5589
5590 return insns;
5591 }
5592
5593 /* Return TRUE if op is a constant where both the low and top words are
5594 suitable for RSB/RSC instructions. This is never true for Thumb, since
5595 we do not have RSC in that case. */
5596 static bool
5597 arm_const_double_prefer_rsbs_rsc (rtx op)
5598 {
5599 /* Thumb lacks RSC, so we never prefer that sequence. */
5600 if (TARGET_THUMB || !CONST_INT_P (op))
5601 return false;
5602 HOST_WIDE_INT hi, lo;
5603 lo = UINTVAL (op) & 0xffffffffULL;
5604 hi = UINTVAL (op) >> 32;
5605 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5606 }
5607
5608 /* Canonicalize a comparison so that we are more likely to recognize it.
5609 This can be done for a few constant compares, where we can make the
5610 immediate value easier to load. */
5611
5612 static void
5613 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5614 bool op0_preserve_value)
5615 {
5616 machine_mode mode;
5617 unsigned HOST_WIDE_INT i, maxval;
5618
5619 mode = GET_MODE (*op0);
5620 if (mode == VOIDmode)
5621 mode = GET_MODE (*op1);
5622
5623 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5624
5625 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5626 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5627 either reversed or (for constant OP1) adjusted to GE/LT.
5628 Similarly for GTU/LEU in Thumb mode. */
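  /* E.g. a DImode comparison x > 5 is rewritten below as x >= 6 so that
     it maps onto the available cmp/sbc sequence.  */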
5629 if (mode == DImode)
5630 {
5631
5632 if (*code == GT || *code == LE
5633 || *code == GTU || *code == LEU)
5634 {
5635 /* Missing comparison. First try to use an available
5636 comparison. */
5637 if (CONST_INT_P (*op1))
5638 {
5639 i = INTVAL (*op1);
5640 switch (*code)
5641 {
5642 case GT:
5643 case LE:
5644 if (i != maxval)
5645 {
5646 /* Try to convert to GE/LT, unless that would be more
5647 expensive. */
5648 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5649 && arm_const_double_prefer_rsbs_rsc (*op1))
5650 return;
5651 *op1 = GEN_INT (i + 1);
5652 *code = *code == GT ? GE : LT;
5653 }
5654 else
5655 {
5656 /* GT maxval is always false, LE maxval is always true.
5657 We can't fold that away here as we must make a
5658 comparison, but we can fold them to comparisons
5659 with the same result that can be handled:
5660 op0 GT maxval -> op0 LT minval
5661 op0 LE maxval -> op0 GE minval
5662 where minval = (-maxval - 1). */
5663 *op1 = GEN_INT (-maxval - 1);
5664 *code = *code == GT ? LT : GE;
5665 }
5666 return;
5667
5668 case GTU:
5669 case LEU:
5670 if (i != ~((unsigned HOST_WIDE_INT) 0))
5671 {
5672 /* Try to convert to GEU/LTU, unless that would
5673 be more expensive. */
5674 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5675 && arm_const_double_prefer_rsbs_rsc (*op1))
5676 return;
5677 *op1 = GEN_INT (i + 1);
5678 *code = *code == GTU ? GEU : LTU;
5679 }
5680 else
5681 {
5682 /* GTU ~0 is always false, LEU ~0 is always true.
5683 We can't fold that away here as we must make a
5684 comparison, but we can fold them to comparisons
5685 with the same result that can be handled:
5686 op0 GTU ~0 -> op0 LTU 0
5687 op0 LEU ~0 -> op0 GEU 0. */
5688 *op1 = const0_rtx;
5689 *code = *code == GTU ? LTU : GEU;
5690 }
5691 return;
5692
5693 default:
5694 gcc_unreachable ();
5695 }
5696 }
5697
5698 if (!op0_preserve_value)
5699 {
5700 std::swap (*op0, *op1);
5701 *code = (int)swap_condition ((enum rtx_code)*code);
5702 }
5703 }
5704 return;
5705 }
5706
5707 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5708 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5709 to facilitate possible combining with a cmp into 'ands'. */
5710 if (mode == SImode
5711 && GET_CODE (*op0) == ZERO_EXTEND
5712 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5713 && GET_MODE (XEXP (*op0, 0)) == QImode
5714 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5715 && subreg_lowpart_p (XEXP (*op0, 0))
5716 && *op1 == const0_rtx)
5717 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5718 GEN_INT (255));
5719
5720 /* Comparisons smaller than DImode. Only adjust comparisons against
5721 an out-of-range constant. */
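  /* E.g. x > 0x000fffff cannot use 0xfffff directly, but i + 1 == 0x100000
     is a valid immediate, so the test becomes x >= 0x100000 below.  */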
5722 if (!CONST_INT_P (*op1)
5723 || const_ok_for_arm (INTVAL (*op1))
5724 || const_ok_for_arm (- INTVAL (*op1)))
5725 return;
5726
5727 i = INTVAL (*op1);
5728
5729 switch (*code)
5730 {
5731 case EQ:
5732 case NE:
5733 return;
5734
5735 case GT:
5736 case LE:
5737 if (i != maxval
5738 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5739 {
5740 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5741 *code = *code == GT ? GE : LT;
5742 return;
5743 }
5744 break;
5745
5746 case GE:
5747 case LT:
5748 if (i != ~maxval
5749 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5750 {
5751 *op1 = GEN_INT (i - 1);
5752 *code = *code == GE ? GT : LE;
5753 return;
5754 }
5755 break;
5756
5757 case GTU:
5758 case LEU:
5759 if (i != ~((unsigned HOST_WIDE_INT) 0)
5760 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5761 {
5762 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5763 *code = *code == GTU ? GEU : LTU;
5764 return;
5765 }
5766 break;
5767
5768 case GEU:
5769 case LTU:
5770 if (i != 0
5771 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5772 {
5773 *op1 = GEN_INT (i - 1);
5774 *code = *code == GEU ? GTU : LEU;
5775 return;
5776 }
5777 break;
5778
5779 default:
5780 gcc_unreachable ();
5781 }
5782 }
5783
5784
5785 /* Define how to find the value returned by a function. */
5786
5787 static rtx
5788 arm_function_value(const_tree type, const_tree func,
5789 bool outgoing ATTRIBUTE_UNUSED)
5790 {
5791 machine_mode mode;
5792 int unsignedp ATTRIBUTE_UNUSED;
5793 rtx r ATTRIBUTE_UNUSED;
5794
5795 mode = TYPE_MODE (type);
5796
5797 if (TARGET_AAPCS_BASED)
5798 return aapcs_allocate_return_reg (mode, type, func);
5799
5800 /* Promote integer types. */
5801 if (INTEGRAL_TYPE_P (type))
5802 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5803
5804   /* Promote small structs returned in a register to full-word size
5805 for big-endian AAPCS. */
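  /* E.g. a 3-byte structure is padded up to 4 bytes here and returned
     in SImode.  */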
5806 if (arm_return_in_msb (type))
5807 {
5808 HOST_WIDE_INT size = int_size_in_bytes (type);
5809 if (size % UNITS_PER_WORD != 0)
5810 {
5811 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5812 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5813 }
5814 }
5815
5816 return arm_libcall_value_1 (mode);
5817 }
5818
5819 /* libcall hashtable helpers. */
5820
5821 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5822 {
5823 static inline hashval_t hash (const rtx_def *);
5824 static inline bool equal (const rtx_def *, const rtx_def *);
5825 static inline void remove (rtx_def *);
5826 };
5827
5828 inline bool
5829 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5830 {
5831 return rtx_equal_p (p1, p2);
5832 }
5833
5834 inline hashval_t
5835 libcall_hasher::hash (const rtx_def *p1)
5836 {
5837 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5838 }
5839
5840 typedef hash_table<libcall_hasher> libcall_table_type;
5841
5842 static void
5843 add_libcall (libcall_table_type *htab, rtx libcall)
5844 {
5845 *htab->find_slot (libcall, INSERT) = libcall;
5846 }
5847
5848 static bool
5849 arm_libcall_uses_aapcs_base (const_rtx libcall)
5850 {
5851 static bool init_done = false;
5852 static libcall_table_type *libcall_htab = NULL;
5853
5854 if (!init_done)
5855 {
5856 init_done = true;
5857
5858 libcall_htab = new libcall_table_type (31);
5859 add_libcall (libcall_htab,
5860 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5861 add_libcall (libcall_htab,
5862 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5863 add_libcall (libcall_htab,
5864 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5865 add_libcall (libcall_htab,
5866 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5867
5868 add_libcall (libcall_htab,
5869 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5870 add_libcall (libcall_htab,
5871 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5872 add_libcall (libcall_htab,
5873 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5874 add_libcall (libcall_htab,
5875 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5876
5877 add_libcall (libcall_htab,
5878 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5879 add_libcall (libcall_htab,
5880 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5881 add_libcall (libcall_htab,
5882 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5883 add_libcall (libcall_htab,
5884 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5885 add_libcall (libcall_htab,
5886 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5887 add_libcall (libcall_htab,
5888 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5889 add_libcall (libcall_htab,
5890 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5891 add_libcall (libcall_htab,
5892 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5893 add_libcall (libcall_htab,
5894 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5895 add_libcall (libcall_htab,
5896 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5897
5898 /* Values from double-precision helper functions are returned in core
5899 registers if the selected core only supports single-precision
5900 arithmetic, even if we are using the hard-float ABI. The same is
5901 true for single-precision helpers except in case of MVE, because in
5902 MVE we will be using the hard-float ABI on a CPU which doesn't support
5903 single-precision operations in hardware. In MVE the following check
5904 enables use of emulation for the single-precision arithmetic
5905 operations. */
5906 if (TARGET_HAVE_MVE)
5907 {
5908 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5909 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5910 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5911 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5912 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5913 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5914 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5915 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5916 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5917 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5918 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5919 }
5920 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5921 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5922 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5923 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5924 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5925 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5926 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5927 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5928 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5929 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5930 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5931 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5932 SFmode));
5933 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5934 DFmode));
5935 add_libcall (libcall_htab,
5936 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5937 }
5938
5939 return libcall && libcall_htab->find (libcall) != NULL;
5940 }
5941
5942 static rtx
5943 arm_libcall_value_1 (machine_mode mode)
5944 {
5945 if (TARGET_AAPCS_BASED)
5946 return aapcs_libcall_value (mode);
5947 else if (TARGET_IWMMXT_ABI
5948 && arm_vector_mode_supported_p (mode))
5949 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5950 else
5951 return gen_rtx_REG (mode, ARG_REGISTER (1));
5952 }
5953
5954 /* Define how to find the value returned by a library function
5955 assuming the value has mode MODE. */
5956
5957 static rtx
5958 arm_libcall_value (machine_mode mode, const_rtx libcall)
5959 {
5960 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5961 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5962 {
5963 /* The following libcalls return their result in integer registers,
5964 even though they return a floating point value. */
5965 if (arm_libcall_uses_aapcs_base (libcall))
5966         return gen_rtx_REG (mode, ARG_REGISTER (1));
5967
5968 }
5969
5970 return arm_libcall_value_1 (mode);
5971 }
5972
5973 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5974
5975 static bool
5976 arm_function_value_regno_p (const unsigned int regno)
5977 {
5978 if (regno == ARG_REGISTER (1)
5979 || (TARGET_32BIT
5980 && TARGET_AAPCS_BASED
5981 && TARGET_HARD_FLOAT
5982 && regno == FIRST_VFP_REGNUM)
5983 || (TARGET_IWMMXT_ABI
5984 && regno == FIRST_IWMMXT_REGNUM))
5985 return true;
5986
5987 return false;
5988 }
5989
5990 /* Determine the amount of memory needed to store the possible return
5991 registers of an untyped call. */
5992 int
5993 arm_apply_result_size (void)
5994 {
5995 int size = 16;
5996
5997 if (TARGET_32BIT)
5998 {
5999 if (TARGET_HARD_FLOAT_ABI)
6000 size += 32;
6001 if (TARGET_IWMMXT_ABI)
6002 size += 8;
6003 }
6004
6005 return size;
6006 }
6007
6008 /* Decide whether TYPE should be returned in memory (true)
6009 or in a register (false). FNTYPE is the type of the function making
6010 the call. */
6011 static bool
6012 arm_return_in_memory (const_tree type, const_tree fntype)
6013 {
6014 HOST_WIDE_INT size;
6015
6016 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6017
6018 if (TARGET_AAPCS_BASED)
6019 {
6020       /* Simple, non-aggregate types (i.e. not including vectors and
6021 complex) are always returned in a register (or registers).
6022 We don't care about which register here, so we can short-cut
6023 some of the detail. */
6024 if (!AGGREGATE_TYPE_P (type)
6025 && TREE_CODE (type) != VECTOR_TYPE
6026 && TREE_CODE (type) != COMPLEX_TYPE)
6027 return false;
6028
6029 /* Any return value that is no larger than one word can be
6030 returned in r0. */
6031 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6032 return false;
6033
6034 /* Check any available co-processors to see if they accept the
6035 type as a register candidate (VFP, for example, can return
6036 some aggregates in consecutive registers). These aren't
6037 available if the call is variadic. */
6038 if (aapcs_select_return_coproc (type, fntype) >= 0)
6039 return false;
6040
6041 /* Vector values should be returned using ARM registers, not
6042 memory (unless they're over 16 bytes, which will break since
6043 we only have four call-clobbered registers to play with). */
6044 if (TREE_CODE (type) == VECTOR_TYPE)
6045 return (size < 0 || size > (4 * UNITS_PER_WORD));
6046
6047 /* The rest go in memory. */
6048 return true;
6049 }
6050
6051 if (TREE_CODE (type) == VECTOR_TYPE)
6052 return (size < 0 || size > (4 * UNITS_PER_WORD));
6053
6054 if (!AGGREGATE_TYPE_P (type) &&
6055 (TREE_CODE (type) != VECTOR_TYPE))
6056 /* All simple types are returned in registers. */
6057 return false;
6058
6059 if (arm_abi != ARM_ABI_APCS)
6060 {
6061 /* ATPCS and later return aggregate types in memory only if they are
6062 larger than a word (or are variable size). */
6063 return (size < 0 || size > UNITS_PER_WORD);
6064 }
6065
6066 /* For the arm-wince targets we choose to be compatible with Microsoft's
6067 ARM and Thumb compilers, which always return aggregates in memory. */
6068 #ifndef ARM_WINCE
6069 /* All structures/unions bigger than one word are returned in memory.
6070 Also catch the case where int_size_in_bytes returns -1. In this case
6071 the aggregate is either huge or of variable size, and in either case
6072 we will want to return it via memory and not in a register. */
6073 if (size < 0 || size > UNITS_PER_WORD)
6074 return true;
6075
6076 if (TREE_CODE (type) == RECORD_TYPE)
6077 {
6078 tree field;
6079
6080 /* For a struct the APCS says that we only return in a register
6081 if the type is 'integer like' and every addressable element
6082 has an offset of zero. For practical purposes this means
6083 that the structure can have at most one non bit-field element
6084 and that this element must be the first one in the structure. */
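      /* E.g. under these APCS rules struct { short a; unsigned b : 16; }
	 may come back in r0, whereas struct { short a; short b; } must be
	 returned in memory because its second element is addressable.  */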
6085
6086 /* Find the first field, ignoring non FIELD_DECL things which will
6087 have been created by C++. */
6088 /* NOTE: This code is deprecated and has not been updated to handle
6089 DECL_FIELD_ABI_IGNORED. */
6090 for (field = TYPE_FIELDS (type);
6091 field && TREE_CODE (field) != FIELD_DECL;
6092 field = DECL_CHAIN (field))
6093 continue;
6094
6095 if (field == NULL)
6096 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6097
6098 /* Check that the first field is valid for returning in a register. */
6099
6100       /* ... Floats are not allowed.  */
6101 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6102 return true;
6103
6104 /* ... Aggregates that are not themselves valid for returning in
6105 a register are not allowed. */
6106 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6107 return true;
6108
6109 /* Now check the remaining fields, if any. Only bitfields are allowed,
6110 since they are not addressable. */
6111 for (field = DECL_CHAIN (field);
6112 field;
6113 field = DECL_CHAIN (field))
6114 {
6115 if (TREE_CODE (field) != FIELD_DECL)
6116 continue;
6117
6118 if (!DECL_BIT_FIELD_TYPE (field))
6119 return true;
6120 }
6121
6122 return false;
6123 }
6124
6125 if (TREE_CODE (type) == UNION_TYPE)
6126 {
6127 tree field;
6128
6129 /* Unions can be returned in registers if every element is
6130 integral, or can be returned in an integer register. */
6131 for (field = TYPE_FIELDS (type);
6132 field;
6133 field = DECL_CHAIN (field))
6134 {
6135 if (TREE_CODE (field) != FIELD_DECL)
6136 continue;
6137
6138 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6139 return true;
6140
6141 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6142 return true;
6143 }
6144
6145 return false;
6146 }
6147 #endif /* not ARM_WINCE */
6148
6149 /* Return all other types in memory. */
6150 return true;
6151 }
6152
6153 const struct pcs_attribute_arg
6154 {
6155 const char *arg;
6156 enum arm_pcs value;
6157 } pcs_attribute_args[] =
6158 {
6159 {"aapcs", ARM_PCS_AAPCS},
6160 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6161 #if 0
6162 /* We could recognize these, but changes would be needed elsewhere
6163 * to implement them. */
6164 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6165 {"atpcs", ARM_PCS_ATPCS},
6166 {"apcs", ARM_PCS_APCS},
6167 #endif
6168 {NULL, ARM_PCS_UNKNOWN}
6169 };
6170
6171 static enum arm_pcs
6172 arm_pcs_from_attribute (tree attr)
6173 {
6174 const struct pcs_attribute_arg *ptr;
6175 const char *arg;
6176
6177 /* Get the value of the argument. */
6178 if (TREE_VALUE (attr) == NULL_TREE
6179 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6180 return ARM_PCS_UNKNOWN;
6181
6182 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6183
6184 /* Check it against the list of known arguments. */
6185 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6186 if (streq (arg, ptr->arg))
6187 return ptr->value;
6188
6189   /* An unrecognized PCS variant.  */
6190 return ARM_PCS_UNKNOWN;
6191 }
6192
6193 /* Get the PCS variant to use for this call. TYPE is the function's type
6194    specification, DECL is the specific declaration.  DECL may be null if
6195 the call could be indirect or if this is a library call. */
6196 static enum arm_pcs
6197 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6198 {
6199 bool user_convention = false;
6200 enum arm_pcs user_pcs = arm_pcs_default;
6201 tree attr;
6202
6203 gcc_assert (type);
6204
6205 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6206 if (attr)
6207 {
6208 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6209 user_convention = true;
6210 }
6211
6212 if (TARGET_AAPCS_BASED)
6213 {
6214 /* Detect varargs functions. These always use the base rules
6215 (no argument is ever a candidate for a co-processor
6216 register). */
6217 bool base_rules = stdarg_p (type);
6218
6219 if (user_convention)
6220 {
6221 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6222 sorry ("non-AAPCS derived PCS variant");
6223 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6224 error ("variadic functions must use the base AAPCS variant");
6225 }
6226
6227 if (base_rules)
6228 return ARM_PCS_AAPCS;
6229 else if (user_convention)
6230 return user_pcs;
6231 #if 0
6232 /* Unfortunately, this is not safe and can lead to wrong code
6233 being generated (PR96882). Not all calls into the back-end
6234 pass the DECL, so it is unsafe to make any PCS-changing
6235 decisions based on it. In particular the RETURN_IN_MEMORY
6236 hook is only ever passed a TYPE. This needs revisiting to
6237 see if there are any partial improvements that can be
6238 re-enabled. */
6239 else if (decl && flag_unit_at_a_time)
6240 {
6241 /* Local functions never leak outside this compilation unit,
6242 so we are free to use whatever conventions are
6243 appropriate. */
6244 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6245 cgraph_node *local_info_node
6246 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6247 if (local_info_node && local_info_node->local)
6248 return ARM_PCS_AAPCS_LOCAL;
6249 }
6250 #endif
6251 }
6252 else if (user_convention && user_pcs != arm_pcs_default)
6253 sorry ("PCS variant");
6254
6255 /* For everything else we use the target's default. */
6256 return arm_pcs_default;
6257 }
6258
6259
6260 static void
6261 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6262 const_tree fntype ATTRIBUTE_UNUSED,
6263 rtx libcall ATTRIBUTE_UNUSED,
6264 const_tree fndecl ATTRIBUTE_UNUSED)
6265 {
6266 /* Record the unallocated VFP registers. */
6267 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6268 pcum->aapcs_vfp_reg_alloc = 0;
6269 }
6270
6271 /* Bitmasks that indicate whether earlier versions of GCC would have
6272 taken a different path through the ABI logic. This should result in
6273 a -Wpsabi warning if the earlier path led to a different ABI decision.
6274
6275 WARN_PSABI_EMPTY_CXX17_BASE
6276 Indicates that the type includes an artificial empty C++17 base field
6277 that, prior to GCC 10.1, would prevent the type from being treated as
6278 a HFA or HVA. See PR94711 for details.
6279
6280 WARN_PSABI_NO_UNIQUE_ADDRESS
6281 Indicates that the type includes an empty [[no_unique_address]] field
6282 that, prior to GCC 10.1, would prevent the type from being treated as
6283 a HFA or HVA. */
6284 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6285 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6286 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6287
6288 /* Walk down the type tree of TYPE counting consecutive base elements.
6289 If *MODEP is VOIDmode, then set it to the first valid floating point
6290 type. If a non-floating point type is found, or if a floating point
6291 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6292 otherwise return the count in the sub-tree.
6293
6294 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6295 function has changed its behavior relative to earlier versions of GCC.
6296 Normally the argument should be nonnull and point to a zero-initialized
6297 variable. The function then records whether the ABI decision might
6298 be affected by a known fix to the ABI logic, setting the associated
6299 WARN_PSABI_* bits if so.
6300
6301 When the argument is instead a null pointer, the function tries to
6302 simulate the behavior of GCC before all such ABI fixes were made.
6303 This is useful to check whether the function returns something
6304 different after the ABI fixes. */
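/* E.g. for struct { float x, y, z; } this returns 3 with *MODEP set to
   SFmode (a homogeneous aggregate of three floats), whereas
   struct { float x; double y; } returns -1 because the element modes
   differ.  */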
6305 static int
6306 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6307 unsigned int *warn_psabi_flags)
6308 {
6309 machine_mode mode;
6310 HOST_WIDE_INT size;
6311
6312 switch (TREE_CODE (type))
6313 {
6314 case REAL_TYPE:
6315 mode = TYPE_MODE (type);
6316 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6317 return -1;
6318
6319 if (*modep == VOIDmode)
6320 *modep = mode;
6321
6322 if (*modep == mode)
6323 return 1;
6324
6325 break;
6326
6327 case COMPLEX_TYPE:
6328 mode = TYPE_MODE (TREE_TYPE (type));
6329 if (mode != DFmode && mode != SFmode)
6330 return -1;
6331
6332 if (*modep == VOIDmode)
6333 *modep = mode;
6334
6335 if (*modep == mode)
6336 return 2;
6337
6338 break;
6339
6340 case VECTOR_TYPE:
6341 /* Use V2SImode and V4SImode as representatives of all 64-bit
6342 and 128-bit vector types, whether or not those modes are
6343 supported with the present options. */
6344 size = int_size_in_bytes (type);
6345 switch (size)
6346 {
6347 case 8:
6348 mode = V2SImode;
6349 break;
6350 case 16:
6351 mode = V4SImode;
6352 break;
6353 default:
6354 return -1;
6355 }
6356
6357 if (*modep == VOIDmode)
6358 *modep = mode;
6359
6360 /* Vector modes are considered to be opaque: two vectors are
6361 equivalent for the purposes of being homogeneous aggregates
6362 if they are the same size. */
6363 if (*modep == mode)
6364 return 1;
6365
6366 break;
6367
6368 case ARRAY_TYPE:
6369 {
6370 int count;
6371 tree index = TYPE_DOMAIN (type);
6372
6373 /* Can't handle incomplete types nor sizes that are not
6374 fixed. */
6375 if (!COMPLETE_TYPE_P (type)
6376 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6377 return -1;
6378
6379 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6380 warn_psabi_flags);
6381 if (count == -1
6382 || !index
6383 || !TYPE_MAX_VALUE (index)
6384 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6385 || !TYPE_MIN_VALUE (index)
6386 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6387 || count < 0)
6388 return -1;
6389
6390 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6391 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6392
6393 /* There must be no padding. */
6394 if (wi::to_wide (TYPE_SIZE (type))
6395 != count * GET_MODE_BITSIZE (*modep))
6396 return -1;
6397
6398 return count;
6399 }
6400
6401 case RECORD_TYPE:
6402 {
6403 int count = 0;
6404 int sub_count;
6405 tree field;
6406
6407 /* Can't handle incomplete types nor sizes that are not
6408 fixed. */
6409 if (!COMPLETE_TYPE_P (type)
6410 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6411 return -1;
6412
6413 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6414 {
6415 if (TREE_CODE (field) != FIELD_DECL)
6416 continue;
6417
6418 if (DECL_FIELD_ABI_IGNORED (field))
6419 {
6420 /* See whether this is something that earlier versions of
6421 GCC failed to ignore. */
6422 unsigned int flag;
6423 if (lookup_attribute ("no_unique_address",
6424 DECL_ATTRIBUTES (field)))
6425 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6426 else if (cxx17_empty_base_field_p (field))
6427 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6428 else
6429 /* No compatibility problem. */
6430 continue;
6431
6432 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6433 if (warn_psabi_flags)
6434 {
6435 *warn_psabi_flags |= flag;
6436 continue;
6437 }
6438 }
6439 /* A zero-width bitfield may affect layout in some
6440 circumstances, but adds no members. The determination
6441 of whether or not a type is an HFA is performed after
6442 layout is complete, so if the type still looks like an
6443 HFA afterwards, it is still classed as one. This is
6444 potentially an ABI break for the hard-float ABI. */
6445 else if (DECL_BIT_FIELD (field)
6446 && integer_zerop (DECL_SIZE (field)))
6447 {
6448              /* Prior to GCC-12 these fields were stripped early,
6449 hiding them from the back-end entirely and
6450 resulting in the correct behaviour for argument
6451 passing. Simulate that old behaviour without
6452 generating a warning. */
6453 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6454 continue;
6455 if (warn_psabi_flags)
6456 {
6457 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6458 continue;
6459 }
6460 }
6461
6462 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6463 warn_psabi_flags);
6464 if (sub_count < 0)
6465 return -1;
6466 count += sub_count;
6467 }
6468
6469 /* There must be no padding. */
6470 if (wi::to_wide (TYPE_SIZE (type))
6471 != count * GET_MODE_BITSIZE (*modep))
6472 return -1;
6473
6474 return count;
6475 }
6476
6477 case UNION_TYPE:
6478 case QUAL_UNION_TYPE:
6479 {
6480 /* These aren't very interesting except in a degenerate case. */
6481 int count = 0;
6482 int sub_count;
6483 tree field;
6484
6485 /* Can't handle incomplete types nor sizes that are not
6486 fixed. */
6487 if (!COMPLETE_TYPE_P (type)
6488 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6489 return -1;
6490
6491 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6492 {
6493 if (TREE_CODE (field) != FIELD_DECL)
6494 continue;
6495
6496 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6497 warn_psabi_flags);
6498 if (sub_count < 0)
6499 return -1;
6500 count = count > sub_count ? count : sub_count;
6501 }
6502
6503 /* There must be no padding. */
6504 if (wi::to_wide (TYPE_SIZE (type))
6505 != count * GET_MODE_BITSIZE (*modep))
6506 return -1;
6507
6508 return count;
6509 }
6510
6511 default:
6512 break;
6513 }
6514
6515 return -1;
6516 }
6517
6518 /* Return true if PCS_VARIANT should use VFP registers. */
6519 static bool
6520 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6521 {
6522 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6523 {
6524 static bool seen_thumb1_vfp = false;
6525
6526 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6527 {
6528 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6529 /* sorry() is not immediately fatal, so only display this once. */
6530 seen_thumb1_vfp = true;
6531 }
6532
6533 return true;
6534 }
6535
6536 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6537 return false;
6538
6539 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6540 (TARGET_VFP_DOUBLE || !is_double));
6541 }
6542
6543 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6544 suitable for passing or returning in VFP registers for the PCS
6545 variant selected. If it is, then *BASE_MODE is updated to contain
6546 a machine mode describing each element of the argument's type and
6547 *COUNT to hold the number of such elements. */
6548 static bool
6549 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6550 machine_mode mode, const_tree type,
6551 machine_mode *base_mode, int *count)
6552 {
6553 machine_mode new_mode = VOIDmode;
6554
6555 /* If we have the type information, prefer that to working things
6556 out from the mode. */
6557 if (type)
6558 {
6559 unsigned int warn_psabi_flags = 0;
6560 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6561 &warn_psabi_flags);
6562 if (ag_count > 0 && ag_count <= 4)
6563 {
6564 static unsigned last_reported_type_uid;
6565 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6566 int alt;
6567 if (warn_psabi
6568 && warn_psabi_flags
6569 && uid != last_reported_type_uid
6570 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6571 != ag_count))
6572 {
6573 const char *url10
6574 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6575 const char *url12
6576 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6577 gcc_assert (alt == -1);
6578 last_reported_type_uid = uid;
6579 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6580 qualification. */
6581 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6582 inform (input_location, "parameter passing for argument of "
6583 "type %qT with %<[[no_unique_address]]%> members "
6584 "changed %{in GCC 10.1%}",
6585 TYPE_MAIN_VARIANT (type), url10);
6586 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6587 inform (input_location, "parameter passing for argument of "
6588 "type %qT when C++17 is enabled changed to match "
6589 "C++14 %{in GCC 10.1%}",
6590 TYPE_MAIN_VARIANT (type), url10);
6591 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6592 inform (input_location, "parameter passing for argument of "
6593 "type %qT changed %{in GCC 12.1%}",
6594 TYPE_MAIN_VARIANT (type), url12);
6595 }
6596 *count = ag_count;
6597 }
6598 else
6599 return false;
6600 }
6601 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6602 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6603 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6604 {
6605 *count = 1;
6606 new_mode = mode;
6607 }
6608 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6609 {
6610 *count = 2;
6611 new_mode = (mode == DCmode ? DFmode : SFmode);
6612 }
6613 else
6614 return false;
6615
6616
6617 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6618 return false;
6619
6620 *base_mode = new_mode;
6621
6622 if (TARGET_GENERAL_REGS_ONLY)
6623 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6624 type);
6625
6626 return true;
6627 }
6628
6629 static bool
6630 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6631 machine_mode mode, const_tree type)
6632 {
6633 int count ATTRIBUTE_UNUSED;
6634 machine_mode ag_mode ATTRIBUTE_UNUSED;
6635
6636 if (!use_vfp_abi (pcs_variant, false))
6637 return false;
6638 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6639 &ag_mode, &count);
6640 }
6641
6642 static bool
6643 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6644 const_tree type)
6645 {
6646 if (!use_vfp_abi (pcum->pcs_variant, false))
6647 return false;
6648
6649 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6650 &pcum->aapcs_vfp_rmode,
6651 &pcum->aapcs_vfp_rcount);
6652 }
6653
6654 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6655 for the behaviour of this function. */
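/* E.g. a homogeneous aggregate of two doubles gives shift == 2 and
   mask == 0xf below, so it needs four consecutive free single-precision
   slots starting at an even register number (d0/d1, i.e. s0-s3, when
   they are still free).  */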
6656
6657 static bool
6658 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6659 const_tree type ATTRIBUTE_UNUSED)
6660 {
6661 int rmode_size
6662 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6663 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6664 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6665 int regno;
6666
6667 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6668 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6669 {
6670 pcum->aapcs_vfp_reg_alloc = mask << regno;
6671 if (mode == BLKmode
6672 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6673 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6674 {
6675 int i;
6676 int rcount = pcum->aapcs_vfp_rcount;
6677 int rshift = shift;
6678 machine_mode rmode = pcum->aapcs_vfp_rmode;
6679 rtx par;
6680 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6681 {
6682 /* Avoid using unsupported vector modes. */
6683 if (rmode == V2SImode)
6684 rmode = DImode;
6685 else if (rmode == V4SImode)
6686 {
6687 rmode = DImode;
6688 rcount *= 2;
6689 rshift /= 2;
6690 }
6691 }
6692 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6693 for (i = 0; i < rcount; i++)
6694 {
6695 rtx tmp = gen_rtx_REG (rmode,
6696 FIRST_VFP_REGNUM + regno + i * rshift);
6697 tmp = gen_rtx_EXPR_LIST
6698 (VOIDmode, tmp,
6699 GEN_INT (i * GET_MODE_SIZE (rmode)));
6700 XVECEXP (par, 0, i) = tmp;
6701 }
6702
6703 pcum->aapcs_reg = par;
6704 }
6705 else
6706 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6707 return true;
6708 }
6709 return false;
6710 }
6711
6712 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6713 comment there for the behaviour of this function. */
6714
6715 static rtx
6716 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6717 machine_mode mode,
6718 const_tree type ATTRIBUTE_UNUSED)
6719 {
6720 if (!use_vfp_abi (pcs_variant, false))
6721 return NULL;
6722
6723 if (mode == BLKmode
6724 || (GET_MODE_CLASS (mode) == MODE_INT
6725 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6726 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6727 {
6728 int count;
6729 machine_mode ag_mode;
6730 int i;
6731 rtx par;
6732 int shift;
6733
6734 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6735 &ag_mode, &count);
6736
6737 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6738 {
6739 if (ag_mode == V2SImode)
6740 ag_mode = DImode;
6741 else if (ag_mode == V4SImode)
6742 {
6743 ag_mode = DImode;
6744 count *= 2;
6745 }
6746 }
6747       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6748 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6749 for (i = 0; i < count; i++)
6750 {
6751 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6752 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6753 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6754 XVECEXP (par, 0, i) = tmp;
6755 }
6756
6757 return par;
6758 }
6759
6760 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6761 }
6762
6763 static void
6764 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6765 machine_mode mode ATTRIBUTE_UNUSED,
6766 const_tree type ATTRIBUTE_UNUSED)
6767 {
6768 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6769 pcum->aapcs_vfp_reg_alloc = 0;
6770 return;
6771 }
6772
6773 #define AAPCS_CP(X) \
6774 { \
6775 aapcs_ ## X ## _cum_init, \
6776 aapcs_ ## X ## _is_call_candidate, \
6777 aapcs_ ## X ## _allocate, \
6778 aapcs_ ## X ## _is_return_candidate, \
6779 aapcs_ ## X ## _allocate_return_reg, \
6780 aapcs_ ## X ## _advance \
6781 }
6782
6783 /* Table of co-processors that can be used to pass arguments in
6784 registers. Ideally no argument should be a candidate for more than
6785 one co-processor table entry, but the table is processed in order
6786 and stops after the first match. If that entry then fails to put
6787 the argument into a co-processor register, the argument will go on
6788 the stack. */
6789 static struct
6790 {
6791 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6792 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6793
6794 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6795 BLKmode) is a candidate for this co-processor's registers; this
6796 function should ignore any position-dependent state in
6797 CUMULATIVE_ARGS and only use call-type dependent information. */
6798 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6799
6800 /* Return true if the argument does get a co-processor register; it
6801 should set aapcs_reg to an RTX of the register allocated as is
6802 required for a return from FUNCTION_ARG. */
6803 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6804
6805 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6806 be returned in this co-processor's registers. */
6807 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6808
6809 /* Allocate and return an RTX element to hold the return type of a call. This
6810 routine must not fail and will only be called if is_return_candidate
6811 returned true with the same parameters. */
6812 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6813
6814 /* Finish processing this argument and prepare to start processing
6815 the next one. */
6816 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6817 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6818 {
6819 AAPCS_CP(vfp)
6820 };
6821
6822 #undef AAPCS_CP
6823
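/* Return the index in aapcs_cp_arg_layout of the first co-processor
   slot whose is_call_candidate hook accepts an argument of MODE and
   TYPE, or -1 if no co-processor will take it.  */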
6824 static int
6825 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6826 const_tree type)
6827 {
6828 int i;
6829
6830 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6831 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6832 return i;
6833
6834 return -1;
6835 }
6836
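/* Return the index in aapcs_cp_arg_layout of the co-processor slot
   that can return a value of type TYPE for a call to a function of
   type (or decl) FNTYPE, or -1 if no co-processor slot applies (for
   example, for the base AAPCS variant).  */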
6837 static int
6838 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6839 {
6840 /* We aren't passed a decl, so we can't check that a call is local.
6841 However, it isn't clear that that would be a win anyway, since it
6842 might limit some tail-calling opportunities. */
6843 enum arm_pcs pcs_variant;
6844
6845 if (fntype)
6846 {
6847 const_tree fndecl = NULL_TREE;
6848
6849 if (TREE_CODE (fntype) == FUNCTION_DECL)
6850 {
6851 fndecl = fntype;
6852 fntype = TREE_TYPE (fntype);
6853 }
6854
6855 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6856 }
6857 else
6858 pcs_variant = arm_pcs_default;
6859
6860 if (pcs_variant != ARM_PCS_AAPCS)
6861 {
6862 int i;
6863
6864 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6865 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6866 TYPE_MODE (type),
6867 type))
6868 return i;
6869 }
6870 return -1;
6871 }
6872
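/* Return an RTX describing where a value of mode MODE and type TYPE
   is returned under the AAPCS.  FNTYPE may be a function type or a
   FUNCTION_DECL; it is NULL_TREE for a libcall.  */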
6873 static rtx
6874 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6875 const_tree fntype)
6876 {
6877 /* We aren't passed a decl, so we can't check that a call is local.
6878 However, it isn't clear that that would be a win anyway, since it
6879 might limit some tail-calling opportunities. */
6880 enum arm_pcs pcs_variant;
6881 int unsignedp ATTRIBUTE_UNUSED;
6882
6883 if (fntype)
6884 {
6885 const_tree fndecl = NULL_TREE;
6886
6887 if (TREE_CODE (fntype) == FUNCTION_DECL)
6888 {
6889 fndecl = fntype;
6890 fntype = TREE_TYPE (fntype);
6891 }
6892
6893 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6894 }
6895 else
6896 pcs_variant = arm_pcs_default;
6897
6898 /* Promote integer types. */
6899 if (type && INTEGRAL_TYPE_P (type))
6900 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6901
6902 if (pcs_variant != ARM_PCS_AAPCS)
6903 {
6904 int i;
6905
6906 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6907 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6908 type))
6909 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6910 mode, type);
6911 }
6912
6913 /* Promotes small structs returned in a register to full-word size
6914 for big-endian AAPCS. */
6915 if (type && arm_return_in_msb (type))
6916 {
6917 HOST_WIDE_INT size = int_size_in_bytes (type);
6918 if (size % UNITS_PER_WORD != 0)
6919 {
6920 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6921 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6922 }
6923 }
6924
6925 return gen_rtx_REG (mode, R0_REGNUM);
6926 }
6927
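/* Return an RTX describing where a libcall value of mode MODE is
   returned under the AAPCS.  */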
6928 static rtx
6929 aapcs_libcall_value (machine_mode mode)
6930 {
6931 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6932 && GET_MODE_SIZE (mode) <= 4)
6933 mode = SImode;
6934
6935 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6936 }
6937
6938 /* Lay out a function argument using the AAPCS rules. The rule
6939 numbers referred to here are those in the AAPCS. */
6940 static void
6941 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6942 const_tree type, bool named)
6943 {
6944 int nregs, nregs2;
6945 int ncrn;
6946
6947 /* We only need to do this once per argument. */
6948 if (pcum->aapcs_arg_processed)
6949 return;
6950
6951 pcum->aapcs_arg_processed = true;
6952
6953 /* Special case: if named is false then we are handling an incoming
6954 anonymous argument which is on the stack. */
6955 if (!named)
6956 return;
6957
6958 /* Is this a potential co-processor register candidate? */
6959 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6960 {
6961 int slot = aapcs_select_call_coproc (pcum, mode, type);
6962 pcum->aapcs_cprc_slot = slot;
6963
6964 /* We don't have to apply any of the rules from part B of the
6965 preparation phase, these are handled elsewhere in the
6966 compiler. */
6967
6968 if (slot >= 0)
6969 {
6970 /* A Co-processor register candidate goes either in its own
6971 class of registers or on the stack. */
6972 if (!pcum->aapcs_cprc_failed[slot])
6973 {
6974 /* C1.cp - Try to allocate the argument to co-processor
6975 registers. */
6976 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6977 return;
6978
6979 /* C2.cp - Put the argument on the stack and note that we
6980 can't assign any more candidates in this slot. We also
6981 need to note that we have allocated stack space, so that
6982 we won't later try to split a non-cprc candidate between
6983 core registers and the stack. */
6984 pcum->aapcs_cprc_failed[slot] = true;
6985 pcum->can_split = false;
6986 }
6987
6988 /* We didn't get a register, so this argument goes on the
6989 stack. */
6990 gcc_assert (pcum->can_split == false);
6991 return;
6992 }
6993 }
6994
6995 /* C3 - For double-word aligned arguments, round the NCRN up to the
6996 next even number. */
6997 ncrn = pcum->aapcs_ncrn;
6998 if (ncrn & 1)
6999 {
7000 int res = arm_needs_doubleword_align (mode, type);
7001 /* Only warn during RTL expansion of call stmts, otherwise we would
7002 warn e.g. during gimplification even on functions that will be
7003 always inlined, and we'd warn multiple times. Don't warn when
7004 called in expand_function_start either, as we warn instead in
7005 arm_function_arg_boundary in that case. */
7006 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7007 inform (input_location, "parameter passing for argument of type "
7008 "%qT changed in GCC 7.1", type);
7009 else if (res > 0)
7010 ncrn++;
7011 }
7012
7013 nregs = ARM_NUM_REGS2 (mode, type);
7014
7015 /* Sigh, this test should really assert that nregs > 0, but a GCC
7016 extension allows empty structs and then gives them zero size; it
7017 then allows such a structure to be passed by value. For some of
7018 the code below we have to pretend that such an argument has
7019 non-zero size so that we 'locate' it correctly either in
7020 registers or on the stack. */
7021 gcc_assert (nregs >= 0);
7022
7023 nregs2 = nregs ? nregs : 1;
7024
7025 /* C4 - Argument fits entirely in core registers. */
7026 if (ncrn + nregs2 <= NUM_ARG_REGS)
7027 {
7028 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7029 pcum->aapcs_next_ncrn = ncrn + nregs;
7030 return;
7031 }
7032
7033 /* C5 - Some core registers left and there are no arguments already
7034 on the stack: split this argument between the remaining core
7035 registers and the stack. */
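/* For example, with r0-r2 already used, a 16-byte structure that only
   needs word alignment gets its first word in r3 (aapcs_partial == 4)
   and its remaining 12 bytes on the stack.  */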
7036 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7037 {
7038 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7039 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7040 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7041 return;
7042 }
7043
7044 /* C6 - NCRN is set to 4. */
7045 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7046
7047 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7048 return;
7049 }
7050
7051 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7052 for a call to a function whose data type is FNTYPE.
7053 For a library call, FNTYPE is NULL. */
7054 void
7055 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7056 rtx libname,
7057 tree fndecl ATTRIBUTE_UNUSED)
7058 {
7059 /* Long call handling. */
7060 if (fntype)
7061 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7062 else
7063 pcum->pcs_variant = arm_pcs_default;
7064
7065 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7066 {
7067 if (arm_libcall_uses_aapcs_base (libname))
7068 pcum->pcs_variant = ARM_PCS_AAPCS;
7069
7070 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7071 pcum->aapcs_reg = NULL_RTX;
7072 pcum->aapcs_partial = 0;
7073 pcum->aapcs_arg_processed = false;
7074 pcum->aapcs_cprc_slot = -1;
7075 pcum->can_split = true;
7076
7077 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7078 {
7079 int i;
7080
7081 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7082 {
7083 pcum->aapcs_cprc_failed[i] = false;
7084 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7085 }
7086 }
7087 return;
7088 }
7089
7090 /* Legacy ABIs */
7091
7092 /* On the ARM, the offset starts at 0. */
7093 pcum->nregs = 0;
7094 pcum->iwmmxt_nregs = 0;
7095 pcum->can_split = true;
7096
7097 /* Varargs vectors are treated the same as long long.
7098 named_count avoids having to change the way arm handles 'named'. */
7099 pcum->named_count = 0;
7100 pcum->nargs = 0;
7101
7102 if (TARGET_REALLY_IWMMXT && fntype)
7103 {
7104 tree fn_arg;
7105
7106 for (fn_arg = TYPE_ARG_TYPES (fntype);
7107 fn_arg;
7108 fn_arg = TREE_CHAIN (fn_arg))
7109 pcum->named_count += 1;
7110
7111 if (! pcum->named_count)
7112 pcum->named_count = INT_MAX;
7113 }
7114 }
7115
7116 /* Return 2 if double word alignment is required for argument passing,
7117 but wasn't required before the fix for PR88469.
7118 Return 1 if double word alignment is required for argument passing.
7119 Return -1 if double word alignment used to be required for argument
7120 passing before PR77728 ABI fix, but is not required anymore.
7121 Return 0 if double word alignment is not required and wasn't required
7122 before either. */
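/* For example, under AAPCS a "long long" argument (or an aggregate
   containing one) requires 8-byte alignment and therefore starts in an
   even-numbered core register, whereas a plain "int" only needs
   PARM_BOUNDARY.  */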
7123 static int
7124 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7125 {
7126 if (!type)
7127 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7128
7129 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7130 if (!AGGREGATE_TYPE_P (type))
7131 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7132
7133 /* Array types: Use member alignment of element type. */
7134 if (TREE_CODE (type) == ARRAY_TYPE)
7135 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7136
7137 int ret = 0;
7138 int ret2 = 0;
7139 /* Record/aggregate types: Use greatest member alignment of any member.
7140
7141 Note that we explicitly consider zero-sized fields here, even though
7142 they don't map to AAPCS machine types. For example, in:
7143
7144 struct __attribute__((aligned(8))) empty {};
7145
7146 struct s {
7147 [[no_unique_address]] empty e;
7148 int x;
7149 };
7150
7151 "s" contains only one Fundamental Data Type (the int field)
7152 but gains 8-byte alignment and size thanks to "e". */
7153 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7154 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7155 {
7156 if (TREE_CODE (field) == FIELD_DECL)
7157 return 1;
7158 else
7159 /* Before PR77728 fix, we were incorrectly considering also
7160 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7161 Make sure we can warn about that with -Wpsabi. */
7162 ret = -1;
7163 }
7164 else if (TREE_CODE (field) == FIELD_DECL
7165 && DECL_BIT_FIELD_TYPE (field)
7166 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7167 ret2 = 1;
7168
7169 if (ret2)
7170 return 2;
7171
7172 return ret;
7173 }
7174
7175
7176 /* Determine where to put an argument to a function.
7177 Value is zero to push the argument on the stack,
7178 or a hard register in which to store the argument.
7179
7180 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7181 the preceding args and about the function being called.
7182 ARG is a description of the argument.
7183
7184 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7185 other arguments are passed on the stack. If (NAMED == 0) (which happens
7186 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7187 defined), say it is passed on the stack (function_prologue will
7188 indeed make it be passed on the stack if necessary). */
7189
7190 static rtx
7191 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7192 {
7193 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7194 int nregs;
7195
7196 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7197 a call insn (op3 of a call_value insn). */
7198 if (arg.end_marker_p ())
7199 return const0_rtx;
7200
7201 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7202 {
7203 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7204 return pcum->aapcs_reg;
7205 }
7206
7207 /* Varargs vectors are treated the same as long long.
7208 named_count avoids having to change the way arm handles 'named'. */
7209 if (TARGET_IWMMXT_ABI
7210 && arm_vector_mode_supported_p (arg.mode)
7211 && pcum->named_count > pcum->nargs + 1)
7212 {
7213 if (pcum->iwmmxt_nregs <= 9)
7214 return gen_rtx_REG (arg.mode,
7215 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7216 else
7217 {
7218 pcum->can_split = false;
7219 return NULL_RTX;
7220 }
7221 }
7222
7223 /* Put doubleword aligned quantities in even register pairs. */
7224 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7225 {
7226 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7227 if (res < 0 && warn_psabi)
7228 inform (input_location, "parameter passing for argument of type "
7229 "%qT changed in GCC 7.1", arg.type);
7230 else if (res > 0)
7231 {
7232 pcum->nregs++;
7233 if (res > 1 && warn_psabi)
7234 inform (input_location, "parameter passing for argument of type "
7235 "%qT changed in GCC 9.1", arg.type);
7236 }
7237 }
7238
7239 /* Only allow splitting an arg between regs and memory if all preceding
7240 args were allocated to regs. For args passed by reference we only count
7241 the reference pointer. */
7242 if (pcum->can_split)
7243 nregs = 1;
7244 else
7245 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7246
7247 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7248 return NULL_RTX;
7249
7250 return gen_rtx_REG (arg.mode, pcum->nregs);
7251 }
7252
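/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Return the alignment, in
   bits, required for an argument of MODE and TYPE: DOUBLEWORD_ALIGNMENT
   when double-word alignment is needed, PARM_BOUNDARY otherwise, with
   -Wpsabi notes where the rules changed in GCC 7.1 and 9.1.  */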
7253 static unsigned int
7254 arm_function_arg_boundary (machine_mode mode, const_tree type)
7255 {
7256 if (!ARM_DOUBLEWORD_ALIGN)
7257 return PARM_BOUNDARY;
7258
7259 int res = arm_needs_doubleword_align (mode, type);
7260 if (res < 0 && warn_psabi)
7261 inform (input_location, "parameter passing for argument of type %qT "
7262 "changed in GCC 7.1", type);
7263 if (res > 1 && warn_psabi)
7264 inform (input_location, "parameter passing for argument of type "
7265 "%qT changed in GCC 9.1", type);
7266
7267 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7268 }
7269
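/* Implement TARGET_ARG_PARTIAL_BYTES.  Return the number of bytes of
   ARG that are passed in registers when the remainder of the argument
   is passed on the stack.  */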
7270 static int
7271 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7272 {
7273 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7274 int nregs = pcum->nregs;
7275
7276 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7277 {
7278 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7279 return pcum->aapcs_partial;
7280 }
7281
7282 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7283 return 0;
7284
7285 if (NUM_ARG_REGS > nregs
7286 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7287 && pcum->can_split)
7288 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7289
7290 return 0;
7291 }
7292
7293 /* Update the data in PCUM to advance over argument ARG. */
7294
7295 static void
7296 arm_function_arg_advance (cumulative_args_t pcum_v,
7297 const function_arg_info &arg)
7298 {
7299 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7300
7301 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7302 {
7303 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7304
7305 if (pcum->aapcs_cprc_slot >= 0)
7306 {
7307 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7308 arg.type);
7309 pcum->aapcs_cprc_slot = -1;
7310 }
7311
7312 /* Generic stuff. */
7313 pcum->aapcs_arg_processed = false;
7314 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7315 pcum->aapcs_reg = NULL_RTX;
7316 pcum->aapcs_partial = 0;
7317 }
7318 else
7319 {
7320 pcum->nargs += 1;
7321 if (arm_vector_mode_supported_p (arg.mode)
7322 && pcum->named_count > pcum->nargs
7323 && TARGET_IWMMXT_ABI)
7324 pcum->iwmmxt_nregs += 1;
7325 else
7326 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7327 }
7328 }
7329
7330 /* Variable sized types are passed by reference. This is a GCC
7331 extension to the ARM ABI. */
7332
7333 static bool
7334 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7335 {
7336 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7337 }
7338 \f
7339 /* Encode the current state of the #pragma [no_]long_calls. */
7340 typedef enum
7341 {
7342 OFF, /* No #pragma [no_]long_calls is in effect. */
7343 LONG, /* #pragma long_calls is in effect. */
7344 SHORT /* #pragma no_long_calls is in effect. */
7345 } arm_pragma_enum;
7346
7347 static arm_pragma_enum arm_pragma_long_calls = OFF;
7348
7349 void
7350 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7351 {
7352 arm_pragma_long_calls = LONG;
7353 }
7354
7355 void
7356 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7357 {
7358 arm_pragma_long_calls = SHORT;
7359 }
7360
7361 void
7362 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7363 {
7364 arm_pragma_long_calls = OFF;
7365 }
7366 \f
7367 /* Handle an attribute requiring a FUNCTION_DECL;
7368 arguments as in struct attribute_spec.handler. */
7369 static tree
7370 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7371 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7372 {
7373 if (TREE_CODE (*node) != FUNCTION_DECL)
7374 {
7375 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7376 name);
7377 *no_add_attrs = true;
7378 }
7379
7380 return NULL_TREE;
7381 }
7382
7383 /* Handle an "interrupt" or "isr" attribute;
7384 arguments as in struct attribute_spec.handler. */
7385 static tree
7386 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7387 bool *no_add_attrs)
7388 {
7389 if (DECL_P (*node))
7390 {
7391 if (TREE_CODE (*node) != FUNCTION_DECL)
7392 {
7393 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7394 name);
7395 *no_add_attrs = true;
7396 }
7397 else if (TARGET_VFP_BASE)
7398 {
7399 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7400 name);
7401 }
7402 /* FIXME: the argument if any is checked for type attributes;
7403 should it be checked for decl ones? */
7404 }
7405 else
7406 {
7407 if (TREE_CODE (*node) == FUNCTION_TYPE
7408 || TREE_CODE (*node) == METHOD_TYPE)
7409 {
7410 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7411 {
7412 warning (OPT_Wattributes, "%qE attribute ignored",
7413 name);
7414 *no_add_attrs = true;
7415 }
7416 }
7417 else if (TREE_CODE (*node) == POINTER_TYPE
7418 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7419 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7420 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7421 {
7422 *node = build_variant_type_copy (*node);
7423 TREE_TYPE (*node) = build_type_attribute_variant
7424 (TREE_TYPE (*node),
7425 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7426 *no_add_attrs = true;
7427 }
7428 else
7429 {
7430 /* Possibly pass this attribute on from the type to a decl. */
7431 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7432 | (int) ATTR_FLAG_FUNCTION_NEXT
7433 | (int) ATTR_FLAG_ARRAY_NEXT))
7434 {
7435 *no_add_attrs = true;
7436 return tree_cons (name, args, NULL_TREE);
7437 }
7438 else
7439 {
7440 warning (OPT_Wattributes, "%qE attribute ignored",
7441 name);
7442 }
7443 }
7444 }
7445
7446 return NULL_TREE;
7447 }
7448
7449 /* Handle a "pcs" attribute; arguments as in struct
7450 attribute_spec.handler. */
7451 static tree
7452 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7453 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7454 {
7455 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7456 {
7457 warning (OPT_Wattributes, "%qE attribute ignored", name);
7458 *no_add_attrs = true;
7459 }
7460 return NULL_TREE;
7461 }
7462
7463 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7464 /* Handle the "notshared" attribute. This attribute is another way of
7465 requesting hidden visibility. ARM's compiler supports
7466 "__declspec(notshared)"; we support the same thing via an
7467 attribute. */
7468
7469 static tree
7470 arm_handle_notshared_attribute (tree *node,
7471 tree name ATTRIBUTE_UNUSED,
7472 tree args ATTRIBUTE_UNUSED,
7473 int flags ATTRIBUTE_UNUSED,
7474 bool *no_add_attrs)
7475 {
7476 tree decl = TYPE_NAME (*node);
7477
7478 if (decl)
7479 {
7480 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7481 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7482 *no_add_attrs = false;
7483 }
7484 return NULL_TREE;
7485 }
7486 #endif
7487
7488 /* This function returns true if a function with declaration FNDECL and type
7489 FNTYPE uses the stack to pass arguments or return variables and false
7490 otherwise. This is used for functions with the attributes
7491 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7492 diagnostic messages if the stack is used. NAME is the name of the attribute
7493 used. */
7494
7495 static bool
7496 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7497 {
7498 function_args_iterator args_iter;
7499 CUMULATIVE_ARGS args_so_far_v;
7500 cumulative_args_t args_so_far;
7501 bool first_param = true;
7502 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7503
7504 /* Error out if any argument is passed on the stack. */
7505 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7506 args_so_far = pack_cumulative_args (&args_so_far_v);
7507 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7508 {
7509 rtx arg_rtx;
7510
7511 prev_arg_type = arg_type;
7512 if (VOID_TYPE_P (arg_type))
7513 continue;
7514
7515 function_arg_info arg (arg_type, /*named=*/true);
7516 if (!first_param)
7517 /* ??? We should advance after processing the argument and pass
7518 the argument we're advancing past. */
7519 arm_function_arg_advance (args_so_far, arg);
7520 arg_rtx = arm_function_arg (args_so_far, arg);
7521 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7522 {
7523 error ("%qE attribute not available to functions with arguments "
7524 "passed on the stack", name);
7525 return true;
7526 }
7527 first_param = false;
7528 }
7529
7530 /* Error out for variadic functions since we cannot control how many
7531 arguments will be passed and thus the stack could be used. stdarg_p () is
7532 not used for this check, to avoid walking the argument list twice. */
7533 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7534 {
7535 error ("%qE attribute not available to functions with variable number "
7536 "of arguments", name);
7537 return true;
7538 }
7539
7540 /* Error out if return value is passed on the stack. */
7541 ret_type = TREE_TYPE (fntype);
7542 if (arm_return_in_memory (ret_type, fntype))
7543 {
7544 error ("%qE attribute not available to functions that return value on "
7545 "the stack", name);
7546 return true;
7547 }
7548 return false;
7549 }
7550
7551 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7552 function will check whether the attribute is allowed here and will add the
7553 attribute to the function declaration tree or otherwise issue a warning. */
7554
7555 static tree
7556 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7557 tree /* args */,
7558 int /* flags */,
7559 bool *no_add_attrs)
7560 {
7561 tree fndecl;
7562
7563 if (!use_cmse)
7564 {
7565 *no_add_attrs = true;
7566 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7567 "option", name);
7568 return NULL_TREE;
7569 }
7570
7571 /* Ignore attribute for function types. */
7572 if (TREE_CODE (*node) != FUNCTION_DECL)
7573 {
7574 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7575 name);
7576 *no_add_attrs = true;
7577 return NULL_TREE;
7578 }
7579
7580 fndecl = *node;
7581
7582 /* Warn for static linkage functions. */
7583 if (!TREE_PUBLIC (fndecl))
7584 {
7585 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7586 "with static linkage", name);
7587 *no_add_attrs = true;
7588 return NULL_TREE;
7589 }
7590
7591 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7592 TREE_TYPE (fndecl));
7593 return NULL_TREE;
7594 }
7595
7596
7597 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7598 function will check whether the attribute is allowed here and will add the
7599 attribute to the function type tree or otherwise issue a diagnostic. The
7600 reason we check this at declaration time is to only allow the use of the
7601 attribute with declarations of function pointers and not function
7602 declarations. This function checks NODE is of the expected type and issues
7603 diagnostics otherwise using NAME. If it is not of the expected type
7604 *NO_ADD_ATTRS will be set to true. */
7605
7606 static tree
7607 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7608 tree /* args */,
7609 int /* flags */,
7610 bool *no_add_attrs)
7611 {
7612 tree decl = NULL_TREE, fntype = NULL_TREE;
7613 tree type;
7614
7615 if (!use_cmse)
7616 {
7617 *no_add_attrs = true;
7618 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7619 "option", name);
7620 return NULL_TREE;
7621 }
7622
7623 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7624 {
7625 decl = *node;
7626 fntype = TREE_TYPE (decl);
7627 }
7628
7629 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7630 fntype = TREE_TYPE (fntype);
7631
7632 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7633 {
7634 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7635 "function pointer", name);
7636 *no_add_attrs = true;
7637 return NULL_TREE;
7638 }
7639
7640 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7641
7642 if (*no_add_attrs)
7643 return NULL_TREE;
7644
7645 /* Prevent trees being shared among function types with and without
7646 cmse_nonsecure_call attribute. */
7647 type = TREE_TYPE (decl);
7648
7649 type = build_distinct_type_copy (type);
7650 TREE_TYPE (decl) = type;
7651 fntype = type;
7652
7653 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7654 {
7655 type = fntype;
7656 fntype = TREE_TYPE (fntype);
7657 fntype = build_distinct_type_copy (fntype);
7658 TREE_TYPE (type) = fntype;
7659 }
7660
7661 /* Construct a type attribute and add it to the function type. */
7662 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7663 TYPE_ATTRIBUTES (fntype));
7664 TYPE_ATTRIBUTES (fntype) = attrs;
7665 return NULL_TREE;
7666 }
7667
7668 /* Return 0 if the attributes for two types are incompatible, 1 if they
7669 are compatible, and 2 if they are nearly compatible (which causes a
7670 warning to be generated). */
7671 static int
7672 arm_comp_type_attributes (const_tree type1, const_tree type2)
7673 {
7674 int l1, l2, s1, s2;
7675
7676 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7677 TYPE_ATTRIBUTES (type1));
7678 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7679 TYPE_ATTRIBUTES (type2));
7680 if (bool (attrs1) != bool (attrs2))
7681 return 0;
7682 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7683 return 0;
7684
7685 /* Check for mismatch of non-default calling convention. */
7686 if (TREE_CODE (type1) != FUNCTION_TYPE)
7687 return 1;
7688
7689 /* Check for mismatched call attributes. */
7690 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7691 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7692 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7693 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7694
7695 /* Only bother to check if an attribute is defined. */
7696 if (l1 | l2 | s1 | s2)
7697 {
7698 /* If one type has an attribute, the other must have the same attribute. */
7699 if ((l1 != l2) || (s1 != s2))
7700 return 0;
7701
7702 /* Disallow mixed attributes. */
7703 if ((l1 & s2) || (l2 & s1))
7704 return 0;
7705 }
7706
7707 /* Check for mismatched ISR attribute. */
7708 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7709 if (! l1)
7710 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7711 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7712 if (! l2)
7713 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7714 if (l1 != l2)
7715 return 0;
7716
7717 l1 = lookup_attribute ("cmse_nonsecure_call",
7718 TYPE_ATTRIBUTES (type1)) != NULL;
7719 l2 = lookup_attribute ("cmse_nonsecure_call",
7720 TYPE_ATTRIBUTES (type2)) != NULL;
7721
7722 if (l1 != l2)
7723 return 0;
7724
7725 return 1;
7726 }
7727
7728 /* Assigns default attributes to newly defined type. This is used to
7729 set short_call/long_call attributes for function types of
7730 functions defined inside corresponding #pragma scopes. */
7731 static void
7732 arm_set_default_type_attributes (tree type)
7733 {
7734 /* Add __attribute__ ((long_call)) to all functions, when
7735 inside #pragma long_calls or __attribute__ ((short_call)),
7736 when inside #pragma no_long_calls. */
7737 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7738 {
7739 tree type_attr_list, attr_name;
7740 type_attr_list = TYPE_ATTRIBUTES (type);
7741
7742 if (arm_pragma_long_calls == LONG)
7743 attr_name = get_identifier ("long_call");
7744 else if (arm_pragma_long_calls == SHORT)
7745 attr_name = get_identifier ("short_call");
7746 else
7747 return;
7748
7749 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7750 TYPE_ATTRIBUTES (type) = type_attr_list;
7751 }
7752 }
7753 \f
7754 /* Return true if DECL is known to be linked into section SECTION. */
7755
7756 static bool
7757 arm_function_in_section_p (tree decl, section *section)
7758 {
7759 /* We can only be certain about the prevailing symbol definition. */
7760 if (!decl_binds_to_current_def_p (decl))
7761 return false;
7762
7763 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7764 if (!DECL_SECTION_NAME (decl))
7765 {
7766 /* Make sure that we will not create a unique section for DECL. */
7767 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7768 return false;
7769 }
7770
7771 return function_section (decl) == section;
7772 }
7773
7774 /* Return nonzero if a 32-bit "long_call" should be generated for
7775 a call from the current function to DECL. We generate a long_call
7776 if the function:
7777
7778 a. has an __attribute__ ((long_call))
7779 or b. is within the scope of a #pragma long_calls
7780 or c. the -mlong-calls command line switch has been specified
7781
7782 However we do not generate a long call if the function:
7783
7784 d. has an __attribute__ ((short_call))
7785 or e. is inside the scope of a #pragma no_long_calls
7786 or f. is defined in the same section as the current function. */
7787
7788 bool
7789 arm_is_long_call_p (tree decl)
7790 {
7791 tree attrs;
7792
7793 if (!decl)
7794 return TARGET_LONG_CALLS;
7795
7796 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7797 if (lookup_attribute ("short_call", attrs))
7798 return false;
7799
7800 /* For "f", be conservative, and only cater for cases in which the
7801 whole of the current function is placed in the same section. */
7802 if (!flag_reorder_blocks_and_partition
7803 && TREE_CODE (decl) == FUNCTION_DECL
7804 && arm_function_in_section_p (decl, current_function_section ()))
7805 return false;
7806
7807 if (lookup_attribute ("long_call", attrs))
7808 return true;
7809
7810 return TARGET_LONG_CALLS;
7811 }
7812
7813 /* Return nonzero if it is ok to make a tail-call to DECL. */
7814 static bool
7815 arm_function_ok_for_sibcall (tree decl, tree exp)
7816 {
7817 unsigned long func_type;
7818
7819 if (cfun->machine->sibcall_blocked)
7820 return false;
7821
7822 if (TARGET_FDPIC)
7823 {
7824 /* In FDPIC, never tailcall something for which we have no decl:
7825 the target function could be in a different module, requiring
7826 a different FDPIC register value. */
7827 if (decl == NULL)
7828 return false;
7829 }
7830
7831 /* Never tailcall something if we are generating code for Thumb-1. */
7832 if (TARGET_THUMB1)
7833 return false;
7834
7835 /* The PIC register is live on entry to VxWorks PLT entries, so we
7836 must make the call before restoring the PIC register. */
7837 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7838 return false;
7839
7840 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7841 may be used both as target of the call and base register for restoring
7842 the VFP registers */
7843 if (TARGET_APCS_FRAME && TARGET_ARM
7844 && TARGET_HARD_FLOAT
7845 && decl && arm_is_long_call_p (decl))
7846 return false;
7847
7848 /* If we are interworking and the function is not declared static
7849 then we can't tail-call it unless we know that it exists in this
7850 compilation unit (since it might be a Thumb routine). */
7851 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7852 && !TREE_ASM_WRITTEN (decl))
7853 return false;
7854
7855 func_type = arm_current_func_type ();
7856 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7857 if (IS_INTERRUPT (func_type))
7858 return false;
7859
7860 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7861 generated for entry functions themselves. */
7862 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7863 return false;
7864
7865 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7866 this would complicate matters for later code generation. */
7867 if (TREE_CODE (exp) == CALL_EXPR)
7868 {
7869 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7870 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7871 return false;
7872 }
7873
7874 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7875 {
7876 /* Check that the return value locations are the same. For
7877 example that we aren't returning a value from the sibling in
7878 a VFP register but then need to transfer it to a core
7879 register. */
7880 rtx a, b;
7881 tree decl_or_type = decl;
7882
7883 /* If it is an indirect function pointer, get the function type. */
7884 if (!decl)
7885 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7886
7887 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7888 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7889 cfun->decl, false);
7890 if (!rtx_equal_p (a, b))
7891 return false;
7892 }
7893
7894 /* Never tailcall if function may be called with a misaligned SP. */
7895 if (IS_STACKALIGN (func_type))
7896 return false;
7897
7898 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7899 references should become a NOP. Don't convert such calls into
7900 sibling calls. */
7901 if (TARGET_AAPCS_BASED
7902 && arm_abi == ARM_ABI_AAPCS
7903 && decl
7904 && DECL_WEAK (decl))
7905 return false;
7906
7907 /* We cannot do a tailcall for an indirect call by descriptor if all the
7908 argument registers are used because the only register left to load the
7909 address is IP and it will already contain the static chain. */
7910 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7911 {
7912 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7913 CUMULATIVE_ARGS cum;
7914 cumulative_args_t cum_v;
7915
7916 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7917 cum_v = pack_cumulative_args (&cum);
7918
7919 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7920 {
7921 tree type = TREE_VALUE (t);
7922 if (!VOID_TYPE_P (type))
7923 {
7924 function_arg_info arg (type, /*named=*/true);
7925 arm_function_arg_advance (cum_v, arg);
7926 }
7927 }
7928
7929 function_arg_info arg (integer_type_node, /*named=*/true);
7930 if (!arm_function_arg (cum_v, arg))
7931 return false;
7932 }
7933
7934 /* Everything else is ok. */
7935 return true;
7936 }
7937
7938 \f
7939 /* Addressing mode support functions. */
7940
7941 /* Return nonzero if X is a legitimate immediate operand when compiling
7942 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7943 int
7944 legitimate_pic_operand_p (rtx x)
7945 {
7946 if (SYMBOL_REF_P (x)
7947 || (GET_CODE (x) == CONST
7948 && GET_CODE (XEXP (x, 0)) == PLUS
7949 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7950 return 0;
7951
7952 return 1;
7953 }
7954
7955 /* Record that the current function needs a PIC register. If PIC_REG is null,
7956 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7957 both cases cfun->machine->pic_reg is initialized if we have not already done
7958 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7959 the PIC register is reloaded at the current position in the instruction stream
7960 regardless of whether it was loaded before. Otherwise, it is only loaded
7961 if not already done so (crtl->uses_pic_offset_table is null). Note that
7962 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7963 is only supported iff COMPUTE_NOW is false. */
7964
7965 static void
7966 require_pic_register (rtx pic_reg, bool compute_now)
7967 {
7968 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7969
7970 /* A lot of the logic here is made obscure by the fact that this
7971 routine gets called as part of the rtx cost estimation process.
7972 We don't want those calls to affect any assumptions about the real
7973 function; and further, we can't call entry_of_function() until we
7974 start the real expansion process. */
7975 if (!crtl->uses_pic_offset_table || compute_now)
7976 {
7977 gcc_assert (can_create_pseudo_p ()
7978 || (pic_reg != NULL_RTX
7979 && REG_P (pic_reg)
7980 && GET_MODE (pic_reg) == Pmode));
7981 if (arm_pic_register != INVALID_REGNUM
7982 && !compute_now
7983 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7984 {
7985 if (!cfun->machine->pic_reg)
7986 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7987
7988 /* Play games to avoid marking the function as needing pic
7989 if we are being called as part of the cost-estimation
7990 process. */
7991 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7992 crtl->uses_pic_offset_table = 1;
7993 }
7994 else
7995 {
7996 rtx_insn *seq, *insn;
7997
7998 if (pic_reg == NULL_RTX)
7999 pic_reg = gen_reg_rtx (Pmode);
8000 if (!cfun->machine->pic_reg)
8001 cfun->machine->pic_reg = pic_reg;
8002
8003 /* Play games to avoid marking the function as needing pic
8004 if we are being called as part of the cost-estimation
8005 process. */
8006 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8007 {
8008 crtl->uses_pic_offset_table = 1;
8009 start_sequence ();
8010
8011 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8012 && arm_pic_register > LAST_LO_REGNUM
8013 && !compute_now)
8014 emit_move_insn (cfun->machine->pic_reg,
8015 gen_rtx_REG (Pmode, arm_pic_register));
8016 else
8017 arm_load_pic_register (0UL, pic_reg);
8018
8019 seq = get_insns ();
8020 end_sequence ();
8021
8022 for (insn = seq; insn; insn = NEXT_INSN (insn))
8023 if (INSN_P (insn))
8024 INSN_LOCATION (insn) = prologue_location;
8025
8026 /* We can be called during expansion of PHI nodes, where
8027 we can't yet emit instructions directly in the final
8028 insn stream. Queue the insns on the entry edge, they will
8029 be committed after everything else is expanded. */
8030 if (currently_expanding_to_rtl)
8031 insert_insn_on_edge (seq,
8032 single_succ_edge
8033 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8034 else
8035 emit_insn (seq);
8036 }
8037 }
8038 }
8039 }
8040
8041 /* Generate insns to calculate the address of ORIG in pic mode. */
8042 static rtx_insn *
8043 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8044 {
8045 rtx pat;
8046 rtx mem;
8047
8048 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8049
8050 /* Make the MEM as close to a constant as possible. */
8051 mem = SET_SRC (pat);
8052 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8053 MEM_READONLY_P (mem) = 1;
8054 MEM_NOTRAP_P (mem) = 1;
8055
8056 return emit_insn (pat);
8057 }
8058
8059 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8060 created to hold the result of the load. If not NULL, PIC_REG indicates
8061 which register to use as PIC register, otherwise it is decided by register
8062 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8063 location in the instruction stream, regardless of whether it was loaded
8064 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8065 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8066
8067 Returns the register REG into which the PIC load is performed. */
8068
8069 rtx
8070 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8071 bool compute_now)
8072 {
8073 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8074
8075 if (SYMBOL_REF_P (orig)
8076 || LABEL_REF_P (orig))
8077 {
8078 if (reg == 0)
8079 {
8080 gcc_assert (can_create_pseudo_p ());
8081 reg = gen_reg_rtx (Pmode);
8082 }
8083
8084 /* VxWorks does not impose a fixed gap between segments; the run-time
8085 gap can be different from the object-file gap. We therefore can't
8086 use GOTOFF unless we are absolutely sure that the symbol is in the
8087 same segment as the GOT. Unfortunately, the flexibility of linker
8088 scripts means that we can't be sure of that in general, so assume
8089 that GOTOFF is never valid on VxWorks. */
8090 /* References to weak symbols cannot be resolved locally: they
8091 may be overridden by a non-weak definition at link time. */
8092 rtx_insn *insn;
8093 if ((LABEL_REF_P (orig)
8094 || (SYMBOL_REF_P (orig)
8095 && SYMBOL_REF_LOCAL_P (orig)
8096 && (SYMBOL_REF_DECL (orig)
8097 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8098 && (!SYMBOL_REF_FUNCTION_P (orig)
8099 || arm_fdpic_local_funcdesc_p (orig))))
8100 && NEED_GOT_RELOC
8101 && arm_pic_data_is_text_relative)
8102 insn = arm_pic_static_addr (orig, reg);
8103 else
8104 {
8105 /* If this function doesn't have a pic register, create one now. */
8106 require_pic_register (pic_reg, compute_now);
8107
8108 if (pic_reg == NULL_RTX)
8109 pic_reg = cfun->machine->pic_reg;
8110
8111 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8112 }
8113
8114 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8115 by loop. */
8116 set_unique_reg_note (insn, REG_EQUAL, orig);
8117
8118 return reg;
8119 }
8120 else if (GET_CODE (orig) == CONST)
8121 {
8122 rtx base, offset;
8123
8124 if (GET_CODE (XEXP (orig, 0)) == PLUS
8125 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8126 return orig;
8127
8128 /* Handle the case where we have: const (UNSPEC_TLS). */
8129 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8130 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8131 return orig;
8132
8133 /* Handle the case where we have:
8134 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8135 CONST_INT. */
8136 if (GET_CODE (XEXP (orig, 0)) == PLUS
8137 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8138 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8139 {
8140 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8141 return orig;
8142 }
8143
8144 if (reg == 0)
8145 {
8146 gcc_assert (can_create_pseudo_p ());
8147 reg = gen_reg_rtx (Pmode);
8148 }
8149
8150 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8151
8152 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8153 pic_reg, compute_now);
8154 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8155 base == reg ? 0 : reg, pic_reg,
8156 compute_now);
8157
8158 if (CONST_INT_P (offset))
8159 {
8160 /* The base register doesn't really matter, we only want to
8161 test the index for the appropriate mode. */
8162 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8163 {
8164 gcc_assert (can_create_pseudo_p ());
8165 offset = force_reg (Pmode, offset);
8166 }
8167
8168 if (CONST_INT_P (offset))
8169 return plus_constant (Pmode, base, INTVAL (offset));
8170 }
8171
8172 if (GET_MODE_SIZE (mode) > 4
8173 && (GET_MODE_CLASS (mode) == MODE_INT
8174 || TARGET_SOFT_FLOAT))
8175 {
8176 emit_insn (gen_addsi3 (reg, base, offset));
8177 return reg;
8178 }
8179
8180 return gen_rtx_PLUS (Pmode, base, offset);
8181 }
8182
8183 return orig;
8184 }
8185
8186
8187 /* Generate insns that produce the address of the stack canary. */
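/* The canary lives at a fixed offset from the thread pointer: load TP
   (using the reload variant when requested) and return a MEM at
   TP + arm_stack_protector_guard_offset.  */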
8188 rtx
8189 arm_stack_protect_tls_canary_mem (bool reload)
8190 {
8191 rtx tp = gen_reg_rtx (SImode);
8192 if (reload)
8193 emit_insn (gen_reload_tp_hard (tp));
8194 else
8195 emit_insn (gen_load_tp_hard (tp));
8196
8197 rtx reg = gen_reg_rtx (SImode);
8198 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8199 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8200 return gen_rtx_MEM (SImode, reg);
8201 }
8202
8203
8204 /* Whether a register is callee saved or not. This is necessary because high
8205 registers are marked as caller saved when optimizing for size on Thumb-1
8206 targets despite being callee saved in order to avoid using them. */
8207 #define callee_saved_reg_p(reg) \
8208 (!call_used_or_fixed_reg_p (reg) \
8209 || (TARGET_THUMB1 && optimize_size \
8210 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8211
8212 /* Return a mask for the call-clobbered low registers that are unused
8213 at the end of the prologue. */
8214 static unsigned long
8215 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8216 {
8217 unsigned long mask = 0;
8218 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8219
8220 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8221 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8222 mask |= 1 << (reg - FIRST_LO_REGNUM);
8223 return mask;
8224 }
8225
8226 /* Similarly for the start of the epilogue. */
8227 static unsigned long
8228 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8229 {
8230 unsigned long mask = 0;
8231 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8232
8233 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8234 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8235 mask |= 1 << (reg - FIRST_LO_REGNUM);
8236 return mask;
8237 }
8238
8239 /* Find a spare register to use during the prolog of a function. */
8240
8241 static int
8242 thumb_find_work_register (unsigned long pushed_regs_mask)
8243 {
8244 int reg;
8245
8246 unsigned long unused_regs
8247 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8248
8249 /* Check the argument registers first as these are call-used. The
8250 register allocation order means that sometimes r3 might be used
8251 but earlier argument registers might not, so check them all. */
8252 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8253 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8254 return reg;
8255
8256 /* Otherwise look for a call-saved register that is going to be pushed. */
8257 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8258 if (pushed_regs_mask & (1 << reg))
8259 return reg;
8260
8261 if (TARGET_THUMB2)
8262 {
8263 /* Thumb-2 can use high regs. */
8264 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8265 if (pushed_regs_mask & (1 << reg))
8266 return reg;
8267 }
8268 /* Something went wrong - thumb_compute_save_reg_mask()
8269 should have arranged for a suitable register to be pushed. */
8270 gcc_unreachable ();
8271 }
8272
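/* Counter used to generate unique local labels for PIC address
   computations (see arm_load_pic_register and arm_pic_static_addr).  */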
8273 static GTY(()) int pic_labelno;
8274
8275 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8276 low register. */
8277
8278 void
8279 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8280 {
8281 rtx l1, labelno, pic_tmp, pic_rtx;
8282
8283 if (crtl->uses_pic_offset_table == 0
8284 || TARGET_SINGLE_PIC_BASE
8285 || TARGET_FDPIC)
8286 return;
8287
8288 gcc_assert (flag_pic);
8289
8290 if (pic_reg == NULL_RTX)
8291 pic_reg = cfun->machine->pic_reg;
8292 if (TARGET_VXWORKS_RTP)
8293 {
8294 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8295 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8296 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8297
8298 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8299
8300 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8301 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8302 }
8303 else
8304 {
8305 /* We use an UNSPEC rather than a LABEL_REF because this label
8306 never appears in the code stream. */
8307
8308 labelno = GEN_INT (pic_labelno++);
8309 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8310 l1 = gen_rtx_CONST (VOIDmode, l1);
8311
8312 /* On the ARM the PC register contains 'dot + 8' at the time of the
8313 addition, on the Thumb it is 'dot + 4'. */
8314 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8315 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8316 UNSPEC_GOTSYM_OFF);
8317 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8318
8319 if (TARGET_32BIT)
8320 {
8321 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8322 }
8323 else /* TARGET_THUMB1 */
8324 {
8325 if (arm_pic_register != INVALID_REGNUM
8326 && REGNO (pic_reg) > LAST_LO_REGNUM)
8327 {
8328 /* We will have pushed the pic register, so we should always be
8329 able to find a work register. */
8330 pic_tmp = gen_rtx_REG (SImode,
8331 thumb_find_work_register (saved_regs));
8332 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8333 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8334 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8335 }
8336 else if (arm_pic_register != INVALID_REGNUM
8337 && arm_pic_register > LAST_LO_REGNUM
8338 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8339 {
8340 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8341 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8342 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8343 }
8344 else
8345 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8346 }
8347 }
8348
8349 /* Need to emit this whether or not we obey regdecls,
8350 since setjmp/longjmp can cause life info to screw up. */
8351 emit_use (pic_reg);
8352 }
8353
8354 /* Try to determine whether an object, referenced via ORIG, will be
8355 placed in the text or data segment. This is used in FDPIC mode, to
8356 decide which relocations to use when accessing ORIG. *IS_READONLY
8357 is set to true if ORIG is a read-only location, false otherwise.
8358 Return true if we could determine the location of ORIG, false
8359 otherwise. *IS_READONLY is valid only when we return true. */
8360 static bool
8361 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8362 {
8363 *is_readonly = false;
8364
8365 if (LABEL_REF_P (orig))
8366 {
8367 *is_readonly = true;
8368 return true;
8369 }
8370
8371 if (SYMBOL_REF_P (orig))
8372 {
8373 if (CONSTANT_POOL_ADDRESS_P (orig))
8374 {
8375 *is_readonly = true;
8376 return true;
8377 }
8378 if (SYMBOL_REF_LOCAL_P (orig)
8379 && !SYMBOL_REF_EXTERNAL_P (orig)
8380 && SYMBOL_REF_DECL (orig)
8381 && (!DECL_P (SYMBOL_REF_DECL (orig))
8382 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8383 {
8384 tree decl = SYMBOL_REF_DECL (orig);
8385 tree init = (TREE_CODE (decl) == VAR_DECL)
8386 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8387 ? decl : 0;
8388 int reloc = 0;
8389 bool named_section, readonly;
8390
8391 if (init && init != error_mark_node)
8392 reloc = compute_reloc_for_constant (init);
8393
8394 named_section = TREE_CODE (decl) == VAR_DECL
8395 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8396 readonly = decl_readonly_section (decl, reloc);
8397
8398 /* We don't know where the link script will put a named
8399 section, so return false in such a case. */
8400 if (named_section)
8401 return false;
8402
8403 *is_readonly = readonly;
8404 return true;
8405 }
8406
8407 /* We don't know. */
8408 return false;
8409 }
8410
8411 gcc_unreachable ();
8412 }
8413
8414 /* Generate code to load the address of a static var when flag_pic is set. */
8415 static rtx_insn *
8416 arm_pic_static_addr (rtx orig, rtx reg)
8417 {
8418 rtx l1, labelno, offset_rtx;
8419 rtx_insn *insn;
8420
8421 gcc_assert (flag_pic);
8422
8423 bool is_readonly = false;
8424 bool info_known = false;
8425
8426 if (TARGET_FDPIC
8427 && SYMBOL_REF_P (orig)
8428 && !SYMBOL_REF_FUNCTION_P (orig))
8429 info_known = arm_is_segment_info_known (orig, &is_readonly);
8430
8431 if (TARGET_FDPIC
8432 && SYMBOL_REF_P (orig)
8433 && !SYMBOL_REF_FUNCTION_P (orig)
8434 && !info_known)
8435 {
8436 /* We don't know where orig is stored, so we have to be
8437 pessimistic and use a GOT relocation. */
8438 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8439
8440 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8441 }
8442 else if (TARGET_FDPIC
8443 && SYMBOL_REF_P (orig)
8444 && (SYMBOL_REF_FUNCTION_P (orig)
8445 || !is_readonly))
8446 {
8447 /* We use the GOTOFF relocation. */
8448 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8449
8450 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8451 emit_insn (gen_movsi (reg, l1));
8452 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8453 }
8454 else
8455 {
8456 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8457 PC-relative access. */
8458 /* We use an UNSPEC rather than a LABEL_REF because this label
8459 never appears in the code stream. */
8460 labelno = GEN_INT (pic_labelno++);
8461 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8462 l1 = gen_rtx_CONST (VOIDmode, l1);
8463
8464 /* On the ARM the PC register contains 'dot + 8' at the time of the
8465 addition, on the Thumb it is 'dot + 4'. */
8466 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8467 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8468 UNSPEC_SYMBOL_OFFSET);
8469 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8470
8471 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8472 labelno));
8473 }
8474
8475 return insn;
8476 }
8477
8478 /* Return nonzero if X is valid as an ARM state addressing register. */
8479 static int
8480 arm_address_register_rtx_p (rtx x, int strict_p)
8481 {
8482 int regno;
8483
8484 if (!REG_P (x))
8485 return 0;
8486
8487 regno = REGNO (x);
8488
8489 if (strict_p)
8490 return ARM_REGNO_OK_FOR_BASE_P (regno);
8491
8492 return (regno <= LAST_ARM_REGNUM
8493 || regno >= FIRST_PSEUDO_REGISTER
8494 || regno == FRAME_POINTER_REGNUM
8495 || regno == ARG_POINTER_REGNUM);
8496 }
8497
8498 /* Return TRUE if this rtx is the difference of a symbol and a label,
8499 and will reduce to a PC-relative relocation in the object file.
8500 Expressions like this can be left alone when generating PIC, rather
8501 than forced through the GOT. */
8502 static int
8503 pcrel_constant_p (rtx x)
8504 {
8505 if (GET_CODE (x) == MINUS)
8506 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8507
8508 return FALSE;
8509 }
8510
8511 /* Return true if X will surely end up in an index register after next
8512 splitting pass. */
8513 static bool
8514 will_be_in_index_register (const_rtx x)
8515 {
8516 /* arm.md: calculate_pic_address will split this into a register. */
8517 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8518 }
8519
8520 /* Return nonzero if X is a valid ARM state address operand. */
8521 int
8522 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8523 int strict_p)
8524 {
8525 bool use_ldrd;
8526 enum rtx_code code = GET_CODE (x);
8527
8528 if (arm_address_register_rtx_p (x, strict_p))
8529 return 1;
8530
8531 use_ldrd = (TARGET_LDRD
8532 && (mode == DImode || mode == DFmode));
8533
8534 if (code == POST_INC || code == PRE_DEC
8535 || ((code == PRE_INC || code == POST_DEC)
8536 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8537 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8538
8539 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8540 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8541 && GET_CODE (XEXP (x, 1)) == PLUS
8542 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8543 {
8544 rtx addend = XEXP (XEXP (x, 1), 1);
8545
8546 /* Don't allow ldrd post-increment by register because it's hard
8547 to fix up invalid register choices. */
8548 if (use_ldrd
8549 && GET_CODE (x) == POST_MODIFY
8550 && REG_P (addend))
8551 return 0;
8552
8553 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8554 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8555 }
8556
8557 /* After reload constants split into minipools will have addresses
8558 from a LABEL_REF. */
8559 else if (reload_completed
8560 && (code == LABEL_REF
8561 || (code == CONST
8562 && GET_CODE (XEXP (x, 0)) == PLUS
8563 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8564 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8565 return 1;
8566
8567 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8568 return 0;
8569
8570 else if (code == PLUS)
8571 {
8572 rtx xop0 = XEXP (x, 0);
8573 rtx xop1 = XEXP (x, 1);
8574
8575 return ((arm_address_register_rtx_p (xop0, strict_p)
8576 && ((CONST_INT_P (xop1)
8577 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8578 || (!strict_p && will_be_in_index_register (xop1))))
8579 || (arm_address_register_rtx_p (xop1, strict_p)
8580 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8581 }
8582
8583 #if 0
8584 /* Reload currently can't handle MINUS, so disable this for now */
8585 else if (GET_CODE (x) == MINUS)
8586 {
8587 rtx xop0 = XEXP (x, 0);
8588 rtx xop1 = XEXP (x, 1);
8589
8590 return (arm_address_register_rtx_p (xop0, strict_p)
8591 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8592 }
8593 #endif
8594
8595 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8596 && code == SYMBOL_REF
8597 && CONSTANT_POOL_ADDRESS_P (x)
8598 && ! (flag_pic
8599 && symbol_mentioned_p (get_pool_constant (x))
8600 && ! pcrel_constant_p (get_pool_constant (x))))
8601 return 1;
8602
8603 return 0;
8604 }
8605
8606 /* Return true if we can avoid creating a constant pool entry for x. */
8607 static bool
8608 can_avoid_literal_pool_for_label_p (rtx x)
8609 {
8610 /* Normally we can assign constant values to target registers without
8611 the help of the constant pool. But there are cases where we have to
8612 use the constant pool, such as:
8613 1) assigning a label to a register;
8614 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8615
8616 A constant pool access of the form:
8617 (set (reg r0) (mem (symbol_ref (".LC0"))))
8618 will cause the use of the literal pool (later, in arm_reorg).
8619 So here we mark such a form as invalid, and the compiler will then
8620 adjust it into:
8621 (set (reg r0) (symbol_ref (".LC0")))
8622 (set (reg r0) (mem (reg r0))).
8623 No extra register is required, and (mem (reg r0)) won't cause the use
8624 of literal pools.  */
8625 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8626 && CONSTANT_POOL_ADDRESS_P (x))
8627 return 1;
8628 return 0;
8629 }
8630
8631
8632 /* Return nonzero if X is a valid Thumb-2 address operand. */
8633 static int
8634 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8635 {
8636 bool use_ldrd;
8637 enum rtx_code code = GET_CODE (x);
8638
8639 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8640 return mve_vector_mem_operand (mode, x, strict_p);
8641
8642 if (arm_address_register_rtx_p (x, strict_p))
8643 return 1;
8644
8645 use_ldrd = (TARGET_LDRD
8646 && (mode == DImode || mode == DFmode));
8647
8648 if (code == POST_INC || code == PRE_DEC
8649 || ((code == PRE_INC || code == POST_DEC)
8650 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8651 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8652
8653 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8654 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8655 && GET_CODE (XEXP (x, 1)) == PLUS
8656 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8657 {
8658 /* Thumb-2 only has autoincrement by constant. */
8659 rtx addend = XEXP (XEXP (x, 1), 1);
8660 HOST_WIDE_INT offset;
8661
8662 if (!CONST_INT_P (addend))
8663 return 0;
8664
8665 offset = INTVAL(addend);
8666 if (GET_MODE_SIZE (mode) <= 4)
8667 return (offset > -256 && offset < 256);
8668
8669 return (use_ldrd && offset > -1024 && offset < 1024
8670 && (offset & 3) == 0);
8671 }
8672
8673 /* After reload constants split into minipools will have addresses
8674 from a LABEL_REF. */
8675 else if (reload_completed
8676 && (code == LABEL_REF
8677 || (code == CONST
8678 && GET_CODE (XEXP (x, 0)) == PLUS
8679 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8680 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8681 return 1;
8682
8683 else if (mode == TImode
8684 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8685 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8686 return 0;
8687
8688 else if (code == PLUS)
8689 {
8690 rtx xop0 = XEXP (x, 0);
8691 rtx xop1 = XEXP (x, 1);
8692
8693 return ((arm_address_register_rtx_p (xop0, strict_p)
8694 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8695 || (!strict_p && will_be_in_index_register (xop1))))
8696 || (arm_address_register_rtx_p (xop1, strict_p)
8697 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8698 }
8699
8700 else if (can_avoid_literal_pool_for_label_p (x))
8701 return 0;
8702
8703 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8704 && code == SYMBOL_REF
8705 && CONSTANT_POOL_ADDRESS_P (x)
8706 && ! (flag_pic
8707 && symbol_mentioned_p (get_pool_constant (x))
8708 && ! pcrel_constant_p (get_pool_constant (x))))
8709 return 1;
8710
8711 return 0;
8712 }
8713
8714 /* Return nonzero if INDEX is valid for an address index operand in
8715 ARM state. */
8716 static int
8717 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8718 int strict_p)
8719 {
8720 HOST_WIDE_INT range;
8721 enum rtx_code code = GET_CODE (index);
8722
8723 /* Standard coprocessor addressing modes. */
8724 if (TARGET_HARD_FLOAT
8725 && (mode == SFmode || mode == DFmode))
8726 return (code == CONST_INT && INTVAL (index) < 1024
8727 && INTVAL (index) > -1024
8728 && (INTVAL (index) & 3) == 0);
8729
8730 /* For quad modes, we restrict the constant offset to be slightly less
8731 than what the instruction format permits. We do this because for
8732 quad mode moves, we will actually decompose them into two separate
8733 double-mode reads or writes. INDEX must therefore be a valid
8734 (double-mode) offset and so should INDEX+8. */
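/* Worked example of the constraint above: a quad-mode access at offset 1012
   splits into double-mode accesses at 1012 and 1020, both within the
   double-mode limit below; an offset of 1016 would put the second access at
   1024, out of range, hence the tighter bound of 1016. */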
8735 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8736 return (code == CONST_INT
8737 && INTVAL (index) < 1016
8738 && INTVAL (index) > -1024
8739 && (INTVAL (index) & 3) == 0);
8740
8741 /* We have no such constraint on double mode offsets, so we permit the
8742 full range of the instruction format. */
8743 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8744 return (code == CONST_INT
8745 && INTVAL (index) < 1024
8746 && INTVAL (index) > -1024
8747 && (INTVAL (index) & 3) == 0);
8748
8749 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8750 return (code == CONST_INT
8751 && INTVAL (index) < 1024
8752 && INTVAL (index) > -1024
8753 && (INTVAL (index) & 3) == 0);
8754
8755 if (arm_address_register_rtx_p (index, strict_p)
8756 && (GET_MODE_SIZE (mode) <= 4))
8757 return 1;
8758
8759 if (mode == DImode || mode == DFmode)
8760 {
8761 if (code == CONST_INT)
8762 {
8763 HOST_WIDE_INT val = INTVAL (index);
8764
8765 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8766 If vldr is selected it uses arm_coproc_mem_operand. */
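/* For example, the upper bound of 4092 below (rather than 4096) leaves room
   for the second access of a 2x-ldr pair at offset VAL + 4. */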
8767 if (TARGET_LDRD)
8768 return val > -256 && val < 256;
8769 else
8770 return val > -4096 && val < 4092;
8771 }
8772
8773 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8774 }
8775
8776 if (GET_MODE_SIZE (mode) <= 4
8777 && ! (arm_arch4
8778 && (mode == HImode
8779 || mode == HFmode
8780 || (mode == QImode && outer == SIGN_EXTEND))))
8781 {
8782 if (code == MULT)
8783 {
8784 rtx xiop0 = XEXP (index, 0);
8785 rtx xiop1 = XEXP (index, 1);
8786
8787 return ((arm_address_register_rtx_p (xiop0, strict_p)
8788 && power_of_two_operand (xiop1, SImode))
8789 || (arm_address_register_rtx_p (xiop1, strict_p)
8790 && power_of_two_operand (xiop0, SImode)));
8791 }
8792 else if (code == LSHIFTRT || code == ASHIFTRT
8793 || code == ASHIFT || code == ROTATERT)
8794 {
8795 rtx op = XEXP (index, 1);
8796
8797 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8798 && CONST_INT_P (op)
8799 && INTVAL (op) > 0
8800 && INTVAL (op) <= 31);
8801 }
8802 }
8803
8804 /* For ARM v4 we may be doing a sign-extend operation during the
8805 load. */
8806 if (arm_arch4)
8807 {
8808 if (mode == HImode
8809 || mode == HFmode
8810 || (outer == SIGN_EXTEND && mode == QImode))
8811 range = 256;
8812 else
8813 range = 4096;
8814 }
8815 else
8816 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8817
8818 return (code == CONST_INT
8819 && INTVAL (index) < range
8820 && INTVAL (index) > -range);
8821 }
8822
8823 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8824 index operand. i.e. 1, 2, 4 or 8. */
8825 static bool
8826 thumb2_index_mul_operand (rtx op)
8827 {
8828 HOST_WIDE_INT val;
8829
8830 if (!CONST_INT_P (op))
8831 return false;
8832
8833 val = INTVAL(op);
8834 return (val == 1 || val == 2 || val == 4 || val == 8);
8835 }
8836
8837 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8838 static int
8839 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8840 {
8841 enum rtx_code code = GET_CODE (index);
8842
8843 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8844 /* Standard coprocessor addressing modes. */
8845 if (TARGET_VFP_BASE
8846 && (mode == SFmode || mode == DFmode))
8847 return (code == CONST_INT && INTVAL (index) < 1024
8848 /* Thumb-2 allows only a > -256 index range for its core register
8849 loads/stores. Since we allow SF/DF in core registers, we have
8850 to use the intersection of -256..4096 (core) and -1024..1024
8851 (coprocessor). */
8852 && INTVAL (index) > -256
8853 && (INTVAL (index) & 3) == 0);
8854
8855 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8856 {
8857 /* For DImode assume values will usually live in core regs
8858 and only allow LDRD addressing modes. */
8859 if (!TARGET_LDRD || mode != DImode)
8860 return (code == CONST_INT
8861 && INTVAL (index) < 1024
8862 && INTVAL (index) > -1024
8863 && (INTVAL (index) & 3) == 0);
8864 }
8865
8866 /* For quad modes, we restrict the constant offset to be slightly less
8867 than what the instruction format permits. We do this because for
8868 quad mode moves, we will actually decompose them into two separate
8869 double-mode reads or writes. INDEX must therefore be a valid
8870 (double-mode) offset and so should INDEX+8. */
8871 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8872 return (code == CONST_INT
8873 && INTVAL (index) < 1016
8874 && INTVAL (index) > -1024
8875 && (INTVAL (index) & 3) == 0);
8876
8877 /* We have no such constraint on double mode offsets, so we permit the
8878 full range of the instruction format. */
8879 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8880 return (code == CONST_INT
8881 && INTVAL (index) < 1024
8882 && INTVAL (index) > -1024
8883 && (INTVAL (index) & 3) == 0);
8884
8885 if (arm_address_register_rtx_p (index, strict_p)
8886 && (GET_MODE_SIZE (mode) <= 4))
8887 return 1;
8888
8889 if (mode == DImode || mode == DFmode)
8890 {
8891 if (code == CONST_INT)
8892 {
8893 HOST_WIDE_INT val = INTVAL (index);
8894 /* Thumb-2 ldrd only has reg+const addressing modes.
8895 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8896 If vldr is selected it uses arm_coproc_mem_operand. */
8897 if (TARGET_LDRD)
8898 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8899 else
8900 return IN_RANGE (val, -255, 4095 - 4);
8901 }
8902 else
8903 return 0;
8904 }
8905
8906 if (code == MULT)
8907 {
8908 rtx xiop0 = XEXP (index, 0);
8909 rtx xiop1 = XEXP (index, 1);
8910
8911 return ((arm_address_register_rtx_p (xiop0, strict_p)
8912 && thumb2_index_mul_operand (xiop1))
8913 || (arm_address_register_rtx_p (xiop1, strict_p)
8914 && thumb2_index_mul_operand (xiop0)));
8915 }
8916 else if (code == ASHIFT)
8917 {
8918 rtx op = XEXP (index, 1);
8919
8920 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8921 && CONST_INT_P (op)
8922 && INTVAL (op) > 0
8923 && INTVAL (op) <= 3);
8924 }
8925
8926 return (code == CONST_INT
8927 && INTVAL (index) < 4096
8928 && INTVAL (index) > -256);
8929 }
8930
8931 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8932 static int
8933 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8934 {
8935 int regno;
8936
8937 if (!REG_P (x))
8938 return 0;
8939
8940 regno = REGNO (x);
8941
8942 if (strict_p)
8943 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8944
8945 return (regno <= LAST_LO_REGNUM
8946 || regno > LAST_VIRTUAL_REGISTER
8947 || regno == FRAME_POINTER_REGNUM
8948 || (GET_MODE_SIZE (mode) >= 4
8949 && (regno == STACK_POINTER_REGNUM
8950 || regno >= FIRST_PSEUDO_REGISTER
8951 || x == hard_frame_pointer_rtx
8952 || x == arg_pointer_rtx)));
8953 }
8954
8955 /* Return nonzero if x is a legitimate index register. This is the case
8956 for any base register that can access a QImode object. */
8957 inline static int
8958 thumb1_index_register_rtx_p (rtx x, int strict_p)
8959 {
8960 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8961 }
8962
8963 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8964
8965 The AP may be eliminated to either the SP or the FP, so we use the
8966 least common denominator, e.g. SImode, and offsets from 0 to 64.
8967
8968 ??? Verify whether the above is the right approach.
8969
8970 ??? Also, the FP may be eliminated to the SP, so perhaps that
8971 needs special handling also.
8972
8973 ??? Look at how the mips16 port solves this problem. It probably uses
8974 better ways to solve some of these problems.
8975
8976 Although it is not incorrect, we don't accept QImode and HImode
8977 addresses based on the frame pointer or arg pointer until the
8978 reload pass starts. This is so that eliminating such addresses
8979 into stack-based ones won't produce impossible code. */
8980 int
8981 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8982 {
8983 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8984 return 0;
8985
8986 /* ??? Not clear if this is right. Experiment. */
8987 if (GET_MODE_SIZE (mode) < 4
8988 && !(reload_in_progress || reload_completed)
8989 && (reg_mentioned_p (frame_pointer_rtx, x)
8990 || reg_mentioned_p (arg_pointer_rtx, x)
8991 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8992 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8993 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8994 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8995 return 0;
8996
8997 /* Accept any base register. SP only in SImode or larger. */
8998 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8999 return 1;
9000
9001 /* This is PC relative data before arm_reorg runs. */
9002 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9003 && SYMBOL_REF_P (x)
9004 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9005 && !arm_disable_literal_pool)
9006 return 1;
9007
9008 /* This is PC relative data after arm_reorg runs. */
9009 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9010 && reload_completed
9011 && (LABEL_REF_P (x)
9012 || (GET_CODE (x) == CONST
9013 && GET_CODE (XEXP (x, 0)) == PLUS
9014 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9015 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9016 return 1;
9017
9018 /* Post-inc indexing only supported for SImode and larger. */
9019 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9020 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9021 return 1;
9022
9023 else if (GET_CODE (x) == PLUS)
9024 {
9025 /* REG+REG address can be any two index registers. */
9026 /* We disallow FRAME+REG addressing since we know that FRAME
9027 will be replaced with STACK, and SP relative addressing only
9028 permits SP+OFFSET. */
9029 if (GET_MODE_SIZE (mode) <= 4
9030 && XEXP (x, 0) != frame_pointer_rtx
9031 && XEXP (x, 1) != frame_pointer_rtx
9032 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9033 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9034 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9035 return 1;
9036
9037 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
9038 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9039 || XEXP (x, 0) == arg_pointer_rtx)
9040 && CONST_INT_P (XEXP (x, 1))
9041 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9042 return 1;
9043
9044 /* REG+const has a 10-bit offset for SP, but only SImode and
9045 larger are supported. */
9046 /* ??? Should probably check for DI/DFmode overflow here
9047 just like GO_IF_LEGITIMATE_OFFSET does. */
9048 else if (REG_P (XEXP (x, 0))
9049 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9050 && GET_MODE_SIZE (mode) >= 4
9051 && CONST_INT_P (XEXP (x, 1))
9052 && INTVAL (XEXP (x, 1)) >= 0
9053 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9054 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9055 return 1;
9056
9057 else if (REG_P (XEXP (x, 0))
9058 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9059 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9060 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9061 && REGNO (XEXP (x, 0))
9062 <= LAST_VIRTUAL_POINTER_REGISTER))
9063 && GET_MODE_SIZE (mode) >= 4
9064 && CONST_INT_P (XEXP (x, 1))
9065 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9066 return 1;
9067 }
9068
9069 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9070 && GET_MODE_SIZE (mode) == 4
9071 && SYMBOL_REF_P (x)
9072 && CONSTANT_POOL_ADDRESS_P (x)
9073 && !arm_disable_literal_pool
9074 && ! (flag_pic
9075 && symbol_mentioned_p (get_pool_constant (x))
9076 && ! pcrel_constant_p (get_pool_constant (x))))
9077 return 1;
9078
9079 return 0;
9080 }
9081
9082 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9083 instruction of mode MODE. */
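/* For example: byte accesses allow offsets 0..31, halfword accesses allow
   even offsets 0..62, and word (or larger) accesses allow word-aligned
   offsets up to 128 minus the access size. */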
9084 int
9085 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9086 {
9087 switch (GET_MODE_SIZE (mode))
9088 {
9089 case 1:
9090 return val >= 0 && val < 32;
9091
9092 case 2:
9093 return val >= 0 && val < 64 && (val & 1) == 0;
9094
9095 default:
9096 return (val >= 0
9097 && (val + GET_MODE_SIZE (mode)) <= 128
9098 && (val & 3) == 0);
9099 }
9100 }
9101
9102 bool
9103 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9104 {
9105 if (TARGET_ARM)
9106 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9107 else if (TARGET_THUMB2)
9108 return thumb2_legitimate_address_p (mode, x, strict_p);
9109 else /* if (TARGET_THUMB1) */
9110 return thumb1_legitimate_address_p (mode, x, strict_p);
9111 }
9112
9113 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9114
9115 Given an rtx X being reloaded into a reg required to be
9116 in class CLASS, return the class of reg to actually use.
9117 In general this is just CLASS, but for the Thumb core registers and
9118 immediate constants we prefer a LO_REGS class or a subset. */
9119
9120 static reg_class_t
9121 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9122 {
9123 if (TARGET_32BIT)
9124 return rclass;
9125 else
9126 {
9127 if (rclass == GENERAL_REGS)
9128 return LO_REGS;
9129 else
9130 return rclass;
9131 }
9132 }
9133
9134 /* Build the SYMBOL_REF for __tls_get_addr. */
9135
9136 static GTY(()) rtx tls_get_addr_libfunc;
9137
9138 static rtx
9139 get_tls_get_addr (void)
9140 {
9141 if (!tls_get_addr_libfunc)
9142 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9143 return tls_get_addr_libfunc;
9144 }
9145
9146 rtx
9147 arm_load_tp (rtx target)
9148 {
9149 if (!target)
9150 target = gen_reg_rtx (SImode);
9151
9152 if (TARGET_HARD_TP)
9153 {
9154 /* Can return in any reg. */
9155 emit_insn (gen_load_tp_hard (target));
9156 }
9157 else
9158 {
9159 /* Always returned in r0. Immediately copy the result into a pseudo,
9160 otherwise other uses of r0 (e.g. setting up function arguments) may
9161 clobber the value. */
9162
9163 rtx tmp;
9164
9165 if (TARGET_FDPIC)
9166 {
9167 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9168 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9169
9170 emit_insn (gen_load_tp_soft_fdpic ());
9171
9172 /* Restore r9. */
9173 emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9174 }
9175 else
9176 emit_insn (gen_load_tp_soft ());
9177
9178 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9179 emit_move_insn (target, tmp);
9180 }
9181 return target;
9182 }
9183
9184 static rtx
9185 load_tls_operand (rtx x, rtx reg)
9186 {
9187 rtx tmp;
9188
9189 if (reg == NULL_RTX)
9190 reg = gen_reg_rtx (SImode);
9191
9192 tmp = gen_rtx_CONST (SImode, x);
9193
9194 emit_move_insn (reg, tmp);
9195
9196 return reg;
9197 }
9198
9199 static rtx_insn *
9200 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9201 {
9202 rtx label, labelno = NULL_RTX, sum;
9203
9204 gcc_assert (reloc != TLS_DESCSEQ);
9205 start_sequence ();
9206
9207 if (TARGET_FDPIC)
9208 {
9209 sum = gen_rtx_UNSPEC (Pmode,
9210 gen_rtvec (2, x, GEN_INT (reloc)),
9211 UNSPEC_TLS);
9212 }
9213 else
9214 {
9215 labelno = GEN_INT (pic_labelno++);
9216 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9217 label = gen_rtx_CONST (VOIDmode, label);
9218
9219 sum = gen_rtx_UNSPEC (Pmode,
9220 gen_rtvec (4, x, GEN_INT (reloc), label,
9221 GEN_INT (TARGET_ARM ? 8 : 4)),
9222 UNSPEC_TLS);
9223 }
9224 reg = load_tls_operand (sum, reg);
9225
9226 if (TARGET_FDPIC)
9227 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9228 else if (TARGET_ARM)
9229 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9230 else
9231 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9232
9233 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9234 LCT_PURE, /* LCT_CONST? */
9235 Pmode, reg, Pmode);
9236
9237 rtx_insn *insns = get_insns ();
9238 end_sequence ();
9239
9240 return insns;
9241 }
9242
9243 static rtx
9244 arm_tls_descseq_addr (rtx x, rtx reg)
9245 {
9246 rtx labelno = GEN_INT (pic_labelno++);
9247 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9248 rtx sum = gen_rtx_UNSPEC (Pmode,
9249 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9250 gen_rtx_CONST (VOIDmode, label),
9251 GEN_INT (!TARGET_ARM)),
9252 UNSPEC_TLS);
9253 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9254
9255 emit_insn (gen_tlscall (x, labelno));
9256 if (!reg)
9257 reg = gen_reg_rtx (SImode);
9258 else
9259 gcc_assert (REGNO (reg) != R0_REGNUM);
9260
9261 emit_move_insn (reg, reg0);
9262
9263 return reg;
9264 }
9265
9266
9267 rtx
9268 legitimize_tls_address (rtx x, rtx reg)
9269 {
9270 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9271 rtx_insn *insns;
9272 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9273
9274 switch (model)
9275 {
9276 case TLS_MODEL_GLOBAL_DYNAMIC:
9277 if (TARGET_GNU2_TLS)
9278 {
9279 gcc_assert (!TARGET_FDPIC);
9280
9281 reg = arm_tls_descseq_addr (x, reg);
9282
9283 tp = arm_load_tp (NULL_RTX);
9284
9285 dest = gen_rtx_PLUS (Pmode, tp, reg);
9286 }
9287 else
9288 {
9289 /* Original scheme */
9290 if (TARGET_FDPIC)
9291 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9292 else
9293 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9294 dest = gen_reg_rtx (Pmode);
9295 emit_libcall_block (insns, dest, ret, x);
9296 }
9297 return dest;
9298
9299 case TLS_MODEL_LOCAL_DYNAMIC:
9300 if (TARGET_GNU2_TLS)
9301 {
9302 gcc_assert (!TARGET_FDPIC);
9303
9304 reg = arm_tls_descseq_addr (x, reg);
9305
9306 tp = arm_load_tp (NULL_RTX);
9307
9308 dest = gen_rtx_PLUS (Pmode, tp, reg);
9309 }
9310 else
9311 {
9312 if (TARGET_FDPIC)
9313 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9314 else
9315 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9316
9317 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9318 share the LDM result with other LD model accesses. */
9319 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9320 UNSPEC_TLS);
9321 dest = gen_reg_rtx (Pmode);
9322 emit_libcall_block (insns, dest, ret, eqv);
9323
9324 /* Load the addend. */
9325 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9326 GEN_INT (TLS_LDO32)),
9327 UNSPEC_TLS);
9328 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9329 dest = gen_rtx_PLUS (Pmode, dest, addend);
9330 }
9331 return dest;
9332
9333 case TLS_MODEL_INITIAL_EXEC:
9334 if (TARGET_FDPIC)
9335 {
9336 sum = gen_rtx_UNSPEC (Pmode,
9337 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9338 UNSPEC_TLS);
9339 reg = load_tls_operand (sum, reg);
9340 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9341 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9342 }
9343 else
9344 {
9345 labelno = GEN_INT (pic_labelno++);
9346 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9347 label = gen_rtx_CONST (VOIDmode, label);
9348 sum = gen_rtx_UNSPEC (Pmode,
9349 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9350 GEN_INT (TARGET_ARM ? 8 : 4)),
9351 UNSPEC_TLS);
9352 reg = load_tls_operand (sum, reg);
9353
9354 if (TARGET_ARM)
9355 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9356 else if (TARGET_THUMB2)
9357 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9358 else
9359 {
9360 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9361 emit_move_insn (reg, gen_const_mem (SImode, reg));
9362 }
9363 }
9364
9365 tp = arm_load_tp (NULL_RTX);
9366
9367 return gen_rtx_PLUS (Pmode, tp, reg);
9368
9369 case TLS_MODEL_LOCAL_EXEC:
9370 tp = arm_load_tp (NULL_RTX);
9371
9372 reg = gen_rtx_UNSPEC (Pmode,
9373 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9374 UNSPEC_TLS);
9375 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9376
9377 return gen_rtx_PLUS (Pmode, tp, reg);
9378
9379 default:
9380 abort ();
9381 }
9382 }
9383
9384 /* Try machine-dependent ways of modifying an illegitimate address
9385 to be legitimate. If we find one, return the new, valid address. */
9386 rtx
9387 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9388 {
9389 if (arm_tls_referenced_p (x))
9390 {
9391 rtx addend = NULL;
9392
9393 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9394 {
9395 addend = XEXP (XEXP (x, 0), 1);
9396 x = XEXP (XEXP (x, 0), 0);
9397 }
9398
9399 if (!SYMBOL_REF_P (x))
9400 return x;
9401
9402 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9403
9404 x = legitimize_tls_address (x, NULL_RTX);
9405
9406 if (addend)
9407 {
9408 x = gen_rtx_PLUS (SImode, x, addend);
9409 orig_x = x;
9410 }
9411 else
9412 return x;
9413 }
9414
9415 if (TARGET_THUMB1)
9416 return thumb_legitimize_address (x, orig_x, mode);
9417
9418 if (GET_CODE (x) == PLUS)
9419 {
9420 rtx xop0 = XEXP (x, 0);
9421 rtx xop1 = XEXP (x, 1);
9422
9423 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9424 xop0 = force_reg (SImode, xop0);
9425
9426 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9427 && !symbol_mentioned_p (xop1))
9428 xop1 = force_reg (SImode, xop1);
9429
9430 if (ARM_BASE_REGISTER_RTX_P (xop0)
9431 && CONST_INT_P (xop1))
9432 {
9433 HOST_WIDE_INT n, low_n;
9434 rtx base_reg, val;
9435 n = INTVAL (xop1);
9436
9437 /* VFP addressing modes actually allow greater offsets, but for
9438 now we just stick with the lowest common denominator. */
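/* For example, with DFmode and an offset of 0x10c, the code below rewrites
   the address as (xop0 + 0x110) plus a residual offset of -4, keeping the
   residual offset small. */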
9439 if (mode == DImode || mode == DFmode)
9440 {
9441 low_n = n & 0x0f;
9442 n &= ~0x0f;
9443 if (low_n > 4)
9444 {
9445 n += 16;
9446 low_n -= 16;
9447 }
9448 }
9449 else
9450 {
9451 low_n = ((mode) == TImode ? 0
9452 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9453 n -= low_n;
9454 }
9455
9456 base_reg = gen_reg_rtx (SImode);
9457 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9458 emit_move_insn (base_reg, val);
9459 x = plus_constant (Pmode, base_reg, low_n);
9460 }
9461 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9462 x = gen_rtx_PLUS (SImode, xop0, xop1);
9463 }
9464
9465 /* XXX We don't allow MINUS any more -- see comment in
9466 arm_legitimate_address_outer_p (). */
9467 else if (GET_CODE (x) == MINUS)
9468 {
9469 rtx xop0 = XEXP (x, 0);
9470 rtx xop1 = XEXP (x, 1);
9471
9472 if (CONSTANT_P (xop0))
9473 xop0 = force_reg (SImode, xop0);
9474
9475 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9476 xop1 = force_reg (SImode, xop1);
9477
9478 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9479 x = gen_rtx_MINUS (SImode, xop0, xop1);
9480 }
9481
9482 /* Make sure to take full advantage of the pre-indexed addressing mode
9483 with absolute addresses, which often allows the base register to
9484 be factored out across multiple adjacent memory references, and might
9485 even allow the minipool to be avoided entirely. */
9486 else if (CONST_INT_P (x) && optimize > 0)
9487 {
9488 unsigned int bits;
9489 HOST_WIDE_INT mask, base, index;
9490 rtx base_reg;
9491
9492 /* LDR and LDRB can use a 12-bit index; ldrsb and the rest can
9493 only use an 8-bit index. So let's use a 12-bit index for
9494 SImode only and hope that arm_gen_constant will enable LDRB
9495 to use more bits. */
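/* Illustrative example: for x == 0x3456 in SImode, mask == 0xfff, so
   base == 0x3000 and index == 0x456; the base is loaded into a register
   once and the access becomes [base_reg, #0x456]. */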
9496 bits = (mode == SImode) ? 12 : 8;
9497 mask = (1 << bits) - 1;
9498 base = INTVAL (x) & ~mask;
9499 index = INTVAL (x) & mask;
9500 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9501 {
9502 /* It'll most probably be more efficient to generate the
9503 base with more bits set and use a negative index instead.
9504 Don't do this for Thumb as negative offsets are much more
9505 limited. */
9506 base |= mask;
9507 index -= mask;
9508 }
9509 base_reg = force_reg (SImode, GEN_INT (base));
9510 x = plus_constant (Pmode, base_reg, index);
9511 }
9512
9513 if (flag_pic)
9514 {
9515 /* We need to find and carefully transform any SYMBOL and LABEL
9516 references; so go back to the original address expression. */
9517 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9518 false /*compute_now*/);
9519
9520 if (new_x != orig_x)
9521 x = new_x;
9522 }
9523
9524 return x;
9525 }
9526
9527
9528 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9529 to be legitimate. If we find one, return the new, valid address. */
9530 rtx
9531 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9532 {
9533 if (GET_CODE (x) == PLUS
9534 && CONST_INT_P (XEXP (x, 1))
9535 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9536 || INTVAL (XEXP (x, 1)) < 0))
9537 {
9538 rtx xop0 = XEXP (x, 0);
9539 rtx xop1 = XEXP (x, 1);
9540 HOST_WIDE_INT offset = INTVAL (xop1);
9541
9542 /* Try and fold the offset into a biasing of the base register and
9543 then offsetting that. Don't do this when optimizing for space
9544 since it can cause too many CSEs. */
9545 if (optimize_size && offset >= 0
9546 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9547 {
9548 HOST_WIDE_INT delta;
9549
9550 if (offset >= 256)
9551 delta = offset - (256 - GET_MODE_SIZE (mode));
9552 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9553 delta = 31 * GET_MODE_SIZE (mode);
9554 else
9555 delta = offset & (~31 * GET_MODE_SIZE (mode));
9556
9557 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9558 NULL_RTX);
9559 x = plus_constant (Pmode, xop0, delta);
9560 }
9561 else if (offset < 0 && offset > -256)
9562 /* Small negative offsets are best done with a subtract before the
9563 dereference; forcing these into a register normally takes two
9564 instructions. */
9565 x = force_operand (x, NULL_RTX);
9566 else
9567 {
9568 /* For the remaining cases, force the constant into a register. */
9569 xop1 = force_reg (SImode, xop1);
9570 x = gen_rtx_PLUS (SImode, xop0, xop1);
9571 }
9572 }
9573 else if (GET_CODE (x) == PLUS
9574 && s_register_operand (XEXP (x, 1), SImode)
9575 && !s_register_operand (XEXP (x, 0), SImode))
9576 {
9577 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9578
9579 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9580 }
9581
9582 if (flag_pic)
9583 {
9584 /* We need to find and carefully transform any SYMBOL and LABEL
9585 references; so go back to the original address expression. */
9586 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9587 false /*compute_now*/);
9588
9589 if (new_x != orig_x)
9590 x = new_x;
9591 }
9592
9593 return x;
9594 }
9595
9596 /* Return TRUE if X contains any TLS symbol references. */
9597
9598 bool
9599 arm_tls_referenced_p (rtx x)
9600 {
9601 if (! TARGET_HAVE_TLS)
9602 return false;
9603
9604 subrtx_iterator::array_type array;
9605 FOR_EACH_SUBRTX (iter, array, x, ALL)
9606 {
9607 const_rtx x = *iter;
9608 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9609 {
9610 /* ARM currently does not provide relocations to encode TLS variables
9611 into AArch32 instructions, only data, so there is no way to
9612 implement these if the literal pool is disabled. */
9613 if (arm_disable_literal_pool)
9614 sorry ("accessing thread-local storage is not currently supported "
9615 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9616
9617 return true;
9618 }
9619
9620 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9621 TLS offsets, not real symbol references. */
9622 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9623 iter.skip_subrtxes ();
9624 }
9625 return false;
9626 }
9627
9628 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9629
9630 On the ARM, allow any integer (invalid ones are removed later by insn
9631 patterns), nice doubles and symbol_refs which refer to the function's
9632 constant pool XXX.
9633
9634 When generating pic allow anything. */
9635
9636 static bool
9637 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9638 {
9639 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9640 return false;
9641
9642 return flag_pic || !label_mentioned_p (x);
9643 }
9644
9645 static bool
9646 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9647 {
9648 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9649 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
9650 for ARMv8-M Baseline or later, the result is valid. */
9651 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9652 x = XEXP (x, 0);
9653
9654 return (CONST_INT_P (x)
9655 || CONST_DOUBLE_P (x)
9656 || CONSTANT_ADDRESS_P (x)
9657 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9658 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9659 we build the symbol address with upper/lower
9660 relocations. */
9661 || (TARGET_THUMB1
9662 && !label_mentioned_p (x)
9663 && arm_valid_symbolic_address_p (x)
9664 && arm_disable_literal_pool)
9665 || flag_pic);
9666 }
9667
9668 static bool
9669 arm_legitimate_constant_p (machine_mode mode, rtx x)
9670 {
9671 return (!arm_cannot_force_const_mem (mode, x)
9672 && (TARGET_32BIT
9673 ? arm_legitimate_constant_p_1 (mode, x)
9674 : thumb_legitimate_constant_p (mode, x)));
9675 }
9676
9677 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9678
9679 static bool
9680 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9681 {
9682 rtx base, offset;
9683 split_const (x, &base, &offset);
9684
9685 if (SYMBOL_REF_P (base))
9686 {
9687 /* Function symbols cannot have an offset due to the Thumb bit. */
9688 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9689 && INTVAL (offset) != 0)
9690 return true;
9691
9692 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9693 && !offset_within_block_p (base, INTVAL (offset)))
9694 return true;
9695 }
9696 return arm_tls_referenced_p (x);
9697 }
9698 \f
9699 #define REG_OR_SUBREG_REG(X) \
9700 (REG_P (X) \
9701 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9702
9703 #define REG_OR_SUBREG_RTX(X) \
9704 (REG_P (X) ? (X) : SUBREG_REG (X))
9705
9706 static inline int
9707 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9708 {
9709 machine_mode mode = GET_MODE (x);
9710 int total, words;
9711
9712 switch (code)
9713 {
9714 case ASHIFT:
9715 case ASHIFTRT:
9716 case LSHIFTRT:
9717 case ROTATERT:
9718 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9719
9720 case PLUS:
9721 case MINUS:
9722 case COMPARE:
9723 case NEG:
9724 case NOT:
9725 return COSTS_N_INSNS (1);
9726
9727 case MULT:
9728 if (arm_arch6m && arm_m_profile_small_mul)
9729 return COSTS_N_INSNS (32);
9730
9731 if (CONST_INT_P (XEXP (x, 1)))
9732 {
9733 int cycles = 0;
9734 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9735
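/* The loop below charges one cycle per two bits of the constant
   multiplier; e.g. i == 0x55 needs four iterations, giving
   COSTS_N_INSNS (2) + 4. */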
9736 while (i)
9737 {
9738 i >>= 2;
9739 cycles++;
9740 }
9741 return COSTS_N_INSNS (2) + cycles;
9742 }
9743 return COSTS_N_INSNS (1) + 16;
9744
9745 case SET:
9746 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9747 the mode. */
9748 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9749 return (COSTS_N_INSNS (words)
9750 + 4 * ((MEM_P (SET_SRC (x)))
9751 + MEM_P (SET_DEST (x))));
9752
9753 case CONST_INT:
9754 if (outer == SET)
9755 {
9756 if (UINTVAL (x) < 256
9757 /* 16-bit constant. */
9758 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9759 return 0;
9760 if (thumb_shiftable_const (INTVAL (x)))
9761 return COSTS_N_INSNS (2);
9762 return arm_disable_literal_pool
9763 ? COSTS_N_INSNS (8)
9764 : COSTS_N_INSNS (3);
9765 }
9766 else if ((outer == PLUS || outer == COMPARE)
9767 && INTVAL (x) < 256 && INTVAL (x) > -256)
9768 return 0;
9769 else if ((outer == IOR || outer == XOR || outer == AND)
9770 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9771 return COSTS_N_INSNS (1);
9772 else if (outer == AND)
9773 {
9774 int i;
9775 /* This duplicates the tests in the andsi3 expander. */
9776 for (i = 9; i <= 31; i++)
9777 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9778 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9779 return COSTS_N_INSNS (2);
9780 }
9781 else if (outer == ASHIFT || outer == ASHIFTRT
9782 || outer == LSHIFTRT)
9783 return 0;
9784 return COSTS_N_INSNS (2);
9785
9786 case CONST:
9787 case CONST_DOUBLE:
9788 case LABEL_REF:
9789 case SYMBOL_REF:
9790 return COSTS_N_INSNS (3);
9791
9792 case UDIV:
9793 case UMOD:
9794 case DIV:
9795 case MOD:
9796 return 100;
9797
9798 case TRUNCATE:
9799 return 99;
9800
9801 case AND:
9802 case XOR:
9803 case IOR:
9804 /* XXX guess. */
9805 return 8;
9806
9807 case MEM:
9808 /* XXX another guess. */
9809 /* Memory costs quite a lot for the first word, but subsequent words
9810 load at the equivalent of a single insn each. */
9811 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9812 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9813 ? 4 : 0));
9814
9815 case IF_THEN_ELSE:
9816 /* XXX a guess. */
9817 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9818 return 14;
9819 return 2;
9820
9821 case SIGN_EXTEND:
9822 case ZERO_EXTEND:
9823 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9824 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9825
9826 if (mode == SImode)
9827 return total;
9828
9829 if (arm_arch6)
9830 return total + COSTS_N_INSNS (1);
9831
9832 /* Assume a two-shift sequence. Increase the cost slightly so
9833 we prefer actual shifts over an extend operation. */
9834 return total + 1 + COSTS_N_INSNS (2);
9835
9836 default:
9837 return 99;
9838 }
9839 }
9840
9841 /* Estimates the size cost of thumb1 instructions.
9842 For now most of the code is copied from thumb1_rtx_costs. We need more
9843 fine-grained tuning when we have more related test cases. */
9844 static inline int
9845 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9846 {
9847 machine_mode mode = GET_MODE (x);
9848 int words, cost;
9849
9850 switch (code)
9851 {
9852 case ASHIFT:
9853 case ASHIFTRT:
9854 case LSHIFTRT:
9855 case ROTATERT:
9856 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9857
9858 case PLUS:
9859 case MINUS:
9860 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9861 patterns defined by RTL expansion, especially for the expansion of
9862 multiplication. */
9863 if ((GET_CODE (XEXP (x, 0)) == MULT
9864 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9865 || (GET_CODE (XEXP (x, 1)) == MULT
9866 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9867 return COSTS_N_INSNS (2);
9868 /* Fall through. */
9869 case COMPARE:
9870 case NEG:
9871 case NOT:
9872 return COSTS_N_INSNS (1);
9873
9874 case MULT:
9875 if (CONST_INT_P (XEXP (x, 1)))
9876 {
9877 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9878 into a register first. */
9879 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9880 /* For targets with a very small, high-latency multiply
9881 unit, we prefer to synthesize the mult with up to 5 instructions,
9882 giving a good balance between size and performance. */
9883 if (arm_arch6m && arm_m_profile_small_mul)
9884 return COSTS_N_INSNS (5);
9885 else
9886 return COSTS_N_INSNS (1) + const_size;
9887 }
9888 return COSTS_N_INSNS (1);
9889
9890 case SET:
9891 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9892 the mode. */
9893 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9894 cost = COSTS_N_INSNS (words);
9895 if (satisfies_constraint_J (SET_SRC (x))
9896 || satisfies_constraint_K (SET_SRC (x))
9897 /* Too big an immediate for a 2-byte mov, using MOVT. */
9898 || (CONST_INT_P (SET_SRC (x))
9899 && UINTVAL (SET_SRC (x)) >= 256
9900 && TARGET_HAVE_MOVT
9901 && satisfies_constraint_j (SET_SRC (x)))
9902 /* thumb1_movdi_insn. */
9903 || ((words > 1) && MEM_P (SET_SRC (x))))
9904 cost += COSTS_N_INSNS (1);
9905 return cost;
9906
9907 case CONST_INT:
9908 if (outer == SET)
9909 {
9910 if (UINTVAL (x) < 256)
9911 return COSTS_N_INSNS (1);
9912 /* movw is 4 bytes long. */
9913 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9914 return COSTS_N_INSNS (2);
9915 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9916 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9917 return COSTS_N_INSNS (2);
9918 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9919 if (thumb_shiftable_const (INTVAL (x)))
9920 return COSTS_N_INSNS (2);
9921 return arm_disable_literal_pool
9922 ? COSTS_N_INSNS (8)
9923 : COSTS_N_INSNS (3);
9924 }
9925 else if ((outer == PLUS || outer == COMPARE)
9926 && INTVAL (x) < 256 && INTVAL (x) > -256)
9927 return 0;
9928 else if ((outer == IOR || outer == XOR || outer == AND)
9929 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9930 return COSTS_N_INSNS (1);
9931 else if (outer == AND)
9932 {
9933 int i;
9934 /* This duplicates the tests in the andsi3 expander. */
9935 for (i = 9; i <= 31; i++)
9936 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9937 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9938 return COSTS_N_INSNS (2);
9939 }
9940 else if (outer == ASHIFT || outer == ASHIFTRT
9941 || outer == LSHIFTRT)
9942 return 0;
9943 return COSTS_N_INSNS (2);
9944
9945 case CONST:
9946 case CONST_DOUBLE:
9947 case LABEL_REF:
9948 case SYMBOL_REF:
9949 return COSTS_N_INSNS (3);
9950
9951 case UDIV:
9952 case UMOD:
9953 case DIV:
9954 case MOD:
9955 return 100;
9956
9957 case TRUNCATE:
9958 return 99;
9959
9960 case AND:
9961 case XOR:
9962 case IOR:
9963 return COSTS_N_INSNS (1);
9964
9965 case MEM:
9966 return (COSTS_N_INSNS (1)
9967 + COSTS_N_INSNS (1)
9968 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9969 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9970 ? COSTS_N_INSNS (1) : 0));
9971
9972 case IF_THEN_ELSE:
9973 /* XXX a guess. */
9974 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9975 return 14;
9976 return 2;
9977
9978 case ZERO_EXTEND:
9979 /* XXX still guessing. */
9980 switch (GET_MODE (XEXP (x, 0)))
9981 {
9982 case E_QImode:
9983 return (1 + (mode == DImode ? 4 : 0)
9984 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9985
9986 case E_HImode:
9987 return (4 + (mode == DImode ? 4 : 0)
9988 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9989
9990 case E_SImode:
9991 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9992
9993 default:
9994 return 99;
9995 }
9996
9997 default:
9998 return 99;
9999 }
10000 }
10001
10002 /* Helper function for arm_rtx_costs. If one operand of OP (a
10003 PLUS) adds in the carry flag, then return the other operand. If
10004 neither is a carry, return OP unchanged. */
10005 static rtx
10006 strip_carry_operation (rtx op)
10007 {
10008 gcc_assert (GET_CODE (op) == PLUS);
10009 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10010 return XEXP (op, 1);
10011 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10012 return XEXP (op, 0);
10013 return op;
10014 }
10015
10016 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10017 operand, then return the operand that is being shifted. If the shift
10018 is not by a constant, then set SHIFT_REG to point to the operand.
10019 Return NULL if OP is not a shifter operand. */
10020 static rtx
10021 shifter_op_p (rtx op, rtx *shift_reg)
10022 {
10023 enum rtx_code code = GET_CODE (op);
10024
10025 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10026 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10027 return XEXP (op, 0);
10028 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10029 return XEXP (op, 0);
10030 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10031 || code == ASHIFTRT)
10032 {
10033 if (!CONST_INT_P (XEXP (op, 1)))
10034 *shift_reg = XEXP (op, 1);
10035 return XEXP (op, 0);
10036 }
10037
10038 return NULL;
10039 }
10040
10041 static bool
10042 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10043 {
10044 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10045 rtx_code code = GET_CODE (x);
10046 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10047
10048 switch (XINT (x, 1))
10049 {
10050 case UNSPEC_UNALIGNED_LOAD:
10051 /* We can only do unaligned loads into the integer unit, and we can't
10052 use LDM or LDRD. */
10053 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10054 if (speed_p)
10055 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10056 + extra_cost->ldst.load_unaligned);
10057
10058 #ifdef NOT_YET
10059 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10060 ADDR_SPACE_GENERIC, speed_p);
10061 #endif
10062 return true;
10063
10064 case UNSPEC_UNALIGNED_STORE:
10065 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10066 if (speed_p)
10067 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10068 + extra_cost->ldst.store_unaligned);
10069
10070 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10071 #ifdef NOT_YET
10072 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10073 ADDR_SPACE_GENERIC, speed_p);
10074 #endif
10075 return true;
10076
10077 case UNSPEC_VRINTZ:
10078 case UNSPEC_VRINTP:
10079 case UNSPEC_VRINTM:
10080 case UNSPEC_VRINTR:
10081 case UNSPEC_VRINTX:
10082 case UNSPEC_VRINTA:
10083 if (speed_p)
10084 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10085
10086 return true;
10087 default:
10088 *cost = COSTS_N_INSNS (2);
10089 break;
10090 }
10091 return true;
10092 }
10093
10094 /* Cost of a libcall. We assume one insn per argument, an amount for the
10095 call (one insn for -Os) and then one for processing the result. */
10096 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
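/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size. */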
10097
10098 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10099 do \
10100 { \
10101 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10102 if (shift_op != NULL \
10103 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10104 { \
10105 if (shift_reg) \
10106 { \
10107 if (speed_p) \
10108 *cost += extra_cost->alu.arith_shift_reg; \
10109 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10110 ASHIFT, 1, speed_p); \
10111 } \
10112 else if (speed_p) \
10113 *cost += extra_cost->alu.arith_shift; \
10114 \
10115 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10116 ASHIFT, 0, speed_p) \
10117 + rtx_cost (XEXP (x, 1 - IDX), \
10118 GET_MODE (shift_op), \
10119 OP, 1, speed_p)); \
10120 return true; \
10121 } \
10122 } \
10123 while (0)
10124
10125 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10126 considering the costs of the addressing mode and memory access
10127 separately. */
10128 static bool
10129 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10130 int *cost, bool speed_p)
10131 {
10132 machine_mode mode = GET_MODE (x);
10133
10134 *cost = COSTS_N_INSNS (1);
10135
10136 if (flag_pic
10137 && GET_CODE (XEXP (x, 0)) == PLUS
10138 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10139 /* This will be split into two instructions. Add the cost of the
10140 additional instruction here. The cost of the memory access is computed
10141 below. See arm.md:calculate_pic_address. */
10142 *cost += COSTS_N_INSNS (1);
10143
10144 /* Calculate cost of the addressing mode. */
10145 if (speed_p)
10146 {
10147 arm_addr_mode_op op_type;
10148 switch (GET_CODE (XEXP (x, 0)))
10149 {
10150 default:
10151 case REG:
10152 op_type = AMO_DEFAULT;
10153 break;
10154 case MINUS:
10155 /* MINUS does not appear in RTL, but the architecture supports it,
10156 so handle this case defensively. */
10157 /* fall through */
10158 case PLUS:
10159 op_type = AMO_NO_WB;
10160 break;
10161 case PRE_INC:
10162 case PRE_DEC:
10163 case POST_INC:
10164 case POST_DEC:
10165 case PRE_MODIFY:
10166 case POST_MODIFY:
10167 op_type = AMO_WB;
10168 break;
10169 }
10170
10171 if (VECTOR_MODE_P (mode))
10172 *cost += current_tune->addr_mode_costs->vector[op_type];
10173 else if (FLOAT_MODE_P (mode))
10174 *cost += current_tune->addr_mode_costs->fp[op_type];
10175 else
10176 *cost += current_tune->addr_mode_costs->integer[op_type];
10177 }
10178
10179 /* Calculate cost of memory access. */
10180 if (speed_p)
10181 {
10182 if (FLOAT_MODE_P (mode))
10183 {
10184 if (GET_MODE_SIZE (mode) == 8)
10185 *cost += extra_cost->ldst.loadd;
10186 else
10187 *cost += extra_cost->ldst.loadf;
10188 }
10189 else if (VECTOR_MODE_P (mode))
10190 *cost += extra_cost->ldst.loadv;
10191 else
10192 {
10193 /* Integer modes */
10194 if (GET_MODE_SIZE (mode) == 8)
10195 *cost += extra_cost->ldst.ldrd;
10196 else
10197 *cost += extra_cost->ldst.load;
10198 }
10199 }
10200
10201 return true;
10202 }
10203
10204 /* Helper for arm_bfi_p. */
10205 static bool
10206 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10207 {
10208 unsigned HOST_WIDE_INT const1;
10209 unsigned HOST_WIDE_INT const2 = 0;
10210
10211 if (!CONST_INT_P (XEXP (op0, 1)))
10212 return false;
10213
10214 const1 = UINTVAL (XEXP (op0, 1));
10215 if (!CONST_INT_P (XEXP (op1, 1))
10216 || ~UINTVAL (XEXP (op1, 1)) != const1)
10217 return false;
10218
10219 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10220 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10221 {
10222 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10223 *sub0 = XEXP (XEXP (op0, 0), 0);
10224 }
10225 else
10226 *sub0 = XEXP (op0, 0);
10227
10228 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10229 return false;
10230
10231 *sub1 = XEXP (op1, 0);
10232 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10233 }
10234
10235 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10236 format looks something like:
10237
10238 (IOR (AND (reg1) (~const1))
10239 (AND (ASHIFT (reg2) (const2))
10240 (const1)))
10241
10242 where const1 is a consecutive sequence of 1-bits with the
10243 least-significant non-zero bit starting at bit position const2. If
10244 const2 is zero, then the shift will not appear at all, due to
10245 canonicalization. The two arms of the IOR expression may be
10246 flipped. */
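/* Illustrative example, with const1 == 0xff0 (bits 4..11) and const2 == 4:
     (ior (and (reg r1) (const_int -4081))
          (and (ashift (reg r2) (const_int 4)) (const_int 4080)))
   where -4081 == ~0xff0 and 4080 == 0xff0; this inserts the low 8 bits of
   r2 into bits 4..11 of r1, i.e. BFI r1, r2, #4, #8. */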
10247 static bool
10248 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10249 {
10250 if (GET_CODE (x) != IOR)
10251 return false;
10252 if (GET_CODE (XEXP (x, 0)) != AND
10253 || GET_CODE (XEXP (x, 1)) != AND)
10254 return false;
10255 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10256 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10257 }
10258
10259 /* RTX costs. Make an estimate of the cost of executing the operation
10260 X, which is contained within an operation with code OUTER_CODE.
10261 SPEED_P indicates whether the cost desired is the performance cost,
10262 or the size cost. The estimate is stored in COST and the return
10263 value is TRUE if the cost calculation is final, or FALSE if the
10264 caller should recurse through the operands of X to add additional
10265 costs.
10266
10267 We currently make no attempt to model the size savings of Thumb-2
10268 16-bit instructions. At the normal points in compilation where
10269 this code is called we have no measure of whether the condition
10270 flags are live or not, and thus no realistic way to determine what
10271 the size will eventually be. */
10272 static bool
10273 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10274 const struct cpu_cost_table *extra_cost,
10275 int *cost, bool speed_p)
10276 {
10277 machine_mode mode = GET_MODE (x);
10278
10279 *cost = COSTS_N_INSNS (1);
10280
10281 if (TARGET_THUMB1)
10282 {
10283 if (speed_p)
10284 *cost = thumb1_rtx_costs (x, code, outer_code);
10285 else
10286 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10287 return true;
10288 }
10289
10290 switch (code)
10291 {
10292 case SET:
10293 *cost = 0;
10294 /* SET RTXs don't have a mode so we get it from the destination. */
10295 mode = GET_MODE (SET_DEST (x));
10296
10297 if (REG_P (SET_SRC (x))
10298 && REG_P (SET_DEST (x)))
10299 {
10300 /* Assume that most copies can be done with a single insn,
10301 unless we don't have HW FP, in which case everything
10302 larger than word mode will require two insns. */
10303 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10304 && GET_MODE_SIZE (mode) > 4)
10305 || mode == DImode)
10306 ? 2 : 1);
10307 /* Conditional register moves can be encoded
10308 in 16 bits in Thumb mode. */
10309 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10310 *cost >>= 1;
10311
10312 return true;
10313 }
10314
10315 if (CONST_INT_P (SET_SRC (x)))
10316 {
10317 /* Handle CONST_INT here, since the value doesn't have a mode
10318 and we would otherwise be unable to work out the true cost. */
10319 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10320 0, speed_p);
10321 outer_code = SET;
10322 /* Slightly lower the cost of setting a core reg to a constant.
10323 This helps break up chains and allows for better scheduling. */
10324 if (REG_P (SET_DEST (x))
10325 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10326 *cost -= 1;
10327 x = SET_SRC (x);
10328 /* Immediate moves with an immediate in the range [0, 255] can be
10329 encoded in 16 bits in Thumb mode. */
10330 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10331 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10332 *cost >>= 1;
10333 goto const_int_cost;
10334 }
10335
10336 return false;
10337
10338 case MEM:
10339 return arm_mem_costs (x, extra_cost, cost, speed_p);
10340
10341 case PARALLEL:
10342 {
10343 /* Calculations of LDM costs are complex. We assume an initial cost
10344 (ldm_1st) which covers loading the first ldm_regs_per_insn_1st
10345 registers; then each additional group of
10346 ldm_regs_per_insn_subsequent registers costs one more insn. The
10347 formula for N regs is thus:
10348
10349 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10350 + ldm_regs_per_insn_subsequent - 1)
10351 / ldm_regs_per_insn_subsequent).
10352
10353 Additional costs may also be added for addressing. A similar
10354 formula is used for STM. */
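/* Worked example, assuming ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2: a 6-register LDM costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2). */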
10355
10356 bool is_ldm = load_multiple_operation (x, SImode);
10357 bool is_stm = store_multiple_operation (x, SImode);
10358
10359 if (is_ldm || is_stm)
10360 {
10361 if (speed_p)
10362 {
10363 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10364 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10365 ? extra_cost->ldst.ldm_regs_per_insn_1st
10366 : extra_cost->ldst.stm_regs_per_insn_1st;
10367 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10368 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10369 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10370
10371 *cost += regs_per_insn_1st
10372 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10373 + regs_per_insn_sub - 1)
10374 / regs_per_insn_sub);
10375 return true;
10376 }
10377
10378 }
10379 return false;
10380 }
10381 case DIV:
10382 case UDIV:
10383 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10384 && (mode == SFmode || !TARGET_VFP_SINGLE))
10385 *cost += COSTS_N_INSNS (speed_p
10386 ? extra_cost->fp[mode != SFmode].div : 0);
10387 else if (mode == SImode && TARGET_IDIV)
10388 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10389 else
10390 *cost = LIBCALL_COST (2);
10391
10392 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10393 are possible, udiv is preferred. */
10394 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10395 return false; /* All arguments must be in registers. */
10396
10397 case MOD:
10398 /* MOD by a power of 2 can be expanded as:
10399 rsbs r1, r0, #0
10400 and r0, r0, #(n - 1)
10401 and r1, r1, #(n - 1)
10402 rsbpl r0, r1, #0. */
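/* The sequence above is four instructions; the base cost set on entry
   already covers one of them, hence the extra COSTS_N_INSNS (3) below.
   For n == 16, for instance, the mask used is #(n - 1) == #15. */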
10403 if (CONST_INT_P (XEXP (x, 1))
10404 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10405 && mode == SImode)
10406 {
10407 *cost += COSTS_N_INSNS (3);
10408
10409 if (speed_p)
10410 *cost += 2 * extra_cost->alu.logical
10411 + extra_cost->alu.arith;
10412 return true;
10413 }
10414
10415 /* Fall-through. */
10416 case UMOD:
10417 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10418 are possible, udiv is preferred. */
10419 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10420 return false; /* All arguments must be in registers. */
10421
10422 case ROTATE:
10423 if (mode == SImode && REG_P (XEXP (x, 1)))
10424 {
10425 *cost += (COSTS_N_INSNS (1)
10426 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10427 if (speed_p)
10428 *cost += extra_cost->alu.shift_reg;
10429 return true;
10430 }
10431 /* Fall through */
10432 case ROTATERT:
10433 case ASHIFT:
10434 case LSHIFTRT:
10435 case ASHIFTRT:
10436 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10437 {
10438 *cost += (COSTS_N_INSNS (2)
10439 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10440 if (speed_p)
10441 *cost += 2 * extra_cost->alu.shift;
10442 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10443 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10444 *cost += 1;
10445 return true;
10446 }
10447 else if (mode == SImode)
10448 {
10449 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10450 /* Slightly disparage register shifts at -Os, but not by much. */
10451 if (!CONST_INT_P (XEXP (x, 1)))
10452 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10453 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10454 return true;
10455 }
10456 else if (GET_MODE_CLASS (mode) == MODE_INT
10457 && GET_MODE_SIZE (mode) < 4)
10458 {
10459 if (code == ASHIFT)
10460 {
10461 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10462 /* Slightly disparage register shifts at -Os, but not by
10463 much. */
10464 if (!CONST_INT_P (XEXP (x, 1)))
10465 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10466 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10467 }
10468 else if (code == LSHIFTRT || code == ASHIFTRT)
10469 {
10470 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10471 {
10472 /* Can use SBFX/UBFX. */
10473 if (speed_p)
10474 *cost += extra_cost->alu.bfx;
10475 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10476 }
10477 else
10478 {
10479 *cost += COSTS_N_INSNS (1);
10480 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10481 if (speed_p)
10482 {
10483 if (CONST_INT_P (XEXP (x, 1)))
10484 *cost += 2 * extra_cost->alu.shift;
10485 else
10486 *cost += (extra_cost->alu.shift
10487 + extra_cost->alu.shift_reg);
10488 }
10489 else
10490 /* Slightly disparage register shifts. */
10491 *cost += !CONST_INT_P (XEXP (x, 1));
10492 }
10493 }
10494 else /* Rotates. */
10495 {
10496 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10497 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10498 if (speed_p)
10499 {
10500 if (CONST_INT_P (XEXP (x, 1)))
10501 *cost += (2 * extra_cost->alu.shift
10502 + extra_cost->alu.log_shift);
10503 else
10504 *cost += (extra_cost->alu.shift
10505 + extra_cost->alu.shift_reg
10506 + extra_cost->alu.log_shift_reg);
10507 }
10508 }
10509 return true;
10510 }
10511
10512 *cost = LIBCALL_COST (2);
10513 return false;
10514
10515 case BSWAP:
10516 if (arm_arch6)
10517 {
10518 if (mode == SImode)
10519 {
10520 if (speed_p)
10521 *cost += extra_cost->alu.rev;
10522
10523 return false;
10524 }
10525 }
10526 else
10527 {
10528 /* No rev instruction available. Look at arm_legacy_rev
10529 and thumb_legacy_rev for the form of RTL used then. */
10530 if (TARGET_THUMB)
10531 {
10532 *cost += COSTS_N_INSNS (9);
10533
10534 if (speed_p)
10535 {
10536 *cost += 6 * extra_cost->alu.shift;
10537 *cost += 3 * extra_cost->alu.logical;
10538 }
10539 }
10540 else
10541 {
10542 *cost += COSTS_N_INSNS (4);
10543
10544 if (speed_p)
10545 {
10546 *cost += 2 * extra_cost->alu.shift;
10547 *cost += extra_cost->alu.arith_shift;
10548 *cost += 2 * extra_cost->alu.logical;
10549 }
10550 }
10551 return true;
10552 }
10553 return false;
10554
10555 case MINUS:
10556 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10557 && (mode == SFmode || !TARGET_VFP_SINGLE))
10558 {
10559 if (GET_CODE (XEXP (x, 0)) == MULT
10560 || GET_CODE (XEXP (x, 1)) == MULT)
10561 {
10562 rtx mul_op0, mul_op1, sub_op;
10563
10564 if (speed_p)
10565 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10566
10567 if (GET_CODE (XEXP (x, 0)) == MULT)
10568 {
10569 mul_op0 = XEXP (XEXP (x, 0), 0);
10570 mul_op1 = XEXP (XEXP (x, 0), 1);
10571 sub_op = XEXP (x, 1);
10572 }
10573 else
10574 {
10575 mul_op0 = XEXP (XEXP (x, 1), 0);
10576 mul_op1 = XEXP (XEXP (x, 1), 1);
10577 sub_op = XEXP (x, 0);
10578 }
10579
10580 /* The first operand of the multiply may be optionally
10581 negated. */
10582 if (GET_CODE (mul_op0) == NEG)
10583 mul_op0 = XEXP (mul_op0, 0);
10584
10585 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10586 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10587 + rtx_cost (sub_op, mode, code, 0, speed_p));
10588
10589 return true;
10590 }
10591
10592 if (speed_p)
10593 *cost += extra_cost->fp[mode != SFmode].addsub;
10594 return false;
10595 }
10596
10597 if (mode == SImode)
10598 {
10599 rtx shift_by_reg = NULL;
10600 rtx shift_op;
10601 rtx non_shift_op;
10602 rtx op0 = XEXP (x, 0);
10603 rtx op1 = XEXP (x, 1);
10604
10605 /* Factor out any borrow operation. There's more than one way
10606 of expressing this; try to recognize them all. */
10607 if (GET_CODE (op0) == MINUS)
10608 {
10609 if (arm_borrow_operation (op1, SImode))
10610 {
10611 op1 = XEXP (op0, 1);
10612 op0 = XEXP (op0, 0);
10613 }
10614 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10615 op0 = XEXP (op0, 0);
10616 }
10617 else if (GET_CODE (op1) == PLUS
10618 && arm_borrow_operation (XEXP (op1, 0), SImode))
10619 op1 = XEXP (op1, 0);
10620 else if (GET_CODE (op0) == NEG
10621 && arm_borrow_operation (op1, SImode))
10622 {
10623 /* Negate with carry-in. For Thumb2 this is done with
10624 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10625 RSC instruction that exists in Arm mode. */
10626 if (speed_p)
10627 *cost += (TARGET_THUMB2
10628 ? extra_cost->alu.arith_shift
10629 : extra_cost->alu.arith);
10630 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10631 return true;
10632 }
10633 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10634 Note we do mean ~borrow here. */
10635 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10636 {
10637 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10638 return true;
10639 }
10640
10641 shift_op = shifter_op_p (op0, &shift_by_reg);
10642 if (shift_op == NULL)
10643 {
10644 shift_op = shifter_op_p (op1, &shift_by_reg);
10645 non_shift_op = op0;
10646 }
10647 else
10648 non_shift_op = op1;
10649
10650 if (shift_op != NULL)
10651 {
10652 if (shift_by_reg != NULL)
10653 {
10654 if (speed_p)
10655 *cost += extra_cost->alu.arith_shift_reg;
10656 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10657 }
10658 else if (speed_p)
10659 *cost += extra_cost->alu.arith_shift;
10660
10661 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10662 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10663 return true;
10664 }
10665
10666 if (arm_arch_thumb2
10667 && GET_CODE (XEXP (x, 1)) == MULT)
10668 {
10669 /* MLS. */
10670 if (speed_p)
10671 *cost += extra_cost->mult[0].add;
10672 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10673 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10674 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10675 return true;
10676 }
10677
10678 if (CONST_INT_P (op0))
10679 {
10680 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10681 INTVAL (op0), NULL_RTX,
10682 NULL_RTX, 1, 0);
10683 *cost = COSTS_N_INSNS (insns);
10684 if (speed_p)
10685 *cost += insns * extra_cost->alu.arith;
10686 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10687 return true;
10688 }
10689 else if (speed_p)
10690 *cost += extra_cost->alu.arith;
10691
10692 /* Don't recurse as we don't want to cost any borrow that
10693 we've stripped. */
10694 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10695 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10696 return true;
10697 }
10698
10699 if (GET_MODE_CLASS (mode) == MODE_INT
10700 && GET_MODE_SIZE (mode) < 4)
10701 {
10702 rtx shift_op, shift_reg;
10703 shift_reg = NULL;
10704
10705 /* We check both sides of the MINUS for shifter operands since,
10706 unlike PLUS, it's not commutative. */
10707
10708 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10709 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10710
10711 /* Slightly disparage, as we might need to widen the result. */
10712 *cost += 1;
10713 if (speed_p)
10714 *cost += extra_cost->alu.arith;
10715
10716 if (CONST_INT_P (XEXP (x, 0)))
10717 {
10718 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10719 return true;
10720 }
10721
10722 return false;
10723 }
10724
10725 if (mode == DImode)
10726 {
10727 *cost += COSTS_N_INSNS (1);
10728
10729 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10730 {
10731 rtx op1 = XEXP (x, 1);
10732
10733 if (speed_p)
10734 *cost += 2 * extra_cost->alu.arith;
10735
10736 if (GET_CODE (op1) == ZERO_EXTEND)
10737 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10738 0, speed_p);
10739 else
10740 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10741 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10742 0, speed_p);
10743 return true;
10744 }
10745 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10746 {
10747 if (speed_p)
10748 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10749 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10750 0, speed_p)
10751 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10752 return true;
10753 }
10754 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10755 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10756 {
10757 if (speed_p)
10758 *cost += (extra_cost->alu.arith
10759 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10760 ? extra_cost->alu.arith
10761 : extra_cost->alu.arith_shift));
10762 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10763 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10764 GET_CODE (XEXP (x, 1)), 0, speed_p));
10765 return true;
10766 }
10767
10768 if (speed_p)
10769 *cost += 2 * extra_cost->alu.arith;
10770 return false;
10771 }
10772
10773 /* Vector mode? */
10774
10775 *cost = LIBCALL_COST (2);
10776 return false;
10777
10778 case PLUS:
10779 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10780 && (mode == SFmode || !TARGET_VFP_SINGLE))
10781 {
10782 if (GET_CODE (XEXP (x, 0)) == MULT)
10783 {
10784 rtx mul_op0, mul_op1, add_op;
10785
10786 if (speed_p)
10787 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10788
10789 mul_op0 = XEXP (XEXP (x, 0), 0);
10790 mul_op1 = XEXP (XEXP (x, 0), 1);
10791 add_op = XEXP (x, 1);
10792
10793 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10794 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10795 + rtx_cost (add_op, mode, code, 0, speed_p));
10796
10797 return true;
10798 }
10799
10800 if (speed_p)
10801 *cost += extra_cost->fp[mode != SFmode].addsub;
10802 return false;
10803 }
10804 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10805 {
10806 *cost = LIBCALL_COST (2);
10807 return false;
10808 }
10809
10810 /* Narrow modes can be synthesized in SImode, but the range
10811 of useful sub-operations is limited. Check for shift operations
10812 on one of the operands. Only left shifts can be used in the
10813 narrow modes. */
10814 if (GET_MODE_CLASS (mode) == MODE_INT
10815 && GET_MODE_SIZE (mode) < 4)
10816 {
10817 rtx shift_op, shift_reg;
10818 shift_reg = NULL;
10819
10820 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10821
10822 if (CONST_INT_P (XEXP (x, 1)))
10823 {
10824 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10825 INTVAL (XEXP (x, 1)), NULL_RTX,
10826 NULL_RTX, 1, 0);
10827 *cost = COSTS_N_INSNS (insns);
10828 if (speed_p)
10829 *cost += insns * extra_cost->alu.arith;
10830 /* Slightly penalize a narrow operation as the result may
10831 need widening. */
10832 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10833 return true;
10834 }
10835
10836 /* Slightly penalize a narrow operation as the result may
10837 need widening. */
10838 *cost += 1;
10839 if (speed_p)
10840 *cost += extra_cost->alu.arith;
10841
10842 return false;
10843 }
10844
10845 if (mode == SImode)
10846 {
10847 rtx shift_op, shift_reg;
10848
10849 if (TARGET_INT_SIMD
10850 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10851 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10852 {
10853 /* UXTA[BH] or SXTA[BH]. */
10854 if (speed_p)
10855 *cost += extra_cost->alu.extend_arith;
10856 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10857 0, speed_p)
10858 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10859 return true;
10860 }
10861
10862 rtx op0 = XEXP (x, 0);
10863 rtx op1 = XEXP (x, 1);
10864
10865 /* Handle a side effect of adding in the carry to an addition. */
10866 if (GET_CODE (op0) == PLUS
10867 && arm_carry_operation (op1, mode))
10868 {
10869 op1 = XEXP (op0, 1);
10870 op0 = XEXP (op0, 0);
10871 }
10872 else if (GET_CODE (op1) == PLUS
10873 && arm_carry_operation (op0, mode))
10874 {
10875 op0 = XEXP (op1, 0);
10876 op1 = XEXP (op1, 1);
10877 }
10878 else if (GET_CODE (op0) == PLUS)
10879 {
10880 op0 = strip_carry_operation (op0);
10881 if (swap_commutative_operands_p (op0, op1))
10882 std::swap (op0, op1);
10883 }
10884
10885 if (arm_carry_operation (op0, mode))
10886 {
10887 /* Adding the carry to a register is a canonicalization of
10888 adding 0 to the register plus the carry. */
10889 if (speed_p)
10890 *cost += extra_cost->alu.arith;
10891 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10892 return true;
10893 }
10894
10895 shift_reg = NULL;
10896 shift_op = shifter_op_p (op0, &shift_reg);
10897 if (shift_op != NULL)
10898 {
10899 if (shift_reg)
10900 {
10901 if (speed_p)
10902 *cost += extra_cost->alu.arith_shift_reg;
10903 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10904 }
10905 else if (speed_p)
10906 *cost += extra_cost->alu.arith_shift;
10907
10908 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10909 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10910 return true;
10911 }
10912
10913 if (GET_CODE (op0) == MULT)
10914 {
10915 rtx mul_op = op0;
10916
10917 if (TARGET_DSP_MULTIPLY
10918 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10919 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10920 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10921 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10922 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10923 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10924 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10925 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10926 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10927 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10928 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10929 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10930 == 16))))))
10931 {
10932 /* SMLA[BT][BT]. */
10933 if (speed_p)
10934 *cost += extra_cost->mult[0].extend_add;
10935 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10936 SIGN_EXTEND, 0, speed_p)
10937 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10938 SIGN_EXTEND, 0, speed_p)
10939 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10940 return true;
10941 }
10942
10943 if (speed_p)
10944 *cost += extra_cost->mult[0].add;
10945 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10946 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10947 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10948 return true;
10949 }
10950
10951 if (CONST_INT_P (op1))
10952 {
10953 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10954 INTVAL (op1), NULL_RTX,
10955 NULL_RTX, 1, 0);
10956 *cost = COSTS_N_INSNS (insns);
10957 if (speed_p)
10958 *cost += insns * extra_cost->alu.arith;
10959 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10960 return true;
10961 }
10962
10963 if (speed_p)
10964 *cost += extra_cost->alu.arith;
10965
10966 /* Don't recurse here because we want to test the operands
10967 without any carry operation. */
10968 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10969 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10970 return true;
10971 }
10972
10973 if (mode == DImode)
10974 {
10975 if (GET_CODE (XEXP (x, 0)) == MULT
10976 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10977 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10978 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10979 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10980 {
10981 if (speed_p)
10982 *cost += extra_cost->mult[1].extend_add;
10983 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10984 ZERO_EXTEND, 0, speed_p)
10985 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10986 ZERO_EXTEND, 0, speed_p)
10987 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10988 return true;
10989 }
10990
10991 *cost += COSTS_N_INSNS (1);
10992
10993 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10994 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10995 {
10996 if (speed_p)
10997 *cost += (extra_cost->alu.arith
10998 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10999 ? extra_cost->alu.arith
11000 : extra_cost->alu.arith_shift));
11001
11002 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11003 0, speed_p)
11004 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11005 return true;
11006 }
11007
11008 if (speed_p)
11009 *cost += 2 * extra_cost->alu.arith;
11010 return false;
11011 }
11012
11013 /* Vector mode? */
11014 *cost = LIBCALL_COST (2);
11015 return false;
11016 case IOR:
11017 {
11018 rtx sub0, sub1;
11019 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11020 {
11021 if (speed_p)
11022 *cost += extra_cost->alu.rev;
11023
11024 return true;
11025 }
11026 else if (mode == SImode && arm_arch_thumb2
11027 && arm_bfi_p (x, &sub0, &sub1))
11028 {
11029 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11030 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11031 if (speed_p)
11032 *cost += extra_cost->alu.bfi;
11033
11034 return true;
11035 }
11036 }
11037
11038 /* Fall through. */
11039 case AND: case XOR:
11040 if (mode == SImode)
11041 {
11042 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11043 rtx op0 = XEXP (x, 0);
11044 rtx shift_op, shift_reg;
11045
11046 if (subcode == NOT
11047 && (code == AND
11048 || (code == IOR && TARGET_THUMB2)))
11049 op0 = XEXP (op0, 0);
11050
11051 shift_reg = NULL;
11052 shift_op = shifter_op_p (op0, &shift_reg);
11053 if (shift_op != NULL)
11054 {
11055 if (shift_reg)
11056 {
11057 if (speed_p)
11058 *cost += extra_cost->alu.log_shift_reg;
11059 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11060 }
11061 else if (speed_p)
11062 *cost += extra_cost->alu.log_shift;
11063
11064 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11065 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11066 return true;
11067 }
11068
11069 if (CONST_INT_P (XEXP (x, 1)))
11070 {
11071 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11072 INTVAL (XEXP (x, 1)), NULL_RTX,
11073 NULL_RTX, 1, 0);
11074
11075 *cost = COSTS_N_INSNS (insns);
11076 if (speed_p)
11077 *cost += insns * extra_cost->alu.logical;
11078 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11079 return true;
11080 }
11081
11082 if (speed_p)
11083 *cost += extra_cost->alu.logical;
11084 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11085 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11086 return true;
11087 }
11088
11089 if (mode == DImode)
11090 {
11091 rtx op0 = XEXP (x, 0);
11092 enum rtx_code subcode = GET_CODE (op0);
11093
11094 *cost += COSTS_N_INSNS (1);
11095
11096 if (subcode == NOT
11097 && (code == AND
11098 || (code == IOR && TARGET_THUMB2)))
11099 op0 = XEXP (op0, 0);
11100
11101 if (GET_CODE (op0) == ZERO_EXTEND)
11102 {
11103 if (speed_p)
11104 *cost += 2 * extra_cost->alu.logical;
11105
11106 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11107 0, speed_p)
11108 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11109 return true;
11110 }
11111 else if (GET_CODE (op0) == SIGN_EXTEND)
11112 {
11113 if (speed_p)
11114 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11115
11116 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11117 0, speed_p)
11118 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11119 return true;
11120 }
11121
11122 if (speed_p)
11123 *cost += 2 * extra_cost->alu.logical;
11124
11125 return true;
11126 }
11127 /* Vector mode? */
11128
11129 *cost = LIBCALL_COST (2);
11130 return false;
11131
11132 case MULT:
11133 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11134 && (mode == SFmode || !TARGET_VFP_SINGLE))
11135 {
11136 rtx op0 = XEXP (x, 0);
11137
11138 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11139 op0 = XEXP (op0, 0);
11140
11141 if (speed_p)
11142 *cost += extra_cost->fp[mode != SFmode].mult;
11143
11144 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11145 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11146 return true;
11147 }
11148 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11149 {
11150 *cost = LIBCALL_COST (2);
11151 return false;
11152 }
11153
11154 if (mode == SImode)
11155 {
11156 if (TARGET_DSP_MULTIPLY
11157 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11158 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11159 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11160 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11161 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11162 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11163 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11164 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11165 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11166 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11167 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11168 && (INTVAL (XEXP (XEXP (x, 1), 1))
11169 == 16))))))
11170 {
11171 /* SMUL[TB][TB]. */
11172 if (speed_p)
11173 *cost += extra_cost->mult[0].extend;
11174 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11175 SIGN_EXTEND, 0, speed_p);
11176 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11177 SIGN_EXTEND, 1, speed_p);
11178 return true;
11179 }
11180 if (speed_p)
11181 *cost += extra_cost->mult[0].simple;
11182 return false;
11183 }
11184
11185 if (mode == DImode)
11186 {
11187 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11188 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11189 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11190 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11191 {
11192 if (speed_p)
11193 *cost += extra_cost->mult[1].extend;
11194 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11195 ZERO_EXTEND, 0, speed_p)
11196 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11197 ZERO_EXTEND, 0, speed_p));
11198 return true;
11199 }
11200
11201 *cost = LIBCALL_COST (2);
11202 return false;
11203 }
11204
11205 /* Vector mode? */
11206 *cost = LIBCALL_COST (2);
11207 return false;
11208
11209 case NEG:
11210 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11211 && (mode == SFmode || !TARGET_VFP_SINGLE))
11212 {
11213 if (GET_CODE (XEXP (x, 0)) == MULT)
11214 {
11215 /* VNMUL. */
11216 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11217 return true;
11218 }
11219
11220 if (speed_p)
11221 *cost += extra_cost->fp[mode != SFmode].neg;
11222
11223 return false;
11224 }
11225 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11226 {
11227 *cost = LIBCALL_COST (1);
11228 return false;
11229 }
11230
11231 if (mode == SImode)
11232 {
11233 if (GET_CODE (XEXP (x, 0)) == ABS)
11234 {
11235 *cost += COSTS_N_INSNS (1);
11236 /* Assume the non-flag-changing variant. */
11237 if (speed_p)
11238 *cost += (extra_cost->alu.log_shift
11239 + extra_cost->alu.arith_shift);
11240 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11241 return true;
11242 }
11243
11244 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11245 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11246 {
11247 *cost += COSTS_N_INSNS (1);
11248 /* No extra cost for MOV imm and MVN imm. */
11249 /* If the comparison op is using the flags, there's no further
11250 cost, otherwise we need to add the cost of the comparison. */
11251 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11252 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11253 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11254 {
11255 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11256 *cost += (COSTS_N_INSNS (1)
11257 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11258 0, speed_p)
11259 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11260 1, speed_p));
11261 if (speed_p)
11262 *cost += extra_cost->alu.arith;
11263 }
11264 return true;
11265 }
11266
11267 if (speed_p)
11268 *cost += extra_cost->alu.arith;
11269 return false;
11270 }
11271
11272 if (GET_MODE_CLASS (mode) == MODE_INT
11273 && GET_MODE_SIZE (mode) < 4)
11274 {
11275 /* Slightly disparage, as we might need an extend operation. */
11276 *cost += 1;
11277 if (speed_p)
11278 *cost += extra_cost->alu.arith;
11279 return false;
11280 }
11281
11282 if (mode == DImode)
11283 {
11284 *cost += COSTS_N_INSNS (1);
11285 if (speed_p)
11286 *cost += 2 * extra_cost->alu.arith;
11287 return false;
11288 }
11289
11290 /* Vector mode? */
11291 *cost = LIBCALL_COST (1);
11292 return false;
11293
11294 case NOT:
11295 if (mode == SImode)
11296 {
11297 rtx shift_op;
11298 rtx shift_reg = NULL;
11299
11300 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11301
11302 if (shift_op)
11303 {
11304 if (shift_reg != NULL)
11305 {
11306 if (speed_p)
11307 *cost += extra_cost->alu.log_shift_reg;
11308 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11309 }
11310 else if (speed_p)
11311 *cost += extra_cost->alu.log_shift;
11312 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11313 return true;
11314 }
11315
11316 if (speed_p)
11317 *cost += extra_cost->alu.logical;
11318 return false;
11319 }
11320 if (mode == DImode)
11321 {
11322 *cost += COSTS_N_INSNS (1);
11323 return false;
11324 }
11325
11326 /* Vector mode? */
11327
11328 *cost += LIBCALL_COST (1);
11329 return false;
11330
11331 case IF_THEN_ELSE:
11332 {
11333 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11334 {
11335 *cost += COSTS_N_INSNS (3);
11336 return true;
11337 }
11338 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11339 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11340
11341 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11342 /* Assume that if one arm of the if_then_else is a register,
11343 it will be tied with the result, eliminating the
11344 conditional insn. */
11345 if (REG_P (XEXP (x, 1)))
11346 *cost += op2cost;
11347 else if (REG_P (XEXP (x, 2)))
11348 *cost += op1cost;
11349 else
11350 {
11351 if (speed_p)
11352 {
11353 if (extra_cost->alu.non_exec_costs_exec)
11354 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11355 else
11356 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11357 }
11358 else
11359 *cost += op1cost + op2cost;
11360 }
11361 }
11362 return true;
11363
11364 case COMPARE:
11365 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11366 *cost = 0;
11367 else
11368 {
11369 machine_mode op0mode;
11370 /* We'll mostly assume that the cost of a compare is the cost of the
11371 LHS. However, there are some notable exceptions. */
11372
11373 /* Floating point compares are never done as side-effects. */
11374 op0mode = GET_MODE (XEXP (x, 0));
11375 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11376 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11377 {
11378 if (speed_p)
11379 *cost += extra_cost->fp[op0mode != SFmode].compare;
11380
11381 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11382 {
11383 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11384 return true;
11385 }
11386
11387 return false;
11388 }
11389 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11390 {
11391 *cost = LIBCALL_COST (2);
11392 return false;
11393 }
11394
11395 /* DImode compares normally take two insns. */
11396 if (op0mode == DImode)
11397 {
11398 *cost += COSTS_N_INSNS (1);
11399 if (speed_p)
11400 *cost += 2 * extra_cost->alu.arith;
11401 return false;
11402 }
11403
11404 if (op0mode == SImode)
11405 {
11406 rtx shift_op;
11407 rtx shift_reg;
11408
11409 if (XEXP (x, 1) == const0_rtx
11410 && !(REG_P (XEXP (x, 0))
11411 || (GET_CODE (XEXP (x, 0)) == SUBREG
11412 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11413 {
11414 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11415
11416 /* Multiply operations that set the flags are often
11417 significantly more expensive. */
11418 if (speed_p
11419 && GET_CODE (XEXP (x, 0)) == MULT
11420 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11421 *cost += extra_cost->mult[0].flag_setting;
11422
11423 if (speed_p
11424 && GET_CODE (XEXP (x, 0)) == PLUS
11425 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11426 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11427 0), 1), mode))
11428 *cost += extra_cost->mult[0].flag_setting;
11429 return true;
11430 }
11431
11432 shift_reg = NULL;
11433 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11434 if (shift_op != NULL)
11435 {
11436 if (shift_reg != NULL)
11437 {
11438 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11439 1, speed_p);
11440 if (speed_p)
11441 *cost += extra_cost->alu.arith_shift_reg;
11442 }
11443 else if (speed_p)
11444 *cost += extra_cost->alu.arith_shift;
11445 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11446 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11447 return true;
11448 }
11449
11450 if (speed_p)
11451 *cost += extra_cost->alu.arith;
11452 if (CONST_INT_P (XEXP (x, 1))
11453 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11454 {
11455 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11456 return true;
11457 }
11458 return false;
11459 }
11460
11461 /* Vector mode? */
11462
11463 *cost = LIBCALL_COST (2);
11464 return false;
11465 }
11466 return true;
11467
11468 case EQ:
11469 case GE:
11470 case GT:
11471 case LE:
11472 case LT:
11473 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11474 vcle and vclt). */
11475 if (TARGET_NEON
11476 && TARGET_HARD_FLOAT
11477 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11478 && (XEXP (x, 1) == CONST0_RTX (mode)))
11479 {
11480 *cost = 0;
11481 return true;
11482 }
11483
11484 /* Fall through. */
11485 case NE:
11486 case LTU:
11487 case LEU:
11488 case GEU:
11489 case GTU:
11490 case ORDERED:
11491 case UNORDERED:
11492 case UNEQ:
11493 case UNLE:
11494 case UNLT:
11495 case UNGE:
11496 case UNGT:
11497 case LTGT:
11498 if (outer_code == SET)
11499 {
11500 /* Is it a store-flag operation? */
11501 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11502 && XEXP (x, 1) == const0_rtx)
11503 {
11504 /* Thumb also needs an IT insn. */
11505 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11506 return true;
11507 }
11508 if (XEXP (x, 1) == const0_rtx)
11509 {
11510 switch (code)
11511 {
11512 case LT:
11513 /* LSR Rd, Rn, #31. */
11514 if (speed_p)
11515 *cost += extra_cost->alu.shift;
11516 break;
11517
11518 case EQ:
11519 /* RSBS T1, Rn, #0
11520 ADC Rd, Rn, T1. */
11521
11522 case NE:
11523 /* SUBS T1, Rn, #1
11524 SBC Rd, Rn, T1. */
11525 *cost += COSTS_N_INSNS (1);
11526 break;
11527
11528 case LE:
11529 /* RSBS T1, Rn, Rn, LSR #31
11530 ADC Rd, Rn, T1. */
11531 *cost += COSTS_N_INSNS (1);
11532 if (speed_p)
11533 *cost += extra_cost->alu.arith_shift;
11534 break;
11535
11536 case GT:
11537 /* RSB Rd, Rn, Rn, ASR #1
11538 LSR Rd, Rd, #31. */
11539 *cost += COSTS_N_INSNS (1);
11540 if (speed_p)
11541 *cost += (extra_cost->alu.arith_shift
11542 + extra_cost->alu.shift);
11543 break;
11544
11545 case GE:
11546 /* ASR Rd, Rn, #31
11547 ADD Rd, Rn, #1. */
11548 *cost += COSTS_N_INSNS (1);
11549 if (speed_p)
11550 *cost += extra_cost->alu.shift;
11551 break;
11552
11553 default:
11554 /* Remaining cases are either meaningless or would take
11555 three insns anyway. */
11556 *cost = COSTS_N_INSNS (3);
11557 break;
11558 }
11559 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11560 return true;
11561 }
11562 else
11563 {
11564 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11565 if (CONST_INT_P (XEXP (x, 1))
11566 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11567 {
11568 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11569 return true;
11570 }
11571
11572 return false;
11573 }
11574 }
11575 /* Not directly inside a set. If it involves the condition code
11576 register it must be the condition for a branch, cond_exec or
11577 I_T_E operation. Since the comparison is performed elsewhere
11578 this is just the control part which has no additional
11579 cost. */
11580 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11581 && XEXP (x, 1) == const0_rtx)
11582 {
11583 *cost = 0;
11584 return true;
11585 }
11586 return false;
11587
11588 case ABS:
11589 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11590 && (mode == SFmode || !TARGET_VFP_SINGLE))
11591 {
11592 if (speed_p)
11593 *cost += extra_cost->fp[mode != SFmode].neg;
11594
11595 return false;
11596 }
11597 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11598 {
11599 *cost = LIBCALL_COST (1);
11600 return false;
11601 }
11602
11603 if (mode == SImode)
11604 {
11605 if (speed_p)
11606 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11607 return false;
11608 }
11609 /* Vector mode? */
11610 *cost = LIBCALL_COST (1);
11611 return false;
11612
11613 case SIGN_EXTEND:
11614 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11615 && MEM_P (XEXP (x, 0)))
11616 {
11617 if (mode == DImode)
11618 *cost += COSTS_N_INSNS (1);
11619
11620 if (!speed_p)
11621 return true;
11622
11623 if (GET_MODE (XEXP (x, 0)) == SImode)
11624 *cost += extra_cost->ldst.load;
11625 else
11626 *cost += extra_cost->ldst.load_sign_extend;
11627
11628 if (mode == DImode)
11629 *cost += extra_cost->alu.shift;
11630
11631 return true;
11632 }
11633
11634 /* Widening from less than 32 bits requires an extend operation. */
11635 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11636 {
11637 /* We have SXTB/SXTH. */
11638 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11639 if (speed_p)
11640 *cost += extra_cost->alu.extend;
11641 }
11642 else if (GET_MODE (XEXP (x, 0)) != SImode)
11643 {
11644 /* Needs two shifts. */
11645 *cost += COSTS_N_INSNS (1);
11646 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11647 if (speed_p)
11648 *cost += 2 * extra_cost->alu.shift;
11649 }
11650
11651 /* Widening beyond 32 bits requires one more insn. */
11652 if (mode == DImode)
11653 {
11654 *cost += COSTS_N_INSNS (1);
11655 if (speed_p)
11656 *cost += extra_cost->alu.shift;
11657 }
11658
11659 return true;
11660
11661 case ZERO_EXTEND:
11662 if ((arm_arch4
11663 || GET_MODE (XEXP (x, 0)) == SImode
11664 || GET_MODE (XEXP (x, 0)) == QImode)
11665 && MEM_P (XEXP (x, 0)))
11666 {
11667 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11668
11669 if (mode == DImode)
11670 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11671
11672 return true;
11673 }
11674
11675 /* Widening from less than 32 bits requires an extend operation. */
11676 if (GET_MODE (XEXP (x, 0)) == QImode)
11677 {
11678 /* UXTB can be a shorter instruction in Thumb2, but it might
11679 be slower than the AND Rd, Rn, #255 alternative. When
11680 optimizing for speed it should never be slower to use
11681 AND, and we don't really model 16-bit vs 32-bit insns
11682 here. */
11683 if (speed_p)
11684 *cost += extra_cost->alu.logical;
11685 }
11686 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11687 {
11688 /* We have UXTB/UXTH. */
11689 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11690 if (speed_p)
11691 *cost += extra_cost->alu.extend;
11692 }
11693 else if (GET_MODE (XEXP (x, 0)) != SImode)
11694 {
11695 /* Needs two shifts. It's marginally preferable to use
11696 shifts rather than two BIC instructions as the second
11697 shift may merge with a subsequent insn as a shifter
11698 op. */
11699 *cost = COSTS_N_INSNS (2);
11700 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11701 if (speed_p)
11702 *cost += 2 * extra_cost->alu.shift;
11703 }
11704
11705 /* Widening beyond 32 bits requires one more insn. */
11706 if (mode == DImode)
11707 {
11708 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11709 }
11710
11711 return true;
11712
11713 case CONST_INT:
11714 *cost = 0;
11715 /* CONST_INT has no mode, so we cannot tell for sure how many
11716 insns are really going to be needed. The best we can do is
11717 look at the value passed. If it fits in SImode, then assume
11718 that's the mode it will be used for. Otherwise assume it
11719 will be used in DImode. */
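/* For instance, a value such as 0x1ffffffff cannot be represented in
   SImode, so it is assumed to be a DImode constant and is costed below
   as two SImode constants: the truncated low word and the value shifted
   right by 32. */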
11720 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11721 mode = SImode;
11722 else
11723 mode = DImode;
11724
11725 /* Avoid blowing up in arm_gen_constant (). */
11726 if (!(outer_code == PLUS
11727 || outer_code == AND
11728 || outer_code == IOR
11729 || outer_code == XOR
11730 || outer_code == MINUS))
11731 outer_code = SET;
11732
11733 const_int_cost:
11734 if (mode == SImode)
11735 {
11736 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11737 INTVAL (x), NULL, NULL,
11738 0, 0));
11739 /* Extra costs? */
11740 }
11741 else
11742 {
11743 *cost += COSTS_N_INSNS (arm_gen_constant
11744 (outer_code, SImode, NULL,
11745 trunc_int_for_mode (INTVAL (x), SImode),
11746 NULL, NULL, 0, 0)
11747 + arm_gen_constant (outer_code, SImode, NULL,
11748 INTVAL (x) >> 32, NULL,
11749 NULL, 0, 0));
11750 /* Extra costs? */
11751 }
11752
11753 return true;
11754
11755 case CONST:
11756 case LABEL_REF:
11757 case SYMBOL_REF:
11758 if (speed_p)
11759 {
11760 if (arm_arch_thumb2 && !flag_pic)
11761 *cost += COSTS_N_INSNS (1);
11762 else
11763 *cost += extra_cost->ldst.load;
11764 }
11765 else
11766 *cost += COSTS_N_INSNS (1);
11767
11768 if (flag_pic)
11769 {
11770 *cost += COSTS_N_INSNS (1);
11771 if (speed_p)
11772 *cost += extra_cost->alu.arith;
11773 }
11774
11775 return true;
11776
11777 case CONST_FIXED:
11778 *cost = COSTS_N_INSNS (4);
11779 /* Fixme. */
11780 return true;
11781
11782 case CONST_DOUBLE:
11783 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11784 && (mode == SFmode || !TARGET_VFP_SINGLE))
11785 {
11786 if (vfp3_const_double_rtx (x))
11787 {
11788 if (speed_p)
11789 *cost += extra_cost->fp[mode == DFmode].fpconst;
11790 return true;
11791 }
11792
11793 if (speed_p)
11794 {
11795 if (mode == DFmode)
11796 *cost += extra_cost->ldst.loadd;
11797 else
11798 *cost += extra_cost->ldst.loadf;
11799 }
11800 else
11801 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11802
11803 return true;
11804 }
11805 *cost = COSTS_N_INSNS (4);
11806 return true;
11807
11808 case CONST_VECTOR:
11809 /* Fixme. */
11810 if (((TARGET_NEON && TARGET_HARD_FLOAT
11811 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11812 || TARGET_HAVE_MVE)
11813 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11814 *cost = COSTS_N_INSNS (1);
11815 else
11816 *cost = COSTS_N_INSNS (4);
11817 return true;
11818
11819 case HIGH:
11820 case LO_SUM:
11821 /* When optimizing for size, we prefer constant pool entries to
11822 MOVW/MOVT pairs, so bump the cost of these slightly. */
11823 if (!speed_p)
11824 *cost += 1;
11825 return true;
11826
11827 case CLZ:
11828 if (speed_p)
11829 *cost += extra_cost->alu.clz;
11830 return false;
11831
11832 case SMIN:
11833 if (XEXP (x, 1) == const0_rtx)
11834 {
11835 if (speed_p)
11836 *cost += extra_cost->alu.log_shift;
11837 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11838 return true;
11839 }
11840 /* Fall through. */
11841 case SMAX:
11842 case UMIN:
11843 case UMAX:
11844 *cost += COSTS_N_INSNS (1);
11845 return false;
11846
11847 case TRUNCATE:
11848 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11849 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11850 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11851 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11852 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11853 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11854 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11855 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11856 == ZERO_EXTEND))))
11857 {
11858 if (speed_p)
11859 *cost += extra_cost->mult[1].extend;
11860 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11861 ZERO_EXTEND, 0, speed_p)
11862 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11863 ZERO_EXTEND, 0, speed_p));
11864 return true;
11865 }
11866 *cost = LIBCALL_COST (1);
11867 return false;
11868
11869 case UNSPEC_VOLATILE:
11870 case UNSPEC:
11871 return arm_unspec_cost (x, outer_code, speed_p, cost);
11872
11873 case PC:
11874 /* Reading the PC is like reading any other register. Writing it
11875 is more expensive, but we take that into account elsewhere. */
11876 *cost = 0;
11877 return true;
11878
11879 case ZERO_EXTRACT:
11880 /* TODO: Simple zero_extract of bottom bits using AND. */
11881 /* Fall through. */
11882 case SIGN_EXTRACT:
11883 if (arm_arch6
11884 && mode == SImode
11885 && CONST_INT_P (XEXP (x, 1))
11886 && CONST_INT_P (XEXP (x, 2)))
11887 {
11888 if (speed_p)
11889 *cost += extra_cost->alu.bfx;
11890 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11891 return true;
11892 }
11893 /* Without UBFX/SBFX, need to resort to shift operations. */
11894 *cost += COSTS_N_INSNS (1);
11895 if (speed_p)
11896 *cost += 2 * extra_cost->alu.shift;
11897 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11898 return true;
11899
11900 case FLOAT_EXTEND:
11901 if (TARGET_HARD_FLOAT)
11902 {
11903 if (speed_p)
11904 *cost += extra_cost->fp[mode == DFmode].widen;
11905 if (!TARGET_VFP5
11906 && GET_MODE (XEXP (x, 0)) == HFmode)
11907 {
11908 /* Pre v8, widening HF->DF is a two-step process, first
11909 widening to SFmode. */
11910 *cost += COSTS_N_INSNS (1);
11911 if (speed_p)
11912 *cost += extra_cost->fp[0].widen;
11913 }
11914 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11915 return true;
11916 }
11917
11918 *cost = LIBCALL_COST (1);
11919 return false;
11920
11921 case FLOAT_TRUNCATE:
11922 if (TARGET_HARD_FLOAT)
11923 {
11924 if (speed_p)
11925 *cost += extra_cost->fp[mode == DFmode].narrow;
11926 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11927 return true;
11928 /* Vector modes? */
11929 }
11930 *cost = LIBCALL_COST (1);
11931 return false;
11932
11933 case FMA:
11934 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11935 {
11936 rtx op0 = XEXP (x, 0);
11937 rtx op1 = XEXP (x, 1);
11938 rtx op2 = XEXP (x, 2);
11939
11940
11941 /* vfms or vfnma. */
11942 if (GET_CODE (op0) == NEG)
11943 op0 = XEXP (op0, 0);
11944
11945 /* vfnms or vfnma. */
11946 if (GET_CODE (op2) == NEG)
11947 op2 = XEXP (op2, 0);
11948
11949 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11950 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11951 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11952
11953 if (speed_p)
11954 *cost += extra_cost->fp[mode == DFmode].fma;
11955
11956 return true;
11957 }
11958
11959 *cost = LIBCALL_COST (3);
11960 return false;
11961
11962 case FIX:
11963 case UNSIGNED_FIX:
11964 if (TARGET_HARD_FLOAT)
11965 {
11966 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11967 a vcvt fixed-point conversion. */
11968 if (code == FIX && mode == SImode
11969 && GET_CODE (XEXP (x, 0)) == FIX
11970 && GET_MODE (XEXP (x, 0)) == SFmode
11971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11972 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11973 > 0)
11974 {
11975 if (speed_p)
11976 *cost += extra_cost->fp[0].toint;
11977
11978 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11979 code, 0, speed_p);
11980 return true;
11981 }
11982
11983 if (GET_MODE_CLASS (mode) == MODE_INT)
11984 {
11985 mode = GET_MODE (XEXP (x, 0));
11986 if (speed_p)
11987 *cost += extra_cost->fp[mode == DFmode].toint;
11988 /* Strip off the 'cost' of rounding towards zero. */
11989 if (GET_CODE (XEXP (x, 0)) == FIX)
11990 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11991 0, speed_p);
11992 else
11993 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11994 /* ??? Increase the cost to deal with transferring from
11995 FP -> CORE registers? */
11996 return true;
11997 }
11998 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11999 && TARGET_VFP5)
12000 {
12001 if (speed_p)
12002 *cost += extra_cost->fp[mode == DFmode].roundint;
12003 return false;
12004 }
12005 /* Vector costs? */
12006 }
12007 *cost = LIBCALL_COST (1);
12008 return false;
12009
12010 case FLOAT:
12011 case UNSIGNED_FLOAT:
12012 if (TARGET_HARD_FLOAT)
12013 {
12014 /* ??? Increase the cost to deal with transferring from CORE
12015 -> FP registers? */
12016 if (speed_p)
12017 *cost += extra_cost->fp[mode == DFmode].fromint;
12018 return false;
12019 }
12020 *cost = LIBCALL_COST (1);
12021 return false;
12022
12023 case CALL:
12024 return true;
12025
12026 case ASM_OPERANDS:
12027 {
12028 /* Just a guess. Guess number of instructions in the asm
12029 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12030 though (see PR60663). */
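/* For example, under this estimate a two-instruction asm template with
   three inputs is costed as COSTS_N_INSNS (5). */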
12031 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12032 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12033
12034 *cost = COSTS_N_INSNS (asm_length + num_operands);
12035 return true;
12036 }
12037 default:
12038 if (mode != VOIDmode)
12039 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12040 else
12041 *cost = COSTS_N_INSNS (4); /* Who knows? */
12042 return false;
12043 }
12044 }
12045
12046 #undef HANDLE_NARROW_SHIFT_ARITH
12047
12048 /* RTX costs entry point. */
12049
12050 static bool
12051 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12052 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12053 {
12054 bool result;
12055 int code = GET_CODE (x);
12056 gcc_assert (current_tune->insn_extra_cost);
12057
12058 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12059 (enum rtx_code) outer_code,
12060 current_tune->insn_extra_cost,
12061 total, speed);
12062
12063 if (dump_file && arm_verbose_cost)
12064 {
12065 print_rtl_single (dump_file, x);
12066 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12067 *total, result ? "final" : "partial");
12068 }
12069 return result;
12070 }
12071
12072 static int
12073 arm_insn_cost (rtx_insn *insn, bool speed)
12074 {
12075 int cost;
12076
12077 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12078 will likely disappear during register allocation. */
12079 if (!reload_completed
12080 && GET_CODE (PATTERN (insn)) == SET
12081 && REG_P (SET_DEST (PATTERN (insn)))
12082 && REG_P (SET_SRC (PATTERN (insn))))
12083 return 2;
12084 cost = pattern_cost (PATTERN (insn), speed);
12085 /* If the cost is zero, then it's likely a complex insn. We don't want the
12086 cost of these to be less than something we know about. */
12087 return cost ? cost : COSTS_N_INSNS (2);
12088 }
12089
12090 /* All address computations that can be done are free, but rtx cost returns
12091 the same for practically all of them. So we weight the different types
12092 of address here in the order (most preferred first):
12093 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
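/* For example, a reg-plus-constant address such as [r0, #4] scores 2
   below, a sum involving a shifted operand scores 3, a plain reg+reg
   sum scores 4, and a bare SYMBOL_REF or LABEL_REF scores 10, so
   auto-increment and reg+offset forms are preferred. */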
12094 static inline int
12095 arm_arm_address_cost (rtx x)
12096 {
12097 enum rtx_code c = GET_CODE (x);
12098
12099 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12100 return 0;
12101 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12102 return 10;
12103
12104 if (c == PLUS)
12105 {
12106 if (CONST_INT_P (XEXP (x, 1)))
12107 return 2;
12108
12109 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12110 return 3;
12111
12112 return 4;
12113 }
12114
12115 return 6;
12116 }
12117
12118 static inline int
12119 arm_thumb_address_cost (rtx x)
12120 {
12121 enum rtx_code c = GET_CODE (x);
12122
12123 if (c == REG)
12124 return 1;
12125 if (c == PLUS
12126 && REG_P (XEXP (x, 0))
12127 && CONST_INT_P (XEXP (x, 1)))
12128 return 1;
12129
12130 return 2;
12131 }
12132
12133 static int
12134 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12135 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12136 {
12137 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12138 }
12139
12140 /* Adjust cost hook for XScale. */
12141 static bool
12142 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12143 int * cost)
12144 {
12145 /* Some true dependencies can have a higher cost depending
12146 on precisely how certain input operands are used. */
12147 if (dep_type == 0
12148 && recog_memoized (insn) >= 0
12149 && recog_memoized (dep) >= 0)
12150 {
12151 int shift_opnum = get_attr_shift (insn);
12152 enum attr_type attr_type = get_attr_type (dep);
12153
12154 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12155 operand for INSN. If we have a shifted input operand and the
12156 instruction we depend on is another ALU instruction, then we may
12157 have to account for an additional stall. */
12158 if (shift_opnum != 0
12159 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12160 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12161 || attr_type == TYPE_ALUS_SHIFT_IMM
12162 || attr_type == TYPE_LOGIC_SHIFT_IMM
12163 || attr_type == TYPE_LOGICS_SHIFT_IMM
12164 || attr_type == TYPE_ALU_SHIFT_REG
12165 || attr_type == TYPE_ALUS_SHIFT_REG
12166 || attr_type == TYPE_LOGIC_SHIFT_REG
12167 || attr_type == TYPE_LOGICS_SHIFT_REG
12168 || attr_type == TYPE_MOV_SHIFT
12169 || attr_type == TYPE_MVN_SHIFT
12170 || attr_type == TYPE_MOV_SHIFT_REG
12171 || attr_type == TYPE_MVN_SHIFT_REG))
12172 {
12173 rtx shifted_operand;
12174 int opno;
12175
12176 /* Get the shifted operand. */
12177 extract_insn (insn);
12178 shifted_operand = recog_data.operand[shift_opnum];
12179
12180 /* Iterate over all the operands in DEP. If we write an operand
12181 that overlaps with SHIFTED_OPERAND, then we have to increase the
12182 cost of this dependency. */
12183 extract_insn (dep);
12184 preprocess_constraints (dep);
12185 for (opno = 0; opno < recog_data.n_operands; opno++)
12186 {
12187 /* We can ignore strict inputs. */
12188 if (recog_data.operand_type[opno] == OP_IN)
12189 continue;
12190
12191 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12192 shifted_operand))
12193 {
12194 *cost = 2;
12195 return false;
12196 }
12197 }
12198 }
12199 }
12200 return true;
12201 }
12202
12203 /* Adjust cost hook for Cortex A9. */
12204 static bool
12205 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12206 int * cost)
12207 {
12208 switch (dep_type)
12209 {
12210 case REG_DEP_ANTI:
12211 *cost = 0;
12212 return false;
12213
12214 case REG_DEP_TRUE:
12215 case REG_DEP_OUTPUT:
12216 if (recog_memoized (insn) >= 0
12217 && recog_memoized (dep) >= 0)
12218 {
12219 if (GET_CODE (PATTERN (insn)) == SET)
12220 {
12221 if (GET_MODE_CLASS
12222 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12223 || GET_MODE_CLASS
12224 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12225 {
12226 enum attr_type attr_type_insn = get_attr_type (insn);
12227 enum attr_type attr_type_dep = get_attr_type (dep);
12228
12229 /* By default all dependencies of the form
12230 s0 = s0 <op> s1
12231 s0 = s0 <op> s2
12232 have an extra latency of 1 cycle because
12233 of the input and output dependency in this
12234 case. However, this gets modeled as a true
12235 dependency and hence all these checks. */
12236 if (REG_P (SET_DEST (PATTERN (insn)))
12237 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12238 {
12239 /* FMACS is a special case where the dependent
12240 instruction can be issued 3 cycles before
12241 the normal latency in case of an output
12242 dependency. */
12243 if ((attr_type_insn == TYPE_FMACS
12244 || attr_type_insn == TYPE_FMACD)
12245 && (attr_type_dep == TYPE_FMACS
12246 || attr_type_dep == TYPE_FMACD))
12247 {
12248 if (dep_type == REG_DEP_OUTPUT)
12249 *cost = insn_default_latency (dep) - 3;
12250 else
12251 *cost = insn_default_latency (dep);
12252 return false;
12253 }
12254 else
12255 {
12256 if (dep_type == REG_DEP_OUTPUT)
12257 *cost = insn_default_latency (dep) + 1;
12258 else
12259 *cost = insn_default_latency (dep);
12260 }
12261 return false;
12262 }
12263 }
12264 }
12265 }
12266 break;
12267
12268 default:
12269 gcc_unreachable ();
12270 }
12271
12272 return true;
12273 }
12274
12275 /* Adjust cost hook for FA726TE. */
12276 static bool
12277 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12278 int * cost)
12279 {
12280 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12281 has a penalty of 3. */
12282 if (dep_type == REG_DEP_TRUE
12283 && recog_memoized (insn) >= 0
12284 && recog_memoized (dep) >= 0
12285 && get_attr_conds (dep) == CONDS_SET)
12286 {
12287 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12288 if (get_attr_conds (insn) == CONDS_USE
12289 && get_attr_type (insn) != TYPE_BRANCH)
12290 {
12291 *cost = 3;
12292 return false;
12293 }
12294
12295 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12296 || get_attr_conds (insn) == CONDS_USE)
12297 {
12298 *cost = 0;
12299 return false;
12300 }
12301 }
12302
12303 return true;
12304 }
12305
12306 /* Implement TARGET_REGISTER_MOVE_COST.
12307
12308 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12309 such a move is typically more expensive than a single memory access. We set
12310 the cost to less than two memory accesses so that floating
12311 point to integer conversion does not go through memory. */
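/* For example, a VFP<->core transfer below returns 15, which is less
   than the 2 * 10 that two memory accesses would cost under
   arm_memory_move_cost for TARGET_32BIT, so such transfers stay in
   registers. */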
12312
12313 int
12314 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12315 reg_class_t from, reg_class_t to)
12316 {
12317 if (TARGET_32BIT)
12318 {
12319 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12320 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12321 return 15;
12322 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12323 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12324 return 4;
12325 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12326 return 20;
12327 else
12328 return 2;
12329 }
12330 else
12331 {
12332 if (from == HI_REGS || to == HI_REGS)
12333 return 4;
12334 else
12335 return 2;
12336 }
12337 }
12338
12339 /* Implement TARGET_MEMORY_MOVE_COST. */
12340
12341 int
12342 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12343 bool in ATTRIBUTE_UNUSED)
12344 {
12345 if (TARGET_32BIT)
12346 return 10;
12347 else
12348 {
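/* Thumb: sub-word values cost a flat 8; otherwise the cost scales with
   the mode size and doubles outside LO_REGS, e.g. SImode costs 8 from
   LO_REGS and 16 otherwise. */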
12349 if (GET_MODE_SIZE (mode) < 4)
12350 return 8;
12351 else
12352 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12353 }
12354 }
12355
12356 /* Vectorizer cost model implementation. */
12357
12358 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12359 static int
12360 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12361 tree vectype,
12362 int misalign ATTRIBUTE_UNUSED)
12363 {
12364 unsigned elements;
12365
12366 switch (type_of_cost)
12367 {
12368 case scalar_stmt:
12369 return current_tune->vec_costs->scalar_stmt_cost;
12370
12371 case scalar_load:
12372 return current_tune->vec_costs->scalar_load_cost;
12373
12374 case scalar_store:
12375 return current_tune->vec_costs->scalar_store_cost;
12376
12377 case vector_stmt:
12378 return current_tune->vec_costs->vec_stmt_cost;
12379
12380 case vector_load:
12381 return current_tune->vec_costs->vec_align_load_cost;
12382
12383 case vector_store:
12384 return current_tune->vec_costs->vec_store_cost;
12385
12386 case vec_to_scalar:
12387 return current_tune->vec_costs->vec_to_scalar_cost;
12388
12389 case scalar_to_vec:
12390 return current_tune->vec_costs->scalar_to_vec_cost;
12391
12392 case unaligned_load:
12393 case vector_gather_load:
12394 return current_tune->vec_costs->vec_unalign_load_cost;
12395
12396 case unaligned_store:
12397 case vector_scatter_store:
12398 return current_tune->vec_costs->vec_unalign_store_cost;
12399
12400 case cond_branch_taken:
12401 return current_tune->vec_costs->cond_taken_branch_cost;
12402
12403 case cond_branch_not_taken:
12404 return current_tune->vec_costs->cond_not_taken_branch_cost;
12405
12406 case vec_perm:
12407 case vec_promote_demote:
12408 return current_tune->vec_costs->vec_stmt_cost;
12409
12410 case vec_construct:
12411 elements = TYPE_VECTOR_SUBPARTS (vectype);
12412 return elements / 2 + 1;
12413
12414 default:
12415 gcc_unreachable ();
12416 }
12417 }
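/* Worked example (illustrative, not part of the tuning tables): a
   vec_construct of a 4-element vector has elements = 4, so the cost computed
   above is 4/2 + 1 = 3; a 16-element constructor costs 16/2 + 1 = 9.  */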
12418
12419 /* Return true if and only if this insn can dual-issue only as older. */
12420 static bool
12421 cortexa7_older_only (rtx_insn *insn)
12422 {
12423 if (recog_memoized (insn) < 0)
12424 return false;
12425
12426 switch (get_attr_type (insn))
12427 {
12428 case TYPE_ALU_DSP_REG:
12429 case TYPE_ALU_SREG:
12430 case TYPE_ALUS_SREG:
12431 case TYPE_LOGIC_REG:
12432 case TYPE_LOGICS_REG:
12433 case TYPE_ADC_REG:
12434 case TYPE_ADCS_REG:
12435 case TYPE_ADR:
12436 case TYPE_BFM:
12437 case TYPE_REV:
12438 case TYPE_MVN_REG:
12439 case TYPE_SHIFT_IMM:
12440 case TYPE_SHIFT_REG:
12441 case TYPE_LOAD_BYTE:
12442 case TYPE_LOAD_4:
12443 case TYPE_STORE_4:
12444 case TYPE_FFARITHS:
12445 case TYPE_FADDS:
12446 case TYPE_FFARITHD:
12447 case TYPE_FADDD:
12448 case TYPE_FMOV:
12449 case TYPE_F_CVT:
12450 case TYPE_FCMPS:
12451 case TYPE_FCMPD:
12452 case TYPE_FCONSTS:
12453 case TYPE_FCONSTD:
12454 case TYPE_FMULS:
12455 case TYPE_FMACS:
12456 case TYPE_FMULD:
12457 case TYPE_FMACD:
12458 case TYPE_FDIVS:
12459 case TYPE_FDIVD:
12460 case TYPE_F_MRC:
12461 case TYPE_F_MRRC:
12462 case TYPE_F_FLAG:
12463 case TYPE_F_LOADS:
12464 case TYPE_F_STORES:
12465 return true;
12466 default:
12467 return false;
12468 }
12469 }
12470
12471 /* Return true if and only if this insn can dual-issue as younger. */
12472 static bool
12473 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12474 {
12475 if (recog_memoized (insn) < 0)
12476 {
12477 if (verbose > 5)
12478 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12479 return false;
12480 }
12481
12482 switch (get_attr_type (insn))
12483 {
12484 case TYPE_ALU_IMM:
12485 case TYPE_ALUS_IMM:
12486 case TYPE_LOGIC_IMM:
12487 case TYPE_LOGICS_IMM:
12488 case TYPE_EXTEND:
12489 case TYPE_MVN_IMM:
12490 case TYPE_MOV_IMM:
12491 case TYPE_MOV_REG:
12492 case TYPE_MOV_SHIFT:
12493 case TYPE_MOV_SHIFT_REG:
12494 case TYPE_BRANCH:
12495 case TYPE_CALL:
12496 return true;
12497 default:
12498 return false;
12499 }
12500 }
12501
12502
12503 /* Look for an instruction that can dual issue only as an older
12504 instruction, and move it in front of any instructions that can
12505 dual-issue as younger, while preserving the relative order of all
12506 other instructions in the ready list. This is a heuristic to help
12507 dual-issue in later cycles, by postponing issue of more flexible
12508 instructions. This heuristic may affect dual issue opportunities
12509 in the current cycle. */
12510 static void
12511 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12512 int *n_readyp, int clock)
12513 {
12514 int i;
12515 int first_older_only = -1, first_younger = -1;
12516
12517 if (verbose > 5)
12518 fprintf (file,
12519 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12520 clock,
12521 *n_readyp);
12522
12523 /* Traverse the ready list from the head (the instruction to issue
12524 first), looking for the first instruction that can issue as
12525 younger and the first instruction that can dual-issue only as
12526 older. */
12527 for (i = *n_readyp - 1; i >= 0; i--)
12528 {
12529 rtx_insn *insn = ready[i];
12530 if (cortexa7_older_only (insn))
12531 {
12532 first_older_only = i;
12533 if (verbose > 5)
12534 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12535 break;
12536 }
12537 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12538 first_younger = i;
12539 }
12540
12541 /* Nothing to reorder because either no younger insn found or insn
12542 that can dual-issue only as older appears before any insn that
12543 can dual-issue as younger. */
12544 if (first_younger == -1)
12545 {
12546 if (verbose > 5)
12547 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12548 return;
12549 }
12550
12551 /* Nothing to reorder because no older-only insn in the ready list. */
12552 if (first_older_only == -1)
12553 {
12554 if (verbose > 5)
12555 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12556 return;
12557 }
12558
12559 /* Move first_older_only insn before first_younger. */
12560 if (verbose > 5)
12561 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12562 INSN_UID(ready [first_older_only]),
12563 INSN_UID(ready [first_younger]));
12564 rtx_insn *first_older_only_insn = ready [first_older_only];
12565 for (i = first_older_only; i < first_younger; i++)
12566 {
12567 ready[i] = ready[i+1];
12568 }
12569
12570 ready[i] = first_older_only_insn;
12571 return;
12572 }
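/* Worked example (illustrative): suppose the ready list is { O, X, Y }, where
   index 2 (the head, issued first) holds Y which can issue as younger, X is in
   neither class, and O at index 0 is older-only.  The scan above finds
   first_younger = 2 and first_older_only = 0, and the rotation produces
   { X, Y, O }, so the older-only insn O is issued first while the relative
   order of X and Y is preserved.  */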
12573
12574 /* Implement TARGET_SCHED_REORDER. */
12575 static int
12576 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12577 int clock)
12578 {
12579 switch (arm_tune)
12580 {
12581 case TARGET_CPU_cortexa7:
12582 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12583 break;
12584 default:
12585 /* Do nothing for other cores. */
12586 break;
12587 }
12588
12589 return arm_issue_rate ();
12590 }
12591
12592 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12593 It corrects the value of COST based on the relationship between
12594 INSN and DEP through the dependence LINK. It returns the new
12595 value. There is a per-core adjust_cost hook to adjust scheduler costs
12596 and the per-core hook can choose to completely override the generic
12597 adjust_cost function. Only put bits of code into arm_adjust_cost that
12598 are common across all cores. */
12599 static int
12600 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12601 unsigned int)
12602 {
12603 rtx i_pat, d_pat;
12604
12605 /* When generating Thumb-1 code, we want to place flag-setting operations
12606 close to a conditional branch which depends on them, so that we can
12607 omit the comparison. */
12608 if (TARGET_THUMB1
12609 && dep_type == 0
12610 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12611 && recog_memoized (dep) >= 0
12612 && get_attr_conds (dep) == CONDS_SET)
12613 return 0;
12614
12615 if (current_tune->sched_adjust_cost != NULL)
12616 {
12617 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12618 return cost;
12619 }
12620
12621 /* XXX Is this strictly true? */
12622 if (dep_type == REG_DEP_ANTI
12623 || dep_type == REG_DEP_OUTPUT)
12624 return 0;
12625
12626 /* Call insns don't incur a stall, even if they follow a load. */
12627 if (dep_type == 0
12628 && CALL_P (insn))
12629 return 1;
12630
12631 if ((i_pat = single_set (insn)) != NULL
12632 && MEM_P (SET_SRC (i_pat))
12633 && (d_pat = single_set (dep)) != NULL
12634 && MEM_P (SET_DEST (d_pat)))
12635 {
12636 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12637 /* This is a load after a store, there is no conflict if the load reads
12638 from a cached area. Assume that loads from the stack, and from the
12639 constant pool are cached, and that others will miss. This is a
12640 hack. */
12641
12642 if ((SYMBOL_REF_P (src_mem)
12643 && CONSTANT_POOL_ADDRESS_P (src_mem))
12644 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12645 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12646 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12647 return 1;
12648 }
12649
12650 return cost;
12651 }
12652
12653 int
12654 arm_max_conditional_execute (void)
12655 {
12656 return max_insns_skipped;
12657 }
12658
12659 static int
12660 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12661 {
12662 if (TARGET_32BIT)
12663 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12664 else
12665 return (optimize > 0) ? 2 : 0;
12666 }
12667
12668 static int
12669 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12670 {
12671 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12672 }
12673
12674 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12675 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12676 sequences of non-executed instructions in IT blocks probably take the same
12677 amount of time as executed instructions (and the IT instruction itself takes
12678 space in icache). This function was experimentally determined to give good
12679 results on a popular embedded benchmark. */
12680
12681 static int
12682 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12683 {
12684 return (TARGET_32BIT && speed_p) ? 1
12685 : arm_default_branch_cost (speed_p, predictable_p);
12686 }
12687
12688 static int
12689 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12690 {
12691 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12692 }
12693
12694 static bool fp_consts_inited = false;
12695
12696 static REAL_VALUE_TYPE value_fp0;
12697
12698 static void
12699 init_fp_table (void)
12700 {
12701 REAL_VALUE_TYPE r;
12702
12703 r = REAL_VALUE_ATOF ("0", DFmode);
12704 value_fp0 = r;
12705 fp_consts_inited = true;
12706 }
12707
12708 /* Return TRUE if rtx X is a valid immediate FP constant. */
12709 int
12710 arm_const_double_rtx (rtx x)
12711 {
12712 const REAL_VALUE_TYPE *r;
12713
12714 if (!fp_consts_inited)
12715 init_fp_table ();
12716
12717 r = CONST_DOUBLE_REAL_VALUE (x);
12718 if (REAL_VALUE_MINUS_ZERO (*r))
12719 return 0;
12720
12721 if (real_equal (r, &value_fp0))
12722 return 1;
12723
12724 return 0;
12725 }
12726
12727 /* VFPv3 has a fairly wide range of representable immediates, formed from
12728 "quarter-precision" floating-point values. These can be evaluated using this
12729 formula (with ^ for exponentiation):
12730
12731 -1^s * n * 2^-r
12732
12733 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12734 16 <= n <= 31 and 0 <= r <= 7.
12735
12736 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12737
12738 - A (most-significant) is the sign bit.
12739 - BCD are the exponent (encoded as r XOR 3).
12740 - EFGH are the mantissa (encoded as n - 16).
12741 */
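/* Illustrative sketch (not part of arm.cc): decoding the ABCDEFGH byte back
   into its value, using the formula above.  For example 1.0 = 16 * 2^-4
   (s = 0, n = 16, r = 4) is encoded as 0 111 0000 = 0x70, and -31.0 =
   -1 * 31 * 2^0 (s = 1, n = 31, r = 0) as 1 011 1111 = 0xbf.  */
#if 0
static double
vfp3_decode_imm8_sketch (unsigned char imm8)
{
  int s = (imm8 >> 7) & 1;		/* A: sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;	/* BCD: exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;		/* EFGH: mantissa, stored as n - 16.  */
  return (s ? -n : n) / (double) (1 << r);
}
#endif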
12742
12743 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12744 fconst[sd] instruction, or -1 if X isn't suitable. */
12745 static int
12746 vfp3_const_double_index (rtx x)
12747 {
12748 REAL_VALUE_TYPE r, m;
12749 int sign, exponent;
12750 unsigned HOST_WIDE_INT mantissa, mant_hi;
12751 unsigned HOST_WIDE_INT mask;
12752 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12753 bool fail;
12754
12755 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12756 return -1;
12757
12758 r = *CONST_DOUBLE_REAL_VALUE (x);
12759
12760 /* We can't represent these things, so detect them first. */
12761 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12762 return -1;
12763
12764 /* Extract sign, exponent and mantissa. */
12765 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12766 r = real_value_abs (&r);
12767 exponent = REAL_EXP (&r);
12768 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12769 highest (sign) bit, with a fixed binary point at bit point_pos.
12770 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12771 bits for the mantissa, this may fail (low bits would be lost). */
12772 real_ldexp (&m, &r, point_pos - exponent);
12773 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12774 mantissa = w.elt (0);
12775 mant_hi = w.elt (1);
12776
12777 /* If there are bits set in the low part of the mantissa, we can't
12778 represent this value. */
12779 if (mantissa != 0)
12780 return -1;
12781
12782 /* Now make it so that mantissa contains the most-significant bits, and move
12783 the point_pos to indicate that the least-significant bits have been
12784 discarded. */
12785 point_pos -= HOST_BITS_PER_WIDE_INT;
12786 mantissa = mant_hi;
12787
12788 /* We can permit four significant bits of mantissa only, plus a high bit
12789 which is always 1. */
12790 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12791 if ((mantissa & mask) != 0)
12792 return -1;
12793
12794 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12795 mantissa >>= point_pos - 5;
12796
12797 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12798 floating-point immediate zero with Neon using an integer-zero load, but
12799 that case is handled elsewhere.) */
12800 if (mantissa == 0)
12801 return -1;
12802
12803 gcc_assert (mantissa >= 16 && mantissa <= 31);
12804
12805 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12806 normalized significands are in the range [1, 2). (Our mantissa is shifted
12807 left 4 places at this point relative to normalized IEEE754 values). GCC
12808 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12809 REAL_EXP must be altered. */
12810 exponent = 5 - exponent;
12811
12812 if (exponent < 0 || exponent > 7)
12813 return -1;
12814
12815 /* Sign, mantissa and exponent are now in the correct form to plug into the
12816 formula described in the comment above. */
12817 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12818 }
12819
12820 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12821 int
12822 vfp3_const_double_rtx (rtx x)
12823 {
12824 if (!TARGET_VFP3)
12825 return 0;
12826
12827 return vfp3_const_double_index (x) != -1;
12828 }
12829
12830 /* Recognize immediates which can be used in various Neon and MVE instructions.
12831 Legal immediates are described by the following table (for VMVN variants, the
12832 bitwise inverse of the constant shown is recognized. In either case, VMOV
12833 is output and the correct instruction to use for a given constant is chosen
12834 by the assembler). The constant shown is replicated across all elements of
12835 the destination vector.
12836
12837 insn elems variant constant (binary)
12838 ---- ----- ------- -----------------
12839 vmov i32 0 00000000 00000000 00000000 abcdefgh
12840 vmov i32 1 00000000 00000000 abcdefgh 00000000
12841 vmov i32 2 00000000 abcdefgh 00000000 00000000
12842 vmov i32 3 abcdefgh 00000000 00000000 00000000
12843 vmov i16 4 00000000 abcdefgh
12844 vmov i16 5 abcdefgh 00000000
12845 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12846 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12847 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12848 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12849 vmvn i16 10 00000000 abcdefgh
12850 vmvn i16 11 abcdefgh 00000000
12851 vmov i32 12 00000000 00000000 abcdefgh 11111111
12852 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12853 vmov i32 14 00000000 abcdefgh 11111111 11111111
12854 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12855 vmov i8 16 abcdefgh
12856 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12857 eeeeeeee ffffffff gggggggg hhhhhhhh
12858 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12859 vmov f32 19 00000000 00000000 00000000 00000000
12860
12861 For case 18, B = !b. Representable values are exactly those accepted by
12862 vfp3_const_double_index, but are output as floating-point numbers rather
12863 than indices.
12864
12865 For case 19, we will change it to vmov.i32 when assembling.
12866
12867 Variants 0-5 (inclusive) may also be used as immediates for the second
12868 operand of VORR/VBIC instructions.
12869
12870 The INVERSE argument causes the bitwise inverse of the given operand to be
12871 recognized instead (used for recognizing legal immediates for the VAND/VORN
12872 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12873 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12874 output, rather than the real insns vbic/vorr).
12875
12876 INVERSE makes no difference to the recognition of float vectors.
12877
12878 The return value is the variant of immediate as shown in the above table, or
12879 -1 if the given value doesn't match any of the listed patterns.
12880 */
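/* Worked examples (illustrative): a V4SI constant with every element equal to
   0x000000ab splats to the byte pattern { ab 00 00 00 ... } and matches
   variant 0 (vmov.i32 with abcdefgh = 0xab); every element equal to
   0x0000ab00 matches variant 1; every element equal to 0xffffff54 (the
   bitwise inverse of 0x000000ab) matches VMVN variant 6.  */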
12881 static int
12882 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12883 rtx *modconst, int *elementwidth)
12884 {
12885 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12886 matches = 1; \
12887 for (i = 0; i < idx; i += (STRIDE)) \
12888 if (!(TEST)) \
12889 matches = 0; \
12890 if (matches) \
12891 { \
12892 immtype = (CLASS); \
12893 elsize = (ELSIZE); \
12894 break; \
12895 }
12896
12897 unsigned int i, elsize = 0, idx = 0, n_elts;
12898 unsigned int innersize;
12899 unsigned char bytes[16] = {};
12900 int immtype = -1, matches;
12901 unsigned int invmask = inverse ? 0xff : 0;
12902 bool vector = GET_CODE (op) == CONST_VECTOR;
12903
12904 if (vector)
12905 n_elts = CONST_VECTOR_NUNITS (op);
12906 else
12907 {
12908 n_elts = 1;
12909 gcc_assert (mode != VOIDmode);
12910 }
12911
12912 innersize = GET_MODE_UNIT_SIZE (mode);
12913
12914 /* Only support 128-bit vectors for MVE. */
12915 if (TARGET_HAVE_MVE
12916 && (!vector
12917 || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12918 || n_elts * innersize != 16))
12919 return -1;
12920
12921 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12922 return -1;
12923
12924 /* Vectors of float constants. */
12925 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12926 {
12927 rtx el0 = CONST_VECTOR_ELT (op, 0);
12928
12929 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12930 return -1;
12931
12932 /* FP16 vectors cannot be represented. */
12933 if (GET_MODE_INNER (mode) == HFmode)
12934 return -1;
12935
12936 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12937 are distinct in this context. */
12938 if (!const_vec_duplicate_p (op))
12939 return -1;
12940
12941 if (modconst)
12942 *modconst = CONST_VECTOR_ELT (op, 0);
12943
12944 if (elementwidth)
12945 *elementwidth = 0;
12946
12947 if (el0 == CONST0_RTX (GET_MODE (el0)))
12948 return 19;
12949 else
12950 return 18;
12951 }
12952
12953 /* The tricks done in the code below apply for little-endian vector layout.
12954 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12955 FIXME: Implement logic for big-endian vectors. */
12956 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12957 return -1;
12958
12959 /* Splat vector constant out into a byte vector. */
12960 for (i = 0; i < n_elts; i++)
12961 {
12962 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12963 unsigned HOST_WIDE_INT elpart;
12964
12965 gcc_assert (CONST_INT_P (el));
12966 elpart = INTVAL (el);
12967
12968 for (unsigned int byte = 0; byte < innersize; byte++)
12969 {
12970 bytes[idx++] = (elpart & 0xff) ^ invmask;
12971 elpart >>= BITS_PER_UNIT;
12972 }
12973 }
12974
12975 /* Sanity check. */
12976 gcc_assert (idx == GET_MODE_SIZE (mode));
12977
12978 do
12979 {
12980 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12981 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12982
12983 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12984 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12985
12986 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12987 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12988
12989 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12990 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12991
12992 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12993
12994 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12995
12996 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12997 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12998
12999 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13000 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13001
13002 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13003 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13004
13005 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13006 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13007
13008 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13009
13010 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13011
13012 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13013 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13014
13015 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13016 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13017
13018 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13019 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13020
13021 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13022 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13023
13024 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13025
13026 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13027 && bytes[i] == bytes[(i + 8) % idx]);
13028 }
13029 while (0);
13030
13031 if (immtype == -1)
13032 return -1;
13033
13034 if (elementwidth)
13035 *elementwidth = elsize;
13036
13037 if (modconst)
13038 {
13039 unsigned HOST_WIDE_INT imm = 0;
13040
13041 /* Un-invert bytes of recognized vector, if necessary. */
13042 if (invmask != 0)
13043 for (i = 0; i < idx; i++)
13044 bytes[i] ^= invmask;
13045
13046 if (immtype == 17)
13047 {
13048 /* FIXME: Broken on 32-bit H_W_I hosts. */
13049 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13050
13051 for (i = 0; i < 8; i++)
13052 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13053 << (i * BITS_PER_UNIT);
13054
13055 *modconst = GEN_INT (imm);
13056 }
13057 else
13058 {
13059 unsigned HOST_WIDE_INT imm = 0;
13060
13061 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13062 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13063
13064 *modconst = GEN_INT (imm);
13065 }
13066 }
13067
13068 return immtype;
13069 #undef CHECK
13070 }
13071
13072 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13073 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13074 (or zero for float elements), and a modified constant (whatever should be
13075 output for a VMOV) in *MODCONST. This function was renamed from
13076 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13077 it is used by both Neon and MVE. */
13078 int
13079 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13080 rtx *modconst, int *elementwidth)
13081 {
13082 rtx tmpconst;
13083 int tmpwidth;
13084 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13085
13086 if (retval == -1)
13087 return 0;
13088
13089 if (modconst)
13090 *modconst = tmpconst;
13091
13092 if (elementwidth)
13093 *elementwidth = tmpwidth;
13094
13095 return 1;
13096 }
13097
13098 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13099 the immediate is valid, write a constant suitable for using as an operand
13100 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13101 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13102
13103 int
13104 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13105 rtx *modconst, int *elementwidth)
13106 {
13107 rtx tmpconst;
13108 int tmpwidth;
13109 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13110
13111 if (retval < 0 || retval > 5)
13112 return 0;
13113
13114 if (modconst)
13115 *modconst = tmpconst;
13116
13117 if (elementwidth)
13118 *elementwidth = tmpwidth;
13119
13120 return 1;
13121 }
13122
13123 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13124 the immediate is valid, write a constant suitable for using as an operand
13125 to VSHR/VSHL to *MODCONST and the corresponding element width to
13126 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
13127 which have different immediate limits. */
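/* For example, with 16-bit elements a VSHL immediate must be in the range
   0-15 while a VSHR immediate must be in the range 1-16; the checks below
   enforce exactly that.  */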
13128
13129 int
13130 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13131 rtx *modconst, int *elementwidth,
13132 bool isleftshift)
13133 {
13134 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13135 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13136 unsigned HOST_WIDE_INT last_elt = 0;
13137 unsigned HOST_WIDE_INT maxshift;
13138
13139 /* All elements of the vector constant must be identical. */
13140 for (i = 0; i < n_elts; i++)
13141 {
13142 rtx el = CONST_VECTOR_ELT (op, i);
13143 unsigned HOST_WIDE_INT elpart;
13144
13145 if (CONST_INT_P (el))
13146 elpart = INTVAL (el);
13147 else if (CONST_DOUBLE_P (el))
13148 return 0;
13149 else
13150 gcc_unreachable ();
13151
13152 if (i != 0 && elpart != last_elt)
13153 return 0;
13154
13155 last_elt = elpart;
13156 }
13157
13158 /* The permitted shift immediate is bounded by the element size. */
13159 maxshift = innersize * 8;
13160
13161 if (isleftshift)
13162 {
13163 /* Left shift immediate value can be from 0 to <size>-1. */
13164 if (last_elt >= maxshift)
13165 return 0;
13166 }
13167 else
13168 {
13169 /* Right shift immediate value can be from 1 to <size>. */
13170 if (last_elt == 0 || last_elt > maxshift)
13171 return 0;
13172 }
13173
13174 if (elementwidth)
13175 *elementwidth = innersize * 8;
13176
13177 if (modconst)
13178 *modconst = CONST_VECTOR_ELT (op, 0);
13179
13180 return 1;
13181 }
13182
13183 /* Return a string suitable for output of Neon immediate logic operation
13184 MNEM. */
13185
13186 char *
13187 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13188 int inverse, int quad)
13189 {
13190 int width, is_valid;
13191 static char templ[40];
13192
13193 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13194
13195 gcc_assert (is_valid != 0);
13196
13197 if (quad)
13198 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13199 else
13200 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13201
13202 return templ;
13203 }
13204
13205 /* Return a string suitable for output of Neon immediate shift operation
13206 (VSHR or VSHL) MNEM. */
13207
13208 char *
13209 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13210 machine_mode mode, int quad,
13211 bool isleftshift)
13212 {
13213 int width, is_valid;
13214 static char templ[40];
13215
13216 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13217 gcc_assert (is_valid != 0);
13218
13219 if (quad)
13220 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13221 else
13222 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13223
13224 return templ;
13225 }
13226
13227 /* Output a sequence of pairwise operations to implement a reduction.
13228 NOTE: We do "too much work" here, because pairwise operations work on two
13229 registers-worth of operands in one go. Unfortunately it does not seem
13230 possible to exploit those extra calculations to do the full operation in fewer steps.
13231 Although all vector elements of the result but the first are ignored, we
13232 actually calculate the same result in each of the elements. An alternative
13233 such as initially loading a vector with zero to use as each of the second
13234 operands would use up an additional register and take an extra instruction,
13235 for no particular gain. */
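/* Worked example (illustrative): summing a V4HI vector { a, b, c, d } takes
   two pairwise steps.  With both operands equal to the running value, the
   first VPADD produces { a+b, c+d, a+b, c+d } and the second produces
   { a+b+c+d, ... } in every element, of which only element 0 is used.  */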
13236
13237 void
13238 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13239 rtx (*reduc) (rtx, rtx, rtx))
13240 {
13241 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13242 rtx tmpsum = op1;
13243
13244 for (i = parts / 2; i >= 1; i /= 2)
13245 {
13246 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13247 emit_insn (reduc (dest, tmpsum, tmpsum));
13248 tmpsum = dest;
13249 }
13250 }
13251
13252 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13253 loaded into a register using VDUP.
13254
13255 If this is the case, and GENERATE is set, we also generate
13256 instructions to do this and return an RTX to assign to the register. */
13257
13258 static rtx
13259 neon_vdup_constant (rtx vals, bool generate)
13260 {
13261 machine_mode mode = GET_MODE (vals);
13262 machine_mode inner_mode = GET_MODE_INNER (mode);
13263 rtx x;
13264
13265 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13266 return NULL_RTX;
13267
13268 if (!const_vec_duplicate_p (vals, &x))
13269 /* The elements are not all the same. We could handle repeating
13270 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13271 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13272 vdup.i16). */
13273 return NULL_RTX;
13274
13275 if (!generate)
13276 return x;
13277
13278 /* We can load this constant by using VDUP and a constant in a
13279 single ARM register. This will be cheaper than a vector
13280 load. */
13281
13282 x = copy_to_mode_reg (inner_mode, x);
13283 return gen_vec_duplicate (mode, x);
13284 }
13285
13286 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13287 rtx
13288 mve_bool_vec_to_const (rtx const_vec)
13289 {
13290 int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
13291 int repeat = 16 / n_elts;
13292 int i;
13293 int hi_val = 0;
13294
13295 for (i = 0; i < n_elts; i++)
13296 {
13297 rtx el = CONST_VECTOR_ELT (const_vec, i);
13298 unsigned HOST_WIDE_INT elpart;
13299
13300 gcc_assert (CONST_INT_P (el));
13301 elpart = INTVAL (el);
13302
13303 for (int j = 0; j < repeat; j++)
13304 hi_val |= elpart << (i * repeat + j);
13305 }
13306 return gen_int_mode (hi_val, HImode);
13307 }
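/* Illustrative sketch (not part of arm.cc): the same packing for a
   hypothetical 4-element predicate.  Each element is widened to 16/4 = 4
   copies of its low bit, so { 1, 0, 1, 1 } yields 0xff0f.  */
#if 0
static unsigned int
mve_pack_bool4_sketch (const int elts[4])
{
  unsigned int hi_val = 0;
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)	/* repeat = 16 / n_elts.  */
      hi_val |= (unsigned int) (elts[i] & 1) << (i * 4 + j);
  return hi_val;
}
#endif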
13308
13309 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13310 constants (for vec_init) or a CONST_VECTOR, can be efficiently loaded
13311 into a register.
13312
13313 If this is the case, and GENERATE is set, we also generate code to do
13314 this and return an RTX to copy into the register. */
13315
13316 rtx
13317 neon_make_constant (rtx vals, bool generate)
13318 {
13319 machine_mode mode = GET_MODE (vals);
13320 rtx target;
13321 rtx const_vec = NULL_RTX;
13322 int n_elts = GET_MODE_NUNITS (mode);
13323 int n_const = 0;
13324 int i;
13325
13326 if (GET_CODE (vals) == CONST_VECTOR)
13327 const_vec = vals;
13328 else if (GET_CODE (vals) == PARALLEL)
13329 {
13330 /* A CONST_VECTOR must contain only CONST_INTs and
13331 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13332 Only store valid constants in a CONST_VECTOR. */
13333 for (i = 0; i < n_elts; ++i)
13334 {
13335 rtx x = XVECEXP (vals, 0, i);
13336 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13337 n_const++;
13338 }
13339 if (n_const == n_elts)
13340 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13341 }
13342 else
13343 gcc_unreachable ();
13344
13345 if (const_vec != NULL
13346 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13347 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13348 return const_vec;
13349 else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
13350 return mve_bool_vec_to_const (const_vec);
13351 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13352 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13353 pipeline cycle; creating the constant takes one or two ARM
13354 pipeline cycles. */
13355 return target;
13356 else if (const_vec != NULL_RTX)
13357 /* Load from constant pool. On Cortex-A8 this takes two cycles
13358 (for either double or quad vectors). We cannot take advantage
13359 of single-cycle VLD1 because we need a PC-relative addressing
13360 mode. */
13361 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13362 else
13363 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13364 We cannot construct an initializer. */
13365 return NULL_RTX;
13366 }
13367
13368 /* Initialize vector TARGET to VALS. */
13369
13370 void
13371 neon_expand_vector_init (rtx target, rtx vals)
13372 {
13373 machine_mode mode = GET_MODE (target);
13374 machine_mode inner_mode = GET_MODE_INNER (mode);
13375 int n_elts = GET_MODE_NUNITS (mode);
13376 int n_var = 0, one_var = -1;
13377 bool all_same = true;
13378 rtx x, mem;
13379 int i;
13380
13381 for (i = 0; i < n_elts; ++i)
13382 {
13383 x = XVECEXP (vals, 0, i);
13384 if (!CONSTANT_P (x))
13385 ++n_var, one_var = i;
13386
13387 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13388 all_same = false;
13389 }
13390
13391 if (n_var == 0)
13392 {
13393 rtx constant = neon_make_constant (vals);
13394 if (constant != NULL_RTX)
13395 {
13396 emit_move_insn (target, constant);
13397 return;
13398 }
13399 }
13400
13401 /* Splat a single non-constant element if we can. */
13402 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13403 {
13404 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13405 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13406 return;
13407 }
13408
13409 /* One field is non-constant. Load constant then overwrite varying
13410 field. This is more efficient than using the stack. */
13411 if (n_var == 1)
13412 {
13413 rtx copy = copy_rtx (vals);
13414 rtx merge_mask = GEN_INT (1 << one_var);
13415
13416 /* Load constant part of vector, substitute neighboring value for
13417 varying element. */
13418 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13419 neon_expand_vector_init (target, copy);
13420
13421 /* Insert variable. */
13422 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13423 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13424 return;
13425 }
13426
13427 /* Construct the vector in memory one field at a time
13428 and load the whole vector. */
13429 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13430 for (i = 0; i < n_elts; i++)
13431 emit_move_insn (adjust_address_nv (mem, inner_mode,
13432 i * GET_MODE_SIZE (inner_mode)),
13433 XVECEXP (vals, 0, i));
13434 emit_move_insn (target, mem);
13435 }
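/* Worked example (illustrative): initializing a 4-element vector to
   { x, 1, 2, 3 } where only x is non-constant takes the n_var == 1 path:
   the routine first loads the constant vector { 1, 1, 2, 3 } (element 0
   borrowed from its neighbour) and then inserts x into lane 0 using a
   merge mask of 1 << 0.  */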
13436
13437 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13438 an error mentioning DESC if it doesn't. EXP indicates the source location, which includes the
13439 inlining history for intrinsics. */
13440
13441 static void
13442 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13443 const_tree exp, const char *desc)
13444 {
13445 HOST_WIDE_INT lane;
13446
13447 gcc_assert (CONST_INT_P (operand));
13448
13449 lane = INTVAL (operand);
13450
13451 if (lane < low || lane >= high)
13452 {
13453 if (exp)
13454 error_at (EXPR_LOCATION (exp),
13455 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13456 else
13457 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13458 }
13459 }
13460
13461 /* Bounds-check lanes. */
13462
13463 void
13464 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13465 const_tree exp)
13466 {
13467 bounds_check (operand, low, high, exp, "lane");
13468 }
13469
13470 /* Bounds-check constants. */
13471
13472 void
13473 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13474 {
13475 bounds_check (operand, low, high, NULL_TREE, "constant");
13476 }
13477
13478 HOST_WIDE_INT
13479 neon_element_bits (machine_mode mode)
13480 {
13481 return GET_MODE_UNIT_BITSIZE (mode);
13482 }
13483
13484 \f
13485 /* Predicates for `match_operand' and `match_operator'. */
13486
13487 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13488 WB level is 2 if full writeback address modes are allowed, 1
13489 if limited writeback address modes (POST_INC and PRE_DEC) are
13490 allowed and 0 if no writeback at all is supported. */
13491
13492 int
13493 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13494 {
13495 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13496 rtx ind;
13497
13498 /* Reject eliminable registers. */
13499 if (! (reload_in_progress || reload_completed || lra_in_progress)
13500 && ( reg_mentioned_p (frame_pointer_rtx, op)
13501 || reg_mentioned_p (arg_pointer_rtx, op)
13502 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13503 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13504 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13505 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13506 return FALSE;
13507
13508 /* Constants are converted into offsets from labels. */
13509 if (!MEM_P (op))
13510 return FALSE;
13511
13512 ind = XEXP (op, 0);
13513
13514 if (reload_completed
13515 && (LABEL_REF_P (ind)
13516 || (GET_CODE (ind) == CONST
13517 && GET_CODE (XEXP (ind, 0)) == PLUS
13518 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13519 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13520 return TRUE;
13521
13522 /* Match: (mem (reg)). */
13523 if (REG_P (ind))
13524 return arm_address_register_rtx_p (ind, 0);
13525
13526 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13527 acceptable whenever at least restricted writeback is allowed
13528 (subject to verification by arm_address_register_rtx_p).
13529 PRE_INC and POST_DEC additionally require full writeback
13530 to be accepted. */
13531 if (wb_level > 0
13532 && (GET_CODE (ind) == POST_INC
13533 || GET_CODE (ind) == PRE_DEC
13534 || (wb_level > 1
13535 && (GET_CODE (ind) == PRE_INC
13536 || GET_CODE (ind) == POST_DEC))))
13537 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13538
13539 if (wb_level > 1
13540 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13541 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13542 && GET_CODE (XEXP (ind, 1)) == PLUS
13543 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13544 ind = XEXP (ind, 1);
13545
13546 /* Match:
13547 (plus (reg)
13548 (const))
13549
13550 The encoded immediate for 16-bit modes is multiplied by 2,
13551 while the encoded immediate for 32-bit and 64-bit modes is
13552 multiplied by 4. */
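/* For example, a 32-bit or 64-bit access has factor 4, so the offset must be
   a multiple of 4 in the range [-1020, 1020]; a 16-bit access has factor 2,
   giving multiples of 2 in [-510, 510].  */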
13553 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13554 if (GET_CODE (ind) == PLUS
13555 && REG_P (XEXP (ind, 0))
13556 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13557 && CONST_INT_P (XEXP (ind, 1))
13558 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13559 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13560 return TRUE;
13561
13562 return FALSE;
13563 }
13564
13565 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13566 WB is true if full writeback address modes are allowed and is false
13567 if limited writeback address modes (POST_INC and PRE_DEC) are
13568 allowed. */
13569
13570 int arm_coproc_mem_operand (rtx op, bool wb)
13571 {
13572 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13573 }
13574
13575 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13576 context in which no writeback address modes are allowed. */
13577
13578 int
13579 arm_coproc_mem_operand_no_writeback (rtx op)
13580 {
13581 return arm_coproc_mem_operand_wb (op, 0);
13582 }
13583
13584 /* Return TRUE if OP is a valid MVE vector memory operand for MODE.
13585 1. For the modes V8QI, V4QI and V4HI, check for [Rn]; return TRUE for Rn <= LO_REGS.
13586 2. For other modes, check for [Rn]; return TRUE for Rn < R15 (except R13). */
13587 int
13588 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13589 {
13590 enum rtx_code code;
13591 int val, reg_no;
13592
13593 /* Match: (mem (reg)). */
13594 if (REG_P (op))
13595 {
13596 int reg_no = REGNO (op);
13597 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13598 ? reg_no <= LAST_LO_REGNUM
13599 : reg_no < LAST_ARM_REGNUM)
13600 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13601 }
13602 code = GET_CODE (op);
13603
13604 if (code == POST_INC || code == PRE_DEC
13605 || code == PRE_INC || code == POST_DEC)
13606 {
13607 reg_no = REGNO (XEXP (op, 0));
13608 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13609 ? reg_no <= LAST_LO_REGNUM
13610 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13611 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13612 }
13613 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13614 && GET_CODE (XEXP (op, 1)) == PLUS
13615 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13616 && REG_P (XEXP (op, 0))
13617 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13618 /* Make sure to only accept PLUS after reload_completed, otherwise
13619 this will interfere with auto_inc's pattern detection. */
13620 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13621 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13622 {
13623 reg_no = REGNO (XEXP (op, 0));
13624 if (code == PLUS)
13625 val = INTVAL (XEXP (op, 1));
13626 else
13627 val = INTVAL (XEXP(XEXP (op, 1), 1));
13628
13629 switch (mode)
13630 {
13631 case E_V16QImode:
13632 case E_V8QImode:
13633 case E_V4QImode:
13634 if (abs (val) > 127)
13635 return FALSE;
13636 break;
13637 case E_V8HImode:
13638 case E_V8HFmode:
13639 case E_V4HImode:
13640 case E_V4HFmode:
13641 if (val % 2 != 0 || abs (val) > 254)
13642 return FALSE;
13643 break;
13644 case E_V4SImode:
13645 case E_V4SFmode:
13646 if (val % 4 != 0 || abs (val) > 508)
13647 return FALSE;
13648 break;
13649 default:
13650 return FALSE;
13651 }
13652 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13653 || (MVE_STN_LDW_MODE (mode)
13654 ? reg_no <= LAST_LO_REGNUM
13655 : (reg_no < LAST_ARM_REGNUM
13656 && (code == PLUS || reg_no != SP_REGNUM))));
13657 }
13658 return FALSE;
13659 }
13660
13661 /* Return TRUE if OP is a memory operand which we can load or store a vector
13662 to/from. TYPE is one of the following values:
13663 0 - Vector load/store (vldr)
13664 1 - Core registers (ldm)
13665 2 - Element/structure loads (vld1)
13666 */
13667 int
13668 neon_vector_mem_operand (rtx op, int type, bool strict)
13669 {
13670 rtx ind;
13671
13672 /* Reject eliminable registers. */
13673 if (strict && ! (reload_in_progress || reload_completed)
13674 && (reg_mentioned_p (frame_pointer_rtx, op)
13675 || reg_mentioned_p (arg_pointer_rtx, op)
13676 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13677 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13678 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13679 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13680 return FALSE;
13681
13682 /* Constants are converted into offsets from labels. */
13683 if (!MEM_P (op))
13684 return FALSE;
13685
13686 ind = XEXP (op, 0);
13687
13688 if (reload_completed
13689 && (LABEL_REF_P (ind)
13690 || (GET_CODE (ind) == CONST
13691 && GET_CODE (XEXP (ind, 0)) == PLUS
13692 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13693 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13694 return TRUE;
13695
13696 /* Match: (mem (reg)). */
13697 if (REG_P (ind))
13698 return arm_address_register_rtx_p (ind, 0);
13699
13700 /* Allow post-increment with Neon registers. */
13701 if ((type != 1 && GET_CODE (ind) == POST_INC)
13702 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13703 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13704
13705 /* Allow post-increment by register for VLDn */
13706 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13707 && GET_CODE (XEXP (ind, 1)) == PLUS
13708 && REG_P (XEXP (XEXP (ind, 1), 1))
13709 && REG_P (XEXP (ind, 0))
13710 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13711 return true;
13712
13713 /* Match:
13714 (plus (reg)
13715 (const)). */
13716 if (type == 0
13717 && GET_CODE (ind) == PLUS
13718 && REG_P (XEXP (ind, 0))
13719 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13720 && CONST_INT_P (XEXP (ind, 1))
13721 && INTVAL (XEXP (ind, 1)) > -1024
13722 /* For quad modes, we restrict the constant offset to be slightly less
13723 than what the instruction format permits. We have no such constraint
13724 on double mode offsets. (This must match arm_legitimate_index_p.) */
13725 && (INTVAL (XEXP (ind, 1))
13726 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13727 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13728 return TRUE;
13729
13730 return FALSE;
13731 }
13732
13733 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13734 type. */
13735 int
13736 neon_struct_mem_operand (rtx op)
13737 {
13738 rtx ind;
13739
13740 /* Reject eliminable registers. */
13741 if (! (reload_in_progress || reload_completed)
13742 && ( reg_mentioned_p (frame_pointer_rtx, op)
13743 || reg_mentioned_p (arg_pointer_rtx, op)
13744 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13745 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13746 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13747 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13748 return FALSE;
13749
13750 /* Constants are converted into offsets from labels. */
13751 if (!MEM_P (op))
13752 return FALSE;
13753
13754 ind = XEXP (op, 0);
13755
13756 if (reload_completed
13757 && (LABEL_REF_P (ind)
13758 || (GET_CODE (ind) == CONST
13759 && GET_CODE (XEXP (ind, 0)) == PLUS
13760 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13761 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13762 return TRUE;
13763
13764 /* Match: (mem (reg)). */
13765 if (REG_P (ind))
13766 return arm_address_register_rtx_p (ind, 0);
13767
13768 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13769 if (GET_CODE (ind) == POST_INC
13770 || GET_CODE (ind) == PRE_DEC)
13771 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13772
13773 return FALSE;
13774 }
13775
13776 /* Prepares the operands for the VCMLA by lane instruction such that the right
13777 register number is selected. This instruction is special in that it always
13778 requires a D register, however there is a choice to be made between Dn[0],
13779 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13780
13781 The VCMLA by lane function always selects two values. For instance given D0
13782 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13783 used by the instruction. However, given V4SF, indices 0 and 1 are both valid,
13784 selecting D0[0] or D1[0] respectively.
13785
13786 This function centralizes that information based on OPERANDS: OPERANDS[3]
13787 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13788 updated to contain the right index. */
13789
13790 rtx *
13791 neon_vcmla_lane_prepare_operands (rtx *operands)
13792 {
13793 int lane = INTVAL (operands[4]);
13794 machine_mode constmode = SImode;
13795 machine_mode mode = GET_MODE (operands[3]);
13796 int regno = REGNO (operands[3]);
13797 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13798 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13799 {
13800 operands[3] = gen_int_mode (regno + 1, constmode);
13801 operands[4]
13802 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13803 }
13804 else
13805 {
13806 operands[3] = gen_int_mode (regno, constmode);
13807 operands[4] = gen_int_mode (lane, constmode);
13808 }
13809 return operands;
13810 }
13811
13812
13813 /* Return true if X is a register that will be eliminated later on. */
13814 int
13815 arm_eliminable_register (rtx x)
13816 {
13817 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13818 || REGNO (x) == ARG_POINTER_REGNUM
13819 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13820 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13821 }
13822
13823 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13824 coprocessor registers. Otherwise return NO_REGS. */
13825
13826 enum reg_class
13827 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13828 {
13829 if (mode == HFmode)
13830 {
13831 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13832 return GENERAL_REGS;
13833 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13834 return NO_REGS;
13835 return GENERAL_REGS;
13836 }
13837
13838 /* The neon move patterns handle all legitimate vector and struct
13839 addresses. */
13840 if (TARGET_NEON
13841 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13842 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13843 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13844 || VALID_NEON_STRUCT_MODE (mode)))
13845 return NO_REGS;
13846
13847 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13848 return NO_REGS;
13849
13850 return GENERAL_REGS;
13851 }
13852
13853 /* Values which must be returned in the most-significant end of the return
13854 register. */
13855
13856 static bool
13857 arm_return_in_msb (const_tree valtype)
13858 {
13859 return (TARGET_AAPCS_BASED
13860 && BYTES_BIG_ENDIAN
13861 && (AGGREGATE_TYPE_P (valtype)
13862 || TREE_CODE (valtype) == COMPLEX_TYPE
13863 || FIXED_POINT_TYPE_P (valtype)));
13864 }
13865
13866 /* Return TRUE if X references a SYMBOL_REF. */
13867 int
13868 symbol_mentioned_p (rtx x)
13869 {
13870 const char * fmt;
13871 int i;
13872
13873 if (SYMBOL_REF_P (x))
13874 return 1;
13875
13876 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13877 are constant offsets, not symbols. */
13878 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13879 return 0;
13880
13881 fmt = GET_RTX_FORMAT (GET_CODE (x));
13882
13883 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13884 {
13885 if (fmt[i] == 'E')
13886 {
13887 int j;
13888
13889 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13890 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13891 return 1;
13892 }
13893 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13894 return 1;
13895 }
13896
13897 return 0;
13898 }
13899
13900 /* Return TRUE if X references a LABEL_REF. */
13901 int
13902 label_mentioned_p (rtx x)
13903 {
13904 const char * fmt;
13905 int i;
13906
13907 if (LABEL_REF_P (x))
13908 return 1;
13909
13910 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13911 instruction, but they are constant offsets, not symbols. */
13912 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13913 return 0;
13914
13915 fmt = GET_RTX_FORMAT (GET_CODE (x));
13916 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13917 {
13918 if (fmt[i] == 'E')
13919 {
13920 int j;
13921
13922 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13923 if (label_mentioned_p (XVECEXP (x, i, j)))
13924 return 1;
13925 }
13926 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13927 return 1;
13928 }
13929
13930 return 0;
13931 }
13932
13933 int
13934 tls_mentioned_p (rtx x)
13935 {
13936 switch (GET_CODE (x))
13937 {
13938 case CONST:
13939 return tls_mentioned_p (XEXP (x, 0));
13940
13941 case UNSPEC:
13942 if (XINT (x, 1) == UNSPEC_TLS)
13943 return 1;
13944
13945 /* Fall through. */
13946 default:
13947 return 0;
13948 }
13949 }
13950
13951 /* Must not copy any rtx that uses a pc-relative address.
13952 Also, disallow copying of load-exclusive instructions that
13953 may appear after splitting of compare-and-swap-style operations
13954 so as to prevent those loops from being transformed away from their
13955 canonical forms (see PR 69904). */
13956
13957 static bool
13958 arm_cannot_copy_insn_p (rtx_insn *insn)
13959 {
13960 /* The tls call insn cannot be copied, as it is paired with a data
13961 word. */
13962 if (recog_memoized (insn) == CODE_FOR_tlscall)
13963 return true;
13964
13965 subrtx_iterator::array_type array;
13966 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13967 {
13968 const_rtx x = *iter;
13969 if (GET_CODE (x) == UNSPEC
13970 && (XINT (x, 1) == UNSPEC_PIC_BASE
13971 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13972 return true;
13973 }
13974
13975 rtx set = single_set (insn);
13976 if (set)
13977 {
13978 rtx src = SET_SRC (set);
13979 if (GET_CODE (src) == ZERO_EXTEND)
13980 src = XEXP (src, 0);
13981
13982 /* Catch the load-exclusive and load-acquire operations. */
13983 if (GET_CODE (src) == UNSPEC_VOLATILE
13984 && (XINT (src, 1) == VUNSPEC_LL
13985 || XINT (src, 1) == VUNSPEC_LAX))
13986 return true;
13987 }
13988 return false;
13989 }
13990
13991 enum rtx_code
13992 minmax_code (rtx x)
13993 {
13994 enum rtx_code code = GET_CODE (x);
13995
13996 switch (code)
13997 {
13998 case SMAX:
13999 return GE;
14000 case SMIN:
14001 return LE;
14002 case UMIN:
14003 return LEU;
14004 case UMAX:
14005 return GEU;
14006 default:
14007 gcc_unreachable ();
14008 }
14009 }
14010
14011 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14012
14013 bool
14014 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14015 int *mask, bool *signed_sat)
14016 {
14017 /* The high bound must be a power of two minus one. */
14018 int log = exact_log2 (INTVAL (hi_bound) + 1);
14019 if (log == -1)
14020 return false;
14021
14022 /* The low bound is either zero (for usat) or one less than the
14023 negation of the high bound (for ssat). */
14024 if (INTVAL (lo_bound) == 0)
14025 {
14026 if (mask)
14027 *mask = log;
14028 if (signed_sat)
14029 *signed_sat = false;
14030
14031 return true;
14032 }
14033
14034 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14035 {
14036 if (mask)
14037 *mask = log + 1;
14038 if (signed_sat)
14039 *signed_sat = true;
14040
14041 return true;
14042 }
14043
14044 return false;
14045 }
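/* Worked examples (illustrative): bounds (0, 255) satisfy the first case and
   correspond to USAT with *MASK = 8; bounds (-128, 127) satisfy the second
   case, since -128 == -127 - 1, and correspond to SSAT with *MASK = 8.  */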
14046
14047 /* Return 1 if memory locations are adjacent. */
14048 int
14049 adjacent_mem_locations (rtx a, rtx b)
14050 {
14051 /* We don't guarantee to preserve the order of these memory refs. */
14052 if (volatile_refs_p (a) || volatile_refs_p (b))
14053 return 0;
14054
14055 if ((REG_P (XEXP (a, 0))
14056 || (GET_CODE (XEXP (a, 0)) == PLUS
14057 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14058 && (REG_P (XEXP (b, 0))
14059 || (GET_CODE (XEXP (b, 0)) == PLUS
14060 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14061 {
14062 HOST_WIDE_INT val0 = 0, val1 = 0;
14063 rtx reg0, reg1;
14064 int val_diff;
14065
14066 if (GET_CODE (XEXP (a, 0)) == PLUS)
14067 {
14068 reg0 = XEXP (XEXP (a, 0), 0);
14069 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14070 }
14071 else
14072 reg0 = XEXP (a, 0);
14073
14074 if (GET_CODE (XEXP (b, 0)) == PLUS)
14075 {
14076 reg1 = XEXP (XEXP (b, 0), 0);
14077 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14078 }
14079 else
14080 reg1 = XEXP (b, 0);
14081
14082 /* Don't accept any offset that will require multiple
14083 instructions to handle, since this would cause the
14084 arith_adjacentmem pattern to output an overlong sequence. */
14085 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14086 return 0;
14087
14088 /* Don't allow an eliminable register: register elimination can make
14089 the offset too large. */
14090 if (arm_eliminable_register (reg0))
14091 return 0;
14092
14093 val_diff = val1 - val0;
14094
14095 if (arm_ld_sched)
14096 {
14097 /* If the target has load delay slots, then there's no benefit
14098 to using an ldm instruction unless the offset is zero and
14099 we are optimizing for size. */
14100 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14101 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14102 && (val_diff == 4 || val_diff == -4));
14103 }
14104
14105 return ((REGNO (reg0) == REGNO (reg1))
14106 && (val_diff == 4 || val_diff == -4));
14107 }
14108
14109 return 0;
14110 }
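/* For example, (mem (plus r4 8)) and (mem (plus r4 12)) share a base register
   and differ by 4 bytes, so on a target without load delay slots they are
   considered adjacent and are a candidate for LDM/STM.  */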
14111
14112 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14113 for load operations, false for store operations. CONSECUTIVE is true
14114 if the register numbers in the operation must be consecutive in the register
14115 bank. RETURN_PC is true if the value is to be loaded into the PC.
14116 The pattern we are trying to match for load is:
14117 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14118 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14119 :
14120 :
14121 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14122 ]
14123 where
14124 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14125 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14126 3. If consecutive is TRUE, then for kth register being loaded,
14127 REGNO (R_dk) = REGNO (R_d0) + k.
14128 The pattern for store is similar. */
14129 bool
14130 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14131 bool consecutive, bool return_pc)
14132 {
14133 HOST_WIDE_INT count = XVECLEN (op, 0);
14134 rtx reg, mem, addr;
14135 unsigned regno;
14136 unsigned first_regno;
14137 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14138 rtx elt;
14139 bool addr_reg_in_reglist = false;
14140 bool update = false;
14141 int reg_increment;
14142 int offset_adj;
14143 int regs_per_val;
14144
14145 /* If not in SImode, then registers must be consecutive
14146 (e.g., VLDM instructions for DFmode). */
14147 gcc_assert ((mode == SImode) || consecutive);
14148 /* Setting return_pc for stores is illegal. */
14149 gcc_assert (!return_pc || load);
14150
14151 /* Set up the increments and the regs per val based on the mode. */
14152 reg_increment = GET_MODE_SIZE (mode);
14153 regs_per_val = reg_increment / 4;
14154 offset_adj = return_pc ? 1 : 0;
14155
14156 if (count <= 1
14157 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14158 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14159 return false;
14160
14161 /* Check if this is a write-back. */
14162 elt = XVECEXP (op, 0, offset_adj);
14163 if (GET_CODE (SET_SRC (elt)) == PLUS)
14164 {
14165 i++;
14166 base = 1;
14167 update = true;
14168
14169 /* The offset adjustment must be the number of registers being
14170 popped times the size of a single register. */
14171 if (!REG_P (SET_DEST (elt))
14172 || !REG_P (XEXP (SET_SRC (elt), 0))
14173 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14174 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14175 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14176 ((count - 1 - offset_adj) * reg_increment))
14177 return false;
14178 }
14179
14180 i = i + offset_adj;
14181 base = base + offset_adj;
14182 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14183 success depends on the type: VLDM can do just one reg,
14184 LDM must do at least two. */
14185 if ((count <= i) && (mode == SImode))
14186 return false;
14187
14188 elt = XVECEXP (op, 0, i - 1);
14189 if (GET_CODE (elt) != SET)
14190 return false;
14191
14192 if (load)
14193 {
14194 reg = SET_DEST (elt);
14195 mem = SET_SRC (elt);
14196 }
14197 else
14198 {
14199 reg = SET_SRC (elt);
14200 mem = SET_DEST (elt);
14201 }
14202
14203 if (!REG_P (reg) || !MEM_P (mem))
14204 return false;
14205
14206 regno = REGNO (reg);
14207 first_regno = regno;
14208 addr = XEXP (mem, 0);
14209 if (GET_CODE (addr) == PLUS)
14210 {
14211 if (!CONST_INT_P (XEXP (addr, 1)))
14212 return false;
14213
14214 offset = INTVAL (XEXP (addr, 1));
14215 addr = XEXP (addr, 0);
14216 }
14217
14218 if (!REG_P (addr))
14219 return false;
14220
14221 /* Don't allow SP to be loaded unless it is also the base register. It
14222 guarantees that SP is reset correctly when an LDM instruction
14223 is interrupted. Otherwise, we might end up with a corrupt stack. */
14224 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14225 return false;
14226
14227 if (regno == REGNO (addr))
14228 addr_reg_in_reglist = true;
14229
14230 for (; i < count; i++)
14231 {
14232 elt = XVECEXP (op, 0, i);
14233 if (GET_CODE (elt) != SET)
14234 return false;
14235
14236 if (load)
14237 {
14238 reg = SET_DEST (elt);
14239 mem = SET_SRC (elt);
14240 }
14241 else
14242 {
14243 reg = SET_SRC (elt);
14244 mem = SET_DEST (elt);
14245 }
14246
14247 if (!REG_P (reg)
14248 || GET_MODE (reg) != mode
14249 || REGNO (reg) <= regno
14250 || (consecutive
14251 && (REGNO (reg) !=
14252 (unsigned int) (first_regno + regs_per_val * (i - base))))
14253 /* Don't allow SP to be loaded unless it is also the base register. It
14254 guarantees that SP is reset correctly when an LDM instruction
14255 is interrupted. Otherwise, we might end up with a corrupt stack. */
14256 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14257 || !MEM_P (mem)
14258 || GET_MODE (mem) != mode
14259 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14260 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14261 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14262 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14263 offset + (i - base) * reg_increment))
14264 && (!REG_P (XEXP (mem, 0))
14265 || offset + (i - base) * reg_increment != 0)))
14266 return false;
14267
14268 regno = REGNO (reg);
14269 if (regno == REGNO (addr))
14270 addr_reg_in_reglist = true;
14271 }
14272
14273 if (load)
14274 {
14275 if (update && addr_reg_in_reglist)
14276 return false;
14277
14278 /* For Thumb-1, the address register is always modified - either by write-back
14279 or by an explicit load. If the pattern does not describe an update,
14280 then the address register must be in the list of loaded registers. */
14281 if (TARGET_THUMB1)
14282 return update || addr_reg_in_reglist;
14283 }
14284
14285 return true;
14286 }
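
As an illustration of the PARALLEL shape this predicate accepts, here is a minimal, hedged sketch that builds a two-register LDM pattern (r4 and r5 loaded from [r0] and [r0 + 4]) which ldm_stm_operation_p (op, true, SImode, false, false) would recognise. It assumes it is compiled inside this backend, where the RTL constructors are available; build_two_reg_ldm itself is a hypothetical helper, not part of arm.cc.

/* Hypothetical example only: construct a PARALLEL matching the load
   pattern documented above (two SImode loads, ascending registers,
   offsets 0 and 4 from the same base register r0).  */
static rtx
build_two_reg_ldm (void)
{
  rtx base = gen_rtx_REG (SImode, 0);               /* r0 is the base.  */
  rtx set0 = gen_rtx_SET (gen_rtx_REG (SImode, 4),  /* r4 <- [r0]       */
                          gen_rtx_MEM (SImode, base));
  rtx set1 = gen_rtx_SET (gen_rtx_REG (SImode, 5),  /* r5 <- [r0 + 4]   */
                          gen_rtx_MEM (SImode,
                                       plus_constant (Pmode, base, 4)));
  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set0, set1));
}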
14287
14288 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14289 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14290 following form:
14291
14292 [(set (reg:SI <N>) (const_int 0))
14293 (set (reg:SI <M>) (const_int 0))
14294 ...
14295 (unspec_volatile [(const_int 0)]
14296 VUNSPEC_CLRM_APSR)
14297 (clobber (reg:CC CC_REGNUM))
14298 ]
14299
14300 Any number (including 0) of set expressions is valid; the volatile unspec is
14301 optional. All registers but SP and PC are allowed and registers must be in
14302 strict increasing order.
14303
14304 To be a valid VSCCLRM pattern, OP must have the following form:
14305
14306 [(unspec_volatile [(const_int 0)]
14307 VUNSPEC_VSCCLRM_VPR)
14308 (set (reg:SF <N>) (const_int 0))
14309 (set (reg:SF <M>) (const_int 0))
14310 ...
14311 ]
14312
14313 As with CLRM, any number (including 0) of set expressions is valid; however,
14314 the volatile unspec is mandatory here. Any VFP single-precision register is
14315 accepted but all registers must be consecutive and in increasing order. */
14316
14317 bool
14318 clear_operation_p (rtx op, bool vfp)
14319 {
14320 unsigned regno;
14321 unsigned last_regno = INVALID_REGNUM;
14322 rtx elt, reg, zero;
14323 int count = XVECLEN (op, 0);
14324 int first_set = vfp ? 1 : 0;
14325 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14326
14327 for (int i = first_set; i < count; i++)
14328 {
14329 elt = XVECEXP (op, 0, i);
14330
14331 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14332 {
14333 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14334 || XVECLEN (elt, 0) != 1
14335 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14336 || i != count - 2)
14337 return false;
14338
14339 continue;
14340 }
14341
14342 if (GET_CODE (elt) == CLOBBER)
14343 continue;
14344
14345 if (GET_CODE (elt) != SET)
14346 return false;
14347
14348 reg = SET_DEST (elt);
14349 zero = SET_SRC (elt);
14350
14351 if (!REG_P (reg)
14352 || GET_MODE (reg) != expected_mode
14353 || zero != CONST0_RTX (SImode))
14354 return false;
14355
14356 regno = REGNO (reg);
14357
14358 if (vfp)
14359 {
14360 if (i != first_set && regno != last_regno + 1)
14361 return false;
14362 }
14363 else
14364 {
14365 if (regno == SP_REGNUM || regno == PC_REGNUM)
14366 return false;
14367 if (i != first_set && regno <= last_regno)
14368 return false;
14369 }
14370
14371 last_regno = regno;
14372 }
14373
14374 return true;
14375 }
14376
14377 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14378 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14379 instruction. ADD_OFFSET is nonzero if the base address register needs
14380 to be modified with an add instruction before we can use it. */
14381
14382 static bool
14383 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14384 int nops, HOST_WIDE_INT add_offset)
14385 {
14386 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14387 if the offset isn't small enough. The reason 2 ldrs are faster
14388 is that these ARMs are able to do more than one cache access
14389 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14390 whilst the ARM8 has a double bandwidth cache. This means that
14391 these cores can do both an instruction fetch and a data fetch in
14392 a single cycle, so the trick of calculating the address into a
14393 scratch register (one of the result regs) and then doing a load
14394 multiple actually becomes slower (and no smaller in code size).
14395 That is the transformation
14396
14397 ldr rd1, [rbase + offset]
14398 ldr rd2, [rbase + offset + 4]
14399
14400 to
14401
14402 add rd1, rbase, offset
14403 ldmia rd1, {rd1, rd2}
14404
14405 produces worse code -- '3 cycles + any stalls on rd2' instead of
14406 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14407 access per cycle, the first sequence could never complete in less
14408 than 6 cycles, whereas the ldm sequence would only take 5 and
14409 would make better use of sequential accesses if not hitting the
14410 cache.
14411
14412 We cheat here and test 'arm_ld_sched' which we currently know to
14413 only be true for the ARM8, ARM9 and StrongARM. If this ever
14414 changes, then the test below needs to be reworked. */
14415 if (nops == 2 && arm_ld_sched && add_offset != 0)
14416 return false;
14417
14418 /* XScale has load-store double instructions, but they have stricter
14419 alignment requirements than load-store multiple, so we cannot
14420 use them.
14421
14422 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14423 the pipeline until completion.
14424
14425 NREGS CYCLES
14426 1 3
14427 2 4
14428 3 5
14429 4 6
14430
14431 An ldr instruction takes 1-3 cycles, but does not block the
14432 pipeline.
14433
14434 NREGS CYCLES
14435 1 1-3
14436 2 2-6
14437 3 3-9
14438 4 4-12
14439
14440 Best case ldr will always win. However, the more ldr instructions
14441 we issue, the less likely we are to be able to schedule them well.
14442 Using ldr instructions also increases code size.
14443
14444 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14445 for counts of 3 or 4 regs. */
14446 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14447 return false;
14448 return true;
14449 }
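
The cycle figures quoted in the comment above can be tabulated directly. The following standalone sketch is only an illustration of that cost model (the numbers are copied from the comment, not derived from a separate pipeline description) and shows why ldr is preferred for 1-2 registers and ldm for 3-4 on XScale.

/* Illustration only: XScale ldm blocks the pipeline for 2 + NREGS cycles,
   while NREGS independent ldr insns take between NREGS and 3 * NREGS.  */
#include <cstdio>

int
main ()
{
  for (int nregs = 1; nregs <= 4; nregs++)
    {
      int ldm_cycles = 2 + nregs;   /* ldm blocks the pipeline this long.  */
      int ldr_best = nregs;         /* 1 cycle per ldr, no stalls.         */
      int ldr_worst = 3 * nregs;    /* 3 cycles per ldr.                   */
      printf ("NREGS=%d  ldm=%d  ldr=%d..%d\n",
              nregs, ldm_cycles, ldr_best, ldr_worst);
    }
  return 0;
}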
14450
14451 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14452 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14453 an array ORDER which describes the sequence in which to access the
14454 offsets so that they end up in ascending order. In this sequence, each
14455 offset must be larger by exactly 4 than the previous one. ORDER[0]
14456 must have been filled in with the lowest offset by the caller.
14457 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14458 we use to verify that ORDER produces an ascending order of registers.
14459 Return true if it was possible to construct such an order, false if
14460 not. */
14461
14462 static bool
14463 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14464 int *unsorted_regs)
14465 {
14466 int i;
14467 for (i = 1; i < nops; i++)
14468 {
14469 int j;
14470
14471 order[i] = order[i - 1];
14472 for (j = 0; j < nops; j++)
14473 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14474 {
14475 /* We must find exactly one offset that is higher than the
14476 previous one by 4. */
14477 if (order[i] != order[i - 1])
14478 return false;
14479 order[i] = j;
14480 }
14481 if (order[i] == order[i - 1])
14482 return false;
14483 /* The register numbers must be ascending. */
14484 if (unsorted_regs != NULL
14485 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14486 return false;
14487 }
14488 return true;
14489 }
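
For illustration, the same ordering check can be run on plain arrays. The sketch below mirrors the loop above (minus the register check); offsets {8, 0, 4}, with order[0] preset to the index of the lowest offset, produce the order 1, 2, 0, while a gap such as {0, 8} makes it fail, exactly as compute_offset_order does. This is a standalone toy, not backend code.

#include <cstdio>

static bool
toy_offset_order (int nops, const long *offsets, int *order)
{
  for (int i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (int j = 0; j < nops; j++)
        if (offsets[j] == offsets[order[i - 1]] + 4)
          {
            /* Exactly one offset may be 4 above the previous one.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;           /* No offset is 4 above the previous one.  */
    }
  return true;
}

int
main ()
{
  const long offsets[] = { 8, 0, 4 };
  int order[3] = { 1, 0, 0 };   /* order[0] = index of the lowest offset.  */
  if (toy_offset_order (3, offsets, order))
    printf ("order: %d %d %d\n", order[0], order[1], order[2]);
  return 0;
}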
14490
14491 /* Used to determine in a peephole whether a sequence of load
14492 instructions can be changed into a load-multiple instruction.
14493 NOPS is the number of separate load instructions we are examining. The
14494 first NOPS entries in OPERANDS are the destination registers, the
14495 next NOPS entries are memory operands. If this function is
14496 successful, *BASE is set to the common base register of the memory
14497 accesses; *LOAD_OFFSET is set to the first memory location's offset
14498 from that base register.
14499 REGS is an array filled in with the destination register numbers.
14500 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14501 insn numbers to an ascending order of stores. If CHECK_REGS is true,
14502 the sequence of registers in REGS matches the loads from ascending memory
14503 locations, and the function verifies that the register numbers are
14504 themselves ascending. If CHECK_REGS is false, the register numbers
14505 are stored in the order they are found in the operands. */
14506 static int
14507 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14508 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14509 {
14510 int unsorted_regs[MAX_LDM_STM_OPS];
14511 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14512 int order[MAX_LDM_STM_OPS];
14513 int base_reg = -1;
14514 int i, ldm_case;
14515
14516 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14517 easily extended if required. */
14518 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14519
14520 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14521
14522 /* Loop over the operands and check that the memory references are
14523 suitable (i.e. immediate offsets from the same base register). At
14524 the same time, extract the target register, and the memory
14525 offsets. */
14526 for (i = 0; i < nops; i++)
14527 {
14528 rtx reg;
14529 rtx offset;
14530
14531 /* Convert a subreg of a mem into the mem itself. */
14532 if (GET_CODE (operands[nops + i]) == SUBREG)
14533 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14534
14535 gcc_assert (MEM_P (operands[nops + i]));
14536
14537 /* Don't reorder volatile memory references; it doesn't seem worth
14538 looking for the case where the order is ok anyway. */
14539 if (MEM_VOLATILE_P (operands[nops + i]))
14540 return 0;
14541
14542 offset = const0_rtx;
14543
14544 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14545 || (SUBREG_P (reg)
14546 && REG_P (reg = SUBREG_REG (reg))))
14547 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14548 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14549 || (SUBREG_P (reg)
14550 && REG_P (reg = SUBREG_REG (reg))))
14551 && (CONST_INT_P (offset
14552 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14553 {
14554 if (i == 0)
14555 {
14556 base_reg = REGNO (reg);
14557 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14558 return 0;
14559 }
14560 else if (base_reg != (int) REGNO (reg))
14561 /* Not addressed from the same base register. */
14562 return 0;
14563
14564 unsorted_regs[i] = (REG_P (operands[i])
14565 ? REGNO (operands[i])
14566 : REGNO (SUBREG_REG (operands[i])));
14567
14568 /* If it isn't an integer register, or if it overwrites the
14569 base register but isn't the last insn in the list, then
14570 we can't do this. */
14571 if (unsorted_regs[i] < 0
14572 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14573 || unsorted_regs[i] > 14
14574 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14575 return 0;
14576
14577 /* Don't allow SP to be loaded unless it is also the base
14578 register. It guarantees that SP is reset correctly when
14579 an LDM instruction is interrupted. Otherwise, we might
14580 end up with a corrupt stack. */
14581 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14582 return 0;
14583
14584 unsorted_offsets[i] = INTVAL (offset);
14585 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14586 order[0] = i;
14587 }
14588 else
14589 /* Not a suitable memory address. */
14590 return 0;
14591 }
14592
14593 /* All the useful information has now been extracted from the
14594 operands into unsorted_regs and unsorted_offsets; additionally,
14595 order[0] has been set to the lowest offset in the list. Sort
14596 the offsets into order, verifying that they are adjacent, and
14597 check that the register numbers are ascending. */
14598 if (!compute_offset_order (nops, unsorted_offsets, order,
14599 check_regs ? unsorted_regs : NULL))
14600 return 0;
14601
14602 if (saved_order)
14603 memcpy (saved_order, order, sizeof order);
14604
14605 if (base)
14606 {
14607 *base = base_reg;
14608
14609 for (i = 0; i < nops; i++)
14610 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14611
14612 *load_offset = unsorted_offsets[order[0]];
14613 }
14614
14615 if (unsorted_offsets[order[0]] == 0)
14616 ldm_case = 1; /* ldmia */
14617 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14618 ldm_case = 2; /* ldmib */
14619 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14620 ldm_case = 3; /* ldmda */
14621 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14622 ldm_case = 4; /* ldmdb */
14623 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14624 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14625 ldm_case = 5;
14626 else
14627 return 0;
14628
14629 if (!multiple_operation_profitable_p (false, nops,
14630 ldm_case == 5
14631 ? unsorted_offsets[order[0]] : 0))
14632 return 0;
14633
14634 return ldm_case;
14635 }
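
A worked example of the ldm_case classification performed above, assuming TARGET_ARM and the sorted offsets computed by compute_offset_order (illustration only; the offset values are made up):

/* lowest offset 0               e.g. 0, 4, 8      -> case 1 (ldmia)
   lowest offset 4               e.g. 4, 8, 12     -> case 2 (ldmib)
   highest offset 0              e.g. -8, -4, 0    -> case 3 (ldmda)
   highest offset -4             e.g. -12, -8, -4  -> case 4 (ldmdb)
   other, but the lowest offset
   is a valid add/sub immediate  e.g. 256, 260     -> case 5 (adjust the
                                                    base with an add first).  */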
14636
14637 /* Used to determine in a peephole whether a sequence of store instructions can
14638 be changed into a store-multiple instruction.
14639 NOPS is the number of separate store instructions we are examining.
14640 NOPS_TOTAL is the total number of instructions recognized by the peephole
14641 pattern.
14642 The first NOPS entries in OPERANDS are the source registers, the next
14643 NOPS entries are memory operands. If this function is successful, *BASE is
14644 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14645 to the first memory location's offset from that base register. REGS is an
14646 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14647 likewise filled with the corresponding rtx's.
14648 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14649 numbers to an ascending order of stores.
14650 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14651 from ascending memory locations, and the function verifies that the register
14652 numbers are themselves ascending. If CHECK_REGS is false, the register
14653 numbers are stored in the order they are found in the operands. */
14654 static int
14655 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14656 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14657 HOST_WIDE_INT *load_offset, bool check_regs)
14658 {
14659 int unsorted_regs[MAX_LDM_STM_OPS];
14660 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14661 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14662 int order[MAX_LDM_STM_OPS];
14663 int base_reg = -1;
14664 rtx base_reg_rtx = NULL;
14665 int i, stm_case;
14666
14667 /* Write back of base register is currently only supported for Thumb 1. */
14668 int base_writeback = TARGET_THUMB1;
14669
14670 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14671 easily extended if required. */
14672 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14673
14674 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14675
14676 /* Loop over the operands and check that the memory references are
14677 suitable (i.e. immediate offsets from the same base register). At
14678 the same time, extract the target register, and the memory
14679 offsets. */
14680 for (i = 0; i < nops; i++)
14681 {
14682 rtx reg;
14683 rtx offset;
14684
14685 /* Convert a subreg of a mem into the mem itself. */
14686 if (GET_CODE (operands[nops + i]) == SUBREG)
14687 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14688
14689 gcc_assert (MEM_P (operands[nops + i]));
14690
14691 /* Don't reorder volatile memory references; it doesn't seem worth
14692 looking for the case where the order is ok anyway. */
14693 if (MEM_VOLATILE_P (operands[nops + i]))
14694 return 0;
14695
14696 offset = const0_rtx;
14697
14698 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14699 || (SUBREG_P (reg)
14700 && REG_P (reg = SUBREG_REG (reg))))
14701 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14702 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14703 || (SUBREG_P (reg)
14704 && REG_P (reg = SUBREG_REG (reg))))
14705 && (CONST_INT_P (offset
14706 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14707 {
14708 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14709 ? operands[i] : SUBREG_REG (operands[i]));
14710 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14711
14712 if (i == 0)
14713 {
14714 base_reg = REGNO (reg);
14715 base_reg_rtx = reg;
14716 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14717 return 0;
14718 }
14719 else if (base_reg != (int) REGNO (reg))
14720 /* Not addressed from the same base register. */
14721 return 0;
14722
14723 /* If it isn't an integer register, then we can't do this. */
14724 if (unsorted_regs[i] < 0
14725 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14726 /* The effects are unpredictable if the base register is
14727 both updated and stored. */
14728 || (base_writeback && unsorted_regs[i] == base_reg)
14729 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14730 || unsorted_regs[i] > 14)
14731 return 0;
14732
14733 unsorted_offsets[i] = INTVAL (offset);
14734 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14735 order[0] = i;
14736 }
14737 else
14738 /* Not a suitable memory address. */
14739 return 0;
14740 }
14741
14742 /* All the useful information has now been extracted from the
14743 operands into unsorted_regs and unsorted_offsets; additionally,
14744 order[0] has been set to the lowest offset in the list. Sort
14745 the offsets into order, verifying that they are adjacent, and
14746 check that the register numbers are ascending. */
14747 if (!compute_offset_order (nops, unsorted_offsets, order,
14748 check_regs ? unsorted_regs : NULL))
14749 return 0;
14750
14751 if (saved_order)
14752 memcpy (saved_order, order, sizeof order);
14753
14754 if (base)
14755 {
14756 *base = base_reg;
14757
14758 for (i = 0; i < nops; i++)
14759 {
14760 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14761 if (reg_rtxs)
14762 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14763 }
14764
14765 *load_offset = unsorted_offsets[order[0]];
14766 }
14767
14768 if (TARGET_THUMB1
14769 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14770 return 0;
14771
14772 if (unsorted_offsets[order[0]] == 0)
14773 stm_case = 1; /* stmia */
14774 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14775 stm_case = 2; /* stmib */
14776 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14777 stm_case = 3; /* stmda */
14778 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14779 stm_case = 4; /* stmdb */
14780 else
14781 return 0;
14782
14783 if (!multiple_operation_profitable_p (false, nops, 0))
14784 return 0;
14785
14786 return stm_case;
14787 }
14788 \f
14789 /* Routines for use in generating RTL. */
14790
14791 /* Generate a load-multiple instruction. COUNT is the number of loads in
14792 the instruction; REGS and MEMS are arrays containing the operands.
14793 BASEREG is the base register to be used in addressing the memory operands.
14794 WBACK_OFFSET is nonzero if the instruction should update the base
14795 register. */
14796
14797 static rtx
14798 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14799 HOST_WIDE_INT wback_offset)
14800 {
14801 int i = 0, j;
14802 rtx result;
14803
14804 if (!multiple_operation_profitable_p (false, count, 0))
14805 {
14806 rtx seq;
14807
14808 start_sequence ();
14809
14810 for (i = 0; i < count; i++)
14811 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14812
14813 if (wback_offset != 0)
14814 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14815
14816 seq = get_insns ();
14817 end_sequence ();
14818
14819 return seq;
14820 }
14821
14822 result = gen_rtx_PARALLEL (VOIDmode,
14823 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14824 if (wback_offset != 0)
14825 {
14826 XVECEXP (result, 0, 0)
14827 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14828 i = 1;
14829 count++;
14830 }
14831
14832 for (j = 0; i < count; i++, j++)
14833 XVECEXP (result, 0, i)
14834 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14835
14836 return result;
14837 }
14838
14839 /* Generate a store-multiple instruction. COUNT is the number of stores in
14840 the instruction; REGS and MEMS are arrays containing the operands.
14841 BASEREG is the base register to be used in addressing the memory operands.
14842 WBACK_OFFSET is nonzero if the instruction should update the base
14843 register. */
14844
14845 static rtx
14846 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14847 HOST_WIDE_INT wback_offset)
14848 {
14849 int i = 0, j;
14850 rtx result;
14851
14852 if (GET_CODE (basereg) == PLUS)
14853 basereg = XEXP (basereg, 0);
14854
14855 if (!multiple_operation_profitable_p (false, count, 0))
14856 {
14857 rtx seq;
14858
14859 start_sequence ();
14860
14861 for (i = 0; i < count; i++)
14862 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14863
14864 if (wback_offset != 0)
14865 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14866
14867 seq = get_insns ();
14868 end_sequence ();
14869
14870 return seq;
14871 }
14872
14873 result = gen_rtx_PARALLEL (VOIDmode,
14874 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14875 if (wback_offset != 0)
14876 {
14877 XVECEXP (result, 0, 0)
14878 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14879 i = 1;
14880 count++;
14881 }
14882
14883 for (j = 0; i < count; i++, j++)
14884 XVECEXP (result, 0, i)
14885 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14886
14887 return result;
14888 }
14889
14890 /* Generate either a load-multiple or a store-multiple instruction. This
14891 function can be used in situations where we can start with a single MEM
14892 rtx and adjust its address upwards.
14893 COUNT is the number of operations in the instruction, not counting a
14894 possible update of the base register. REGS is an array containing the
14895 register operands.
14896 BASEREG is the base register to be used in addressing the memory operands,
14897 which are constructed from BASEMEM.
14898 WRITE_BACK specifies whether the generated instruction should include an
14899 update of the base register.
14900 OFFSETP is used to pass an offset to and from this function; this offset
14901 is not used when constructing the address (instead BASEMEM should have an
14902 appropriate offset in its address); it is used only for setting
14903 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14904
14905 static rtx
14906 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14907 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14908 {
14909 rtx mems[MAX_LDM_STM_OPS];
14910 HOST_WIDE_INT offset = *offsetp;
14911 int i;
14912
14913 gcc_assert (count <= MAX_LDM_STM_OPS);
14914
14915 if (GET_CODE (basereg) == PLUS)
14916 basereg = XEXP (basereg, 0);
14917
14918 for (i = 0; i < count; i++)
14919 {
14920 rtx addr = plus_constant (Pmode, basereg, i * 4);
14921 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14922 offset += 4;
14923 }
14924
14925 if (write_back)
14926 *offsetp = offset;
14927
14928 if (is_load)
14929 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14930 write_back ? 4 * count : 0);
14931 else
14932 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14933 write_back ? 4 * count : 0);
14934 }
14935
14936 rtx
14937 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14938 rtx basemem, HOST_WIDE_INT *offsetp)
14939 {
14940 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14941 offsetp);
14942 }
14943
14944 rtx
14945 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14946 rtx basemem, HOST_WIDE_INT *offsetp)
14947 {
14948 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14949 offsetp);
14950 }
14951
14952 /* Called from a peephole2 expander to turn a sequence of loads into an
14953 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14954 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14955 is true if we can reorder the registers because the loaded values are
14956 subsequently used commutatively.
14957 Returns true iff we could generate a new instruction. */
14958
14959 bool
14960 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14961 {
14962 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14963 rtx mems[MAX_LDM_STM_OPS];
14964 int i, j, base_reg;
14965 rtx base_reg_rtx;
14966 HOST_WIDE_INT offset;
14967 int write_back = FALSE;
14968 int ldm_case;
14969 rtx addr;
14970
14971 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14972 &base_reg, &offset, !sort_regs);
14973
14974 if (ldm_case == 0)
14975 return false;
14976
14977 if (sort_regs)
14978 for (i = 0; i < nops - 1; i++)
14979 for (j = i + 1; j < nops; j++)
14980 if (regs[i] > regs[j])
14981 {
14982 int t = regs[i];
14983 regs[i] = regs[j];
14984 regs[j] = t;
14985 }
14986 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14987
14988 if (TARGET_THUMB1)
14989 {
14990 gcc_assert (ldm_case == 1 || ldm_case == 5);
14991
14992 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14993 write_back = true;
14994 for (i = 0; i < nops; i++)
14995 if (base_reg == regs[i])
14996 write_back = false;
14997
14998 /* Ensure the base is dead if it is updated. */
14999 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15000 return false;
15001 }
15002
15003 if (ldm_case == 5)
15004 {
15005 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15006 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15007 offset = 0;
15008 base_reg_rtx = newbase;
15009 }
15010
15011 for (i = 0; i < nops; i++)
15012 {
15013 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15014 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15015 SImode, addr, 0);
15016 }
15017 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15018 write_back ? offset + i * 4 : 0));
15019 return true;
15020 }
15021
15022 /* Called from a peephole2 expander to turn a sequence of stores into an
15023 STM instruction. OPERANDS are the operands found by the peephole matcher;
15024 NOPS indicates how many separate stores we are trying to combine.
15025 Returns true iff we could generate a new instruction. */
15026
15027 bool
15028 gen_stm_seq (rtx *operands, int nops)
15029 {
15030 int i;
15031 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15032 rtx mems[MAX_LDM_STM_OPS];
15033 int base_reg;
15034 rtx base_reg_rtx;
15035 HOST_WIDE_INT offset;
15036 int write_back = FALSE;
15037 int stm_case;
15038 rtx addr;
15039 bool base_reg_dies;
15040
15041 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15042 mem_order, &base_reg, &offset, true);
15043
15044 if (stm_case == 0)
15045 return false;
15046
15047 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15048
15049 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15050 if (TARGET_THUMB1)
15051 {
15052 gcc_assert (base_reg_dies);
15053 write_back = TRUE;
15054 }
15055
15056 if (stm_case == 5)
15057 {
15058 gcc_assert (base_reg_dies);
15059 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15060 offset = 0;
15061 }
15062
15063 addr = plus_constant (Pmode, base_reg_rtx, offset);
15064
15065 for (i = 0; i < nops; i++)
15066 {
15067 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15068 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15069 SImode, addr, 0);
15070 }
15071 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15072 write_back ? offset + i * 4 : 0));
15073 return true;
15074 }
15075
15076 /* Called from a peephole2 expander to turn a sequence of stores that are
15077 preceded by constant loads into an STM instruction. OPERANDS are the
15078 operands found by the peephole matcher; NOPS indicates how many
15079 separate stores we are trying to combine; there are 2 * NOPS
15080 instructions in the peephole.
15081 Returns true iff we could generate a new instruction. */
15082
15083 bool
15084 gen_const_stm_seq (rtx *operands, int nops)
15085 {
15086 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15087 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15088 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15089 rtx mems[MAX_LDM_STM_OPS];
15090 int base_reg;
15091 rtx base_reg_rtx;
15092 HOST_WIDE_INT offset;
15093 int write_back = FALSE;
15094 int stm_case;
15095 rtx addr;
15096 bool base_reg_dies;
15097 int i, j;
15098 HARD_REG_SET allocated;
15099
15100 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15101 mem_order, &base_reg, &offset, false);
15102
15103 if (stm_case == 0)
15104 return false;
15105
15106 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15107
15108 /* If the same register is used more than once, try to find a free
15109 register. */
15110 CLEAR_HARD_REG_SET (allocated);
15111 for (i = 0; i < nops; i++)
15112 {
15113 for (j = i + 1; j < nops; j++)
15114 if (regs[i] == regs[j])
15115 {
15116 rtx t = peep2_find_free_register (0, nops * 2,
15117 TARGET_THUMB1 ? "l" : "r",
15118 SImode, &allocated);
15119 if (t == NULL_RTX)
15120 return false;
15121 reg_rtxs[i] = t;
15122 regs[i] = REGNO (t);
15123 }
15124 }
15125
15126 /* Compute an ordering that maps the register numbers to an ascending
15127 sequence. */
15128 reg_order[0] = 0;
15129 for (i = 0; i < nops; i++)
15130 if (regs[i] < regs[reg_order[0]])
15131 reg_order[0] = i;
15132
15133 for (i = 1; i < nops; i++)
15134 {
15135 int this_order = reg_order[i - 1];
15136 for (j = 0; j < nops; j++)
15137 if (regs[j] > regs[reg_order[i - 1]]
15138 && (this_order == reg_order[i - 1]
15139 || regs[j] < regs[this_order]))
15140 this_order = j;
15141 reg_order[i] = this_order;
15142 }
15143
15144 /* Ensure that registers that must be live after the instruction end
15145 up with the correct value. */
15146 for (i = 0; i < nops; i++)
15147 {
15148 int this_order = reg_order[i];
15149 if ((this_order != mem_order[i]
15150 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15151 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15152 return false;
15153 }
15154
15155 /* Load the constants. */
15156 for (i = 0; i < nops; i++)
15157 {
15158 rtx op = operands[2 * nops + mem_order[i]];
15159 sorted_regs[i] = regs[reg_order[i]];
15160 emit_move_insn (reg_rtxs[reg_order[i]], op);
15161 }
15162
15163 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15164
15165 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15166 if (TARGET_THUMB1)
15167 {
15168 gcc_assert (base_reg_dies);
15169 write_back = TRUE;
15170 }
15171
15172 if (stm_case == 5)
15173 {
15174 gcc_assert (base_reg_dies);
15175 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15176 offset = 0;
15177 }
15178
15179 addr = plus_constant (Pmode, base_reg_rtx, offset);
15180
15181 for (i = 0; i < nops; i++)
15182 {
15183 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15184 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15185 SImode, addr, 0);
15186 }
15187 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15188 write_back ? offset + i * 4 : 0));
15189 return true;
15190 }
15191
15192 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15193 unaligned copies on processors which support unaligned semantics for those
15194 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15195 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15196 An interleave factor of 1 (the minimum) will perform no interleaving.
15197 Load/store multiple are used for aligned addresses where possible. */
15198
15199 static void
15200 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15201 HOST_WIDE_INT length,
15202 unsigned int interleave_factor)
15203 {
15204 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15205 int *regnos = XALLOCAVEC (int, interleave_factor);
15206 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15207 HOST_WIDE_INT i, j;
15208 HOST_WIDE_INT remaining = length, words;
15209 rtx halfword_tmp = NULL, byte_tmp = NULL;
15210 rtx dst, src;
15211 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15212 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15213 HOST_WIDE_INT srcoffset, dstoffset;
15214 HOST_WIDE_INT src_autoinc, dst_autoinc;
15215 rtx mem, addr;
15216
15217 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15218
15219 /* Use hard registers if we have aligned source or destination so we can use
15220 load/store multiple with contiguous registers. */
15221 if (dst_aligned || src_aligned)
15222 for (i = 0; i < interleave_factor; i++)
15223 regs[i] = gen_rtx_REG (SImode, i);
15224 else
15225 for (i = 0; i < interleave_factor; i++)
15226 regs[i] = gen_reg_rtx (SImode);
15227
15228 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15229 src = copy_addr_to_reg (XEXP (srcbase, 0));
15230
15231 srcoffset = dstoffset = 0;
15232
15233 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15234 For copying the last bytes we want to subtract this offset again. */
15235 src_autoinc = dst_autoinc = 0;
15236
15237 for (i = 0; i < interleave_factor; i++)
15238 regnos[i] = i;
15239
15240 /* Copy BLOCK_SIZE_BYTES chunks. */
15241
15242 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15243 {
15244 /* Load words. */
15245 if (src_aligned && interleave_factor > 1)
15246 {
15247 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15248 TRUE, srcbase, &srcoffset));
15249 src_autoinc += UNITS_PER_WORD * interleave_factor;
15250 }
15251 else
15252 {
15253 for (j = 0; j < interleave_factor; j++)
15254 {
15255 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15256 - src_autoinc));
15257 mem = adjust_automodify_address (srcbase, SImode, addr,
15258 srcoffset + j * UNITS_PER_WORD);
15259 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15260 }
15261 srcoffset += block_size_bytes;
15262 }
15263
15264 /* Store words. */
15265 if (dst_aligned && interleave_factor > 1)
15266 {
15267 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15268 TRUE, dstbase, &dstoffset));
15269 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15270 }
15271 else
15272 {
15273 for (j = 0; j < interleave_factor; j++)
15274 {
15275 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15276 - dst_autoinc));
15277 mem = adjust_automodify_address (dstbase, SImode, addr,
15278 dstoffset + j * UNITS_PER_WORD);
15279 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15280 }
15281 dstoffset += block_size_bytes;
15282 }
15283
15284 remaining -= block_size_bytes;
15285 }
15286
15287 /* Copy any whole words left (note these aren't interleaved with any
15288 subsequent halfword/byte load/stores in the interests of simplicity). */
15289
15290 words = remaining / UNITS_PER_WORD;
15291
15292 gcc_assert (words < interleave_factor);
15293
15294 if (src_aligned && words > 1)
15295 {
15296 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15297 &srcoffset));
15298 src_autoinc += UNITS_PER_WORD * words;
15299 }
15300 else
15301 {
15302 for (j = 0; j < words; j++)
15303 {
15304 addr = plus_constant (Pmode, src,
15305 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15306 mem = adjust_automodify_address (srcbase, SImode, addr,
15307 srcoffset + j * UNITS_PER_WORD);
15308 if (src_aligned)
15309 emit_move_insn (regs[j], mem);
15310 else
15311 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15312 }
15313 srcoffset += words * UNITS_PER_WORD;
15314 }
15315
15316 if (dst_aligned && words > 1)
15317 {
15318 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15319 &dstoffset));
15320 dst_autoinc += words * UNITS_PER_WORD;
15321 }
15322 else
15323 {
15324 for (j = 0; j < words; j++)
15325 {
15326 addr = plus_constant (Pmode, dst,
15327 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15328 mem = adjust_automodify_address (dstbase, SImode, addr,
15329 dstoffset + j * UNITS_PER_WORD);
15330 if (dst_aligned)
15331 emit_move_insn (mem, regs[j]);
15332 else
15333 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15334 }
15335 dstoffset += words * UNITS_PER_WORD;
15336 }
15337
15338 remaining -= words * UNITS_PER_WORD;
15339
15340 gcc_assert (remaining < 4);
15341
15342 /* Copy a halfword if necessary. */
15343
15344 if (remaining >= 2)
15345 {
15346 halfword_tmp = gen_reg_rtx (SImode);
15347
15348 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15349 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15350 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15351
15352 /* Either write out immediately, or delay until we've loaded the last
15353 byte, depending on interleave factor. */
15354 if (interleave_factor == 1)
15355 {
15356 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15357 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15358 emit_insn (gen_unaligned_storehi (mem,
15359 gen_lowpart (HImode, halfword_tmp)));
15360 halfword_tmp = NULL;
15361 dstoffset += 2;
15362 }
15363
15364 remaining -= 2;
15365 srcoffset += 2;
15366 }
15367
15368 gcc_assert (remaining < 2);
15369
15370 /* Copy last byte. */
15371
15372 if ((remaining & 1) != 0)
15373 {
15374 byte_tmp = gen_reg_rtx (SImode);
15375
15376 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15377 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15378 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15379
15380 if (interleave_factor == 1)
15381 {
15382 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15383 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15384 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15385 byte_tmp = NULL;
15386 dstoffset++;
15387 }
15388
15389 remaining--;
15390 srcoffset++;
15391 }
15392
15393 /* Store last halfword if we haven't done so already. */
15394
15395 if (halfword_tmp)
15396 {
15397 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15398 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15399 emit_insn (gen_unaligned_storehi (mem,
15400 gen_lowpart (HImode, halfword_tmp)));
15401 dstoffset += 2;
15402 }
15403
15404 /* Likewise for last byte. */
15405
15406 if (byte_tmp)
15407 {
15408 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15409 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15410 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15411 dstoffset++;
15412 }
15413
15414 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15415 }
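
A C-level sketch of the interleaving performed above for a factor of 2 on unaligned buffers: both loads are issued before the matching stores so each load has time to complete before its value is needed. This is an illustration of the schedule only; the real routine emits RTL, and the helper name is made up.

#include <cstring>
#include <cstddef>
#include <cstdint>

static void
interleaved_word_copy_sketch (unsigned char *dst, const unsigned char *src,
                              size_t words /* assumed to be a multiple of 2 */)
{
  uint32_t r0, r1;
  for (size_t i = 0; i < words; i += 2)
    {
      memcpy (&r0, src + 4 * i, 4);        /* load word 0   */
      memcpy (&r1, src + 4 * (i + 1), 4);  /* load word 1   */
      memcpy (dst + 4 * i, &r0, 4);        /* store word 0  */
      memcpy (dst + 4 * (i + 1), &r1, 4);  /* store word 1  */
    }
}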
15416
15417 /* From mips_adjust_block_mem:
15418
15419 Helper function for doing a loop-based block operation on memory
15420 reference MEM. Each iteration of the loop will operate on LENGTH
15421 bytes of MEM.
15422
15423 Create a new base register for use within the loop and point it to
15424 the start of MEM. Create a new memory reference that uses this
15425 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15426
15427 static void
15428 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15429 rtx *loop_mem)
15430 {
15431 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15432
15433 /* Although the new mem does not refer to a known location,
15434 it does keep up to LENGTH bytes of alignment. */
15435 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15436 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15437 }
15438
15439 /* From mips_block_move_loop:
15440
15441 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15442 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15443 the memory regions do not overlap. */
15444
15445 static void
15446 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15447 unsigned int interleave_factor,
15448 HOST_WIDE_INT bytes_per_iter)
15449 {
15450 rtx src_reg, dest_reg, final_src, test;
15451 HOST_WIDE_INT leftover;
15452
15453 leftover = length % bytes_per_iter;
15454 length -= leftover;
15455
15456 /* Create registers and memory references for use within the loop. */
15457 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15458 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15459
15460 /* Calculate the value that SRC_REG should have after the last iteration of
15461 the loop. */
15462 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15463 0, 0, OPTAB_WIDEN);
15464
15465 /* Emit the start of the loop. */
15466 rtx_code_label *label = gen_label_rtx ();
15467 emit_label (label);
15468
15469 /* Emit the loop body. */
15470 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15471 interleave_factor);
15472
15473 /* Move on to the next block. */
15474 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15475 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15476
15477 /* Emit the loop condition. */
15478 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15479 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15480
15481 /* Mop up any left-over bytes. */
15482 if (leftover)
15483 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15484 }
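
The structure of the code emitted above corresponds to the following C-level sketch (illustration only, under the same no-overlap assumption; the real routine emits RTL rather than calling memcpy):

#include <cstring>
#include <cstddef>

static void
block_move_loop_sketch (unsigned char *dst, const unsigned char *src,
                        size_t length, size_t bytes_per_iter)
{
  size_t leftover = length % bytes_per_iter;
  const unsigned char *final_src = src + (length - leftover);

  /* Main loop: copy one block per iteration until SRC reaches the
     precomputed final value, mirroring the cbranchsi4 test above.  */
  while (src != final_src)
    {
      memcpy (dst, src, bytes_per_iter);
      src += bytes_per_iter;
      dst += bytes_per_iter;
    }

  /* Mop up any left-over bytes with a straight copy.  */
  if (leftover)
    memcpy (dst, src, leftover);
}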
15485
15486 /* Emit a block move when either the source or destination is unaligned (not
15487 aligned to a four-byte boundary). This may need further tuning depending on
15488 core type, optimize_size setting, etc. */
15489
15490 static int
15491 arm_cpymemqi_unaligned (rtx *operands)
15492 {
15493 HOST_WIDE_INT length = INTVAL (operands[2]);
15494
15495 if (optimize_size)
15496 {
15497 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15498 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15499 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15500 the size of the code when optimizing for size. We'll use ldm/stm if src_aligned
15501 or dst_aligned though: allow more interleaving in those cases since the
15502 resulting code can be smaller. */
15503 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15504 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15505
15506 if (length > 12)
15507 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15508 interleave_factor, bytes_per_iter);
15509 else
15510 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15511 interleave_factor);
15512 }
15513 else
15514 {
15515 /* Note that the loop created by arm_block_move_unaligned_loop may be
15516 subject to loop unrolling, which makes tuning this condition a little
15517 redundant. */
15518 if (length > 32)
15519 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15520 else
15521 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15522 }
15523
15524 return 1;
15525 }
15526
15527 int
15528 arm_gen_cpymemqi (rtx *operands)
15529 {
15530 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15531 HOST_WIDE_INT srcoffset, dstoffset;
15532 rtx src, dst, srcbase, dstbase;
15533 rtx part_bytes_reg = NULL;
15534 rtx mem;
15535
15536 if (!CONST_INT_P (operands[2])
15537 || !CONST_INT_P (operands[3])
15538 || INTVAL (operands[2]) > 64)
15539 return 0;
15540
15541 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15542 return arm_cpymemqi_unaligned (operands);
15543
15544 if (INTVAL (operands[3]) & 3)
15545 return 0;
15546
15547 dstbase = operands[0];
15548 srcbase = operands[1];
15549
15550 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15551 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15552
15553 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15554 out_words_to_go = INTVAL (operands[2]) / 4;
15555 last_bytes = INTVAL (operands[2]) & 3;
15556 dstoffset = srcoffset = 0;
15557
15558 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15559 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15560
15561 while (in_words_to_go >= 2)
15562 {
15563 if (in_words_to_go > 4)
15564 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15565 TRUE, srcbase, &srcoffset));
15566 else
15567 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15568 src, FALSE, srcbase,
15569 &srcoffset));
15570
15571 if (out_words_to_go)
15572 {
15573 if (out_words_to_go > 4)
15574 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15575 TRUE, dstbase, &dstoffset));
15576 else if (out_words_to_go != 1)
15577 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15578 out_words_to_go, dst,
15579 (last_bytes == 0
15580 ? FALSE : TRUE),
15581 dstbase, &dstoffset));
15582 else
15583 {
15584 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15585 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15586 if (last_bytes != 0)
15587 {
15588 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15589 dstoffset += 4;
15590 }
15591 }
15592 }
15593
15594 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15595 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15596 }
15597
15598 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15599 if (out_words_to_go)
15600 {
15601 rtx sreg;
15602
15603 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15604 sreg = copy_to_reg (mem);
15605
15606 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15607 emit_move_insn (mem, sreg);
15608 in_words_to_go--;
15609
15610 gcc_assert (!in_words_to_go); /* Sanity check */
15611 }
15612
15613 if (in_words_to_go)
15614 {
15615 gcc_assert (in_words_to_go > 0);
15616
15617 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15618 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15619 }
15620
15621 gcc_assert (!last_bytes || part_bytes_reg);
15622
15623 if (BYTES_BIG_ENDIAN && last_bytes)
15624 {
15625 rtx tmp = gen_reg_rtx (SImode);
15626
15627 /* The bytes we want are in the top end of the word. */
15628 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15629 GEN_INT (8 * (4 - last_bytes))));
15630 part_bytes_reg = tmp;
15631
15632 while (last_bytes)
15633 {
15634 mem = adjust_automodify_address (dstbase, QImode,
15635 plus_constant (Pmode, dst,
15636 last_bytes - 1),
15637 dstoffset + last_bytes - 1);
15638 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15639
15640 if (--last_bytes)
15641 {
15642 tmp = gen_reg_rtx (SImode);
15643 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15644 part_bytes_reg = tmp;
15645 }
15646 }
15647
15648 }
15649 else
15650 {
15651 if (last_bytes > 1)
15652 {
15653 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15654 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15655 last_bytes -= 2;
15656 if (last_bytes)
15657 {
15658 rtx tmp = gen_reg_rtx (SImode);
15659 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15660 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15661 part_bytes_reg = tmp;
15662 dstoffset += 2;
15663 }
15664 }
15665
15666 if (last_bytes)
15667 {
15668 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15669 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15670 }
15671 }
15672
15673 return 1;
15674 }
15675
15676 /* Helper for gen_cpymem_ldrd_strd. Increase the address of the memory rtx
15677 by its mode size. */
15678 inline static rtx
15679 next_consecutive_mem (rtx mem)
15680 {
15681 machine_mode mode = GET_MODE (mem);
15682 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15683 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15684
15685 return adjust_automodify_address (mem, mode, addr, offset);
15686 }
15687
15688 /* Copy using LDRD/STRD instructions whenever possible.
15689 Returns true upon success. */
15690 bool
15691 gen_cpymem_ldrd_strd (rtx *operands)
15692 {
15693 unsigned HOST_WIDE_INT len;
15694 HOST_WIDE_INT align;
15695 rtx src, dst, base;
15696 rtx reg0;
15697 bool src_aligned, dst_aligned;
15698 bool src_volatile, dst_volatile;
15699
15700 gcc_assert (CONST_INT_P (operands[2]));
15701 gcc_assert (CONST_INT_P (operands[3]));
15702
15703 len = UINTVAL (operands[2]);
15704 if (len > 64)
15705 return false;
15706
15707 /* Maximum alignment we can assume for both src and dst buffers. */
15708 align = INTVAL (operands[3]);
15709
15710 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15711 return false;
15712
15713 /* Place src and dst addresses in registers
15714 and update the corresponding mem rtx. */
15715 dst = operands[0];
15716 dst_volatile = MEM_VOLATILE_P (dst);
15717 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15718 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15719 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15720
15721 src = operands[1];
15722 src_volatile = MEM_VOLATILE_P (src);
15723 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15724 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15725 src = adjust_automodify_address (src, VOIDmode, base, 0);
15726
15727 if (!unaligned_access && !(src_aligned && dst_aligned))
15728 return false;
15729
15730 if (src_volatile || dst_volatile)
15731 return false;
15732
15733 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15734 if (!(dst_aligned || src_aligned))
15735 return arm_gen_cpymemqi (operands);
15736
15737 /* If either the src or dst is unaligned, we'll be accessing it as pairs
15738 of unaligned SImode accesses. Otherwise we can generate DImode
15739 ldrd/strd instructions. */
15740 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15741 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15742
15743 while (len >= 8)
15744 {
15745 len -= 8;
15746 reg0 = gen_reg_rtx (DImode);
15747 rtx first_reg = NULL_RTX;
15748 rtx second_reg = NULL_RTX;
15749
15750 if (!src_aligned || !dst_aligned)
15751 {
15752 if (BYTES_BIG_ENDIAN)
15753 {
15754 second_reg = gen_lowpart (SImode, reg0);
15755 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15756 }
15757 else
15758 {
15759 first_reg = gen_lowpart (SImode, reg0);
15760 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15761 }
15762 }
15763 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15764 emit_move_insn (reg0, src);
15765 else if (src_aligned)
15766 emit_insn (gen_unaligned_loaddi (reg0, src));
15767 else
15768 {
15769 emit_insn (gen_unaligned_loadsi (first_reg, src));
15770 src = next_consecutive_mem (src);
15771 emit_insn (gen_unaligned_loadsi (second_reg, src));
15772 }
15773
15774 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15775 emit_move_insn (dst, reg0);
15776 else if (dst_aligned)
15777 emit_insn (gen_unaligned_storedi (dst, reg0));
15778 else
15779 {
15780 emit_insn (gen_unaligned_storesi (dst, first_reg));
15781 dst = next_consecutive_mem (dst);
15782 emit_insn (gen_unaligned_storesi (dst, second_reg));
15783 }
15784
15785 src = next_consecutive_mem (src);
15786 dst = next_consecutive_mem (dst);
15787 }
15788
15789 gcc_assert (len < 8);
15790 if (len >= 4)
15791 {
15792 /* More than a word but less than a double-word to copy. Copy a word. */
15793 reg0 = gen_reg_rtx (SImode);
15794 src = adjust_address (src, SImode, 0);
15795 dst = adjust_address (dst, SImode, 0);
15796 if (src_aligned)
15797 emit_move_insn (reg0, src);
15798 else
15799 emit_insn (gen_unaligned_loadsi (reg0, src));
15800
15801 if (dst_aligned)
15802 emit_move_insn (dst, reg0);
15803 else
15804 emit_insn (gen_unaligned_storesi (dst, reg0));
15805
15806 src = next_consecutive_mem (src);
15807 dst = next_consecutive_mem (dst);
15808 len -= 4;
15809 }
15810
15811 if (len == 0)
15812 return true;
15813
15814 /* Copy the remaining bytes. */
15815 if (len >= 2)
15816 {
15817 dst = adjust_address (dst, HImode, 0);
15818 src = adjust_address (src, HImode, 0);
15819 reg0 = gen_reg_rtx (SImode);
15820 if (src_aligned)
15821 emit_insn (gen_zero_extendhisi2 (reg0, src));
15822 else
15823 emit_insn (gen_unaligned_loadhiu (reg0, src));
15824
15825 if (dst_aligned)
15826 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15827 else
15828 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15829
15830 src = next_consecutive_mem (src);
15831 dst = next_consecutive_mem (dst);
15832 if (len == 2)
15833 return true;
15834 }
15835
15836 dst = adjust_address (dst, QImode, 0);
15837 src = adjust_address (src, QImode, 0);
15838 reg0 = gen_reg_rtx (QImode);
15839 emit_move_insn (reg0, src);
15840 emit_move_insn (dst, reg0);
15841 return true;
15842 }
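
The chunking strategy of the function above reduces to the following C-level sketch: 8-byte chunks while possible, then at most one 4-byte, one 2-byte and one 1-byte copy (illustration only; the real code chooses ldrd/strd, ldm/stm or unaligned SImode pairs for each chunk).

#include <cstring>
#include <cstddef>

static void
cpymem_chunk_sketch (unsigned char *dst, const unsigned char *src, size_t len)
{
  while (len >= 8) { memcpy (dst, src, 8); dst += 8; src += 8; len -= 8; }
  if (len >= 4)    { memcpy (dst, src, 4); dst += 4; src += 4; len -= 4; }
  if (len >= 2)    { memcpy (dst, src, 2); dst += 2; src += 2; len -= 2; }
  if (len)         memcpy (dst, src, 1);
}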
15843
15844 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15845 into their component 32-bit subregs. OP2 may be an immediate
15846 constant and we want to simplify it in that case. */
15847 void
15848 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15849 rtx *lo_op2, rtx *hi_op2)
15850 {
15851 *lo_op1 = gen_lowpart (SImode, op1);
15852 *hi_op1 = gen_highpart (SImode, op1);
15853 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15854 subreg_lowpart_offset (SImode, DImode));
15855 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15856 subreg_highpart_offset (SImode, DImode));
15857 }
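
A hedged usage sketch, assuming it is compiled inside this backend: decomposing the register pair r0:r1 and an immediate. For a DImode constant such as 0x100000003, the low subreg simplifies to 3 and the high subreg to 1 on a little-endian target; decompose_example is a hypothetical illustration, not part of arm.cc.

/* Hypothetical example only.  */
static void
decompose_example (void)
{
  rtx op1 = gen_rtx_REG (DImode, 0);            /* r0:r1 as one DImode value.  */
  rtx op2 = GEN_INT (HOST_WIDE_INT_C (0x100000003));
  rtx lo1, hi1, lo2, hi2;

  arm_decompose_di_binop (op1, op2, &lo1, &hi1, &lo2, &hi2);
  /* lo2 and hi2 are now plain CONST_INTs; lo1 and hi1 are SImode regs.  */
}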
15858
15859 /* Select a dominance comparison mode if possible for a test of the general
15860 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15861 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15862 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15863 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15864 In all cases OP will be either EQ or NE, but we don't need to know which
15865 here. If we are unable to support a dominance comparison we return
15866 CC mode. This will then fail to match for the RTL expressions that
15867 generate this call. */
15868 machine_mode
15869 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15870 {
15871 enum rtx_code cond1, cond2;
15872 int swapped = 0;
15873
15874 /* Currently we will probably get the wrong result if the individual
15875 comparisons are not simple. This also ensures that it is safe to
15876 reverse a comparison if necessary. */
15877 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15878 != CCmode)
15879 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15880 != CCmode))
15881 return CCmode;
15882
15883 /* The if_then_else variant of this tests the second condition if the
15884 first passes, but is true if the first fails. Reverse the first
15885 condition to get a true "inclusive-or" expression. */
15886 if (cond_or == DOM_CC_NX_OR_Y)
15887 cond1 = reverse_condition (cond1);
15888
15889 /* If the comparisons are not equal, and one doesn't dominate the other,
15890 then we can't do this. */
15891 if (cond1 != cond2
15892 && !comparison_dominates_p (cond1, cond2)
15893 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15894 return CCmode;
15895
15896 if (swapped)
15897 std::swap (cond1, cond2);
15898
15899 switch (cond1)
15900 {
15901 case EQ:
15902 if (cond_or == DOM_CC_X_AND_Y)
15903 return CC_DEQmode;
15904
15905 switch (cond2)
15906 {
15907 case EQ: return CC_DEQmode;
15908 case LE: return CC_DLEmode;
15909 case LEU: return CC_DLEUmode;
15910 case GE: return CC_DGEmode;
15911 case GEU: return CC_DGEUmode;
15912 default: gcc_unreachable ();
15913 }
15914
15915 case LT:
15916 if (cond_or == DOM_CC_X_AND_Y)
15917 return CC_DLTmode;
15918
15919 switch (cond2)
15920 {
15921 case LT:
15922 return CC_DLTmode;
15923 case LE:
15924 return CC_DLEmode;
15925 case NE:
15926 return CC_DNEmode;
15927 default:
15928 gcc_unreachable ();
15929 }
15930
15931 case GT:
15932 if (cond_or == DOM_CC_X_AND_Y)
15933 return CC_DGTmode;
15934
15935 switch (cond2)
15936 {
15937 case GT:
15938 return CC_DGTmode;
15939 case GE:
15940 return CC_DGEmode;
15941 case NE:
15942 return CC_DNEmode;
15943 default:
15944 gcc_unreachable ();
15945 }
15946
15947 case LTU:
15948 if (cond_or == DOM_CC_X_AND_Y)
15949 return CC_DLTUmode;
15950
15951 switch (cond2)
15952 {
15953 case LTU:
15954 return CC_DLTUmode;
15955 case LEU:
15956 return CC_DLEUmode;
15957 case NE:
15958 return CC_DNEmode;
15959 default:
15960 gcc_unreachable ();
15961 }
15962
15963 case GTU:
15964 if (cond_or == DOM_CC_X_AND_Y)
15965 return CC_DGTUmode;
15966
15967 switch (cond2)
15968 {
15969 case GTU:
15970 return CC_DGTUmode;
15971 case GEU:
15972 return CC_DGEUmode;
15973 case NE:
15974 return CC_DNEmode;
15975 default:
15976 gcc_unreachable ();
15977 }
15978
15979 /* The remaining cases only occur when both comparisons are the
15980 same. */
15981 case NE:
15982 gcc_assert (cond1 == cond2);
15983 return CC_DNEmode;
15984
15985 case LE:
15986 gcc_assert (cond1 == cond2);
15987 return CC_DLEmode;
15988
15989 case GE:
15990 gcc_assert (cond1 == cond2);
15991 return CC_DGEmode;
15992
15993 case LEU:
15994 gcc_assert (cond1 == cond2);
15995 return CC_DLEUmode;
15996
15997 case GEU:
15998 gcc_assert (cond1 == cond2);
15999 return CC_DGEUmode;
16000
16001 default:
16002 gcc_unreachable ();
16003 }
16004 }
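
/* Worked example (a sketch of the intent, not literal compiler output;
   a, b, c, d are illustrative operands): for a source condition such as
   (a == b) || (c <= d), cond1 is EQ and cond2 is LE; EQ dominates LE,
   so the pair is given CC_DLEmode and can be matched as a
   conditional-compare sequence along the lines of

        cmp     a, b
        cmpne   c, d            @ second compare only if the first failed
        ble     target          @ LE also holds when the first compare was EQ
*/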
16005
16006 machine_mode
16007 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16008 {
16009 /* All floating point compares return CCFP if it is an equality
16010 comparison, and CCFPE otherwise. */
16011 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16012 {
16013 switch (op)
16014 {
16015 case EQ:
16016 case NE:
16017 case UNORDERED:
16018 case ORDERED:
16019 case UNLT:
16020 case UNLE:
16021 case UNGT:
16022 case UNGE:
16023 case UNEQ:
16024 case LTGT:
16025 return CCFPmode;
16026
16027 case LT:
16028 case LE:
16029 case GT:
16030 case GE:
16031 return CCFPEmode;
16032
16033 default:
16034 gcc_unreachable ();
16035 }
16036 }
16037
16038 /* A compare with a shifted operand. Because of canonicalization, the
16039 comparison will have to be swapped when we emit the assembler. */
16040 if (GET_MODE (y) == SImode
16041 && (REG_P (y) || (SUBREG_P (y)))
16042 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16043 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16044 || GET_CODE (x) == ROTATERT))
16045 return CC_SWPmode;
16046
16047 /* A widened compare of the sum of a value plus a carry against a
16048 constant. This is a representation of RSC. We want to swap the
16049 result of the comparison at output. Not valid if the Z bit is
16050 needed. */
16051 if (GET_MODE (x) == DImode
16052 && GET_CODE (x) == PLUS
16053 && arm_borrow_operation (XEXP (x, 1), DImode)
16054 && CONST_INT_P (y)
16055 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16056 && (op == LE || op == GT))
16057 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16058 && (op == LEU || op == GTU))))
16059 return CC_SWPmode;
16060
16061 /* If X is a constant we want to use CC_RSBmode. This is
16062 non-canonical, but arm_gen_compare_reg uses this to generate the
16063 correct canonical form. */
16064 if (GET_MODE (y) == SImode
16065 && (REG_P (y) || SUBREG_P (y))
16066 && CONST_INT_P (x))
16067 return CC_RSBmode;
16068
16069 /* This operation is performed swapped, but since we only rely on the Z
16070 flag we don't need an additional mode. */
16071 if (GET_MODE (y) == SImode
16072 && (REG_P (y) || (SUBREG_P (y)))
16073 && GET_CODE (x) == NEG
16074 && (op == EQ || op == NE))
16075 return CC_Zmode;
16076
16077 /* This is a special case that is used by combine to allow a
16078 comparison of a shifted byte load to be split into a zero-extend
16079 followed by a comparison of the shifted integer (only valid for
16080 equalities and unsigned inequalities). */
16081 if (GET_MODE (x) == SImode
16082 && GET_CODE (x) == ASHIFT
16083 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16084 && GET_CODE (XEXP (x, 0)) == SUBREG
16085 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16086 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16087 && (op == EQ || op == NE
16088 || op == GEU || op == GTU || op == LTU || op == LEU)
16089 && CONST_INT_P (y))
16090 return CC_Zmode;
16091
16092 /* A construct for a conditional compare, if the false arm contains
16093 0, then both conditions must be true, otherwise either condition
16094 must be true. Not all conditions are possible, so CCmode is
16095 returned if it can't be done. */
16096 if (GET_CODE (x) == IF_THEN_ELSE
16097 && (XEXP (x, 2) == const0_rtx
16098 || XEXP (x, 2) == const1_rtx)
16099 && COMPARISON_P (XEXP (x, 0))
16100 && COMPARISON_P (XEXP (x, 1)))
16101 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16102 INTVAL (XEXP (x, 2)));
16103
16104 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16105 if (GET_CODE (x) == AND
16106 && (op == EQ || op == NE)
16107 && COMPARISON_P (XEXP (x, 0))
16108 && COMPARISON_P (XEXP (x, 1)))
16109 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16110 DOM_CC_X_AND_Y);
16111
16112 if (GET_CODE (x) == IOR
16113 && (op == EQ || op == NE)
16114 && COMPARISON_P (XEXP (x, 0))
16115 && COMPARISON_P (XEXP (x, 1)))
16116 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16117 DOM_CC_X_OR_Y);
16118
16119 /* An operation (on Thumb) where we want to test for a single bit.
16120 This is done by shifting that bit up into the top bit of a
16121 scratch register; we can then branch on the sign bit. */
16122 if (TARGET_THUMB1
16123 && GET_MODE (x) == SImode
16124 && (op == EQ || op == NE)
16125 && GET_CODE (x) == ZERO_EXTRACT
16126 && XEXP (x, 1) == const1_rtx)
16127 return CC_Nmode;
16128
16129 /* An operation that sets the condition codes as a side-effect, the
16130 V flag is not set correctly, so we can only use comparisons where
16131 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16132 instead.) */
16133 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16134 if (GET_MODE (x) == SImode
16135 && y == const0_rtx
16136 && (op == EQ || op == NE || op == LT || op == GE)
16137 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16138 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16139 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16140 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16141 || GET_CODE (x) == LSHIFTRT
16142 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16143 || GET_CODE (x) == ROTATERT
16144 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16145 return CC_NZmode;
16146
16147 /* A comparison of ~reg with a const is really a special
16148 canonicalization of compare (~const, reg), which is a reverse
16149 subtract operation. We may not get here if CONST is 0, but that
16150 doesn't matter because ~0 isn't a valid immediate for RSB. */
16151 if (GET_MODE (x) == SImode
16152 && GET_CODE (x) == NOT
16153 && CONST_INT_P (y))
16154 return CC_RSBmode;
16155
16156 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16157 return CC_Zmode;
16158
16159 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16160 && GET_CODE (x) == PLUS
16161 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16162 return CC_Cmode;
16163
16164 if (GET_MODE (x) == DImode
16165 && GET_CODE (x) == PLUS
16166 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16167 && CONST_INT_P (y)
16168 && UINTVAL (y) == 0x800000000
16169 && (op == GEU || op == LTU))
16170 return CC_ADCmode;
16171
16172 if (GET_MODE (x) == DImode
16173 && (op == GE || op == LT)
16174 && GET_CODE (x) == SIGN_EXTEND
16175 && ((GET_CODE (y) == PLUS
16176 && arm_borrow_operation (XEXP (y, 0), DImode))
16177 || arm_borrow_operation (y, DImode)))
16178 return CC_NVmode;
16179
16180 if (GET_MODE (x) == DImode
16181 && (op == GEU || op == LTU)
16182 && GET_CODE (x) == ZERO_EXTEND
16183 && ((GET_CODE (y) == PLUS
16184 && arm_borrow_operation (XEXP (y, 0), DImode))
16185 || arm_borrow_operation (y, DImode)))
16186 return CC_Bmode;
16187
16188 if (GET_MODE (x) == DImode
16189 && (op == EQ || op == NE)
16190 && (GET_CODE (x) == PLUS
16191 || GET_CODE (x) == MINUS)
16192 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16193 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16194 && GET_CODE (y) == SIGN_EXTEND
16195 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16196 return CC_Vmode;
16197
16198 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16199 return GET_MODE (x);
16200
16201 return CCmode;
16202 }
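
/* A standalone sketch (not compiler code; the helper name is purely
   illustrative) of the CC_Cmode case above: it matches the usual C
   idiom for detecting carry out of an unsigned addition, where the sum
   is compared against one of the addends,

     static inline int
     add_carries_u32 (unsigned int a, unsigned int b)
     {
       return (a + b) < a;      // LTU compare of (plus a b) against a
     }

   which allows the comparison to reuse the carry flag set by the
   addition instead of performing a separate compare.  */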
16203
16204 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16205 the sequence of instructions needed to generate a suitable condition
16206 code register. Return the CC register result. */
16207 static rtx
16208 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16209 {
16210 machine_mode mode;
16211 rtx cc_reg;
16212
16213 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16214 gcc_assert (TARGET_32BIT);
16215 gcc_assert (!CONST_INT_P (x));
16216
16217 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16218 subreg_lowpart_offset (SImode, DImode));
16219 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16220 subreg_highpart_offset (SImode, DImode));
16221 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16222 subreg_lowpart_offset (SImode, DImode));
16223 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16224 subreg_highpart_offset (SImode, DImode));
16225 switch (code)
16226 {
16227 case EQ:
16228 case NE:
16229 {
16230 if (y_lo == const0_rtx || y_hi == const0_rtx)
16231 {
16232 if (y_lo != const0_rtx)
16233 {
16234 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16235
16236 gcc_assert (y_hi == const0_rtx);
16237 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16238 if (!arm_add_operand (y_lo, SImode))
16239 y_lo = force_reg (SImode, y_lo);
16240 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16241 x_lo = scratch2;
16242 }
16243 else if (y_hi != const0_rtx)
16244 {
16245 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16246
16247 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16248 if (!arm_add_operand (y_hi, SImode))
16249 y_hi = force_reg (SImode, y_hi);
16250 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16251 x_hi = scratch2;
16252 }
16253
16254 if (!scratch)
16255 {
16256 gcc_assert (!reload_completed);
16257 scratch = gen_rtx_SCRATCH (SImode);
16258 }
16259
16260 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16261 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16262
16263 rtx set
16264 = gen_rtx_SET (cc_reg,
16265 gen_rtx_COMPARE (CC_NZmode,
16266 gen_rtx_IOR (SImode, x_lo, x_hi),
16267 const0_rtx));
16268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16269 clobber)));
16270 return cc_reg;
16271 }
16272
16273 if (!arm_add_operand (y_lo, SImode))
16274 y_lo = force_reg (SImode, y_lo);
16275
16276 if (!arm_add_operand (y_hi, SImode))
16277 y_hi = force_reg (SImode, y_hi);
16278
16279 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16280 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16281 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16282 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16283 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16284
16285 emit_insn (gen_rtx_SET (cc_reg,
16286 gen_rtx_COMPARE (mode, conjunction,
16287 const0_rtx)));
16288 return cc_reg;
16289 }
16290
16291 case LT:
16292 case GE:
16293 {
16294 if (y_lo == const0_rtx)
16295 {
16296 /* If the low word of y is 0, then this is simply a normal
16297 compare of the upper words. */
16298 if (!arm_add_operand (y_hi, SImode))
16299 y_hi = force_reg (SImode, y_hi);
16300
16301 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16302 }
16303
16304 if (!arm_add_operand (y_lo, SImode))
16305 y_lo = force_reg (SImode, y_lo);
16306
16307 rtx cmp1
16308 = gen_rtx_LTU (DImode,
16309 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16310 const0_rtx);
16311
16312 if (!scratch)
16313 scratch = gen_rtx_SCRATCH (SImode);
16314
16315 if (!arm_not_operand (y_hi, SImode))
16316 y_hi = force_reg (SImode, y_hi);
16317
16318 rtx_insn *insn;
16319 if (y_hi == const0_rtx)
16320 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16321 cmp1));
16322 else if (CONST_INT_P (y_hi))
16323 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16324 y_hi, cmp1));
16325 else
16326 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16327 cmp1));
16328 return SET_DEST (single_set (insn));
16329 }
16330
16331 case LE:
16332 case GT:
16333 {
16334 /* During expansion, we only expect to get here if y is a
16335 constant that we want to handle, otherwise we should have
16336 swapped the operands already. */
16337 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16338
16339 if (!const_ok_for_arm (INTVAL (y_lo)))
16340 y_lo = force_reg (SImode, y_lo);
16341
16342 /* Perform a reverse subtract and compare. */
16343 rtx cmp1
16344 = gen_rtx_LTU (DImode,
16345 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16346 const0_rtx);
16347 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16348 x_hi, cmp1));
16349 return SET_DEST (single_set (insn));
16350 }
16351
16352 case LTU:
16353 case GEU:
16354 {
16355 if (y_lo == const0_rtx)
16356 {
16357 /* If the low word of y is 0, then this is simply a normal
16358 compare of the upper words. */
16359 if (!arm_add_operand (y_hi, SImode))
16360 y_hi = force_reg (SImode, y_hi);
16361
16362 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16363 }
16364
16365 if (!arm_add_operand (y_lo, SImode))
16366 y_lo = force_reg (SImode, y_lo);
16367
16368 rtx cmp1
16369 = gen_rtx_LTU (DImode,
16370 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16371 const0_rtx);
16372
16373 if (!scratch)
16374 scratch = gen_rtx_SCRATCH (SImode);
16375 if (!arm_not_operand (y_hi, SImode))
16376 y_hi = force_reg (SImode, y_hi);
16377
16378 rtx_insn *insn;
16379 if (y_hi == const0_rtx)
16380 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16381 cmp1));
16382 else if (CONST_INT_P (y_hi))
16383 {
16384 /* Constant is viewed as unsigned when zero-extended. */
16385 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16386 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16387 y_hi, cmp1));
16388 }
16389 else
16390 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16391 cmp1));
16392 return SET_DEST (single_set (insn));
16393 }
16394
16395 case LEU:
16396 case GTU:
16397 {
16398 /* During expansion, we only expect to get here if y is a
16399 constant that we want to handle, otherwise we should have
16400 swapped the operands already. */
16401 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16402
16403 if (!const_ok_for_arm (INTVAL (y_lo)))
16404 y_lo = force_reg (SImode, y_lo);
16405
16406 /* Perform a reverse subtract and compare. */
16407 rtx cmp1
16408 = gen_rtx_LTU (DImode,
16409 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16410 const0_rtx);
16411 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16412 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16413 x_hi, cmp1));
16414 return SET_DEST (single_set (insn));
16415 }
16416
16417 default:
16418 gcc_unreachable ();
16419 }
16420 }
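
/* Sketches of the intent (not literal compiler output; register names
   are illustrative): a 64-bit equality test against zero only needs the
   Z flag, so the two halves are simply ORed together,

        orrs    tmp, x_lo, x_hi         @ Z set iff both halves are zero
        beq     target

   while a signed 64-bit test x < y chains the high words through the
   borrow produced by comparing the low words,

        cmp     x_lo, y_lo
        sbcs    tmp, x_hi, y_hi         @ N and V reflect the 64-bit result
        blt     target
*/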
16421
16422 /* X and Y are two things to compare using CODE. Emit the compare insn and
16423 return the rtx for register 0 in the proper mode. */
16424 rtx
16425 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16426 {
16427 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16428 return arm_gen_dicompare_reg (code, x, y, scratch);
16429
16430 machine_mode mode = SELECT_CC_MODE (code, x, y);
16431 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16432 if (mode == CC_RSBmode)
16433 {
16434 if (!scratch)
16435 scratch = gen_rtx_SCRATCH (SImode);
16436 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16437 GEN_INT (~UINTVAL (x)), y));
16438 }
16439 else
16440 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16441
16442 return cc_reg;
16443 }
16444
16445 /* Generate a sequence of insns that will generate the correct return
16446 address mask depending on the physical architecture that the program
16447 is running on. */
16448 rtx
16449 arm_gen_return_addr_mask (void)
16450 {
16451 rtx reg = gen_reg_rtx (Pmode);
16452
16453 emit_insn (gen_return_addr_mask (reg));
16454 return reg;
16455 }
16456
16457 void
16458 arm_reload_in_hi (rtx *operands)
16459 {
16460 rtx ref = operands[1];
16461 rtx base, scratch;
16462 HOST_WIDE_INT offset = 0;
16463
16464 if (SUBREG_P (ref))
16465 {
16466 offset = SUBREG_BYTE (ref);
16467 ref = SUBREG_REG (ref);
16468 }
16469
16470 if (REG_P (ref))
16471 {
16472 /* We have a pseudo which has been spilt onto the stack; there
16473 are two cases here: the first where there is a simple
16474 stack-slot replacement and a second where the stack-slot is
16475 out of range, or is used as a subreg. */
16476 if (reg_equiv_mem (REGNO (ref)))
16477 {
16478 ref = reg_equiv_mem (REGNO (ref));
16479 base = find_replacement (&XEXP (ref, 0));
16480 }
16481 else
16482 /* The slot is out of range, or was dressed up in a SUBREG. */
16483 base = reg_equiv_address (REGNO (ref));
16484
16485 /* PR 62554: If there is no equivalent memory location then just move
16486 the value as an SImode register move. This happens when the target
16487 architecture variant does not have an HImode register move. */
16488 if (base == NULL)
16489 {
16490 gcc_assert (REG_P (operands[0]));
16491 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16492 gen_rtx_SUBREG (SImode, ref, 0)));
16493 return;
16494 }
16495 }
16496 else
16497 base = find_replacement (&XEXP (ref, 0));
16498
16499 /* Handle the case where the address is too complex to be offset by 1. */
16500 if (GET_CODE (base) == MINUS
16501 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16502 {
16503 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16504
16505 emit_set_insn (base_plus, base);
16506 base = base_plus;
16507 }
16508 else if (GET_CODE (base) == PLUS)
16509 {
16510 /* The addend must be CONST_INT, or we would have dealt with it above. */
16511 HOST_WIDE_INT hi, lo;
16512
16513 offset += INTVAL (XEXP (base, 1));
16514 base = XEXP (base, 0);
16515
16516 /* Rework the address into a legal sequence of insns. */
16517 /* Valid range for lo is -4095 -> 4095 */
16518 lo = (offset >= 0
16519 ? (offset & 0xfff)
16520 : -((-offset) & 0xfff));
16521
16522 /* Corner case, if lo is the max offset then we would be out of range
16523 once we have added the additional 1 below, so bump the msb into the
16524 pre-loading insn(s). */
16525 if (lo == 4095)
16526 lo &= 0x7ff;
16527
16528 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16529 ^ (HOST_WIDE_INT) 0x80000000)
16530 - (HOST_WIDE_INT) 0x80000000);
16531
16532 gcc_assert (hi + lo == offset);
16533
16534 if (hi != 0)
16535 {
16536 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16537
16538 /* Get the base address; addsi3 knows how to handle constants
16539 that require more than one insn. */
16540 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16541 base = base_plus;
16542 offset = lo;
16543 }
16544 }
16545
16546 /* Operands[2] may overlap operands[0] (though it won't overlap
16547 operands[1]), that's why we asked for a DImode reg -- so we can
16548 use the bit that does not overlap. */
16549 if (REGNO (operands[2]) == REGNO (operands[0]))
16550 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16551 else
16552 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16553
16554 emit_insn (gen_zero_extendqisi2 (scratch,
16555 gen_rtx_MEM (QImode,
16556 plus_constant (Pmode, base,
16557 offset))));
16558 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16559 gen_rtx_MEM (QImode,
16560 plus_constant (Pmode, base,
16561 offset + 1))));
16562 if (!BYTES_BIG_ENDIAN)
16563 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16564 gen_rtx_IOR (SImode,
16565 gen_rtx_ASHIFT
16566 (SImode,
16567 gen_rtx_SUBREG (SImode, operands[0], 0),
16568 GEN_INT (8)),
16569 scratch));
16570 else
16571 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16572 gen_rtx_IOR (SImode,
16573 gen_rtx_ASHIFT (SImode, scratch,
16574 GEN_INT (8)),
16575 gen_rtx_SUBREG (SImode, operands[0], 0)));
16576 }
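
/* A sketch of the sequence synthesized above for the little-endian case
   (not literal compiler output; register names are illustrative):

        ldrb    scratch, [base, #offset]        @ low byte
        ldrb    rd, [base, #offset + 1]         @ high byte
        orr     rd, scratch, rd, lsl #8

   For big-endian the roles of the two bytes are swapped, so it is the
   scratch register that ends up shifted left by 8.  */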
16577
16578 /* Handle storing a half-word to memory during reload by synthesizing as two
16579 byte stores. Take care not to clobber the input values until after we
16580 have moved them somewhere safe. This code assumes that if the DImode
16581 scratch in operands[2] overlaps either the input value or output address
16582 in some way, then that value must die in this insn (we absolutely need
16583 two scratch registers for some corner cases). */
16584 void
16585 arm_reload_out_hi (rtx *operands)
16586 {
16587 rtx ref = operands[0];
16588 rtx outval = operands[1];
16589 rtx base, scratch;
16590 HOST_WIDE_INT offset = 0;
16591
16592 if (SUBREG_P (ref))
16593 {
16594 offset = SUBREG_BYTE (ref);
16595 ref = SUBREG_REG (ref);
16596 }
16597
16598 if (REG_P (ref))
16599 {
16600 /* We have a pseudo which has been spilt onto the stack; there
16601 are two cases here: the first where there is a simple
16602 stack-slot replacement and a second where the stack-slot is
16603 out of range, or is used as a subreg. */
16604 if (reg_equiv_mem (REGNO (ref)))
16605 {
16606 ref = reg_equiv_mem (REGNO (ref));
16607 base = find_replacement (&XEXP (ref, 0));
16608 }
16609 else
16610 /* The slot is out of range, or was dressed up in a SUBREG. */
16611 base = reg_equiv_address (REGNO (ref));
16612
16613 /* PR 62254: If there is no equivalent memory location then just move
16614 the value as an SImode register move. This happens when the target
16615 architecture variant does not have an HImode register move. */
16616 if (base == NULL)
16617 {
16618 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16619
16620 if (REG_P (outval))
16621 {
16622 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16623 gen_rtx_SUBREG (SImode, outval, 0)));
16624 }
16625 else /* SUBREG_P (outval) */
16626 {
16627 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16628 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16629 SUBREG_REG (outval)));
16630 else
16631 /* FIXME: Handle other cases ? */
16632 gcc_unreachable ();
16633 }
16634 return;
16635 }
16636 }
16637 else
16638 base = find_replacement (&XEXP (ref, 0));
16639
16640 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16641
16642 /* Handle the case where the address is too complex to be offset by 1. */
16643 if (GET_CODE (base) == MINUS
16644 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16645 {
16646 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16647
16648 /* Be careful not to destroy OUTVAL. */
16649 if (reg_overlap_mentioned_p (base_plus, outval))
16650 {
16651 /* Updating base_plus might destroy outval, see if we can
16652 swap the scratch and base_plus. */
16653 if (!reg_overlap_mentioned_p (scratch, outval))
16654 std::swap (scratch, base_plus);
16655 else
16656 {
16657 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16658
16659 /* Be conservative and copy OUTVAL into the scratch now,
16660 this should only be necessary if outval is a subreg
16661 of something larger than a word. */
16662 /* XXX Might this clobber base? I can't see how it can,
16663 since scratch is known to overlap with OUTVAL, and
16664 must be wider than a word. */
16665 emit_insn (gen_movhi (scratch_hi, outval));
16666 outval = scratch_hi;
16667 }
16668 }
16669
16670 emit_set_insn (base_plus, base);
16671 base = base_plus;
16672 }
16673 else if (GET_CODE (base) == PLUS)
16674 {
16675 /* The addend must be CONST_INT, or we would have dealt with it above. */
16676 HOST_WIDE_INT hi, lo;
16677
16678 offset += INTVAL (XEXP (base, 1));
16679 base = XEXP (base, 0);
16680
16681 /* Rework the address into a legal sequence of insns. */
16682 /* Valid range for lo is -4095 -> 4095 */
16683 lo = (offset >= 0
16684 ? (offset & 0xfff)
16685 : -((-offset) & 0xfff));
16686
16687 /* Corner case, if lo is the max offset then we would be out of range
16688 once we have added the additional 1 below, so bump the msb into the
16689 pre-loading insn(s). */
16690 if (lo == 4095)
16691 lo &= 0x7ff;
16692
16693 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16694 ^ (HOST_WIDE_INT) 0x80000000)
16695 - (HOST_WIDE_INT) 0x80000000);
16696
16697 gcc_assert (hi + lo == offset);
16698
16699 if (hi != 0)
16700 {
16701 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16702
16703 /* Be careful not to destroy OUTVAL. */
16704 if (reg_overlap_mentioned_p (base_plus, outval))
16705 {
16706 /* Updating base_plus might destroy outval, see if we
16707 can swap the scratch and base_plus. */
16708 if (!reg_overlap_mentioned_p (scratch, outval))
16709 std::swap (scratch, base_plus);
16710 else
16711 {
16712 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16713
16714 /* Be conservative and copy outval into scratch now,
16715 this should only be necessary if outval is a
16716 subreg of something larger than a word. */
16717 /* XXX Might this clobber base? I can't see how it
16718 can, since scratch is known to overlap with
16719 outval. */
16720 emit_insn (gen_movhi (scratch_hi, outval));
16721 outval = scratch_hi;
16722 }
16723 }
16724
16725 /* Get the base address; addsi3 knows how to handle constants
16726 that require more than one insn. */
16727 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16728 base = base_plus;
16729 offset = lo;
16730 }
16731 }
16732
16733 if (BYTES_BIG_ENDIAN)
16734 {
16735 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16736 plus_constant (Pmode, base,
16737 offset + 1)),
16738 gen_lowpart (QImode, outval)));
16739 emit_insn (gen_lshrsi3 (scratch,
16740 gen_rtx_SUBREG (SImode, outval, 0),
16741 GEN_INT (8)));
16742 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16743 offset)),
16744 gen_lowpart (QImode, scratch)));
16745 }
16746 else
16747 {
16748 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16749 offset)),
16750 gen_lowpart (QImode, outval)));
16751 emit_insn (gen_lshrsi3 (scratch,
16752 gen_rtx_SUBREG (SImode, outval, 0),
16753 GEN_INT (8)));
16754 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16755 plus_constant (Pmode, base,
16756 offset + 1)),
16757 gen_lowpart (QImode, scratch)));
16758 }
16759 }
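
/* A standalone sketch (not compiler code; the helper name is purely
   illustrative) of the offset split used by both reload helpers above:
   the low part must fit the byte load/store immediate range
   (-4095..4095, trimmed to 0x7ff when the extra "+1" byte offset would
   push it out of range), and the remainder is folded into the base
   register with an ADD.

     static void
     split_reload_offset (long offset, long *hi, long *lo)
     {
       *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
       if (*lo == 4095)          // leave room for the extra +1
         *lo &= 0x7ff;
       *hi = offset - *lo;       // e.g. 0x1234 -> hi = 0x1000, lo = 0x234
     }
*/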
16760
16761 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16762 (padded to the size of a word) should be passed in a register. */
16763
16764 static bool
16765 arm_must_pass_in_stack (const function_arg_info &arg)
16766 {
16767 if (TARGET_AAPCS_BASED)
16768 return must_pass_in_stack_var_size (arg);
16769 else
16770 return must_pass_in_stack_var_size_or_pad (arg);
16771 }
16772
16773
16774 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16775 byte of a stack argument has useful data. For legacy APCS ABIs we use
16776 the default. For AAPCS based ABIs small aggregate types are placed
16777 in the lowest memory address. */
16778
16779 static pad_direction
16780 arm_function_arg_padding (machine_mode mode, const_tree type)
16781 {
16782 if (!TARGET_AAPCS_BASED)
16783 return default_function_arg_padding (mode, type);
16784
16785 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16786 return PAD_DOWNWARD;
16787
16788 return PAD_UPWARD;
16789 }
16790
16791
16792 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16793 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16794 register has useful data, and return the opposite if the most
16795 significant byte does. */
16796
16797 bool
16798 arm_pad_reg_upward (machine_mode mode,
16799 tree type, int first ATTRIBUTE_UNUSED)
16800 {
16801 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16802 {
16803 /* For AAPCS, small aggregates, small fixed-point types,
16804 and small complex types are always padded upwards. */
16805 if (type)
16806 {
16807 if ((AGGREGATE_TYPE_P (type)
16808 || TREE_CODE (type) == COMPLEX_TYPE
16809 || FIXED_POINT_TYPE_P (type))
16810 && int_size_in_bytes (type) <= 4)
16811 return true;
16812 }
16813 else
16814 {
16815 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16816 && GET_MODE_SIZE (mode) <= 4)
16817 return true;
16818 }
16819 }
16820
16821 /* Otherwise, use default padding. */
16822 return !BYTES_BIG_ENDIAN;
16823 }
16824
16825 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16826 assuming that the address in the base register is word aligned. */
16827 bool
16828 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16829 {
16830 HOST_WIDE_INT max_offset;
16831
16832 /* Offset must be a multiple of 4 in Thumb mode. */
16833 if (TARGET_THUMB2 && ((offset & 3) != 0))
16834 return false;
16835
16836 if (TARGET_THUMB2)
16837 max_offset = 1020;
16838 else if (TARGET_ARM)
16839 max_offset = 255;
16840 else
16841 return false;
16842
16843 return ((offset <= max_offset) && (offset >= -max_offset));
16844 }
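
/* For example (illustrative values): an offset of 1020 is accepted for
   Thumb-2 (a multiple of 4 within +/-1020) but rejected for Arm, where
   the limit is +/-255; 32 is accepted for both; 1022 is rejected for
   both, being neither a multiple of 4 nor within the Arm range.  */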
16845
16846 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16847 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16848 Assumes that the address in the base register RN is word aligned. Pattern
16849 guarantees that both memory accesses use the same base register,
16850 the offsets are constants within the range, and the gap between the offsets is 4.
16851 If reload is complete then check that registers are legal. WBACK indicates whether
16852 address is updated. LOAD indicates whether memory access is load or store. */
16853 bool
16854 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16855 bool wback, bool load)
16856 {
16857 unsigned int t, t2, n;
16858
16859 if (!reload_completed)
16860 return true;
16861
16862 if (!offset_ok_for_ldrd_strd (offset))
16863 return false;
16864
16865 t = REGNO (rt);
16866 t2 = REGNO (rt2);
16867 n = REGNO (rn);
16868
16869 if ((TARGET_THUMB2)
16870 && ((wback && (n == t || n == t2))
16871 || (t == SP_REGNUM)
16872 || (t == PC_REGNUM)
16873 || (t2 == SP_REGNUM)
16874 || (t2 == PC_REGNUM)
16875 || (!load && (n == PC_REGNUM))
16876 || (load && (t == t2))
16877 /* Triggers Cortex-M3 LDRD errata. */
16878 || (!wback && load && fix_cm3_ldrd && (n == t))))
16879 return false;
16880
16881 if ((TARGET_ARM)
16882 && ((wback && (n == t || n == t2))
16883 || (t2 == PC_REGNUM)
16884 || (t % 2 != 0) /* First destination register is not even. */
16885 || (t2 != t + 1)
16886 /* PC can be used as base register (for offset addressing only),
16887 but it is deprecated. */
16888 || (n == PC_REGNUM)))
16889 return false;
16890
16891 return true;
16892 }
16893
16894 /* Return true if a 64-bit access with alignment ALIGN and with a
16895 constant offset OFFSET from the base pointer is permitted on this
16896 architecture. */
16897 static bool
16898 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16899 {
16900 return (unaligned_access
16901 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16902 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16903 }
16904
16905 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16906 operand MEM's address contains an immediate offset from the base
16907 register and has no side effects, in which case it sets BASE,
16908 OFFSET and ALIGN accordingly. */
16909 static bool
16910 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16911 {
16912 rtx addr;
16913
16914 gcc_assert (base != NULL && offset != NULL);
16915
16916 /* TODO: Handle more general memory operand patterns, such as
16917 PRE_DEC and PRE_INC. */
16918
16919 if (side_effects_p (mem))
16920 return false;
16921
16922 /* Can't deal with subregs. */
16923 if (SUBREG_P (mem))
16924 return false;
16925
16926 gcc_assert (MEM_P (mem));
16927
16928 *offset = const0_rtx;
16929 *align = MEM_ALIGN (mem);
16930
16931 addr = XEXP (mem, 0);
16932
16933 /* If addr isn't valid for DImode, then we can't handle it. */
16934 if (!arm_legitimate_address_p (DImode, addr,
16935 reload_in_progress || reload_completed))
16936 return false;
16937
16938 if (REG_P (addr))
16939 {
16940 *base = addr;
16941 return true;
16942 }
16943 else if (GET_CODE (addr) == PLUS)
16944 {
16945 *base = XEXP (addr, 0);
16946 *offset = XEXP (addr, 1);
16947 return (REG_P (*base) && CONST_INT_P (*offset));
16948 }
16949
16950 return false;
16951 }
16952
16953 /* Called from a peephole2 to replace two word-size accesses with a
16954 single LDRD/STRD instruction. Returns true iff we can generate a
16955 new instruction sequence. That is, both accesses use the same base
16956 register and the gap between constant offsets is 4. This function
16957 may reorder its operands to match ldrd/strd RTL templates.
16958 OPERANDS are the operands found by the peephole matcher;
16959 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16960 corresponding memory operands. LOAD indicates whether the access
16961 is load or store. CONST_STORE indicates a store of constant
16962 integer values held in OPERANDS[4,5] and assumes that the pattern
16963 is four insns long, for the purpose of checking dead registers.
16964 COMMUTE indicates that register operands may be reordered. */
16965 bool
16966 gen_operands_ldrd_strd (rtx *operands, bool load,
16967 bool const_store, bool commute)
16968 {
16969 int nops = 2;
16970 HOST_WIDE_INT offsets[2], offset, align[2];
16971 rtx base = NULL_RTX;
16972 rtx cur_base, cur_offset, tmp;
16973 int i, gap;
16974 HARD_REG_SET regset;
16975
16976 gcc_assert (!const_store || !load);
16977 /* Check that the memory references are immediate offsets from the
16978 same base register. Extract the base register, the destination
16979 registers, and the corresponding memory offsets. */
16980 for (i = 0; i < nops; i++)
16981 {
16982 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16983 &align[i]))
16984 return false;
16985
16986 if (i == 0)
16987 base = cur_base;
16988 else if (REGNO (base) != REGNO (cur_base))
16989 return false;
16990
16991 offsets[i] = INTVAL (cur_offset);
16992 if (GET_CODE (operands[i]) == SUBREG)
16993 {
16994 tmp = SUBREG_REG (operands[i]);
16995 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16996 operands[i] = tmp;
16997 }
16998 }
16999
17000 /* Make sure there is no dependency between the individual loads. */
17001 if (load && REGNO (operands[0]) == REGNO (base))
17002 return false; /* RAW */
17003
17004 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17005 return false; /* WAW */
17006
17007 /* If the same input register is used in both stores
17008 when storing different constants, try to find a free register.
17009 For example, the code
17010 mov r0, 0
17011 str r0, [r2]
17012 mov r0, 1
17013 str r0, [r2, #4]
17014 can be transformed into
17015 mov r1, 0
17016 mov r0, 1
17017 strd r1, r0, [r2]
17018 in Thumb mode assuming that r1 is free.
17019 For ARM mode do the same but only if the starting register
17020 can be made to be even. */
17021 if (const_store
17022 && REGNO (operands[0]) == REGNO (operands[1])
17023 && INTVAL (operands[4]) != INTVAL (operands[5]))
17024 {
17025 if (TARGET_THUMB2)
17026 {
17027 CLEAR_HARD_REG_SET (regset);
17028 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17029 if (tmp == NULL_RTX)
17030 return false;
17031
17032 /* Use the new register in the first load to ensure that
17033 if the original input register is not dead after peephole,
17034 then it will have the correct constant value. */
17035 operands[0] = tmp;
17036 }
17037 else if (TARGET_ARM)
17038 {
17039 int regno = REGNO (operands[0]);
17040 if (!peep2_reg_dead_p (4, operands[0]))
17041 {
17042 /* When the input register is even and is not dead after the
17043 pattern, it has to hold the second constant but we cannot
17044 form a legal STRD in ARM mode with this register as the second
17045 register. */
17046 if (regno % 2 == 0)
17047 return false;
17048
17049 /* Is regno-1 free? */
17050 SET_HARD_REG_SET (regset);
17051 CLEAR_HARD_REG_BIT (regset, regno - 1);
17052 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17053 if (tmp == NULL_RTX)
17054 return false;
17055
17056 operands[0] = tmp;
17057 }
17058 else
17059 {
17060 /* Find a DImode register. */
17061 CLEAR_HARD_REG_SET (regset);
17062 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17063 if (tmp != NULL_RTX)
17064 {
17065 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17066 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17067 }
17068 else
17069 {
17070 /* Can we use the input register to form a DI register? */
17071 SET_HARD_REG_SET (regset);
17072 CLEAR_HARD_REG_BIT (regset,
17073 regno % 2 == 0 ? regno + 1 : regno - 1);
17074 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17075 if (tmp == NULL_RTX)
17076 return false;
17077 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17078 }
17079 }
17080
17081 gcc_assert (operands[0] != NULL_RTX);
17082 gcc_assert (operands[1] != NULL_RTX);
17083 gcc_assert (REGNO (operands[0]) % 2 == 0);
17084 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17085 }
17086 }
17087
17088 /* Make sure the instructions are ordered with lower memory access first. */
17089 if (offsets[0] > offsets[1])
17090 {
17091 gap = offsets[0] - offsets[1];
17092 offset = offsets[1];
17093
17094 /* Swap the instructions such that lower memory is accessed first. */
17095 std::swap (operands[0], operands[1]);
17096 std::swap (operands[2], operands[3]);
17097 std::swap (align[0], align[1]);
17098 if (const_store)
17099 std::swap (operands[4], operands[5]);
17100 }
17101 else
17102 {
17103 gap = offsets[1] - offsets[0];
17104 offset = offsets[0];
17105 }
17106
17107 /* Make sure accesses are to consecutive memory locations. */
17108 if (gap != GET_MODE_SIZE (SImode))
17109 return false;
17110
17111 if (!align_ok_ldrd_strd (align[0], offset))
17112 return false;
17113
17114 /* Make sure we generate legal instructions. */
17115 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17116 false, load))
17117 return true;
17118
17119 /* In Thumb state, where registers are almost unconstrained, there
17120 is little hope of fixing it. */
17121 if (TARGET_THUMB2)
17122 return false;
17123
17124 if (load && commute)
17125 {
17126 /* Try reordering registers. */
17127 std::swap (operands[0], operands[1]);
17128 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17129 false, load))
17130 return true;
17131 }
17132
17133 if (const_store)
17134 {
17135 /* If input registers are dead after this pattern, they can be
17136 reordered or replaced by other registers that are free in the
17137 current pattern. */
17138 if (!peep2_reg_dead_p (4, operands[0])
17139 || !peep2_reg_dead_p (4, operands[1]))
17140 return false;
17141
17142 /* Try to reorder the input registers. */
17143 /* For example, the code
17144 mov r0, 0
17145 mov r1, 1
17146 str r1, [r2]
17147 str r0, [r2, #4]
17148 can be transformed into
17149 mov r1, 0
17150 mov r0, 1
17151 strd r0, r1, [r2]
17152 */
17153 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17154 false, false))
17155 {
17156 std::swap (operands[0], operands[1]);
17157 return true;
17158 }
17159
17160 /* Try to find a free DI register. */
17161 CLEAR_HARD_REG_SET (regset);
17162 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17163 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17164 while (true)
17165 {
17166 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17167 if (tmp == NULL_RTX)
17168 return false;
17169
17170 /* DREG must be an even-numbered register in DImode.
17171 Split it into SI registers. */
17172 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17173 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17174 gcc_assert (operands[0] != NULL_RTX);
17175 gcc_assert (operands[1] != NULL_RTX);
17176 gcc_assert (REGNO (operands[0]) % 2 == 0);
17177 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17178
17179 return (operands_ok_ldrd_strd (operands[0], operands[1],
17180 base, offset,
17181 false, load));
17182 }
17183 }
17184
17185 return false;
17186 }
17187
17188
17189 /* Return true if parallel execution of the two word-size accesses provided
17190 could be satisfied with a single LDRD/STRD instruction. Two word-size
17191 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17192 register operands and OPERANDS[2,3] are the corresponding memory operands.
17193 */
17194 bool
17195 valid_operands_ldrd_strd (rtx *operands, bool load)
17196 {
17197 int nops = 2;
17198 HOST_WIDE_INT offsets[2], offset, align[2];
17199 rtx base = NULL_RTX;
17200 rtx cur_base, cur_offset;
17201 int i, gap;
17202
17203 /* Check that the memory references are immediate offsets from the
17204 same base register. Extract the base register, the destination
17205 registers, and the corresponding memory offsets. */
17206 for (i = 0; i < nops; i++)
17207 {
17208 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17209 &align[i]))
17210 return false;
17211
17212 if (i == 0)
17213 base = cur_base;
17214 else if (REGNO (base) != REGNO (cur_base))
17215 return false;
17216
17217 offsets[i] = INTVAL (cur_offset);
17218 if (GET_CODE (operands[i]) == SUBREG)
17219 return false;
17220 }
17221
17222 if (offsets[0] > offsets[1])
17223 return false;
17224
17225 gap = offsets[1] - offsets[0];
17226 offset = offsets[0];
17227
17228 /* Make sure accesses are to consecutive memory locations. */
17229 if (gap != GET_MODE_SIZE (SImode))
17230 return false;
17231
17232 if (!align_ok_ldrd_strd (align[0], offset))
17233 return false;
17234
17235 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17236 false, load);
17237 }
17238
17239 \f
17240 /* Print a symbolic form of X to the debug file, F. */
17241 static void
17242 arm_print_value (FILE *f, rtx x)
17243 {
17244 switch (GET_CODE (x))
17245 {
17246 case CONST_INT:
17247 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17248 return;
17249
17250 case CONST_DOUBLE:
17251 {
17252 char fpstr[20];
17253 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17254 sizeof (fpstr), 0, 1);
17255 fputs (fpstr, f);
17256 }
17257 return;
17258
17259 case CONST_VECTOR:
17260 {
17261 int i;
17262
17263 fprintf (f, "<");
17264 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17265 {
17266 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17267 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17268 fputc (',', f);
17269 }
17270 fprintf (f, ">");
17271 }
17272 return;
17273
17274 case CONST_STRING:
17275 fprintf (f, "\"%s\"", XSTR (x, 0));
17276 return;
17277
17278 case SYMBOL_REF:
17279 fprintf (f, "`%s'", XSTR (x, 0));
17280 return;
17281
17282 case LABEL_REF:
17283 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17284 return;
17285
17286 case CONST:
17287 arm_print_value (f, XEXP (x, 0));
17288 return;
17289
17290 case PLUS:
17291 arm_print_value (f, XEXP (x, 0));
17292 fprintf (f, "+");
17293 arm_print_value (f, XEXP (x, 1));
17294 return;
17295
17296 case PC:
17297 fprintf (f, "pc");
17298 return;
17299
17300 default:
17301 fprintf (f, "????");
17302 return;
17303 }
17304 }
17305 \f
17306 /* Routines for manipulation of the constant pool. */
17307
17308 /* Arm instructions cannot load a large constant directly into a
17309 register; they have to come from a pc relative load. The constant
17310 must therefore be placed in the addressable range of the pc
17311 relative load. Depending on the precise pc relative load
17312 instruction the range is somewhere between 256 bytes and 4k. This
17313 means that we often have to dump a constant inside a function, and
17314 generate code to branch around it.
17315
17316 It is important to minimize this, since the branches will slow
17317 things down and make the code larger.
17318
17319 Normally we can hide the table after an existing unconditional
17320 branch so that there is no interruption of the flow, but in the
17321 worst case the code looks like this:
17322
17323 ldr rn, L1
17324 ...
17325 b L2
17326 align
17327 L1: .long value
17328 L2:
17329 ...
17330
17331 ldr rn, L3
17332 ...
17333 b L4
17334 align
17335 L3: .long value
17336 L4:
17337 ...
17338
17339 We fix this by performing a scan after scheduling, which notices
17340 which instructions need to have their operands fetched from the
17341 constant table and builds the table.
17342
17343 The algorithm starts by building a table of all the constants that
17344 need fixing up and all the natural barriers in the function (places
17345 where a constant table can be dropped without breaking the flow).
17346 For each fixup we note how far the pc-relative replacement will be
17347 able to reach and the offset of the instruction into the function.
17348
17349 Having built the table we then group the fixes together to form
17350 tables that are as large as possible (subject to addressing
17351 constraints) and emit each table of constants after the last
17352 barrier that is within range of all the instructions in the group.
17353 If a group does not contain a barrier, then we forcibly create one
17354 by inserting a jump instruction into the flow. Once the table has
17355 been inserted, the insns are then modified to reference the
17356 relevant entry in the pool.
17357
17358 Possible enhancements to the algorithm (not implemented) are:
17359
17360 1) For some processors and object formats, there may be benefit in
17361 aligning the pools to the start of cache lines; this alignment
17362 would need to be taken into account when calculating addressability
17363 of a pool. */
17364
17365 /* These typedefs are located at the start of this file, so that
17366 they can be used in the prototypes there. This comment is to
17367 remind readers of that fact so that the following structures
17368 can be understood more easily.
17369
17370 typedef struct minipool_node Mnode;
17371 typedef struct minipool_fixup Mfix; */
17372
17373 struct minipool_node
17374 {
17375 /* Doubly linked chain of entries. */
17376 Mnode * next;
17377 Mnode * prev;
17378 /* The maximum offset into the code that this entry can be placed. While
17379 pushing fixes for forward references, all entries are sorted in order
17380 of increasing max_address. */
17381 HOST_WIDE_INT max_address;
17382 /* Similarly for an entry inserted for a backwards ref. */
17383 HOST_WIDE_INT min_address;
17384 /* The number of fixes referencing this entry. This can become zero
17385 if we "unpush" an entry. In this case we ignore the entry when we
17386 come to emit the code. */
17387 int refcount;
17388 /* The offset from the start of the minipool. */
17389 HOST_WIDE_INT offset;
17390 /* The value in table. */
17391 rtx value;
17392 /* The mode of value. */
17393 machine_mode mode;
17394 /* The size of the value. With iWMMXt enabled
17395 sizes > 4 also imply an alignment of 8 bytes. */
17396 int fix_size;
17397 };
17398
17399 struct minipool_fixup
17400 {
17401 Mfix * next;
17402 rtx_insn * insn;
17403 HOST_WIDE_INT address;
17404 rtx * loc;
17405 machine_mode mode;
17406 int fix_size;
17407 rtx value;
17408 Mnode * minipool;
17409 HOST_WIDE_INT forwards;
17410 HOST_WIDE_INT backwards;
17411 };
17412
17413 /* Fixes less than a word need padding out to a word boundary. */
17414 #define MINIPOOL_FIX_SIZE(mode) \
17415 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17416
17417 static Mnode * minipool_vector_head;
17418 static Mnode * minipool_vector_tail;
17419 static rtx_code_label *minipool_vector_label;
17420 static int minipool_pad;
17421
17422 /* The linked list of all minipool fixes required for this function. */
17423 Mfix * minipool_fix_head;
17424 Mfix * minipool_fix_tail;
17425 /* The fix entry for the current minipool, once it has been placed. */
17426 Mfix * minipool_barrier;
17427
17428 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17429 #define JUMP_TABLES_IN_TEXT_SECTION 0
17430 #endif
17431
17432 static HOST_WIDE_INT
17433 get_jump_table_size (rtx_jump_table_data *insn)
17434 {
17435 /* ADDR_VECs only take room if read-only data goes into the text
17436 section. */
17437 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17438 {
17439 rtx body = PATTERN (insn);
17440 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17441 HOST_WIDE_INT size;
17442 HOST_WIDE_INT modesize;
17443
17444 modesize = GET_MODE_SIZE (GET_MODE (body));
17445 size = modesize * XVECLEN (body, elt);
17446 switch (modesize)
17447 {
17448 case 1:
17449 /* Round up size of TBB table to a halfword boundary. */
17450 size = (size + 1) & ~HOST_WIDE_INT_1;
17451 break;
17452 case 2:
17453 /* No padding necessary for TBH. */
17454 break;
17455 case 4:
17456 /* Add two bytes for alignment on Thumb. */
17457 if (TARGET_THUMB)
17458 size += 2;
17459 break;
17460 default:
17461 gcc_unreachable ();
17462 }
17463 return size;
17464 }
17465
17466 return 0;
17467 }
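
/* A standalone sketch (not compiler code; the helper name is purely
   illustrative) of the size computation above: a 7-entry TBB table
   takes 8 bytes, a 7-entry TBH table 14 bytes, and a 7-entry word
   table on Thumb 30 bytes.

     static long
     jump_table_bytes (int entry_size, int n_entries, int thumb)
     {
       long size = (long) entry_size * n_entries;
       if (entry_size == 1)
         size = (size + 1) & ~1L;       // round TBB tables to a halfword
       else if (entry_size == 4 && thumb)
         size += 2;                     // alignment padding on Thumb
       return size;
     }
*/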
17468
17469 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17470 function descriptor) into a register and the GOT address into the
17471 FDPIC register, returning an rtx for the register holding the
17472 function address. */
17473
17474 rtx
17475 arm_load_function_descriptor (rtx funcdesc)
17476 {
17477 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17478 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17479 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17480 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17481
17482 emit_move_insn (fnaddr_reg, fnaddr);
17483
17484 /* The ABI requires the entry point address to be loaded first, but
17485 since we cannot support lazy binding for lack of atomic load of
17486 two 32-bit values, we do not need to bother to prevent the
17487 previous load from being moved after that of the GOT address. */
17488 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17489
17490 return fnaddr_reg;
17491 }
17492
17493 /* Return the maximum amount of padding that will be inserted before
17494 label LABEL. */
17495 static HOST_WIDE_INT
17496 get_label_padding (rtx label)
17497 {
17498 HOST_WIDE_INT align, min_insn_size;
17499
17500 align = 1 << label_to_alignment (label).levels[0].log;
17501 min_insn_size = TARGET_THUMB ? 2 : 4;
17502 return align > min_insn_size ? align - min_insn_size : 0;
17503 }
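
/* For example, a label aligned to an 8-byte boundary on Thumb (minimum
   instruction size 2) can be preceded by at most 8 - 2 = 6 bytes of
   padding, while alignments no larger than one instruction add none.  */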
17504
17505 /* Move a minipool fix MP from its current location to before MAX_MP.
17506 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17507 constraints may need updating. */
17508 static Mnode *
17509 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17510 HOST_WIDE_INT max_address)
17511 {
17512 /* The code below assumes these are different. */
17513 gcc_assert (mp != max_mp);
17514
17515 if (max_mp == NULL)
17516 {
17517 if (max_address < mp->max_address)
17518 mp->max_address = max_address;
17519 }
17520 else
17521 {
17522 if (max_address > max_mp->max_address - mp->fix_size)
17523 mp->max_address = max_mp->max_address - mp->fix_size;
17524 else
17525 mp->max_address = max_address;
17526
17527 /* Unlink MP from its current position. Since max_mp is non-null,
17528 mp->prev must be non-null. */
17529 mp->prev->next = mp->next;
17530 if (mp->next != NULL)
17531 mp->next->prev = mp->prev;
17532 else
17533 minipool_vector_tail = mp->prev;
17534
17535 /* Re-insert it before MAX_MP. */
17536 mp->next = max_mp;
17537 mp->prev = max_mp->prev;
17538 max_mp->prev = mp;
17539
17540 if (mp->prev != NULL)
17541 mp->prev->next = mp;
17542 else
17543 minipool_vector_head = mp;
17544 }
17545
17546 /* Save the new entry. */
17547 max_mp = mp;
17548
17549 /* Scan over the preceding entries and adjust their addresses as
17550 required. */
17551 while (mp->prev != NULL
17552 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17553 {
17554 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17555 mp = mp->prev;
17556 }
17557
17558 return max_mp;
17559 }
17560
17561 /* Add a constant to the minipool for a forward reference. Returns the
17562 node added or NULL if the constant will not fit in this pool. */
17563 static Mnode *
17564 add_minipool_forward_ref (Mfix *fix)
17565 {
17566 /* If set, max_mp is the first pool_entry that has a lower
17567 constraint than the one we are trying to add. */
17568 Mnode * max_mp = NULL;
17569 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17570 Mnode * mp;
17571
17572 /* If the minipool starts before the end of FIX->INSN then this FIX
17573 cannot be placed into the current pool. Furthermore, adding the
17574 new constant pool entry may cause the pool to start FIX_SIZE bytes
17575 earlier. */
17576 if (minipool_vector_head &&
17577 (fix->address + get_attr_length (fix->insn)
17578 >= minipool_vector_head->max_address - fix->fix_size))
17579 return NULL;
17580
17581 /* Scan the pool to see if a constant with the same value has
17582 already been added. While we are doing this, also note the
17583 location where we must insert the constant if it doesn't already
17584 exist. */
17585 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17586 {
17587 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17588 && fix->mode == mp->mode
17589 && (!LABEL_P (fix->value)
17590 || (CODE_LABEL_NUMBER (fix->value)
17591 == CODE_LABEL_NUMBER (mp->value)))
17592 && rtx_equal_p (fix->value, mp->value))
17593 {
17594 /* More than one fix references this entry. */
17595 mp->refcount++;
17596 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17597 }
17598
17599 /* Note the insertion point if necessary. */
17600 if (max_mp == NULL
17601 && mp->max_address > max_address)
17602 max_mp = mp;
17603
17604 /* If we are inserting an 8-byte aligned quantity and
17605 we have not already found an insertion point, then
17606 make sure that all such 8-byte aligned quantities are
17607 placed at the start of the pool. */
17608 if (ARM_DOUBLEWORD_ALIGN
17609 && max_mp == NULL
17610 && fix->fix_size >= 8
17611 && mp->fix_size < 8)
17612 {
17613 max_mp = mp;
17614 max_address = mp->max_address;
17615 }
17616 }
17617
17618 /* The value is not currently in the minipool, so we need to create
17619 a new entry for it. If MAX_MP is NULL, the entry will be put on
17620 the end of the list since the placement is less constrained than
17621 any existing entry. Otherwise, we insert the new fix before
17622 MAX_MP and, if necessary, adjust the constraints on the other
17623 entries. */
17624 mp = XNEW (Mnode);
17625 mp->fix_size = fix->fix_size;
17626 mp->mode = fix->mode;
17627 mp->value = fix->value;
17628 mp->refcount = 1;
17629 /* Not yet required for a backwards ref. */
17630 mp->min_address = -65536;
17631
17632 if (max_mp == NULL)
17633 {
17634 mp->max_address = max_address;
17635 mp->next = NULL;
17636 mp->prev = minipool_vector_tail;
17637
17638 if (mp->prev == NULL)
17639 {
17640 minipool_vector_head = mp;
17641 minipool_vector_label = gen_label_rtx ();
17642 }
17643 else
17644 mp->prev->next = mp;
17645
17646 minipool_vector_tail = mp;
17647 }
17648 else
17649 {
17650 if (max_address > max_mp->max_address - mp->fix_size)
17651 mp->max_address = max_mp->max_address - mp->fix_size;
17652 else
17653 mp->max_address = max_address;
17654
17655 mp->next = max_mp;
17656 mp->prev = max_mp->prev;
17657 max_mp->prev = mp;
17658 if (mp->prev != NULL)
17659 mp->prev->next = mp;
17660 else
17661 minipool_vector_head = mp;
17662 }
17663
17664 /* Save the new entry. */
17665 max_mp = mp;
17666
17667 /* Scan over the preceding entries and adjust their addresses as
17668 required. */
17669 while (mp->prev != NULL
17670 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17671 {
17672 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17673 mp = mp->prev;
17674 }
17675
17676 return max_mp;
17677 }
17678
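/* MP is an existing minipool entry that is now also referenced by a backward
   fix whose minimum-address constraint is MIN_ADDRESS.  If MIN_MP is NULL,
   only tighten MP's own constraint; otherwise unlink MP and reinsert it
   immediately after MIN_MP.  In both cases recompute the offset of every
   entry and propagate the min_address constraints towards the tail of the
   pool.  Return MP as the new MIN_MP.  */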
17679 static Mnode *
17680 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17681 HOST_WIDE_INT min_address)
17682 {
17683 HOST_WIDE_INT offset;
17684
17685 /* The code below assumes these are different. */
17686 gcc_assert (mp != min_mp);
17687
17688 if (min_mp == NULL)
17689 {
17690 if (min_address > mp->min_address)
17691 mp->min_address = min_address;
17692 }
17693 else
17694 {
17695 /* We will adjust this below if it is too loose. */
17696 mp->min_address = min_address;
17697
17698 /* Unlink MP from its current position. Since min_mp is non-null,
17699 mp->next must be non-null. */
17700 mp->next->prev = mp->prev;
17701 if (mp->prev != NULL)
17702 mp->prev->next = mp->next;
17703 else
17704 minipool_vector_head = mp->next;
17705
17706 /* Reinsert it after MIN_MP. */
17707 mp->prev = min_mp;
17708 mp->next = min_mp->next;
17709 min_mp->next = mp;
17710 if (mp->next != NULL)
17711 mp->next->prev = mp;
17712 else
17713 minipool_vector_tail = mp;
17714 }
17715
17716 min_mp = mp;
17717
17718 offset = 0;
17719 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17720 {
17721 mp->offset = offset;
17722 if (mp->refcount > 0)
17723 offset += mp->fix_size;
17724
17725 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17726 mp->next->min_address = mp->min_address + mp->fix_size;
17727 }
17728
17729 return min_mp;
17730 }
17731
17732 /* Add a constant to the minipool for a backward reference. Returns the
17733 node added or NULL if the constant will not fit in this pool.
17734
17735 Note that the code for inserting a backwards reference can be
17736 somewhat confusing because the calculated offsets for each fix do
17737 not take into account the size of the pool (which is still under
17738 construction). */
17739 static Mnode *
17740 add_minipool_backward_ref (Mfix *fix)
17741 {
17742 /* If set, min_mp is the last pool_entry that has a lower constraint
17743 than the one we are trying to add. */
17744 Mnode *min_mp = NULL;
17745 /* This can be negative, since it is only a constraint. */
17746 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17747 Mnode *mp;
17748
17749 /* If we can't reach the current pool from this insn, or if we can't
17750 insert this entry at the end of the pool without pushing other
17751 fixes out of range, then we don't try. This ensures that we
17752 can't fail later on. */
17753 if (min_address >= minipool_barrier->address
17754 || (minipool_vector_tail->min_address + fix->fix_size
17755 >= minipool_barrier->address))
17756 return NULL;
17757
17758 /* Scan the pool to see if a constant with the same value has
17759 already been added. While we are doing this, also note the
17760 location where we must insert the constant if it doesn't already
17761 exist. */
17762 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17763 {
17764 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17765 && fix->mode == mp->mode
17766 && (!LABEL_P (fix->value)
17767 || (CODE_LABEL_NUMBER (fix->value)
17768 == CODE_LABEL_NUMBER (mp->value)))
17769 && rtx_equal_p (fix->value, mp->value)
17770 /* Check that there is enough slack to move this entry to the
17771 end of the table (this is conservative). */
17772 && (mp->max_address
17773 > (minipool_barrier->address
17774 + minipool_vector_tail->offset
17775 + minipool_vector_tail->fix_size)))
17776 {
17777 mp->refcount++;
17778 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17779 }
17780
17781 if (min_mp != NULL)
17782 mp->min_address += fix->fix_size;
17783 else
17784 {
17785 /* Note the insertion point if necessary. */
17786 if (mp->min_address < min_address)
17787 {
17788 /* For now, we do not allow the insertion of 8-byte alignment
17789 requiring nodes anywhere but at the start of the pool. */
17790 if (ARM_DOUBLEWORD_ALIGN
17791 && fix->fix_size >= 8 && mp->fix_size < 8)
17792 return NULL;
17793 else
17794 min_mp = mp;
17795 }
17796 else if (mp->max_address
17797 < minipool_barrier->address + mp->offset + fix->fix_size)
17798 {
17799 /* Inserting before this entry would push the fix beyond
17800 its maximum address (which can happen if we have
17801 re-located a forwards fix); force the new fix to come
17802 after it. */
17803 if (ARM_DOUBLEWORD_ALIGN
17804 && fix->fix_size >= 8 && mp->fix_size < 8)
17805 return NULL;
17806 else
17807 {
17808 min_mp = mp;
17809 min_address = mp->min_address + fix->fix_size;
17810 }
17811 }
17812 /* Do not insert a non-8-byte aligned quantity before 8-byte
17813 aligned quantities. */
17814 else if (ARM_DOUBLEWORD_ALIGN
17815 && fix->fix_size < 8
17816 && mp->fix_size >= 8)
17817 {
17818 min_mp = mp;
17819 min_address = mp->min_address + fix->fix_size;
17820 }
17821 }
17822 }
17823
17824 /* We need to create a new entry. */
17825 mp = XNEW (Mnode);
17826 mp->fix_size = fix->fix_size;
17827 mp->mode = fix->mode;
17828 mp->value = fix->value;
17829 mp->refcount = 1;
17830 mp->max_address = minipool_barrier->address + 65536;
17831
17832 mp->min_address = min_address;
17833
17834 if (min_mp == NULL)
17835 {
17836 mp->prev = NULL;
17837 mp->next = minipool_vector_head;
17838
17839 if (mp->next == NULL)
17840 {
17841 minipool_vector_tail = mp;
17842 minipool_vector_label = gen_label_rtx ();
17843 }
17844 else
17845 mp->next->prev = mp;
17846
17847 minipool_vector_head = mp;
17848 }
17849 else
17850 {
17851 mp->next = min_mp->next;
17852 mp->prev = min_mp;
17853 min_mp->next = mp;
17854
17855 if (mp->next != NULL)
17856 mp->next->prev = mp;
17857 else
17858 minipool_vector_tail = mp;
17859 }
17860
17861 /* Save the new entry. */
17862 min_mp = mp;
17863
17864 if (mp->prev)
17865 mp = mp->prev;
17866 else
17867 mp->offset = 0;
17868
17869 /* Scan over the following entries and adjust their offsets. */
17870 while (mp->next != NULL)
17871 {
17872 if (mp->next->min_address < mp->min_address + mp->fix_size)
17873 mp->next->min_address = mp->min_address + mp->fix_size;
17874
17875 if (mp->refcount)
17876 mp->next->offset = mp->offset + mp->fix_size;
17877 else
17878 mp->next->offset = mp->offset;
17879
17880 mp = mp->next;
17881 }
17882
17883 return min_mp;
17884 }
17885
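/* Record BARRIER as the barrier terminating the current minipool and assign
   a byte offset within the pool to every entry that is still referenced
   (refcount > 0); unreferenced entries take no space.  */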
17886 static void
17887 assign_minipool_offsets (Mfix *barrier)
17888 {
17889 HOST_WIDE_INT offset = 0;
17890 Mnode *mp;
17891
17892 minipool_barrier = barrier;
17893
17894 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17895 {
17896 mp->offset = offset;
17897
17898 if (mp->refcount > 0)
17899 offset += mp->fix_size;
17900 }
17901 }
17902
17903 /* Output the literal table. */
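/* The pool is emitted after SCAN as: a fresh label, an alignment insn
   (8-byte if any live entry needs it, otherwise 4-byte), the pool label
   itself, one consttable_<size> insn per entry still referenced, a
   consttable_end marker and finally a barrier.  */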
17904 static void
17905 dump_minipool (rtx_insn *scan)
17906 {
17907 Mnode * mp;
17908 Mnode * nmp;
17909 int align64 = 0;
17910
17911 if (ARM_DOUBLEWORD_ALIGN)
17912 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17913 if (mp->refcount > 0 && mp->fix_size >= 8)
17914 {
17915 align64 = 1;
17916 break;
17917 }
17918
17919 if (dump_file)
17920 fprintf (dump_file,
17921 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17922 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17923
17924 scan = emit_label_after (gen_label_rtx (), scan);
17925 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17926 scan = emit_label_after (minipool_vector_label, scan);
17927
17928 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17929 {
17930 if (mp->refcount > 0)
17931 {
17932 if (dump_file)
17933 {
17934 fprintf (dump_file,
17935 ";; Offset %u, min %ld, max %ld ",
17936 (unsigned) mp->offset, (unsigned long) mp->min_address,
17937 (unsigned long) mp->max_address);
17938 arm_print_value (dump_file, mp->value);
17939 fputc ('\n', dump_file);
17940 }
17941
17942 rtx val = copy_rtx (mp->value);
17943
17944 switch (GET_MODE_SIZE (mp->mode))
17945 {
17946 #ifdef HAVE_consttable_1
17947 case 1:
17948 scan = emit_insn_after (gen_consttable_1 (val), scan);
17949 break;
17950
17951 #endif
17952 #ifdef HAVE_consttable_2
17953 case 2:
17954 scan = emit_insn_after (gen_consttable_2 (val), scan);
17955 break;
17956
17957 #endif
17958 #ifdef HAVE_consttable_4
17959 case 4:
17960 scan = emit_insn_after (gen_consttable_4 (val), scan);
17961 break;
17962
17963 #endif
17964 #ifdef HAVE_consttable_8
17965 case 8:
17966 scan = emit_insn_after (gen_consttable_8 (val), scan);
17967 break;
17968
17969 #endif
17970 #ifdef HAVE_consttable_16
17971 case 16:
17972 scan = emit_insn_after (gen_consttable_16 (val), scan);
17973 break;
17974
17975 #endif
17976 default:
17977 gcc_unreachable ();
17978 }
17979 }
17980
17981 nmp = mp->next;
17982 free (mp);
17983 }
17984
17985 minipool_vector_head = minipool_vector_tail = NULL;
17986 scan = emit_insn_after (gen_consttable_end (), scan);
17987 scan = emit_barrier_after (scan);
17988 }
17989
17990 /* Return the cost of forcibly inserting a barrier after INSN. */
17991 static int
17992 arm_barrier_cost (rtx_insn *insn)
17993 {
17994 /* Basing the location of the pool on the loop depth is preferable,
17995 but at the moment, the basic block information seems to be
17996 corrupt by this stage of the compilation. */
17997 int base_cost = 50;
17998 rtx_insn *next = next_nonnote_insn (insn);
17999
18000 if (next != NULL && LABEL_P (next))
18001 base_cost -= 20;
18002
18003 switch (GET_CODE (insn))
18004 {
18005 case CODE_LABEL:
18006 /* It will always be better to place the table before the label, rather
18007 than after it. */
18008 return 50;
18009
18010 case INSN:
18011 case CALL_INSN:
18012 return base_cost;
18013
18014 case JUMP_INSN:
18015 return base_cost - 10;
18016
18017 default:
18018 return base_cost + 10;
18019 }
18020 }
18021
18022 /* Find the best place in the insn stream in the range
18023 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18024 Create the barrier by inserting a jump and add a new fix entry for
18025 it. */
18026 static Mfix *
18027 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18028 {
18029 HOST_WIDE_INT count = 0;
18030 rtx_barrier *barrier;
18031 rtx_insn *from = fix->insn;
18032 /* The instruction after which we will insert the jump. */
18033 rtx_insn *selected = NULL;
18034 int selected_cost;
18035 /* The address at which the jump instruction will be placed. */
18036 HOST_WIDE_INT selected_address;
18037 Mfix * new_fix;
18038 HOST_WIDE_INT max_count = max_address - fix->address;
18039 rtx_code_label *label = gen_label_rtx ();
18040
18041 selected_cost = arm_barrier_cost (from);
18042 selected_address = fix->address;
18043
18044 while (from && count < max_count)
18045 {
18046 rtx_jump_table_data *tmp;
18047 int new_cost;
18048
18049 /* This code shouldn't have been called if there was a natural barrier
18050 within range. */
18051 gcc_assert (!BARRIER_P (from));
18052
18053 /* Count the length of this insn. This must stay in sync with the
18054 code that pushes minipool fixes. */
18055 if (LABEL_P (from))
18056 count += get_label_padding (from);
18057 else
18058 count += get_attr_length (from);
18059
18060 /* If there is a jump table, add its length. */
18061 if (tablejump_p (from, NULL, &tmp))
18062 {
18063 count += get_jump_table_size (tmp);
18064
18065 /* Jump tables aren't in a basic block, so base the cost on
18066 the dispatch insn. If we select this location, we will
18067 still put the pool after the table. */
18068 new_cost = arm_barrier_cost (from);
18069
18070 if (count < max_count
18071 && (!selected || new_cost <= selected_cost))
18072 {
18073 selected = tmp;
18074 selected_cost = new_cost;
18075 selected_address = fix->address + count;
18076 }
18077
18078 /* Continue after the dispatch table. */
18079 from = NEXT_INSN (tmp);
18080 continue;
18081 }
18082
18083 new_cost = arm_barrier_cost (from);
18084
18085 if (count < max_count
18086 && (!selected || new_cost <= selected_cost))
18087 {
18088 selected = from;
18089 selected_cost = new_cost;
18090 selected_address = fix->address + count;
18091 }
18092
18093 from = NEXT_INSN (from);
18094 }
18095
18096 /* Make sure that we found a place to insert the jump. */
18097 gcc_assert (selected);
18098
18099 /* Create a new JUMP_INSN that branches around a barrier. */
18100 from = emit_jump_insn_after (gen_jump (label), selected);
18101 JUMP_LABEL (from) = label;
18102 barrier = emit_barrier_after (from);
18103 emit_label_after (label, barrier);
18104
18105 /* Create a minipool barrier entry for the new barrier. */
18106 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18107 new_fix->insn = barrier;
18108 new_fix->address = selected_address;
18109 new_fix->next = fix->next;
18110 fix->next = new_fix;
18111
18112 return new_fix;
18113 }
18114
18115 /* Record that there is a natural barrier in the insn stream at
18116 ADDRESS. */
18117 static void
18118 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18119 {
18120 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18121
18122 fix->insn = insn;
18123 fix->address = address;
18124
18125 fix->next = NULL;
18126 if (minipool_fix_head != NULL)
18127 minipool_fix_tail->next = fix;
18128 else
18129 minipool_fix_head = fix;
18130
18131 minipool_fix_tail = fix;
18132 }
18133
18134 /* Record INSN, which will need fixing up to load a value from the
18135 minipool. ADDRESS is the offset of the insn since the start of the
18136 function; LOC is a pointer to the part of the insn which requires
18137 fixing; VALUE is the constant that must be loaded, which is of type
18138 MODE. */
18139 static void
18140 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18141 machine_mode mode, rtx value)
18142 {
18143 gcc_assert (!arm_disable_literal_pool);
18144 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18145
18146 fix->insn = insn;
18147 fix->address = address;
18148 fix->loc = loc;
18149 fix->mode = mode;
18150 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18151 fix->value = value;
18152 fix->forwards = get_attr_pool_range (insn);
18153 fix->backwards = get_attr_neg_pool_range (insn);
18154 fix->minipool = NULL;
18155
18156 /* If an insn doesn't have a range defined for it, then it isn't
18157 expecting to be reworked by this code. Better to stop now than
18158 to generate duff assembly code. */
18159 gcc_assert (fix->forwards || fix->backwards);
18160
18161 /* If an entry requires 8-byte alignment then assume all constant pools
18162 require 4 bytes of padding. Trying to do this later on a per-pool
18163 basis is awkward because existing pool entries have to be modified. */
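/* Rough illustration: once the pool contains a DImode entry, the pool start
   may need up to 4 bytes of padding to become 8-byte aligned, so the
   forward-range computation (see the use of minipool_pad in
   add_minipool_forward_ref) conservatively assumes that padding is
   present.  */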
18164 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18165 minipool_pad = 4;
18166
18167 if (dump_file)
18168 {
18169 fprintf (dump_file,
18170 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18171 GET_MODE_NAME (mode),
18172 INSN_UID (insn), (unsigned long) address,
18173 -1 * (long)fix->backwards, (long)fix->forwards);
18174 arm_print_value (dump_file, fix->value);
18175 fprintf (dump_file, "\n");
18176 }
18177
18178 /* Add it to the chain of fixes. */
18179 fix->next = NULL;
18180
18181 if (minipool_fix_head != NULL)
18182 minipool_fix_tail->next = fix;
18183 else
18184 minipool_fix_head = fix;
18185
18186 minipool_fix_tail = fix;
18187 }
18188
18189 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
18190 in terms of the number of insns needed; constants that would cost more
18191 than this are instead loaded from the constant pool. */
18192 int
18193 arm_max_const_double_inline_cost ()
18194 {
18195 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18196 }
18197
18198 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18199 Returns the number of insns needed, or 99 if we don't know how to
18200 do it. */
18201 int
18202 arm_const_double_inline_cost (rtx val)
18203 {
18204 rtx lowpart, highpart;
18205 machine_mode mode;
18206
18207 mode = GET_MODE (val);
18208
18209 if (mode == VOIDmode)
18210 mode = DImode;
18211
18212 gcc_assert (GET_MODE_SIZE (mode) == 8);
18213
18214 lowpart = gen_lowpart (SImode, val);
18215 highpart = gen_highpart_mode (SImode, mode, val);
18216
18217 gcc_assert (CONST_INT_P (lowpart));
18218 gcc_assert (CONST_INT_P (highpart));
18219
18220 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18221 NULL_RTX, NULL_RTX, 0, 0)
18222 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18223 NULL_RTX, NULL_RTX, 0, 0));
18224 }
18225
18226 /* Cost of loading a SImode constant. */
18227 static inline int
18228 arm_const_inline_cost (enum rtx_code code, rtx val)
18229 {
18230 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18231 NULL_RTX, NULL_RTX, 1, 0);
18232 }
18233
18234 /* Return true if it is worthwhile to split a 64-bit constant into two
18235 32-bit operations. This is the case if optimizing for size, or
18236 if we have load delay slots, or if one 32-bit part can be done with
18237 a single data operation. */
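/* For instance (illustration only), 0x000000ff00000001 splits into the
   halves 0xff and 0x1, both valid ARM data-processing immediates, so
   building it with two moves is preferable to a literal-pool load.  */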
18238 bool
18239 arm_const_double_by_parts (rtx val)
18240 {
18241 machine_mode mode = GET_MODE (val);
18242 rtx part;
18243
18244 if (optimize_size || arm_ld_sched)
18245 return true;
18246
18247 if (mode == VOIDmode)
18248 mode = DImode;
18249
18250 part = gen_highpart_mode (SImode, mode, val);
18251
18252 gcc_assert (CONST_INT_P (part));
18253
18254 if (const_ok_for_arm (INTVAL (part))
18255 || const_ok_for_arm (~INTVAL (part)))
18256 return true;
18257
18258 part = gen_lowpart (SImode, val);
18259
18260 gcc_assert (CONST_INT_P (part));
18261
18262 if (const_ok_for_arm (INTVAL (part))
18263 || const_ok_for_arm (~INTVAL (part)))
18264 return true;
18265
18266 return false;
18267 }
18268
18269 /* Return true if it is possible to inline both the high and low parts
18270 of a 64-bit constant into 32-bit data processing instructions. */
18271 bool
18272 arm_const_double_by_immediates (rtx val)
18273 {
18274 machine_mode mode = GET_MODE (val);
18275 rtx part;
18276
18277 if (mode == VOIDmode)
18278 mode = DImode;
18279
18280 part = gen_highpart_mode (SImode, mode, val);
18281
18282 gcc_assert (CONST_INT_P (part));
18283
18284 if (!const_ok_for_arm (INTVAL (part)))
18285 return false;
18286
18287 part = gen_lowpart (SImode, val);
18288
18289 gcc_assert (CONST_INT_P (part));
18290
18291 if (!const_ok_for_arm (INTVAL (part)))
18292 return false;
18293
18294 return true;
18295 }
18296
18297 /* Scan INSN and note any of its operands that need fixing.
18298 If DO_PUSHES is false we do not actually push any of the fixups
18299 needed. */
18300 static void
18301 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18302 {
18303 int opno;
18304
18305 extract_constrain_insn (insn);
18306
18307 if (recog_data.n_alternatives == 0)
18308 return;
18309
18310 /* Fill in recog_op_alt with information about the constraints of
18311 this insn. */
18312 preprocess_constraints (insn);
18313
18314 const operand_alternative *op_alt = which_op_alt ();
18315 for (opno = 0; opno < recog_data.n_operands; opno++)
18316 {
18317 /* Things we need to fix can only occur in inputs. */
18318 if (recog_data.operand_type[opno] != OP_IN)
18319 continue;
18320
18321 /* If this alternative is a memory reference, then any mention
18322 of constants in this alternative is really to fool reload
18323 into allowing us to accept one there. We need to fix them up
18324 now so that we output the right code. */
18325 if (op_alt[opno].memory_ok)
18326 {
18327 rtx op = recog_data.operand[opno];
18328
18329 if (CONSTANT_P (op))
18330 {
18331 if (do_pushes)
18332 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18333 recog_data.operand_mode[opno], op);
18334 }
18335 else if (MEM_P (op)
18336 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18337 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18338 {
18339 if (do_pushes)
18340 {
18341 rtx cop = avoid_constant_pool_reference (op);
18342
18343 /* Casting the address of something to a mode narrower
18344 than a word can cause avoid_constant_pool_reference()
18345 to return the pool reference itself. That's no good to
18346 us here. Let's just hope that we can use the
18347 constant pool value directly. */
18348 if (op == cop)
18349 cop = get_pool_constant (XEXP (op, 0));
18350
18351 push_minipool_fix (insn, address,
18352 recog_data.operand_loc[opno],
18353 recog_data.operand_mode[opno], cop);
18354 }
18355
18356 }
18357 }
18358 }
18359
18360 return;
18361 }
18362
18363 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18364 and unions in the context of ARMv8-M Security Extensions. It is used as a
18365 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18366 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18367 or four masks, depending on whether it is being computed for a
18368 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18369 respectively. The tree for the type of the argument or a field within an
18370 argument is passed in ARG_TYPE, the current register this argument or field
18371 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18372 argument or field starts at is passed in STARTING_BIT and the last used bit
18373 is kept in LAST_USED_BIT which is also updated accordingly. */
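/* Illustration (assuming default alignment and an argument starting in r0):
   for struct { char c; short s; } the char occupies bits 0-7 and the short
   is aligned to bits 16-31, so the padding bits 8-15 are recorded in
   padding_bits_to_clear[0] (mask 0xff00) and r0 is marked as in use.  */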
18374
18375 static unsigned HOST_WIDE_INT
18376 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18377 uint32_t * padding_bits_to_clear,
18378 unsigned starting_bit, int * last_used_bit)
18379
18380 {
18381 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18382
18383 if (TREE_CODE (arg_type) == RECORD_TYPE)
18384 {
18385 unsigned current_bit = starting_bit;
18386 tree field;
18387 long int offset, size;
18388
18389
18390 field = TYPE_FIELDS (arg_type);
18391 while (field)
18392 {
18393 /* The offset within a structure is always an offset from
18394 the start of that structure. Make sure we take that into account
18395 in the calculation of the register-based offset that we use here. */
18396 offset = starting_bit;
18397 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18398 offset %= 32;
18399
18400 /* This is the actual size of the field, for bitfields this is the
18401 bitfield width and not the container size. */
18402 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18403
18404 if (*last_used_bit != offset)
18405 {
18406 if (offset < *last_used_bit)
18407 {
18408 /* This field's offset is before the 'last_used_bit', which
18409 means this field goes in the next register. So we need to
18410 pad the rest of the current register and increase the
18411 register number. */
18412 uint32_t mask;
18413 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18414 mask++;
18415
18416 padding_bits_to_clear[*regno] |= mask;
18417 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18418 (*regno)++;
18419 }
18420 else
18421 {
18422 /* Otherwise we pad the bits between the last field's end and
18423 the start of the new field. */
18424 uint32_t mask;
18425
18426 mask = ((uint32_t)-1) >> (32 - offset);
18427 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18428 padding_bits_to_clear[*regno] |= mask;
18429 }
18430 current_bit = offset;
18431 }
18432
18433 /* Calculate further padding bits for inner structs/unions too. */
18434 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18435 {
18436 *last_used_bit = current_bit;
18437 not_to_clear_reg_mask
18438 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18439 padding_bits_to_clear, offset,
18440 last_used_bit);
18441 }
18442 else
18443 {
18444 /* Update 'current_bit' with this field's size. If the
18445 'current_bit' lies in a subsequent register, update 'regno' and
18446 reset 'current_bit' to point to the current bit in that new
18447 register. */
18448 current_bit += size;
18449 while (current_bit >= 32)
18450 {
18451 current_bit-=32;
18452 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18453 (*regno)++;
18454 }
18455 *last_used_bit = current_bit;
18456 }
18457
18458 field = TREE_CHAIN (field);
18459 }
18460 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18461 }
18462 else if (TREE_CODE (arg_type) == UNION_TYPE)
18463 {
18464 tree field, field_t;
18465 int i, regno_t, field_size;
18466 int max_reg = -1;
18467 int max_bit = -1;
18468 uint32_t mask;
18469 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18470 = {-1, -1, -1, -1};
18471
18472 /* To compute the padding bits in a union we only consider bits as
18473 padding bits if they are always either a padding bit or fall outside a
18474 field's size for all fields in the union. */
18475 field = TYPE_FIELDS (arg_type);
18476 while (field)
18477 {
18478 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18479 = {0U, 0U, 0U, 0U};
18480 int last_used_bit_t = *last_used_bit;
18481 regno_t = *regno;
18482 field_t = TREE_TYPE (field);
18483
18484 /* If the field's type is either a record or a union make sure to
18485 compute their padding bits too. */
18486 if (RECORD_OR_UNION_TYPE_P (field_t))
18487 not_to_clear_reg_mask
18488 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18489 &padding_bits_to_clear_t[0],
18490 starting_bit, &last_used_bit_t);
18491 else
18492 {
18493 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18494 regno_t = (field_size / 32) + *regno;
18495 last_used_bit_t = (starting_bit + field_size) % 32;
18496 }
18497
18498 for (i = *regno; i < regno_t; i++)
18499 {
18500 /* For all but the last register used by this field only keep the
18501 padding bits that were padding bits in this field. */
18502 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18503 }
18504
18505 /* For the last register, keep all padding bits that were padding
18506 bits in this field and any padding bits that are still valid
18507 as padding bits but fall outside of this field's size. */
18508 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18509 padding_bits_to_clear_res[regno_t]
18510 &= padding_bits_to_clear_t[regno_t] | mask;
18511
18512 /* Update the maximum size of the fields in terms of registers used
18513 ('max_reg') and the 'last_used_bit' in said register. */
18514 if (max_reg < regno_t)
18515 {
18516 max_reg = regno_t;
18517 max_bit = last_used_bit_t;
18518 }
18519 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18520 max_bit = last_used_bit_t;
18521
18522 field = TREE_CHAIN (field);
18523 }
18524
18525 /* Update the current padding_bits_to_clear using the intersection of the
18526 padding bits of all the fields. */
18527 for (i=*regno; i < max_reg; i++)
18528 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18529
18530 /* Do not keep trailing padding bits; we do not know yet whether this
18531 is the end of the argument. */
18532 mask = ((uint32_t) 1 << max_bit) - 1;
18533 padding_bits_to_clear[max_reg]
18534 |= padding_bits_to_clear_res[max_reg] & mask;
18535
18536 *regno = max_reg;
18537 *last_used_bit = max_bit;
18538 }
18539 else
18540 /* This function should only be used for structs and unions. */
18541 gcc_unreachable ();
18542
18543 return not_to_clear_reg_mask;
18544 }
18545
18546 /* In the context of ARMv8-M Security Extensions, this function is used for both
18547 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18548 registers are used when returning or passing arguments, which is then
18549 returned as a mask. It will also compute a mask to indicate padding/unused
18550 bits for each of these registers, and passes this through the
18551 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18552 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18553 the starting register used to pass this argument or return value is passed
18554 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18555 for struct and union types. */
18556
18557 static unsigned HOST_WIDE_INT
18558 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18559 uint32_t * padding_bits_to_clear)
18560
18561 {
18562 int last_used_bit = 0;
18563 unsigned HOST_WIDE_INT not_to_clear_mask;
18564
18565 if (RECORD_OR_UNION_TYPE_P (arg_type))
18566 {
18567 not_to_clear_mask
18568 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18569 padding_bits_to_clear, 0,
18570 &last_used_bit);
18571
18572
18573 /* If the 'last_used_bit' is not zero, that means we are still using a
18574 part of the last 'regno'. In such cases we must clear the trailing
18575 bits. Otherwise we are not using regno and we should mark it as to
18576 clear. */
18577 if (last_used_bit != 0)
18578 padding_bits_to_clear[regno]
18579 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18580 else
18581 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18582 }
18583 else
18584 {
18585 not_to_clear_mask = 0;
18586 /* We are not dealing with structs or unions, so these arguments may be
18587 passed in floating-point registers too. In some cases a BLKmode is
18588 used when returning or passing arguments in multiple VFP registers. */
18589 if (GET_MODE (arg_rtx) == BLKmode)
18590 {
18591 int i, arg_regs;
18592 rtx reg;
18593
18594 /* This should really only occur when dealing with the hard-float
18595 ABI. */
18596 gcc_assert (TARGET_HARD_FLOAT_ABI);
18597
18598 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18599 {
18600 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18601 gcc_assert (REG_P (reg));
18602
18603 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18604
18605 /* If we are dealing with DF mode, make sure we don't
18606 clear either of the registers it addresses. */
18607 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18608 if (arg_regs > 1)
18609 {
18610 unsigned HOST_WIDE_INT mask;
18611 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18612 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18613 not_to_clear_mask |= mask;
18614 }
18615 }
18616 }
18617 else
18618 {
18619 /* Otherwise we can rely on the MODE to determine how many registers
18620 are being used by this argument. */
18621 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18622 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18623 if (arg_regs > 1)
18624 {
18625 unsigned HOST_WIDE_INT
18626 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18627 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18628 not_to_clear_mask |= mask;
18629 }
18630 }
18631 }
18632
18633 return not_to_clear_mask;
18634 }
18635
18636 /* Clear registers holding secrets before doing a cmse_nonsecure_call or
18637 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18638 which registers are to be fully cleared, using the value in register
18639 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
18640 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18641 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18642
18643 NOTE: one of three following assertions must hold:
18644 - SCRATCH_REG is a low register
18645 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18646 in TO_CLEAR_BITMAP)
18647 - CLEARING_REG is a low register. */
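/* As a rough sketch, with padding_bits_to_clear[0] == 0x0000ff00 the code
   below builds ~0x0000ff00 in SCRATCH_REG (the low half 0x00ff, then the
   upper half via a 16-bit insert) and ANDs it into r0, clearing just the
   padding bits; fully cleared registers are then handled with VSCCLRM/CLRM
   or plain moves, depending on the target.  */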
18648
18649 static void
18650 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18651 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18652 {
18653 bool saved_clearing = false;
18654 rtx saved_clearing_reg = NULL_RTX;
18655 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18656
18657 gcc_assert (arm_arch_cmse);
18658
18659 if (!bitmap_empty_p (to_clear_bitmap))
18660 {
18661 minregno = bitmap_first_set_bit (to_clear_bitmap);
18662 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18663 }
18664 clearing_regno = REGNO (clearing_reg);
18665
18666 /* Clear padding bits. */
18667 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18668 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18669 {
18670 uint64_t mask;
18671 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18672
18673 if (padding_bits_to_clear[i] == 0)
18674 continue;
18675
18676 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18677 CLEARING_REG as scratch. */
18678 if (TARGET_THUMB1
18679 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18680 {
18681 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18682 such that we can use clearing_reg to clear the unused bits in the
18683 arguments. */
18684 if ((clearing_regno > maxregno
18685 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18686 && !saved_clearing)
18687 {
18688 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18689 emit_move_insn (scratch_reg, clearing_reg);
18690 saved_clearing = true;
18691 saved_clearing_reg = scratch_reg;
18692 }
18693 scratch_reg = clearing_reg;
18694 }
18695
18696 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18697 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18698 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18699
18700 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18701 mask = (~padding_bits_to_clear[i]) >> 16;
18702 rtx16 = gen_int_mode (16, SImode);
18703 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18704 if (mask)
18705 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18706
18707 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18708 }
18709 if (saved_clearing)
18710 emit_move_insn (clearing_reg, saved_clearing_reg);
18711
18712
18713 /* Clear full registers. */
18714
18715 if (TARGET_HAVE_FPCXT_CMSE)
18716 {
18717 rtvec vunspec_vec;
18718 int i, j, k, nb_regs;
18719 rtx use_seq, par, reg, set, vunspec;
18720 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18721 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18722 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18723
18724 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18725 {
18726 /* Find next register to clear and exit if none. */
18727 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18728 if (i > maxregno)
18729 break;
18730
18731 /* Compute number of consecutive registers to clear. */
18732 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18733 j++);
18734 nb_regs = j - i;
18735
18736 /* Create VSCCLRM RTX pattern. */
18737 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18738 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18739 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18740 VUNSPEC_VSCCLRM_VPR);
18741 XVECEXP (par, 0, 0) = vunspec;
18742
18743 /* Insert VFP register clearing RTX in the pattern. */
18744 start_sequence ();
18745 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18746 {
18747 if (!bitmap_bit_p (to_clear_bitmap, j))
18748 continue;
18749
18750 reg = gen_rtx_REG (SFmode, j);
18751 set = gen_rtx_SET (reg, const0_rtx);
18752 XVECEXP (par, 0, k++) = set;
18753 emit_use (reg);
18754 }
18755 use_seq = get_insns ();
18756 end_sequence ();
18757
18758 emit_insn_after (use_seq, emit_insn (par));
18759 }
18760
18761 /* Get set of core registers to clear. */
18762 bitmap_clear (core_regs_bitmap);
18763 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18764 IP_REGNUM - R0_REGNUM + 1);
18765 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18766 core_regs_bitmap);
18767 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18768
18769 if (bitmap_empty_p (to_clear_core_bitmap))
18770 return;
18771
18772 /* Create clrm RTX pattern. */
18773 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18774 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18775
18776 /* Insert core register clearing RTX in the pattern. */
18777 start_sequence ();
18778 for (j = 0, i = minregno; j < nb_regs; i++)
18779 {
18780 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18781 continue;
18782
18783 reg = gen_rtx_REG (SImode, i);
18784 set = gen_rtx_SET (reg, const0_rtx);
18785 XVECEXP (par, 0, j++) = set;
18786 emit_use (reg);
18787 }
18788
18789 /* Insert APSR register clearing RTX in the pattern
18790 along with clobbering CC. */
18791 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18792 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18793 VUNSPEC_CLRM_APSR);
18794
18795 XVECEXP (par, 0, j++) = vunspec;
18796
18797 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18798 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18799 XVECEXP (par, 0, j) = clobber;
18800
18801 use_seq = get_insns ();
18802 end_sequence ();
18803
18804 emit_insn_after (use_seq, emit_insn (par));
18805 }
18806 else
18807 {
18808 /* If not marked for clearing, clearing_reg already does not contain
18809 any secret. */
18810 if (clearing_regno <= maxregno
18811 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18812 {
18813 emit_move_insn (clearing_reg, const0_rtx);
18814 emit_use (clearing_reg);
18815 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18816 }
18817
18818 for (regno = minregno; regno <= maxregno; regno++)
18819 {
18820 if (!bitmap_bit_p (to_clear_bitmap, regno))
18821 continue;
18822
18823 if (IS_VFP_REGNUM (regno))
18824 {
18825 /* If regno is an even vfp register and its successor is also to
18826 be cleared, use vmov. */
18827 if (TARGET_VFP_DOUBLE
18828 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18829 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18830 {
18831 emit_move_insn (gen_rtx_REG (DFmode, regno),
18832 CONST1_RTX (DFmode));
18833 emit_use (gen_rtx_REG (DFmode, regno));
18834 regno++;
18835 }
18836 else
18837 {
18838 emit_move_insn (gen_rtx_REG (SFmode, regno),
18839 CONST1_RTX (SFmode));
18840 emit_use (gen_rtx_REG (SFmode, regno));
18841 }
18842 }
18843 else
18844 {
18845 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18846 emit_use (gen_rtx_REG (SImode, regno));
18847 }
18848 }
18849 }
18850 }
18851
18852 /* Clear core and caller-saved VFP registers not used to pass arguments before
18853 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18854 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18855 libgcc/config/arm/cmse_nonsecure_call.S. */
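/* Very roughly, for a call through a function pointer held in, say, r4 that
   passes a single int in r0, the sequence inserted before the call clears
   the unused argument registers r1-r3 (and, with -mfloat-abi=hard, the
   caller-saved VFP argument registers), clears the LSB of the address
   register with an lsrs/lsls pair, and with FPCXT_CMSE also saves and later
   restores the callee-saved registers around the call.  */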
18856
18857 static void
18858 cmse_nonsecure_call_inline_register_clear (void)
18859 {
18860 basic_block bb;
18861
18862 FOR_EACH_BB_FN (bb, cfun)
18863 {
18864 rtx_insn *insn;
18865
18866 FOR_BB_INSNS (bb, insn)
18867 {
18868 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18869 /* frame = VFP regs + FPSCR + VPR. */
18870 unsigned lazy_store_stack_frame_size
18871 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18872 unsigned long callee_saved_mask
18873 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18874 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18875 unsigned address_regnum, regno;
18876 unsigned max_int_regno
18877 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18878 unsigned max_fp_regno
18879 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18880 unsigned maxregno
18881 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18882 auto_sbitmap to_clear_bitmap (maxregno + 1);
18883 rtx_insn *seq;
18884 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18885 rtx address;
18886 CUMULATIVE_ARGS args_so_far_v;
18887 cumulative_args_t args_so_far;
18888 tree arg_type, fntype;
18889 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18890 function_args_iterator args_iter;
18891 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18892
18893 if (!NONDEBUG_INSN_P (insn))
18894 continue;
18895
18896 if (!CALL_P (insn))
18897 continue;
18898
18899 pat = PATTERN (insn);
18900 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18901 call = XVECEXP (pat, 0, 0);
18902
18903 /* Get the real call RTX if the insn sets a value, ie. returns. */
18904 if (GET_CODE (call) == SET)
18905 call = SET_SRC (call);
18906
18907 /* Check if it is a cmse_nonsecure_call. */
18908 unspec = XEXP (call, 0);
18909 if (GET_CODE (unspec) != UNSPEC
18910 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18911 continue;
18912
18913 /* Mark registers that need to be cleared. Those that hold a
18914 parameter are removed from the set further below. */
18915 bitmap_clear (to_clear_bitmap);
18916 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18917 max_int_regno - R0_REGNUM + 1);
18918
18919 /* Only look at the caller-saved floating point registers in case of
18920 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18921 lazy store and loads which clear both caller- and callee-saved
18922 registers. */
18923 if (!lazy_fpclear)
18924 {
18925 auto_sbitmap float_bitmap (maxregno + 1);
18926
18927 bitmap_clear (float_bitmap);
18928 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18929 max_fp_regno - FIRST_VFP_REGNUM + 1);
18930 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18931 }
18932
18933 /* Make sure the register used to hold the function address is not
18934 cleared. */
18935 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18936 gcc_assert (MEM_P (address));
18937 gcc_assert (REG_P (XEXP (address, 0)));
18938 address_regnum = REGNO (XEXP (address, 0));
18939 if (address_regnum <= max_int_regno)
18940 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18941
18942 /* Set basic block of call insn so that df rescan is performed on
18943 insns inserted here. */
18944 set_block_for_insn (insn, bb);
18945 df_set_flags (DF_DEFER_INSN_RESCAN);
18946 start_sequence ();
18947
18948 /* Make sure the scheduler doesn't schedule other insns beyond
18949 here. */
18950 emit_insn (gen_blockage ());
18951
18952 /* Walk through all arguments and clear registers appropriately.
18953 */
18954 fntype = TREE_TYPE (MEM_EXPR (address));
18955 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18956 NULL_TREE);
18957 args_so_far = pack_cumulative_args (&args_so_far_v);
18958 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18959 {
18960 rtx arg_rtx;
18961 uint64_t to_clear_args_mask;
18962
18963 if (VOID_TYPE_P (arg_type))
18964 continue;
18965
18966 function_arg_info arg (arg_type, /*named=*/true);
18967 if (!first_param)
18968 /* ??? We should advance after processing the argument and pass
18969 the argument we're advancing past. */
18970 arm_function_arg_advance (args_so_far, arg);
18971
18972 arg_rtx = arm_function_arg (args_so_far, arg);
18973 gcc_assert (REG_P (arg_rtx));
18974 to_clear_args_mask
18975 = compute_not_to_clear_mask (arg_type, arg_rtx,
18976 REGNO (arg_rtx),
18977 &padding_bits_to_clear[0]);
18978 if (to_clear_args_mask)
18979 {
18980 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18981 {
18982 if (to_clear_args_mask & (1ULL << regno))
18983 bitmap_clear_bit (to_clear_bitmap, regno);
18984 }
18985 }
18986
18987 first_param = false;
18988 }
18989
18990 /* We use right shift and left shift to clear the LSB of the address
18991 we jump to instead of using bic, to avoid having to use an extra
18992 register on Thumb-1. */
18993 clearing_reg = XEXP (address, 0);
18994 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18995 emit_insn (gen_rtx_SET (clearing_reg, shift));
18996 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18997 emit_insn (gen_rtx_SET (clearing_reg, shift));
18998
18999 if (clear_callee_saved)
19000 {
19001 rtx push_insn =
19002 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19003 /* Disable frame debug info in push because it needs to be
19004 disabled for pop (see below). */
19005 RTX_FRAME_RELATED_P (push_insn) = 0;
19006
19007 /* Lazy store multiple. */
19008 if (lazy_fpclear)
19009 {
19010 rtx imm;
19011 rtx_insn *add_insn;
19012
19013 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19014 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19015 stack_pointer_rtx, imm));
19016 /* If we have the frame pointer, then it will be the
19017 CFA reg. Otherwise, the stack pointer is the CFA
19018 reg, so we need to emit a CFA adjust. */
19019 if (!frame_pointer_needed)
19020 arm_add_cfa_adjust_cfa_note (add_insn,
19021 - lazy_store_stack_frame_size,
19022 stack_pointer_rtx,
19023 stack_pointer_rtx);
19024 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19025 }
19026 /* Save VFP callee-saved registers. */
19027 else
19028 {
19029 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19030 (max_fp_regno - D7_VFP_REGNUM) / 2);
19031 /* Disable frame debug info in push because it needs to be
19032 disabled for vpop (see below). */
19033 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19034 }
19035 }
19036
19037 /* Clear caller-saved registers that leak before doing a non-secure
19038 call. */
19039 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19040 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19041 NUM_ARG_REGS, ip_reg, clearing_reg);
19042
19043 seq = get_insns ();
19044 end_sequence ();
19045 emit_insn_before (seq, insn);
19046
19047 if (TARGET_HAVE_FPCXT_CMSE)
19048 {
19049 rtx_insn *last, *pop_insn, *after = insn;
19050
19051 start_sequence ();
19052
19053 /* Lazy load multiple done as part of libcall in Armv8-M. */
19054 if (lazy_fpclear)
19055 {
19056 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19057 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19058 rtx_insn *add_insn =
19059 emit_insn (gen_addsi3 (stack_pointer_rtx,
19060 stack_pointer_rtx, imm));
19061 if (!frame_pointer_needed)
19062 arm_add_cfa_adjust_cfa_note (add_insn,
19063 lazy_store_stack_frame_size,
19064 stack_pointer_rtx,
19065 stack_pointer_rtx);
19066 }
19067 /* Restore VFP callee-saved registers. */
19068 else
19069 {
19070 int nb_callee_saved_vfp_regs =
19071 (max_fp_regno - D7_VFP_REGNUM) / 2;
19072 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19073 nb_callee_saved_vfp_regs,
19074 stack_pointer_rtx);
19075 /* Disable frame debug info in vpop because the SP adjustment
19076 is made using a CFA adjustment note while CFA used is
19077 sometimes R7. This then causes an assert failure in the
19078 CFI note creation code. */
19079 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19080 }
19081
19082 arm_emit_multi_reg_pop (callee_saved_mask);
19083 pop_insn = get_last_insn ();
19084
19085 /* Disable frame debug info in pop because the notes reset the state
19086 of popped registers to what it was at the beginning of the
19087 function, before the prologue. This leads to incorrect state
19088 when doing the pop after the nonsecure call for registers that
19089 are pushed both in prologue and before the nonsecure call.
19090
19091 It also occasionally triggers an assert failure in CFI note
19092 creation code when there are two codepaths to the epilogue,
19093 one of which does not go through the nonsecure call.
19094 Obviously this means that debugging between the push and pop is
19095 not reliable. */
19096 RTX_FRAME_RELATED_P (pop_insn) = 0;
19097
19098 seq = get_insns ();
19099 last = get_last_insn ();
19100 end_sequence ();
19101
19102 emit_insn_after (seq, after);
19103
19104 /* Skip the pop we have just inserted after the nonsecure call; we
19105 know it does not contain a nonsecure call. */
19106 insn = last;
19107 }
19108 }
19109 }
19110 }
19111
19112 /* Rewrite move insn into subtract of 0 if the condition codes will
19113 be useful in the next conditional jump insn. */
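/* For example (sketch): a Thumb-1 sequence such as
     movs r3, r0 ... cmp r3, #0 ; bne .L1
   has its move rewritten as "subs r3, r0, #0"; the subtract already sets
   the condition codes, so later passes can drop the explicit compare.  */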
19114
19115 static void
19116 thumb1_reorg (void)
19117 {
19118 basic_block bb;
19119
19120 FOR_EACH_BB_FN (bb, cfun)
19121 {
19122 rtx dest, src;
19123 rtx cmp, op0, op1, set = NULL;
19124 rtx_insn *prev, *insn = BB_END (bb);
19125 bool insn_clobbered = false;
19126
19127 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19128 insn = PREV_INSN (insn);
19129
19130 /* Find the last cbranchsi4_insn in basic block BB. */
19131 if (insn == BB_HEAD (bb)
19132 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19133 continue;
19134
19135 /* Get the register with which we are comparing. */
19136 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19137 op0 = XEXP (cmp, 0);
19138 op1 = XEXP (cmp, 1);
19139
19140 /* Check that comparison is against ZERO. */
19141 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19142 continue;
19143
19144 /* Find the first flag setting insn before INSN in basic block BB. */
19145 gcc_assert (insn != BB_HEAD (bb));
19146 for (prev = PREV_INSN (insn);
19147 (!insn_clobbered
19148 && prev != BB_HEAD (bb)
19149 && (NOTE_P (prev)
19150 || DEBUG_INSN_P (prev)
19151 || ((set = single_set (prev)) != NULL
19152 && get_attr_conds (prev) == CONDS_NOCOND)));
19153 prev = PREV_INSN (prev))
19154 {
19155 if (reg_set_p (op0, prev))
19156 insn_clobbered = true;
19157 }
19158
19159 /* Skip if op0 is clobbered by an insn other than prev. */
19160 if (insn_clobbered)
19161 continue;
19162
19163 if (!set)
19164 continue;
19165
19166 dest = SET_DEST (set);
19167 src = SET_SRC (set);
19168 if (!low_register_operand (dest, SImode)
19169 || !low_register_operand (src, SImode))
19170 continue;
19171
19172 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19173 in INSN. Both src and dest of the move insn are checked. */
19174 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19175 {
19176 dest = copy_rtx (dest);
19177 src = copy_rtx (src);
19178 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19179 PATTERN (prev) = gen_rtx_SET (dest, src);
19180 INSN_CODE (prev) = -1;
19181 /* Set test register in INSN to dest. */
19182 XEXP (cmp, 0) = copy_rtx (dest);
19183 INSN_CODE (insn) = -1;
19184 }
19185 }
19186 }
19187
19188 /* Convert instructions to their cc-clobbering variant if possible, since
19189 that allows us to use smaller encodings. */
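/* For example, "add r0, r1, r2" needs a 32-bit encoding in Thumb-2, while
   the flag-setting "adds r0, r1, r2" fits in 16 bits, so the conversion is
   done whenever the condition codes are known to be dead at that point.  */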
19190
19191 static void
19192 thumb2_reorg (void)
19193 {
19194 basic_block bb;
19195 regset_head live;
19196
19197 INIT_REG_SET (&live);
19198
19199 /* We are freeing block_for_insn in the toplev to keep compatibility
19200 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19201 compute_bb_for_insn ();
19202 df_analyze ();
19203
19204 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19205
19206 FOR_EACH_BB_FN (bb, cfun)
19207 {
19208 if ((current_tune->disparage_flag_setting_t16_encodings
19209 == tune_params::DISPARAGE_FLAGS_ALL)
19210 && optimize_bb_for_speed_p (bb))
19211 continue;
19212
19213 rtx_insn *insn;
19214 Convert_Action action = SKIP;
19215 Convert_Action action_for_partial_flag_setting
19216 = ((current_tune->disparage_flag_setting_t16_encodings
19217 != tune_params::DISPARAGE_FLAGS_NEITHER)
19218 && optimize_bb_for_speed_p (bb))
19219 ? SKIP : CONV;
19220
19221 COPY_REG_SET (&live, DF_LR_OUT (bb));
19222 df_simulate_initialize_backwards (bb, &live);
19223 FOR_BB_INSNS_REVERSE (bb, insn)
19224 {
19225 if (NONJUMP_INSN_P (insn)
19226 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19227 && GET_CODE (PATTERN (insn)) == SET)
19228 {
19229 action = SKIP;
19230 rtx pat = PATTERN (insn);
19231 rtx dst = XEXP (pat, 0);
19232 rtx src = XEXP (pat, 1);
19233 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19234
19235 if (UNARY_P (src) || BINARY_P (src))
19236 op0 = XEXP (src, 0);
19237
19238 if (BINARY_P (src))
19239 op1 = XEXP (src, 1);
19240
19241 if (low_register_operand (dst, SImode))
19242 {
19243 switch (GET_CODE (src))
19244 {
19245 case PLUS:
19246 /* Adding two registers and storing the result
19247 in the first source is already a 16-bit
19248 operation. */
19249 if (rtx_equal_p (dst, op0)
19250 && register_operand (op1, SImode))
19251 break;
19252
19253 if (low_register_operand (op0, SImode))
19254 {
19255 /* ADDS <Rd>,<Rn>,<Rm> */
19256 if (low_register_operand (op1, SImode))
19257 action = CONV;
19258 /* ADDS <Rdn>,#<imm8> */
19259 /* SUBS <Rdn>,#<imm8> */
19260 else if (rtx_equal_p (dst, op0)
19261 && CONST_INT_P (op1)
19262 && IN_RANGE (INTVAL (op1), -255, 255))
19263 action = CONV;
19264 /* ADDS <Rd>,<Rn>,#<imm3> */
19265 /* SUBS <Rd>,<Rn>,#<imm3> */
19266 else if (CONST_INT_P (op1)
19267 && IN_RANGE (INTVAL (op1), -7, 7))
19268 action = CONV;
19269 }
19270 /* ADCS <Rd>, <Rn> */
19271 else if (GET_CODE (XEXP (src, 0)) == PLUS
19272 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19273 && low_register_operand (XEXP (XEXP (src, 0), 1),
19274 SImode)
19275 && COMPARISON_P (op1)
19276 && cc_register (XEXP (op1, 0), VOIDmode)
19277 && maybe_get_arm_condition_code (op1) == ARM_CS
19278 && XEXP (op1, 1) == const0_rtx)
19279 action = CONV;
19280 break;
19281
19282 case MINUS:
19283 /* RSBS <Rd>,<Rn>,#0
19284 Not handled here: see NEG below. */
19285 /* SUBS <Rd>,<Rn>,#<imm3>
19286 SUBS <Rdn>,#<imm8>
19287 Not handled here: see PLUS above. */
19288 /* SUBS <Rd>,<Rn>,<Rm> */
19289 if (low_register_operand (op0, SImode)
19290 && low_register_operand (op1, SImode))
19291 action = CONV;
19292 break;
19293
19294 case MULT:
19295 /* MULS <Rdm>,<Rn>,<Rdm>
19296 As an exception to the rule, this is only used
19297 when optimizing for size since MULS is slow on all
19298 known implementations. We do not even want to use
19299 MULS in cold code, if optimizing for speed, so we
19300 test the global flag here. */
19301 if (!optimize_size)
19302 break;
19303 /* Fall through. */
19304 case AND:
19305 case IOR:
19306 case XOR:
19307 /* ANDS <Rdn>,<Rm> */
19308 if (rtx_equal_p (dst, op0)
19309 && low_register_operand (op1, SImode))
19310 action = action_for_partial_flag_setting;
19311 else if (rtx_equal_p (dst, op1)
19312 && low_register_operand (op0, SImode))
19313 action = action_for_partial_flag_setting == SKIP
19314 ? SKIP : SWAP_CONV;
19315 break;
19316
19317 case ASHIFTRT:
19318 case ASHIFT:
19319 case LSHIFTRT:
19320 /* ASRS <Rdn>,<Rm> */
19321 /* LSRS <Rdn>,<Rm> */
19322 /* LSLS <Rdn>,<Rm> */
19323 if (rtx_equal_p (dst, op0)
19324 && low_register_operand (op1, SImode))
19325 action = action_for_partial_flag_setting;
19326 /* ASRS <Rd>,<Rm>,#<imm5> */
19327 /* LSRS <Rd>,<Rm>,#<imm5> */
19328 /* LSLS <Rd>,<Rm>,#<imm5> */
19329 else if (low_register_operand (op0, SImode)
19330 && CONST_INT_P (op1)
19331 && IN_RANGE (INTVAL (op1), 0, 31))
19332 action = action_for_partial_flag_setting;
19333 break;
19334
19335 case ROTATERT:
19336 /* RORS <Rdn>,<Rm> */
19337 if (rtx_equal_p (dst, op0)
19338 && low_register_operand (op1, SImode))
19339 action = action_for_partial_flag_setting;
19340 break;
19341
19342 case NOT:
19343 /* MVNS <Rd>,<Rm> */
19344 if (low_register_operand (op0, SImode))
19345 action = action_for_partial_flag_setting;
19346 break;
19347
19348 case NEG:
19349 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19350 if (low_register_operand (op0, SImode))
19351 action = CONV;
19352 break;
19353
19354 case CONST_INT:
19355 /* MOVS <Rd>,#<imm8> */
19356 if (CONST_INT_P (src)
19357 && IN_RANGE (INTVAL (src), 0, 255))
19358 action = action_for_partial_flag_setting;
19359 break;
19360
19361 case REG:
19362 /* MOVS and MOV<c> with registers have different
19363 encodings, so are not relevant here. */
19364 break;
19365
19366 default:
19367 break;
19368 }
19369 }
19370
19371 if (action != SKIP)
19372 {
19373 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19374 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19375 rtvec vec;
19376
19377 if (action == SWAP_CONV)
19378 {
19379 src = copy_rtx (src);
19380 XEXP (src, 0) = op1;
19381 XEXP (src, 1) = op0;
19382 pat = gen_rtx_SET (dst, src);
19383 vec = gen_rtvec (2, pat, clobber);
19384 }
19385 else /* action == CONV */
19386 vec = gen_rtvec (2, pat, clobber);
19387
19388 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19389 INSN_CODE (insn) = -1;
19390 }
19391 }
19392
19393 if (NONDEBUG_INSN_P (insn))
19394 df_simulate_one_insn_backwards (bb, insn, &live);
19395 }
19396 }
19397
19398 CLEAR_REG_SET (&live);
19399 }
19400
19401 /* GCC puts the pool in the wrong place for ARM, since we can only
19402 load addresses a limited distance around the pc. We do some
19403 special munging to move the constant pool values to the correct
19404 point in the code. */
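/* For instance, an ARM-state "ldr rN, [pc, #offset]" can only reach a
   literal within roughly 4K bytes of the instruction (much less on
   Thumb-1), so a large function may need several pools, each one placed
   just before the loads that reference it would go out of range.  */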
19405 static void
19406 arm_reorg (void)
19407 {
19408 rtx_insn *insn;
19409 HOST_WIDE_INT address = 0;
19410 Mfix * fix;
19411
19412 if (use_cmse)
19413 cmse_nonsecure_call_inline_register_clear ();
19414
19415 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19416 if (cfun->is_thunk)
19417 ;
19418 else if (TARGET_THUMB1)
19419 thumb1_reorg ();
19420 else if (TARGET_THUMB2)
19421 thumb2_reorg ();
19422
19423 /* Ensure all insns that must be split have been split at this point.
19424 Otherwise, the pool placement code below may compute incorrect
19425 insn lengths. Note that when optimizing, all insns have already
19426 been split at this point. */
19427 if (!optimize)
19428 split_all_insns_noflow ();
19429
19430 /* When literal pools are disabled it should no longer be necessary to
19431 create any, so make sure we do not attempt to create one. */
19432 if (arm_disable_literal_pool)
19433 return ;
19434
19435 minipool_fix_head = minipool_fix_tail = NULL;
19436
19437 /* The first insn must always be a note, or the code below won't
19438 scan it properly. */
19439 insn = get_insns ();
19440 gcc_assert (NOTE_P (insn));
19441 minipool_pad = 0;
19442
19443 /* Scan all the insns and record the operands that will need fixing. */
19444 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19445 {
19446 if (BARRIER_P (insn))
19447 push_minipool_barrier (insn, address);
19448 else if (INSN_P (insn))
19449 {
19450 rtx_jump_table_data *table;
19451
19452 note_invalid_constants (insn, address, true);
19453 address += get_attr_length (insn);
19454
19455 /* If the insn is a vector jump, add the size of the table
19456 and skip the table. */
19457 if (tablejump_p (insn, NULL, &table))
19458 {
19459 address += get_jump_table_size (table);
19460 insn = table;
19461 }
19462 }
19463 else if (LABEL_P (insn))
19464 /* Add the worst-case padding due to alignment. We don't add
19465 the _current_ padding because the minipool insertions
19466 themselves might change it. */
19467 address += get_label_padding (insn);
19468 }
19469
19470 fix = minipool_fix_head;
19471
19472 /* Now scan the fixups and perform the required changes. */
19473 while (fix)
19474 {
19475 Mfix * ftmp;
19476 Mfix * fdel;
19477 Mfix * last_added_fix;
19478 Mfix * last_barrier = NULL;
19479 Mfix * this_fix;
19480
19481 /* Skip any further barriers before the next fix. */
19482 while (fix && BARRIER_P (fix->insn))
19483 fix = fix->next;
19484
19485 /* No more fixes. */
19486 if (fix == NULL)
19487 break;
19488
19489 last_added_fix = NULL;
19490
19491 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19492 {
19493 if (BARRIER_P (ftmp->insn))
19494 {
19495 if (ftmp->address >= minipool_vector_head->max_address)
19496 break;
19497
19498 last_barrier = ftmp;
19499 }
19500 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19501 break;
19502
19503 last_added_fix = ftmp; /* Keep track of the last fix added. */
19504 }
19505
19506 /* If we found a barrier, drop back to that; any fixes that we
19507 could have reached but come after the barrier will now go in
19508 the next mini-pool. */
19509 if (last_barrier != NULL)
19510 {
19511 /* Reduce the refcount for those fixes that won't go into this
19512 pool after all. */
19513 for (fdel = last_barrier->next;
19514 fdel && fdel != ftmp;
19515 fdel = fdel->next)
19516 {
19517 fdel->minipool->refcount--;
19518 fdel->minipool = NULL;
19519 }
19520
19521 ftmp = last_barrier;
19522 }
19523 else
19524 {
19525 /* ftmp is the first fix that we can't fit into this pool and
19526 there are no natural barriers that we could use. Insert a
19527 new barrier in the code somewhere between the previous
19528 fix and this one, and arrange to jump around it. */
19529 HOST_WIDE_INT max_address;
19530
19531 /* The last item on the list of fixes must be a barrier, so
19532 we can never run off the end of the list of fixes without
19533 last_barrier being set. */
19534 gcc_assert (ftmp);
19535
19536 max_address = minipool_vector_head->max_address;
19537 /* Check that there isn't another fix that is in range that
19538 we couldn't fit into this pool because the pool was
19539 already too large: we need to put the pool before such an
19540 instruction. The pool itself may come just after the
19541 fix because create_fix_barrier also allows space for a
19542 jump instruction. */
19543 if (ftmp->address < max_address)
19544 max_address = ftmp->address + 1;
19545
19546 last_barrier = create_fix_barrier (last_added_fix, max_address);
19547 }
19548
19549 assign_minipool_offsets (last_barrier);
19550
19551 while (ftmp)
19552 {
19553 if (!BARRIER_P (ftmp->insn)
19554 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19555 == NULL))
19556 break;
19557
19558 ftmp = ftmp->next;
19559 }
19560
19561 /* Scan over the fixes we have identified for this pool, fixing them
19562 up and adding the constants to the pool itself. */
19563 for (this_fix = fix; this_fix && ftmp != this_fix;
19564 this_fix = this_fix->next)
19565 if (!BARRIER_P (this_fix->insn))
19566 {
19567 rtx addr
19568 = plus_constant (Pmode,
19569 gen_rtx_LABEL_REF (VOIDmode,
19570 minipool_vector_label),
19571 this_fix->minipool->offset);
19572 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19573 }
19574
19575 dump_minipool (last_barrier->insn);
19576 fix = ftmp;
19577 }
19578
19579 /* From now on we must synthesize any constants that we can't handle
19580 directly. This can happen if the RTL gets split during final
19581 instruction generation. */
19582 cfun->machine->after_arm_reorg = 1;
19583
19584 /* Free the minipool memory. */
19585 obstack_free (&minipool_obstack, minipool_startobj);
19586 }
19587 \f
19588 /* Routines to output assembly language. */
19589
19590 /* Return the string representation of the passed-in real value. */
19591 static const char *
19592 fp_const_from_val (REAL_VALUE_TYPE *r)
19593 {
19594 if (!fp_consts_inited)
19595 init_fp_table ();
19596
19597 gcc_assert (real_equal (r, &value_fp0));
19598 return "0";
19599 }
19600
19601 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19602 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
19603 is in the list, and UPDATE is true iff the list contains an explicit
19604 update of the base register. */
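/* For example (illustrative register choices): popping {r4, r5, pc}
   through SP with an update prints "pop {r4, r5, pc}", the same set
   loaded through r3 without writeback prints "ldm r3, {r4, r5, pc}",
   and a return from an interrupt handler gets a trailing "^" so that
   SPSR is restored as well. */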
19605 void
19606 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19607 bool update)
19608 {
19609 int i;
19610 char pattern[100];
19611 int offset;
19612 const char *conditional;
19613 int num_saves = XVECLEN (operands[0], 0);
19614 unsigned int regno;
19615 unsigned int regno_base = REGNO (operands[1]);
19616 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19617
19618 offset = 0;
19619 offset += update ? 1 : 0;
19620 offset += return_pc ? 1 : 0;
19621
19622 /* Is the base register in the list? */
19623 for (i = offset; i < num_saves; i++)
19624 {
19625 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19626 /* If SP is in the list, then the base register must be SP. */
19627 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19628 /* If base register is in the list, there must be no explicit update. */
19629 if (regno == regno_base)
19630 gcc_assert (!update);
19631 }
19632
19633 conditional = reverse ? "%?%D0" : "%?%d0";
19634 /* Can't use POP if returning from an interrupt. */
19635 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19636 sprintf (pattern, "pop%s\t{", conditional);
19637 else
19638 {
19639 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19640 It's just a convention; their semantics are identical.
19641 if (regno_base == SP_REGNUM)
19642 sprintf (pattern, "ldmfd%s\t", conditional);
19643 else if (update)
19644 sprintf (pattern, "ldmia%s\t", conditional);
19645 else
19646 sprintf (pattern, "ldm%s\t", conditional);
19647
19648 strcat (pattern, reg_names[regno_base]);
19649 if (update)
19650 strcat (pattern, "!, {");
19651 else
19652 strcat (pattern, ", {");
19653 }
19654
19655 /* Output the first destination register. */
19656 strcat (pattern,
19657 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19658
19659 /* Output the rest of the destination registers. */
19660 for (i = offset + 1; i < num_saves; i++)
19661 {
19662 strcat (pattern, ", ");
19663 strcat (pattern,
19664 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19665 }
19666
19667 strcat (pattern, "}");
19668
19669 if (interrupt_p && return_pc)
19670 strcat (pattern, "^");
19671
19672 output_asm_insn (pattern, &cond);
19673 }
19674
19675
19676 /* Output the assembly for a store multiple. */
19677
19678 const char *
19679 vfp_output_vstmd (rtx * operands)
19680 {
19681 char pattern[100];
19682 int p;
19683 int base;
19684 int i;
19685 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19686 ? XEXP (operands[0], 0)
19687 : XEXP (XEXP (operands[0], 0), 0);
19688 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19689
19690 if (push_p)
19691 strcpy (pattern, "vpush%?.64\t{%P1");
19692 else
19693 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19694
19695 p = strlen (pattern);
19696
19697 gcc_assert (REG_P (operands[1]));
19698
19699 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19700 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19701 {
19702 p += sprintf (&pattern[p], ", d%d", base + i);
19703 }
19704 strcpy (&pattern[p], "}");
19705
19706 output_asm_insn (pattern, operands);
19707 return "";
19708 }
19709
19710
19711 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
19712 number of bytes pushed. */
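/* For example (a sketch of the expected behaviour rather than a
   guarantee about the exact RTL): saving d8-d11 corresponds to
   vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 4), which emits one
   frame-related PARALLEL describing the equivalent of
   "vpush {d8-d11}", attaches a REG_FRAME_RELATED_EXPR note for the
   unwinder, and returns 32 (4 registers * 8 bytes). */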
19713
19714 static int
19715 vfp_emit_fstmd (int base_reg, int count)
19716 {
19717 rtx par;
19718 rtx dwarf;
19719 rtx tmp, reg;
19720 int i;
19721
19722 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19723 register pairs are stored by a store multiple insn. We avoid this
19724 by pushing an extra pair. */
19725 if (count == 2 && !arm_arch6)
19726 {
19727 if (base_reg == LAST_VFP_REGNUM - 3)
19728 base_reg -= 2;
19729 count++;
19730 }
19731
19732 /* FSTMD may not store more than 16 doubleword registers at once. Split
19733 larger stores into multiple parts (up to a maximum of two, in
19734 practice). */
19735 if (count > 16)
19736 {
19737 int saved;
19738 /* NOTE: base_reg is an internal register number, so each D register
19739 counts as 2. */
19740 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19741 saved += vfp_emit_fstmd (base_reg, 16);
19742 return saved;
19743 }
19744
19745 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19746 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19747
19748 reg = gen_rtx_REG (DFmode, base_reg);
19749 base_reg += 2;
19750
19751 XVECEXP (par, 0, 0)
19752 = gen_rtx_SET (gen_frame_mem
19753 (BLKmode,
19754 gen_rtx_PRE_MODIFY (Pmode,
19755 stack_pointer_rtx,
19756 plus_constant
19757 (Pmode, stack_pointer_rtx,
19758 - (count * 8)))
19759 ),
19760 gen_rtx_UNSPEC (BLKmode,
19761 gen_rtvec (1, reg),
19762 UNSPEC_PUSH_MULT));
19763
19764 tmp = gen_rtx_SET (stack_pointer_rtx,
19765 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19766 RTX_FRAME_RELATED_P (tmp) = 1;
19767 XVECEXP (dwarf, 0, 0) = tmp;
19768
19769 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19770 RTX_FRAME_RELATED_P (tmp) = 1;
19771 XVECEXP (dwarf, 0, 1) = tmp;
19772
19773 for (i = 1; i < count; i++)
19774 {
19775 reg = gen_rtx_REG (DFmode, base_reg);
19776 base_reg += 2;
19777 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19778
19779 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19780 plus_constant (Pmode,
19781 stack_pointer_rtx,
19782 i * 8)),
19783 reg);
19784 RTX_FRAME_RELATED_P (tmp) = 1;
19785 XVECEXP (dwarf, 0, i + 1) = tmp;
19786 }
19787
19788 par = emit_insn (par);
19789 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19790 RTX_FRAME_RELATED_P (par) = 1;
19791
19792 return count * 8;
19793 }
19794
19795 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19796 has the cmse_nonsecure_call attribute; return false otherwise. */
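/* A typical (purely illustrative) trigger is a call through a
   non-secure function pointer declared with the ACLE attribute:

       typedef void __attribute__((cmse_nonsecure_call)) ns_fn (void);
       ns_fn *callback;
       ...
       callback ();    // ADDR for this call carries the attribute

   The names ns_fn and callback are hypothetical. */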
19797
19798 bool
19799 detect_cmse_nonsecure_call (tree addr)
19800 {
19801 if (!addr)
19802 return FALSE;
19803
19804 tree fntype = TREE_TYPE (addr);
19805 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19806 TYPE_ATTRIBUTES (fntype)))
19807 return TRUE;
19808 return FALSE;
19809 }
19810
19811
19812 /* Emit a call instruction with pattern PAT. ADDR is the address of
19813 the call target. */
19814
19815 void
19816 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19817 {
19818 rtx insn;
19819
19820 insn = emit_call_insn (pat);
19821
19822 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19823 If the call might use such an entry, add a use of the PIC register
19824 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19825 if (TARGET_VXWORKS_RTP
19826 && flag_pic
19827 && !sibcall
19828 && SYMBOL_REF_P (addr)
19829 && (SYMBOL_REF_DECL (addr)
19830 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19831 : !SYMBOL_REF_LOCAL_P (addr)))
19832 {
19833 require_pic_register (NULL_RTX, false /*compute_now*/);
19834 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19835 }
19836
19837 if (TARGET_FDPIC)
19838 {
19839 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19840 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19841 }
19842
19843 if (TARGET_AAPCS_BASED)
19844 {
19845 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19846 linker. We need to add an IP clobber to allow setting
19847 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19848 is not needed since it's a fixed register. */
19849 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19850 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19851 }
19852 }
19853
19854 /* Output a 'call' insn. */
19855 const char *
19856 output_call (rtx *operands)
19857 {
19858 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19859
19860 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19861 if (REGNO (operands[0]) == LR_REGNUM)
19862 {
19863 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19864 output_asm_insn ("mov%?\t%0, %|lr", operands);
19865 }
19866
19867 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19868
19869 if (TARGET_INTERWORK || arm_arch4t)
19870 output_asm_insn ("bx%?\t%0", operands);
19871 else
19872 output_asm_insn ("mov%?\t%|pc, %0", operands);
19873
19874 return "";
19875 }
19876
19877 /* Output a move from arm registers to arm registers of a long double.
19878 OPERANDS[0] is the destination.
19879 OPERANDS[1] is the source. */
19880 const char *
19881 output_mov_long_double_arm_from_arm (rtx *operands)
19882 {
19883 /* We have to be careful here because the two might overlap. */
19884 int dest_start = REGNO (operands[0]);
19885 int src_start = REGNO (operands[1]);
19886 rtx ops[2];
19887 int i;
19888
19889 if (dest_start < src_start)
19890 {
19891 for (i = 0; i < 3; i++)
19892 {
19893 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19894 ops[1] = gen_rtx_REG (SImode, src_start + i);
19895 output_asm_insn ("mov%?\t%0, %1", ops);
19896 }
19897 }
19898 else
19899 {
19900 for (i = 2; i >= 0; i--)
19901 {
19902 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19903 ops[1] = gen_rtx_REG (SImode, src_start + i);
19904 output_asm_insn ("mov%?\t%0, %1", ops);
19905 }
19906 }
19907
19908 return "";
19909 }
19910
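/* Emit a lower/upper half pair of sets that load the SImode value SRC
   into DEST.  As a rough illustration (the final insns are chosen by
   the movsi patterns), a constant such as 0x12345678 becomes a set of
   the low 16 bits followed by a ZERO_EXTRACT set of the high 16 bits,
   which is expected to end up as a movw/movt pair. */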
19911 void
19912 arm_emit_movpair (rtx dest, rtx src)
19913 {
19914 /* If the src is an immediate, simplify it. */
19915 if (CONST_INT_P (src))
19916 {
19917 HOST_WIDE_INT val = INTVAL (src);
19918 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19919 if ((val >> 16) & 0x0000ffff)
19920 {
19921 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19922 GEN_INT (16)),
19923 GEN_INT ((val >> 16) & 0x0000ffff));
19924 rtx_insn *insn = get_last_insn ();
19925 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19926 }
19927 return;
19928 }
19929 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19930 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19931 rtx_insn *insn = get_last_insn ();
19932 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19933 }
19934
19935 /* Output a move between double words. It must be REG<-MEM
19936 or MEM<-REG. */
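/* For example (illustrative register numbers), a DImode load from a
   plain register address prints as "ldrd r0, [r2]" when LDRD can be
   used and as "ldmia r2, {r0, r1}" otherwise. */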
19937 const char *
19938 output_move_double (rtx *operands, bool emit, int *count)
19939 {
19940 enum rtx_code code0 = GET_CODE (operands[0]);
19941 enum rtx_code code1 = GET_CODE (operands[1]);
19942 rtx otherops[3];
19943 if (count)
19944 *count = 1;
19945
19946 /* The only case when this might happen is when
19947 you are looking at the length of a DImode instruction
19948 that has an invalid constant in it. */
19949 if (code0 == REG && code1 != MEM)
19950 {
19951 gcc_assert (!emit);
19952 *count = 2;
19953 return "";
19954 }
19955
19956 if (code0 == REG)
19957 {
19958 unsigned int reg0 = REGNO (operands[0]);
19959 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19960
19961 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19962
19963 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19964
19965 switch (GET_CODE (XEXP (operands[1], 0)))
19966 {
19967 case REG:
19968
19969 if (emit)
19970 {
19971 if (can_ldrd
19972 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19973 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19974 else
19975 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19976 }
19977 break;
19978
19979 case PRE_INC:
19980 gcc_assert (can_ldrd);
19981 if (emit)
19982 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19983 break;
19984
19985 case PRE_DEC:
19986 if (emit)
19987 {
19988 if (can_ldrd)
19989 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19990 else
19991 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19992 }
19993 break;
19994
19995 case POST_INC:
19996 if (emit)
19997 {
19998 if (can_ldrd)
19999 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20000 else
20001 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20002 }
20003 break;
20004
20005 case POST_DEC:
20006 gcc_assert (can_ldrd);
20007 if (emit)
20008 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20009 break;
20010
20011 case PRE_MODIFY:
20012 case POST_MODIFY:
20013 /* Autoincrement addressing modes should never have overlapping
20014 base and destination registers, and overlapping index registers
20015 are already prohibited, so this doesn't need to worry about
20016 fix_cm3_ldrd. */
20017 otherops[0] = operands[0];
20018 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20019 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20020
20021 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20022 {
20023 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20024 {
20025 /* Registers overlap so split out the increment. */
20026 if (emit)
20027 {
20028 gcc_assert (can_ldrd);
20029 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20030 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20031 }
20032 if (count)
20033 *count = 2;
20034 }
20035 else
20036 {
20037 /* Use a single insn if we can.
20038 FIXME: IWMMXT allows offsets larger than ldrd can
20039 handle; fix these up with a pair of ldr. */
20040 if (can_ldrd
20041 && (TARGET_THUMB2
20042 || !CONST_INT_P (otherops[2])
20043 || (INTVAL (otherops[2]) > -256
20044 && INTVAL (otherops[2]) < 256)))
20045 {
20046 if (emit)
20047 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20048 }
20049 else
20050 {
20051 if (emit)
20052 {
20053 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20054 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20055 }
20056 if (count)
20057 *count = 2;
20058
20059 }
20060 }
20061 }
20062 else
20063 {
20064 /* Use a single insn if we can.
20065 FIXME: IWMMXT allows offsets larger than ldrd can handle;
20066 fix these up with a pair of ldr. */
20067 if (can_ldrd
20068 && (TARGET_THUMB2
20069 || !CONST_INT_P (otherops[2])
20070 || (INTVAL (otherops[2]) > -256
20071 && INTVAL (otherops[2]) < 256)))
20072 {
20073 if (emit)
20074 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20075 }
20076 else
20077 {
20078 if (emit)
20079 {
20080 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20081 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20082 }
20083 if (count)
20084 *count = 2;
20085 }
20086 }
20087 break;
20088
20089 case LABEL_REF:
20090 case CONST:
20091 /* We might be able to use ldrd %0, %1 here. However, the range
20092 differs from that of ldr/adr, and it is broken on some ARMv7-M
20093 implementations. */
20094 /* Use the second register of the pair to avoid problematic
20095 overlap. */
20096 otherops[1] = operands[1];
20097 if (emit)
20098 output_asm_insn ("adr%?\t%0, %1", otherops);
20099 operands[1] = otherops[0];
20100 if (emit)
20101 {
20102 if (can_ldrd)
20103 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20104 else
20105 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20106 }
20107
20108 if (count)
20109 *count = 2;
20110 break;
20111
20112 /* ??? This needs checking for thumb2. */
20113 default:
20114 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20115 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20116 {
20117 otherops[0] = operands[0];
20118 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20119 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20120
20121 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20122 {
20123 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20124 {
20125 switch ((int) INTVAL (otherops[2]))
20126 {
20127 case -8:
20128 if (emit)
20129 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20130 return "";
20131 case -4:
20132 if (TARGET_THUMB2)
20133 break;
20134 if (emit)
20135 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20136 return "";
20137 case 4:
20138 if (TARGET_THUMB2)
20139 break;
20140 if (emit)
20141 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20142 return "";
20143 }
20144 }
20145 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20146 operands[1] = otherops[0];
20147 if (can_ldrd
20148 && (REG_P (otherops[2])
20149 || TARGET_THUMB2
20150 || (CONST_INT_P (otherops[2])
20151 && INTVAL (otherops[2]) > -256
20152 && INTVAL (otherops[2]) < 256)))
20153 {
20154 if (reg_overlap_mentioned_p (operands[0],
20155 otherops[2]))
20156 {
20157 /* Swap base and index registers over to
20158 avoid a conflict. */
20159 std::swap (otherops[1], otherops[2]);
20160 }
20161 /* If both registers conflict, it will usually
20162 have been fixed by a splitter. */
20163 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20164 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20165 {
20166 if (emit)
20167 {
20168 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20169 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20170 }
20171 if (count)
20172 *count = 2;
20173 }
20174 else
20175 {
20176 otherops[0] = operands[0];
20177 if (emit)
20178 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20179 }
20180 return "";
20181 }
20182
20183 if (CONST_INT_P (otherops[2]))
20184 {
20185 if (emit)
20186 {
20187 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20188 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20189 else
20190 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20191 }
20192 }
20193 else
20194 {
20195 if (emit)
20196 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20197 }
20198 }
20199 else
20200 {
20201 if (emit)
20202 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20203 }
20204
20205 if (count)
20206 *count = 2;
20207
20208 if (can_ldrd)
20209 return "ldrd%?\t%0, [%1]";
20210
20211 return "ldmia%?\t%1, %M0";
20212 }
20213 else
20214 {
20215 otherops[1] = adjust_address (operands[1], SImode, 4);
20216 /* Take care of overlapping base/data reg. */
20217 if (reg_mentioned_p (operands[0], operands[1]))
20218 {
20219 if (emit)
20220 {
20221 output_asm_insn ("ldr%?\t%0, %1", otherops);
20222 output_asm_insn ("ldr%?\t%0, %1", operands);
20223 }
20224 if (count)
20225 *count = 2;
20226
20227 }
20228 else
20229 {
20230 if (emit)
20231 {
20232 output_asm_insn ("ldr%?\t%0, %1", operands);
20233 output_asm_insn ("ldr%?\t%0, %1", otherops);
20234 }
20235 if (count)
20236 *count = 2;
20237 }
20238 }
20239 }
20240 }
20241 else
20242 {
20243 /* Constraints should ensure this. */
20244 gcc_assert (code0 == MEM && code1 == REG);
20245 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20246 || (TARGET_ARM && TARGET_LDRD));
20247
20248 /* For TARGET_ARM the first source register of an STRD
20249 must be even. This is usually the case for double-word
20250 values, but user assembly constraints can force an odd
20251 starting register. */
20252 bool allow_strd = TARGET_LDRD
20253 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20254 switch (GET_CODE (XEXP (operands[0], 0)))
20255 {
20256 case REG:
20257 if (emit)
20258 {
20259 if (allow_strd)
20260 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20261 else
20262 output_asm_insn ("stm%?\t%m0, %M1", operands);
20263 }
20264 break;
20265
20266 case PRE_INC:
20267 gcc_assert (allow_strd);
20268 if (emit)
20269 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20270 break;
20271
20272 case PRE_DEC:
20273 if (emit)
20274 {
20275 if (allow_strd)
20276 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20277 else
20278 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20279 }
20280 break;
20281
20282 case POST_INC:
20283 if (emit)
20284 {
20285 if (allow_strd)
20286 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20287 else
20288 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20289 }
20290 break;
20291
20292 case POST_DEC:
20293 gcc_assert (allow_strd);
20294 if (emit)
20295 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20296 break;
20297
20298 case PRE_MODIFY:
20299 case POST_MODIFY:
20300 otherops[0] = operands[1];
20301 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20302 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20303
20304 /* IWMMXT allows offsets larger than strd can handle;
20305 fix these up with a pair of str. */
20306 if (!TARGET_THUMB2
20307 && CONST_INT_P (otherops[2])
20308 && (INTVAL(otherops[2]) <= -256
20309 || INTVAL(otherops[2]) >= 256))
20310 {
20311 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20312 {
20313 if (emit)
20314 {
20315 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20316 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20317 }
20318 if (count)
20319 *count = 2;
20320 }
20321 else
20322 {
20323 if (emit)
20324 {
20325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20326 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20327 }
20328 if (count)
20329 *count = 2;
20330 }
20331 }
20332 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20333 {
20334 if (emit)
20335 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20336 }
20337 else
20338 {
20339 if (emit)
20340 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20341 }
20342 break;
20343
20344 case PLUS:
20345 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20346 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20347 {
20348 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20349 {
20350 case -8:
20351 if (emit)
20352 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20353 return "";
20354
20355 case -4:
20356 if (TARGET_THUMB2)
20357 break;
20358 if (emit)
20359 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20360 return "";
20361
20362 case 4:
20363 if (TARGET_THUMB2)
20364 break;
20365 if (emit)
20366 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20367 return "";
20368 }
20369 }
20370 if (allow_strd
20371 && (REG_P (otherops[2])
20372 || TARGET_THUMB2
20373 || (CONST_INT_P (otherops[2])
20374 && INTVAL (otherops[2]) > -256
20375 && INTVAL (otherops[2]) < 256)))
20376 {
20377 otherops[0] = operands[1];
20378 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20379 if (emit)
20380 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20381 return "";
20382 }
20383 /* Fall through */
20384
20385 default:
20386 otherops[0] = adjust_address (operands[0], SImode, 4);
20387 otherops[1] = operands[1];
20388 if (emit)
20389 {
20390 output_asm_insn ("str%?\t%1, %0", operands);
20391 output_asm_insn ("str%?\t%H1, %0", otherops);
20392 }
20393 if (count)
20394 *count = 2;
20395 }
20396 }
20397
20398 return "";
20399 }
20400
20401 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20402 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20403
20404 const char *
20405 output_move_quad (rtx *operands)
20406 {
20407 if (REG_P (operands[0]))
20408 {
20409 /* Load, or reg->reg move. */
20410
20411 if (MEM_P (operands[1]))
20412 {
20413 switch (GET_CODE (XEXP (operands[1], 0)))
20414 {
20415 case REG:
20416 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20417 break;
20418
20419 case LABEL_REF:
20420 case CONST:
20421 output_asm_insn ("adr%?\t%0, %1", operands);
20422 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20423 break;
20424
20425 default:
20426 gcc_unreachable ();
20427 }
20428 }
20429 else
20430 {
20431 rtx ops[2];
20432 int dest, src, i;
20433
20434 gcc_assert (REG_P (operands[1]));
20435
20436 dest = REGNO (operands[0]);
20437 src = REGNO (operands[1]);
20438
20439 /* This seems pretty dumb, but hopefully GCC won't try to do it
20440 very often. */
20441 if (dest < src)
20442 for (i = 0; i < 4; i++)
20443 {
20444 ops[0] = gen_rtx_REG (SImode, dest + i);
20445 ops[1] = gen_rtx_REG (SImode, src + i);
20446 output_asm_insn ("mov%?\t%0, %1", ops);
20447 }
20448 else
20449 for (i = 3; i >= 0; i--)
20450 {
20451 ops[0] = gen_rtx_REG (SImode, dest + i);
20452 ops[1] = gen_rtx_REG (SImode, src + i);
20453 output_asm_insn ("mov%?\t%0, %1", ops);
20454 }
20455 }
20456 }
20457 else
20458 {
20459 gcc_assert (MEM_P (operands[0]));
20460 gcc_assert (REG_P (operands[1]));
20461 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20462
20463 switch (GET_CODE (XEXP (operands[0], 0)))
20464 {
20465 case REG:
20466 output_asm_insn ("stm%?\t%m0, %M1", operands);
20467 break;
20468
20469 default:
20470 gcc_unreachable ();
20471 }
20472 }
20473
20474 return "";
20475 }
20476
20477 /* Output a VFP load or store instruction. */
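/* For instance (illustrative operands): a DFmode load from [r0]
   prints as "vldr.64 d8, [r0]" and the matching store as
   "vstr.64 d8, [r0]"; pre-decrement and post-increment addresses use
   the vldmdb/vstmdb and vldmia/vstmia forms instead. */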
20478
20479 const char *
20480 output_move_vfp (rtx *operands)
20481 {
20482 rtx reg, mem, addr, ops[2];
20483 int load = REG_P (operands[0]);
20484 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20485 int sp = (!TARGET_VFP_FP16INST
20486 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20487 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20488 const char *templ;
20489 char buff[50];
20490 machine_mode mode;
20491
20492 reg = operands[!load];
20493 mem = operands[load];
20494
20495 mode = GET_MODE (reg);
20496
20497 gcc_assert (REG_P (reg));
20498 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20499 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20500 || mode == SFmode
20501 || mode == DFmode
20502 || mode == HImode
20503 || mode == SImode
20504 || mode == DImode
20505 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20506 gcc_assert (MEM_P (mem));
20507
20508 addr = XEXP (mem, 0);
20509
20510 switch (GET_CODE (addr))
20511 {
20512 case PRE_DEC:
20513 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20514 ops[0] = XEXP (addr, 0);
20515 ops[1] = reg;
20516 break;
20517
20518 case POST_INC:
20519 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20520 ops[0] = XEXP (addr, 0);
20521 ops[1] = reg;
20522 break;
20523
20524 default:
20525 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20526 ops[0] = reg;
20527 ops[1] = mem;
20528 break;
20529 }
20530
20531 sprintf (buff, templ,
20532 load ? "ld" : "st",
20533 dp ? "64" : sp ? "32" : "16",
20534 dp ? "P" : "",
20535 integer_p ? "\t%@ int" : "");
20536 output_asm_insn (buff, ops);
20537
20538 return "";
20539 }
20540
20541 /* Output a Neon double-word or quad-word load or store, or a load
20542 or store for larger structure modes.
20543
20544 WARNING: The ordering of elements is weird in big-endian mode,
20545 because the EABI requires that vectors stored in memory appear
20546 as though they were stored by a VSTM instruction.
20547 GCC RTL defines element ordering based on in-memory order.
20548 This can be different from the architectural ordering of elements
20549 within a NEON register. The intrinsics defined in arm_neon.h use the
20550 NEON register element ordering, not the GCC RTL element ordering.
20551
20552 For example, the in-memory ordering of a big-endian quadword
20553 vector with 16-bit elements when stored from register pair {d0,d1}
20554 will be (lowest address first, d0[N] is NEON register element N):
20555
20556 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20557
20558 When necessary, quadword registers (dN, dN+1) are moved to ARM
20559 registers from rN in the order:
20560
20561 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20562
20563 So that STM/LDM can be used on vectors in ARM registers, and the
20564 same memory layout will result as if VSTM/VLDM were used.
20565
20566 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20567 possible, which allows use of appropriate alignment tags.
20568 Note that the choice of "64" is independent of the actual vector
20569 element size; this size simply ensures that the behavior is
20570 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20571
20572 Due to limitations of those instructions, use of VST1.64/VLD1.64
20573 is not possible if:
20574 - the address contains PRE_DEC, or
20575 - the mode refers to more than 4 double-word registers
20576
20577 In those cases, it would be possible to replace VSTM/VLDM by a
20578 sequence of instructions; this is not currently implemented since
20579 this is not certain to actually improve performance. */
20580
20581 const char *
20582 output_move_neon (rtx *operands)
20583 {
20584 rtx reg, mem, addr, ops[2];
20585 int regno, nregs, load = REG_P (operands[0]);
20586 const char *templ;
20587 char buff[50];
20588 machine_mode mode;
20589
20590 reg = operands[!load];
20591 mem = operands[load];
20592
20593 mode = GET_MODE (reg);
20594
20595 gcc_assert (REG_P (reg));
20596 regno = REGNO (reg);
20597 nregs = REG_NREGS (reg) / 2;
20598 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20599 || NEON_REGNO_OK_FOR_QUAD (regno));
20600 gcc_assert (VALID_NEON_DREG_MODE (mode)
20601 || VALID_NEON_QREG_MODE (mode)
20602 || VALID_NEON_STRUCT_MODE (mode));
20603 gcc_assert (MEM_P (mem));
20604
20605 addr = XEXP (mem, 0);
20606
20607 /* Strip off const from addresses like (const (plus (...))). */
20608 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20609 addr = XEXP (addr, 0);
20610
20611 switch (GET_CODE (addr))
20612 {
20613 case POST_INC:
20614 /* We have to use vldm / vstm for too-large modes. */
20615 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20616 {
20617 templ = "v%smia%%?\t%%0!, %%h1";
20618 ops[0] = XEXP (addr, 0);
20619 }
20620 else
20621 {
20622 templ = "v%s1.64\t%%h1, %%A0";
20623 ops[0] = mem;
20624 }
20625 ops[1] = reg;
20626 break;
20627
20628 case PRE_DEC:
20629 /* We have to use vldm / vstm in this case, since there is no
20630 pre-decrement form of the vld1 / vst1 instructions. */
20631 templ = "v%smdb%%?\t%%0!, %%h1";
20632 ops[0] = XEXP (addr, 0);
20633 ops[1] = reg;
20634 break;
20635
20636 case POST_MODIFY:
20637 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20638 gcc_unreachable ();
20639
20640 case REG:
20641 /* We have to use vldm / vstm for too-large modes. */
20642 if (nregs > 1)
20643 {
20644 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20645 templ = "v%smia%%?\t%%m0, %%h1";
20646 else
20647 templ = "v%s1.64\t%%h1, %%A0";
20648
20649 ops[0] = mem;
20650 ops[1] = reg;
20651 break;
20652 }
20653 /* Fall through. */
20654 case PLUS:
20655 if (GET_CODE (addr) == PLUS)
20656 addr = XEXP (addr, 0);
20657 /* Fall through. */
20658 case LABEL_REF:
20659 {
20660 int i;
20661 int overlap = -1;
20662 for (i = 0; i < nregs; i++)
20663 {
20664 /* We're only using DImode here because it's a convenient
20665 size. */
20666 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20667 ops[1] = adjust_address (mem, DImode, 8 * i);
20668 if (reg_overlap_mentioned_p (ops[0], mem))
20669 {
20670 gcc_assert (overlap == -1);
20671 overlap = i;
20672 }
20673 else
20674 {
20675 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20676 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20677 else
20678 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20679 output_asm_insn (buff, ops);
20680 }
20681 }
20682 if (overlap != -1)
20683 {
20684 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20685 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20686 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20687 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20688 else
20689 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20690 output_asm_insn (buff, ops);
20691 }
20692
20693 return "";
20694 }
20695
20696 default:
20697 gcc_unreachable ();
20698 }
20699
20700 sprintf (buff, templ, load ? "ld" : "st");
20701 output_asm_insn (buff, ops);
20702
20703 return "";
20704 }
20705
20706 /* Compute and return the length of neon_mov<mode>, where <mode> is
20707 one of VSTRUCT modes: EI, OI, CI or XI. */
20708 int
20709 arm_attr_length_move_neon (rtx_insn *insn)
20710 {
20711 rtx reg, mem, addr;
20712 int load;
20713 machine_mode mode;
20714
20715 extract_insn_cached (insn);
20716
20717 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20718 {
20719 mode = GET_MODE (recog_data.operand[0]);
20720 switch (mode)
20721 {
20722 case E_EImode:
20723 case E_OImode:
20724 return 8;
20725 case E_CImode:
20726 return 12;
20727 case E_XImode:
20728 return 16;
20729 default:
20730 gcc_unreachable ();
20731 }
20732 }
20733
20734 load = REG_P (recog_data.operand[0]);
20735 reg = recog_data.operand[!load];
20736 mem = recog_data.operand[load];
20737
20738 gcc_assert (MEM_P (mem));
20739
20740 addr = XEXP (mem, 0);
20741
20742 /* Strip off const from addresses like (const (plus (...))). */
20743 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20744 addr = XEXP (addr, 0);
20745
20746 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20747 {
20748 int insns = REG_NREGS (reg) / 2;
20749 return insns * 4;
20750 }
20751 else
20752 return 4;
20753 }
20754
20755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20756 return zero. */
20757
20758 int
20759 arm_address_offset_is_imm (rtx_insn *insn)
20760 {
20761 rtx mem, addr;
20762
20763 extract_insn_cached (insn);
20764
20765 if (REG_P (recog_data.operand[0]))
20766 return 0;
20767
20768 mem = recog_data.operand[0];
20769
20770 gcc_assert (MEM_P (mem));
20771
20772 addr = XEXP (mem, 0);
20773
20774 if (REG_P (addr)
20775 || (GET_CODE (addr) == PLUS
20776 && REG_P (XEXP (addr, 0))
20777 && CONST_INT_P (XEXP (addr, 1))))
20778 return 1;
20779 else
20780 return 0;
20781 }
20782
20783 /* Output an ADD r, s, #n where n may be too big for one instruction.
20784 If adding zero to one register, output nothing. */
20785 const char *
20786 output_add_immediate (rtx *operands)
20787 {
20788 HOST_WIDE_INT n = INTVAL (operands[2]);
20789
20790 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20791 {
20792 if (n < 0)
20793 output_multi_immediate (operands,
20794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20795 -n);
20796 else
20797 output_multi_immediate (operands,
20798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20799 n);
20800 }
20801
20802 return "";
20803 }
20804
20805 /* Output a multiple immediate operation.
20806 OPERANDS is the vector of operands referred to in the output patterns.
20807 INSTR1 is the output pattern to use for the first constant.
20808 INSTR2 is the output pattern to use for subsequent constants.
20809 IMMED_OP is the index of the constant slot in OPERANDS.
20810 N is the constant value. */
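/* As a worked example (with made-up operands): N = 0x00ff00ff and the
   "add" patterns emit one instruction per rotatable 8-bit chunk,

       add r0, r1, #255
       add r0, r0, #16711680    @ 0x00ff0000

   since each chunk is a valid immediate on its own. */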
20811 static const char *
20812 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20813 int immed_op, HOST_WIDE_INT n)
20814 {
20815 #if HOST_BITS_PER_WIDE_INT > 32
20816 n &= 0xffffffff;
20817 #endif
20818
20819 if (n == 0)
20820 {
20821 /* Quick and easy output. */
20822 operands[immed_op] = const0_rtx;
20823 output_asm_insn (instr1, operands);
20824 }
20825 else
20826 {
20827 int i;
20828 const char * instr = instr1;
20829
20830 /* Note that n is never zero here (which would give no output). */
20831 for (i = 0; i < 32; i += 2)
20832 {
20833 if (n & (3 << i))
20834 {
20835 operands[immed_op] = GEN_INT (n & (255 << i));
20836 output_asm_insn (instr, operands);
20837 instr = instr2;
20838 i += 6;
20839 }
20840 }
20841 }
20842
20843 return "";
20844 }
20845
20846 /* Return the name of a shifter operation. */
20847 static const char *
20848 arm_shift_nmem(enum rtx_code code)
20849 {
20850 switch (code)
20851 {
20852 case ASHIFT:
20853 return ARM_LSL_NAME;
20854
20855 case ASHIFTRT:
20856 return "asr";
20857
20858 case LSHIFTRT:
20859 return "lsr";
20860
20861 case ROTATERT:
20862 return "ror";
20863
20864 default:
20865 abort();
20866 }
20867 }
20868
20869 /* Return the appropriate ARM instruction for the operation code.
20870 The returned result should not be overwritten. OP is the rtx of the
20871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20872 was shifted. */
20873 const char *
20874 arithmetic_instr (rtx op, int shift_first_arg)
20875 {
20876 switch (GET_CODE (op))
20877 {
20878 case PLUS:
20879 return "add";
20880
20881 case MINUS:
20882 return shift_first_arg ? "rsb" : "sub";
20883
20884 case IOR:
20885 return "orr";
20886
20887 case XOR:
20888 return "eor";
20889
20890 case AND:
20891 return "and";
20892
20893 case ASHIFT:
20894 case ASHIFTRT:
20895 case LSHIFTRT:
20896 case ROTATERT:
20897 return arm_shift_nmem(GET_CODE(op));
20898
20899 default:
20900 gcc_unreachable ();
20901 }
20902 }
20903
20904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20905 for the operation code. The returned result should not be overwritten.
20906 OP is the rtx code of the shift.
20907 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20908 constant shift amount otherwise. */
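/* For example (illustrative): (ashift x (const_int 3)) yields
   ARM_LSL_NAME with *AMOUNTP = 3; (mult x (const_int 8)) also yields
   ARM_LSL_NAME with *AMOUNTP = 3; and (rotate x (const_int 8)) yields
   "ror" with *AMOUNTP = 24. */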
20909 static const char *
20910 shift_op (rtx op, HOST_WIDE_INT *amountp)
20911 {
20912 const char * mnem;
20913 enum rtx_code code = GET_CODE (op);
20914
20915 switch (code)
20916 {
20917 case ROTATE:
20918 if (!CONST_INT_P (XEXP (op, 1)))
20919 {
20920 output_operand_lossage ("invalid shift operand");
20921 return NULL;
20922 }
20923
20924 code = ROTATERT;
20925 *amountp = 32 - INTVAL (XEXP (op, 1));
20926 mnem = "ror";
20927 break;
20928
20929 case ASHIFT:
20930 case ASHIFTRT:
20931 case LSHIFTRT:
20932 case ROTATERT:
20933 mnem = arm_shift_nmem(code);
20934 if (CONST_INT_P (XEXP (op, 1)))
20935 {
20936 *amountp = INTVAL (XEXP (op, 1));
20937 }
20938 else if (REG_P (XEXP (op, 1)))
20939 {
20940 *amountp = -1;
20941 return mnem;
20942 }
20943 else
20944 {
20945 output_operand_lossage ("invalid shift operand");
20946 return NULL;
20947 }
20948 break;
20949
20950 case MULT:
20951 /* We never have to worry about the amount being other than a
20952 power of 2, since this case can never be reloaded from a reg. */
20953 if (!CONST_INT_P (XEXP (op, 1)))
20954 {
20955 output_operand_lossage ("invalid shift operand");
20956 return NULL;
20957 }
20958
20959 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20960
20961 /* Amount must be a power of two. */
20962 if (*amountp & (*amountp - 1))
20963 {
20964 output_operand_lossage ("invalid shift operand");
20965 return NULL;
20966 }
20967
20968 *amountp = exact_log2 (*amountp);
20969 gcc_assert (IN_RANGE (*amountp, 0, 31));
20970 return ARM_LSL_NAME;
20971
20972 default:
20973 output_operand_lossage ("invalid shift operand");
20974 return NULL;
20975 }
20976
20977 /* This is not 100% correct, but follows from the desire to merge
20978 multiplication by a power of 2 with the recognizer for a
20979 shift. >=32 is not a valid shift for "lsl", so we must try to
20980 output a shift that produces the correct arithmetical result.
20981 Using lsr #32 is identical except for the fact that the carry bit
20982 is not set correctly if we set the flags; but we never use the
20983 carry bit from such an operation, so we can ignore that. */
20984 if (code == ROTATERT)
20985 /* Rotate is just modulo 32. */
20986 *amountp &= 31;
20987 else if (*amountp != (*amountp & 31))
20988 {
20989 if (code == ASHIFT)
20990 mnem = "lsr";
20991 *amountp = 32;
20992 }
20993
20994 /* Shifts of 0 are no-ops. */
20995 if (*amountp == 0)
20996 return NULL;
20997
20998 return mnem;
20999 }
21000
21001 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21002 because /bin/as is horribly restrictive. The judgement about
21003 whether or not each character is 'printable' (and can be output as
21004 is) or not (and must be printed with an octal escape) must be made
21005 with reference to the *host* character set -- the situation is
21006 similar to that discussed in the comments above pp_c_char in
21007 c-pretty-print.cc. */
21008
21009 #define MAX_ASCII_LEN 51
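/* E.g. (hypothetical input): the bytes  h i " \n  come out as

       .ascii "hi\"\012"

   with the directive restarted whenever MAX_ASCII_LEN characters of
   output have accumulated. */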
21010
21011 void
21012 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21013 {
21014 int i;
21015 int len_so_far = 0;
21016
21017 fputs ("\t.ascii\t\"", stream);
21018
21019 for (i = 0; i < len; i++)
21020 {
21021 int c = p[i];
21022
21023 if (len_so_far >= MAX_ASCII_LEN)
21024 {
21025 fputs ("\"\n\t.ascii\t\"", stream);
21026 len_so_far = 0;
21027 }
21028
21029 if (ISPRINT (c))
21030 {
21031 if (c == '\\' || c == '\"')
21032 {
21033 putc ('\\', stream);
21034 len_so_far++;
21035 }
21036 putc (c, stream);
21037 len_so_far++;
21038 }
21039 else
21040 {
21041 fprintf (stream, "\\%03o", c);
21042 len_so_far += 4;
21043 }
21044 }
21045
21046 fputs ("\"\n", stream);
21047 }
21048 \f
21049
21050 /* Compute the register save mask for registers 0 through 12
21051 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
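/* As a rough illustration: an ordinary (non-interrupt, non-naked)
   function that uses the call-saved registers r4 and r7 and needs a
   frame pointer would get roughly
   (1 << 4) | (1 << 7) | (1 << HARD_FRAME_POINTER_REGNUM);
   the PIC, stack-alignment and EH cases below can add further bits. */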
21052
21053 static unsigned long
21054 arm_compute_save_reg0_reg12_mask (void)
21055 {
21056 unsigned long func_type = arm_current_func_type ();
21057 unsigned long save_reg_mask = 0;
21058 unsigned int reg;
21059
21060 if (IS_INTERRUPT (func_type))
21061 {
21062 unsigned int max_reg;
21063 /* Interrupt functions must not corrupt any registers,
21064 even call clobbered ones. If this is a leaf function
21065 we can just examine the registers used by the RTL, but
21066 otherwise we have to assume that whatever function is
21067 called might clobber anything, and so we have to save
21068 all the call-clobbered registers as well. */
21069 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21070 /* FIQ handlers have registers r8 - r12 banked, so
21071 we only need to check r0 - r7. Normal ISRs only
21072 bank r14 and r15, so we must check up to r12.
21073 r13 is the stack pointer, which is always preserved,
21074 so we do not need to consider it here. */
21075 max_reg = 7;
21076 else
21077 max_reg = 12;
21078
21079 for (reg = 0; reg <= max_reg; reg++)
21080 if (reg_needs_saving_p (reg))
21081 save_reg_mask |= (1 << reg);
21082
21083 /* Also save the pic base register if necessary. */
21084 if (PIC_REGISTER_MAY_NEED_SAVING
21085 && crtl->uses_pic_offset_table)
21086 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21087 }
21088 else if (IS_VOLATILE(func_type))
21089 {
21090 /* For noreturn functions we historically omitted register saves
21091 altogether. However, this really messes up debugging. As a
21092 compromise, save just the frame pointers. Combined with the link
21093 register saved elsewhere this should be sufficient to get
21094 a backtrace. */
21095 if (frame_pointer_needed)
21096 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21097 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21098 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21099 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21100 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21101 }
21102 else
21103 {
21104 /* In the normal case we only need to save those registers
21105 which are call saved and which are used by this function. */
21106 for (reg = 0; reg <= 11; reg++)
21107 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21108 save_reg_mask |= (1 << reg);
21109
21110 /* Handle the frame pointer as a special case. */
21111 if (frame_pointer_needed)
21112 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21113
21114 /* If we aren't loading the PIC register,
21115 don't stack it even though it may be live. */
21116 if (PIC_REGISTER_MAY_NEED_SAVING
21117 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21118 || crtl->uses_pic_offset_table))
21119 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21120
21121 /* The prologue will copy SP into R0, so save it. */
21122 if (IS_STACKALIGN (func_type))
21123 save_reg_mask |= 1;
21124 }
21125
21126 /* Save registers so the exception handler can modify them. */
21127 if (crtl->calls_eh_return)
21128 {
21129 unsigned int i;
21130
21131 for (i = 0; ; i++)
21132 {
21133 reg = EH_RETURN_DATA_REGNO (i);
21134 if (reg == INVALID_REGNUM)
21135 break;
21136 save_reg_mask |= 1 << reg;
21137 }
21138 }
21139
21140 return save_reg_mask;
21141 }
21142
21143 /* Return true if r3 is live at the start of the function. */
21144
21145 static bool
21146 arm_r3_live_at_start_p (void)
21147 {
21148 /* Just look at cfg info, which is still close enough to correct at this
21149 point. This gives false positives for broken functions that might use
21150 uninitialized data that happens to be allocated in r3, but who cares? */
21151 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21152 }
21153
21154 /* Compute the number of bytes used to store the static chain register on the
21155 stack, above the stack frame. We need to know this accurately to get the
21156 alignment of the rest of the stack frame correct. */
21157
21158 static int
21159 arm_compute_static_chain_stack_bytes (void)
21160 {
21161 /* Once the value is updated from the init value of -1, do not
21162 re-compute. */
21163 if (cfun->machine->static_chain_stack_bytes != -1)
21164 return cfun->machine->static_chain_stack_bytes;
21165
21166 /* See the defining assertion in arm_expand_prologue. */
21167 if (IS_NESTED (arm_current_func_type ())
21168 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21169 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21170 || flag_stack_clash_protection)
21171 && !df_regs_ever_live_p (LR_REGNUM)))
21172 && arm_r3_live_at_start_p ()
21173 && crtl->args.pretend_args_size == 0)
21174 return 4;
21175
21176 return 0;
21177 }
21178
21179 /* Compute a bit mask of which core registers need to be
21180 saved on the stack for the current function.
21181 This is used by arm_compute_frame_layout, which may add extra registers. */
21182
21183 static unsigned long
21184 arm_compute_save_core_reg_mask (void)
21185 {
21186 unsigned int save_reg_mask = 0;
21187 unsigned long func_type = arm_current_func_type ();
21188 unsigned int reg;
21189
21190 if (IS_NAKED (func_type))
21191 /* This should never really happen. */
21192 return 0;
21193
21194 /* If we are creating a stack frame, then we must save the frame pointer,
21195 IP (which will hold the old stack pointer), LR and the PC. */
21196 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21197 save_reg_mask |=
21198 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21199 | (1 << IP_REGNUM)
21200 | (1 << LR_REGNUM)
21201 | (1 << PC_REGNUM);
21202
21203 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21204
21205 /* Decide if we need to save the link register.
21206 Interrupt routines have their own banked link register,
21207 so they never need to save it.
21208 Otherwise if we do not use the link register we do not need to save
21209 it. If we are pushing other registers onto the stack however, we
21210 can save an instruction in the epilogue by pushing the link register
21211 now and then popping it back into the PC. This incurs extra memory
21212 accesses though, so we only do it when optimizing for size, and only
21213 if we know that we will not need a fancy return sequence. */
21214 if (df_regs_ever_live_p (LR_REGNUM)
21215 || (save_reg_mask
21216 && optimize_size
21217 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21218 && !crtl->tail_call_emit
21219 && !crtl->calls_eh_return))
21220 save_reg_mask |= 1 << LR_REGNUM;
21221
21222 if (cfun->machine->lr_save_eliminated)
21223 save_reg_mask &= ~ (1 << LR_REGNUM);
21224
21225 if (TARGET_REALLY_IWMMXT
21226 && ((bit_count (save_reg_mask)
21227 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21228 arm_compute_static_chain_stack_bytes())
21229 ) % 2) != 0)
21230 {
21231 /* The total number of registers that are going to be pushed
21232 onto the stack is odd. We need to ensure that the stack
21233 is 64-bit aligned before we start to save iWMMXt registers,
21234 and also before we start to create locals. (A local variable
21235 might be a double or long long which we will load/store using
21236 an iWMMXt instruction). Therefore we need to push another
21237 ARM register, so that the stack will be 64-bit aligned. We
21238 try to avoid using the arg registers (r0 - r3) as they might be
21239 used to pass values in a tail call. */
21240 for (reg = 4; reg <= 12; reg++)
21241 if ((save_reg_mask & (1 << reg)) == 0)
21242 break;
21243
21244 if (reg <= 12)
21245 save_reg_mask |= (1 << reg);
21246 else
21247 {
21248 cfun->machine->sibcall_blocked = 1;
21249 save_reg_mask |= (1 << 3);
21250 }
21251 }
21252
21253 /* We may need to push an additional register for use initializing the
21254 PIC base register. */
21255 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21256 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21257 {
21258 reg = thumb_find_work_register (1 << 4);
21259 if (!call_used_or_fixed_reg_p (reg))
21260 save_reg_mask |= (1 << reg);
21261 }
21262
21263 return save_reg_mask;
21264 }
21265
21266 /* Compute a bit mask of which core registers need to be
21267 saved on the stack for the current function. */
21268 static unsigned long
21269 thumb1_compute_save_core_reg_mask (void)
21270 {
21271 unsigned long mask;
21272 unsigned reg;
21273
21274 mask = 0;
21275 for (reg = 0; reg < 12; reg ++)
21276 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21277 mask |= 1 << reg;
21278
21279 /* Handle the frame pointer as a special case. */
21280 if (frame_pointer_needed)
21281 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21282
21283 if (flag_pic
21284 && !TARGET_SINGLE_PIC_BASE
21285 && arm_pic_register != INVALID_REGNUM
21286 && crtl->uses_pic_offset_table)
21287 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21288
21289 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21290 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21291 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21292
21293 /* LR will also be pushed if any lo regs are pushed. */
21294 if (mask & 0xff || thumb_force_lr_save ())
21295 mask |= (1 << LR_REGNUM);
21296
21297 bool call_clobbered_scratch
21298 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21299 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21300
21301 /* Make sure we have a low work register if we need one. We will
21302 need one if we are going to push a high register, but we are not
21303 currently intending to push a low register. However, if both the
21304 prologue and epilogue have a spare call-clobbered low register,
21305 then we won't need to find an additional work register. It does
21306 not need to be the same register in the prologue and
21307 epilogue. */
21308 if ((mask & 0xff) == 0
21309 && !call_clobbered_scratch
21310 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21311 {
21312 /* Use thumb_find_work_register to choose which register
21313 we will use. If the register is live then we will
21314 have to push it. Use LAST_LO_REGNUM as our fallback
21315 choice for the register to select. */
21316 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21317 /* Make sure the register returned by thumb_find_work_register is
21318 not part of the return value. */
21319 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21320 reg = LAST_LO_REGNUM;
21321
21322 if (callee_saved_reg_p (reg))
21323 mask |= 1 << reg;
21324 }
21325
21326 /* The 504 below is 8 bytes less than 512 because there are two possible
21327 alignment words. We can't tell here if they will be present or not, so we
21328 have to play it safe and assume that they are. */
21329 if ((CALLER_INTERWORKING_SLOT_SIZE +
21330 ROUND_UP_WORD (get_frame_size ()) +
21331 crtl->outgoing_args_size) >= 504)
21332 {
21333 /* This is the same as the code in thumb1_expand_prologue() which
21334 determines which register to use for stack decrement. */
21335 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21336 if (mask & (1 << reg))
21337 break;
21338
21339 if (reg > LAST_LO_REGNUM)
21340 {
21341 /* Make sure we have a register available for stack decrement. */
21342 mask |= 1 << LAST_LO_REGNUM;
21343 }
21344 }
21345
21346 return mask;
21347 }
21348
21349 /* Return the number of bytes required to save VFP registers. */
21350 static int
21351 arm_get_vfp_saved_size (void)
21352 {
21353 unsigned int regno;
21354 int count;
21355 int saved;
21356
21357 saved = 0;
21358 /* Space for saved VFP registers. */
21359 if (TARGET_VFP_BASE)
21360 {
21361 count = 0;
21362 for (regno = FIRST_VFP_REGNUM;
21363 regno < LAST_VFP_REGNUM;
21364 regno += 2)
21365 {
21366 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21367 {
21368 if (count > 0)
21369 {
21370 /* Workaround ARM10 VFPr1 bug. */
21371 if (count == 2 && !arm_arch6)
21372 count++;
21373 saved += count * 8;
21374 }
21375 count = 0;
21376 }
21377 else
21378 count++;
21379 }
21380 if (count > 0)
21381 {
21382 if (count == 2 && !arm_arch6)
21383 count++;
21384 saved += count * 8;
21385 }
21386 }
21387 return saved;
21388 }
21389
21390
21391 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21392 everything bar the final return instruction. If simple_return is true,
21393 then do not output epilogue, because it has already been emitted in RTL.
21394
21395 Note: do not forget to update the length attribute of the corresponding insn
21396 pattern when changing the assembly output (e.g. the length attribute of
21397 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21398 register clearing sequences). */
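/* A few representative outputs (illustrative only, ignoring any
   condition code and assuming no interworking): a frame that saved
   {r4, r5, lr} normally returns with "pop {r4, r5, pc}", a frame that
   saved only LR returns with "ldr pc, [sp], #4", and an
   ARM_FT_INTERWORKED function finishes with "bx lr".  */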
21399 const char *
21400 output_return_instruction (rtx operand, bool really_return, bool reverse,
21401 bool simple_return)
21402 {
21403 char conditional[10];
21404 char instr[100];
21405 unsigned reg;
21406 unsigned long live_regs_mask;
21407 unsigned long func_type;
21408 arm_stack_offsets *offsets;
21409
21410 func_type = arm_current_func_type ();
21411
21412 if (IS_NAKED (func_type))
21413 return "";
21414
21415 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21416 {
21417 /* If this function was declared non-returning, and we have
21418 found a tail call, then we have to trust that the called
21419 function won't return. */
21420 if (really_return)
21421 {
21422 rtx ops[2];
21423
21424 /* Otherwise, trap an attempted return by aborting. */
21425 ops[0] = operand;
21426 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21427 : "abort");
21428 assemble_external_libcall (ops[1]);
21429 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21430 }
21431
21432 return "";
21433 }
21434
21435 gcc_assert (!cfun->calls_alloca || really_return);
21436
21437 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21438
21439 cfun->machine->return_used_this_function = 1;
21440
21441 offsets = arm_get_frame_offsets ();
21442 live_regs_mask = offsets->saved_regs_mask;
21443
21444 if (!simple_return && live_regs_mask)
21445 {
21446 const char * return_reg;
21447
21448 /* If we do not have any special requirements for function exit
21449 (e.g. interworking) then we can load the return address
21450 directly into the PC. Otherwise we must load it into LR. */
21451 if (really_return
21452 && !IS_CMSE_ENTRY (func_type)
21453 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21454 return_reg = reg_names[PC_REGNUM];
21455 else
21456 return_reg = reg_names[LR_REGNUM];
21457
21458 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21459 {
21460 /* There are three possible reasons for the IP register
21461 being saved: 1) a stack frame was created, in which case
21462 IP contains the old stack pointer; 2) an ISR routine
21463 corrupted it; or 3) it was saved to align the stack on
21464 iWMMXt. In case 1, restore IP into SP, otherwise just
21465 restore IP. */
21466 if (frame_pointer_needed)
21467 {
21468 live_regs_mask &= ~ (1 << IP_REGNUM);
21469 live_regs_mask |= (1 << SP_REGNUM);
21470 }
21471 else
21472 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21473 }
21474
21475 /* On some ARM architectures it is faster to use LDR rather than
21476 LDM to load a single register. On other architectures, the
21477 cost is the same. In 26 bit mode, or for exception handlers,
21478 we have to use LDM to load the PC so that the CPSR is also
21479 restored. */
21480 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21481 if (live_regs_mask == (1U << reg))
21482 break;
21483
21484 if (reg <= LAST_ARM_REGNUM
21485 && (reg != LR_REGNUM
21486 || ! really_return
21487 || ! IS_INTERRUPT (func_type)))
21488 {
21489 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21490 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21491 }
21492 else
21493 {
21494 char *p;
21495 int first = 1;
21496
21497 /* Generate the load multiple instruction to restore the
21498 registers. Note we can get here, even if
21499 frame_pointer_needed is true, but only if sp already
21500 points to the base of the saved core registers. */
21501 if (live_regs_mask & (1 << SP_REGNUM))
21502 {
21503 unsigned HOST_WIDE_INT stack_adjust;
21504
21505 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21506 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21507
21508 if (stack_adjust && arm_arch5t && TARGET_ARM)
21509 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21510 else
21511 {
21512 /* If we can't use ldmib (SA110 bug),
21513 then try to pop r3 instead. */
21514 if (stack_adjust)
21515 live_regs_mask |= 1 << 3;
21516
21517 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21518 }
21519 }
21520 /* For interrupt returns we have to use an LDM rather than
21521 a POP so that we can use the exception return variant. */
21522 else if (IS_INTERRUPT (func_type))
21523 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21524 else
21525 sprintf (instr, "pop%s\t{", conditional);
21526
21527 p = instr + strlen (instr);
21528
21529 for (reg = 0; reg <= SP_REGNUM; reg++)
21530 if (live_regs_mask & (1 << reg))
21531 {
21532 int l = strlen (reg_names[reg]);
21533
21534 if (first)
21535 first = 0;
21536 else
21537 {
21538 memcpy (p, ", ", 2);
21539 p += 2;
21540 }
21541
21542 memcpy (p, "%|", 2);
21543 memcpy (p + 2, reg_names[reg], l);
21544 p += l + 2;
21545 }
21546
21547 if (live_regs_mask & (1 << LR_REGNUM))
21548 {
21549 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21550 /* If returning from an interrupt, restore the CPSR. */
21551 if (IS_INTERRUPT (func_type))
21552 strcat (p, "^");
21553 }
21554 else
21555 strcpy (p, "}");
21556 }
21557
21558 output_asm_insn (instr, & operand);
21559
21560 /* See if we need to generate an extra instruction to
21561 perform the actual function return. */
21562 if (really_return
21563 && func_type != ARM_FT_INTERWORKED
21564 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21565 {
21566 /* The return has already been handled
21567 by loading the LR into the PC. */
21568 return "";
21569 }
21570 }
21571
21572 if (really_return)
21573 {
21574 switch ((int) ARM_FUNC_TYPE (func_type))
21575 {
21576 case ARM_FT_ISR:
21577 case ARM_FT_FIQ:
21578 /* ??? This is wrong for unified assembly syntax. */
21579 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21580 break;
21581
21582 case ARM_FT_INTERWORKED:
21583 gcc_assert (arm_arch5t || arm_arch4t);
21584 sprintf (instr, "bx%s\t%%|lr", conditional);
21585 break;
21586
21587 case ARM_FT_EXCEPTION:
21588 /* ??? This is wrong for unified assembly syntax. */
21589 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21590 break;
21591
21592 default:
21593 if (IS_CMSE_ENTRY (func_type))
21594 {
21595 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21596 emitted by cmse_nonsecure_entry_clear_before_return () and the
21597 VSTR/VLDR instructions in the prologue and epilogue. */
21598 if (!TARGET_HAVE_FPCXT_CMSE)
21599 {
21600 /* Check if we have to clear the 'GE bits', which are only used if
21601 parallel add and subtract instructions are available. */
21602 if (TARGET_INT_SIMD)
21603 snprintf (instr, sizeof (instr),
21604 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21605 else
21606 snprintf (instr, sizeof (instr),
21607 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21608
21609 output_asm_insn (instr, & operand);
21610 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21611 care of it. */
21612 if (TARGET_HARD_FLOAT)
21613 {
21614 /* Clear the cumulative exception-status bits (0-4,7) and
21615 the condition code bits (28-31) of the FPSCR. We need
21616 to remember to clear the first scratch register used
21617 (IP) and save and restore the second (r4).
21618
21619 Important note: the length of the
21620 thumb2_cmse_entry_return insn pattern must account for
21621 the size of the below instructions. */
21622 output_asm_insn ("push\t{%|r4}", & operand);
21623 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21624 output_asm_insn ("movw\t%|r4, #65376", & operand);
21625 output_asm_insn ("movt\t%|r4, #4095", & operand);
21626 output_asm_insn ("and\t%|ip, %|r4", & operand);
21627 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21628 output_asm_insn ("pop\t{%|r4}", & operand);
21629 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21630 }
21631 }
21632 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21633 }
21634 /* Use bx if it's available. */
21635 else if (arm_arch5t || arm_arch4t)
21636 sprintf (instr, "bx%s\t%%|lr", conditional);
21637 else
21638 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21639 break;
21640 }
21641
21642 output_asm_insn (instr, & operand);
21643 }
21644
21645 return "";
21646 }
21647
21648 /* Output in FILE asm statements needed to declare the NAME of the function
21649 defined by its DECL node. */
21650
21651 void
21652 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21653 {
21654 size_t cmse_name_len;
21655 char *cmse_name = 0;
21656 char cmse_prefix[] = "__acle_se_";
21657
21658 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21659 extra function label for each function with the 'cmse_nonsecure_entry'
21660 attribute. This extra function label should be prepended with
21661 '__acle_se_', telling the linker that it needs to create secure gateway
21662 veneers for this function. */
21663 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21664 DECL_ATTRIBUTES (decl)))
21665 {
21666 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21667 cmse_name = XALLOCAVEC (char, cmse_name_len);
21668 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21669 targetm.asm_out.globalize_label (file, cmse_name);
21670
21671 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21672 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21673 }
21674
21675 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21676 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21677 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21678 ASM_OUTPUT_LABEL (file, name);
21679
21680 if (cmse_name)
21681 ASM_OUTPUT_LABEL (file, cmse_name);
21682
21683 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21684 }
21685
21686 /* Write the function name into the code section, directly preceding
21687 the function prologue.
21688
21689 Code will be output similar to this:
21690 t0
21691 .ascii "arm_poke_function_name", 0
21692 .align
21693 t1
21694 .word 0xff000000 + (t1 - t0)
21695 arm_poke_function_name
21696 mov ip, sp
21697 stmfd sp!, {fp, ip, lr, pc}
21698 sub fp, ip, #4
21699
21700 When performing a stack backtrace, code can inspect the value
21701 of 'pc' stored at 'fp' + 0. If the trace function then looks
21702 at location pc - 12 and the top 8 bits are set, then we know
21703 that there is a function name embedded immediately preceding this
21704 location, and its length is ((pc[-3]) & ~0xff000000).
21705
21706 We assume that pc is declared as a pointer to an unsigned long.
21707
21708 It is of no benefit to output the function name if we are assembling
21709 a leaf function. These function types will not contain a stack
21710 backtrace structure, therefore it is not possible to determine the
21711 function name. */
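/* Purely as an illustration of the layout above (the names used are
   hypothetical), a backtracer could recover the name roughly as
   follows, where pc is the unsigned long pointer described above:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;
         const char *name = (const char *) &pc[-3] - len;
         ... use name ...
       }  */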
21712 void
21713 arm_poke_function_name (FILE *stream, const char *name)
21714 {
21715 unsigned long alignlength;
21716 unsigned long length;
21717 rtx x;
21718
21719 length = strlen (name) + 1;
21720 alignlength = ROUND_UP_WORD (length);
21721
21722 ASM_OUTPUT_ASCII (stream, name, length);
21723 ASM_OUTPUT_ALIGN (stream, 2);
21724 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21725 assemble_aligned_integer (UNITS_PER_WORD, x);
21726 }
21727
21728 /* Place some comments into the assembler stream
21729 describing the current function. */
21730 static void
21731 arm_output_function_prologue (FILE *f)
21732 {
21733 unsigned long func_type;
21734
21735 /* Sanity check. */
21736 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21737
21738 func_type = arm_current_func_type ();
21739
21740 switch ((int) ARM_FUNC_TYPE (func_type))
21741 {
21742 default:
21743 case ARM_FT_NORMAL:
21744 break;
21745 case ARM_FT_INTERWORKED:
21746 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21747 break;
21748 case ARM_FT_ISR:
21749 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21750 break;
21751 case ARM_FT_FIQ:
21752 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21753 break;
21754 case ARM_FT_EXCEPTION:
21755 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21756 break;
21757 }
21758
21759 if (IS_NAKED (func_type))
21760 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21761
21762 if (IS_VOLATILE (func_type))
21763 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21764
21765 if (IS_NESTED (func_type))
21766 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21767 if (IS_STACKALIGN (func_type))
21768 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21769 if (IS_CMSE_ENTRY (func_type))
21770 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21771
21772 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21773 (HOST_WIDE_INT) crtl->args.size,
21774 crtl->args.pretend_args_size,
21775 (HOST_WIDE_INT) get_frame_size ());
21776
21777 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21778 frame_pointer_needed,
21779 cfun->machine->uses_anonymous_args);
21780
21781 if (cfun->machine->lr_save_eliminated)
21782 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21783
21784 if (crtl->calls_eh_return)
21785 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21786
21787 }
21788
21789 static void
21790 arm_output_function_epilogue (FILE *)
21791 {
21792 arm_stack_offsets *offsets;
21793
21794 if (TARGET_THUMB1)
21795 {
21796 int regno;
21797
21798 /* Emit any call-via-reg trampolines that are needed for v4t support
21799 of call_reg and call_value_reg type insns. */
21800 for (regno = 0; regno < LR_REGNUM; regno++)
21801 {
21802 rtx label = cfun->machine->call_via[regno];
21803
21804 if (label != NULL)
21805 {
21806 switch_to_section (function_section (current_function_decl));
21807 targetm.asm_out.internal_label (asm_out_file, "L",
21808 CODE_LABEL_NUMBER (label));
21809 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21810 }
21811 }
21812
21813 /* ??? Probably not safe to set this here, since it assumes that a
21814 function will be emitted as assembly immediately after we generate
21815 RTL for it. This does not happen for inline functions. */
21816 cfun->machine->return_used_this_function = 0;
21817 }
21818 else /* TARGET_32BIT */
21819 {
21820 /* We need to take into account any stack-frame rounding. */
21821 offsets = arm_get_frame_offsets ();
21822
21823 gcc_assert (!use_return_insn (FALSE, NULL)
21824 || (cfun->machine->return_used_this_function != 0)
21825 || offsets->saved_regs == offsets->outgoing_args
21826 || frame_pointer_needed);
21827 }
21828 }
21829
21830 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21831 STR and STRD. If an even number of registers are being pushed, one
21832 or more STRD patterns are created for each register pair. If an
21833 odd number of registers are pushed, emit an initial STR followed by
21834 as many STRD instructions as are needed. This works best when the
21835 stack is initially 64-bit aligned (the normal case), since it
21836 ensures that each STRD is also 64-bit aligned. */
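/* For instance (illustrative only, assuming SP is 64-bit aligned on
   entry), pushing {r4, r5, r6, r7} comes out as

     strd    r4, r5, [sp, #-16]!
     strd    r6, r7, [sp, #8]

   while pushing {r4, r5, r6} starts with a single writeback store:

     str     r4, [sp, #-12]!
     strd    r5, r6, [sp, #4]  */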
21837 static void
21838 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21839 {
21840 int num_regs = 0;
21841 int i;
21842 int regno;
21843 rtx par = NULL_RTX;
21844 rtx dwarf = NULL_RTX;
21845 rtx tmp;
21846 bool first = true;
21847
21848 num_regs = bit_count (saved_regs_mask);
21849
21850 /* Must be at least one register to save, and can't save SP or PC. */
21851 gcc_assert (num_regs > 0 && num_regs <= 14);
21852 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21853 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21854
21855 /* Create sequence for DWARF info. All the frame-related data for
21856 debugging is held in this wrapper. */
21857 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21858
21859 /* Describe the stack adjustment. */
21860 tmp = gen_rtx_SET (stack_pointer_rtx,
21861 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21862 RTX_FRAME_RELATED_P (tmp) = 1;
21863 XVECEXP (dwarf, 0, 0) = tmp;
21864
21865 /* Find the first register. */
21866 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21867 ;
21868
21869 i = 0;
21870
21871 /* If there's an odd number of registers to push, start off by
21872 pushing a single register. This ensures that subsequent strd
21873 operations are dword aligned (assuming that SP was originally
21874 64-bit aligned). */
21875 if ((num_regs & 1) != 0)
21876 {
21877 rtx reg, mem, insn;
21878
21879 reg = gen_rtx_REG (SImode, regno);
21880 if (num_regs == 1)
21881 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21882 stack_pointer_rtx));
21883 else
21884 mem = gen_frame_mem (Pmode,
21885 gen_rtx_PRE_MODIFY
21886 (Pmode, stack_pointer_rtx,
21887 plus_constant (Pmode, stack_pointer_rtx,
21888 -4 * num_regs)));
21889
21890 tmp = gen_rtx_SET (mem, reg);
21891 RTX_FRAME_RELATED_P (tmp) = 1;
21892 insn = emit_insn (tmp);
21893 RTX_FRAME_RELATED_P (insn) = 1;
21894 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21895 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21896 RTX_FRAME_RELATED_P (tmp) = 1;
21897 i++;
21898 regno++;
21899 XVECEXP (dwarf, 0, i) = tmp;
21900 first = false;
21901 }
21902
21903 while (i < num_regs)
21904 if (saved_regs_mask & (1 << regno))
21905 {
21906 rtx reg1, reg2, mem1, mem2;
21907 rtx tmp0, tmp1, tmp2;
21908 int regno2;
21909
21910 /* Find the register to pair with this one. */
21911 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21912 regno2++)
21913 ;
21914
21915 reg1 = gen_rtx_REG (SImode, regno);
21916 reg2 = gen_rtx_REG (SImode, regno2);
21917
21918 if (first)
21919 {
21920 rtx insn;
21921
21922 first = false;
21923 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21924 stack_pointer_rtx,
21925 -4 * num_regs));
21926 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21927 stack_pointer_rtx,
21928 -4 * (num_regs - 1)));
21929 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21930 plus_constant (Pmode, stack_pointer_rtx,
21931 -4 * (num_regs)));
21932 tmp1 = gen_rtx_SET (mem1, reg1);
21933 tmp2 = gen_rtx_SET (mem2, reg2);
21934 RTX_FRAME_RELATED_P (tmp0) = 1;
21935 RTX_FRAME_RELATED_P (tmp1) = 1;
21936 RTX_FRAME_RELATED_P (tmp2) = 1;
21937 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21938 XVECEXP (par, 0, 0) = tmp0;
21939 XVECEXP (par, 0, 1) = tmp1;
21940 XVECEXP (par, 0, 2) = tmp2;
21941 insn = emit_insn (par);
21942 RTX_FRAME_RELATED_P (insn) = 1;
21943 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21944 }
21945 else
21946 {
21947 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21948 stack_pointer_rtx,
21949 4 * i));
21950 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21951 stack_pointer_rtx,
21952 4 * (i + 1)));
21953 tmp1 = gen_rtx_SET (mem1, reg1);
21954 tmp2 = gen_rtx_SET (mem2, reg2);
21955 RTX_FRAME_RELATED_P (tmp1) = 1;
21956 RTX_FRAME_RELATED_P (tmp2) = 1;
21957 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21958 XVECEXP (par, 0, 0) = tmp1;
21959 XVECEXP (par, 0, 1) = tmp2;
21960 emit_insn (par);
21961 }
21962
21963 /* Create unwind information. This is an approximation. */
21964 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21965 plus_constant (Pmode,
21966 stack_pointer_rtx,
21967 4 * i)),
21968 reg1);
21969 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21970 plus_constant (Pmode,
21971 stack_pointer_rtx,
21972 4 * (i + 1))),
21973 reg2);
21974
21975 RTX_FRAME_RELATED_P (tmp1) = 1;
21976 RTX_FRAME_RELATED_P (tmp2) = 1;
21977 XVECEXP (dwarf, 0, i + 1) = tmp1;
21978 XVECEXP (dwarf, 0, i + 2) = tmp2;
21979 i += 2;
21980 regno = regno2 + 1;
21981 }
21982 else
21983 regno++;
21984
21985 return;
21986 }
21987
21988 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21989 whenever possible, otherwise it emits single-word stores. The first store
21990 also allocates stack space for all saved registers, using writeback with
21991 post-addressing mode. All other stores use offset addressing. If no STRD
21992 can be emitted, this function emits a sequence of single-word stores,
21993 and not an STM as before, because single-word stores provide more
21994 scheduling freedom and can be turned into an STM by peephole optimizations. */
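/* As an illustrative sketch, a mask of {r4, r6, r7} would be emitted as

     str     r4, [sp, #-12]!
     strd    r6, r7, [sp, #4]

   since r5 is not being saved, r4 cannot form the consecutive,
   even-starting pair that ARM STRD requires and falls back to a
   single-word store.  */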
21995 static void
21996 arm_emit_strd_push (unsigned long saved_regs_mask)
21997 {
21998 int num_regs = 0;
21999 int i, j, dwarf_index = 0;
22000 int offset = 0;
22001 rtx dwarf = NULL_RTX;
22002 rtx insn = NULL_RTX;
22003 rtx tmp, mem;
22004
22005 /* TODO: More efficient code can be emitted by changing the
22006 layout, e.g., first push all pairs that can use STRD to keep the
22007 stack aligned, and then push all other registers. */
22008 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22009 if (saved_regs_mask & (1 << i))
22010 num_regs++;
22011
22012 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22013 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22014 gcc_assert (num_regs > 0);
22015
22016 /* Create sequence for DWARF info. */
22017 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22018
22019 /* For dwarf info, we generate explicit stack update. */
22020 tmp = gen_rtx_SET (stack_pointer_rtx,
22021 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22022 RTX_FRAME_RELATED_P (tmp) = 1;
22023 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22024
22025 /* Save registers. */
22026 offset = - 4 * num_regs;
22027 j = 0;
22028 while (j <= LAST_ARM_REGNUM)
22029 if (saved_regs_mask & (1 << j))
22030 {
22031 if ((j % 2 == 0)
22032 && (saved_regs_mask & (1 << (j + 1))))
22033 {
22034 /* Current register and previous register form register pair for
22035 which STRD can be generated. */
22036 if (offset < 0)
22037 {
22038 /* Allocate stack space for all saved registers. */
22039 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22040 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22041 mem = gen_frame_mem (DImode, tmp);
22042 offset = 0;
22043 }
22044 else if (offset > 0)
22045 mem = gen_frame_mem (DImode,
22046 plus_constant (Pmode,
22047 stack_pointer_rtx,
22048 offset));
22049 else
22050 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22051
22052 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22053 RTX_FRAME_RELATED_P (tmp) = 1;
22054 tmp = emit_insn (tmp);
22055
22056 /* Record the first store insn. */
22057 if (dwarf_index == 1)
22058 insn = tmp;
22059
22060 /* Generate dwarf info. */
22061 mem = gen_frame_mem (SImode,
22062 plus_constant (Pmode,
22063 stack_pointer_rtx,
22064 offset));
22065 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22066 RTX_FRAME_RELATED_P (tmp) = 1;
22067 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22068
22069 mem = gen_frame_mem (SImode,
22070 plus_constant (Pmode,
22071 stack_pointer_rtx,
22072 offset + 4));
22073 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22074 RTX_FRAME_RELATED_P (tmp) = 1;
22075 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22076
22077 offset += 8;
22078 j += 2;
22079 }
22080 else
22081 {
22082 /* Emit a single word store. */
22083 if (offset < 0)
22084 {
22085 /* Allocate stack space for all saved registers. */
22086 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22087 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22088 mem = gen_frame_mem (SImode, tmp);
22089 offset = 0;
22090 }
22091 else if (offset > 0)
22092 mem = gen_frame_mem (SImode,
22093 plus_constant (Pmode,
22094 stack_pointer_rtx,
22095 offset));
22096 else
22097 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22098
22099 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22100 RTX_FRAME_RELATED_P (tmp) = 1;
22101 tmp = emit_insn (tmp);
22102
22103 /* Record the first store insn. */
22104 if (dwarf_index == 1)
22105 insn = tmp;
22106
22107 /* Generate dwarf info. */
22108 mem = gen_frame_mem (SImode,
22109 plus_constant(Pmode,
22110 stack_pointer_rtx,
22111 offset));
22112 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22113 RTX_FRAME_RELATED_P (tmp) = 1;
22114 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22115
22116 offset += 4;
22117 j += 1;
22118 }
22119 }
22120 else
22121 j++;
22122
22123 /* Attach dwarf info to the first insn we generate. */
22124 gcc_assert (insn != NULL_RTX);
22125 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22126 RTX_FRAME_RELATED_P (insn) = 1;
22127 }
22128
22129 /* Generate and emit an insn that we will recognize as a push_multi.
22130 Unfortunately, since this insn does not reflect very well the actual
22131 semantics of the operation, we need to annotate the insn for the benefit
22132 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22133 MASK for registers that should be annotated for DWARF2 frame unwind
22134 information. */
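/* For example, a MASK covering {r4, r5, lr} yields an insn that the
   push_multi pattern typically prints as "push {r4, r5, lr}", while
   the attached frame note spells out the 12-byte SP decrement and the
   three individual stores (illustrative; the exact mnemonic depends on
   the target state).  */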
22135 static rtx
22136 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22137 {
22138 int num_regs = 0;
22139 int num_dwarf_regs = 0;
22140 int i, j;
22141 rtx par;
22142 rtx dwarf;
22143 int dwarf_par_index;
22144 rtx tmp, reg;
22145
22146 /* We don't record the PC in the dwarf frame information. */
22147 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22148
22149 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22150 {
22151 if (mask & (1 << i))
22152 num_regs++;
22153 if (dwarf_regs_mask & (1 << i))
22154 num_dwarf_regs++;
22155 }
22156
22157 gcc_assert (num_regs && num_regs <= 16);
22158 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22159
22160 /* For the body of the insn we are going to generate an UNSPEC in
22161 parallel with several USEs. This allows the insn to be recognized
22162 by the push_multi pattern in the arm.md file.
22163
22164 The body of the insn looks something like this:
22165
22166 (parallel [
22167 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22168 (const_int:SI <num>)))
22169 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22170 (use (reg:SI XX))
22171 (use (reg:SI YY))
22172 ...
22173 ])
22174
22175 For the frame note however, we try to be more explicit and actually
22176 show each register being stored into the stack frame, plus a (single)
22177 decrement of the stack pointer. We do it this way in order to be
22178 friendly to the stack unwinding code, which only wants to see a single
22179 stack decrement per instruction. The RTL we generate for the note looks
22180 something like this:
22181
22182 (sequence [
22183 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22184 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22185 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22186 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22187 ...
22188 ])
22189
22190 FIXME: In an ideal world the PRE_MODIFY would not exist and
22191 instead we'd have a parallel expression detailing all
22192 the stores to the various memory addresses so that debug
22193 information is more up-to-date. Remember however while writing
22194 this to take care of the constraints with the push instruction.
22195
22196 Note also that this has to be taken care of for the VFP registers.
22197
22198 For more see PR43399. */
22199
22200 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22201 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22202 dwarf_par_index = 1;
22203
22204 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22205 {
22206 if (mask & (1 << i))
22207 {
22208 reg = gen_rtx_REG (SImode, i);
22209
22210 XVECEXP (par, 0, 0)
22211 = gen_rtx_SET (gen_frame_mem
22212 (BLKmode,
22213 gen_rtx_PRE_MODIFY (Pmode,
22214 stack_pointer_rtx,
22215 plus_constant
22216 (Pmode, stack_pointer_rtx,
22217 -4 * num_regs))
22218 ),
22219 gen_rtx_UNSPEC (BLKmode,
22220 gen_rtvec (1, reg),
22221 UNSPEC_PUSH_MULT));
22222
22223 if (dwarf_regs_mask & (1 << i))
22224 {
22225 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22226 reg);
22227 RTX_FRAME_RELATED_P (tmp) = 1;
22228 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22229 }
22230
22231 break;
22232 }
22233 }
22234
22235 for (j = 1, i++; j < num_regs; i++)
22236 {
22237 if (mask & (1 << i))
22238 {
22239 reg = gen_rtx_REG (SImode, i);
22240
22241 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22242
22243 if (dwarf_regs_mask & (1 << i))
22244 {
22245 tmp
22246 = gen_rtx_SET (gen_frame_mem
22247 (SImode,
22248 plus_constant (Pmode, stack_pointer_rtx,
22249 4 * j)),
22250 reg);
22251 RTX_FRAME_RELATED_P (tmp) = 1;
22252 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22253 }
22254
22255 j++;
22256 }
22257 }
22258
22259 par = emit_insn (par);
22260
22261 tmp = gen_rtx_SET (stack_pointer_rtx,
22262 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22263 RTX_FRAME_RELATED_P (tmp) = 1;
22264 XVECEXP (dwarf, 0, 0) = tmp;
22265
22266 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22267
22268 return par;
22269 }
22270
22271 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22272 SIZE is the offset to be adjusted.
22273 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
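/* Callers in the epilogue code typically pass SIZE equal to
   UNITS_PER_WORD times the number of words just popped, with DEST and
   SRC both stack_pointer_rtx, so that the unwinder's CFA tracks the
   adjusted stack pointer.  */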
22274 static void
22275 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22276 {
22277 rtx dwarf;
22278
22279 RTX_FRAME_RELATED_P (insn) = 1;
22280 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22281 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22282 }
22283
22284 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22285 SAVED_REGS_MASK shows which registers need to be restored.
22286
22287 Unfortunately, since this insn does not reflect very well the actual
22288 semantics of the operation, we need to annotate the insn for the benefit
22289 of DWARF2 frame unwind information. */
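/* For example, with SAVED_REGS_MASK covering {r4, r5, pc} the parallel
   built below holds a return, a 12-byte SP increment and three register
   loads, and is typically printed as "pop {r4, r5, pc}" (illustrative;
   the exact form depends on the matching pattern).  */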
22290 static void
22291 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22292 {
22293 int num_regs = 0;
22294 int i, j;
22295 rtx par;
22296 rtx dwarf = NULL_RTX;
22297 rtx tmp, reg;
22298 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22299 int offset_adj;
22300 int emit_update;
22301
22302 offset_adj = return_in_pc ? 1 : 0;
22303 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22304 if (saved_regs_mask & (1 << i))
22305 num_regs++;
22306
22307 gcc_assert (num_regs && num_regs <= 16);
22308
22309 /* If SP is in the reglist, then we don't emit an SP update insn. */
22310 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22311
22312 /* The parallel needs to hold num_regs SETs
22313 and one SET for the stack update. */
22314 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22315
22316 if (return_in_pc)
22317 XVECEXP (par, 0, 0) = ret_rtx;
22318
22319 if (emit_update)
22320 {
22321 /* Increment the stack pointer, based on there being
22322 num_regs 4-byte registers to restore. */
22323 tmp = gen_rtx_SET (stack_pointer_rtx,
22324 plus_constant (Pmode,
22325 stack_pointer_rtx,
22326 4 * num_regs));
22327 RTX_FRAME_RELATED_P (tmp) = 1;
22328 XVECEXP (par, 0, offset_adj) = tmp;
22329 }
22330
22331 /* Now restore every reg, which may include PC. */
22332 for (j = 0, i = 0; j < num_regs; i++)
22333 if (saved_regs_mask & (1 << i))
22334 {
22335 reg = gen_rtx_REG (SImode, i);
22336 if ((num_regs == 1) && emit_update && !return_in_pc)
22337 {
22338 /* Emit single load with writeback. */
22339 tmp = gen_frame_mem (SImode,
22340 gen_rtx_POST_INC (Pmode,
22341 stack_pointer_rtx));
22342 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22343 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22344 return;
22345 }
22346
22347 tmp = gen_rtx_SET (reg,
22348 gen_frame_mem
22349 (SImode,
22350 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22351 RTX_FRAME_RELATED_P (tmp) = 1;
22352 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22353
22354 /* We need to maintain a sequence for DWARF info too. As dwarf info
22355 should not have PC, skip PC. */
22356 if (i != PC_REGNUM)
22357 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22358
22359 j++;
22360 }
22361
22362 if (return_in_pc)
22363 par = emit_jump_insn (par);
22364 else
22365 par = emit_insn (par);
22366
22367 REG_NOTES (par) = dwarf;
22368 if (!return_in_pc)
22369 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22370 stack_pointer_rtx, stack_pointer_rtx);
22371 }
22372
22373 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22374 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22375
22376 Unfortunately, since this insn does not reflect very well the actual
22377 semantics of the operation, we need to annotate the insn for the benefit
22378 of DWARF2 frame unwind information. */
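/* As an illustration, restoring d8-d11 with BASE_REG being the stack
   pointer pops the four D registers and advances the base by 32 bytes
   in one go, much like "vldm sp!, {d8-d11}" (8 bytes per D register).  */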
22379 static void
22380 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22381 {
22382 int i, j;
22383 rtx par;
22384 rtx dwarf = NULL_RTX;
22385 rtx tmp, reg;
22386
22387 gcc_assert (num_regs && num_regs <= 32);
22388
22389 /* Workaround ARM10 VFPr1 bug. */
22390 if (num_regs == 2 && !arm_arch6)
22391 {
22392 if (first_reg == 15)
22393 first_reg--;
22394
22395 num_regs++;
22396 }
22397
22398 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22399 there could be up to 32 D-registers to restore.
22400 If there are more than 16 D-registers, make two recursive calls,
22401 each of which emits one pop_multi instruction. */
22402 if (num_regs > 16)
22403 {
22404 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22405 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22406 return;
22407 }
22408
22409 /* The parallel needs to hold num_regs SETs
22410 and one SET for the stack update. */
22411 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22412
22413 /* Increment the stack pointer, based on there being
22414 num_regs 8-byte registers to restore. */
22415 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22416 RTX_FRAME_RELATED_P (tmp) = 1;
22417 XVECEXP (par, 0, 0) = tmp;
22418
22419 /* Now show every reg that will be restored, using a SET for each. */
22420 for (j = 0, i=first_reg; j < num_regs; i += 2)
22421 {
22422 reg = gen_rtx_REG (DFmode, i);
22423
22424 tmp = gen_rtx_SET (reg,
22425 gen_frame_mem
22426 (DFmode,
22427 plus_constant (Pmode, base_reg, 8 * j)));
22428 RTX_FRAME_RELATED_P (tmp) = 1;
22429 XVECEXP (par, 0, j + 1) = tmp;
22430
22431 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22432
22433 j++;
22434 }
22435
22436 par = emit_insn (par);
22437 REG_NOTES (par) = dwarf;
22438
22439 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22440 if (REGNO (base_reg) == IP_REGNUM)
22441 {
22442 RTX_FRAME_RELATED_P (par) = 1;
22443 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22444 }
22445 else
22446 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22447 base_reg, base_reg);
22448 }
22449
22450 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22451 an even number of registers is being popped, multiple LDRD patterns are
22452 created for all register pairs. If an odd number of registers is popped,
22453 the last register is loaded using an LDR pattern. */
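/* An illustrative sketch of the result for {r4, r5, r6, r7, pc}:

     ldrd    r4, r5, [sp]
     ldrd    r6, r7, [sp, #8]
     add     sp, sp, #16
     ldr     pc, [sp], #4

   Thumb-2 LDRD does not require consecutive target registers, so the
   popped registers are simply paired in mask order.  */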
22454 static void
22455 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22456 {
22457 int num_regs = 0;
22458 int i, j;
22459 rtx par = NULL_RTX;
22460 rtx dwarf = NULL_RTX;
22461 rtx tmp, reg, tmp1;
22462 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22463
22464 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22465 if (saved_regs_mask & (1 << i))
22466 num_regs++;
22467
22468 gcc_assert (num_regs && num_regs <= 16);
22469
22470 /* We cannot generate LDRD for PC, so reduce the count if PC is to
22471 be popped. If num_regs was even it now becomes odd and we can
22472 generate a pop that includes PC; if it was odd it becomes even
22473 and an LDR with return can be generated for PC. */
22474 if (return_in_pc)
22475 num_regs--;
22476
22477 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22478
22479 /* Var j iterates over all the registers in saved_regs_mask, while var i
22480 gives the index of each saved register within the stack frame. A PARALLEL
22481 RTX holding a register pair is created here so that the LDRD pattern can
22482 be matched. As PC is always the last register to be popped, and num_regs
22483 has already been decremented if PC is present, we don't have to worry
22484 about PC in this loop. */
22485 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22486 if (saved_regs_mask & (1 << j))
22487 {
22488 /* Create RTX for memory load. */
22489 reg = gen_rtx_REG (SImode, j);
22490 tmp = gen_rtx_SET (reg,
22491 gen_frame_mem (SImode,
22492 plus_constant (Pmode,
22493 stack_pointer_rtx, 4 * i)));
22494 RTX_FRAME_RELATED_P (tmp) = 1;
22495
22496 if (i % 2 == 0)
22497 {
22498 /* When saved-register index (i) is even, the RTX to be emitted is
22499 yet to be created. Hence create it first. The LDRD pattern we
22500 are generating is :
22501 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22502 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22503 where target registers need not be consecutive. */
22504 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22505 dwarf = NULL_RTX;
22506 }
22507
22508 /* The ith register is added to the PARALLEL RTX: if i is even it becomes
22509 the 0th element, and if i is odd it becomes the 1st element of the LDRD
22510 pattern shown above. */
22511 XVECEXP (par, 0, (i % 2)) = tmp;
22512 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22513
22514 if ((i % 2) == 1)
22515 {
22516 /* When the saved-register index (i) is odd, RTXs for both registers of
22517 the LDRD pattern shown above have been generated, and the pattern can
22518 be emitted now. */
22519 par = emit_insn (par);
22520 REG_NOTES (par) = dwarf;
22521 RTX_FRAME_RELATED_P (par) = 1;
22522 }
22523
22524 i++;
22525 }
22526
22527 /* If num_regs is odd and return_in_pc is false, or num_regs is even and
22528 return_in_pc is true, one register remains to be popped. It is loaded
22529 using LDR and may be PC. Hence, adjust the stack first and then use
22530 LDR with post increment. */
22531
22532 /* Increment the stack pointer, based on there being
22533 num_regs 4-byte registers to restore. */
22534 tmp = gen_rtx_SET (stack_pointer_rtx,
22535 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22536 RTX_FRAME_RELATED_P (tmp) = 1;
22537 tmp = emit_insn (tmp);
22538 if (!return_in_pc)
22539 {
22540 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22541 stack_pointer_rtx, stack_pointer_rtx);
22542 }
22543
22544 dwarf = NULL_RTX;
22545
22546 if (((num_regs % 2) == 1 && !return_in_pc)
22547 || ((num_regs % 2) == 0 && return_in_pc))
22548 {
22549 /* Scan for the single register to be popped. Skip until the saved
22550 register is found. */
22551 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22552
22553 /* Gen LDR with post increment here. */
22554 tmp1 = gen_rtx_MEM (SImode,
22555 gen_rtx_POST_INC (SImode,
22556 stack_pointer_rtx));
22557 set_mem_alias_set (tmp1, get_frame_alias_set ());
22558
22559 reg = gen_rtx_REG (SImode, j);
22560 tmp = gen_rtx_SET (reg, tmp1);
22561 RTX_FRAME_RELATED_P (tmp) = 1;
22562 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22563
22564 if (return_in_pc)
22565 {
22566 /* If return_in_pc, j must be PC_REGNUM. */
22567 gcc_assert (j == PC_REGNUM);
22568 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22569 XVECEXP (par, 0, 0) = ret_rtx;
22570 XVECEXP (par, 0, 1) = tmp;
22571 par = emit_jump_insn (par);
22572 }
22573 else
22574 {
22575 par = emit_insn (tmp);
22576 REG_NOTES (par) = dwarf;
22577 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22578 stack_pointer_rtx, stack_pointer_rtx);
22579 }
22580
22581 }
22582 else if ((num_regs % 2) == 1 && return_in_pc)
22583 {
22584 /* Two registers are left to pop (one core register plus PC). Generate
22585 the pattern pop_multiple_with_stack_update_and_return to pop into PC. */
22586 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22587 }
22588
22589 return;
22590 }
22591
22592 /* LDRD in ARM mode needs consecutive registers as operands. This function
22593 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22594 offset addressing and then generates one separate stack update. This provides
22595 more scheduling freedom, compared to writeback on every load. However,
22596 if the function returns using load into PC directly
22597 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22598 before the last load. TODO: Add a peephole optimization to recognize
22599 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22600 peephole optimization to merge the load at stack-offset zero
22601 with the stack update instruction using load with writeback
22602 in post-index addressing mode. */
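/* An illustrative sketch of the result for {r4, r5, r6, pc}:

     ldrd    r4, r5, [sp]
     ldr     r6, [sp, #8]
     add     sp, sp, #12
     ldr     pc, [sp], #4

   r6 has no consecutive partner in the mask, so it is loaded with a
   plain LDR, and the final load into PC doubles as the return.  */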
22603 static void
22604 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22605 {
22606 int j = 0;
22607 int offset = 0;
22608 rtx par = NULL_RTX;
22609 rtx dwarf = NULL_RTX;
22610 rtx tmp, mem;
22611
22612 /* Restore saved registers. */
22613 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22614 j = 0;
22615 while (j <= LAST_ARM_REGNUM)
22616 if (saved_regs_mask & (1 << j))
22617 {
22618 if ((j % 2) == 0
22619 && (saved_regs_mask & (1 << (j + 1)))
22620 && (j + 1) != PC_REGNUM)
22621 {
22622 /* Current register and next register form register pair for which
22623 LDRD can be generated. PC is always the last register popped, and
22624 we handle it separately. */
22625 if (offset > 0)
22626 mem = gen_frame_mem (DImode,
22627 plus_constant (Pmode,
22628 stack_pointer_rtx,
22629 offset));
22630 else
22631 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22632
22633 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22634 tmp = emit_insn (tmp);
22635 RTX_FRAME_RELATED_P (tmp) = 1;
22636
22637 /* Generate dwarf info. */
22638
22639 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22640 gen_rtx_REG (SImode, j),
22641 NULL_RTX);
22642 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22643 gen_rtx_REG (SImode, j + 1),
22644 dwarf);
22645
22646 REG_NOTES (tmp) = dwarf;
22647
22648 offset += 8;
22649 j += 2;
22650 }
22651 else if (j != PC_REGNUM)
22652 {
22653 /* Emit a single word load. */
22654 if (offset > 0)
22655 mem = gen_frame_mem (SImode,
22656 plus_constant (Pmode,
22657 stack_pointer_rtx,
22658 offset));
22659 else
22660 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22661
22662 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22663 tmp = emit_insn (tmp);
22664 RTX_FRAME_RELATED_P (tmp) = 1;
22665
22666 /* Generate dwarf info. */
22667 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22668 gen_rtx_REG (SImode, j),
22669 NULL_RTX);
22670
22671 offset += 4;
22672 j += 1;
22673 }
22674 else /* j == PC_REGNUM */
22675 j++;
22676 }
22677 else
22678 j++;
22679
22680 /* Update the stack. */
22681 if (offset > 0)
22682 {
22683 tmp = gen_rtx_SET (stack_pointer_rtx,
22684 plus_constant (Pmode,
22685 stack_pointer_rtx,
22686 offset));
22687 tmp = emit_insn (tmp);
22688 arm_add_cfa_adjust_cfa_note (tmp, offset,
22689 stack_pointer_rtx, stack_pointer_rtx);
22690 offset = 0;
22691 }
22692
22693 if (saved_regs_mask & (1 << PC_REGNUM))
22694 {
22695 /* Only PC is to be popped. */
22696 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22697 XVECEXP (par, 0, 0) = ret_rtx;
22698 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22699 gen_frame_mem (SImode,
22700 gen_rtx_POST_INC (SImode,
22701 stack_pointer_rtx)));
22702 RTX_FRAME_RELATED_P (tmp) = 1;
22703 XVECEXP (par, 0, 1) = tmp;
22704 par = emit_jump_insn (par);
22705
22706 /* Generate dwarf info. */
22707 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22708 gen_rtx_REG (SImode, PC_REGNUM),
22709 NULL_RTX);
22710 REG_NOTES (par) = dwarf;
22711 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22712 stack_pointer_rtx, stack_pointer_rtx);
22713 }
22714 }
22715
22716 /* Calculate the size of the return value that is passed in registers. */
22717 static unsigned
22718 arm_size_return_regs (void)
22719 {
22720 machine_mode mode;
22721
22722 if (crtl->return_rtx != 0)
22723 mode = GET_MODE (crtl->return_rtx);
22724 else
22725 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22726
22727 return GET_MODE_SIZE (mode);
22728 }
22729
22730 /* Return true if the current function needs to save/restore LR. */
22731 static bool
22732 thumb_force_lr_save (void)
22733 {
22734 return !cfun->machine->lr_save_eliminated
22735 && (!crtl->is_leaf
22736 || thumb_far_jump_used_p ()
22737 || df_regs_ever_live_p (LR_REGNUM));
22738 }
22739
22740 /* We do not know whether r3 will be available, because
22741 an indirect tail call is happening in this
22742 particular case. */
22743 static bool
22744 is_indirect_tailcall_p (rtx call)
22745 {
22746 rtx pat = PATTERN (call);
22747
22748 /* Indirect tail call. */
22749 pat = XVECEXP (pat, 0, 0);
22750 if (GET_CODE (pat) == SET)
22751 pat = SET_SRC (pat);
22752
22753 pat = XEXP (XEXP (pat, 0), 0);
22754 return REG_P (pat);
22755 }
22756
22757 /* Return true if r3 is used by any of the tail call insns in the
22758 current function. */
22759 static bool
22760 any_sibcall_could_use_r3 (void)
22761 {
22762 edge_iterator ei;
22763 edge e;
22764
22765 if (!crtl->tail_call_emit)
22766 return false;
22767 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22768 if (e->flags & EDGE_SIBCALL)
22769 {
22770 rtx_insn *call = BB_END (e->src);
22771 if (!CALL_P (call))
22772 call = prev_nonnote_nondebug_insn (call);
22773 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22774 if (find_regno_fusage (call, USE, 3)
22775 || is_indirect_tailcall_p (call))
22776 return true;
22777 }
22778 return false;
22779 }
22780
22781
22782 /* Compute the distance from register FROM to register TO.
22783 These can be the arg pointer (26), the soft frame pointer (25),
22784 the stack pointer (13) or the hard frame pointer (11).
22785 In thumb mode r7 is used as the soft frame pointer, if needed.
22786 Typical stack layout looks like this:
22787
22788     old stack pointer -> |    |
22789                           ----
22790                          |    | \
22791                          |    |   saved arguments for
22792                          |    |   vararg functions
22793                          |    | /
22794                            --
22795 hard FP & arg pointer -> |    | \
22796                          |    |   stack
22797                          |    |   frame
22798                          |    | /
22799                            --
22800                          |    | \
22801                          |    |   call saved
22802                          |    |   registers
22803    soft frame pointer -> |    | /
22804                            --
22805                          |    | \
22806                          |    |   local
22807                          |    |   variables
22808   locals base pointer -> |    | /
22809                            --
22810                          |    | \
22811                          |    |   outgoing
22812                          |    |   arguments
22813 current stack pointer -> |    | /
22814                            --
22815
22816 For a given function some or all of these stack components
22817 may not be needed, giving rise to the possibility of
22818 eliminating some of the registers.
22819
22820 The values returned by this function must reflect the behavior
22821 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22822
22823 The sign of the number returned reflects the direction of stack
22824 growth, so the values are positive for all eliminations except
22825 from the soft frame pointer to the hard frame pointer.
22826
22827 SFP may point just inside the local variables block to ensure correct
22828 alignment. */
22829
22830
22831 /* Return cached stack offsets. */
22832
22833 static arm_stack_offsets *
22834 arm_get_frame_offsets (void)
22835 {
22836 struct arm_stack_offsets *offsets;
22837
22838 offsets = &cfun->machine->stack_offsets;
22839
22840 return offsets;
22841 }
22842
22843
22844 /* Calculate stack offsets. These are used to calculate register elimination
22845 offsets and in prologue/epilogue code. Also calculates which registers
22846 should be saved. */
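/* A worked example (illustrative, assuming no pretend args, no static
   chain, no caller-interworking slot and no VFP or iWMMXt saves): a
   32-bit function that saves {r4, r5, lr}, has 8 bytes of locals and
   no outgoing arguments gets saved_regs = 12, soft_frame rounded up to
   16 for doubleword alignment, locals_base = 24 and
   outgoing_args = 24.  */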
22847
22848 static void
22849 arm_compute_frame_layout (void)
22850 {
22851 struct arm_stack_offsets *offsets;
22852 unsigned long func_type;
22853 int saved;
22854 int core_saved;
22855 HOST_WIDE_INT frame_size;
22856 int i;
22857
22858 offsets = &cfun->machine->stack_offsets;
22859
22860 /* Initially this is the size of the local variables. It will be translated
22861 into an offset once we have determined the size of preceding data. */
22862 frame_size = ROUND_UP_WORD (get_frame_size ());
22863
22864 /* Space for variadic functions. */
22865 offsets->saved_args = crtl->args.pretend_args_size;
22866
22867 /* In Thumb mode this is incorrect, but never used. */
22868 offsets->frame
22869 = (offsets->saved_args
22870 + arm_compute_static_chain_stack_bytes ()
22871 + (frame_pointer_needed ? 4 : 0));
22872
22873 if (TARGET_32BIT)
22874 {
22875 unsigned int regno;
22876
22877 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22878 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22879 saved = core_saved;
22880
22881 /* We know that SP will be doubleword aligned on entry, and we must
22882 preserve that condition at any subroutine call. We also require the
22883 soft frame pointer to be doubleword aligned. */
22884
22885 if (TARGET_REALLY_IWMMXT)
22886 {
22887 /* Check for the call-saved iWMMXt registers. */
22888 for (regno = FIRST_IWMMXT_REGNUM;
22889 regno <= LAST_IWMMXT_REGNUM;
22890 regno++)
22891 if (reg_needs_saving_p (regno))
22892 saved += 8;
22893 }
22894
22895 func_type = arm_current_func_type ();
22896 /* Space for saved VFP registers. */
22897 if (! IS_VOLATILE (func_type)
22898 && TARGET_VFP_BASE)
22899 saved += arm_get_vfp_saved_size ();
22900
22901 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22902 nonsecure entry functions with VSTR/VLDR. */
22903 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22904 saved += 4;
22905 }
22906 else /* TARGET_THUMB1 */
22907 {
22908 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22909 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22910 saved = core_saved;
22911 if (TARGET_BACKTRACE)
22912 saved += 16;
22913 }
22914
22915 /* Saved registers include the stack frame. */
22916 offsets->saved_regs
22917 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22918 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22919
22920 /* A leaf function does not need any stack alignment if it has nothing
22921 on the stack. */
22922 if (crtl->is_leaf && frame_size == 0
22923 /* However if it calls alloca(), we have a dynamically allocated
22924 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22925 && ! cfun->calls_alloca)
22926 {
22927 offsets->outgoing_args = offsets->soft_frame;
22928 offsets->locals_base = offsets->soft_frame;
22929 return;
22930 }
22931
22932 /* Ensure SFP has the correct alignment. */
22933 if (ARM_DOUBLEWORD_ALIGN
22934 && (offsets->soft_frame & 7))
22935 {
22936 offsets->soft_frame += 4;
22937 /* Try to align stack by pushing an extra reg. Don't bother doing this
22938 when there is a stack frame as the alignment will be rolled into
22939 the normal stack adjustment. */
22940 if (frame_size + crtl->outgoing_args_size == 0)
22941 {
22942 int reg = -1;
22943
22944 /* Register r3 is caller-saved. Normally it does not need to be
22945 saved on entry by the prologue. However if we choose to save
22946 it for padding then we may confuse the compiler into thinking
22947 a prologue sequence is required when in fact it is not. This
22948 will occur when shrink-wrapping if r3 is used as a scratch
22949 register and there are no other callee-saved writes.
22950
22951 This situation can be avoided when other callee-saved registers
22952 are available and r3 is not mandatory if we choose a callee-saved
22953 register for padding. */
22954 bool prefer_callee_reg_p = false;
22955
22956 /* If it is safe to use r3, then do so. This sometimes
22957 generates better code on Thumb-2 by avoiding the need to
22958 use 32-bit push/pop instructions. */
22959 if (! any_sibcall_could_use_r3 ()
22960 && arm_size_return_regs () <= 12
22961 && (offsets->saved_regs_mask & (1 << 3)) == 0
22962 && (TARGET_THUMB2
22963 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22964 {
22965 reg = 3;
22966 if (!TARGET_THUMB2)
22967 prefer_callee_reg_p = true;
22968 }
22969 if (reg == -1
22970 || prefer_callee_reg_p)
22971 {
22972 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22973 {
22974 /* Avoid fixed registers; they may be changed at
22975 arbitrary times so it's unsafe to restore them
22976 during the epilogue. */
22977 if (!fixed_regs[i]
22978 && (offsets->saved_regs_mask & (1 << i)) == 0)
22979 {
22980 reg = i;
22981 break;
22982 }
22983 }
22984 }
22985
22986 if (reg != -1)
22987 {
22988 offsets->saved_regs += 4;
22989 offsets->saved_regs_mask |= (1 << reg);
22990 }
22991 }
22992 }
22993
22994 offsets->locals_base = offsets->soft_frame + frame_size;
22995 offsets->outgoing_args = (offsets->locals_base
22996 + crtl->outgoing_args_size);
22997
22998 if (ARM_DOUBLEWORD_ALIGN)
22999 {
23000 /* Ensure SP remains doubleword aligned. */
23001 if (offsets->outgoing_args & 7)
23002 offsets->outgoing_args += 4;
23003 gcc_assert (!(offsets->outgoing_args & 7));
23004 }
23005 }
23006
23007
23008 /* Calculate the relative offsets for the different stack pointers. Positive
23009 offsets are in the direction of stack growth. */
23010
23011 HOST_WIDE_INT
23012 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23013 {
23014 arm_stack_offsets *offsets;
23015
23016 offsets = arm_get_frame_offsets ();
23017
23018 /* OK, now we have enough information to compute the distances.
23019 There must be an entry in these switch tables for each pair
23020 of registers in ELIMINABLE_REGS, even if some of the entries
23021 seem to be redundant or useless. */
23022 switch (from)
23023 {
23024 case ARG_POINTER_REGNUM:
23025 switch (to)
23026 {
23027 case THUMB_HARD_FRAME_POINTER_REGNUM:
23028 return 0;
23029
23030 case FRAME_POINTER_REGNUM:
23031 /* This is the reverse of the soft frame pointer
23032 to hard frame pointer elimination below. */
23033 return offsets->soft_frame - offsets->saved_args;
23034
23035 case ARM_HARD_FRAME_POINTER_REGNUM:
23036 /* This is only non-zero in the case where the static chain register
23037 is stored above the frame. */
23038 return offsets->frame - offsets->saved_args - 4;
23039
23040 case STACK_POINTER_REGNUM:
23041 /* If nothing has been pushed on the stack at all
23042 then this will return -4. This *is* correct! */
23043 return offsets->outgoing_args - (offsets->saved_args + 4);
23044
23045 default:
23046 gcc_unreachable ();
23047 }
23048 gcc_unreachable ();
23049
23050 case FRAME_POINTER_REGNUM:
23051 switch (to)
23052 {
23053 case THUMB_HARD_FRAME_POINTER_REGNUM:
23054 return 0;
23055
23056 case ARM_HARD_FRAME_POINTER_REGNUM:
23057 /* The hard frame pointer points to the top entry in the
23058 stack frame. The soft frame pointer to the bottom entry
23059 in the stack frame. If there is no stack frame at all,
23060 then they are identical. */
23061
23062 return offsets->frame - offsets->soft_frame;
23063
23064 case STACK_POINTER_REGNUM:
23065 return offsets->outgoing_args - offsets->soft_frame;
23066
23067 default:
23068 gcc_unreachable ();
23069 }
23070 gcc_unreachable ();
23071
23072 default:
23073 /* You cannot eliminate from the stack pointer.
23074 In theory you could eliminate from the hard frame
23075 pointer to the stack pointer, but this will never
23076 happen, since if a stack frame is not needed the
23077 hard frame pointer will never be used. */
23078 gcc_unreachable ();
23079 }
23080 }
23081
23082 /* Given FROM and TO register numbers, say whether this elimination is
23083 allowed. Frame pointer elimination is automatically handled.
23084
23085 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23086 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23087 pointer, we must eliminate FRAME_POINTER_REGNUM into
23088 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23089 ARG_POINTER_REGNUM. */
23090
23091 bool
23092 arm_can_eliminate (const int from, const int to)
23093 {
23094 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23095 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23096 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23097 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23098 true);
23099 }
23100
23101 /* Emit RTL to save coprocessor registers on function entry. Returns the
23102 number of bytes pushed. */
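/* For instance, if d8-d15 need saving and nothing else does, the VFP
   loop below finds one contiguous run and emits a single
   store-multiple of eight D registers via vfp_emit_fstmd, adding 64 to
   the returned size.  */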
23103
23104 static int
23105 arm_save_coproc_regs(void)
23106 {
23107 int saved_size = 0;
23108 unsigned reg;
23109 unsigned start_reg;
23110 rtx insn;
23111
23112 if (TARGET_REALLY_IWMMXT)
23113 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23114 if (reg_needs_saving_p (reg))
23115 {
23116 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23117 insn = gen_rtx_MEM (V2SImode, insn);
23118 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23119 RTX_FRAME_RELATED_P (insn) = 1;
23120 saved_size += 8;
23121 }
23122
23123 if (TARGET_VFP_BASE)
23124 {
23125 start_reg = FIRST_VFP_REGNUM;
23126
23127 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23128 {
23129 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23130 {
23131 if (start_reg != reg)
23132 saved_size += vfp_emit_fstmd (start_reg,
23133 (reg - start_reg) / 2);
23134 start_reg = reg + 2;
23135 }
23136 }
23137 if (start_reg != reg)
23138 saved_size += vfp_emit_fstmd (start_reg,
23139 (reg - start_reg) / 2);
23140 }
23141 return saved_size;
23142 }
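
/* A minimal standalone sketch (not part of the build) of how the VFP loop
   above coalesces consecutive D-register pairs into runs, each of which
   would be stored by a single vfp_emit_fstmd call.  The bitmap contents
   and the register numbering (starting at 0) are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int needs_saving[32] = { 0 };	/* One entry per S register.  */
  int reg, start_reg = 0, nregs = 32;

  /* Pretend d8-d11 (s16-s23) and d14-d15 (s28-s31) need saving.  */
  for (reg = 16; reg <= 23; reg++)
    needs_saving[reg] = 1;
  for (reg = 28; reg <= 31; reg++)
    needs_saving[reg] = 1;

  for (reg = 0; reg < nregs; reg += 2)
    if (!needs_saving[reg] && !needs_saving[reg + 1])
      {
	if (start_reg != reg)
	  printf ("store run starting at s%d, %d D registers\n",
		  start_reg, (reg - start_reg) / 2);
	start_reg = reg + 2;
      }
  if (start_reg != reg)
    printf ("store run starting at s%d, %d D registers\n",
	    start_reg, (reg - start_reg) / 2);
  return 0;
}
#endif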
23143
23144
23145 /* Set the Thumb frame pointer from the stack pointer. */
23146
23147 static void
23148 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23149 {
23150 HOST_WIDE_INT amount;
23151 rtx insn, dwarf;
23152
23153 amount = offsets->outgoing_args - offsets->locals_base;
23154 if (amount < 1024)
23155 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23156 stack_pointer_rtx, GEN_INT (amount)));
23157 else
23158 {
23159 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23160 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23161 expects the first two operands to be the same. */
23162 if (TARGET_THUMB2)
23163 {
23164 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23165 stack_pointer_rtx,
23166 hard_frame_pointer_rtx));
23167 }
23168 else
23169 {
23170 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23171 hard_frame_pointer_rtx,
23172 stack_pointer_rtx));
23173 }
23174 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23175 plus_constant (Pmode, stack_pointer_rtx, amount));
23176 RTX_FRAME_RELATED_P (dwarf) = 1;
23177 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23178 }
23179
23180 RTX_FRAME_RELATED_P (insn) = 1;
23181 }
23182
23183 struct scratch_reg {
23184 rtx reg;
23185 bool saved;
23186 };
23187
23188 /* Return a short-lived scratch register for use as a 2nd scratch register on
23189 function entry after the registers are saved in the prologue. This register
23190 must be released by means of release_scratch_register_on_entry. IP is not
23191 considered since it is always used as the 1st scratch register if available.
23192
23193 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23194 mask of live registers. */
23195
23196 static void
23197 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23198 unsigned long live_regs)
23199 {
23200 int regno = -1;
23201
23202 sr->saved = false;
23203
23204 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23205 regno = LR_REGNUM;
23206 else
23207 {
23208 unsigned int i;
23209
23210 for (i = 4; i < 11; i++)
23211 if (regno1 != i && (live_regs & (1 << i)) != 0)
23212 {
23213 regno = i;
23214 break;
23215 }
23216
23217 if (regno < 0)
23218 {
23219 /* If IP is used as the 1st scratch register for a nested function,
23220 then either r3 wasn't available or is used to preserve IP. */
23221 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23222 regno1 = 3;
23223 regno = (regno1 == 3 ? 2 : 3);
23224 sr->saved
23225 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23226 regno);
23227 }
23228 }
23229
23230 sr->reg = gen_rtx_REG (SImode, regno);
23231 if (sr->saved)
23232 {
23233 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23234 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23235 rtx x = gen_rtx_SET (stack_pointer_rtx,
23236 plus_constant (Pmode, stack_pointer_rtx, -4));
23237 RTX_FRAME_RELATED_P (insn) = 1;
23238 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23239 }
23240 }
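
/* A minimal standalone sketch (not part of the build) of the selection
   order implemented above: prefer LR if it is already being pushed, then
   the first available register in r4-r10, and only then fall back to
   r2/r3 (which may have to be spilled around its use).  The register
   numbers, masks and main () wrapper are illustrative assumptions.  */
#if 0
#include <stdio.h>

#define SKETCH_LR 14
#define SKETCH_IP 12

static int
sketch_pick_scratch (unsigned int regno1, unsigned long live_regs,
		     int is_nested)
{
  if (regno1 != SKETCH_LR && (live_regs & (1ul << SKETCH_LR)) != 0)
    return SKETCH_LR;

  for (unsigned int i = 4; i < 11; i++)
    if (regno1 != i && (live_regs & (1ul << i)) != 0)
      return i;

  if (regno1 == SKETCH_IP && is_nested)
    regno1 = 3;
  return regno1 == 3 ? 2 : 3;	/* May need saving on the stack.  */
}

int
main (void)
{
  /* Neither LR nor r4-r10 in the mask: fall back to r3.  */
  printf ("scratch = r%d\n", sketch_pick_scratch (SKETCH_IP, 0x000f, 0));
  /* r5 and r6 in the mask: pick r5.  */
  printf ("scratch = r%d\n", sketch_pick_scratch (SKETCH_IP, 0x0060, 0));
  /* LR in the mask: pick lr (r14).  */
  printf ("scratch = r%d\n",
	  sketch_pick_scratch (SKETCH_IP, 1ul << SKETCH_LR, 0));
  return 0;
}
#endif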
23241
23242 /* Release a scratch register obtained from the preceding function. */
23243
23244 static void
23245 release_scratch_register_on_entry (struct scratch_reg *sr)
23246 {
23247 if (sr->saved)
23248 {
23249 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23250 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23251 rtx x = gen_rtx_SET (stack_pointer_rtx,
23252 plus_constant (Pmode, stack_pointer_rtx, 4));
23253 RTX_FRAME_RELATED_P (insn) = 1;
23254 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23255 }
23256 }
23257
23258 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23259
23260 #if PROBE_INTERVAL > 4096
23261 #error Cannot use indexed addressing mode for stack probing
23262 #endif
23263
23264 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23265 inclusive. These are offsets from the current stack pointer. REGNO1
23266 is the index number of the 1st scratch register and LIVE_REGS is the
23267 mask of live registers. */
23268
23269 static void
23270 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23271 unsigned int regno1, unsigned long live_regs)
23272 {
23273 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23274
23275 /* See if we have a constant small number of probes to generate. If so,
23276 that's the easy case. */
23277 if (size <= PROBE_INTERVAL)
23278 {
23279 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23280 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23281 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23282 }
23283
23284 /* The run-time loop is made up of 10 insns in the generic case while the
23285 compile-time loop is made up of 4 + 2*(n-2) insns for n intervals. */
23286 else if (size <= 5 * PROBE_INTERVAL)
23287 {
23288 HOST_WIDE_INT i, rem;
23289
23290 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23291 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23292 emit_stack_probe (reg1);
23293
23294 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23295 it exceeds SIZE. If only two probes are needed, this will not
23296 generate any code. Then probe at FIRST + SIZE. */
23297 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23298 {
23299 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23300 emit_stack_probe (reg1);
23301 }
23302
23303 rem = size - (i - PROBE_INTERVAL);
23304 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23305 {
23306 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23307 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23308 }
23309 else
23310 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23311 }
23312
23313 /* Otherwise, do the same as above, but in a loop. Note that we must be
23314 extra careful with variables wrapping around because we might be at
23315 the very top (or the very bottom) of the address space and we have
23316 to be able to handle this case properly; in particular, we use an
23317 equality test for the loop condition. */
23318 else
23319 {
23320 HOST_WIDE_INT rounded_size;
23321 struct scratch_reg sr;
23322
23323 get_scratch_register_on_entry (&sr, regno1, live_regs);
23324
23325 emit_move_insn (reg1, GEN_INT (first));
23326
23327
23328 /* Step 1: round SIZE to the previous multiple of the interval. */
23329
23330 rounded_size = size & -PROBE_INTERVAL;
23331 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23332
23333
23334 /* Step 2: compute initial and final value of the loop counter. */
23335
23336 /* TEST_ADDR = SP + FIRST. */
23337 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23338
23339 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23340 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23341
23342
23343 /* Step 3: the loop
23344
23345 do
23346 {
23347 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23348 probe at TEST_ADDR
23349 }
23350 while (TEST_ADDR != LAST_ADDR)
23351
23352 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23353 until it is equal to ROUNDED_SIZE. */
23354
23355 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23356
23357
23358 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23359 that SIZE is equal to ROUNDED_SIZE. */
23360
23361 if (size != rounded_size)
23362 {
23363 HOST_WIDE_INT rem = size - rounded_size;
23364
23365 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23366 {
23367 emit_set_insn (sr.reg,
23368 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23369 emit_stack_probe (plus_constant (Pmode, sr.reg,
23370 PROBE_INTERVAL - rem));
23371 }
23372 else
23373 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23374 }
23375
23376 release_scratch_register_on_entry (&sr);
23377 }
23378
23379 /* Make sure nothing is scheduled before we are done. */
23380 emit_insn (gen_blockage ());
23381 }
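
/* A minimal standalone sketch (not part of the build) of the probing
   schedule produced above, whichever of the three strategies is used:
   the generated code touches FIRST + N * PROBE_INTERVAL for N = 1, 2, ...
   while that stays below FIRST + SIZE, and finally touches FIRST + SIZE.
   The interval (assuming the usual 4 KiB) and the sizes are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const long probe_interval = 4096;  /* 1 << STACK_CHECK_PROBE_INTERVAL_EXP.  */
  const long first = 12288;	     /* Hypothetical protection area.  */
  const long size = 20000;	     /* Hypothetical frame size.  */
  long n;

  for (n = probe_interval; n < size; n += probe_interval)
    printf ("probe at sp - %ld\n", first + n);
  printf ("probe at sp - %ld\n", first + size);
  return 0;
}
#endif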
23382
23383 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23384 absolute addresses. */
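/* As a rough illustration (not authoritative output), assuming a 4 KiB
   probe interval and r4/r5 as the two scratch registers, the emitted
   sequence would look something like:

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
   */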
23385
23386 const char *
23387 output_probe_stack_range (rtx reg1, rtx reg2)
23388 {
23389 static int labelno = 0;
23390 char loop_lab[32];
23391 rtx xops[2];
23392
23393 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23394
23395 /* Loop. */
23396 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23397
23398 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23399 xops[0] = reg1;
23400 xops[1] = GEN_INT (PROBE_INTERVAL);
23401 output_asm_insn ("sub\t%0, %0, %1", xops);
23402
23403 /* Probe at TEST_ADDR. */
23404 output_asm_insn ("str\tr0, [%0, #0]", xops);
23405
23406 /* Test if TEST_ADDR == LAST_ADDR. */
23407 xops[1] = reg2;
23408 output_asm_insn ("cmp\t%0, %1", xops);
23409
23410 /* Branch. */
23411 fputs ("\tbne\t", asm_out_file);
23412 assemble_name_raw (asm_out_file, loop_lab);
23413 fputc ('\n', asm_out_file);
23414
23415 return "";
23416 }
23417
23418 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23419 function. */
23420 void
23421 arm_expand_prologue (void)
23422 {
23423 rtx amount;
23424 rtx insn;
23425 rtx ip_rtx;
23426 unsigned long live_regs_mask;
23427 unsigned long func_type;
23428 int fp_offset = 0;
23429 int saved_pretend_args = 0;
23430 int saved_regs = 0;
23431 unsigned HOST_WIDE_INT args_to_push;
23432 HOST_WIDE_INT size;
23433 arm_stack_offsets *offsets;
23434 bool clobber_ip;
23435
23436 func_type = arm_current_func_type ();
23437
23438 /* Naked functions don't have prologues. */
23439 if (IS_NAKED (func_type))
23440 {
23441 if (flag_stack_usage_info)
23442 current_function_static_stack_size = 0;
23443 return;
23444 }
23445
23446 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
23447 args_to_push = crtl->args.pretend_args_size;
23448
23449 /* Compute which register we will have to save onto the stack. */
23450 offsets = arm_get_frame_offsets ();
23451 live_regs_mask = offsets->saved_regs_mask;
23452
23453 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23454
23455 if (IS_STACKALIGN (func_type))
23456 {
23457 rtx r0, r1;
23458
23459 /* Handle a word-aligned stack pointer. We generate the following:
23460
23461 mov r0, sp
23462 bic r1, r0, #7
23463 mov sp, r1
23464 <save and restore r0 in normal prologue/epilogue>
23465 mov sp, r0
23466 bx lr
23467
23468 The unwinder doesn't need to know about the stack realignment.
23469 Just tell it we saved SP in r0. */
23470 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23471
23472 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23473 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23474
23475 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23476 RTX_FRAME_RELATED_P (insn) = 1;
23477 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23478
23479 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23480
23481 /* ??? The CFA changes here, which may cause GDB to conclude that it
23482 has entered a different function. That said, the unwind info is
23483 correct, individually, before and after this instruction because
23484 we've described the save of SP, which will override the default
23485 handling of SP as restoring from the CFA. */
23486 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23487 }
23488
23489 /* Let's compute the static_chain_stack_bytes required and store it. Right
23490 now the value must be -1 as stored by arm_init_machine_status (). */
23491 cfun->machine->static_chain_stack_bytes
23492 = arm_compute_static_chain_stack_bytes ();
23493
23494 /* The static chain register is the same as the IP register. If it is
23495 clobbered when creating the frame, we need to save and restore it. */
23496 clobber_ip = IS_NESTED (func_type)
23497 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23498 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23499 || flag_stack_clash_protection)
23500 && !df_regs_ever_live_p (LR_REGNUM)
23501 && arm_r3_live_at_start_p ()));
23502
23503 /* Find somewhere to store IP whilst the frame is being created.
23504 We try the following places in order:
23505
23506 1. The last argument register r3 if it is available.
23507 2. A slot on the stack above the frame if there are no
23508 arguments to push onto the stack.
23509 3. Register r3 again, after pushing the argument registers
23510 onto the stack, if this is a varargs function.
23511 4. The last slot on the stack created for the arguments to
23512 push, if this isn't a varargs function.
23513
23514 Note - we only need to tell the dwarf2 backend about the SP
23515 adjustment in the second variant; the static chain register
23516 doesn't need to be unwound, as it doesn't contain a value
23517 inherited from the caller. */
23518 if (clobber_ip)
23519 {
23520 if (!arm_r3_live_at_start_p ())
23521 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23522 else if (args_to_push == 0)
23523 {
23524 rtx addr, dwarf;
23525
23526 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23527 saved_regs += 4;
23528
23529 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23530 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23531 fp_offset = 4;
23532
23533 /* Just tell the dwarf backend that we adjusted SP. */
23534 dwarf = gen_rtx_SET (stack_pointer_rtx,
23535 plus_constant (Pmode, stack_pointer_rtx,
23536 -fp_offset));
23537 RTX_FRAME_RELATED_P (insn) = 1;
23538 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23539 }
23540 else
23541 {
23542 /* Store the args on the stack. */
23543 if (cfun->machine->uses_anonymous_args)
23544 {
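/* The mask (0xf0 >> (args_to_push / 4)) & 0xf picks out the highest
   argument registers: 4 bytes -> {r3}, 8 -> {r2, r3}, 12 -> {r1, r2, r3}
   and 16 -> {r0, r1, r2, r3}.  */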
23545 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23546 (0xf0 >> (args_to_push / 4)) & 0xf);
23547 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23548 saved_pretend_args = 1;
23549 }
23550 else
23551 {
23552 rtx addr, dwarf;
23553
23554 if (args_to_push == 4)
23555 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23556 else
23557 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23558 plus_constant (Pmode,
23559 stack_pointer_rtx,
23560 -args_to_push));
23561
23562 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23563
23564 /* Just tell the dwarf backend that we adjusted SP. */
23565 dwarf = gen_rtx_SET (stack_pointer_rtx,
23566 plus_constant (Pmode, stack_pointer_rtx,
23567 -args_to_push));
23568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23569 }
23570
23571 RTX_FRAME_RELATED_P (insn) = 1;
23572 fp_offset = args_to_push;
23573 args_to_push = 0;
23574 }
23575 }
23576
23577 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23578 {
23579 if (IS_INTERRUPT (func_type))
23580 {
23581 /* Interrupt functions must not corrupt any registers.
23582 Creating a frame pointer however, corrupts the IP
23583 register, so we must push it first. */
23584 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23585
23586 /* Do not set RTX_FRAME_RELATED_P on this insn.
23587 The dwarf stack unwinding code only wants to see one
23588 stack decrement per function, and this is not it. If
23589 this instruction is labeled as being part of the frame
23590 creation sequence then dwarf2out_frame_debug_expr will
23591 die when it encounters the assignment of IP to FP
23592 later on, since the use of SP here establishes SP as
23593 the CFA register and not IP.
23594
23595 Anyway this instruction is not really part of the stack
23596 frame creation although it is part of the prologue. */
23597 }
23598
23599 insn = emit_set_insn (ip_rtx,
23600 plus_constant (Pmode, stack_pointer_rtx,
23601 fp_offset));
23602 RTX_FRAME_RELATED_P (insn) = 1;
23603 }
23604
23605 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23606 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23607 {
23608 saved_regs += 4;
23609 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23610 GEN_INT (FPCXTNS_ENUM)));
23611 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23612 plus_constant (Pmode, stack_pointer_rtx, -4));
23613 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23614 RTX_FRAME_RELATED_P (insn) = 1;
23615 }
23616
23617 if (args_to_push)
23618 {
23619 /* Push the argument registers, or reserve space for them. */
23620 if (cfun->machine->uses_anonymous_args)
23621 insn = emit_multi_reg_push
23622 ((0xf0 >> (args_to_push / 4)) & 0xf,
23623 (0xf0 >> (args_to_push / 4)) & 0xf);
23624 else
23625 insn = emit_insn
23626 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23627 GEN_INT (- args_to_push)));
23628 RTX_FRAME_RELATED_P (insn) = 1;
23629 }
23630
23631 /* If this is an interrupt service routine, and the link register
23632 is going to be pushed, and we're not generating an extra
23633 push of IP (needed when a frame pointer is required and the frame layout is APCS),
23634 subtracting four from LR now will mean that the function return
23635 can be done with a single instruction. */
23636 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23637 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23638 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23639 && TARGET_ARM)
23640 {
23641 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23642
23643 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23644 }
23645
23646 if (live_regs_mask)
23647 {
23648 unsigned long dwarf_regs_mask = live_regs_mask;
23649
23650 saved_regs += bit_count (live_regs_mask) * 4;
23651 if (optimize_size && !frame_pointer_needed
23652 && saved_regs == offsets->saved_regs - offsets->saved_args)
23653 {
23654 /* If no coprocessor registers are being pushed and we don't have
23655 to worry about a frame pointer then push extra registers to
23656 create the stack frame. This is done in a way that does not
23657 alter the frame layout, so is independent of the epilogue. */
23658 int n;
23659 int frame;
23660 n = 0;
23661 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23662 n++;
23663 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23664 if (frame && n * 4 >= frame)
23665 {
23666 n = frame / 4;
23667 live_regs_mask |= (1 << n) - 1;
23668 saved_regs += frame;
23669 }
23670 }
23671
23672 if (TARGET_LDRD
23673 && current_tune->prefer_ldrd_strd
23674 && !optimize_function_for_size_p (cfun))
23675 {
23676 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23677 if (TARGET_THUMB2)
23678 thumb2_emit_strd_push (live_regs_mask);
23679 else if (TARGET_ARM
23680 && !TARGET_APCS_FRAME
23681 && !IS_INTERRUPT (func_type))
23682 arm_emit_strd_push (live_regs_mask);
23683 else
23684 {
23685 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23686 RTX_FRAME_RELATED_P (insn) = 1;
23687 }
23688 }
23689 else
23690 {
23691 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23692 RTX_FRAME_RELATED_P (insn) = 1;
23693 }
23694 }
23695
23696 if (! IS_VOLATILE (func_type))
23697 saved_regs += arm_save_coproc_regs ();
23698
23699 if (frame_pointer_needed && TARGET_ARM)
23700 {
23701 /* Create the new frame pointer. */
23702 if (TARGET_APCS_FRAME)
23703 {
23704 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23706 RTX_FRAME_RELATED_P (insn) = 1;
23707 }
23708 else
23709 {
23710 insn = GEN_INT (saved_regs - (4 + fp_offset));
23711 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23712 stack_pointer_rtx, insn));
23713 RTX_FRAME_RELATED_P (insn) = 1;
23714 }
23715 }
23716
23717 size = offsets->outgoing_args - offsets->saved_args;
23718 if (flag_stack_usage_info)
23719 current_function_static_stack_size = size;
23720
23721 /* If this isn't an interrupt service routine and we have a frame, then do
23722 stack checking. We use IP as the first scratch register, except for the
23723 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23724 if (!IS_INTERRUPT (func_type)
23725 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23726 || flag_stack_clash_protection))
23727 {
23728 unsigned int regno;
23729
23730 if (!IS_NESTED (func_type) || clobber_ip)
23731 regno = IP_REGNUM;
23732 else if (df_regs_ever_live_p (LR_REGNUM))
23733 regno = LR_REGNUM;
23734 else
23735 regno = 3;
23736
23737 if (crtl->is_leaf && !cfun->calls_alloca)
23738 {
23739 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23740 arm_emit_probe_stack_range (get_stack_check_protect (),
23741 size - get_stack_check_protect (),
23742 regno, live_regs_mask);
23743 }
23744 else if (size > 0)
23745 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23746 regno, live_regs_mask);
23747 }
23748
23749 /* Recover the static chain register. */
23750 if (clobber_ip)
23751 {
23752 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23753 insn = gen_rtx_REG (SImode, 3);
23754 else
23755 {
23756 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23757 insn = gen_frame_mem (SImode, insn);
23758 }
23759 emit_set_insn (ip_rtx, insn);
23760 emit_insn (gen_force_register_use (ip_rtx));
23761 }
23762
23763 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23764 {
23765 /* This add can produce multiple insns for a large constant, so we
23766 need to get tricky. */
23767 rtx_insn *last = get_last_insn ();
23768
23769 amount = GEN_INT (offsets->saved_args + saved_regs
23770 - offsets->outgoing_args);
23771
23772 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23773 amount));
23774 do
23775 {
23776 last = last ? NEXT_INSN (last) : get_insns ();
23777 RTX_FRAME_RELATED_P (last) = 1;
23778 }
23779 while (last != insn);
23780
23781 /* If the frame pointer is needed, emit a special barrier that
23782 will prevent the scheduler from moving stores to the frame
23783 before the stack adjustment. */
23784 if (frame_pointer_needed)
23785 emit_insn (gen_stack_tie (stack_pointer_rtx,
23786 hard_frame_pointer_rtx));
23787 }
23788
23789
23790 if (frame_pointer_needed && TARGET_THUMB2)
23791 thumb_set_frame_pointer (offsets);
23792
23793 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23794 {
23795 unsigned long mask;
23796
23797 mask = live_regs_mask;
23798 mask &= THUMB2_WORK_REGS;
23799 if (!IS_NESTED (func_type))
23800 mask |= (1 << IP_REGNUM);
23801 arm_load_pic_register (mask, NULL_RTX);
23802 }
23803
23804 /* If we are profiling, make sure no instructions are scheduled before
23805 the call to mcount. Similarly if the user has requested no
23806 scheduling in the prolog. Similarly if we want non-call exceptions
23807 using the EABI unwinder, to prevent faulting instructions from being
23808 swapped with a stack adjustment. */
23809 if (crtl->profile || !TARGET_SCHED_PROLOG
23810 || (arm_except_unwind_info (&global_options) == UI_TARGET
23811 && cfun->can_throw_non_call_exceptions))
23812 emit_insn (gen_blockage ());
23813
23814 /* If the link register is being kept alive, with the return address in it,
23815 then make sure that it does not get reused by the ce2 pass. */
23816 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23817 cfun->machine->lr_save_eliminated = 1;
23818 }
23819 \f
23820 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23821 static void
23822 arm_print_condition (FILE *stream)
23823 {
23824 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23825 {
23826 /* Branch conversion is not implemented for Thumb-2. */
23827 if (TARGET_THUMB)
23828 {
23829 output_operand_lossage ("predicated Thumb instruction");
23830 return;
23831 }
23832 if (current_insn_predicate != NULL)
23833 {
23834 output_operand_lossage
23835 ("predicated instruction in conditional sequence");
23836 return;
23837 }
23838
23839 fputs (arm_condition_codes[arm_current_cc], stream);
23840 }
23841 else if (current_insn_predicate)
23842 {
23843 enum arm_cond_code code;
23844
23845 if (TARGET_THUMB1)
23846 {
23847 output_operand_lossage ("predicated Thumb instruction");
23848 return;
23849 }
23850
23851 code = get_arm_condition_code (current_insn_predicate);
23852 fputs (arm_condition_codes[code], stream);
23853 }
23854 }
23855
23856
23857 /* Globally reserved letters: acln
23858 Punctuation letters currently used: @_|?().!#
23859 Lower case letters currently used: bcdefhimpqtvwxyz
23860 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23861 Letters previously used, but now deprecated/obsolete: sWXYZ.
23862
23863 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23864
23865 If CODE is 'd', then the X is a condition operand and the instruction
23866 should only be executed if the condition is true.
23867 if CODE is 'D', then the X is a condition operand and the instruction
23868 should only be executed if the condition is false: however, if the mode
23869 of the comparison is CCFPEmode, then always execute the instruction -- we
23870 do this because in these circumstances !GE does not necessarily imply LT;
23871 in these cases the instruction pattern will take care to make sure that
23872 an instruction containing %d will follow, thereby undoing the effects of
23873 doing this instruction unconditionally.
23874 If CODE is 'N' then X is a floating point operand that must be negated
23875 before output.
23876 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23877 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23878 If CODE is 'V', then the operand must be a CONST_INT representing
23879 the bits to preserve in the modified register (Rd) of a BFI or BFC
23880 instruction: print out both the width and lsb (shift) fields. */
23881 static void
23882 arm_print_operand (FILE *stream, rtx x, int code)
23883 {
23884 switch (code)
23885 {
23886 case '@':
23887 fputs (ASM_COMMENT_START, stream);
23888 return;
23889
23890 case '_':
23891 fputs (user_label_prefix, stream);
23892 return;
23893
23894 case '|':
23895 fputs (REGISTER_PREFIX, stream);
23896 return;
23897
23898 case '?':
23899 arm_print_condition (stream);
23900 return;
23901
23902 case '.':
23903 /* The current condition code for a condition code setting instruction.
23904 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23905 fputc('s', stream);
23906 arm_print_condition (stream);
23907 return;
23908
23909 case '!':
23910 /* If the instruction is conditionally executed then print
23911 the current condition code, otherwise print 's'. */
23912 gcc_assert (TARGET_THUMB2);
23913 if (current_insn_predicate)
23914 arm_print_condition (stream);
23915 else
23916 fputc('s', stream);
23917 break;
23918
23919 /* %# is a "break" sequence. It doesn't output anything, but is used to
23920 separate e.g. operand numbers from following text, if that text consists
23921 of further digits which we don't want to be part of the operand
23922 number. */
23923 case '#':
23924 return;
23925
23926 case 'N':
23927 {
23928 REAL_VALUE_TYPE r;
23929 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23930 fprintf (stream, "%s", fp_const_from_val (&r));
23931 }
23932 return;
23933
23934 /* An integer or symbol address without a preceding # sign. */
23935 case 'c':
23936 switch (GET_CODE (x))
23937 {
23938 case CONST_INT:
23939 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23940 break;
23941
23942 case SYMBOL_REF:
23943 output_addr_const (stream, x);
23944 break;
23945
23946 case CONST:
23947 if (GET_CODE (XEXP (x, 0)) == PLUS
23948 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23949 {
23950 output_addr_const (stream, x);
23951 break;
23952 }
23953 /* Fall through. */
23954
23955 default:
23956 output_operand_lossage ("Unsupported operand for code '%c'", code);
23957 }
23958 return;
23959
23960 /* An integer that we want to print in HEX. */
23961 case 'x':
23962 switch (GET_CODE (x))
23963 {
23964 case CONST_INT:
23965 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23966 break;
23967
23968 default:
23969 output_operand_lossage ("Unsupported operand for code '%c'", code);
23970 }
23971 return;
23972
23973 case 'B':
23974 if (CONST_INT_P (x))
23975 {
23976 HOST_WIDE_INT val;
23977 val = ARM_SIGN_EXTEND (~INTVAL (x));
23978 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23979 }
23980 else
23981 {
23982 putc ('~', stream);
23983 output_addr_const (stream, x);
23984 }
23985 return;
23986
23987 case 'b':
23988 /* Print the log2 of a CONST_INT. */
23989 {
23990 HOST_WIDE_INT val;
23991
23992 if (!CONST_INT_P (x)
23993 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23994 output_operand_lossage ("Unsupported operand for code '%c'", code);
23995 else
23996 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23997 }
23998 return;
23999
24000 case 'L':
24001 /* The low 16 bits of an immediate constant. */
24002 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24003 return;
24004
24005 case 'i':
24006 fprintf (stream, "%s", arithmetic_instr (x, 1));
24007 return;
24008
24009 case 'I':
24010 fprintf (stream, "%s", arithmetic_instr (x, 0));
24011 return;
24012
24013 case 'S':
24014 {
24015 HOST_WIDE_INT val;
24016 const char *shift;
24017
24018 shift = shift_op (x, &val);
24019
24020 if (shift)
24021 {
24022 fprintf (stream, ", %s ", shift);
24023 if (val == -1)
24024 arm_print_operand (stream, XEXP (x, 1), 0);
24025 else
24026 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24027 }
24028 }
24029 return;
24030
24031 /* An explanation of the 'Q', 'R' and 'H' register operands:
24032
24033 In a pair of registers containing a DI or DF value the 'Q'
24034 operand returns the register number of the register containing
24035 the least significant part of the value. The 'R' operand returns
24036 the register number of the register containing the most
24037 significant part of the value.
24038
24039 The 'H' operand returns the higher of the two register numbers.
24040 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24041 same as the 'Q' operand, since the most significant part of the
24042 value is held in the lower number register. The reverse is true
24043 on systems where WORDS_BIG_ENDIAN is false.
24044
24045 The purpose of these operands is to distinguish between cases
24046 where the endian-ness of the values is important (for example
24047 when they are added together), and cases where the endian-ness
24048 is irrelevant, but the order of register operations is important.
24049 For example when loading a value from memory into a register
24050 pair, the endian-ness does not matter. Provided that the value
24051 from the lower memory address is put into the lower numbered
24052 register, and the value from the higher address is put into the
24053 higher numbered register, the load will work regardless of whether
24054 the value being loaded is big-wordian or little-wordian. The
24055 order of the two register loads can matter however, if the address
24056 of the memory location is actually held in one of the registers
24057 being overwritten by the load.
24058
24059 The 'Q' and 'R' constraints are also available for 64-bit
24060 constants. */
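/* As a concrete example with a hypothetical register assignment, for a
   DImode value held in the pair {r4, r5}: with little-endian word order
   %Q prints r4, %R prints r5 and %H prints r5; when WORDS_BIG_ENDIAN,
   %Q prints r5, %R prints r4 and %H still prints r5.  */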
24061 case 'Q':
24062 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24063 {
24064 rtx part = gen_lowpart (SImode, x);
24065 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24066 return;
24067 }
24068
24069 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24070 {
24071 output_operand_lossage ("invalid operand for code '%c'", code);
24072 return;
24073 }
24074
24075 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24076 return;
24077
24078 case 'R':
24079 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24080 {
24081 machine_mode mode = GET_MODE (x);
24082 rtx part;
24083
24084 if (mode == VOIDmode)
24085 mode = DImode;
24086 part = gen_highpart_mode (SImode, mode, x);
24087 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24088 return;
24089 }
24090
24091 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24092 {
24093 output_operand_lossage ("invalid operand for code '%c'", code);
24094 return;
24095 }
24096
24097 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24098 return;
24099
24100 case 'H':
24101 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24102 {
24103 output_operand_lossage ("invalid operand for code '%c'", code);
24104 return;
24105 }
24106
24107 asm_fprintf (stream, "%r", REGNO (x) + 1);
24108 return;
24109
24110 case 'J':
24111 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24112 {
24113 output_operand_lossage ("invalid operand for code '%c'", code);
24114 return;
24115 }
24116
24117 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24118 return;
24119
24120 case 'K':
24121 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24122 {
24123 output_operand_lossage ("invalid operand for code '%c'", code);
24124 return;
24125 }
24126
24127 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24128 return;
24129
24130 case 'm':
24131 asm_fprintf (stream, "%r",
24132 REG_P (XEXP (x, 0))
24133 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24134 return;
24135
24136 case 'M':
24137 asm_fprintf (stream, "{%r-%r}",
24138 REGNO (x),
24139 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24140 return;
24141
24142 /* Like 'M', but writing doubleword vector registers, for use by Neon
24143 insns. */
24144 case 'h':
24145 {
24146 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24147 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24148 if (numregs == 1)
24149 asm_fprintf (stream, "{d%d}", regno);
24150 else
24151 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24152 }
24153 return;
24154
24155 case 'd':
24156 /* CONST_TRUE_RTX means always -- that's the default. */
24157 if (x == const_true_rtx)
24158 return;
24159
24160 if (!COMPARISON_P (x))
24161 {
24162 output_operand_lossage ("invalid operand for code '%c'", code);
24163 return;
24164 }
24165
24166 fputs (arm_condition_codes[get_arm_condition_code (x)],
24167 stream);
24168 return;
24169
24170 case 'D':
24171 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24172 want to do that. */
24173 if (x == const_true_rtx)
24174 {
24175 output_operand_lossage ("instruction never executed");
24176 return;
24177 }
24178 if (!COMPARISON_P (x))
24179 {
24180 output_operand_lossage ("invalid operand for code '%c'", code);
24181 return;
24182 }
24183
24184 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24185 (get_arm_condition_code (x))],
24186 stream);
24187 return;
24188
24189 case 'V':
24190 {
24191 /* Output the LSB (shift) and width for a bitmask instruction
24192 based on a literal mask. The LSB is printed first,
24193 followed by the width.
24194
24195 Eg. For 0b1...1110001, the result is #1, #3. */
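/* Working through that example: VAL = ~X has only bits 1-3 set, so
   VAL & -VAL == 0b10 and the lsb is 1, while VAL + (VAL & -VAL) == 0b10000,
   whose exact_log2 is 4, giving a width of 4 - 1 == 3.  */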
24196 if (!CONST_INT_P (x))
24197 {
24198 output_operand_lossage ("invalid operand for code '%c'", code);
24199 return;
24200 }
24201
24202 unsigned HOST_WIDE_INT val = ~XUINT (x, 0);
24203 int lsb = exact_log2 (val & -val);
24204 asm_fprintf (stream, "#%d, #%d", lsb,
24205 (exact_log2 (val + (val & -val)) - lsb));
24206 }
24207 return;
24208
24209 case 's':
24210 case 'W':
24211 case 'X':
24212 case 'Y':
24213 case 'Z':
24214 /* Former Maverick support, removed after GCC-4.7. */
24215 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24216 return;
24217
24218 case 'U':
24219 if (!REG_P (x)
24220 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24221 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24222 /* Bad value for wCG register number. */
24223 {
24224 output_operand_lossage ("invalid operand for code '%c'", code);
24225 return;
24226 }
24227
24228 else
24229 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24230 return;
24231
24232 /* Print an iWMMXt control register name. */
24233 case 'w':
24234 if (!CONST_INT_P (x)
24235 || INTVAL (x) < 0
24236 || INTVAL (x) >= 16)
24237 /* Bad value for wC register number. */
24238 {
24239 output_operand_lossage ("invalid operand for code '%c'", code);
24240 return;
24241 }
24242
24243 else
24244 {
24245 static const char * wc_reg_names [16] =
24246 {
24247 "wCID", "wCon", "wCSSF", "wCASF",
24248 "wC4", "wC5", "wC6", "wC7",
24249 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24250 "wC12", "wC13", "wC14", "wC15"
24251 };
24252
24253 fputs (wc_reg_names [INTVAL (x)], stream);
24254 }
24255 return;
24256
24257 /* Print the high single-precision register of a VFP double-precision
24258 register. */
24259 case 'p':
24260 {
24261 machine_mode mode = GET_MODE (x);
24262 int regno;
24263
24264 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24265 {
24266 output_operand_lossage ("invalid operand for code '%c'", code);
24267 return;
24268 }
24269
24270 regno = REGNO (x);
24271 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24272 {
24273 output_operand_lossage ("invalid operand for code '%c'", code);
24274 return;
24275 }
24276
24277 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24278 }
24279 return;
24280
24281 /* Print a VFP/Neon double precision or quad precision register name. */
24282 case 'P':
24283 case 'q':
24284 {
24285 machine_mode mode = GET_MODE (x);
24286 int is_quad = (code == 'q');
24287 int regno;
24288
24289 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24290 {
24291 output_operand_lossage ("invalid operand for code '%c'", code);
24292 return;
24293 }
24294
24295 if (!REG_P (x)
24296 || !IS_VFP_REGNUM (REGNO (x)))
24297 {
24298 output_operand_lossage ("invalid operand for code '%c'", code);
24299 return;
24300 }
24301
24302 regno = REGNO (x);
24303 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24304 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24305 {
24306 output_operand_lossage ("invalid operand for code '%c'", code);
24307 return;
24308 }
24309
24310 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24311 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24312 }
24313 return;
24314
24315 /* These two codes print the low/high doubleword register of a Neon quad
24316 register, respectively. For pair-structure types, can also print
24317 low/high quadword registers. */
24318 case 'e':
24319 case 'f':
24320 {
24321 machine_mode mode = GET_MODE (x);
24322 int regno;
24323
24324 if ((GET_MODE_SIZE (mode) != 16
24325 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24326 {
24327 output_operand_lossage ("invalid operand for code '%c'", code);
24328 return;
24329 }
24330
24331 regno = REGNO (x);
24332 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24333 {
24334 output_operand_lossage ("invalid operand for code '%c'", code);
24335 return;
24336 }
24337
24338 if (GET_MODE_SIZE (mode) == 16)
24339 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24340 + (code == 'f' ? 1 : 0));
24341 else
24342 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24343 + (code == 'f' ? 1 : 0));
24344 }
24345 return;
24346
24347 /* Print a VFPv3 floating-point constant, represented as an integer
24348 index. */
24349 case 'G':
24350 {
24351 int index = vfp3_const_double_index (x);
24352 gcc_assert (index != -1);
24353 fprintf (stream, "%d", index);
24354 }
24355 return;
24356
24357 /* Print bits representing opcode features for Neon.
24358
24359 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24360 and polynomials as unsigned.
24361
24362 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24363
24364 Bit 2 is 1 for rounding functions, 0 otherwise. */
24365
24366 /* Identify the type as 's', 'u', 'p' or 'f'. */
24367 case 'T':
24368 {
24369 HOST_WIDE_INT bits = INTVAL (x);
24370 fputc ("uspf"[bits & 3], stream);
24371 }
24372 return;
24373
24374 /* Likewise, but signed and unsigned integers are both 'i'. */
24375 case 'F':
24376 {
24377 HOST_WIDE_INT bits = INTVAL (x);
24378 fputc ("iipf"[bits & 3], stream);
24379 }
24380 return;
24381
24382 /* As for 'T', but emit 'u' instead of 'p'. */
24383 case 't':
24384 {
24385 HOST_WIDE_INT bits = INTVAL (x);
24386 fputc ("usuf"[bits & 3], stream);
24387 }
24388 return;
24389
24390 /* Bit 2: rounding (vs none). */
24391 case 'O':
24392 {
24393 HOST_WIDE_INT bits = INTVAL (x);
24394 fputs ((bits & 4) != 0 ? "r" : "", stream);
24395 }
24396 return;
24397
24398 /* Memory operand for vld1/vst1 instruction. */
24399 case 'A':
24400 {
24401 rtx addr;
24402 bool postinc = FALSE;
24403 rtx postinc_reg = NULL;
24404 unsigned align, memsize, align_bits;
24405
24406 gcc_assert (MEM_P (x));
24407 addr = XEXP (x, 0);
24408 if (GET_CODE (addr) == POST_INC)
24409 {
24410 postinc = 1;
24411 addr = XEXP (addr, 0);
24412 }
24413 if (GET_CODE (addr) == POST_MODIFY)
24414 {
24415 postinc_reg = XEXP( XEXP (addr, 1), 1);
24416 addr = XEXP (addr, 0);
24417 }
24418 asm_fprintf (stream, "[%r", REGNO (addr));
24419
24420 /* We know the alignment of this access, so we can emit a hint in the
24421 instruction (for some alignments) as an aid to the memory subsystem
24422 of the target. */
24423 align = MEM_ALIGN (x) >> 3;
24424 memsize = MEM_SIZE (x);
24425
24426 /* Only certain alignment specifiers are supported by the hardware. */
24427 if (memsize == 32 && (align % 32) == 0)
24428 align_bits = 256;
24429 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24430 align_bits = 128;
24431 else if (memsize >= 8 && (align % 8) == 0)
24432 align_bits = 64;
24433 else
24434 align_bits = 0;
24435
24436 if (align_bits != 0)
24437 asm_fprintf (stream, ":%d", align_bits);
24438
24439 asm_fprintf (stream, "]");
24440
24441 if (postinc)
24442 fputs("!", stream);
24443 if (postinc_reg)
24444 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24445 }
24446 return;
24447
24448 /* Print a memory operand with an "Ux" or "Uj" constraint. Depending on the
24449 rtx_code of the address, the output takes one of the following forms:
24450 1. [Rn], #+/-<imm>
24451 2. [Rn, #+/-<imm>]!
24452 3. [Rn, #+/-<imm>]
24453 4. [Rn]. */
24454 case 'E':
24455 {
24456 rtx addr;
24457 rtx postinc_reg = NULL;
24458 unsigned inc_val = 0;
24459 enum rtx_code code;
24460
24461 gcc_assert (MEM_P (x));
24462 addr = XEXP (x, 0);
24463 code = GET_CODE (addr);
24464 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24465 || code == PRE_DEC)
24466 {
24467 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24468 inc_val = GET_MODE_SIZE (GET_MODE (x));
24469 if (code == POST_INC || code == POST_DEC)
24470 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24471 ? "": "-", inc_val);
24472 else
24473 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24474 ? "": "-", inc_val);
24475 }
24476 else if (code == POST_MODIFY || code == PRE_MODIFY)
24477 {
24478 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24479 postinc_reg = XEXP (XEXP (addr, 1), 1);
24480 if (postinc_reg && CONST_INT_P (postinc_reg))
24481 {
24482 if (code == POST_MODIFY)
24483 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24484 else
24485 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24486 }
24487 }
24488 else if (code == PLUS)
24489 {
24490 rtx base = XEXP (addr, 0);
24491 rtx index = XEXP (addr, 1);
24492
24493 gcc_assert (REG_P (base) && CONST_INT_P (index));
24494
24495 HOST_WIDE_INT offset = INTVAL (index);
24496 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24497 }
24498 else
24499 {
24500 gcc_assert (REG_P (addr));
24501 asm_fprintf (stream, "[%r]",REGNO (addr));
24502 }
24503 }
24504 return;
24505
24506 case 'C':
24507 {
24508 rtx addr;
24509
24510 gcc_assert (MEM_P (x));
24511 addr = XEXP (x, 0);
24512 gcc_assert (REG_P (addr));
24513 asm_fprintf (stream, "[%r]", REGNO (addr));
24514 }
24515 return;
24516
24517 /* Translate an S register number into a D register number and element index. */
24518 case 'y':
24519 {
24520 machine_mode mode = GET_MODE (x);
24521 int regno;
24522
24523 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24524 {
24525 output_operand_lossage ("invalid operand for code '%c'", code);
24526 return;
24527 }
24528
24529 regno = REGNO (x);
24530 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24531 {
24532 output_operand_lossage ("invalid operand for code '%c'", code);
24533 return;
24534 }
24535
24536 regno = regno - FIRST_VFP_REGNUM;
24537 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24538 }
24539 return;
24540
24541 case 'v':
24542 gcc_assert (CONST_DOUBLE_P (x));
24543 int result;
24544 result = vfp3_const_double_for_fract_bits (x);
24545 if (result == 0)
24546 result = vfp3_const_double_for_bits (x);
24547 fprintf (stream, "#%d", result);
24548 return;
24549
24550 /* Register specifier for vld1.16/vst1.16. Translate the S register
24551 number into a D register number and element index. */
24552 case 'z':
24553 {
24554 machine_mode mode = GET_MODE (x);
24555 int regno;
24556
24557 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24558 {
24559 output_operand_lossage ("invalid operand for code '%c'", code);
24560 return;
24561 }
24562
24563 regno = REGNO (x);
24564 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24565 {
24566 output_operand_lossage ("invalid operand for code '%c'", code);
24567 return;
24568 }
24569
24570 regno = regno - FIRST_VFP_REGNUM;
24571 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24572 }
24573 return;
24574
24575 default:
24576 if (x == 0)
24577 {
24578 output_operand_lossage ("missing operand");
24579 return;
24580 }
24581
24582 switch (GET_CODE (x))
24583 {
24584 case REG:
24585 asm_fprintf (stream, "%r", REGNO (x));
24586 break;
24587
24588 case MEM:
24589 output_address (GET_MODE (x), XEXP (x, 0));
24590 break;
24591
24592 case CONST_DOUBLE:
24593 {
24594 char fpstr[20];
24595 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24596 sizeof (fpstr), 0, 1);
24597 fprintf (stream, "#%s", fpstr);
24598 }
24599 break;
24600
24601 default:
24602 gcc_assert (GET_CODE (x) != NEG);
24603 fputc ('#', stream);
24604 if (GET_CODE (x) == HIGH)
24605 {
24606 fputs (":lower16:", stream);
24607 x = XEXP (x, 0);
24608 }
24609
24610 output_addr_const (stream, x);
24611 break;
24612 }
24613 }
24614 }
24615 \f
24616 /* Target hook for printing a memory address. */
24617 static void
24618 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24619 {
24620 if (TARGET_32BIT)
24621 {
24622 int is_minus = GET_CODE (x) == MINUS;
24623
24624 if (REG_P (x))
24625 asm_fprintf (stream, "[%r]", REGNO (x));
24626 else if (GET_CODE (x) == PLUS || is_minus)
24627 {
24628 rtx base = XEXP (x, 0);
24629 rtx index = XEXP (x, 1);
24630 HOST_WIDE_INT offset = 0;
24631 if (!REG_P (base)
24632 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24633 {
24634 /* Ensure that BASE is a register (one of
24635 them must be), and that SP is not used
24636 as an index register. */
24637 std::swap (base, index);
24638 }
24639 switch (GET_CODE (index))
24640 {
24641 case CONST_INT:
24642 offset = INTVAL (index);
24643 if (is_minus)
24644 offset = -offset;
24645 asm_fprintf (stream, "[%r, #%wd]",
24646 REGNO (base), offset);
24647 break;
24648
24649 case REG:
24650 asm_fprintf (stream, "[%r, %s%r]",
24651 REGNO (base), is_minus ? "-" : "",
24652 REGNO (index));
24653 break;
24654
24655 case MULT:
24656 case ASHIFTRT:
24657 case LSHIFTRT:
24658 case ASHIFT:
24659 case ROTATERT:
24660 {
24661 asm_fprintf (stream, "[%r, %s%r",
24662 REGNO (base), is_minus ? "-" : "",
24663 REGNO (XEXP (index, 0)));
24664 arm_print_operand (stream, index, 'S');
24665 fputs ("]", stream);
24666 break;
24667 }
24668
24669 default:
24670 gcc_unreachable ();
24671 }
24672 }
24673 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24674 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24675 {
24676 gcc_assert (REG_P (XEXP (x, 0)));
24677
24678 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24679 asm_fprintf (stream, "[%r, #%s%d]!",
24680 REGNO (XEXP (x, 0)),
24681 GET_CODE (x) == PRE_DEC ? "-" : "",
24682 GET_MODE_SIZE (mode));
24683 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24684 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24685 else
24686 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24687 GET_CODE (x) == POST_DEC ? "-" : "",
24688 GET_MODE_SIZE (mode));
24689 }
24690 else if (GET_CODE (x) == PRE_MODIFY)
24691 {
24692 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24693 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24694 asm_fprintf (stream, "#%wd]!",
24695 INTVAL (XEXP (XEXP (x, 1), 1)));
24696 else
24697 asm_fprintf (stream, "%r]!",
24698 REGNO (XEXP (XEXP (x, 1), 1)));
24699 }
24700 else if (GET_CODE (x) == POST_MODIFY)
24701 {
24702 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24703 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24704 asm_fprintf (stream, "#%wd",
24705 INTVAL (XEXP (XEXP (x, 1), 1)));
24706 else
24707 asm_fprintf (stream, "%r",
24708 REGNO (XEXP (XEXP (x, 1), 1)));
24709 }
24710 else output_addr_const (stream, x);
24711 }
24712 else
24713 {
24714 if (REG_P (x))
24715 asm_fprintf (stream, "[%r]", REGNO (x));
24716 else if (GET_CODE (x) == POST_INC)
24717 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24718 else if (GET_CODE (x) == PLUS)
24719 {
24720 gcc_assert (REG_P (XEXP (x, 0)));
24721 if (CONST_INT_P (XEXP (x, 1)))
24722 asm_fprintf (stream, "[%r, #%wd]",
24723 REGNO (XEXP (x, 0)),
24724 INTVAL (XEXP (x, 1)));
24725 else
24726 asm_fprintf (stream, "[%r, %r]",
24727 REGNO (XEXP (x, 0)),
24728 REGNO (XEXP (x, 1)));
24729 }
24730 else
24731 output_addr_const (stream, x);
24732 }
24733 }
24734 \f
24735 /* Target hook for indicating whether a punctuation character for
24736 TARGET_PRINT_OPERAND is valid. */
24737 static bool
24738 arm_print_operand_punct_valid_p (unsigned char code)
24739 {
24740 return (code == '@' || code == '|' || code == '.'
24741 || code == '(' || code == ')' || code == '#'
24742 || (TARGET_32BIT && (code == '?'))
24743 || (TARGET_THUMB2 && (code == '!'))
24744 || (TARGET_THUMB && (code == '_')));
24745 }
24746 \f
24747 /* Target hook for assembling integer objects. The ARM version needs to
24748 handle word-sized values specially. */
24749 static bool
24750 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24751 {
24752 machine_mode mode;
24753
24754 if (size == UNITS_PER_WORD && aligned_p)
24755 {
24756 fputs ("\t.word\t", asm_out_file);
24757 output_addr_const (asm_out_file, x);
24758
24759 /* Mark symbols as position independent. We only do this in the
24760 .text segment, not in the .data segment. */
24761 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24762 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24763 {
24764 /* See legitimize_pic_address for an explanation of the
24765 TARGET_VXWORKS_RTP check. */
24766 /* References to weak symbols cannot be resolved locally:
24767 they may be overridden by a non-weak definition at link
24768 time. */
24769 if (!arm_pic_data_is_text_relative
24770 || (SYMBOL_REF_P (x)
24771 && (!SYMBOL_REF_LOCAL_P (x)
24772 || (SYMBOL_REF_DECL (x)
24773 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24774 || (SYMBOL_REF_FUNCTION_P (x)
24775 && !arm_fdpic_local_funcdesc_p (x)))))
24776 {
24777 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24778 fputs ("(GOTFUNCDESC)", asm_out_file);
24779 else
24780 fputs ("(GOT)", asm_out_file);
24781 }
24782 else
24783 {
24784 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24785 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24786 else
24787 {
24788 bool is_readonly;
24789
24790 if (!TARGET_FDPIC
24791 || arm_is_segment_info_known (x, &is_readonly))
24792 fputs ("(GOTOFF)", asm_out_file);
24793 else
24794 fputs ("(GOT)", asm_out_file);
24795 }
24796 }
24797 }
24798
24799 /* For FDPIC we also have to mark symbol for .data section. */
24800 if (TARGET_FDPIC
24801 && !making_const_table
24802 && SYMBOL_REF_P (x)
24803 && SYMBOL_REF_FUNCTION_P (x))
24804 fputs ("(FUNCDESC)", asm_out_file);
24805
24806 fputc ('\n', asm_out_file);
24807 return true;
24808 }
24809
24810 mode = GET_MODE (x);
24811
24812 if (arm_vector_mode_supported_p (mode))
24813 {
24814 int i, units;
24815
24816 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24817
24818 units = CONST_VECTOR_NUNITS (x);
24819 size = GET_MODE_UNIT_SIZE (mode);
24820
24821 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24822 for (i = 0; i < units; i++)
24823 {
24824 rtx elt = CONST_VECTOR_ELT (x, i);
24825 assemble_integer
24826 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24827 }
24828 else
24829 for (i = 0; i < units; i++)
24830 {
24831 rtx elt = CONST_VECTOR_ELT (x, i);
24832 assemble_real
24833 (*CONST_DOUBLE_REAL_VALUE (elt),
24834 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24835 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24836 }
24837
24838 return true;
24839 }
24840
24841 return default_assemble_integer (x, size, aligned_p);
24842 }
24843
24844 static void
24845 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24846 {
24847 section *s;
24848
24849 if (!TARGET_AAPCS_BASED)
24850 {
24851 (is_ctor ?
24852 default_named_section_asm_out_constructor
24853 : default_named_section_asm_out_destructor) (symbol, priority);
24854 return;
24855 }
24856
24857 /* Put these in the .init_array section, using a special relocation. */
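/* For example, a constructor with priority 101 ends up in a section named
   ".init_array.00101"; the "%.5u" format below zero-pads the priority.  */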
24858 if (priority != DEFAULT_INIT_PRIORITY)
24859 {
24860 char buf[18];
24861 sprintf (buf, "%s.%.5u",
24862 is_ctor ? ".init_array" : ".fini_array",
24863 priority);
24864 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24865 }
24866 else if (is_ctor)
24867 s = ctors_section;
24868 else
24869 s = dtors_section;
24870
24871 switch_to_section (s);
24872 assemble_align (POINTER_SIZE);
24873 fputs ("\t.word\t", asm_out_file);
24874 output_addr_const (asm_out_file, symbol);
24875 fputs ("(target1)\n", asm_out_file);
24876 }
24877
24878 /* Add a function to the list of static constructors. */
24879
24880 static void
24881 arm_elf_asm_constructor (rtx symbol, int priority)
24882 {
24883 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24884 }
24885
24886 /* Add a function to the list of static destructors. */
24887
24888 static void
24889 arm_elf_asm_destructor (rtx symbol, int priority)
24890 {
24891 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24892 }
24893 \f
24894 /* A finite state machine takes care of noticing whether or not instructions
24895 can be conditionally executed, and thus decreases execution time and code
24896 size by deleting branch instructions. The fsm is controlled by
24897 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24898
24899 /* The states of the fsm controlling condition codes are:
24900 0: normal, do nothing special
24901 1: make ASM_OUTPUT_OPCODE not output this instruction
24902 2: make ASM_OUTPUT_OPCODE not output this instruction
24903 3: make instructions conditional
24904 4: make instructions conditional
24905
24906 State transitions (state->state by whom under condition):
24907 0 -> 1 final_prescan_insn if the `target' is a label
24908 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24909 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24910 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24911 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24912 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24913 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24914 (the target insn is arm_target_insn).
24915
24916 If the jump clobbers the conditions then we use states 2 and 4.
24917
24918 A similar thing can be done with conditional return insns.
24919
24920 XXX In case the `target' is an unconditional branch, this conditionalising
24921 of the instructions always reduces code size, but not always execution
24922 time. But then, I want to reduce the code size to somewhere near what
24923 /bin/cc produces. */
24924
24925 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24926 instructions. When a COND_EXEC instruction is seen the subsequent
24927 instructions are scanned so that multiple conditional instructions can be
24928 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24929 specify the length and true/false mask for the IT block. These will be
24930 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
24931
24932 /* Returns the index of the ARM condition code string in
24933 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24934 COMPARISON should be an rtx like `(eq (...) (...))'. */
24935
24936 enum arm_cond_code
24937 maybe_get_arm_condition_code (rtx comparison)
24938 {
24939 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24940 enum arm_cond_code code;
24941 enum rtx_code comp_code = GET_CODE (comparison);
24942
24943 if (GET_MODE_CLASS (mode) != MODE_CC)
24944 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24945 XEXP (comparison, 1));
24946
24947 switch (mode)
24948 {
24949 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24950 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24951 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24952 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24953 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24954 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24955 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24956 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24957 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24958 case E_CC_DLTUmode: code = ARM_CC;
24959
24960 dominance:
24961 if (comp_code == EQ)
24962 return ARM_INVERSE_CONDITION_CODE (code);
24963 if (comp_code == NE)
24964 return code;
24965 return ARM_NV;
24966
24967 case E_CC_NZmode:
24968 switch (comp_code)
24969 {
24970 case NE: return ARM_NE;
24971 case EQ: return ARM_EQ;
24972 case GE: return ARM_PL;
24973 case LT: return ARM_MI;
24974 default: return ARM_NV;
24975 }
24976
24977 case E_CC_Zmode:
24978 switch (comp_code)
24979 {
24980 case NE: return ARM_NE;
24981 case EQ: return ARM_EQ;
24982 default: return ARM_NV;
24983 }
24984
24985 case E_CC_Nmode:
24986 switch (comp_code)
24987 {
24988 case NE: return ARM_MI;
24989 case EQ: return ARM_PL;
24990 default: return ARM_NV;
24991 }
24992
24993 case E_CCFPEmode:
24994 case E_CCFPmode:
24995 /* We can handle all cases except UNEQ and LTGT. */
24996 switch (comp_code)
24997 {
24998 case GE: return ARM_GE;
24999 case GT: return ARM_GT;
25000 case LE: return ARM_LS;
25001 case LT: return ARM_MI;
25002 case NE: return ARM_NE;
25003 case EQ: return ARM_EQ;
25004 case ORDERED: return ARM_VC;
25005 case UNORDERED: return ARM_VS;
25006 case UNLT: return ARM_LT;
25007 case UNLE: return ARM_LE;
25008 case UNGT: return ARM_HI;
25009 case UNGE: return ARM_PL;
25010 /* UNEQ and LTGT do not have a representation. */
25011 case UNEQ: /* Fall through. */
25012 case LTGT: /* Fall through. */
25013 default: return ARM_NV;
25014 }
25015
25016 case E_CC_SWPmode:
25017 switch (comp_code)
25018 {
25019 case NE: return ARM_NE;
25020 case EQ: return ARM_EQ;
25021 case GE: return ARM_LE;
25022 case GT: return ARM_LT;
25023 case LE: return ARM_GE;
25024 case LT: return ARM_GT;
25025 case GEU: return ARM_LS;
25026 case GTU: return ARM_CC;
25027 case LEU: return ARM_CS;
25028 case LTU: return ARM_HI;
25029 default: return ARM_NV;
25030 }
25031
25032 case E_CC_Cmode:
25033 switch (comp_code)
25034 {
25035 case LTU: return ARM_CS;
25036 case GEU: return ARM_CC;
25037 default: return ARM_NV;
25038 }
25039
25040 case E_CC_NVmode:
25041 switch (comp_code)
25042 {
25043 case GE: return ARM_GE;
25044 case LT: return ARM_LT;
25045 default: return ARM_NV;
25046 }
25047
25048 case E_CC_Bmode:
25049 switch (comp_code)
25050 {
25051 case GEU: return ARM_CS;
25052 case LTU: return ARM_CC;
25053 default: return ARM_NV;
25054 }
25055
25056 case E_CC_Vmode:
25057 switch (comp_code)
25058 {
25059 case NE: return ARM_VS;
25060 case EQ: return ARM_VC;
25061 default: return ARM_NV;
25062 }
25063
25064 case E_CC_ADCmode:
25065 switch (comp_code)
25066 {
25067 case GEU: return ARM_CS;
25068 case LTU: return ARM_CC;
25069 default: return ARM_NV;
25070 }
25071
25072 case E_CCmode:
25073 case E_CC_RSBmode:
25074 switch (comp_code)
25075 {
25076 case NE: return ARM_NE;
25077 case EQ: return ARM_EQ;
25078 case GE: return ARM_GE;
25079 case GT: return ARM_GT;
25080 case LE: return ARM_LE;
25081 case LT: return ARM_LT;
25082 case GEU: return ARM_CS;
25083 case GTU: return ARM_HI;
25084 case LEU: return ARM_LS;
25085 case LTU: return ARM_CC;
25086 default: return ARM_NV;
25087 }
25088
25089 default: gcc_unreachable ();
25090 }
25091 }
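/* [Editorial note, not part of GCC.]  Worked example of the table above: in
   E_CC_SWPmode the flags were set by comparing the operands in swapped
   order, so a GT query must be answered with the mirrored condition, hence
   GT -> ARM_LT (and likewise GTU -> ARM_CC).  The dominance modes
   (E_CC_Dxxmode) only answer EQ/NE queries: NE returns the stored condition
   and EQ returns its inverse.  */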
25092
25093 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25094 static enum arm_cond_code
25095 get_arm_condition_code (rtx comparison)
25096 {
25097 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25098 gcc_assert (code != ARM_NV);
25099 return code;
25100 }
25101
25102 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25103 code registers when not targeting Thumb1. The VFP condition register
25104 only exists when generating hard-float code. */
25105 static bool
25106 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25107 {
25108 if (!TARGET_32BIT)
25109 return false;
25110
25111 *p1 = CC_REGNUM;
25112 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25113 return true;
25114 }
25115
25116 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25117 instructions. */
25118 void
25119 thumb2_final_prescan_insn (rtx_insn *insn)
25120 {
25121 rtx_insn *first_insn = insn;
25122 rtx body = PATTERN (insn);
25123 rtx predicate;
25124 enum arm_cond_code code;
25125 int n;
25126 int mask;
25127 int max;
25128
25129 /* max_insns_skipped in the tune was already taken into account in the
25130 cost model of the ifcvt pass when generating COND_EXEC insns. At this
25131 stage just emit IT blocks as large as we can; it does not make sense
25132 to split them. */
25133 max = MAX_INSN_PER_IT_BLOCK;
25134
25135 /* Remove the previous insn from the count of insns to be output. */
25136 if (arm_condexec_count)
25137 arm_condexec_count--;
25138
25139 /* Nothing to do if we are already inside a conditional block. */
25140 if (arm_condexec_count)
25141 return;
25142
25143 if (GET_CODE (body) != COND_EXEC)
25144 return;
25145
25146 /* Conditional jumps are implemented directly. */
25147 if (JUMP_P (insn))
25148 return;
25149
25150 predicate = COND_EXEC_TEST (body);
25151 arm_current_cc = get_arm_condition_code (predicate);
25152
25153 n = get_attr_ce_count (insn);
25154 arm_condexec_count = 1;
25155 arm_condexec_mask = (1 << n) - 1;
25156 arm_condexec_masklen = n;
25157 /* See if subsequent instructions can be combined into the same block. */
25158 for (;;)
25159 {
25160 insn = next_nonnote_insn (insn);
25161
25162 /* Jumping into the middle of an IT block is illegal, so a label or
25163 barrier terminates the block. */
25164 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25165 break;
25166
25167 body = PATTERN (insn);
25168 /* USE and CLOBBER aren't really insns, so just skip them. */
25169 if (GET_CODE (body) == USE
25170 || GET_CODE (body) == CLOBBER)
25171 continue;
25172
25173 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25174 if (GET_CODE (body) != COND_EXEC)
25175 break;
25176 /* Maximum number of conditionally executed instructions in a block. */
25177 n = get_attr_ce_count (insn);
25178 if (arm_condexec_masklen + n > max)
25179 break;
25180
25181 predicate = COND_EXEC_TEST (body);
25182 code = get_arm_condition_code (predicate);
25183 mask = (1 << n) - 1;
25184 if (arm_current_cc == code)
25185 arm_condexec_mask |= (mask << arm_condexec_masklen);
25186 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25187 break;
25188
25189 arm_condexec_count++;
25190 arm_condexec_masklen += n;
25191
25192 /* A jump must be the last instruction in a conditional block. */
25193 if (JUMP_P (insn))
25194 break;
25195 }
25196 /* Restore recog_data (getting the attributes of other insns can
25197 destroy this array, but final.cc assumes that it remains intact
25198 across this call). */
25199 extract_constrain_insn_cached (first_insn);
25200 }
25201
25202 void
25203 arm_final_prescan_insn (rtx_insn *insn)
25204 {
25205 /* BODY will hold the body of INSN. */
25206 rtx body = PATTERN (insn);
25207
25208 /* This will be 1 if trying to repeat the trick, and things need to be
25209 reversed if it appears to fail. */
25210 int reverse = 0;
25211
25212 /* If we start with a return insn, we only succeed if we find another one. */
25213 int seeking_return = 0;
25214 enum rtx_code return_code = UNKNOWN;
25215
25216 /* START_INSN will hold the insn from where we start looking. This is the
25217 first insn after the following code_label if REVERSE is true. */
25218 rtx_insn *start_insn = insn;
25219
25220 /* If in state 4, check if the target branch is reached, in order to
25221 change back to state 0. */
25222 if (arm_ccfsm_state == 4)
25223 {
25224 if (insn == arm_target_insn)
25225 {
25226 arm_target_insn = NULL;
25227 arm_ccfsm_state = 0;
25228 }
25229 return;
25230 }
25231
25232 /* If in state 3, it is possible to repeat the trick, if this insn is an
25233 unconditional branch to a label, and immediately following this branch
25234 is the previous target label which is only used once, and the label this
25235 branch jumps to is not too far off. */
25236 if (arm_ccfsm_state == 3)
25237 {
25238 if (simplejump_p (insn))
25239 {
25240 start_insn = next_nonnote_insn (start_insn);
25241 if (BARRIER_P (start_insn))
25242 {
25243 /* XXX Isn't this always a barrier? */
25244 start_insn = next_nonnote_insn (start_insn);
25245 }
25246 if (LABEL_P (start_insn)
25247 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25248 && LABEL_NUSES (start_insn) == 1)
25249 reverse = TRUE;
25250 else
25251 return;
25252 }
25253 else if (ANY_RETURN_P (body))
25254 {
25255 start_insn = next_nonnote_insn (start_insn);
25256 if (BARRIER_P (start_insn))
25257 start_insn = next_nonnote_insn (start_insn);
25258 if (LABEL_P (start_insn)
25259 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25260 && LABEL_NUSES (start_insn) == 1)
25261 {
25262 reverse = TRUE;
25263 seeking_return = 1;
25264 return_code = GET_CODE (body);
25265 }
25266 else
25267 return;
25268 }
25269 else
25270 return;
25271 }
25272
25273 gcc_assert (!arm_ccfsm_state || reverse);
25274 if (!JUMP_P (insn))
25275 return;
25276
25277 /* This jump might be in a PARALLEL with a clobber of the condition codes;
25278 the jump should always come first. */
25279 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25280 body = XVECEXP (body, 0, 0);
25281
25282 if (reverse
25283 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25284 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25285 {
25286 int insns_skipped;
25287 int fail = FALSE, succeed = FALSE;
25288 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25289 int then_not_else = TRUE;
25290 rtx_insn *this_insn = start_insn;
25291 rtx label = 0;
25292
25293 /* Register the insn jumped to. */
25294 if (reverse)
25295 {
25296 if (!seeking_return)
25297 label = XEXP (SET_SRC (body), 0);
25298 }
25299 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25300 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25301 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25302 {
25303 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25304 then_not_else = FALSE;
25305 }
25306 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25307 {
25308 seeking_return = 1;
25309 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25310 }
25311 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25312 {
25313 seeking_return = 1;
25314 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25315 then_not_else = FALSE;
25316 }
25317 else
25318 gcc_unreachable ();
25319
25320 /* See how many insns this branch skips, and what kind of insns. If all
25321 insns are okay, and the label or unconditional branch to the same
25322 label is not too far away, succeed. */
25323 for (insns_skipped = 0;
25324 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25325 {
25326 rtx scanbody;
25327
25328 this_insn = next_nonnote_insn (this_insn);
25329 if (!this_insn)
25330 break;
25331
25332 switch (GET_CODE (this_insn))
25333 {
25334 case CODE_LABEL:
25335 /* Succeed if it is the target label, otherwise fail since
25336 control falls in from somewhere else. */
25337 if (this_insn == label)
25338 {
25339 arm_ccfsm_state = 1;
25340 succeed = TRUE;
25341 }
25342 else
25343 fail = TRUE;
25344 break;
25345
25346 case BARRIER:
25347 /* Succeed if the following insn is the target label.
25348 Otherwise fail.
25349 If return insns are used then the last insn in a function
25350 will be a barrier. */
25351 this_insn = next_nonnote_insn (this_insn);
25352 if (this_insn && this_insn == label)
25353 {
25354 arm_ccfsm_state = 1;
25355 succeed = TRUE;
25356 }
25357 else
25358 fail = TRUE;
25359 break;
25360
25361 case CALL_INSN:
25362 /* The AAPCS says that conditional calls should not be
25363 used since they make interworking inefficient (the
25364 linker can't transform BL<cond> into BLX). That's
25365 only a problem if the machine has BLX. */
25366 if (arm_arch5t)
25367 {
25368 fail = TRUE;
25369 break;
25370 }
25371
25372 /* Succeed if the following insn is the target label, or
25373 if the following two insns are a barrier and the
25374 target label. */
25375 this_insn = next_nonnote_insn (this_insn);
25376 if (this_insn && BARRIER_P (this_insn))
25377 this_insn = next_nonnote_insn (this_insn);
25378
25379 if (this_insn && this_insn == label
25380 && insns_skipped < max_insns_skipped)
25381 {
25382 arm_ccfsm_state = 1;
25383 succeed = TRUE;
25384 }
25385 else
25386 fail = TRUE;
25387 break;
25388
25389 case JUMP_INSN:
25390 /* If this is an unconditional branch to the same label, succeed.
25391 If it is to another label, do nothing. If it is conditional,
25392 fail. */
25393 /* XXX Probably, the tests for SET and the PC are
25394 unnecessary. */
25395
25396 scanbody = PATTERN (this_insn);
25397 if (GET_CODE (scanbody) == SET
25398 && GET_CODE (SET_DEST (scanbody)) == PC)
25399 {
25400 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25401 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25402 {
25403 arm_ccfsm_state = 2;
25404 succeed = TRUE;
25405 }
25406 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25407 fail = TRUE;
25408 }
25409 /* Fail if a conditional return is undesirable (e.g. on a
25410 StrongARM), but still allow this if optimizing for size. */
25411 else if (GET_CODE (scanbody) == return_code
25412 && !use_return_insn (TRUE, NULL)
25413 && !optimize_size)
25414 fail = TRUE;
25415 else if (GET_CODE (scanbody) == return_code)
25416 {
25417 arm_ccfsm_state = 2;
25418 succeed = TRUE;
25419 }
25420 else if (GET_CODE (scanbody) == PARALLEL)
25421 {
25422 switch (get_attr_conds (this_insn))
25423 {
25424 case CONDS_NOCOND:
25425 break;
25426 default:
25427 fail = TRUE;
25428 break;
25429 }
25430 }
25431 else
25432 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25433
25434 break;
25435
25436 case INSN:
25437 /* Instructions using or affecting the condition codes make it
25438 fail. */
25439 scanbody = PATTERN (this_insn);
25440 if (!(GET_CODE (scanbody) == SET
25441 || GET_CODE (scanbody) == PARALLEL)
25442 || get_attr_conds (this_insn) != CONDS_NOCOND)
25443 fail = TRUE;
25444 break;
25445
25446 default:
25447 break;
25448 }
25449 }
25450 if (succeed)
25451 {
25452 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25453 arm_target_label = CODE_LABEL_NUMBER (label);
25454 else
25455 {
25456 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25457
25458 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25459 {
25460 this_insn = next_nonnote_insn (this_insn);
25461 gcc_assert (!this_insn
25462 || (!BARRIER_P (this_insn)
25463 && !LABEL_P (this_insn)));
25464 }
25465 if (!this_insn)
25466 {
25467 /* Oh, dear! We ran off the end; give up. */
25468 extract_constrain_insn_cached (insn);
25469 arm_ccfsm_state = 0;
25470 arm_target_insn = NULL;
25471 return;
25472 }
25473 arm_target_insn = this_insn;
25474 }
25475
25476 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25477 what it was. */
25478 if (!reverse)
25479 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25480
25481 if (reverse || then_not_else)
25482 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25483 }
25484
25485 /* Restore recog_data (getting the attributes of other insns can
25486 destroy this array, but final.cc assumes that it remains intact
25487 across this call). */
25488 extract_constrain_insn_cached (insn);
25489 }
25490 }
25491
25492 /* Output IT instructions. */
25493 void
25494 thumb2_asm_output_opcode (FILE * stream)
25495 {
25496 char buff[5];
25497 int n;
25498
25499 if (arm_condexec_mask)
25500 {
25501 for (n = 0; n < arm_condexec_masklen; n++)
25502 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25503 buff[n] = 0;
25504 asm_fprintf (stream, "i%s\t%s\n\t", buff,
25505 arm_condition_codes[arm_current_cc]);
25506 arm_condexec_mask = 0;
25507 }
25508 }
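/* [Editorial illustration, not part of GCC.]  Example of the mask encoding,
   assuming thumb2_final_prescan_insn built a block of three insns with
   arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101.  The loop above produces buff = "tet", so the
   prefix printed is

       itet    eq

   and the three following insns are output with the eq, ne and eq
   conditions respectively.  */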
25509
25510 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25511 UNITS_PER_WORD bytes wide. */
25512 static unsigned int
25513 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25514 {
25515 if (IS_VPR_REGNUM (regno))
25516 return CEIL (GET_MODE_SIZE (mode), 2);
25517
25518 if (TARGET_32BIT
25519 && regno > PC_REGNUM
25520 && regno != FRAME_POINTER_REGNUM
25521 && regno != ARG_POINTER_REGNUM
25522 && !IS_VFP_REGNUM (regno))
25523 return 1;
25524
25525 return ARM_NUM_REGS (mode);
25526 }
25527
25528 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25529 static bool
25530 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25531 {
25532 if (GET_MODE_CLASS (mode) == MODE_CC)
25533 return (regno == CC_REGNUM
25534 || (TARGET_VFP_BASE
25535 && regno == VFPCC_REGNUM));
25536
25537 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25538 return false;
25539
25540 if (IS_VPR_REGNUM (regno))
25541 return mode == HImode
25542 || mode == V16BImode
25543 || mode == V8BImode
25544 || mode == V4BImode;
25545
25546 if (TARGET_THUMB1)
25547 /* For the Thumb we only allow values bigger than SImode in
25548 registers 0 - 6, so that there is always a second low
25549 register available to hold the upper part of the value.
25550 We probably ought to ensure that the register is the
25551 start of an even numbered register pair. */
25552 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25553
25554 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25555 {
25556 if (mode == DFmode || mode == DImode)
25557 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25558
25559 if (mode == HFmode || mode == BFmode || mode == HImode
25560 || mode == SFmode || mode == SImode)
25561 return VFP_REGNO_OK_FOR_SINGLE (regno);
25562
25563 if (TARGET_NEON)
25564 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25565 || (VALID_NEON_QREG_MODE (mode)
25566 && NEON_REGNO_OK_FOR_QUAD (regno))
25567 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25568 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25569 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25570 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25571 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25572 if (TARGET_HAVE_MVE)
25573 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25574 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25575 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25576
25577 return false;
25578 }
25579
25580 if (TARGET_REALLY_IWMMXT)
25581 {
25582 if (IS_IWMMXT_GR_REGNUM (regno))
25583 return mode == SImode;
25584
25585 if (IS_IWMMXT_REGNUM (regno))
25586 return VALID_IWMMXT_REG_MODE (mode);
25587 }
25588
25589 /* We allow almost any value to be stored in the general registers.
25590 Restrict doubleword quantities to even register pairs in ARM state
25591 so that we can use ldrd. The same restriction applies for MVE
25592 in order to support Armv8.1-M Mainline instructions.
25593 Do not allow very large Neon structure opaque modes in general
25594 registers; they would use too many. */
25595 if (regno <= LAST_ARM_REGNUM)
25596 {
25597 if (ARM_NUM_REGS (mode) > 4)
25598 return false;
25599
25600 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25601 return true;
25602
25603 return !((TARGET_LDRD || TARGET_CDE)
25604 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25605 }
25606
25607 if (regno == FRAME_POINTER_REGNUM
25608 || regno == ARG_POINTER_REGNUM)
25609 /* We only allow integers in the fake hard registers. */
25610 return GET_MODE_CLASS (mode) == MODE_INT;
25611
25612 return false;
25613 }
25614
25615 /* Implement TARGET_MODES_TIEABLE_P. */
25616
25617 static bool
25618 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25619 {
25620 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25621 return true;
25622
25623 /* We specifically want to allow elements of "structure" modes to
25624 be tieable to the structure. This more general condition allows
25625 other rarer situations too. */
25626 if ((TARGET_NEON
25627 && (VALID_NEON_DREG_MODE (mode1)
25628 || VALID_NEON_QREG_MODE (mode1)
25629 || VALID_NEON_STRUCT_MODE (mode1))
25630 && (VALID_NEON_DREG_MODE (mode2)
25631 || VALID_NEON_QREG_MODE (mode2)
25632 || VALID_NEON_STRUCT_MODE (mode2)))
25633 || (TARGET_HAVE_MVE
25634 && (VALID_MVE_MODE (mode1)
25635 || VALID_MVE_STRUCT_MODE (mode1))
25636 && (VALID_MVE_MODE (mode2)
25637 || VALID_MVE_STRUCT_MODE (mode2))))
25638 return true;
25639
25640 return false;
25641 }
25642
25643 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25644 not used in arm mode. */
25645
25646 enum reg_class
25647 arm_regno_class (int regno)
25648 {
25649 if (regno == PC_REGNUM)
25650 return NO_REGS;
25651
25652 if (IS_VPR_REGNUM (regno))
25653 return VPR_REG;
25654
25655 if (TARGET_THUMB1)
25656 {
25657 if (regno == STACK_POINTER_REGNUM)
25658 return STACK_REG;
25659 if (regno == CC_REGNUM)
25660 return CC_REG;
25661 if (regno < 8)
25662 return LO_REGS;
25663 return HI_REGS;
25664 }
25665
25666 if (TARGET_THUMB2 && regno < 8)
25667 return LO_REGS;
25668
25669 if ( regno <= LAST_ARM_REGNUM
25670 || regno == FRAME_POINTER_REGNUM
25671 || regno == ARG_POINTER_REGNUM)
25672 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25673
25674 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25675 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25676
25677 if (IS_VFP_REGNUM (regno))
25678 {
25679 if (regno <= D7_VFP_REGNUM)
25680 return VFP_D0_D7_REGS;
25681 else if (regno <= LAST_LO_VFP_REGNUM)
25682 return VFP_LO_REGS;
25683 else
25684 return VFP_HI_REGS;
25685 }
25686
25687 if (IS_IWMMXT_REGNUM (regno))
25688 return IWMMXT_REGS;
25689
25690 if (IS_IWMMXT_GR_REGNUM (regno))
25691 return IWMMXT_GR_REGS;
25692
25693 return NO_REGS;
25694 }
25695
25696 /* Handle a special case when computing the offset
25697 of an argument from the frame pointer. */
25698 int
25699 arm_debugger_arg_offset (int value, rtx addr)
25700 {
25701 rtx_insn *insn;
25702
25703 /* We are only interested if dbxout_parms() failed to compute the offset. */
25704 if (value != 0)
25705 return 0;
25706
25707 /* We can only cope with the case where the address is held in a register. */
25708 if (!REG_P (addr))
25709 return 0;
25710
25711 /* If we are using the frame pointer to point at the argument, then
25712 an offset of 0 is correct. */
25713 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25714 return 0;
25715
25716 /* If we are using the stack pointer to point at the
25717 argument, then an offset of 0 is correct. */
25718 /* ??? Check this is consistent with thumb2 frame layout. */
25719 if ((TARGET_THUMB || !frame_pointer_needed)
25720 && REGNO (addr) == SP_REGNUM)
25721 return 0;
25722
25723 /* Oh dear. The argument is pointed to by a register rather
25724 than being held in a register, or being stored at a known
25725 offset from the frame pointer. Since GDB only understands
25726 those two kinds of argument we must translate the address
25727 held in the register into an offset from the frame pointer.
25728 We do this by searching through the insns for the function
25729 looking to see where this register gets its value. If the
25730 register is initialized from the frame pointer plus an offset
25731 then we are in luck and we can continue, otherwise we give up.
25732
25733 This code is exercised by producing debugging information
25734 for a function with arguments like this:
25735
25736 double func (double a, double b, int c, double d) {return d;}
25737
25738 Without this code the stab for parameter 'd' will be set to
25739 an offset of 0 from the frame pointer, rather than 8. */
25740
25741 /* The if() statement says:
25742
25743 If the insn is a normal instruction
25744 and if the insn is setting the value in a register
25745 and if the register being set is the register holding the address of the argument
25746 and if the address is computed by an addition
25747 that involves adding to a register
25748 which is the frame pointer
25749 a constant integer
25750
25751 then... */
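/* [Editorial illustration, not part of GCC.]  In RTL terms, the loop below
   looks for an insn of the shape

       (set (reg Rn)
            (plus (reg hard-frame-pointer) (const_int K)))

   where Rn is the register held in ADDR; K then becomes the offset reported
   to the debugger.  */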
25752
25753 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25754 {
25755 if ( NONJUMP_INSN_P (insn)
25756 && GET_CODE (PATTERN (insn)) == SET
25757 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25758 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25759 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25760 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25761 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25762 )
25763 {
25764 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25765
25766 break;
25767 }
25768 }
25769
25770 if (value == 0)
25771 {
25772 debug_rtx (addr);
25773 warning (0, "unable to compute real location of stacked parameter");
25774 value = 8; /* XXX magic hack */
25775 }
25776
25777 return value;
25778 }
25779 \f
25780 /* Implement TARGET_PROMOTED_TYPE. */
25781
25782 static tree
25783 arm_promoted_type (const_tree t)
25784 {
25785 if (SCALAR_FLOAT_TYPE_P (t)
25786 && TYPE_PRECISION (t) == 16
25787 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25788 return float_type_node;
25789 return NULL_TREE;
25790 }
25791
25792 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25793 This simply adds HFmode as a supported mode; even though we don't
25794 implement arithmetic on this type directly, it's supported by
25795 optabs conversions, much the way the double-word arithmetic is
25796 special-cased in the default hook. */
25797
25798 static bool
25799 arm_scalar_mode_supported_p (scalar_mode mode)
25800 {
25801 if (mode == HFmode)
25802 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25803 else if (ALL_FIXED_POINT_MODE_P (mode))
25804 return true;
25805 else
25806 return default_scalar_mode_supported_p (mode);
25807 }
25808
25809 /* Set the value of FLT_EVAL_METHOD.
25810 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25811
25812 0: evaluate all operations and constants, whose semantic type has at
25813 most the range and precision of type float, to the range and
25814 precision of float; evaluate all other operations and constants to
25815 the range and precision of the semantic type;
25816
25817 N, where _FloatN is a supported interchange floating type
25818 evaluate all operations and constants, whose semantic type has at
25819 most the range and precision of _FloatN type, to the range and
25820 precision of the _FloatN type; evaluate all other operations and
25821 constants to the range and precision of the semantic type;
25822
25823 If we have the ARMv8.2-A extensions then we support _Float16 in native
25824 precision, so we should set this to 16. Otherwise, we support the type,
25825 but want to evaluate expressions in float precision, so set this to
25826 0. */
25827
25828 static enum flt_eval_method
25829 arm_excess_precision (enum excess_precision_type type)
25830 {
25831 switch (type)
25832 {
25833 case EXCESS_PRECISION_TYPE_FAST:
25834 case EXCESS_PRECISION_TYPE_STANDARD:
25835 /* We can calculate either in 16-bit range and precision or
25836 32-bit range and precision. Make that decision based on whether
25837 we have native support for the ARMv8.2-A 16-bit floating-point
25838 instructions or not. */
25839 return (TARGET_VFP_FP16INST
25840 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25841 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25842 case EXCESS_PRECISION_TYPE_IMPLICIT:
25843 case EXCESS_PRECISION_TYPE_FLOAT16:
25844 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25845 default:
25846 gcc_unreachable ();
25847 }
25848 return FLT_EVAL_METHOD_UNPREDICTABLE;
25849 }
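/* [Editorial sketch, not part of GCC.]  Assuming -mfp16-format=ieee, the
   effect described above on user code is roughly:

       _Float16
       add_halves (_Float16 a, _Float16 b)
       {
         // Without the ARMv8.2-A FP16 instructions (FLT_EVAL_METHOD 0) the
         // sum is computed in 'float' and rounded back to _Float16 on
         // return; with +fp16 (FLT_EVAL_METHOD 16) it is computed directly
         // in half precision.
         return a + b;
       }
*/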
25850
25851
25852 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25853 _Float16 if we are using anything other than ieee format for 16-bit
25854 floating point. Otherwise, punt to the default implementation. */
25855 static opt_scalar_float_mode
25856 arm_floatn_mode (int n, bool extended)
25857 {
25858 if (!extended && n == 16)
25859 {
25860 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25861 return HFmode;
25862 return opt_scalar_float_mode ();
25863 }
25864
25865 return default_floatn_mode (n, extended);
25866 }
25867
25868
25869 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25870 not to early-clobber SRC registers in the process.
25871
25872 We assume that the operands described by SRC and DEST represent a
25873 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25874 number of components into which the copy has been decomposed. */
25875 void
25876 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25877 {
25878 unsigned int i;
25879
25880 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25881 || REGNO (operands[0]) < REGNO (operands[1]))
25882 {
25883 for (i = 0; i < count; i++)
25884 {
25885 operands[2 * i] = dest[i];
25886 operands[2 * i + 1] = src[i];
25887 }
25888 }
25889 else
25890 {
25891 for (i = 0; i < count; i++)
25892 {
25893 operands[2 * i] = dest[count - i - 1];
25894 operands[2 * i + 1] = src[count - i - 1];
25895 }
25896 }
25897 }
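/* [Editorial illustration, not part of GCC.]  Example of the ordering above,
   assuming a two-part copy whose destination starts one D register above an
   overlapping source (dest = {d1, d2}, src = {d0, d1}).  Since
   REGNO (operands[0]) > REGNO (operands[1]), the components are emitted in
   reverse order:

       d2 <- d1
       d1 <- d0

   so d1 is read before it is overwritten.  Non-overlapping or
   downward-moving copies keep the original order.  */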
25898
25899 /* Split operands into moves from op[1] + op[2] into op[0]. */
25900
25901 void
25902 neon_split_vcombine (rtx operands[3])
25903 {
25904 unsigned int dest = REGNO (operands[0]);
25905 unsigned int src1 = REGNO (operands[1]);
25906 unsigned int src2 = REGNO (operands[2]);
25907 machine_mode halfmode = GET_MODE (operands[1]);
25908 unsigned int halfregs = REG_NREGS (operands[1]);
25909 rtx destlo, desthi;
25910
25911 if (src1 == dest && src2 == dest + halfregs)
25912 {
25913 /* No-op move. Can't split to nothing; emit something. */
25914 emit_note (NOTE_INSN_DELETED);
25915 return;
25916 }
25917
25918 /* Preserve register attributes for variable tracking. */
25919 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25920 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25921 GET_MODE_SIZE (halfmode));
25922
25923 /* Special case of reversed high/low parts. Use VSWP. */
25924 if (src2 == dest && src1 == dest + halfregs)
25925 {
25926 rtx x = gen_rtx_SET (destlo, operands[1]);
25927 rtx y = gen_rtx_SET (desthi, operands[2]);
25928 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25929 return;
25930 }
25931
25932 if (!reg_overlap_mentioned_p (operands[2], destlo))
25933 {
25934 /* Try to avoid unnecessary moves if part of the result
25935 is in the right place already. */
25936 if (src1 != dest)
25937 emit_move_insn (destlo, operands[1]);
25938 if (src2 != dest + halfregs)
25939 emit_move_insn (desthi, operands[2]);
25940 }
25941 else
25942 {
25943 if (src2 != dest + halfregs)
25944 emit_move_insn (desthi, operands[2]);
25945 if (src1 != dest)
25946 emit_move_insn (destlo, operands[1]);
25947 }
25948 }
25949 \f
25950 /* Return the number (counting from 0) of
25951 the least significant set bit in MASK. */
25952
25953 inline static int
25954 number_of_first_bit_set (unsigned mask)
25955 {
25956 return ctz_hwi (mask);
25957 }
25958
25959 /* Like emit_multi_reg_push, but allowing for a different set of
25960 registers to be described as saved. MASK is the set of registers
25961 to be saved; REAL_REGS is the set of registers to be described as
25962 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25963
25964 static rtx_insn *
25965 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25966 {
25967 unsigned long regno;
25968 rtx par[10], tmp, reg;
25969 rtx_insn *insn;
25970 int i, j;
25971
25972 /* Build the parallel of the registers actually being stored. */
25973 for (i = 0; mask; ++i, mask &= mask - 1)
25974 {
25975 regno = ctz_hwi (mask);
25976 reg = gen_rtx_REG (SImode, regno);
25977
25978 if (i == 0)
25979 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25980 else
25981 tmp = gen_rtx_USE (VOIDmode, reg);
25982
25983 par[i] = tmp;
25984 }
25985
25986 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25987 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25988 tmp = gen_frame_mem (BLKmode, tmp);
25989 tmp = gen_rtx_SET (tmp, par[0]);
25990 par[0] = tmp;
25991
25992 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25993 insn = emit_insn (tmp);
25994
25995 /* Always build the stack adjustment note for unwind info. */
25996 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25997 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25998 par[0] = tmp;
25999
26000 /* Build the parallel of the registers recorded as saved for unwind. */
26001 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26002 {
26003 regno = ctz_hwi (real_regs);
26004 reg = gen_rtx_REG (SImode, regno);
26005
26006 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26007 tmp = gen_frame_mem (SImode, tmp);
26008 tmp = gen_rtx_SET (tmp, reg);
26009 RTX_FRAME_RELATED_P (tmp) = 1;
26010 par[j + 1] = tmp;
26011 }
26012
26013 if (j == 0)
26014 tmp = par[0];
26015 else
26016 {
26017 RTX_FRAME_RELATED_P (par[0]) = 1;
26018 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26019 }
26020
26021 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26022
26023 return insn;
26024 }
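/* [Editorial illustration, not part of GCC.]  For MASK = {r4, r5, lr} the
   insn built above has roughly the shape

       (parallel
         [(set (mem:BLK (pre_modify (reg sp)
                                    (plus (reg sp) (const_int -12))))
               (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
          (use (reg r5))
          (use (reg lr))])

   while the attached REG_FRAME_RELATED_EXPR note describes the same store
   as an explicit sp adjustment plus individual SImode saves of the
   REAL_REGS registers, for the benefit of the unwinder.  */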
26025
26026 /* Emit code to pop registers from the stack. F is the
26027 assembly file. MASK is the registers to pop. */
26028 static void
26029 thumb_pop (FILE *f, unsigned long mask)
26030 {
26031 int regno;
26032 int lo_mask = mask & 0xFF;
26033
26034 gcc_assert (mask);
26035
26036 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26037 {
26038 /* Special case. Do not generate a POP PC statement here, do it in
26039 thumb_exit(). */
26040 thumb_exit (f, -1);
26041 return;
26042 }
26043
26044 fprintf (f, "\tpop\t{");
26045
26046 /* Look at the low registers first. */
26047 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26048 {
26049 if (lo_mask & 1)
26050 {
26051 asm_fprintf (f, "%r", regno);
26052
26053 if ((lo_mask & ~1) != 0)
26054 fprintf (f, ", ");
26055 }
26056 }
26057
26058 if (mask & (1 << PC_REGNUM))
26059 {
26060 /* Catch popping the PC. */
26061 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26062 || IS_CMSE_ENTRY (arm_current_func_type ()))
26063 {
26064 /* The PC is never popped directly; instead
26065 it is popped into r3 and then BX is used. */
26066 fprintf (f, "}\n");
26067
26068 thumb_exit (f, -1);
26069
26070 return;
26071 }
26072 else
26073 {
26074 if (mask & 0xFF)
26075 fprintf (f, ", ");
26076
26077 asm_fprintf (f, "%r", PC_REGNUM);
26078 }
26079 }
26080
26081 fprintf (f, "}\n");
26082 }
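/* [Editorial illustration, not part of GCC.]  For MASK = {r4, r5, r7, pc} on
   a target with no interworking, backtrace, EH-return or CMSE requirements,
   the function above simply prints

       pop     {r4, r5, r7, pc}

   whereas the special cases hand the PC over to thumb_exit so the return
   can go through BX (or BXNS) instead.  */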
26083
26084 /* Generate code to return from a thumb function.
26085 If 'reg_containing_return_addr' is -1, then the return address is
26086 actually on the stack, at the stack pointer.
26087
26088 Note: do not forget to update length attribute of corresponding insn pattern
26089 when changing assembly output (eg. length attribute of epilogue_insns when
26090 updating Armv8-M Baseline Security Extensions register clearing
26091 sequences). */
26092 static void
26093 thumb_exit (FILE *f, int reg_containing_return_addr)
26094 {
26095 unsigned regs_available_for_popping;
26096 unsigned regs_to_pop;
26097 int pops_needed;
26098 unsigned available;
26099 unsigned required;
26100 machine_mode mode;
26101 int size;
26102 int restore_a4 = FALSE;
26103
26104 /* Compute the registers we need to pop. */
26105 regs_to_pop = 0;
26106 pops_needed = 0;
26107
26108 if (reg_containing_return_addr == -1)
26109 {
26110 regs_to_pop |= 1 << LR_REGNUM;
26111 ++pops_needed;
26112 }
26113
26114 if (TARGET_BACKTRACE)
26115 {
26116 /* Restore the (ARM) frame pointer and stack pointer. */
26117 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26118 pops_needed += 2;
26119 }
26120
26121 /* If there is nothing to pop then just emit the BX instruction and
26122 return. */
26123 if (pops_needed == 0)
26124 {
26125 if (crtl->calls_eh_return)
26126 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26127
26128 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26129 {
26130 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26131 emitted by cmse_nonsecure_entry_clear_before_return (). */
26132 if (!TARGET_HAVE_FPCXT_CMSE)
26133 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26134 reg_containing_return_addr);
26135 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26136 }
26137 else
26138 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26139 return;
26140 }
26141 /* Otherwise, if we are not supporting interworking, have not created a
26142 backtrace structure, and the function was not entered in ARM mode,
26143 just pop the return address straight into the PC. */
26144 else if (!TARGET_INTERWORK
26145 && !TARGET_BACKTRACE
26146 && !is_called_in_ARM_mode (current_function_decl)
26147 && !crtl->calls_eh_return
26148 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26149 {
26150 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26151 return;
26152 }
26153
26154 /* Find out how many of the (return) argument registers we can corrupt. */
26155 regs_available_for_popping = 0;
26156
26157 /* If returning via __builtin_eh_return, the bottom three registers
26158 all contain information needed for the return. */
26159 if (crtl->calls_eh_return)
26160 size = 12;
26161 else
26162 {
26163 /* We can deduce the registers used from the function's
26164 return value. This is more reliable than examining
26165 df_regs_ever_live_p () because that will be set if the register is
26166 ever used in the function, not just if the register is used
26167 to hold a return value. */
26168
26169 if (crtl->return_rtx != 0)
26170 mode = GET_MODE (crtl->return_rtx);
26171 else
26172 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26173
26174 size = GET_MODE_SIZE (mode);
26175
26176 if (size == 0)
26177 {
26178 /* In a void function we can use any argument register.
26179 In a function that returns a structure on the stack
26180 we can use the second and third argument registers. */
26181 if (mode == VOIDmode)
26182 regs_available_for_popping =
26183 (1 << ARG_REGISTER (1))
26184 | (1 << ARG_REGISTER (2))
26185 | (1 << ARG_REGISTER (3));
26186 else
26187 regs_available_for_popping =
26188 (1 << ARG_REGISTER (2))
26189 | (1 << ARG_REGISTER (3));
26190 }
26191 else if (size <= 4)
26192 regs_available_for_popping =
26193 (1 << ARG_REGISTER (2))
26194 | (1 << ARG_REGISTER (3));
26195 else if (size <= 8)
26196 regs_available_for_popping =
26197 (1 << ARG_REGISTER (3));
26198 }
26199
26200 /* Match registers to be popped with registers into which we pop them. */
26201 for (available = regs_available_for_popping,
26202 required = regs_to_pop;
26203 required != 0 && available != 0;
26204 available &= ~(available & - available),
26205 required &= ~(required & - required))
26206 -- pops_needed;
26207
26208 /* If we have any popping registers left over, remove them. */
26209 if (available > 0)
26210 regs_available_for_popping &= ~available;
26211
26212 /* Otherwise if we need another popping register we can use
26213 the fourth argument register. */
26214 else if (pops_needed)
26215 {
26216 /* If we have not found any free argument registers and
26217 reg a4 contains the return address, we must move it. */
26218 if (regs_available_for_popping == 0
26219 && reg_containing_return_addr == LAST_ARG_REGNUM)
26220 {
26221 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26222 reg_containing_return_addr = LR_REGNUM;
26223 }
26224 else if (size > 12)
26225 {
26226 /* Register a4 is being used to hold part of the return value,
26227 but we have dire need of a free, low register. */
26228 restore_a4 = TRUE;
26229
26230 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26231 }
26232
26233 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26234 {
26235 /* The fourth argument register is available. */
26236 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26237
26238 --pops_needed;
26239 }
26240 }
26241
26242 /* Pop as many registers as we can. */
26243 thumb_pop (f, regs_available_for_popping);
26244
26245 /* Process the registers we popped. */
26246 if (reg_containing_return_addr == -1)
26247 {
26248 /* The return address was popped into the lowest numbered register. */
26249 regs_to_pop &= ~(1 << LR_REGNUM);
26250
26251 reg_containing_return_addr =
26252 number_of_first_bit_set (regs_available_for_popping);
26253
26254 /* Remove this register from the mask of available registers, so that
26255 the return address will not be corrupted by further pops. */
26256 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26257 }
26258
26259 /* If we popped other registers then handle them here. */
26260 if (regs_available_for_popping)
26261 {
26262 int frame_pointer;
26263
26264 /* Work out which register currently contains the frame pointer. */
26265 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26266
26267 /* Move it into the correct place. */
26268 asm_fprintf (f, "\tmov\t%r, %r\n",
26269 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26270
26271 /* (Temporarily) remove it from the mask of popped registers. */
26272 regs_available_for_popping &= ~(1 << frame_pointer);
26273 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26274
26275 if (regs_available_for_popping)
26276 {
26277 int stack_pointer;
26278
26279 /* We popped the stack pointer as well,
26280 find the register that contains it. */
26281 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26282
26283 /* Move it into the stack register. */
26284 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26285
26286 /* At this point we have popped all necessary registers, so
26287 do not worry about restoring regs_available_for_popping
26288 to its correct value:
26289
26290 assert (pops_needed == 0)
26291 assert (regs_available_for_popping == (1 << frame_pointer))
26292 assert (regs_to_pop == (1 << STACK_POINTER)) */
26293 }
26294 else
26295 {
26296 /* Since we have just moved the popped value into the frame
26297 pointer, the popping register is available for reuse, and
26298 we know that we still have the stack pointer left to pop. */
26299 regs_available_for_popping |= (1 << frame_pointer);
26300 }
26301 }
26302
26303 /* If we still have registers left on the stack, but we no longer have
26304 any registers into which we can pop them, then we must move the return
26305 address into the link register and make available the register that
26306 contained it. */
26307 if (regs_available_for_popping == 0 && pops_needed > 0)
26308 {
26309 regs_available_for_popping |= 1 << reg_containing_return_addr;
26310
26311 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26312 reg_containing_return_addr);
26313
26314 reg_containing_return_addr = LR_REGNUM;
26315 }
26316
26317 /* If we have registers left on the stack then pop some more.
26318 We know that at most we will want to pop FP and SP. */
26319 if (pops_needed > 0)
26320 {
26321 int popped_into;
26322 int move_to;
26323
26324 thumb_pop (f, regs_available_for_popping);
26325
26326 /* We have popped either FP or SP.
26327 Move whichever one it is into the correct register. */
26328 popped_into = number_of_first_bit_set (regs_available_for_popping);
26329 move_to = number_of_first_bit_set (regs_to_pop);
26330
26331 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26332 --pops_needed;
26333 }
26334
26335 /* If we still have not popped everything then we must have only
26336 had one register available to us and we are now popping the SP. */
26337 if (pops_needed > 0)
26338 {
26339 int popped_into;
26340
26341 thumb_pop (f, regs_available_for_popping);
26342
26343 popped_into = number_of_first_bit_set (regs_available_for_popping);
26344
26345 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26346 /*
26347 assert (regs_to_pop == (1 << STACK_POINTER))
26348 assert (pops_needed == 1)
26349 */
26350 }
26351
26352 /* If necessary restore the a4 register. */
26353 if (restore_a4)
26354 {
26355 if (reg_containing_return_addr != LR_REGNUM)
26356 {
26357 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26358 reg_containing_return_addr = LR_REGNUM;
26359 }
26360
26361 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26362 }
26363
26364 if (crtl->calls_eh_return)
26365 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26366
26367 /* Return to caller. */
26368 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26369 {
26370 /* This is for the cases where LR is not being used to contain the return
26371 address. It may therefore contain information that we might not want
26372 to leak, hence it must be cleared. The value in R0 will never be a
26373 secret at this point, so it is safe to use it, see the clearing code
26374 in cmse_nonsecure_entry_clear_before_return (). */
26375 if (reg_containing_return_addr != LR_REGNUM)
26376 asm_fprintf (f, "\tmov\tlr, r0\n");
26377
26378 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26379 by cmse_nonsecure_entry_clear_before_return (). */
26380 if (!TARGET_HAVE_FPCXT_CMSE)
26381 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26382 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26383 }
26384 else
26385 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26386 }
26387 \f
26388 /* Scan INSN just before assembler is output for it.
26389 For Thumb-1, we track the status of the condition codes; this
26390 information is used in the cbranchsi4_insn pattern. */
26391 void
26392 thumb1_final_prescan_insn (rtx_insn *insn)
26393 {
26394 if (flag_print_asm_name)
26395 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26396 INSN_ADDRESSES (INSN_UID (insn)));
26397 /* Don't overwrite the previous setter when we get to a cbranch. */
26398 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26399 {
26400 enum attr_conds conds;
26401
26402 if (cfun->machine->thumb1_cc_insn)
26403 {
26404 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26405 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26406 CC_STATUS_INIT;
26407 }
26408 conds = get_attr_conds (insn);
26409 if (conds == CONDS_SET)
26410 {
26411 rtx set = single_set (insn);
26412 cfun->machine->thumb1_cc_insn = insn;
26413 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26414 cfun->machine->thumb1_cc_op1 = const0_rtx;
26415 cfun->machine->thumb1_cc_mode = CC_NZmode;
26416 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26417 {
26418 rtx src1 = XEXP (SET_SRC (set), 1);
26419 if (src1 == const0_rtx)
26420 cfun->machine->thumb1_cc_mode = CCmode;
26421 }
26422 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26423 {
26424 /* Record the src register operand instead of dest because
26425 cprop_hardreg pass propagates src. */
26426 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26427 }
26428 }
26429 else if (conds != CONDS_NOCOND)
26430 cfun->machine->thumb1_cc_insn = NULL_RTX;
26431 }
26432
26433 /* Check if unexpected far jump is used. */
26434 if (cfun->machine->lr_save_eliminated
26435 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26436 internal_error("Unexpected thumb1 far jump");
26437 }
26438
26439 int
26440 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26441 {
26442 unsigned HOST_WIDE_INT mask = 0xff;
26443 int i;
26444
26445 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26446 if (val == 0) /* XXX */
26447 return 0;
26448
26449 for (i = 0; i < 25; i++)
26450 if ((val & (mask << i)) == val)
26451 return 1;
26452
26453 return 0;
26454 }
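/* [Editorial illustration, not part of GCC.]  The test above accepts exactly
   those constants whose set bits fit in a single 8-bit window shifted left
   by 0..24, i.e. an 8-bit MOV immediate followed by a left shift:

       thumb_shiftable_const (0x00ff0000)  -> 1   (0xff << 16)
       thumb_shiftable_const (0x00012300)  -> 0   (spans more than 8 bits)
       thumb_shiftable_const (0)           -> 0   (rejected explicitly)
*/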
26455
26456 /* Returns nonzero if the current function contains,
26457 or might contain a far jump. */
26458 static int
26459 thumb_far_jump_used_p (void)
26460 {
26461 rtx_insn *insn;
26462 bool far_jump = false;
26463 unsigned int func_size = 0;
26464
26465 /* If we have already decided that far jumps may be used,
26466 do not bother checking again, and always return true even if
26467 it turns out that they are not being used. Once we have made
26468 the decision that far jumps are present (and that hence the link
26469 register will be pushed onto the stack) we cannot go back on it. */
26470 if (cfun->machine->far_jump_used)
26471 return 1;
26472
26473 /* If this function is not being called from the prologue/epilogue
26474 generation code then it must be being called from the
26475 INITIAL_ELIMINATION_OFFSET macro. */
26476 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26477 {
26478 /* In this case we know that we are being asked about the elimination
26479 of the arg pointer register. If that register is not being used,
26480 then there are no arguments on the stack, and we do not have to
26481 worry that a far jump might force the prologue to push the link
26482 register, changing the stack offsets. In this case we can just
26483 return false, since the presence of far jumps in the function will
26484 not affect stack offsets.
26485
26486 If the arg pointer is live (or if it was live, but has now been
26487 eliminated and so set to dead) then we do have to test to see if
26488 the function might contain a far jump. This test can lead to some
26489 false negatives, since before reload is completed, the length of
26490 branch instructions is not known, so gcc defaults to returning their
26491 longest length, which in turn sets the far jump attribute to true.
26492
26493 A false negative will not result in bad code being generated, but it
26494 will result in a needless push and pop of the link register. We
26495 hope that this does not occur too often.
26496
26497 If we need doubleword stack alignment this could affect the other
26498 elimination offsets so we can't risk getting it wrong. */
26499 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26500 cfun->machine->arg_pointer_live = 1;
26501 else if (!cfun->machine->arg_pointer_live)
26502 return 0;
26503 }
26504
26505 /* We should not change far_jump_used during or after reload, as there is
26506 no chance to change stack frame layout. */
26507 if (reload_in_progress || reload_completed)
26508 return 0;
26509
26510 /* Check to see if the function contains a branch
26511 insn with the far jump attribute set. */
26512 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26513 {
26514 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26515 {
26516 far_jump = true;
26517 }
26518 func_size += get_attr_length (insn);
26519 }
26520
26521 /* The far_jump attribute will always be true for thumb1 before the
26522 shorten_branch pass, so checking it before shorten_branch is not
26523 very useful.
26524
26525 The following heuristic tries to estimate more accurately whether a far
26526 jump will finally be used. The heuristic is very conservative, as there
26527 is no chance to roll back a decision not to use a far jump.
26528
26529 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26530 2-byte insn is associated with a 4-byte constant pool entry. Using
26531 function size 2048/3 as the threshold is conservative enough. */
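/* [Editorial note, not part of GCC.]  Worked example: a function whose insns
   total 700 bytes could occupy up to 3 * 700 = 2100 bytes once worst-case
   constant pools are counted, beyond the roughly +/-2 KB reach of a Thumb-1
   branch, so 3 * func_size >= 2048 conservatively keeps LR saved.  */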
26532 if (far_jump)
26533 {
26534 if ((func_size * 3) >= 2048)
26535 {
26536 /* Record the fact that we have decided that
26537 the function does use far jumps. */
26538 cfun->machine->far_jump_used = 1;
26539 return 1;
26540 }
26541 }
26542
26543 return 0;
26544 }
26545
26546 /* Return nonzero if FUNC must be entered in ARM mode. */
26547 static bool
26548 is_called_in_ARM_mode (tree func)
26549 {
26550 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26551
26552 /* Ignore the problem about functions whose address is taken. */
26553 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26554 return true;
26555
26556 #ifdef ARM_PE
26557 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26558 #else
26559 return false;
26560 #endif
26561 }
26562
26563 /* Given the stack offsets and register mask in OFFSETS, decide how
26564 many additional registers to push instead of subtracting a constant
26565 from SP. For epilogues the principle is the same except we use pop.
26566 FOR_PROLOGUE indicates which we're generating. */
26567 static int
26568 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26569 {
26570 HOST_WIDE_INT amount;
26571 unsigned long live_regs_mask = offsets->saved_regs_mask;
26572 /* Extract a mask of the ones we can give to the Thumb's push/pop
26573 instruction. */
26574 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26575 /* Then count how many other high registers will need to be pushed. */
26576 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26577 int n_free, reg_base, size;
26578
26579 if (!for_prologue && frame_pointer_needed)
26580 amount = offsets->locals_base - offsets->saved_regs;
26581 else
26582 amount = offsets->outgoing_args - offsets->saved_regs;
26583
26584 /* If the stack frame size is 512 exactly, we can save one load
26585 instruction, which should make this a win even when optimizing
26586 for speed. */
26587 if (!optimize_size && amount != 512)
26588 return 0;
26589
26590 /* Can't do this if there are high registers to push. */
26591 if (high_regs_pushed != 0)
26592 return 0;
26593
26594 /* Shouldn't do it in the prologue if no registers would normally
26595 be pushed at all. In the epilogue, also allow it if we'll have
26596 a pop insn for the PC. */
26597 if (l_mask == 0
26598 && (for_prologue
26599 || TARGET_BACKTRACE
26600 || (live_regs_mask & 1 << LR_REGNUM) == 0
26601 || TARGET_INTERWORK
26602 || crtl->args.pretend_args_size != 0))
26603 return 0;
26604
26605 /* Don't do this if thumb_expand_prologue wants to emit instructions
26606 between the push and the stack frame allocation. */
26607 if (for_prologue
26608 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26609 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26610 return 0;
26611
26612 reg_base = 0;
26613 n_free = 0;
26614 if (!for_prologue)
26615 {
26616 size = arm_size_return_regs ();
26617 reg_base = ARM_NUM_INTS (size);
26618 live_regs_mask >>= reg_base;
26619 }
26620
26621 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26622 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26623 {
26624 live_regs_mask >>= 1;
26625 n_free++;
26626 }
26627
26628 if (n_free == 0)
26629 return 0;
26630 gcc_assert (amount / 4 * 4 == amount);
26631
26632 if (amount >= 512 && (amount - n_free * 4) < 512)
26633 return (amount - 508) / 4;
26634 if (amount <= n_free * 4)
26635 return amount / 4;
26636 return 0;
26637 }
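/* [Editorial illustration, not part of GCC.]  Worked example: with a 512-byte
   frame (amount == 512) and one free low register (n_free == 1) the code
   above returns (512 - 508) / 4 == 1, so one extra register is pushed and
   the remaining adjustment of 508 bytes fits a single Thumb-1
   "sub sp, #imm", whose largest immediate is 508.  */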
26638
26639 /* The bits which aren't usefully expanded as rtl. */
26640 const char *
26641 thumb1_unexpanded_epilogue (void)
26642 {
26643 arm_stack_offsets *offsets;
26644 int regno;
26645 unsigned long live_regs_mask = 0;
26646 int high_regs_pushed = 0;
26647 int extra_pop;
26648 int had_to_push_lr;
26649 int size;
26650
26651 if (cfun->machine->return_used_this_function != 0)
26652 return "";
26653
26654 if (IS_NAKED (arm_current_func_type ()))
26655 return "";
26656
26657 offsets = arm_get_frame_offsets ();
26658 live_regs_mask = offsets->saved_regs_mask;
26659 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26660
26661 /* We can deduce the registers used from the function's return value.
26662 This is more reliable than examining df_regs_ever_live_p () because that
26663 will be set if the register is ever used in the function, not just if
26664 the register is used to hold a return value. */
26665 size = arm_size_return_regs ();
26666
26667 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26668 if (extra_pop > 0)
26669 {
26670 unsigned long extra_mask = (1 << extra_pop) - 1;
26671 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26672 }
26673
26674 /* The prologue may have pushed some high registers to use as
26675 work registers, e.g. the testsuite file:
26676 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26677 compiles to produce:
26678 push {r4, r5, r6, r7, lr}
26679 mov r7, r9
26680 mov r6, r8
26681 push {r6, r7}
26682 as part of the prologue. We have to undo that pushing here. */
26683
26684 if (high_regs_pushed)
26685 {
26686 unsigned long mask = live_regs_mask & 0xff;
26687 int next_hi_reg;
26688
26689 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26690
26691 if (mask == 0)
26692 /* Oh dear! We have no low registers into which we can pop
26693 high registers! */
26694 internal_error
26695 ("no low registers available for popping high registers");
26696
26697 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26698 if (live_regs_mask & (1 << next_hi_reg))
26699 break;
26700
26701 while (high_regs_pushed)
26702 {
26703 /* Find lo register(s) into which the high register(s) can
26704 be popped. */
26705 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26706 {
26707 if (mask & (1 << regno))
26708 high_regs_pushed--;
26709 if (high_regs_pushed == 0)
26710 break;
26711 }
26712
26713 if (high_regs_pushed == 0 && regno >= 0)
26714 mask &= ~((1 << regno) - 1);
26715
26716 /* Pop the values into the low register(s). */
26717 thumb_pop (asm_out_file, mask);
26718
26719 /* Move the value(s) into the high registers. */
26720 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26721 {
26722 if (mask & (1 << regno))
26723 {
26724 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26725 regno);
26726
26727 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26728 next_hi_reg--)
26729 if (live_regs_mask & (1 << next_hi_reg))
26730 break;
26731 }
26732 }
26733 }
26734 live_regs_mask &= ~0x0f00;
26735 }
26736
26737 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26738 live_regs_mask &= 0xff;
26739
26740 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26741 {
26742 /* Pop the return address into the PC. */
26743 if (had_to_push_lr)
26744 live_regs_mask |= 1 << PC_REGNUM;
26745
26746 /* Either no argument registers were pushed or a backtrace
26747 structure was created which includes an adjusted stack
26748 pointer, so just pop everything. */
26749 if (live_regs_mask)
26750 thumb_pop (asm_out_file, live_regs_mask);
26751
26752 /* We have either just popped the return address into the
26753 PC or it was kept in LR for the entire function.
26754 Note that thumb_pop has already called thumb_exit if the
26755 PC was in the list. */
26756 if (!had_to_push_lr)
26757 thumb_exit (asm_out_file, LR_REGNUM);
26758 }
26759 else
26760 {
26761 /* Pop everything but the return address. */
26762 if (live_regs_mask)
26763 thumb_pop (asm_out_file, live_regs_mask);
26764
26765 if (had_to_push_lr)
26766 {
26767 if (size > 12)
26768 {
26769 /* We have no free low regs, so save one. */
26770 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26771 LAST_ARG_REGNUM);
26772 }
26773
26774 /* Get the return address into a temporary register. */
26775 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26776
26777 if (size > 12)
26778 {
26779 /* Move the return address to lr. */
26780 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26781 LAST_ARG_REGNUM);
26782 /* Restore the low register. */
26783 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26784 IP_REGNUM);
26785 regno = LR_REGNUM;
26786 }
26787 else
26788 regno = LAST_ARG_REGNUM;
26789 }
26790 else
26791 regno = LR_REGNUM;
26792
26793 /* Remove the argument registers that were pushed onto the stack. */
26794 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26795 SP_REGNUM, SP_REGNUM,
26796 crtl->args.pretend_args_size);
26797
26798 thumb_exit (asm_out_file, regno);
26799 }
26800
26801 return "";
26802 }
26803
26804 /* Functions to save and restore machine-specific function data. */
26805 static struct machine_function *
26806 arm_init_machine_status (void)
26807 {
26808 struct machine_function *machine;
26809 machine = ggc_cleared_alloc<machine_function> ();
26810
26811 #if ARM_FT_UNKNOWN != 0
26812 machine->func_type = ARM_FT_UNKNOWN;
26813 #endif
26814 machine->static_chain_stack_bytes = -1;
26815 return machine;
26816 }
26817
26818 /* Return an RTX indicating where the return address to the
26819 calling function can be found. */
26820 rtx
26821 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26822 {
26823 if (count != 0)
26824 return NULL_RTX;
26825
26826 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26827 }
26828
26829 /* Do anything needed before RTL is emitted for each function. */
26830 void
26831 arm_init_expanders (void)
26832 {
26833 /* Arrange to initialize and mark the machine per-function status. */
26834 init_machine_status = arm_init_machine_status;
26835
26836 /* This is to stop the combine pass optimizing away the alignment
26837 adjustment of va_arg. */
26838 /* ??? It is claimed that this should not be necessary. */
26839 if (cfun)
26840 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26841 }
26842
26843 /* Return TRUE if FUNC would be compiled in a different instruction set mode (ARM vs. Thumb) from the current mode. */
26844
26845 bool
26846 arm_change_mode_p (tree func)
26847 {
26848 if (TREE_CODE (func) != FUNCTION_DECL)
26849 return false;
26850
26851 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26852
26853 if (!callee_tree)
26854 callee_tree = target_option_default_node;
26855
26856 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26857 int flags = callee_opts->x_target_flags;
26858
26859 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26860 }
26861
26862 /* Like arm_compute_initial_elimination_offset. Simpler because there
26863 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26864 to point at the base of the local variables after static stack
26865 space for a function has been allocated. */
26866
26867 HOST_WIDE_INT
26868 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26869 {
26870 arm_stack_offsets *offsets;
26871
26872 offsets = arm_get_frame_offsets ();
26873
26874 switch (from)
26875 {
26876 case ARG_POINTER_REGNUM:
26877 switch (to)
26878 {
26879 case STACK_POINTER_REGNUM:
26880 return offsets->outgoing_args - offsets->saved_args;
26881
26882 case FRAME_POINTER_REGNUM:
26883 return offsets->soft_frame - offsets->saved_args;
26884
26885 case ARM_HARD_FRAME_POINTER_REGNUM:
26886 return offsets->saved_regs - offsets->saved_args;
26887
26888 case THUMB_HARD_FRAME_POINTER_REGNUM:
26889 return offsets->locals_base - offsets->saved_args;
26890
26891 default:
26892 gcc_unreachable ();
26893 }
26894 break;
26895
26896 case FRAME_POINTER_REGNUM:
26897 switch (to)
26898 {
26899 case STACK_POINTER_REGNUM:
26900 return offsets->outgoing_args - offsets->soft_frame;
26901
26902 case ARM_HARD_FRAME_POINTER_REGNUM:
26903 return offsets->saved_regs - offsets->soft_frame;
26904
26905 case THUMB_HARD_FRAME_POINTER_REGNUM:
26906 return offsets->locals_base - offsets->soft_frame;
26907
26908 default:
26909 gcc_unreachable ();
26910 }
26911 break;
26912
26913 default:
26914 gcc_unreachable ();
26915 }
26916 }
26917
26918 /* Generate the function's prologue. */
26919
26920 void
26921 thumb1_expand_prologue (void)
26922 {
26923 rtx_insn *insn;
26924
26925 HOST_WIDE_INT amount;
26926 HOST_WIDE_INT size;
26927 arm_stack_offsets *offsets;
26928 unsigned long func_type;
26929 int regno;
26930 unsigned long live_regs_mask;
26931 unsigned long l_mask;
26932 unsigned high_regs_pushed = 0;
26933 bool lr_needs_saving;
26934
26935 func_type = arm_current_func_type ();
26936
26937 /* Naked functions don't have prologues. */
26938 if (IS_NAKED (func_type))
26939 {
26940 if (flag_stack_usage_info)
26941 current_function_static_stack_size = 0;
26942 return;
26943 }
26944
26945 if (IS_INTERRUPT (func_type))
26946 {
26947 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26948 return;
26949 }
26950
26951 if (is_called_in_ARM_mode (current_function_decl))
26952 emit_insn (gen_prologue_thumb1_interwork ());
26953
26954 offsets = arm_get_frame_offsets ();
26955 live_regs_mask = offsets->saved_regs_mask;
26956 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26957
26958 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26959 l_mask = live_regs_mask & 0x40ff;
26960 /* Then count how many other high registers will need to be pushed. */
26961 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26962
26963 if (crtl->args.pretend_args_size)
26964 {
26965 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26966
26967 if (cfun->machine->uses_anonymous_args)
26968 {
26969 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26970 unsigned long mask;
26971
26972 mask = 1ul << (LAST_ARG_REGNUM + 1);
26973 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26974
26975 insn = thumb1_emit_multi_reg_push (mask, 0);
26976 }
26977 else
26978 {
26979 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26980 stack_pointer_rtx, x));
26981 }
26982 RTX_FRAME_RELATED_P (insn) = 1;
26983 }
26984
26985 if (TARGET_BACKTRACE)
26986 {
26987 HOST_WIDE_INT offset = 0;
26988 unsigned work_register;
26989 rtx work_reg, x, arm_hfp_rtx;
26990
26991 /* We have been asked to create a stack backtrace structure.
26992 The code looks like this:
26993
26994 0 .align 2
26995 0 func:
26996 0 sub SP, #16 Reserve space for 4 registers.
26997 2 push {R7} Push low registers.
26998 4 add R7, SP, #20 Get the stack pointer before the push.
26999 6 str R7, [SP, #8] Store the stack pointer
27000 (before reserving the space).
27001 8 mov R7, PC Get hold of the start of this code + 12.
27002 10 str R7, [SP, #16] Store it.
27003 12 mov R7, FP Get hold of the current frame pointer.
27004 14 str R7, [SP, #4] Store it.
27005 16 mov R7, LR Get hold of the current return address.
27006 18 str R7, [SP, #12] Store it.
27007 20 add R7, SP, #16 Point at the start of the
27008 backtrace structure.
27009 22 mov FP, R7 Put this value into the frame pointer. */
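/* Editorial summary, reading off the stores above: the new frame pointer
ends up addressing the saved PC slot, with LR, the caller's SP and the
caller's FP at FP - 4, FP - 8 and FP - 12 respectively. */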
27010
27011 work_register = thumb_find_work_register (live_regs_mask);
27012 work_reg = gen_rtx_REG (SImode, work_register);
27013 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27014
27015 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27016 stack_pointer_rtx, GEN_INT (-16)));
27017 RTX_FRAME_RELATED_P (insn) = 1;
27018
27019 if (l_mask)
27020 {
27021 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27022 RTX_FRAME_RELATED_P (insn) = 1;
27023 lr_needs_saving = false;
27024
27025 offset = bit_count (l_mask) * UNITS_PER_WORD;
27026 }
27027
27028 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27029 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27030
27031 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27032 x = gen_frame_mem (SImode, x);
27033 emit_move_insn (x, work_reg);
27034
27035 /* Make sure that the instruction fetching the PC is in the right place
27036 to calculate "start of backtrace creation code + 12". */
27037 /* ??? The stores using the common WORK_REG ought to be enough to
27038 prevent the scheduler from doing anything weird. Failing that
27039 we could always move all of the following into an UNSPEC_VOLATILE. */
27040 if (l_mask)
27041 {
27042 x = gen_rtx_REG (SImode, PC_REGNUM);
27043 emit_move_insn (work_reg, x);
27044
27045 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27046 x = gen_frame_mem (SImode, x);
27047 emit_move_insn (x, work_reg);
27048
27049 emit_move_insn (work_reg, arm_hfp_rtx);
27050
27051 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27052 x = gen_frame_mem (SImode, x);
27053 emit_move_insn (x, work_reg);
27054 }
27055 else
27056 {
27057 emit_move_insn (work_reg, arm_hfp_rtx);
27058
27059 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27060 x = gen_frame_mem (SImode, x);
27061 emit_move_insn (x, work_reg);
27062
27063 x = gen_rtx_REG (SImode, PC_REGNUM);
27064 emit_move_insn (work_reg, x);
27065
27066 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27067 x = gen_frame_mem (SImode, x);
27068 emit_move_insn (x, work_reg);
27069 }
27070
27071 x = gen_rtx_REG (SImode, LR_REGNUM);
27072 emit_move_insn (work_reg, x);
27073
27074 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27075 x = gen_frame_mem (SImode, x);
27076 emit_move_insn (x, work_reg);
27077
27078 x = GEN_INT (offset + 12);
27079 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27080
27081 emit_move_insn (arm_hfp_rtx, work_reg);
27082 }
27083 /* Optimization: If we are not pushing any low registers but we are going
27084 to push some high registers then delay our first push. This will just
27085 be a push of LR and we can combine it with the push of the first high
27086 register. */
27087 else if ((l_mask & 0xff) != 0
27088 || (high_regs_pushed == 0 && lr_needs_saving))
27089 {
27090 unsigned long mask = l_mask;
27091 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27092 insn = thumb1_emit_multi_reg_push (mask, mask);
27093 RTX_FRAME_RELATED_P (insn) = 1;
27094 lr_needs_saving = false;
27095 }
27096
27097 if (high_regs_pushed)
27098 {
27099 unsigned pushable_regs;
27100 unsigned next_hi_reg;
27101 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27102 : crtl->args.info.nregs;
27103 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27104
27105 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27106 if (live_regs_mask & (1 << next_hi_reg))
27107 break;
27108
27109 /* Here we need to mask out registers used for passing arguments,
27110 even if they could otherwise be pushed, to avoid using them to
27111 stash the high registers; such stashing could clobber live
27112 argument values. */
27113 pushable_regs = l_mask & (~arg_regs_mask);
27114 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27115
27116 /* Normally, LR can be used as a scratch register once it has been
27117 saved; but if the function examines its own return address then
27118 the value is still live and we need to avoid using it. */
27119 bool return_addr_live
27120 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27121 LR_REGNUM);
27122
27123 if (lr_needs_saving || return_addr_live)
27124 pushable_regs &= ~(1 << LR_REGNUM);
27125
27126 if (pushable_regs == 0)
27127 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27128
27129 while (high_regs_pushed > 0)
27130 {
27131 unsigned long real_regs_mask = 0;
27132 unsigned long push_mask = 0;
27133
27134 for (regno = LR_REGNUM; regno >= 0; regno --)
27135 {
27136 if (pushable_regs & (1 << regno))
27137 {
27138 emit_move_insn (gen_rtx_REG (SImode, regno),
27139 gen_rtx_REG (SImode, next_hi_reg));
27140
27141 high_regs_pushed --;
27142 real_regs_mask |= (1 << next_hi_reg);
27143 push_mask |= (1 << regno);
27144
27145 if (high_regs_pushed)
27146 {
27147 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27148 next_hi_reg --)
27149 if (live_regs_mask & (1 << next_hi_reg))
27150 break;
27151 }
27152 else
27153 break;
27154 }
27155 }
27156
27157 /* If we had to find a work register and we have not yet
27158 saved the LR then add it to the list of regs to push. */
27159 if (lr_needs_saving)
27160 {
27161 push_mask |= 1 << LR_REGNUM;
27162 real_regs_mask |= 1 << LR_REGNUM;
27163 lr_needs_saving = false;
27164 /* If the return address is not live at this point, we
27165 can add LR to the list of registers that we can use
27166 for pushes. */
27167 if (!return_addr_live)
27168 pushable_regs |= 1 << LR_REGNUM;
27169 }
27170
27171 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27172 RTX_FRAME_RELATED_P (insn) = 1;
27173 }
27174 }
27175
27176 /* Load the pic register before setting the frame pointer,
27177 so we can use r7 as a temporary work register. */
27178 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27179 arm_load_pic_register (live_regs_mask, NULL_RTX);
27180
27181 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27182 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27183 stack_pointer_rtx);
27184
27185 size = offsets->outgoing_args - offsets->saved_args;
27186 if (flag_stack_usage_info)
27187 current_function_static_stack_size = size;
27188
27189 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27190 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27191 || flag_stack_clash_protection)
27192 && size)
27193 sorry ("%<-fstack-check=specific%> for Thumb-1");
27194
27195 amount = offsets->outgoing_args - offsets->saved_regs;
27196 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27197 if (amount)
27198 {
27199 if (amount < 512)
27200 {
27201 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27202 GEN_INT (- amount)));
27203 RTX_FRAME_RELATED_P (insn) = 1;
27204 }
27205 else
27206 {
27207 rtx reg, dwarf;
27208
27209 /* The stack decrement is too big for an immediate value in a single
27210 insn. In theory we could issue multiple subtracts, but after
27211 three of them it becomes more space efficient to place the full
27212 value in the constant pool and load into a register. (Also the
27213 ARM debugger really likes to see only one stack decrement per
27214 function). So instead we look for a scratch register into which
27215 we can load the decrement, and then we subtract this from the
27216 stack pointer. Unfortunately on the thumb the only available
27217 scratch registers are the argument registers, and we cannot use
27218 these as they may hold arguments to the function. Instead we
27219 attempt to locate a call preserved register which is used by this
27220 function. If we can find one, then we know that it will have
27221 been pushed at the start of the prologue and so we can corrupt
27222 it now. */
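/* Illustrative only: if r4 was saved by this function and amount is, say,
1024, the code below loads -1024 into r4 (typically from the literal pool)
and then emits "add sp, r4"; r4 itself is restored later by the epilogue's
pop. */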
27223 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27224 if (live_regs_mask & (1 << regno))
27225 break;
27226
27227 gcc_assert (regno <= LAST_LO_REGNUM);
27228
27229 reg = gen_rtx_REG (SImode, regno);
27230
27231 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27232
27233 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27234 stack_pointer_rtx, reg));
27235
27236 dwarf = gen_rtx_SET (stack_pointer_rtx,
27237 plus_constant (Pmode, stack_pointer_rtx,
27238 -amount));
27239 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27240 RTX_FRAME_RELATED_P (insn) = 1;
27241 }
27242 }
27243
27244 if (frame_pointer_needed)
27245 thumb_set_frame_pointer (offsets);
27246
27247 /* If we are profiling, make sure no instructions are scheduled before
27248 the call to mcount. Similarly if the user has requested no
27249 scheduling in the prolog. Similarly if we want non-call exceptions
27250 using the EABI unwinder, to prevent faulting instructions from being
27251 swapped with a stack adjustment. */
27252 if (crtl->profile || !TARGET_SCHED_PROLOG
27253 || (arm_except_unwind_info (&global_options) == UI_TARGET
27254 && cfun->can_throw_non_call_exceptions))
27255 emit_insn (gen_blockage ());
27256
27257 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27258 if (live_regs_mask & 0xff)
27259 cfun->machine->lr_save_eliminated = 0;
27260 }
27261
27262 /* Clear caller saved registers not used to pass return values and leaked
27263 condition flags before exiting a cmse_nonsecure_entry function. */
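/* Editorial summary of the code below: roughly, r0-r3 and ip start out as
candidates, plus any other caller-saved core or VFP registers (including
ones the user has made caller-saved); registers carrying the return value,
and the scratch registers needed to clear FPSCR when FPCXT_CMSE is
unavailable, are then removed from the set. */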
27264
27265 void
27266 cmse_nonsecure_entry_clear_before_return (void)
27267 {
27268 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27269 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27270 uint32_t padding_bits_to_clear = 0;
27271 auto_sbitmap to_clear_bitmap (maxregno + 1);
27272 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27273 tree result_type;
27274
27275 bitmap_clear (to_clear_bitmap);
27276 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27277 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27278
27279 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27280 registers. */
27281 if (clear_vfpregs)
27282 {
27283 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27284
27285 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27286
27287 if (!TARGET_HAVE_FPCXT_CMSE)
27288 {
27289 /* Make sure we don't clear the two scratch registers used to clear
27290 the relevant FPSCR bits in output_return_instruction. */
27291 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27292 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27293 emit_use (gen_rtx_REG (SImode, 4));
27294 bitmap_clear_bit (to_clear_bitmap, 4);
27295 }
27296 }
27297
27298 /* If the user has defined registers to be caller saved, these are no longer
27299 restored by the function before returning and must thus be cleared for
27300 security purposes. */
27301 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27302 {
27303 /* We do not touch registers that can be used to pass arguments as per
27304 the AAPCS, since these should never be made callee-saved by user
27305 options. */
27306 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27307 continue;
27308 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27309 continue;
27310 if (!callee_saved_reg_p (regno)
27311 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27312 || TARGET_HARD_FLOAT))
27313 bitmap_set_bit (to_clear_bitmap, regno);
27314 }
27315
27316 /* Make sure we do not clear the registers used to return the result. */
27317 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27318 if (!VOID_TYPE_P (result_type))
27319 {
27320 uint64_t to_clear_return_mask;
27321 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27322
27323 /* No need to check that we return in registers, because we don't
27324 support returning on stack yet. */
27325 gcc_assert (REG_P (result_rtl));
27326 to_clear_return_mask
27327 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27328 &padding_bits_to_clear);
27329 if (to_clear_return_mask)
27330 {
27331 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27332 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27333 {
27334 if (to_clear_return_mask & (1ULL << regno))
27335 bitmap_clear_bit (to_clear_bitmap, regno);
27336 }
27337 }
27338 }
27339
27340 if (padding_bits_to_clear != 0)
27341 {
27342 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27343 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27344
27345 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27346 returning a composite type, which only uses r0. Let's make sure that
27347 r1-r3 are cleared too. */
27348 bitmap_clear (to_clear_arg_regs_bitmap);
27349 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27350 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27351 }
27352
27353 /* Clear full registers that leak before returning. */
27354 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27355 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27356 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27357 clearing_reg);
27358 }
27359
27360 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27361 POP instruction can be generated. LR should be replaced by PC. All
27362 the checks required are already done by USE_RETURN_INSN (). Hence,
27363 all we really need to check here is whether a single register or
27364 multiple registers are to be popped. */
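/* In the multi-register case this amounts to replacing LR with PC in the
pop mask, so the epilogue ends in e.g. "pop {r4, r5, r6, r7, pc}"
(illustrative register list). */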
27365 void
27366 thumb2_expand_return (bool simple_return)
27367 {
27368 int i, num_regs;
27369 unsigned long saved_regs_mask;
27370 arm_stack_offsets *offsets;
27371
27372 offsets = arm_get_frame_offsets ();
27373 saved_regs_mask = offsets->saved_regs_mask;
27374
27375 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27376 if (saved_regs_mask & (1 << i))
27377 num_regs++;
27378
27379 if (!simple_return && saved_regs_mask)
27380 {
27381 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27382 functions, or adapt the code to handle it according to the ACLE. This
27383 path should not be reachable for cmse_nonsecure_entry functions, though
27384 we prefer to assert it for now to ensure that future code changes do not
27385 silently change this behavior. */
27386 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27387 if (num_regs == 1)
27388 {
27389 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27390 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27391 rtx addr = gen_rtx_MEM (SImode,
27392 gen_rtx_POST_INC (SImode,
27393 stack_pointer_rtx));
27394 set_mem_alias_set (addr, get_frame_alias_set ());
27395 XVECEXP (par, 0, 0) = ret_rtx;
27396 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27397 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27398 emit_jump_insn (par);
27399 }
27400 else
27401 {
27402 saved_regs_mask &= ~ (1 << LR_REGNUM);
27403 saved_regs_mask |= (1 << PC_REGNUM);
27404 arm_emit_multi_reg_pop (saved_regs_mask);
27405 }
27406 }
27407 else
27408 {
27409 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27410 cmse_nonsecure_entry_clear_before_return ();
27411 emit_jump_insn (simple_return_rtx);
27412 }
27413 }
27414
27415 void
27416 thumb1_expand_epilogue (void)
27417 {
27418 HOST_WIDE_INT amount;
27419 arm_stack_offsets *offsets;
27420 int regno;
27421
27422 /* Naked functions don't have epilogues. */
27423 if (IS_NAKED (arm_current_func_type ()))
27424 return;
27425
27426 offsets = arm_get_frame_offsets ();
27427 amount = offsets->outgoing_args - offsets->saved_regs;
27428
27429 if (frame_pointer_needed)
27430 {
27431 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27432 amount = offsets->locals_base - offsets->saved_regs;
27433 }
27434 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27435
27436 gcc_assert (amount >= 0);
27437 if (amount)
27438 {
27439 emit_insn (gen_blockage ());
27440
27441 if (amount < 512)
27442 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27443 GEN_INT (amount)));
27444 else
27445 {
27446 /* r3 is always free in the epilogue. */
27447 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27448
27449 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27450 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27451 }
27452 }
27453
27454 /* Emit a USE (stack_pointer_rtx), so that
27455 the stack adjustment will not be deleted. */
27456 emit_insn (gen_force_register_use (stack_pointer_rtx));
27457
27458 if (crtl->profile || !TARGET_SCHED_PROLOG)
27459 emit_insn (gen_blockage ());
27460
27461 /* Emit a clobber for each register that will be restored in the epilogue,
27462 so that flow2 will get register lifetimes correct. */
27463 for (regno = 0; regno < 13; regno++)
27464 if (reg_needs_saving_p (regno))
27465 emit_clobber (gen_rtx_REG (SImode, regno));
27466
27467 if (! df_regs_ever_live_p (LR_REGNUM))
27468 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27469
27470 /* Clear all caller-saved regs that are not used to return. */
27471 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27472 cmse_nonsecure_entry_clear_before_return ();
27473 }
27474
27475 /* Epilogue code for APCS frame. */
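/* Editorial note: with an APCS frame the register save area includes IP,
which holds the caller's stack pointer; the epilogue below unwinds relative
to the hard frame pointer and relies on that saved IP to rebuild SP (see
the saved_regs_mask adjustment further down). */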
27476 static void
27477 arm_expand_epilogue_apcs_frame (bool really_return)
27478 {
27479 unsigned long func_type;
27480 unsigned long saved_regs_mask;
27481 int num_regs = 0;
27482 int i;
27483 int floats_from_frame = 0;
27484 arm_stack_offsets *offsets;
27485
27486 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27487 func_type = arm_current_func_type ();
27488
27489 /* Get frame offsets for ARM. */
27490 offsets = arm_get_frame_offsets ();
27491 saved_regs_mask = offsets->saved_regs_mask;
27492
27493 /* Find the offset of the floating-point save area in the frame. */
27494 floats_from_frame
27495 = (offsets->saved_args
27496 + arm_compute_static_chain_stack_bytes ()
27497 - offsets->frame);
27498
27499 /* Compute how many core registers are saved and how far away the floats are. */
27500 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27501 if (saved_regs_mask & (1 << i))
27502 {
27503 num_regs++;
27504 floats_from_frame += 4;
27505 }
27506
27507 if (TARGET_VFP_BASE)
27508 {
27509 int start_reg;
27510 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27511
27512 /* The offset is from IP_REGNUM. */
27513 int saved_size = arm_get_vfp_saved_size ();
27514 if (saved_size > 0)
27515 {
27516 rtx_insn *insn;
27517 floats_from_frame += saved_size;
27518 insn = emit_insn (gen_addsi3 (ip_rtx,
27519 hard_frame_pointer_rtx,
27520 GEN_INT (-floats_from_frame)));
27521 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27522 ip_rtx, hard_frame_pointer_rtx);
27523 }
27524
27525 /* Generate VFP register multi-pop. */
27526 start_reg = FIRST_VFP_REGNUM;
27527
27528 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27529 /* Look for a case where a reg does not need restoring. */
27530 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27531 {
27532 if (start_reg != i)
27533 arm_emit_vfp_multi_reg_pop (start_reg,
27534 (i - start_reg) / 2,
27535 gen_rtx_REG (SImode,
27536 IP_REGNUM));
27537 start_reg = i + 2;
27538 }
27539
27540 /* Restore the remaining regs that we have discovered (or possibly
27541 even all of them, if the conditional in the for loop never
27542 fired). */
27543 if (start_reg != i)
27544 arm_emit_vfp_multi_reg_pop (start_reg,
27545 (i - start_reg) / 2,
27546 gen_rtx_REG (SImode, IP_REGNUM));
27547 }
27548
27549 if (TARGET_IWMMXT)
27550 {
27551 /* The frame pointer is guaranteed to be non-double-word aligned, as
27552 it is set to the double-word-aligned old_stack_pointer minus 4. */
27553 rtx_insn *insn;
27554 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27555
27556 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27557 if (reg_needs_saving_p (i))
27558 {
27559 rtx addr = gen_frame_mem (V2SImode,
27560 plus_constant (Pmode, hard_frame_pointer_rtx,
27561 - lrm_count * 4));
27562 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27563 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27564 gen_rtx_REG (V2SImode, i),
27565 NULL_RTX);
27566 lrm_count += 2;
27567 }
27568 }
27569
27570 /* saved_regs_mask should contain IP, which holds the old stack pointer
27571 from the time the activation record was created. Since SP and IP are
27572 adjacent registers, we can restore the value directly into SP. */
27573 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27574 saved_regs_mask &= ~(1 << IP_REGNUM);
27575 saved_regs_mask |= (1 << SP_REGNUM);
27576
27577 /* There are two registers left in saved_regs_mask - LR and PC. We
27578 only need to restore LR (the return address), but to
27579 save time we can load it directly into PC, unless we need a
27580 special function exit sequence, or we are not really returning. */
27581 if (really_return
27582 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27583 && !crtl->calls_eh_return)
27584 /* Delete LR from the register mask, so that LR on
27585 the stack is loaded into the PC in the register mask. */
27586 saved_regs_mask &= ~(1 << LR_REGNUM);
27587 else
27588 saved_regs_mask &= ~(1 << PC_REGNUM);
27589
27590 num_regs = bit_count (saved_regs_mask);
27591 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27592 {
27593 rtx_insn *insn;
27594 emit_insn (gen_blockage ());
27595 /* Unwind the stack to just below the saved registers. */
27596 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27597 hard_frame_pointer_rtx,
27598 GEN_INT (- 4 * num_regs)));
27599
27600 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27601 stack_pointer_rtx, hard_frame_pointer_rtx);
27602 }
27603
27604 arm_emit_multi_reg_pop (saved_regs_mask);
27605
27606 if (IS_INTERRUPT (func_type))
27607 {
27608 /* Interrupt handlers will have pushed the
27609 IP onto the stack, so restore it now. */
27610 rtx_insn *insn;
27611 rtx addr = gen_rtx_MEM (SImode,
27612 gen_rtx_POST_INC (SImode,
27613 stack_pointer_rtx));
27614 set_mem_alias_set (addr, get_frame_alias_set ());
27615 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27616 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27617 gen_rtx_REG (SImode, IP_REGNUM),
27618 NULL_RTX);
27619 }
27620
27621 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27622 return;
27623
27624 if (crtl->calls_eh_return)
27625 emit_insn (gen_addsi3 (stack_pointer_rtx,
27626 stack_pointer_rtx,
27627 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27628
27629 if (IS_STACKALIGN (func_type))
27630 /* Restore the original stack pointer. In the prologue, the stack was
27631 realigned and the original stack pointer was saved in r0. For details,
27632 see comment in arm_expand_prologue. */
27633 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27634
27635 emit_jump_insn (simple_return_rtx);
27636 }
27637
27638 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27639 function is not a sibcall. */
27640 void
27641 arm_expand_epilogue (bool really_return)
27642 {
27643 unsigned long func_type;
27644 unsigned long saved_regs_mask;
27645 int num_regs = 0;
27646 int i;
27647 int amount;
27648 arm_stack_offsets *offsets;
27649
27650 func_type = arm_current_func_type ();
27651
27652 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27653 let output_return_instruction take care of any instruction emission. */
27654 if (IS_NAKED (func_type)
27655 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27656 {
27657 if (really_return)
27658 emit_jump_insn (simple_return_rtx);
27659 return;
27660 }
27661
27662 /* If we are throwing an exception, then we really must be doing a
27663 return, so we can't tail-call. */
27664 gcc_assert (!crtl->calls_eh_return || really_return);
27665
27666 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27667 {
27668 arm_expand_epilogue_apcs_frame (really_return);
27669 return;
27670 }
27671
27672 /* Get frame offsets for ARM. */
27673 offsets = arm_get_frame_offsets ();
27674 saved_regs_mask = offsets->saved_regs_mask;
27675 num_regs = bit_count (saved_regs_mask);
27676
27677 if (frame_pointer_needed)
27678 {
27679 rtx_insn *insn;
27680 /* Restore stack pointer if necessary. */
27681 if (TARGET_ARM)
27682 {
27683 /* In ARM mode, frame pointer points to first saved register.
27684 Restore stack pointer to last saved register. */
27685 amount = offsets->frame - offsets->saved_regs;
27686
27687 /* Force out any pending memory operations that reference stacked data
27688 before stack de-allocation occurs. */
27689 emit_insn (gen_blockage ());
27690 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27691 hard_frame_pointer_rtx,
27692 GEN_INT (amount)));
27693 arm_add_cfa_adjust_cfa_note (insn, amount,
27694 stack_pointer_rtx,
27695 hard_frame_pointer_rtx);
27696
27697 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27698 deleted. */
27699 emit_insn (gen_force_register_use (stack_pointer_rtx));
27700 }
27701 else
27702 {
27703 /* In Thumb-2 mode, the frame pointer points to the last saved
27704 register. */
27705 amount = offsets->locals_base - offsets->saved_regs;
27706 if (amount)
27707 {
27708 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27709 hard_frame_pointer_rtx,
27710 GEN_INT (amount)));
27711 arm_add_cfa_adjust_cfa_note (insn, amount,
27712 hard_frame_pointer_rtx,
27713 hard_frame_pointer_rtx);
27714 }
27715
27716 /* Force out any pending memory operations that reference stacked data
27717 before stack de-allocation occurs. */
27718 emit_insn (gen_blockage ());
27719 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27720 hard_frame_pointer_rtx));
27721 arm_add_cfa_adjust_cfa_note (insn, 0,
27722 stack_pointer_rtx,
27723 hard_frame_pointer_rtx);
27724 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27725 deleted. */
27726 emit_insn (gen_force_register_use (stack_pointer_rtx));
27727 }
27728 }
27729 else
27730 {
27731 /* Pop off outgoing args and local frame to adjust stack pointer to
27732 last saved register. */
27733 amount = offsets->outgoing_args - offsets->saved_regs;
27734 if (amount)
27735 {
27736 rtx_insn *tmp;
27737 /* Force out any pending memory operations that reference stacked data
27738 before stack de-allocation occurs. */
27739 emit_insn (gen_blockage ());
27740 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27741 stack_pointer_rtx,
27742 GEN_INT (amount)));
27743 arm_add_cfa_adjust_cfa_note (tmp, amount,
27744 stack_pointer_rtx, stack_pointer_rtx);
27745 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27746 not deleted. */
27747 emit_insn (gen_force_register_use (stack_pointer_rtx));
27748 }
27749 }
27750
27751 if (TARGET_VFP_BASE)
27752 {
27753 /* Generate VFP register multi-pop. */
27754 int end_reg = LAST_VFP_REGNUM + 1;
27755
27756 /* Scan the registers in reverse order. We need to match
27757 any groupings made in the prologue and generate matching
27758 vldm operations. The need to match groups is because,
27759 unlike pop, vldm can only do consecutive regs. */
27760 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27761 /* Look for a case where a reg does not need restoring. */
27762 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27763 {
27764 /* Restore the regs discovered so far (from reg+2 to
27765 end_reg). */
27766 if (end_reg > i + 2)
27767 arm_emit_vfp_multi_reg_pop (i + 2,
27768 (end_reg - (i + 2)) / 2,
27769 stack_pointer_rtx);
27770 end_reg = i;
27771 }
27772
27773 /* Restore the remaining regs that we have discovered (or possibly
27774 even all of them, if the conditional in the for loop never
27775 fired). */
27776 if (end_reg > i + 2)
27777 arm_emit_vfp_multi_reg_pop (i + 2,
27778 (end_reg - (i + 2)) / 2,
27779 stack_pointer_rtx);
27780 }
27781
27782 if (TARGET_IWMMXT)
27783 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27784 if (reg_needs_saving_p (i))
27785 {
27786 rtx_insn *insn;
27787 rtx addr = gen_rtx_MEM (V2SImode,
27788 gen_rtx_POST_INC (SImode,
27789 stack_pointer_rtx));
27790 set_mem_alias_set (addr, get_frame_alias_set ());
27791 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27792 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27793 gen_rtx_REG (V2SImode, i),
27794 NULL_RTX);
27795 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27796 stack_pointer_rtx, stack_pointer_rtx);
27797 }
27798
27799 if (saved_regs_mask)
27800 {
27801 rtx insn;
27802 bool return_in_pc = false;
27803
27804 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27805 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27806 && !IS_CMSE_ENTRY (func_type)
27807 && !IS_STACKALIGN (func_type)
27808 && really_return
27809 && crtl->args.pretend_args_size == 0
27810 && saved_regs_mask & (1 << LR_REGNUM)
27811 && !crtl->calls_eh_return)
27812 {
27813 saved_regs_mask &= ~(1 << LR_REGNUM);
27814 saved_regs_mask |= (1 << PC_REGNUM);
27815 return_in_pc = true;
27816 }
27817
27818 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27819 {
27820 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27821 if (saved_regs_mask & (1 << i))
27822 {
27823 rtx addr = gen_rtx_MEM (SImode,
27824 gen_rtx_POST_INC (SImode,
27825 stack_pointer_rtx));
27826 set_mem_alias_set (addr, get_frame_alias_set ());
27827
27828 if (i == PC_REGNUM)
27829 {
27830 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27831 XVECEXP (insn, 0, 0) = ret_rtx;
27832 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27833 addr);
27834 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27835 insn = emit_jump_insn (insn);
27836 }
27837 else
27838 {
27839 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27840 addr));
27841 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27842 gen_rtx_REG (SImode, i),
27843 NULL_RTX);
27844 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27845 stack_pointer_rtx,
27846 stack_pointer_rtx);
27847 }
27848 }
27849 }
27850 else
27851 {
27852 if (TARGET_LDRD
27853 && current_tune->prefer_ldrd_strd
27854 && !optimize_function_for_size_p (cfun))
27855 {
27856 if (TARGET_THUMB2)
27857 thumb2_emit_ldrd_pop (saved_regs_mask);
27858 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27859 arm_emit_ldrd_pop (saved_regs_mask);
27860 else
27861 arm_emit_multi_reg_pop (saved_regs_mask);
27862 }
27863 else
27864 arm_emit_multi_reg_pop (saved_regs_mask);
27865 }
27866
27867 if (return_in_pc)
27868 return;
27869 }
27870
27871 amount
27872 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27873 if (amount)
27874 {
27875 int i, j;
27876 rtx dwarf = NULL_RTX;
27877 rtx_insn *tmp =
27878 emit_insn (gen_addsi3 (stack_pointer_rtx,
27879 stack_pointer_rtx,
27880 GEN_INT (amount)));
27881
27882 RTX_FRAME_RELATED_P (tmp) = 1;
27883
27884 if (cfun->machine->uses_anonymous_args)
27885 {
27886 /* Restore pretend args. Refer to arm_expand_prologue for how the
27887 pretend args were saved on the stack. */
27888 int num_regs = crtl->args.pretend_args_size / 4;
27889 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27890 for (j = 0, i = 0; j < num_regs; i++)
27891 if (saved_regs_mask & (1 << i))
27892 {
27893 rtx reg = gen_rtx_REG (SImode, i);
27894 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27895 j++;
27896 }
27897 REG_NOTES (tmp) = dwarf;
27898 }
27899 arm_add_cfa_adjust_cfa_note (tmp, amount,
27900 stack_pointer_rtx, stack_pointer_rtx);
27901 }
27902
27903 if (IS_CMSE_ENTRY (func_type))
27904 {
27905 /* CMSE_ENTRY always returns. */
27906 gcc_assert (really_return);
27907 /* Clear all caller-saved regs that are not used to return. */
27908 cmse_nonsecure_entry_clear_before_return ();
27909
27910 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27911 VLDR. */
27912 if (TARGET_HAVE_FPCXT_CMSE)
27913 {
27914 rtx_insn *insn;
27915
27916 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27917 GEN_INT (FPCXTNS_ENUM)));
27918 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27919 plus_constant (Pmode, stack_pointer_rtx, 4));
27920 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27921 RTX_FRAME_RELATED_P (insn) = 1;
27922 }
27923 }
27924
27925 if (!really_return)
27926 return;
27927
27928 if (crtl->calls_eh_return)
27929 emit_insn (gen_addsi3 (stack_pointer_rtx,
27930 stack_pointer_rtx,
27931 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27932
27933 if (IS_STACKALIGN (func_type))
27934 /* Restore the original stack pointer. In the prologue, the stack was
27935 realigned and the original stack pointer was saved in r0. For details,
27936 see comment in arm_expand_prologue. */
27937 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27938
27939 emit_jump_insn (simple_return_rtx);
27940 }
27941
27942 /* Implementation of insn prologue_thumb1_interwork. This is the first
27943 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27944
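/* Editorial sketch of the output below: roughly, for a function FOO this
emits (in ARM state)
orr ip, pc, #1
bx ip
followed by a ".code 16"/".thumb_func" label formed by prefixing FOO's
assembler name with ".real_start_of", which Thumb callers can branch to
directly. */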
27945 const char *
27946 thumb1_output_interwork (void)
27947 {
27948 const char * name;
27949 FILE *f = asm_out_file;
27950
27951 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27952 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27953 == SYMBOL_REF);
27954 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27955
27956 /* Generate code sequence to switch us into Thumb mode. */
27957 /* The .code 32 directive has already been emitted by
27958 ASM_DECLARE_FUNCTION_NAME. */
27959 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27960 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27961
27962 /* Generate a label, so that the debugger will notice the
27963 change in instruction sets. This label is also used by
27964 the assembler to bypass the ARM code when this function
27965 is called from a Thumb encoded function elsewhere in the
27966 same file. Hence the definition of STUB_NAME here must
27967 agree with the definition in gas/config/tc-arm.c. */
27968
27969 #define STUB_NAME ".real_start_of"
27970
27971 fprintf (f, "\t.code\t16\n");
27972 #ifdef ARM_PE
27973 if (arm_dllexport_name_p (name))
27974 name = arm_strip_name_encoding (name);
27975 #endif
27976 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27977 fprintf (f, "\t.thumb_func\n");
27978 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27979
27980 return "";
27981 }
27982
27983 /* Handle the case of a double word load into a low register from
27984 a computed memory address. The computed address may involve a
27985 register which is overwritten by the load. */
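/* E.g. when the destination's low word is also the base register
("r0:r1 = [r0]"), the high word is loaded first so the base address is not
clobbered before the second load (see the REGNO checks below). */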
27986 const char *
27987 thumb_load_double_from_address (rtx *operands)
27988 {
27989 rtx addr;
27990 rtx base;
27991 rtx offset;
27992 rtx arg1;
27993 rtx arg2;
27994
27995 gcc_assert (REG_P (operands[0]));
27996 gcc_assert (MEM_P (operands[1]));
27997
27998 /* Get the memory address. */
27999 addr = XEXP (operands[1], 0);
28000
28001 /* Work out how the memory address is computed. */
28002 switch (GET_CODE (addr))
28003 {
28004 case REG:
28005 operands[2] = adjust_address (operands[1], SImode, 4);
28006
28007 if (REGNO (operands[0]) == REGNO (addr))
28008 {
28009 output_asm_insn ("ldr\t%H0, %2", operands);
28010 output_asm_insn ("ldr\t%0, %1", operands);
28011 }
28012 else
28013 {
28014 output_asm_insn ("ldr\t%0, %1", operands);
28015 output_asm_insn ("ldr\t%H0, %2", operands);
28016 }
28017 break;
28018
28019 case CONST:
28020 /* Compute <address> + 4 for the high order load. */
28021 operands[2] = adjust_address (operands[1], SImode, 4);
28022
28023 output_asm_insn ("ldr\t%0, %1", operands);
28024 output_asm_insn ("ldr\t%H0, %2", operands);
28025 break;
28026
28027 case PLUS:
28028 arg1 = XEXP (addr, 0);
28029 arg2 = XEXP (addr, 1);
28030
28031 if (CONSTANT_P (arg1))
28032 base = arg2, offset = arg1;
28033 else
28034 base = arg1, offset = arg2;
28035
28036 gcc_assert (REG_P (base));
28037
28038 /* Catch the case of <address> = <reg> + <reg> */
28039 if (REG_P (offset))
28040 {
28041 int reg_offset = REGNO (offset);
28042 int reg_base = REGNO (base);
28043 int reg_dest = REGNO (operands[0]);
28044
28045 /* Add the base and offset registers together into the
28046 higher destination register. */
28047 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28048 reg_dest + 1, reg_base, reg_offset);
28049
28050 /* Load the lower destination register from the address in
28051 the higher destination register. */
28052 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28053 reg_dest, reg_dest + 1);
28054
28055 /* Load the higher destination register from its own address
28056 plus 4. */
28057 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28058 reg_dest + 1, reg_dest + 1);
28059 }
28060 else
28061 {
28062 /* Compute <address> + 4 for the high order load. */
28063 operands[2] = adjust_address (operands[1], SImode, 4);
28064
28065 /* If the computed address is held in the low order register
28066 then load the high order register first, otherwise always
28067 load the low order register first. */
28068 if (REGNO (operands[0]) == REGNO (base))
28069 {
28070 output_asm_insn ("ldr\t%H0, %2", operands);
28071 output_asm_insn ("ldr\t%0, %1", operands);
28072 }
28073 else
28074 {
28075 output_asm_insn ("ldr\t%0, %1", operands);
28076 output_asm_insn ("ldr\t%H0, %2", operands);
28077 }
28078 }
28079 break;
28080
28081 case LABEL_REF:
28082 /* With no registers to worry about we can just load the value
28083 directly. */
28084 operands[2] = adjust_address (operands[1], SImode, 4);
28085
28086 output_asm_insn ("ldr\t%H0, %2", operands);
28087 output_asm_insn ("ldr\t%0, %1", operands);
28088 break;
28089
28090 default:
28091 gcc_unreachable ();
28092 }
28093
28094 return "";
28095 }
28096
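/* Output a 2- or 3-word block copy using LDMIA/STMIA with write-back.
The register operands are sorted first so the register lists are emitted
in ascending order. */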
28097 const char *
28098 thumb_output_move_mem_multiple (int n, rtx *operands)
28099 {
28100 switch (n)
28101 {
28102 case 2:
28103 if (REGNO (operands[4]) > REGNO (operands[5]))
28104 std::swap (operands[4], operands[5]);
28105
28106 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28107 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28108 break;
28109
28110 case 3:
28111 if (REGNO (operands[4]) > REGNO (operands[5]))
28112 std::swap (operands[4], operands[5]);
28113 if (REGNO (operands[5]) > REGNO (operands[6]))
28114 std::swap (operands[5], operands[6]);
28115 if (REGNO (operands[4]) > REGNO (operands[5]))
28116 std::swap (operands[4], operands[5]);
28117
28118 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28119 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28120 break;
28121
28122 default:
28123 gcc_unreachable ();
28124 }
28125
28126 return "";
28127 }
28128
28129 /* Output a call-via instruction for thumb state. */
28130 const char *
28131 thumb_call_via_reg (rtx reg)
28132 {
28133 int regno = REGNO (reg);
28134 rtx *labelp;
28135
28136 gcc_assert (regno < LR_REGNUM);
28137
28138 /* If we are in the normal text section we can use a single instance
28139 per compilation unit. If we are doing function sections, then we need
28140 an entry per section, since we can't rely on reachability. */
28141 if (in_section == text_section)
28142 {
28143 thumb_call_reg_needed = 1;
28144
28145 if (thumb_call_via_label[regno] == NULL)
28146 thumb_call_via_label[regno] = gen_label_rtx ();
28147 labelp = thumb_call_via_label + regno;
28148 }
28149 else
28150 {
28151 if (cfun->machine->call_via[regno] == NULL)
28152 cfun->machine->call_via[regno] = gen_label_rtx ();
28153 labelp = cfun->machine->call_via + regno;
28154 }
28155
28156 output_asm_insn ("bl\t%a0", labelp);
28157 return "";
28158 }
28159
28160 /* Routines for generating rtl. */
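/* Expand a cpymemqi: copy LEN bytes in chunks of 12, 8, 4, 2 and finally
1 byte, using the cpymem12b/cpymem8b patterns (which also post-increment
the pointer registers) for the large chunks. */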
28161 void
28162 thumb_expand_cpymemqi (rtx *operands)
28163 {
28164 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28165 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28166 HOST_WIDE_INT len = INTVAL (operands[2]);
28167 HOST_WIDE_INT offset = 0;
28168
28169 while (len >= 12)
28170 {
28171 emit_insn (gen_cpymem12b (out, in, out, in));
28172 len -= 12;
28173 }
28174
28175 if (len >= 8)
28176 {
28177 emit_insn (gen_cpymem8b (out, in, out, in));
28178 len -= 8;
28179 }
28180
28181 if (len >= 4)
28182 {
28183 rtx reg = gen_reg_rtx (SImode);
28184 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28185 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28186 len -= 4;
28187 offset += 4;
28188 }
28189
28190 if (len >= 2)
28191 {
28192 rtx reg = gen_reg_rtx (HImode);
28193 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28194 plus_constant (Pmode, in,
28195 offset))));
28196 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28197 offset)),
28198 reg));
28199 len -= 2;
28200 offset += 2;
28201 }
28202
28203 if (len)
28204 {
28205 rtx reg = gen_reg_rtx (QImode);
28206 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28207 plus_constant (Pmode, in,
28208 offset))));
28209 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28210 offset)),
28211 reg));
28212 }
28213 }
28214
28215 void
28216 thumb_reload_out_hi (rtx *operands)
28217 {
28218 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28219 }
28220
28221 /* Return the length of a function name prefix
28222 that starts with the character 'c'. */
28223 static int
28224 arm_get_strip_length (int c)
28225 {
28226 switch (c)
28227 {
28228 ARM_NAME_ENCODING_LENGTHS
28229 default: return 0;
28230 }
28231 }
28232
28233 /* Return a pointer to a function's name with any
28234 and all prefix encodings stripped from it. */
28235 const char *
28236 arm_strip_name_encoding (const char *name)
28237 {
28238 int skip;
28239
28240 while ((skip = arm_get_strip_length (* name)))
28241 name += skip;
28242
28243 return name;
28244 }
28245
28246 /* If there is a '*' anywhere in the name's prefix, then
28247 emit the stripped name verbatim, otherwise prepend an
28248 underscore if leading underscores are being used. */
28249 void
28250 arm_asm_output_labelref (FILE *stream, const char *name)
28251 {
28252 int skip;
28253 int verbatim = 0;
28254
28255 while ((skip = arm_get_strip_length (* name)))
28256 {
28257 verbatim |= (*name == '*');
28258 name += skip;
28259 }
28260
28261 if (verbatim)
28262 fputs (name, stream);
28263 else
28264 asm_fprintf (stream, "%U%s", name);
28265 }
28266
28267 /* This function is used to emit an EABI tag and its associated value.
28268 We emit the numerical value of the tag in case the assembler does not
28269 support textual tags (e.g. gas prior to 2.20). If requested we include
28270 the tag name in a comment so that anyone reading the assembler output
28271 will know which tag is being set.
28272
28273 This function is not static because arm-c.cc needs it too. */
28274
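/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
emits ".eabi_attribute 19, 1", followed under -fverbose-asm by an
"@ Tag_ABI_FP_rounding" comment (illustrative; "@" is ARM's
ASM_COMMENT_START). */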
28275 void
28276 arm_emit_eabi_attribute (const char *name, int num, int val)
28277 {
28278 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28279 if (flag_verbose_asm || flag_debug_asm)
28280 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28281 asm_fprintf (asm_out_file, "\n");
28282 }
28283
28284 /* This function is used to print CPU tuning information as a comment
28285 in the assembler file. Pointers are not printed for now. */
28286
28287 void
28288 arm_print_tune_info (void)
28289 {
28290 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28291 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28292 current_tune->constant_limit);
28293 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28294 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28295 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28296 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28297 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28298 "prefetch.l1_cache_size:\t%d\n",
28299 current_tune->prefetch.l1_cache_size);
28300 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28301 "prefetch.l1_cache_line_size:\t%d\n",
28302 current_tune->prefetch.l1_cache_line_size);
28303 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28304 "prefer_constant_pool:\t%d\n",
28305 (int) current_tune->prefer_constant_pool);
28306 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28307 "branch_cost:\t(s:speed, p:predictable)\n");
28308 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28309 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28310 current_tune->branch_cost (false, false));
28311 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28312 current_tune->branch_cost (false, true));
28313 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28314 current_tune->branch_cost (true, false));
28315 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28316 current_tune->branch_cost (true, true));
28317 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28318 "prefer_ldrd_strd:\t%d\n",
28319 (int) current_tune->prefer_ldrd_strd);
28320 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28321 "logical_op_non_short_circuit:\t[%d,%d]\n",
28322 (int) current_tune->logical_op_non_short_circuit_thumb,
28323 (int) current_tune->logical_op_non_short_circuit_arm);
28324 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28325 "disparage_flag_setting_t16_encodings:\t%d\n",
28326 (int) current_tune->disparage_flag_setting_t16_encodings);
28327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28328 "string_ops_prefer_neon:\t%d\n",
28329 (int) current_tune->string_ops_prefer_neon);
28330 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28331 "max_insns_inline_memset:\t%d\n",
28332 current_tune->max_insns_inline_memset);
28333 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28334 current_tune->fusible_ops);
28335 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28336 (int) current_tune->sched_autopref);
28337 }
28338
28339 /* The last set of target options used to emit .arch directives, etc. This
28340 could be a function-local static if it were not required to expose it as a
28341 root to the garbage collector. */
28342 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28343
28344 /* Print .arch and .arch_extension directives corresponding to the
28345 current architecture configuration. */
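/* Illustrative output for -march=armv7ve with a hard-float FPU (the exact
directives depend on the selected CPU/FPU):
.arch armv7-a
.arch_extension virt
.arch_extension idiv
.arch_extension sec
.arch_extension mp
.fpu neon-vfpv4
possibly preceded by a ".cpu" line and followed by further
".arch_extension" lines for remaining ISA features. */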
28346 static void
28347 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28348 {
28349 arm_build_target build_target;
28350 /* If the target options haven't changed since the last time we were called
28351 there is nothing to do. This should be sufficient to suppress the
28352 majority of redundant work. */
28353 if (last_asm_targ_options == targ_options)
28354 return;
28355
28356 last_asm_targ_options = targ_options;
28357
28358 build_target.isa = sbitmap_alloc (isa_num_bits);
28359 arm_configure_build_target (&build_target, targ_options, false);
28360
28361 if (build_target.core_name
28362 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28363 {
28364 const char* truncated_name
28365 = arm_rewrite_selected_cpu (build_target.core_name);
28366 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28367 }
28368
28369 const arch_option *arch
28370 = arm_parse_arch_option_name (all_architectures, "-march",
28371 build_target.arch_name);
28372 auto_sbitmap opt_bits (isa_num_bits);
28373
28374 gcc_assert (arch);
28375
28376 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28377 {
28378 /* Keep backward compatibility for assemblers which don't support
28379 armv7ve. Fortunately, none of the following extensions are reset
28380 by a .fpu directive. */
28381 asm_fprintf (stream, "\t.arch armv7-a\n");
28382 asm_fprintf (stream, "\t.arch_extension virt\n");
28383 asm_fprintf (stream, "\t.arch_extension idiv\n");
28384 asm_fprintf (stream, "\t.arch_extension sec\n");
28385 asm_fprintf (stream, "\t.arch_extension mp\n");
28386 }
28387 else
28388 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28389
28390 /* The .fpu directive will reset any architecture extensions from the
28391 assembler that relate to the fp/vector extensions. So put this out before
28392 any .arch_extension directives. */
28393 const char *fpu_name = (TARGET_SOFT_FLOAT
28394 ? "softvfp"
28395 : arm_identify_fpu_from_isa (build_target.isa));
28396 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28397
28398 if (!arch->common.extensions)
28399 return;
28400
28401 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28402 opt->name != NULL;
28403 opt++)
28404 {
28405 if (!opt->remove)
28406 {
28407 arm_initialize_isa (opt_bits, opt->isa_bits);
28408
28409 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28410 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28411 floating-point instructions are disabled. So the following check
28412 restricts the printing of ".arch_extension mve" and
28413 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28414 this special behaviour because the feature bits "mve" and
28415 "mve_float" are not part of the "fpu bits", so they are not cleared
28416 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28417 TARGET_HAVE_MVE_FLOAT are disabled. */
28418 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28419 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28420 && !TARGET_HAVE_MVE_FLOAT))
28421 continue;
28422
28423 /* If every feature bit of this option is set in the target ISA
28424 specification, print out the option name. However, don't print
28425 anything if all the bits are part of the FPU specification. */
28426 if (bitmap_subset_p (opt_bits, build_target.isa)
28427 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28428 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28429 }
28430 }
28431 }
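
/* As an illustration (not normative output): for a unit built with
   -march=armv7ve and a hard-float FPU, the function above emits
   something along the lines of

	.arch armv7-a
	.arch_extension virt
	.arch_extension idiv
	.arch_extension sec
	.arch_extension mp
	.fpu <fpu-name>
	.arch_extension <ext>	@ one line per remaining non-FPU extension

   where <fpu-name> and the trailing <ext> lines depend on the selected
   FPU and on which ISA bits are enabled.  */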
28432
28433 static void
28434 arm_file_start (void)
28435 {
28436 int val;
28437
28438 arm_print_asm_arch_directives
28439 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28440
28441 if (TARGET_BPABI)
28442 {
28443 /* If we have a named cpu, but the assembler does not support that
28444 name via .cpu, put out a cpu name attribute; but don't do this if the
28445 name starts with the fictitious prefix, 'generic'. */
28446 if (arm_active_target.core_name
28447 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28448 && !startswith (arm_active_target.core_name, "generic"))
28449 {
28450 const char* truncated_name
28451 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28452 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28453 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28454 truncated_name);
28455 }
28456
28457 if (print_tune_info)
28458 arm_print_tune_info ();
28459
28460 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28461 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28462
28463 if (TARGET_HARD_FLOAT_ABI)
28464 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28465
28466 /* Some of these attributes only apply when the corresponding features
28467 are used. However we don't have any easy way of figuring this out.
28468 Conservatively record the setting that would have been used. */
28469
28470 if (flag_rounding_math)
28471 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28472
28473 if (!flag_unsafe_math_optimizations)
28474 {
28475 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28476 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28477 }
28478 if (flag_signaling_nans)
28479 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28480
28481 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28482 flag_finite_math_only ? 1 : 3);
28483
28484 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28485 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28486 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28487 flag_short_enums ? 1 : 2);
28488
28489 /* Tag_ABI_optimization_goals. */
28490 if (optimize_size)
28491 val = 4;
28492 else if (optimize >= 2)
28493 val = 2;
28494 else if (optimize)
28495 val = 1;
28496 else
28497 val = 6;
28498 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28499
28500 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28501 unaligned_access);
28502
28503 if (arm_fp16_format)
28504 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28505 (int) arm_fp16_format);
28506
28507 if (arm_lang_output_object_attributes_hook)
28508 arm_lang_output_object_attributes_hook();
28509 }
28510
28511 default_file_start ();
28512 }
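
/* For example (illustrative only): -Os selects val == 4 for
   Tag_ABI_optimization_goals above, -O2 and higher select 2, -O1
   selects 1 and -O0 selects 6, so a typical -O2 build carries roughly

	.eabi_attribute 30, 2

   in its object file; the exact textual form of each attribute comes
   from arm_emit_eabi_attribute.  */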
28513
28514 static void
28515 arm_file_end (void)
28516 {
28517 int regno;
28518
28519 /* Just in case the last function output in the assembler had non-default
28520 architecture directives, we force the assembler state back to the default
28521 set, so that any 'calculated' build attributes are based on the default
28522 options rather than the special options for that function. */
28523 arm_print_asm_arch_directives
28524 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28525
28526 if (NEED_INDICATE_EXEC_STACK)
28527 /* Add .note.GNU-stack. */
28528 file_end_indicate_exec_stack ();
28529
28530 if (! thumb_call_reg_needed)
28531 return;
28532
28533 switch_to_section (text_section);
28534 asm_fprintf (asm_out_file, "\t.code 16\n");
28535 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28536
28537 for (regno = 0; regno < LR_REGNUM; regno++)
28538 {
28539 rtx label = thumb_call_via_label[regno];
28540
28541 if (label != 0)
28542 {
28543 targetm.asm_out.internal_label (asm_out_file, "L",
28544 CODE_LABEL_NUMBER (label));
28545 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28546 }
28547 }
28548 }
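
/* Illustrative output of the loop above (exact directives depend on the
   target macros): if a call veneer through r3 was needed, the end of the
   text section gains something like

	.code 16
	.align	1
   .L<n>:
	bx	r3

   with one label/bx pair per register that was actually used.  */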
28549
28550 #ifndef ARM_PE
28551 /* Symbols in the text segment can be accessed without indirecting via the
28552 constant pool; it may take an extra binary operation, but this is still
28553 faster than indirecting via memory. Don't do this when not optimizing,
28554 since we won't be calculating all of the offsets necessary to do this
28555 simplification. */
28556
28557 static void
28558 arm_encode_section_info (tree decl, rtx rtl, int first)
28559 {
28560 if (optimize > 0 && TREE_CONSTANT (decl))
28561 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28562
28563 default_encode_section_info (decl, rtl, first);
28564 }
28565 #endif /* !ARM_PE */
28566
28567 static void
28568 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28569 {
28570 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28571 && !strcmp (prefix, "L"))
28572 {
28573 arm_ccfsm_state = 0;
28574 arm_target_insn = NULL;
28575 }
28576 default_internal_label (stream, prefix, labelno);
28577 }
28578
28579 /* Define classes to generate code as RTL or output asm to a file.
28580 Using templates then allows us to use the same code to output
28581 sequences in the two formats. */
28582 class thumb1_const_rtl
28583 {
28584 public:
28585 thumb1_const_rtl (rtx dst) : dst (dst) {}
28586
28587 void mov (HOST_WIDE_INT val)
28588 {
28589 emit_set_insn (dst, GEN_INT (val));
28590 }
28591
28592 void add (HOST_WIDE_INT val)
28593 {
28594 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28595 }
28596
28597 void ashift (HOST_WIDE_INT shift)
28598 {
28599 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28600 }
28601
28602 void neg ()
28603 {
28604 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28605 }
28606
28607 private:
28608 rtx dst;
28609 };
28610
28611 class thumb1_const_print
28612 {
28613 public:
28614 thumb1_const_print (FILE *f, int regno)
28615 {
28616 t_file = f;
28617 dst_regname = reg_names[regno];
28618 }
28619
28620 void mov (HOST_WIDE_INT val)
28621 {
28622 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28623 dst_regname, val);
28624 }
28625
28626 void add (HOST_WIDE_INT val)
28627 {
28628 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28629 dst_regname, val);
28630 }
28631
28632 void ashift (HOST_WIDE_INT shift)
28633 {
28634 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28635 dst_regname, shift);
28636 }
28637
28638 void neg ()
28639 {
28640 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28641 }
28642
28643 private:
28644 FILE *t_file;
28645 const char *dst_regname;
28646 };
28647
28648 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28649 Avoid generating useless code when one of the bytes is zero. */
28650 template <class T>
28651 void
28652 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28653 {
28654 bool mov_done_p = false;
28655 unsigned HOST_WIDE_INT val = op1;
28656 int shift = 0;
28657 int i;
28658
28659 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28660
28661 if (val <= 255)
28662 {
28663 dst.mov (val);
28664 return;
28665 }
28666
28667 /* For negative numbers with the top nine bits set, build the
28668 negation of OP1 and then negate the result; this is generally
28669 shorter and never longer. */
28670 if ((val & 0xFF800000) == 0xFF800000)
28671 {
28672 thumb1_gen_const_int_1 (dst, -op1);
28673 dst.neg ();
28674 return;
28675 }
28676
28677 /* In the general case, we need 7 instructions to build
28678 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28679 do better if VAL is small enough, or
28680 right-shiftable by a suitable amount. If the
28681 right shift lets us encode at least one byte fewer,
28682 it's worth it: we save an adds and an lsls at the
28683 expense of a final lsls. */
28684 int final_shift = number_of_first_bit_set (val);
28685
28686 int leading_zeroes = clz_hwi (val);
28687 int number_of_bytes_needed
28688 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28689 / BITS_PER_UNIT) + 1;
28690 int number_of_bytes_needed2
28691 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28692 / BITS_PER_UNIT) + 1;
28693
28694 if (number_of_bytes_needed2 < number_of_bytes_needed)
28695 val >>= final_shift;
28696 else
28697 final_shift = 0;
28698
28699 /* If we are in a very small range, we can use either a single movs
28700 or movs+adds. */
28701 if (val <= 510)
28702 {
28703 if (val > 255)
28704 {
28705 unsigned HOST_WIDE_INT high = val - 255;
28706
28707 dst.mov (high);
28708 dst.add (255);
28709 }
28710 else
28711 dst.mov (val);
28712
28713 if (final_shift > 0)
28714 dst.ashift (final_shift);
28715 }
28716 else
28717 {
28718 /* General case, emit upper 3 bytes as needed. */
28719 for (i = 0; i < 3; i++)
28720 {
28721 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28722
28723 if (byte)
28724 {
28725 /* We are about to emit new bits, stop accumulating a
28726 shift amount, and left-shift only if we have already
28727 emitted some upper bits. */
28728 if (mov_done_p)
28729 {
28730 dst.ashift (shift);
28731 dst.add (byte);
28732 }
28733 else
28734 dst.mov (byte);
28735
28736 /* Stop accumulating shift amount since we've just
28737 emitted some bits. */
28738 shift = 0;
28739
28740 mov_done_p = true;
28741 }
28742
28743 if (mov_done_p)
28744 shift += 8;
28745 }
28746
28747 /* Emit lower byte. */
28748 if (!mov_done_p)
28749 dst.mov (val & 0xff);
28750 else
28751 {
28752 dst.ashift (shift);
28753 if (val & 0xff)
28754 dst.add (val & 0xff);
28755 }
28756
28757 if (final_shift > 0)
28758 dst.ashift (final_shift);
28759 }
28760 }
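
/* A minimal stand-alone sketch (not built as part of GCC) of the
   byte-at-a-time scheme used in the general case above: it prints the
   movs/lsls/adds sequence for a 32-bit constant, omitting the negation,
   small-range and trailing-zero optimisations.  The register name r3 is
   an arbitrary choice for the example.  */
#if 0
#include <cstdint>
#include <cstdio>

static void
emit_const_sketch (uint32_t val)
{
  bool mov_done = false;
  int shift = 0;

  /* Upper three bytes, most significant first.  */
  for (int i = 0; i < 3; i++)
    {
      unsigned byte = (val >> (8 * (3 - i))) & 0xff;
      if (byte)
	{
	  if (mov_done)
	    {
	      std::printf ("\tlsls\tr3, #%d\n", shift);
	      std::printf ("\tadds\tr3, #%u\n", byte);
	    }
	  else
	    std::printf ("\tmovs\tr3, #%u\n", byte);
	  shift = 0;
	  mov_done = true;
	}
      if (mov_done)
	shift += 8;
    }

  /* Lowest byte.  */
  if (!mov_done)
    std::printf ("\tmovs\tr3, #%u\n", (unsigned) (val & 0xff));
  else
    {
      std::printf ("\tlsls\tr3, #%d\n", shift);
      if (val & 0xff)
	std::printf ("\tadds\tr3, #%u\n", (unsigned) (val & 0xff));
    }
}

int
main ()
{
  /* Prints: movs #1, lsls #8, adds #34, lsls #8, adds #51,
     i.e. it builds 0x00012233.  */
  emit_const_sketch (0x00012233);
  return 0;
}
#endif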
28761
28762 /* Proxies for thumb1.md, since the thumb1_const_print and
28763 thumb1_const_rtl classes are not exported. */
28764 void
28765 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28766 {
28767 thumb1_const_rtl t (dst);
28768 thumb1_gen_const_int_1 (t, op1);
28769 }
28770
28771 void
28772 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28773 {
28774 thumb1_const_print t (asm_out_file, REGNO (dst));
28775 thumb1_gen_const_int_1 (t, op1);
28776 }
28777
28778 /* Output code to add DELTA to the first argument, and then jump
28779 to FUNCTION. Used for C++ multiple inheritance. */
28780
28781 static void
28782 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28783 HOST_WIDE_INT, tree function)
28784 {
28785 static int thunk_label = 0;
28786 char label[256];
28787 char labelpc[256];
28788 int mi_delta = delta;
28789 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28790 int shift = 0;
28791 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28792 ? 1 : 0);
28793 if (mi_delta < 0)
28794 mi_delta = - mi_delta;
28795
28796 final_start_function (emit_barrier (), file, 1);
28797
28798 if (TARGET_THUMB1)
28799 {
28800 int labelno = thunk_label++;
28801 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28802 /* Thunks are entered in arm mode when available. */
28803 if (TARGET_THUMB1_ONLY)
28804 {
28805 /* push r3 so we can use it as a temporary. */
28806 /* TODO: Omit this save if r3 is not used. */
28807 fputs ("\tpush {r3}\n", file);
28808
28809 /* With -mpure-code, we cannot load the address from the
28810 constant pool: we build it explicitly. */
28811 if (target_pure_code)
28812 {
28813 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28814 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28815 fputc ('\n', file);
28816 fputs ("\tlsls r3, #8\n", file);
28817 fputs ("\tadds\tr3, #:upper0_7:#", file);
28818 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28819 fputc ('\n', file);
28820 fputs ("\tlsls r3, #8\n", file);
28821 fputs ("\tadds\tr3, #:lower8_15:#", file);
28822 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28823 fputc ('\n', file);
28824 fputs ("\tlsls r3, #8\n", file);
28825 fputs ("\tadds\tr3, #:lower0_7:#", file);
28826 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28827 fputc ('\n', file);
28828 }
28829 else
28830 fputs ("\tldr\tr3, ", file);
28831 }
28832 else
28833 {
28834 fputs ("\tldr\tr12, ", file);
28835 }
28836
28837 if (!target_pure_code)
28838 {
28839 assemble_name (file, label);
28840 fputc ('\n', file);
28841 }
28842
28843 if (flag_pic)
28844 {
28845 /* If we are generating PIC, the ldr instruction below loads
28846 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28847 the address of the add + 8, so we have:
28848
28849 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28850 = target + 1.
28851
28852 Note that we have "+ 1" because some versions of GNU ld
28853 don't set the low bit of the result for R_ARM_REL32
28854 relocations against thumb function symbols.
28855 On ARMv6M this is +4, not +8. */
28856 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28857 assemble_name (file, labelpc);
28858 fputs (":\n", file);
28859 if (TARGET_THUMB1_ONLY)
28860 {
28861 /* This is 2 insns after the start of the thunk, so we know it
28862 is 4-byte aligned. */
28863 fputs ("\tadd\tr3, pc, r3\n", file);
28864 fputs ("\tmov r12, r3\n", file);
28865 }
28866 else
28867 fputs ("\tadd\tr12, pc, r12\n", file);
28868 }
28869 else if (TARGET_THUMB1_ONLY)
28870 fputs ("\tmov r12, r3\n", file);
28871 }
28872 if (TARGET_THUMB1_ONLY)
28873 {
28874 if (mi_delta > 255)
28875 {
28876 /* With -mpure-code, we cannot load MI_DELTA from the
28877 constant pool: we build it explicitly. */
28878 if (target_pure_code)
28879 {
28880 thumb1_const_print r3 (file, 3);
28881 thumb1_gen_const_int_1 (r3, mi_delta);
28882 }
28883 else
28884 {
28885 fputs ("\tldr\tr3, ", file);
28886 assemble_name (file, label);
28887 fputs ("+4\n", file);
28888 }
28889 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28890 mi_op, this_regno, this_regno);
28891 }
28892 else if (mi_delta != 0)
28893 {
28894 /* Thumb1 unified syntax requires the s suffix in the instruction name
28895 when one of the operands is an immediate. */
28896 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28897 mi_op, this_regno, this_regno,
28898 mi_delta);
28899 }
28900 }
28901 else
28902 {
28903 /* TODO: Use movw/movt for large constants when available. */
28904 while (mi_delta != 0)
28905 {
28906 if ((mi_delta & (3 << shift)) == 0)
28907 shift += 2;
28908 else
28909 {
28910 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28911 mi_op, this_regno, this_regno,
28912 mi_delta & (0xff << shift));
28913 mi_delta &= ~(0xff << shift);
28914 shift += 8;
28915 }
28916 }
28917 }
28918 if (TARGET_THUMB1)
28919 {
28920 if (TARGET_THUMB1_ONLY)
28921 fputs ("\tpop\t{r3}\n", file);
28922
28923 fprintf (file, "\tbx\tr12\n");
28924
28925 /* With -mpure-code, we don't need to emit literals for the
28926 function address and delta since we emitted code to build
28927 them. */
28928 if (!target_pure_code)
28929 {
28930 ASM_OUTPUT_ALIGN (file, 2);
28931 assemble_name (file, label);
28932 fputs (":\n", file);
28933 if (flag_pic)
28934 {
28935 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28936 rtx tem = XEXP (DECL_RTL (function), 0);
28937 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28938 pipeline offset is four rather than eight. Adjust the offset
28939 accordingly. */
28940 tem = plus_constant (GET_MODE (tem), tem,
28941 TARGET_THUMB1_ONLY ? -3 : -7);
28942 tem = gen_rtx_MINUS (GET_MODE (tem),
28943 tem,
28944 gen_rtx_SYMBOL_REF (Pmode,
28945 ggc_strdup (labelpc)));
28946 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28947 }
28948 else
28949 /* Output ".word .LTHUNKn". */
28950 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28951
28952 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28953 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
28954 }
28955 }
28956 else
28957 {
28958 fputs ("\tb\t", file);
28959 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28960 if (NEED_PLT_RELOC)
28961 fputs ("(PLT)", file);
28962 fputc ('\n', file);
28963 }
28964
28965 final_end_function ();
28966 }
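
/* For a rough idea of the output (illustrative, not exact): on a
   Thumb-1-only target without -fpic or -mpure-code, a thunk with a small
   positive DELTA ends up looking like

	push	{r3}
	ldr	r3, <label>
	mov	r12, r3
	adds	r0, r0, #<delta>
	pop	{r3}
	bx	r12
	.align	2
   <label>:
	.word	<function>

   with r1 instead of r0 when the function returns an aggregate.  */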
28967
28968 /* MI thunk handling for TARGET_32BIT. */
28969
28970 static void
28971 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28972 HOST_WIDE_INT vcall_offset, tree function)
28973 {
28974 const bool long_call_p = arm_is_long_call_p (function);
28975
28976 /* On ARM, this_regno is R0 or R1 depending on
28977 whether the function returns an aggregate or not.
28978 */
28979 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28980 function)
28981 ? R1_REGNUM : R0_REGNUM);
28982
28983 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28984 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28985 reload_completed = 1;
28986 emit_note (NOTE_INSN_PROLOGUE_END);
28987
28988 /* Add DELTA to THIS_RTX. */
28989 if (delta != 0)
28990 arm_split_constant (PLUS, Pmode, NULL_RTX,
28991 delta, this_rtx, this_rtx, false);
28992
28993 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28994 if (vcall_offset != 0)
28995 {
28996 /* Load *THIS_RTX. */
28997 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28998 /* Compute *THIS_RTX + VCALL_OFFSET. */
28999 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29000 false);
29001 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29002 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29003 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29004 }
29005
29006 /* Generate a tail call to the target function. */
29007 if (!TREE_USED (function))
29008 {
29009 assemble_external (function);
29010 TREE_USED (function) = 1;
29011 }
29012 rtx funexp = XEXP (DECL_RTL (function), 0);
29013 if (long_call_p)
29014 {
29015 emit_move_insn (temp, funexp);
29016 funexp = temp;
29017 }
29018 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29019 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29020 SIBLING_CALL_P (insn) = 1;
29021 emit_barrier ();
29022
29023 /* Indirect calls require a bit of fixup in PIC mode. */
29024 if (long_call_p)
29025 {
29026 split_all_insns_noflow ();
29027 arm_reorg ();
29028 }
29029
29030 insn = get_insns ();
29031 shorten_branches (insn);
29032 final_start_function (insn, file, 1);
29033 final (insn, file, 1);
29034 final_end_function ();
29035
29036 /* Stop pretending this is a post-reload pass. */
29037 reload_completed = 0;
29038 }
29039
29040 /* Output code to add DELTA to the first argument, and then jump
29041 to FUNCTION. Used for C++ multiple inheritance. */
29042
29043 static void
29044 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29045 HOST_WIDE_INT vcall_offset, tree function)
29046 {
29047 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29048
29049 assemble_start_function (thunk, fnname);
29050 if (TARGET_32BIT)
29051 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29052 else
29053 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29054 assemble_end_function (thunk, fnname);
29055 }
29056
29057 int
29058 arm_emit_vector_const (FILE *file, rtx x)
29059 {
29060 int i;
29061 const char * pattern;
29062
29063 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29064
29065 switch (GET_MODE (x))
29066 {
29067 case E_V2SImode: pattern = "%08x"; break;
29068 case E_V4HImode: pattern = "%04x"; break;
29069 case E_V8QImode: pattern = "%02x"; break;
29070 default: gcc_unreachable ();
29071 }
29072
29073 fprintf (file, "0x");
29074 for (i = CONST_VECTOR_NUNITS (x); i--;)
29075 {
29076 rtx element;
29077
29078 element = CONST_VECTOR_ELT (x, i);
29079 fprintf (file, pattern, INTVAL (element));
29080 }
29081
29082 return 1;
29083 }
29084
29085 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29086 HFmode constant pool entries are actually loaded with ldr. */
29087 void
29088 arm_emit_fp16_const (rtx c)
29089 {
29090 long bits;
29091
29092 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29093 if (WORDS_BIG_ENDIAN)
29094 assemble_zeros (2);
29095 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29096 if (!WORDS_BIG_ENDIAN)
29097 assemble_zeros (2);
29098 }
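
/* For example: on a little-endian target this emits the two bytes of the
   HFmode value followed by two bytes of zero padding, while with
   WORDS_BIG_ENDIAN the zero padding is emitted first, presumably so that
   an ldr of the whole 4-byte word leaves the value in the low 16 bits of
   the register in either case.  */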
29099
29100 const char *
29101 arm_output_load_gr (rtx *operands)
29102 {
29103 rtx reg;
29104 rtx offset;
29105 rtx wcgr;
29106 rtx sum;
29107
29108 if (!MEM_P (operands [1])
29109 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29110 || !REG_P (reg = XEXP (sum, 0))
29111 || !CONST_INT_P (offset = XEXP (sum, 1))
29112 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29113 return "wldrw%?\t%0, %1";
29114
29115 /* Fix up an out-of-range load of a GR register. */
29116 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29117 wcgr = operands[0];
29118 operands[0] = reg;
29119 output_asm_insn ("ldr%?\t%0, %1", operands);
29120
29121 operands[0] = wcgr;
29122 operands[1] = reg;
29123 output_asm_insn ("tmcr%?\t%0, %1", operands);
29124 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29125
29126 return "";
29127 }
29128
29129 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29130
29131 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29132 named arg and all anonymous args onto the stack.
29133 XXX I know the prologue shouldn't be pushing registers, but it is faster
29134 that way. */
29135
29136 static void
29137 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29138 const function_arg_info &arg,
29139 int *pretend_size,
29140 int second_time ATTRIBUTE_UNUSED)
29141 {
29142 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29143 int nregs;
29144
29145 cfun->machine->uses_anonymous_args = 1;
29146 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29147 {
29148 nregs = pcum->aapcs_ncrn;
29149 if (nregs & 1)
29150 {
29151 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29152 if (res < 0 && warn_psabi)
29153 inform (input_location, "parameter passing for argument of "
29154 "type %qT changed in GCC 7.1", arg.type);
29155 else if (res > 0)
29156 {
29157 nregs++;
29158 if (res > 1 && warn_psabi)
29159 inform (input_location,
29160 "parameter passing for argument of type "
29161 "%qT changed in GCC 9.1", arg.type);
29162 }
29163 }
29164 }
29165 else
29166 nregs = pcum->nregs;
29167
29168 if (nregs < NUM_ARG_REGS)
29169 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29170 }
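
/* Worked example (assuming the usual four core argument registers of
   four bytes each): for `int f (int a, ...)' under AAPCS the named
   argument consumes r0, so nregs == 1 and *pretend_size becomes
   (4 - 1) * 4 == 12, making the prologue spill r1-r3 so that va_arg can
   walk all anonymous arguments on the stack.  */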
29171
29172 /* We can't rely on the caller doing the proper promotion when
29173 using APCS or ATPCS. */
29174
29175 static bool
29176 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29177 {
29178 return !TARGET_AAPCS_BASED;
29179 }
29180
29181 static machine_mode
29182 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29183 machine_mode mode,
29184 int *punsignedp ATTRIBUTE_UNUSED,
29185 const_tree fntype ATTRIBUTE_UNUSED,
29186 int for_return ATTRIBUTE_UNUSED)
29187 {
29188 if (GET_MODE_CLASS (mode) == MODE_INT
29189 && GET_MODE_SIZE (mode) < 4)
29190 return SImode;
29191
29192 return mode;
29193 }
29194
29195
29196 static bool
29197 arm_default_short_enums (void)
29198 {
29199 return ARM_DEFAULT_SHORT_ENUMS;
29200 }
29201
29202
29203 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29204
29205 static bool
29206 arm_align_anon_bitfield (void)
29207 {
29208 return TARGET_AAPCS_BASED;
29209 }
29210
29211
29212 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29213
29214 static tree
29215 arm_cxx_guard_type (void)
29216 {
29217 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29218 }
29219
29220
29221 /* The EABI says test the least significant bit of a guard variable. */
29222
29223 static bool
29224 arm_cxx_guard_mask_bit (void)
29225 {
29226 return TARGET_AAPCS_BASED;
29227 }
29228
29229
29230 /* The EABI specifies that all array cookies are 8 bytes long. */
29231
29232 static tree
29233 arm_get_cookie_size (tree type)
29234 {
29235 tree size;
29236
29237 if (!TARGET_AAPCS_BASED)
29238 return default_cxx_get_cookie_size (type);
29239
29240 size = build_int_cst (sizetype, 8);
29241 return size;
29242 }
29243
29244
29245 /* The EABI says that array cookies should also contain the element size. */
29246
29247 static bool
29248 arm_cookie_has_size (void)
29249 {
29250 return TARGET_AAPCS_BASED;
29251 }
29252
29253
29254 /* The EABI says constructors and destructors should return a pointer to
29255 the object constructed/destroyed. */
29256
29257 static bool
29258 arm_cxx_cdtor_returns_this (void)
29259 {
29260 return TARGET_AAPCS_BASED;
29261 }
29262
29263 /* The EABI says that an inline function may never be the key
29264 method. */
29265
29266 static bool
29267 arm_cxx_key_method_may_be_inline (void)
29268 {
29269 return !TARGET_AAPCS_BASED;
29270 }
29271
29272 static void
29273 arm_cxx_determine_class_data_visibility (tree decl)
29274 {
29275 if (!TARGET_AAPCS_BASED
29276 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29277 return;
29278
29279 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29280 is exported. However, on systems without dynamic vague linkage,
29281 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29282 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29283 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29284 else
29285 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29286 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29287 }
29288
29289 static bool
29290 arm_cxx_class_data_always_comdat (void)
29291 {
29292 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29293 vague linkage if the class has no key function. */
29294 return !TARGET_AAPCS_BASED;
29295 }
29296
29297
29298 /* The EABI says __aeabi_atexit should be used to register static
29299 destructors. */
29300
29301 static bool
29302 arm_cxx_use_aeabi_atexit (void)
29303 {
29304 return TARGET_AAPCS_BASED;
29305 }
29306
29307
29308 void
29309 arm_set_return_address (rtx source, rtx scratch)
29310 {
29311 arm_stack_offsets *offsets;
29312 HOST_WIDE_INT delta;
29313 rtx addr, mem;
29314 unsigned long saved_regs;
29315
29316 offsets = arm_get_frame_offsets ();
29317 saved_regs = offsets->saved_regs_mask;
29318
29319 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29320 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29321 else
29322 {
29323 if (frame_pointer_needed)
29324 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29325 else
29326 {
29327 /* LR will be the first saved register. */
29328 delta = offsets->outgoing_args - (offsets->frame + 4);
29329
29330
29331 if (delta >= 4096)
29332 {
29333 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29334 GEN_INT (delta & ~4095)));
29335 addr = scratch;
29336 delta &= 4095;
29337 }
29338 else
29339 addr = stack_pointer_rtx;
29340
29341 addr = plus_constant (Pmode, addr, delta);
29342 }
29343
29344 /* The store needs to be marked to prevent DSE from deleting
29345 it as dead if it is based on fp. */
29346 mem = gen_frame_mem (Pmode, addr);
29347 MEM_VOLATILE_P (mem) = true;
29348 emit_move_insn (mem, source);
29349 }
29350 }
29351
29352
29353 void
29354 thumb_set_return_address (rtx source, rtx scratch)
29355 {
29356 arm_stack_offsets *offsets;
29357 HOST_WIDE_INT delta;
29358 HOST_WIDE_INT limit;
29359 int reg;
29360 rtx addr, mem;
29361 unsigned long mask;
29362
29363 emit_use (source);
29364
29365 offsets = arm_get_frame_offsets ();
29366 mask = offsets->saved_regs_mask;
29367 if (mask & (1 << LR_REGNUM))
29368 {
29369 limit = 1024;
29370 /* Find the saved regs. */
29371 if (frame_pointer_needed)
29372 {
29373 delta = offsets->soft_frame - offsets->saved_args;
29374 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29375 if (TARGET_THUMB1)
29376 limit = 128;
29377 }
29378 else
29379 {
29380 delta = offsets->outgoing_args - offsets->saved_args;
29381 reg = SP_REGNUM;
29382 }
29383 /* Allow for the stack frame. */
29384 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29385 delta -= 16;
29386 /* The link register is always the first saved register. */
29387 delta -= 4;
29388
29389 /* Construct the address. */
29390 addr = gen_rtx_REG (SImode, reg);
29391 if (delta > limit)
29392 {
29393 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29394 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29395 addr = scratch;
29396 }
29397 else
29398 addr = plus_constant (Pmode, addr, delta);
29399
29400 /* The store needs to be marked to prevent DSE from deleting
29401 it as dead if it is based on fp. */
29402 mem = gen_frame_mem (Pmode, addr);
29403 MEM_VOLATILE_P (mem) = true;
29404 emit_move_insn (mem, source);
29405 }
29406 else
29407 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29408 }
29409
29410 /* Implements target hook vector_mode_supported_p. */
29411 bool
29412 arm_vector_mode_supported_p (machine_mode mode)
29413 {
29414 /* Neon also supports V2SImode, etc. listed in the clause below. */
29415 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29416 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29417 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29418 || mode == V8BFmode))
29419 return true;
29420
29421 if ((TARGET_NEON || TARGET_IWMMXT)
29422 && ((mode == V2SImode)
29423 || (mode == V4HImode)
29424 || (mode == V8QImode)))
29425 return true;
29426
29427 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29428 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29429 || mode == V2HAmode))
29430 return true;
29431
29432 if (TARGET_HAVE_MVE
29433 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29434 || mode == V16QImode
29435 || mode == V16BImode || mode == V8BImode || mode == V4BImode))
29436 return true;
29437
29438 if (TARGET_HAVE_MVE_FLOAT
29439 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29440 return true;
29441
29442 return false;
29443 }
29444
29445 /* Implements target hook array_mode_supported_p. */
29446
29447 static bool
29448 arm_array_mode_supported_p (machine_mode mode,
29449 unsigned HOST_WIDE_INT nelems)
29450 {
29451 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29452 for now, as the lane-swapping logic needs to be extended in the expanders.
29453 See PR target/82518. */
29454 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29455 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29456 && (nelems >= 2 && nelems <= 4))
29457 return true;
29458
29459 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29460 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29461 return true;
29462
29463 return false;
29464 }
29465
29466 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29467 registers when autovectorizing for Neon, at least until multiple vector
29468 widths are supported properly by the middle-end. */
29469
29470 static machine_mode
29471 arm_preferred_simd_mode (scalar_mode mode)
29472 {
29473 if (TARGET_NEON)
29474 switch (mode)
29475 {
29476 case E_HFmode:
29477 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29478 case E_SFmode:
29479 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29480 case E_SImode:
29481 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29482 case E_HImode:
29483 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29484 case E_QImode:
29485 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29486 case E_DImode:
29487 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29488 return V2DImode;
29489 break;
29490
29491 default:;
29492 }
29493
29494 if (TARGET_REALLY_IWMMXT)
29495 switch (mode)
29496 {
29497 case E_SImode:
29498 return V2SImode;
29499 case E_HImode:
29500 return V4HImode;
29501 case E_QImode:
29502 return V8QImode;
29503
29504 default:;
29505 }
29506
29507 if (TARGET_HAVE_MVE)
29508 switch (mode)
29509 {
29510 case E_QImode:
29511 return V16QImode;
29512 case E_HImode:
29513 return V8HImode;
29514 case E_SImode:
29515 return V4SImode;
29516
29517 default:;
29518 }
29519
29520 if (TARGET_HAVE_MVE_FLOAT)
29521 switch (mode)
29522 {
29523 case E_HFmode:
29524 return V8HFmode;
29525 case E_SFmode:
29526 return V4SFmode;
29527
29528 default:;
29529 }
29530
29531 return word_mode;
29532 }
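
/* For instance, when vectorizing SImode data with Neon enabled this
   returns V4SImode (a quadword vector) by default, or V2SImode when
   -mvectorize-with-neon-double is given; with MVE it is V4SImode, and
   word_mode (no preferred vector mode) otherwise.  */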
29533
29534 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29535
29536 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29537 using r0-r4 for function arguments, r7 for the stack frame and don't have
29538 enough left over to do doubleword arithmetic. For Thumb-2 all the
29539 potentially problematic instructions accept high registers so this is not
29540 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29541 that require many low registers. */
29542 static bool
29543 arm_class_likely_spilled_p (reg_class_t rclass)
29544 {
29545 if ((TARGET_THUMB1 && rclass == LO_REGS)
29546 || rclass == CC_REG)
29547 return true;
29548
29549 return default_class_likely_spilled_p (rclass);
29550 }
29551
29552 /* Implements target hook small_register_classes_for_mode_p. */
29553 bool
29554 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29555 {
29556 return TARGET_THUMB1;
29557 }
29558
29559 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29560 ARM insns and therefore guarantee that the shift count is modulo 256.
29561 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29562 guarantee no particular behavior for out-of-range counts. */
29563
29564 static unsigned HOST_WIDE_INT
29565 arm_shift_truncation_mask (machine_mode mode)
29566 {
29567 return mode == SImode ? 255 : 0;
29568 }
29569
29570
29571 /* Map internal gcc register numbers to DWARF2 register numbers. */
29572
29573 unsigned int
29574 arm_dbx_register_number (unsigned int regno)
29575 {
29576 if (regno < 16)
29577 return regno;
29578
29579 if (IS_VFP_REGNUM (regno))
29580 {
29581 /* See comment in arm_dwarf_register_span. */
29582 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29583 return 64 + regno - FIRST_VFP_REGNUM;
29584 else
29585 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29586 }
29587
29588 if (IS_IWMMXT_GR_REGNUM (regno))
29589 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29590
29591 if (IS_IWMMXT_REGNUM (regno))
29592 return 112 + regno - FIRST_IWMMXT_REGNUM;
29593
29594 return DWARF_FRAME_REGISTERS;
29595 }
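
/* Example mappings (see the register ranges described in
   arm_dwarf_register_span below): r0-r15 map to DWARF numbers 0-15
   unchanged, s0 maps to 64, and a double register with no
   single-precision alias such as d16 maps into the 256+ range
   (256 + 16 == 272).  */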
29596
29597 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29598 GCC models them as 64 32-bit registers, so we need to describe this to
29599 the DWARF generation code. Other registers can use the default. */
29600 static rtx
29601 arm_dwarf_register_span (rtx rtl)
29602 {
29603 machine_mode mode;
29604 unsigned regno;
29605 rtx parts[16];
29606 int nregs;
29607 int i;
29608
29609 regno = REGNO (rtl);
29610 if (!IS_VFP_REGNUM (regno))
29611 return NULL_RTX;
29612
29613 /* XXX FIXME: The EABI defines two VFP register ranges:
29614 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29615 256-287: D0-D31
29616 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29617 corresponding D register. Until GDB supports this, we shall use the
29618 legacy encodings. We also use these encodings for D0-D15 for
29619 compatibility with older debuggers. */
29620 mode = GET_MODE (rtl);
29621 if (GET_MODE_SIZE (mode) < 8)
29622 return NULL_RTX;
29623
29624 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29625 {
29626 nregs = GET_MODE_SIZE (mode) / 4;
29627 for (i = 0; i < nregs; i += 2)
29628 if (TARGET_BIG_END)
29629 {
29630 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29631 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29632 }
29633 else
29634 {
29635 parts[i] = gen_rtx_REG (SImode, regno + i);
29636 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29637 }
29638 }
29639 else
29640 {
29641 nregs = GET_MODE_SIZE (mode) / 8;
29642 for (i = 0; i < nregs; i++)
29643 parts[i] = gen_rtx_REG (DImode, regno + i);
29644 }
29645
29646 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29647 }
29648
29649 #if ARM_UNWIND_INFO
29650 /* Emit unwind directives for a store-multiple instruction or stack pointer
29651 push during alignment.
29652 These should only ever be generated by the function prologue code, so
29653 expect them to have a particular form.
29654 The store-multiple instruction sometimes pushes pc as the last register,
29655 although it should not be tracked into unwind information, or for -Os
29656 sometimes pushes some dummy registers before the first register that needs
29657 to be tracked in unwind information; such dummy registers are there just
29658 to avoid separate stack adjustment, and will not be restored in the
29659 epilogue. */
29660
29661 static void
29662 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29663 {
29664 int i;
29665 HOST_WIDE_INT offset;
29666 HOST_WIDE_INT nregs;
29667 int reg_size;
29668 unsigned reg;
29669 unsigned lastreg;
29670 unsigned padfirst = 0, padlast = 0;
29671 rtx e;
29672
29673 e = XVECEXP (p, 0, 0);
29674 gcc_assert (GET_CODE (e) == SET);
29675
29676 /* First insn will adjust the stack pointer. */
29677 gcc_assert (GET_CODE (e) == SET
29678 && REG_P (SET_DEST (e))
29679 && REGNO (SET_DEST (e)) == SP_REGNUM
29680 && GET_CODE (SET_SRC (e)) == PLUS);
29681
29682 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29683 nregs = XVECLEN (p, 0) - 1;
29684 gcc_assert (nregs);
29685
29686 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29687 if (reg < 16)
29688 {
29689 /* For -Os dummy registers can be pushed at the beginning to
29690 avoid separate stack pointer adjustment. */
29691 e = XVECEXP (p, 0, 1);
29692 e = XEXP (SET_DEST (e), 0);
29693 if (GET_CODE (e) == PLUS)
29694 padfirst = INTVAL (XEXP (e, 1));
29695 gcc_assert (padfirst == 0 || optimize_size);
29696 /* The function prologue may also push pc, but not annotate it as it is
29697 never restored. We turn this into a stack pointer adjustment. */
29698 e = XVECEXP (p, 0, nregs);
29699 e = XEXP (SET_DEST (e), 0);
29700 if (GET_CODE (e) == PLUS)
29701 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29702 else
29703 padlast = offset - 4;
29704 gcc_assert (padlast == 0 || padlast == 4);
29705 if (padlast == 4)
29706 fprintf (out_file, "\t.pad #4\n");
29707 reg_size = 4;
29708 fprintf (out_file, "\t.save {");
29709 }
29710 else if (IS_VFP_REGNUM (reg))
29711 {
29712 reg_size = 8;
29713 fprintf (out_file, "\t.vsave {");
29714 }
29715 else
29716 /* Unknown register type. */
29717 gcc_unreachable ();
29718
29719 /* If the stack increment doesn't match the size of the saved registers,
29720 something has gone horribly wrong. */
29721 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29722
29723 offset = padfirst;
29724 lastreg = 0;
29725 /* The remaining insns will describe the stores. */
29726 for (i = 1; i <= nregs; i++)
29727 {
29728 /* Expect (set (mem <addr>) (reg)).
29729 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29730 e = XVECEXP (p, 0, i);
29731 gcc_assert (GET_CODE (e) == SET
29732 && MEM_P (SET_DEST (e))
29733 && REG_P (SET_SRC (e)));
29734
29735 reg = REGNO (SET_SRC (e));
29736 gcc_assert (reg >= lastreg);
29737
29738 if (i != 1)
29739 fprintf (out_file, ", ");
29740 /* We can't use %r for vfp because we need to use the
29741 double precision register names. */
29742 if (IS_VFP_REGNUM (reg))
29743 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29744 else
29745 asm_fprintf (out_file, "%r", reg);
29746
29747 if (flag_checking)
29748 {
29749 /* Check that the addresses are consecutive. */
29750 e = XEXP (SET_DEST (e), 0);
29751 if (GET_CODE (e) == PLUS)
29752 gcc_assert (REG_P (XEXP (e, 0))
29753 && REGNO (XEXP (e, 0)) == SP_REGNUM
29754 && CONST_INT_P (XEXP (e, 1))
29755 && offset == INTVAL (XEXP (e, 1)));
29756 else
29757 gcc_assert (i == 1
29758 && REG_P (e)
29759 && REGNO (e) == SP_REGNUM);
29760 offset += reg_size;
29761 }
29762 }
29763 fprintf (out_file, "}\n");
29764 if (padfirst)
29765 fprintf (out_file, "\t.pad #%d\n", padfirst);
29766 }
29767
29768 /* Emit unwind directives for a SET. */
29769
29770 static void
29771 arm_unwind_emit_set (FILE * out_file, rtx p)
29772 {
29773 rtx e0;
29774 rtx e1;
29775 unsigned reg;
29776
29777 e0 = XEXP (p, 0);
29778 e1 = XEXP (p, 1);
29779 switch (GET_CODE (e0))
29780 {
29781 case MEM:
29782 /* Pushing a single register. */
29783 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29784 || !REG_P (XEXP (XEXP (e0, 0), 0))
29785 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29786 abort ();
29787
29788 asm_fprintf (out_file, "\t.save ");
29789 if (IS_VFP_REGNUM (REGNO (e1)))
29790 asm_fprintf(out_file, "{d%d}\n",
29791 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29792 else
29793 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29794 break;
29795
29796 case REG:
29797 if (REGNO (e0) == SP_REGNUM)
29798 {
29799 /* A stack increment. */
29800 if (GET_CODE (e1) != PLUS
29801 || !REG_P (XEXP (e1, 0))
29802 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29803 || !CONST_INT_P (XEXP (e1, 1)))
29804 abort ();
29805
29806 asm_fprintf (out_file, "\t.pad #%wd\n",
29807 -INTVAL (XEXP (e1, 1)));
29808 }
29809 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29810 {
29811 HOST_WIDE_INT offset;
29812
29813 if (GET_CODE (e1) == PLUS)
29814 {
29815 if (!REG_P (XEXP (e1, 0))
29816 || !CONST_INT_P (XEXP (e1, 1)))
29817 abort ();
29818 reg = REGNO (XEXP (e1, 0));
29819 offset = INTVAL (XEXP (e1, 1));
29820 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29821 HARD_FRAME_POINTER_REGNUM, reg,
29822 offset);
29823 }
29824 else if (REG_P (e1))
29825 {
29826 reg = REGNO (e1);
29827 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29828 HARD_FRAME_POINTER_REGNUM, reg);
29829 }
29830 else
29831 abort ();
29832 }
29833 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29834 {
29835 /* Move from sp to reg. */
29836 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
29837 }
29838 else if (GET_CODE (e1) == PLUS
29839 && REG_P (XEXP (e1, 0))
29840 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29841 && CONST_INT_P (XEXP (e1, 1)))
29842 {
29843 /* Set reg to offset from sp. */
29844 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
29845 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29846 }
29847 else
29848 abort ();
29849 break;
29850
29851 default:
29852 abort ();
29853 }
29854 }
29855
29856
29857 /* Emit unwind directives for the given insn. */
29858
29859 static void
29860 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
29861 {
29862 rtx note, pat;
29863 bool handled_one = false;
29864
29865 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29866 return;
29867
29868 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29869 && (TREE_NOTHROW (current_function_decl)
29870 || crtl->all_throwers_are_sibcalls))
29871 return;
29872
29873 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29874 return;
29875
29876 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29877 {
29878 switch (REG_NOTE_KIND (note))
29879 {
29880 case REG_FRAME_RELATED_EXPR:
29881 pat = XEXP (note, 0);
29882 goto found;
29883
29884 case REG_CFA_REGISTER:
29885 pat = XEXP (note, 0);
29886 if (pat == NULL)
29887 {
29888 pat = PATTERN (insn);
29889 if (GET_CODE (pat) == PARALLEL)
29890 pat = XVECEXP (pat, 0, 0);
29891 }
29892
29893 /* Only emitted for IS_STACKALIGN re-alignment. */
29894 {
29895 rtx dest, src;
29896 unsigned reg;
29897
29898 src = SET_SRC (pat);
29899 dest = SET_DEST (pat);
29900
29901 gcc_assert (src == stack_pointer_rtx);
29902 reg = REGNO (dest);
29903 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29904 reg + 0x90, reg);
29905 }
29906 handled_one = true;
29907 break;
29908
29909 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29910 to get correct dwarf information for shrink-wrap. We should not
29911 emit unwind information for it because these are used either for
29912 pretend arguments or notes to adjust sp and restore registers from
29913 stack. */
29914 case REG_CFA_DEF_CFA:
29915 case REG_CFA_ADJUST_CFA:
29916 case REG_CFA_RESTORE:
29917 return;
29918
29919 case REG_CFA_EXPRESSION:
29920 case REG_CFA_OFFSET:
29921 /* ??? Only handling here what we actually emit. */
29922 gcc_unreachable ();
29923
29924 default:
29925 break;
29926 }
29927 }
29928 if (handled_one)
29929 return;
29930 pat = PATTERN (insn);
29931 found:
29932
29933 switch (GET_CODE (pat))
29934 {
29935 case SET:
29936 arm_unwind_emit_set (out_file, pat);
29937 break;
29938
29939 case SEQUENCE:
29940 /* Store multiple. */
29941 arm_unwind_emit_sequence (out_file, pat);
29942 break;
29943
29944 default:
29945 abort();
29946 }
29947 }
29948
29949
29950 /* Output a reference from a function exception table to the type_info
29951 object X. The EABI specifies that the symbol should be relocated by
29952 an R_ARM_TARGET2 relocation. */
29953
29954 static bool
29955 arm_output_ttype (rtx x)
29956 {
29957 fputs ("\t.word\t", asm_out_file);
29958 output_addr_const (asm_out_file, x);
29959 /* Use special relocations for symbol references. */
29960 if (!CONST_INT_P (x))
29961 fputs ("(TARGET2)", asm_out_file);
29962 fputc ('\n', asm_out_file);
29963
29964 return TRUE;
29965 }
29966
29967 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29968
29969 static void
29970 arm_asm_emit_except_personality (rtx personality)
29971 {
29972 fputs ("\t.personality\t", asm_out_file);
29973 output_addr_const (asm_out_file, personality);
29974 fputc ('\n', asm_out_file);
29975 }
29976 #endif /* ARM_UNWIND_INFO */
29977
29978 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29979
29980 static void
29981 arm_asm_init_sections (void)
29982 {
29983 #if ARM_UNWIND_INFO
29984 exception_section = get_unnamed_section (0, output_section_asm_op,
29985 "\t.handlerdata");
29986 #endif /* ARM_UNWIND_INFO */
29987
29988 #ifdef OBJECT_FORMAT_ELF
29989 if (target_pure_code)
29990 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29991 #endif
29992 }
29993
29994 /* Output unwind directives for the start/end of a function. */
29995
29996 void
29997 arm_output_fn_unwind (FILE * f, bool prologue)
29998 {
29999 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30000 return;
30001
30002 if (prologue)
30003 fputs ("\t.fnstart\n", f);
30004 else
30005 {
30006 /* If this function will never be unwound, then mark it as such.
30007 The same condition is used in arm_unwind_emit to suppress
30008 the frame annotations. */
30009 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30010 && (TREE_NOTHROW (current_function_decl)
30011 || crtl->all_throwers_are_sibcalls))
30012 fputs("\t.cantunwind\n", f);
30013
30014 fputs ("\t.fnend\n", f);
30015 }
30016 }
30017
30018 static bool
30019 arm_emit_tls_decoration (FILE *fp, rtx x)
30020 {
30021 enum tls_reloc reloc;
30022 rtx val;
30023
30024 val = XVECEXP (x, 0, 0);
30025 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30026
30027 output_addr_const (fp, val);
30028
30029 switch (reloc)
30030 {
30031 case TLS_GD32:
30032 fputs ("(tlsgd)", fp);
30033 break;
30034 case TLS_GD32_FDPIC:
30035 fputs ("(tlsgd_fdpic)", fp);
30036 break;
30037 case TLS_LDM32:
30038 fputs ("(tlsldm)", fp);
30039 break;
30040 case TLS_LDM32_FDPIC:
30041 fputs ("(tlsldm_fdpic)", fp);
30042 break;
30043 case TLS_LDO32:
30044 fputs ("(tlsldo)", fp);
30045 break;
30046 case TLS_IE32:
30047 fputs ("(gottpoff)", fp);
30048 break;
30049 case TLS_IE32_FDPIC:
30050 fputs ("(gottpoff_fdpic)", fp);
30051 break;
30052 case TLS_LE32:
30053 fputs ("(tpoff)", fp);
30054 break;
30055 case TLS_DESCSEQ:
30056 fputs ("(tlsdesc)", fp);
30057 break;
30058 default:
30059 gcc_unreachable ();
30060 }
30061
30062 switch (reloc)
30063 {
30064 case TLS_GD32:
30065 case TLS_LDM32:
30066 case TLS_IE32:
30067 case TLS_DESCSEQ:
30068 fputs (" + (. - ", fp);
30069 output_addr_const (fp, XVECEXP (x, 0, 2));
30070 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30071 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30072 output_addr_const (fp, XVECEXP (x, 0, 3));
30073 fputc (')', fp);
30074 break;
30075 default:
30076 break;
30077 }
30078
30079 return TRUE;
30080 }
30081
30082 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30083
30084 static void
30085 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30086 {
30087 gcc_assert (size == 4);
30088 fputs ("\t.word\t", file);
30089 output_addr_const (file, x);
30090 fputs ("(tlsldo)", file);
30091 }
30092
30093 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30094
30095 static bool
30096 arm_output_addr_const_extra (FILE *fp, rtx x)
30097 {
30098 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30099 return arm_emit_tls_decoration (fp, x);
30100 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30101 {
30102 char label[256];
30103 int labelno = INTVAL (XVECEXP (x, 0, 0));
30104
30105 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30106 assemble_name_raw (fp, label);
30107
30108 return TRUE;
30109 }
30110 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30111 {
30112 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30113 if (GOT_PCREL)
30114 fputs ("+.", fp);
30115 fputs ("-(", fp);
30116 output_addr_const (fp, XVECEXP (x, 0, 0));
30117 fputc (')', fp);
30118 return TRUE;
30119 }
30120 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30121 {
30122 output_addr_const (fp, XVECEXP (x, 0, 0));
30123 if (GOT_PCREL)
30124 fputs ("+.", fp);
30125 fputs ("-(", fp);
30126 output_addr_const (fp, XVECEXP (x, 0, 1));
30127 fputc (')', fp);
30128 return TRUE;
30129 }
30130 else if (GET_CODE (x) == CONST_VECTOR)
30131 return arm_emit_vector_const (fp, x);
30132
30133 return FALSE;
30134 }
30135
30136 /* Output assembly for a shift instruction.
30137 SET_FLAGS determines how the instruction modifies the condition codes.
30138 0 - Do not set condition codes.
30139 1 - Set condition codes.
30140 2 - Use smallest instruction. */
30141 const char *
30142 arm_output_shift(rtx * operands, int set_flags)
30143 {
30144 char pattern[100];
30145 static const char flag_chars[3] = {'?', '.', '!'};
30146 const char *shift;
30147 HOST_WIDE_INT val;
30148 char c;
30149
30150 c = flag_chars[set_flags];
30151 shift = shift_op(operands[3], &val);
30152 if (shift)
30153 {
30154 if (val != -1)
30155 operands[2] = GEN_INT(val);
30156 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30157 }
30158 else
30159 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30160
30161 output_asm_insn (pattern, operands);
30162 return "";
30163 }
30164
30165 /* Output assembly for a WMMX immediate shift instruction. */
30166 const char *
30167 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30168 {
30169 int shift = INTVAL (operands[2]);
30170 char templ[50];
30171 machine_mode opmode = GET_MODE (operands[0]);
30172
30173 gcc_assert (shift >= 0);
30174
30175 /* Handle a shift value greater than the maximum the qualifier allows:
30176 63 for the D qualifier, 31 for W and 15 for H. */
30177 if (((opmode == V4HImode) && (shift > 15))
30178 || ((opmode == V2SImode) && (shift > 31))
30179 || ((opmode == DImode) && (shift > 63)))
30180 {
30181 if (wror_or_wsra)
30182 {
30183 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30184 output_asm_insn (templ, operands);
30185 if (opmode == DImode)
30186 {
30187 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30188 output_asm_insn (templ, operands);
30189 }
30190 }
30191 else
30192 {
30193 /* The destination register will contain all zeros. */
30194 sprintf (templ, "wzero\t%%0");
30195 output_asm_insn (templ, operands);
30196 }
30197 return "";
30198 }
30199
30200 if ((opmode == DImode) && (shift > 32))
30201 {
30202 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30203 output_asm_insn (templ, operands);
30204 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30205 output_asm_insn (templ, operands);
30206 }
30207 else
30208 {
30209 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30210 output_asm_insn (templ, operands);
30211 }
30212 return "";
30213 }
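
/* For example (illustrative): a DImode shift by an immediate of 40 is
   split by the code above into two instructions, one shifting by #32 and
   a second shifting the intermediate result by #8.  */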
30214
30215 /* Output assembly for a WMMX tinsr instruction. */
30216 const char *
30217 arm_output_iwmmxt_tinsr (rtx *operands)
30218 {
30219 int mask = INTVAL (operands[3]);
30220 int i;
30221 char templ[50];
30222 int units = mode_nunits[GET_MODE (operands[0])];
30223 gcc_assert ((mask & (mask - 1)) == 0);
30224 for (i = 0; i < units; ++i)
30225 {
30226 if ((mask & 0x01) == 1)
30227 {
30228 break;
30229 }
30230 mask >>= 1;
30231 }
30232 gcc_assert (i < units);
30233 {
30234 switch (GET_MODE (operands[0]))
30235 {
30236 case E_V8QImode:
30237 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30238 break;
30239 case E_V4HImode:
30240 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30241 break;
30242 case E_V2SImode:
30243 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30244 break;
30245 default:
30246 gcc_unreachable ();
30247 break;
30248 }
30249 output_asm_insn (templ, operands);
30250 }
30251 return "";
30252 }
30253
30254 /* Output a Thumb-1 casesi dispatch sequence. */
30255 const char *
30256 thumb1_output_casesi (rtx *operands)
30257 {
30258 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30259
30260 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30261
30262 switch (GET_MODE(diff_vec))
30263 {
30264 case E_QImode:
30265 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30266 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30267 case E_HImode:
30268 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30269 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30270 case E_SImode:
30271 return "bl\t%___gnu_thumb1_case_si";
30272 default:
30273 gcc_unreachable ();
30274 }
30275 }
30276
30277 /* Output a Thumb-2 casesi instruction. */
30278 const char *
30279 thumb2_output_casesi (rtx *operands)
30280 {
30281 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30282
30283 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30284
30285 output_asm_insn ("cmp\t%0, %1", operands);
30286 output_asm_insn ("bhi\t%l3", operands);
30287 switch (GET_MODE(diff_vec))
30288 {
30289 case E_QImode:
30290 return "tbb\t[%|pc, %0]";
30291 case E_HImode:
30292 return "tbh\t[%|pc, %0, lsl #1]";
30293 case E_SImode:
30294 if (flag_pic)
30295 {
30296 output_asm_insn ("adr\t%4, %l2", operands);
30297 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30298 output_asm_insn ("add\t%4, %4, %5", operands);
30299 return "bx\t%4";
30300 }
30301 else
30302 {
30303 output_asm_insn ("adr\t%4, %l2", operands);
30304 return "ldr\t%|pc, [%4, %0, lsl #2]";
30305 }
30306 default:
30307 gcc_unreachable ();
30308 }
30309 }
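
/* Illustrative output for a dispatch table with HImode offsets:

	cmp	<index>, <bound>
	bhi	<default-label>
	tbh	[pc, <index>, lsl #1]

   The QImode case uses tbb instead, and the SImode case falls back to an
   adr/ldr sequence (plus an add and bx when -fpic is in effect).  */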
30310
30311 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30312 per-core tuning structs. */
30313 static int
30314 arm_issue_rate (void)
30315 {
30316 return current_tune->issue_rate;
30317 }
30318
30319 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30320 static int
30321 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30322 {
30323 if (DEBUG_INSN_P (insn))
30324 return more;
30325
30326 rtx_code code = GET_CODE (PATTERN (insn));
30327 if (code == USE || code == CLOBBER)
30328 return more;
30329
30330 if (get_attr_type (insn) == TYPE_NO_INSN)
30331 return more;
30332
30333 return more - 1;
30334 }
30335
30336 /* Return how many instructions the scheduler should look ahead to choose
30337 the best one. */
30338 static int
30339 arm_first_cycle_multipass_dfa_lookahead (void)
30340 {
30341 int issue_rate = arm_issue_rate ();
30342
30343 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30344 }
30345
30346 /* Enable modeling of L2 auto-prefetcher. */
30347 static int
30348 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30349 {
30350 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30351 }
30352
30353 const char *
30354 arm_mangle_type (const_tree type)
30355 {
30356 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30357 has to be mangled as if it is in the "std" namespace. */
30358 if (TARGET_AAPCS_BASED
30359 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30360 return "St9__va_list";
30361
30362 /* Half-precision floating point types. */
30363 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30364 {
30365 if (TYPE_MODE (type) == BFmode)
30366 return "u6__bf16";
30367 else
30368 return "Dh";
30369 }
30370
30371 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30372 builtin type. */
30373 if (TYPE_NAME (type) != NULL)
30374 return arm_mangle_builtin_type (type);
30375
30376 /* Use the default mangling. */
30377 return NULL;
30378 }
30379
30380 /* Order of allocation of core registers for Thumb: this allocation is
30381 written over the corresponding initial entries of the array
30382 initialized with REG_ALLOC_ORDER. We allocate all low registers
30383 first. Saving and restoring a low register is usually cheaper than
30384 using a call-clobbered high register. */
30385
30386 static const int thumb_core_reg_alloc_order[] =
30387 {
30388 3, 2, 1, 0, 4, 5, 6, 7,
30389 12, 14, 8, 9, 10, 11
30390 };
30391
30392 /* Adjust register allocation order when compiling for Thumb. */
30393
30394 void
30395 arm_order_regs_for_local_alloc (void)
30396 {
30397 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30398 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30399 if (TARGET_THUMB)
30400 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30401 sizeof (thumb_core_reg_alloc_order));
30402 }
30403
30404 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30405
30406 bool
30407 arm_frame_pointer_required (void)
30408 {
30409 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30410 return true;
30411
30412 /* If the function receives nonlocal gotos, it needs to save the frame
30413 pointer in the nonlocal_goto_save_area object. */
30414 if (cfun->has_nonlocal_label)
30415 return true;
30416
30417 /* The frame pointer is required for non-leaf APCS frames. */
30418 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30419 return true;
30420
30421 /* If we are probing the stack in the prologue, we will have a faulting
30422 instruction prior to the stack adjustment and this requires a frame
30423 pointer if we want to catch the exception using the EABI unwinder. */
30424 if (!IS_INTERRUPT (arm_current_func_type ())
30425 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30426 || flag_stack_clash_protection)
30427 && arm_except_unwind_info (&global_options) == UI_TARGET
30428 && cfun->can_throw_non_call_exceptions)
30429 {
30430 HOST_WIDE_INT size = get_frame_size ();
30431
30432 /* That's irrelevant if there is no stack adjustment. */
30433 if (size <= 0)
30434 return false;
30435
30436 /* That's relevant only if there is a stack probe. */
30437 if (crtl->is_leaf && !cfun->calls_alloca)
30438 {
30439 /* We don't have the final size of the frame so adjust. */
30440 size += 32 * UNITS_PER_WORD;
30441 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30442 return true;
30443 }
30444 else
30445 return true;
30446 }
30447
30448 return false;
30449 }
30450
30451 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30452 All modes except THUMB1 have conditional execution.
30453 If we have conditional arithmetic, return false before reload to
30454 enable some ifcvt transformations. */
30455 static bool
30456 arm_have_conditional_execution (void)
30457 {
30458 bool has_cond_exec, enable_ifcvt_trans;
30459
30460 /* Only THUMB1 cannot support conditional execution. */
30461 has_cond_exec = !TARGET_THUMB1;
30462
30463 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30464 before reload. */
30465 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30466
30467 return has_cond_exec && !enable_ifcvt_trans;
30468 }
30469
30470 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30471 static HOST_WIDE_INT
30472 arm_vector_alignment (const_tree type)
30473 {
30474 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30475
30476 if (TARGET_AAPCS_BASED)
30477 align = MIN (align, 64);
30478
30479 return align;
30480 }
30481
30482 static unsigned int
30483 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30484 {
30485 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30486 {
30487 modes->safe_push (V16QImode);
30488 modes->safe_push (V8QImode);
30489 }
30490 return 0;
30491 }
30492
30493 static bool
30494 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30495 {
30496 /* Vectors which aren't in packed structures will not be less aligned than
30497 the natural alignment of their element type, so this is safe. */
30498 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30499 return !is_packed;
30500
30501 return default_builtin_vector_alignment_reachable (type, is_packed);
30502 }
30503
30504 static bool
30505 arm_builtin_support_vector_misalignment (machine_mode mode,
30506 const_tree type, int misalignment,
30507 bool is_packed)
30508 {
30509 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30510 {
30511 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30512
30513 if (is_packed)
30514 return align == 1;
30515
30516 /* If the misalignment is unknown, we should be able to handle the access
30517 so long as it is not to a member of a packed data structure. */
30518 if (misalignment == -1)
30519 return true;
30520
30521 /* Return true if the misalignment is a multiple of the natural alignment
30522 of the vector's element type. This is probably always going to be
30523 true in practice, since we've already established that this isn't a
30524 packed access. */
30525 return ((misalignment % align) == 0);
30526 }
30527
30528 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30529 is_packed);
30530 }
30531
30532 static void
30533 arm_conditional_register_usage (void)
30534 {
30535 int regno;
30536
30537 if (TARGET_THUMB1 && optimize_size)
30538 {
30539 /* When optimizing for size on Thumb-1, it's better not
30540 to use the HI regs, because of the overhead of
30541 stacking them. */
30542 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30543 fixed_regs[regno] = call_used_regs[regno] = 1;
30544 }
30545
30546 /* The link register can be clobbered by any branch insn,
30547 but we have no way to track that at present, so mark
30548 it as unavailable. */
30549 if (TARGET_THUMB1)
30550 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30551
30552 if (TARGET_32BIT && TARGET_VFP_BASE)
30553 {
30554 /* VFPv3 registers are disabled when earlier VFP
30555 versions are selected due to the definition of
30556 LAST_VFP_REGNUM. */
30557 for (regno = FIRST_VFP_REGNUM;
30558 regno <= LAST_VFP_REGNUM; ++ regno)
30559 {
30560 fixed_regs[regno] = 0;
30561 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30562 || regno >= FIRST_VFP_REGNUM + 32;
30563 }
30564 if (TARGET_HAVE_MVE)
30565 fixed_regs[VPR_REGNUM] = 0;
30566 }
30567
30568 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30569 {
30570 regno = FIRST_IWMMXT_GR_REGNUM;
30571 /* The 2002/10/09 revision of the XScale ABI has wCG0
30572 and wCG1 as call-preserved registers. The 2002/11/21
30573 revision changed this so that all wCG registers are
30574 scratch registers. */
30575 for (regno = FIRST_IWMMXT_GR_REGNUM;
30576 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30577 fixed_regs[regno] = 0;
30578 /* The XScale ABI has wR0 - wR9 as scratch registers,
30579 the rest as call-preserved registers. */
30580 for (regno = FIRST_IWMMXT_REGNUM;
30581 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30582 {
30583 fixed_regs[regno] = 0;
30584 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30585 }
30586 }
30587
30588 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30589 {
30590 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30591 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30592 }
30593 else if (TARGET_APCS_STACK)
30594 {
30595 fixed_regs[10] = 1;
30596 call_used_regs[10] = 1;
30597 }
30598 /* -mcaller-super-interworking reserves r11 for calls to
30599 _interwork_r11_call_via_rN(). Making the register global
30600 is an easy way of ensuring that it remains valid for all
30601 calls. */
30602 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30603 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30604 {
30605 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30606 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30607 if (TARGET_CALLER_INTERWORKING)
30608 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30609 }
30610
30611 /* The Q and GE bits are only accessed via special ACLE patterns. */
30612 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30613 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30614
30615 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30616 }
30617
30618 static reg_class_t
30619 arm_preferred_rename_class (reg_class_t rclass)
30620 {
30621 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30622 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
30623 and code size can be reduced. */
30624 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30625 return LO_REGS;
30626 else
30627 return NO_REGS;
30628 }
30629
30630 /* Compute the attribute "length" of insn "*push_multi".
30631 So this function MUST be kept in sync with that insn pattern. */
30632 int
30633 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30634 {
30635 int i, regno, hi_reg;
30636 int num_saves = XVECLEN (parallel_op, 0);
30637
30638 /* ARM mode. */
30639 if (TARGET_ARM)
30640 return 4;
30641 /* Thumb1 mode. */
30642 if (TARGET_THUMB1)
30643 return 2;
30644
30645 /* Thumb2 mode. */
30646 regno = REGNO (first_op);
30647 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
30648 list fits in the 8-bit register-list field. Normally this means all registers
30649 in the list must be LO_REGS, that is (R0-R7). If any HI_REGS are used, then we
30650 must use 32-bit encodings. There is one exception: for PUSH, LR (although in
30651 HI_REGS) can be used with the 16-bit encoding. */
30652 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30653 for (i = 1; i < num_saves && !hi_reg; i++)
30654 {
30655 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30656 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30657 }
30658
30659 if (!hi_reg)
30660 return 2;
30661 return 4;
30662 }
30663
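/* Illustrative examples (not exhaustive) of the rule above for Thumb-2:
   "push {r0-r7}" and "push {r4, lr}" can use the 16-bit encoding, so the
   length is 2; "push {r4, r8}" needs the 32-bit encoding, so the length
   is 4.  */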
30664 /* Compute the attribute "length" of an insn. Currently, this function is used
30665 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30666 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30667 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30668 true if OPERANDS contains an insn which explicitly updates the base register. */
30669
30670 int
30671 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30672 {
30673 /* ARM mode. */
30674 if (TARGET_ARM)
30675 return 4;
30676 /* Thumb1 mode. */
30677 if (TARGET_THUMB1)
30678 return 2;
30679
30680 rtx parallel_op = operands[0];
30681 /* Start from the index of the last element in the PARALLEL. */
30682 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30683 /* Initialize REGNO to the number of the base register. */
30684 unsigned regno = REGNO (operands[1]);
30685 /* Skip the return and write-back patterns.
30686 Only the register pop patterns are needed for the analysis below. */
30687 unsigned first_indx = 0;
30688 first_indx += return_pc ? 1 : 0;
30689 first_indx += write_back_p ? 1 : 0;
30690
30691 /* A pop operation can be done through LDM or POP. If the base register is SP
30692 and write-back is used, then the LDM is an alias of POP. */
30693 bool pop_p = (regno == SP_REGNUM && write_back_p);
30694 bool ldm_p = !pop_p;
30695
30696 /* Check base register for LDM. */
30697 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30698 return 4;
30699
30700 /* Check each register in the list. */
30701 for (; indx >= first_indx; indx--)
30702 {
30703 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30704 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30705 comment in arm_attr_length_push_multi. */
30706 if (REGNO_REG_CLASS (regno) == HI_REGS
30707 && (regno != PC_REGNUM || ldm_p))
30708 return 4;
30709 }
30710
30711 return 2;
30712 }
30713
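/* Illustrative examples of the rule above for Thumb-2: "pop {r4-r7, pc}"
   (base register SP with write-back) can use the 16-bit encoding, so the
   length is 2; a pop list containing r8, or an LDM whose base register is
   a high register, needs the 32-bit encoding, so the length is 4.  */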
30714 /* Compute the number of instructions emitted by output_move_double. */
30715 int
30716 arm_count_output_move_double_insns (rtx *operands)
30717 {
30718 int count;
30719 rtx ops[2];
30720 /* output_move_double may modify the operands array, so call it
30721 here on a copy of the array. */
30722 ops[0] = operands[0];
30723 ops[1] = operands[1];
30724 output_move_double (ops, false, &count);
30725 return count;
30726 }
30727
30728 /* Same as above, but operands are a register/memory pair in SImode.
30729 Assumes operands has the base register in position 0 and memory in position
30730 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30731 int
30732 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30733 {
30734 int count;
30735 rtx ops[2];
30736 int regnum, memnum;
30737 if (load)
30738 regnum = 0, memnum = 1;
30739 else
30740 regnum = 1, memnum = 0;
30741 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30742 ops[memnum] = adjust_address (operands[2], DImode, 0);
30743 output_move_double (ops, false, &count);
30744 return count;
30745 }
30746
30747
30748 int
30749 vfp3_const_double_for_fract_bits (rtx operand)
30750 {
30751 REAL_VALUE_TYPE r0;
30752
30753 if (!CONST_DOUBLE_P (operand))
30754 return 0;
30755
30756 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30757 if (exact_real_inverse (DFmode, &r0)
30758 && !REAL_VALUE_NEGATIVE (r0))
30759 {
30760 if (exact_real_truncate (DFmode, &r0))
30761 {
30762 HOST_WIDE_INT value = real_to_integer (&r0);
30763 value = value & 0xffffffff;
30764 if ((value != 0) && ( (value & (value - 1)) == 0))
30765 {
30766 int ret = exact_log2 (value);
30767 gcc_assert (IN_RANGE (ret, 0, 31));
30768 return ret;
30769 }
30770 }
30771 }
30772 return 0;
30773 }
30774
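/* Worked example (illustrative only): for the constant 0.25 the exact
   inverse is 4.0 == 2^2, so the function returns 2, i.e. the operand can
   be treated as a fixed-point value with 2 fraction bits.  */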
30775 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30776 log2 is in [1, 32], return that log2. Otherwise return -1.
30777 This is used in the patterns for vcvt.s32.f32 floating-point to
30778 fixed-point conversions. */
30779
30780 int
30781 vfp3_const_double_for_bits (rtx x)
30782 {
30783 const REAL_VALUE_TYPE *r;
30784
30785 if (!CONST_DOUBLE_P (x))
30786 return -1;
30787
30788 r = CONST_DOUBLE_REAL_VALUE (x);
30789
30790 if (REAL_VALUE_NEGATIVE (*r)
30791 || REAL_VALUE_ISNAN (*r)
30792 || REAL_VALUE_ISINF (*r)
30793 || !real_isinteger (r, SFmode))
30794 return -1;
30795
30796 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30797
30798 /* The exact_log2 above will have returned -1 if this is
30799 not an exact log2. */
30800 if (!IN_RANGE (hwint, 1, 32))
30801 return -1;
30802
30803 return hwint;
30804 }
30805
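/* Worked examples (illustrative only): 65536.0 == 2^16 yields 16, while
   3.0 is not a power of 2 and 1.0 has log2 0 (outside [1, 32]), so both
   of those yield -1.  */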
30806 \f
30807 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30808
30809 static void
30810 arm_pre_atomic_barrier (enum memmodel model)
30811 {
30812 if (need_atomic_barrier_p (model, true))
30813 emit_insn (gen_memory_barrier ());
30814 }
30815
30816 static void
30817 arm_post_atomic_barrier (enum memmodel model)
30818 {
30819 if (need_atomic_barrier_p (model, false))
30820 emit_insn (gen_memory_barrier ());
30821 }
30822
30823 /* Emit the load-exclusive and store-exclusive instructions.
30824 Use acquire and release versions if necessary. */
30825
30826 static void
30827 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30828 {
30829 rtx (*gen) (rtx, rtx);
30830
30831 if (acq)
30832 {
30833 switch (mode)
30834 {
30835 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30836 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30837 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30838 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30839 default:
30840 gcc_unreachable ();
30841 }
30842 }
30843 else
30844 {
30845 switch (mode)
30846 {
30847 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30848 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30849 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30850 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30851 default:
30852 gcc_unreachable ();
30853 }
30854 }
30855
30856 emit_insn (gen (rval, mem));
30857 }
30858
30859 static void
30860 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30861 rtx mem, bool rel)
30862 {
30863 rtx (*gen) (rtx, rtx, rtx);
30864
30865 if (rel)
30866 {
30867 switch (mode)
30868 {
30869 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30870 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30871 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30872 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30873 default:
30874 gcc_unreachable ();
30875 }
30876 }
30877 else
30878 {
30879 switch (mode)
30880 {
30881 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30882 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30883 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30884 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30885 default:
30886 gcc_unreachable ();
30887 }
30888 }
30889
30890 emit_insn (gen (bval, rval, mem));
30891 }
30892
30893 /* Mark the previous jump instruction as unlikely. */
30894
30895 static void
30896 emit_unlikely_jump (rtx insn)
30897 {
30898 rtx_insn *jump = emit_jump_insn (insn);
30899 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30900 }
30901
30902 /* Expand a compare and swap pattern. */
30903
30904 void
30905 arm_expand_compare_and_swap (rtx operands[])
30906 {
30907 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30908 machine_mode mode, cmp_mode;
30909
30910 bval = operands[0];
30911 rval = operands[1];
30912 mem = operands[2];
30913 oldval = operands[3];
30914 newval = operands[4];
30915 is_weak = operands[5];
30916 mod_s = operands[6];
30917 mod_f = operands[7];
30918 mode = GET_MODE (mem);
30919
30920 /* Normally the succ memory model must be stronger than fail, but in the
30921 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30922 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30923
30924 if (TARGET_HAVE_LDACQ
30925 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30926 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30927 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30928
30929 switch (mode)
30930 {
30931 case E_QImode:
30932 case E_HImode:
30933 /* For narrow modes, we're going to perform the comparison in SImode,
30934 so do the zero-extension now. */
30935 rval = gen_reg_rtx (SImode);
30936 oldval = convert_modes (SImode, mode, oldval, true);
30937 /* FALLTHRU */
30938
30939 case E_SImode:
30940 /* Force the value into a register if needed. We waited until after
30941 the zero-extension above to do this properly. */
30942 if (!arm_add_operand (oldval, SImode))
30943 oldval = force_reg (SImode, oldval);
30944 break;
30945
30946 case E_DImode:
30947 if (!cmpdi_operand (oldval, mode))
30948 oldval = force_reg (mode, oldval);
30949 break;
30950
30951 default:
30952 gcc_unreachable ();
30953 }
30954
30955 if (TARGET_THUMB1)
30956 cmp_mode = E_SImode;
30957 else
30958 cmp_mode = CC_Zmode;
30959
30960 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30961 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30962 oldval, newval, is_weak, mod_s, mod_f));
30963
30964 if (mode == QImode || mode == HImode)
30965 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30966
30967 /* In all cases, we arrange for success to be signaled by Z set.
30968 This arrangement allows for the boolean result to be used directly
30969 in a subsequent branch, post optimization. For Thumb-1 targets, the
30970 boolean negation of the result is also stored in bval because the Thumb-1
30971 backend lacks dependency tracking for the CC flag, as flag-setting is not
30972 represented at the RTL level. */
30973 if (TARGET_THUMB1)
30974 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30975 else
30976 {
30977 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30978 emit_insn (gen_rtx_SET (bval, x));
30979 }
30980 }
30981
30982 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30983 another memory store between the load-exclusive and store-exclusive can
30984 reset the monitor from Exclusive to Open state. This means we must wait
30985 until after reload to split the pattern, lest we get a register spill in
30986 the middle of the atomic sequence. Success of the compare and swap is
30987 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
30988 for Thumb-1 targets (i.e. the negation of the boolean value returned by
30989 the atomic_compare_and_swap<mode> standard pattern in operand 0). */
30990
30991 void
30992 arm_split_compare_and_swap (rtx operands[])
30993 {
30994 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30995 machine_mode mode;
30996 enum memmodel mod_s, mod_f;
30997 bool is_weak;
30998 rtx_code_label *label1, *label2;
30999 rtx x, cond;
31000
31001 rval = operands[1];
31002 mem = operands[2];
31003 oldval = operands[3];
31004 newval = operands[4];
31005 is_weak = (operands[5] != const0_rtx);
31006 mod_s_rtx = operands[6];
31007 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31008 mod_f = memmodel_from_int (INTVAL (operands[7]));
31009 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31010 mode = GET_MODE (mem);
31011
31012 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31013
31014 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31015 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31016
31017 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31018 a full barrier is emitted after the store-release. */
31019 if (is_armv8_sync)
31020 use_acquire = false;
31021
31022 /* Checks whether a barrier is needed and emits one accordingly. */
31023 if (!(use_acquire || use_release))
31024 arm_pre_atomic_barrier (mod_s);
31025
31026 label1 = NULL;
31027 if (!is_weak)
31028 {
31029 label1 = gen_label_rtx ();
31030 emit_label (label1);
31031 }
31032 label2 = gen_label_rtx ();
31033
31034 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31035
31036 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
31037 as required to communicate with arm_expand_compare_and_swap. */
31038 if (TARGET_32BIT)
31039 {
31040 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31041 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31042 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31043 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31044 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31045 }
31046 else
31047 {
31048 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31049 if (thumb1_cmpneg_operand (oldval, SImode))
31050 {
31051 rtx src = rval;
31052 if (!satisfies_constraint_L (oldval))
31053 {
31054 gcc_assert (satisfies_constraint_J (oldval));
31055
31056 /* For such immediates, ADDS needs the source and destination regs
31057 to be the same.
31058
31059 Normally this would be handled by RA, but this is all happening
31060 after RA. */
31061 emit_move_insn (neg_bval, rval);
31062 src = neg_bval;
31063 }
31064
31065 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31066 label2, cond));
31067 }
31068 else
31069 {
31070 emit_move_insn (neg_bval, const1_rtx);
31071 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31072 }
31073 }
31074
31075 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31076
31077 /* Weak or strong, we want EQ to be true for success, so that we
31078 match the flags that we got from the compare above. */
31079 if (TARGET_32BIT)
31080 {
31081 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31082 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31083 emit_insn (gen_rtx_SET (cond, x));
31084 }
31085
31086 if (!is_weak)
31087 {
31088 /* Z is set to boolean value of !neg_bval, as required to communicate
31089 with arm_expand_compare_and_swap. */
31090 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31091 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31092 }
31093
31094 if (!is_mm_relaxed (mod_f))
31095 emit_label (label2);
31096
31097 /* Checks whether a barrier is needed and emits one accordingly. */
31098 if (is_armv8_sync
31099 || !(use_acquire || use_release))
31100 arm_post_atomic_barrier (mod_s);
31101
31102 if (is_mm_relaxed (mod_f))
31103 emit_label (label2);
31104 }
31105
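/* For reference, a sketch of the code emitted for a strong SImode compare
   and swap on a 32-bit target without acquire/release instructions
   (registers and labels are illustrative; barriers depend on the memory
   model):

	.Lretry:
		ldrex	r0, [r2]	@ rval = *mem
		cmp	r0, r1		@ rval == oldval?
		bne	.Ldone
		strex	r3, r4, [r2]	@ try to store newval
		cmp	r3, #0
		bne	.Lretry		@ retry if the store-exclusive failed
	.Ldone:  */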
31106 /* Split an atomic operation pattern. Operation is given by CODE and is one
31107 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31108 operation). Operation is performed on the content at MEM and on VALUE
31109 following the memory model MODEL_RTX. The content at MEM before and after
31110 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31111 success of the operation is returned in COND. Using a scratch register or
31112 an operand register for these determines what result is returned for that
31113 pattern. */
31114
31115 void
31116 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31117 rtx value, rtx model_rtx, rtx cond)
31118 {
31119 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31120 machine_mode mode = GET_MODE (mem);
31121 machine_mode wmode = (mode == DImode ? DImode : SImode);
31122 rtx_code_label *label;
31123 bool all_low_regs, bind_old_new;
31124 rtx x;
31125
31126 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31127
31128 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31129 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31130
31131 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31132 a full barrier is emitted after the store-release. */
31133 if (is_armv8_sync)
31134 use_acquire = false;
31135
31136 /* Checks whether a barrier is needed and emits one accordingly. */
31137 if (!(use_acquire || use_release))
31138 arm_pre_atomic_barrier (model);
31139
31140 label = gen_label_rtx ();
31141 emit_label (label);
31142
31143 if (new_out)
31144 new_out = gen_lowpart (wmode, new_out);
31145 if (old_out)
31146 old_out = gen_lowpart (wmode, old_out);
31147 else
31148 old_out = new_out;
31149 value = simplify_gen_subreg (wmode, value, mode, 0);
31150
31151 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31152
31153 /* Does the operation require destination and first operand to use the same
31154 register? This is decided by register constraints of relevant insn
31155 patterns in thumb1.md. */
31156 gcc_assert (!new_out || REG_P (new_out));
31157 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31158 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31159 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31160 bind_old_new =
31161 (TARGET_THUMB1
31162 && code != SET
31163 && code != MINUS
31164 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31165
31166 /* We want to return the old value while putting the result of the operation
31167 in the same register as the old value, so copy the old value over to the
31168 destination register and use that register for the operation. */
31169 if (old_out && bind_old_new)
31170 {
31171 emit_move_insn (new_out, old_out);
31172 old_out = new_out;
31173 }
31174
31175 switch (code)
31176 {
31177 case SET:
31178 new_out = value;
31179 break;
31180
31181 case NOT:
31182 x = gen_rtx_AND (wmode, old_out, value);
31183 emit_insn (gen_rtx_SET (new_out, x));
31184 x = gen_rtx_NOT (wmode, new_out);
31185 emit_insn (gen_rtx_SET (new_out, x));
31186 break;
31187
31188 case MINUS:
31189 if (CONST_INT_P (value))
31190 {
31191 value = gen_int_mode (-INTVAL (value), wmode);
31192 code = PLUS;
31193 }
31194 /* FALLTHRU */
31195
31196 case PLUS:
31197 if (mode == DImode)
31198 {
31199 /* DImode plus/minus need to clobber flags. */
31200 /* The adddi3 and subdi3 patterns are incorrectly written so that
31201 they require matching operands, even when we could easily support
31202 three operands. Thankfully, this can be fixed up post-splitting,
31203 as the individual add+adc patterns do accept three operands and
31204 post-reload cprop can make these moves go away. */
31205 emit_move_insn (new_out, old_out);
31206 if (code == PLUS)
31207 x = gen_adddi3 (new_out, new_out, value);
31208 else
31209 x = gen_subdi3 (new_out, new_out, value);
31210 emit_insn (x);
31211 break;
31212 }
31213 /* FALLTHRU */
31214
31215 default:
31216 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31217 emit_insn (gen_rtx_SET (new_out, x));
31218 break;
31219 }
31220
31221 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31222 use_release);
31223
31224 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31225 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31226
31227 /* Checks whether a barrier is needed and emits one accordingly. */
31228 if (is_armv8_sync
31229 || !(use_acquire || use_release))
31230 arm_post_atomic_barrier (model);
31231 }
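/* For reference, a sketch of the loop emitted for an SImode atomic add
   (registers and labels are illustrative; barriers depend on MODEL_RTX):

	.Lretry:
		ldrex	r0, [r3]	@ old_out = *mem
		add	r1, r0, r2	@ new_out = old_out + value
		strex	ip, r1, [r3]	@ cond = store-exclusive status
		cmp	ip, #0
		bne	.Lretry  */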
31232 \f
31233 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31234 opt_machine_mode
31235 arm_mode_to_pred_mode (machine_mode mode)
31236 {
31237 switch (GET_MODE_NUNITS (mode))
31238 {
31239 case 16: return V16BImode;
31240 case 8: return V8BImode;
31241 case 4: return V4BImode;
31242 }
31243 return opt_machine_mode ();
31244 }
31245
31246 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31247 If CAN_INVERT, store either the result or its inverse in TARGET
31248 and return true if TARGET contains the inverse. If !CAN_INVERT,
31249 always store the result in TARGET, never its inverse.
31250
31251 Note that the handling of floating-point comparisons is not
31252 IEEE compliant. */
31253
31254 bool
31255 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31256 bool can_invert)
31257 {
31258 machine_mode cmp_result_mode = GET_MODE (target);
31259 machine_mode cmp_mode = GET_MODE (op0);
31260
31261 bool inverted;
31262
31263 /* MVE supports more comparisons than Neon. */
31264 if (TARGET_HAVE_MVE)
31265 inverted = false;
31266 else
31267 switch (code)
31268 {
31269 /* For these we need to compute the inverse of the requested
31270 comparison. */
31271 case UNORDERED:
31272 case UNLT:
31273 case UNLE:
31274 case UNGT:
31275 case UNGE:
31276 case UNEQ:
31277 case NE:
31278 code = reverse_condition_maybe_unordered (code);
31279 if (!can_invert)
31280 {
31281 /* Recursively emit the inverted comparison into a temporary
31282 and then store its inverse in TARGET. This avoids reusing
31283 TARGET (which for integer NE could be one of the inputs). */
31284 rtx tmp = gen_reg_rtx (cmp_result_mode);
31285 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31286 gcc_unreachable ();
31287 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31288 return false;
31289 }
31290 inverted = true;
31291 break;
31292
31293 default:
31294 inverted = false;
31295 break;
31296 }
31297
31298 switch (code)
31299 {
31300 /* These are natively supported by Neon for zero comparisons, but otherwise
31301 require the operands to be swapped. For MVE, we can only compare
31302 registers. */
31303 case LE:
31304 case LT:
31305 if (!TARGET_HAVE_MVE)
31306 if (op1 != CONST0_RTX (cmp_mode))
31307 {
31308 code = swap_condition (code);
31309 std::swap (op0, op1);
31310 }
31311 /* Fall through. */
31312
31313 /* These are natively supported by Neon for both register and zero
31314 operands. MVE supports registers only. */
31315 case EQ:
31316 case GE:
31317 case GT:
31318 case NE:
31319 if (TARGET_HAVE_MVE)
31320 {
31321 switch (GET_MODE_CLASS (cmp_mode))
31322 {
31323 case MODE_VECTOR_INT:
31324 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31325 op0, force_reg (cmp_mode, op1)));
31326 break;
31327 case MODE_VECTOR_FLOAT:
31328 if (TARGET_HAVE_MVE_FLOAT)
31329 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31330 op0, force_reg (cmp_mode, op1)));
31331 else
31332 gcc_unreachable ();
31333 break;
31334 default:
31335 gcc_unreachable ();
31336 }
31337 }
31338 else
31339 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31340 return inverted;
31341
31342 /* These are natively supported for register operands only.
31343 Comparisons with zero aren't useful and should be folded
31344 or canonicalized by target-independent code. */
31345 case GEU:
31346 case GTU:
31347 if (TARGET_HAVE_MVE)
31348 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31349 op0, force_reg (cmp_mode, op1)));
31350 else
31351 emit_insn (gen_neon_vc (code, cmp_mode, target,
31352 op0, force_reg (cmp_mode, op1)));
31353 return inverted;
31354
31355 /* These require the operands to be swapped and likewise do not
31356 support comparisons with zero. */
31357 case LEU:
31358 case LTU:
31359 if (TARGET_HAVE_MVE)
31360 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31361 force_reg (cmp_mode, op1), op0));
31362 else
31363 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31364 target, force_reg (cmp_mode, op1), op0));
31365 return inverted;
31366
31367 /* These need a combination of two comparisons. */
31368 case LTGT:
31369 case ORDERED:
31370 {
31371 /* Operands are LTGT iff (a > b || a < b).
31372 Operands are ORDERED iff (a > b || a <= b). */
31373 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31374 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31375 rtx_code alt_code = (code == LTGT ? LT : LE);
31376 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31377 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31378 gcc_unreachable ();
31379 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31380 gt_res, alt_res)));
31381 return inverted;
31382 }
31383
31384 default:
31385 gcc_unreachable ();
31386 }
31387 }
31388
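/* For example (illustrative only): a V4SF UNLT comparison on Neon is
   rewritten as its inverse GE; with !CAN_INVERT this emits a vcge.f32
   into a temporary followed by a vmvn into TARGET, while with CAN_INVERT
   only the vcge.f32 is emitted and TRUE is returned to signal that TARGET
   holds the inverse.  */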
31389 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31390 CMP_RESULT_MODE is the mode of the comparison result. */
31391
31392 void
31393 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31394 {
31395 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31396 arm_expand_vector_compare, and another one here. */
31397 rtx mask;
31398
31399 if (TARGET_HAVE_MVE)
31400 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31401 else
31402 mask = gen_reg_rtx (cmp_result_mode);
31403
31404 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31405 operands[4], operands[5], true);
31406 if (inverted)
31407 std::swap (operands[1], operands[2]);
31408 if (TARGET_NEON)
31409 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31410 mask, operands[1], operands[2]));
31411 else
31412 {
31413 machine_mode cmp_mode = GET_MODE (operands[0]);
31414
31415 switch (GET_MODE_CLASS (cmp_mode))
31416 {
31417 case MODE_VECTOR_INT:
31418 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31419 operands[1], operands[2], mask));
31420 break;
31421 case MODE_VECTOR_FLOAT:
31422 if (TARGET_HAVE_MVE_FLOAT)
31423 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31424 operands[1], operands[2], mask));
31425 else
31426 gcc_unreachable ();
31427 break;
31428 default:
31429 gcc_unreachable ();
31430 }
31431 }
31432 }
31433 \f
31434 #define MAX_VECT_LEN 16
31435
31436 struct expand_vec_perm_d
31437 {
31438 rtx target, op0, op1;
31439 vec_perm_indices perm;
31440 machine_mode vmode;
31441 bool one_vector_p;
31442 bool testing_p;
31443 };
31444
31445 /* Generate a variable permutation. */
31446
31447 static void
31448 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31449 {
31450 machine_mode vmode = GET_MODE (target);
31451 bool one_vector_p = rtx_equal_p (op0, op1);
31452
31453 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31454 gcc_checking_assert (GET_MODE (op0) == vmode);
31455 gcc_checking_assert (GET_MODE (op1) == vmode);
31456 gcc_checking_assert (GET_MODE (sel) == vmode);
31457 gcc_checking_assert (TARGET_NEON);
31458
31459 if (one_vector_p)
31460 {
31461 if (vmode == V8QImode)
31462 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31463 else
31464 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31465 }
31466 else
31467 {
31468 rtx pair;
31469
31470 if (vmode == V8QImode)
31471 {
31472 pair = gen_reg_rtx (V16QImode);
31473 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31474 pair = gen_lowpart (TImode, pair);
31475 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31476 }
31477 else
31478 {
31479 pair = gen_reg_rtx (OImode);
31480 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31481 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31482 }
31483 }
31484 }
31485
31486 void
31487 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31488 {
31489 machine_mode vmode = GET_MODE (target);
31490 unsigned int nelt = GET_MODE_NUNITS (vmode);
31491 bool one_vector_p = rtx_equal_p (op0, op1);
31492 rtx mask;
31493
31494 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31495 numbering of elements for big-endian, we must reverse the order. */
31496 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31497
31498 /* The VTBL instruction does not use a modulo index, so we must take care
31499 of that ourselves. */
31500 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31501 mask = gen_const_vec_duplicate (vmode, mask);
31502 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31503
31504 arm_expand_vec_perm_1 (target, op0, op1, sel);
31505 }
31506
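/* For example (illustrative only): for a two-input V8QImode permutation
   the mask is 2 * 8 - 1 = 15, so each selector byte is ANDed with 15
   before being handed to the VTBL sequence.  */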
31507 /* Map lane ordering between architectural lane order, and GCC lane order,
31508 taking into account ABI. See comment above output_move_neon for details. */
31509
31510 static int
31511 neon_endian_lane_map (machine_mode mode, int lane)
31512 {
31513 if (BYTES_BIG_ENDIAN)
31514 {
31515 int nelems = GET_MODE_NUNITS (mode);
31516 /* Reverse lane order. */
31517 lane = (nelems - 1 - lane);
31518 /* Reverse D register order, to match ABI. */
31519 if (GET_MODE_SIZE (mode) == 16)
31520 lane = lane ^ (nelems / 2);
31521 }
31522 return lane;
31523 }
31524
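/* As an illustration (little-endian is the identity map): for the
   Q-register mode V4SImode on a big-endian target, lanes 0, 1, 2, 3 map
   to 1, 0, 3, 2, while for the D-register mode V2SImode lanes 0, 1 simply
   map to 1, 0.  */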
31525 /* Some permutations index into pairs of vectors, this is a helper function
31526 to map indexes into those pairs of vectors. */
31527
31528 static int
31529 neon_pair_endian_lane_map (machine_mode mode, int lane)
31530 {
31531 int nelem = GET_MODE_NUNITS (mode);
31532 if (BYTES_BIG_ENDIAN)
31533 lane =
31534 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31535 return lane;
31536 }
31537
31538 /* Generate or test for an insn that supports a constant permutation. */
31539
31540 /* Recognize patterns for the VUZP insns. */
31541
31542 static bool
31543 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31544 {
31545 unsigned int i, odd, mask, nelt = d->perm.length ();
31546 rtx out0, out1, in0, in1;
31547 int first_elem;
31548 int swap_nelt;
31549
31550 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31551 return false;
31552
31553 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31554 big-endian pattern on 64-bit vectors, so we correct for that. */
31555 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31556 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31557
31558 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31559
31560 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31561 odd = 0;
31562 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31563 odd = 1;
31564 else
31565 return false;
31566 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31567
31568 for (i = 0; i < nelt; i++)
31569 {
31570 unsigned elt =
31571 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31572 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31573 return false;
31574 }
31575
31576 /* Success! */
31577 if (d->testing_p)
31578 return true;
31579
31580 in0 = d->op0;
31581 in1 = d->op1;
31582 if (swap_nelt != 0)
31583 std::swap (in0, in1);
31584
31585 out0 = d->target;
31586 out1 = gen_reg_rtx (d->vmode);
31587 if (odd)
31588 std::swap (out0, out1);
31589
31590 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31591 return true;
31592 }
31593
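/* For example (little-endian, illustrative only): the two-input V8QImode
   permutation { 0, 2, 4, 6, 8, 10, 12, 14 } selects the even lanes and is
   matched here with ODD == 0; the odd-lane selection { 1, 3, ..., 15 } is
   matched with ODD == 1.  */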
31594 /* Recognize patterns for the VZIP insns. */
31595
31596 static bool
31597 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31598 {
31599 unsigned int i, high, mask, nelt = d->perm.length ();
31600 rtx out0, out1, in0, in1;
31601 int first_elem;
31602 bool is_swapped;
31603
31604 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31605 return false;
31606
31607 is_swapped = BYTES_BIG_ENDIAN;
31608
31609 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31610
31611 high = nelt / 2;
31612 if (first_elem == neon_endian_lane_map (d->vmode, high))
31613 ;
31614 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31615 high = 0;
31616 else
31617 return false;
31618 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31619
31620 for (i = 0; i < nelt / 2; i++)
31621 {
31622 unsigned elt =
31623 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31624 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31625 != elt)
31626 return false;
31627 elt =
31628 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31629 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31630 != elt)
31631 return false;
31632 }
31633
31634 /* Success! */
31635 if (d->testing_p)
31636 return true;
31637
31638 in0 = d->op0;
31639 in1 = d->op1;
31640 if (is_swapped)
31641 std::swap (in0, in1);
31642
31643 out0 = d->target;
31644 out1 = gen_reg_rtx (d->vmode);
31645 if (high)
31646 std::swap (out0, out1);
31647
31648 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31649 return true;
31650 }
31651
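/* For example (little-endian, illustrative only): the two-input V4SImode
   permutation { 0, 4, 1, 5 } interleaves the low halves of the inputs and
   is matched here with HIGH == 0, while { 2, 6, 3, 7 } interleaves the
   high halves and is matched with HIGH == 2.  */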
31652 /* Recognize patterns for the VREV insns. */
31653 static bool
31654 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31655 {
31656 unsigned int i, j, diff, nelt = d->perm.length ();
31657 rtx (*gen) (machine_mode, rtx, rtx);
31658
31659 if (!d->one_vector_p)
31660 return false;
31661
31662 diff = d->perm[0];
31663 switch (diff)
31664 {
31665 case 7:
31666 switch (d->vmode)
31667 {
31668 case E_V16QImode:
31669 case E_V8QImode:
31670 gen = gen_neon_vrev64;
31671 break;
31672 default:
31673 return false;
31674 }
31675 break;
31676 case 3:
31677 switch (d->vmode)
31678 {
31679 case E_V16QImode:
31680 case E_V8QImode:
31681 gen = gen_neon_vrev32;
31682 break;
31683 case E_V8HImode:
31684 case E_V4HImode:
31685 case E_V8HFmode:
31686 case E_V4HFmode:
31687 gen = gen_neon_vrev64;
31688 break;
31689 default:
31690 return false;
31691 }
31692 break;
31693 case 1:
31694 switch (d->vmode)
31695 {
31696 case E_V16QImode:
31697 case E_V8QImode:
31698 gen = gen_neon_vrev16;
31699 break;
31700 case E_V8HImode:
31701 case E_V4HImode:
31702 gen = gen_neon_vrev32;
31703 break;
31704 case E_V4SImode:
31705 case E_V2SImode:
31706 case E_V4SFmode:
31707 case E_V2SFmode:
31708 gen = gen_neon_vrev64;
31709 break;
31710 default:
31711 return false;
31712 }
31713 break;
31714 default:
31715 return false;
31716 }
31717
31718 for (i = 0; i < nelt ; i += diff + 1)
31719 for (j = 0; j <= diff; j += 1)
31720 {
31721 /* This is guaranteed to be true as the value of diff
31722 is 7, 3, 1 and we should have enough elements in the
31723 queue to generate this. Getting a vector mask with a
31724 value of diff other than these values implies that
31725 something is wrong by the time we get here. */
31726 gcc_assert (i + j < nelt);
31727 if (d->perm[i + j] != i + diff - j)
31728 return false;
31729 }
31730
31731 /* Success! */
31732 if (d->testing_p)
31733 return true;
31734
31735 emit_insn (gen (d->vmode, d->target, d->op0));
31736 return true;
31737 }
31738
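/* For example (little-endian, illustrative only): the single-input
   V8QImode permutation { 3, 2, 1, 0, 7, 6, 5, 4 } has DIFF == 3 and
   reverses the bytes within each 32-bit group, so it is emitted as
   vrev32.8.  */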
31739 /* Recognize patterns for the VTRN insns. */
31740
31741 static bool
31742 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31743 {
31744 unsigned int i, odd, mask, nelt = d->perm.length ();
31745 rtx out0, out1, in0, in1;
31746
31747 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31748 return false;
31749
31750 /* Note that these are little-endian tests. Adjust for big-endian later. */
31751 if (d->perm[0] == 0)
31752 odd = 0;
31753 else if (d->perm[0] == 1)
31754 odd = 1;
31755 else
31756 return false;
31757 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31758
31759 for (i = 0; i < nelt; i += 2)
31760 {
31761 if (d->perm[i] != i + odd)
31762 return false;
31763 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31764 return false;
31765 }
31766
31767 /* Success! */
31768 if (d->testing_p)
31769 return true;
31770
31771 in0 = d->op0;
31772 in1 = d->op1;
31773 if (BYTES_BIG_ENDIAN)
31774 {
31775 std::swap (in0, in1);
31776 odd = !odd;
31777 }
31778
31779 out0 = d->target;
31780 out1 = gen_reg_rtx (d->vmode);
31781 if (odd)
31782 std::swap (out0, out1);
31783
31784 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31785 return true;
31786 }
31787
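/* For example (little-endian, illustrative only): the two-input V4SImode
   permutation { 0, 4, 2, 6 } is matched here with ODD == 0 and
   { 1, 5, 3, 7 } with ODD == 1, corresponding to the two outputs of
   vtrn.32.  */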
31788 /* Recognize patterns for the VEXT insns. */
31789
31790 static bool
31791 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31792 {
31793 unsigned int i, nelt = d->perm.length ();
31794 rtx offset;
31795
31796 unsigned int location;
31797
31798 unsigned int next = d->perm[0] + 1;
31799
31800 /* TODO: Handle GCC's numbering of elements for big-endian. */
31801 if (BYTES_BIG_ENDIAN)
31802 return false;
31803
31804 /* Check if the extracted indexes are increasing by one. */
31805 for (i = 1; i < nelt; next++, i++)
31806 {
31807 /* If we hit the most significant element of the 2nd vector in
31808 the previous iteration, no need to test further. */
31809 if (next == 2 * nelt)
31810 return false;
31811
31812 /* If we are operating on only one vector: it could be a
31813 rotation. If there are only two elements of size < 64, let
31814 arm_evpc_neon_vrev catch it. */
31815 if (d->one_vector_p && (next == nelt))
31816 {
31817 if ((nelt == 2) && (d->vmode != V2DImode))
31818 return false;
31819 else
31820 next = 0;
31821 }
31822
31823 if (d->perm[i] != next)
31824 return false;
31825 }
31826
31827 location = d->perm[0];
31828
31829 /* Success! */
31830 if (d->testing_p)
31831 return true;
31832
31833 offset = GEN_INT (location);
31834
31835 if (d->vmode == E_DImode)
31836 return false;
31837
31838 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31839 return true;
31840 }
31841
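/* For example (little-endian, illustrative only): the two-input V4SImode
   permutation { 1, 2, 3, 4 } extracts four consecutive elements starting
   at index 1, so it is emitted as a vext with offset #1; the single-input
   rotation { 1, 2, 3, 0 } is handled the same way.  */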
31842 /* The NEON VTBL instruction is a fully variable permutation that's even
31843 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31844 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31845 can do slightly better by expanding this as a constant where we don't
31846 have to apply a mask. */
31847
31848 static bool
31849 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31850 {
31851 rtx rperm[MAX_VECT_LEN], sel;
31852 machine_mode vmode = d->vmode;
31853 unsigned int i, nelt = d->perm.length ();
31854
31855 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31856 numbering of elements for big-endian, we must reverse the order. */
31857 if (BYTES_BIG_ENDIAN)
31858 return false;
31859
31860 if (d->testing_p)
31861 return true;
31862
31863 /* Generic code will try constant permutation twice. Once with the
31864 original mode and again with the elements lowered to QImode.
31865 So wait and don't do the selector expansion ourselves. */
31866 if (vmode != V8QImode && vmode != V16QImode)
31867 return false;
31868
31869 for (i = 0; i < nelt; ++i)
31870 rperm[i] = GEN_INT (d->perm[i]);
31871 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31872 sel = force_reg (vmode, sel);
31873
31874 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31875 return true;
31876 }
31877
31878 static bool
31879 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31880 {
31881 /* Check if the input mask matches vext before reordering the
31882 operands. */
31883 if (TARGET_NEON)
31884 if (arm_evpc_neon_vext (d))
31885 return true;
31886
31887 /* The pattern matching functions above are written to look for a small
31888 number to begin the sequence (0, 1, N/2). If we begin with an index
31889 from the second operand, we can swap the operands. */
31890 unsigned int nelt = d->perm.length ();
31891 if (d->perm[0] >= nelt)
31892 {
31893 d->perm.rotate_inputs (1);
31894 std::swap (d->op0, d->op1);
31895 }
31896
31897 if (TARGET_NEON)
31898 {
31899 if (arm_evpc_neon_vuzp (d))
31900 return true;
31901 if (arm_evpc_neon_vzip (d))
31902 return true;
31903 if (arm_evpc_neon_vrev (d))
31904 return true;
31905 if (arm_evpc_neon_vtrn (d))
31906 return true;
31907 return arm_evpc_neon_vtbl (d);
31908 }
31909 return false;
31910 }
31911
31912 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31913
31914 static bool
31915 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
31916 rtx target, rtx op0, rtx op1,
31917 const vec_perm_indices &sel)
31918 {
31919 if (vmode != op_mode)
31920 return false;
31921
31922 struct expand_vec_perm_d d;
31923 int i, nelt, which;
31924
31925 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31926 return false;
31927
31928 d.target = target;
31929 if (op0)
31930 {
31931 rtx nop0 = force_reg (vmode, op0);
31932 if (op0 == op1)
31933 op1 = nop0;
31934 op0 = nop0;
31935 }
31936 if (op1)
31937 op1 = force_reg (vmode, op1);
31938 d.op0 = op0;
31939 d.op1 = op1;
31940
31941 d.vmode = vmode;
31942 gcc_assert (VECTOR_MODE_P (d.vmode));
31943 d.testing_p = !target;
31944
31945 nelt = GET_MODE_NUNITS (d.vmode);
31946 for (i = which = 0; i < nelt; ++i)
31947 {
31948 int ei = sel[i] & (2 * nelt - 1);
31949 which |= (ei < nelt ? 1 : 2);
31950 }
31951
31952 switch (which)
31953 {
31954 default:
31955 gcc_unreachable();
31956
31957 case 3:
31958 d.one_vector_p = false;
31959 if (d.testing_p || !rtx_equal_p (op0, op1))
31960 break;
31961
31962 /* The elements of PERM do not suggest that only the first operand
31963 is used, but both operands are identical. Allow easier matching
31964 of the permutation by folding the permutation into the single
31965 input vector. */
31966 /* FALLTHRU */
31967 case 2:
31968 d.op0 = op1;
31969 d.one_vector_p = true;
31970 break;
31971
31972 case 1:
31973 d.op1 = op0;
31974 d.one_vector_p = true;
31975 break;
31976 }
31977
31978 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31979
31980 if (!d.testing_p)
31981 return arm_expand_vec_perm_const_1 (&d);
31982
31983 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31984 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31985 if (!d.one_vector_p)
31986 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31987
31988 start_sequence ();
31989 bool ret = arm_expand_vec_perm_const_1 (&d);
31990 end_sequence ();
31991
31992 return ret;
31993 }
31994
31995 bool
31996 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31997 {
31998 /* If we are soft float and we either have ldrd or the access fits in a
31999 single word, then all auto increment forms are ok. */
32000 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32001 return true;
32002
32003 switch (code)
32004 {
32005 /* Post increment and Pre Decrement are supported for all
32006 instruction forms except for vector forms. */
32007 case ARM_POST_INC:
32008 case ARM_PRE_DEC:
32009 if (VECTOR_MODE_P (mode))
32010 {
32011 if (code != ARM_PRE_DEC)
32012 return true;
32013 else
32014 return false;
32015 }
32016
32017 return true;
32018
32019 case ARM_POST_DEC:
32020 case ARM_PRE_INC:
32021 /* Without LDRD and mode size greater than
32022 word size, there is no point in auto-incrementing
32023 because ldm and stm will not have these forms. */
32024 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32025 return false;
32026
32027 /* Vector and floating point modes do not support
32028 these auto increment forms. */
32029 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32030 return false;
32031
32032 return true;
32033
32034 default:
32035 return false;
32036
32037 }
32038
32039 return false;
32040 }
32041
32042 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32043 on ARM, since we know that shifts by negative amounts are no-ops.
32044 Additionally, the default expansion code is not available or suitable
32045 for post-reload insn splits (this can occur when the register allocator
32046 chooses not to do a shift in NEON).
32047
32048 This function is used in both initial expand and post-reload splits, and
32049 handles all kinds of 64-bit shifts.
32050
32051 Input requirements:
32052 - It is safe for the input and output to be the same register, but
32053 early-clobber rules apply for the shift amount and scratch registers.
32054 - Shift by register requires both scratch registers. In all other cases
32055 the scratch registers may be NULL.
32056 - Ashiftrt by a register also clobbers the CC register. */
32057 void
32058 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32059 rtx amount, rtx scratch1, rtx scratch2)
32060 {
32061 rtx out_high = gen_highpart (SImode, out);
32062 rtx out_low = gen_lowpart (SImode, out);
32063 rtx in_high = gen_highpart (SImode, in);
32064 rtx in_low = gen_lowpart (SImode, in);
32065
32066 /* Terminology:
32067 in = the register pair containing the input value.
32068 out = the destination register pair.
32069 up = the high- or low-part of each pair.
32070 down = the opposite part to "up".
32071 In a shift, we can consider bits to shift from "up"-stream to
32072 "down"-stream, so in a left-shift "up" is the low-part and "down"
32073 is the high-part of each register pair. */
32074
32075 rtx out_up = code == ASHIFT ? out_low : out_high;
32076 rtx out_down = code == ASHIFT ? out_high : out_low;
32077 rtx in_up = code == ASHIFT ? in_low : in_high;
32078 rtx in_down = code == ASHIFT ? in_high : in_low;
32079
32080 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32081 gcc_assert (out
32082 && (REG_P (out) || SUBREG_P (out))
32083 && GET_MODE (out) == DImode);
32084 gcc_assert (in
32085 && (REG_P (in) || SUBREG_P (in))
32086 && GET_MODE (in) == DImode);
32087 gcc_assert (amount
32088 && (((REG_P (amount) || SUBREG_P (amount))
32089 && GET_MODE (amount) == SImode)
32090 || CONST_INT_P (amount)));
32091 gcc_assert (scratch1 == NULL
32092 || (GET_CODE (scratch1) == SCRATCH)
32093 || (GET_MODE (scratch1) == SImode
32094 && REG_P (scratch1)));
32095 gcc_assert (scratch2 == NULL
32096 || (GET_CODE (scratch2) == SCRATCH)
32097 || (GET_MODE (scratch2) == SImode
32098 && REG_P (scratch2)));
32099 gcc_assert (!REG_P (out) || !REG_P (amount)
32100 || !HARD_REGISTER_P (out)
32101 || (REGNO (out) != REGNO (amount)
32102 && REGNO (out) + 1 != REGNO (amount)));
32103
32104 /* Macros to make following code more readable. */
32105 #define SUB_32(DEST,SRC) \
32106 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32107 #define RSB_32(DEST,SRC) \
32108 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32109 #define SUB_S_32(DEST,SRC) \
32110 gen_addsi3_compare0 ((DEST), (SRC), \
32111 GEN_INT (-32))
32112 #define SET(DEST,SRC) \
32113 gen_rtx_SET ((DEST), (SRC))
32114 #define SHIFT(CODE,SRC,AMOUNT) \
32115 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32116 #define LSHIFT(CODE,SRC,AMOUNT) \
32117 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32118 SImode, (SRC), (AMOUNT))
32119 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32120 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32121 SImode, (SRC), (AMOUNT))
32122 #define ORR(A,B) \
32123 gen_rtx_IOR (SImode, (A), (B))
32124 #define BRANCH(COND,LABEL) \
32125 gen_arm_cond_branch ((LABEL), \
32126 gen_rtx_ ## COND (CCmode, cc_reg, \
32127 const0_rtx), \
32128 cc_reg)
32129
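/* Worked examples of the constant cases below (illustrative only): for a
   64-bit left shift by 8 the sequence reduces to
	out_high = (in_high << 8) | (in_low >> 24);  out_low = in_low << 8;
   and for a logical right shift by 40 (i.e. >= 32) it reduces to
	out_low = in_high >> 8;  out_high = 0;  */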
32130 /* Shifts by register and shifts by constant are handled separately. */
32131 if (CONST_INT_P (amount))
32132 {
32133 /* We have a shift-by-constant. */
32134
32135 /* First, handle out-of-range shift amounts.
32136 In both cases we try to match the result an ARM instruction in a
32137 shift-by-register would give. This helps reduce execution
32138 differences between optimization levels, but it won't stop other
32139 parts of the compiler doing different things. This is "undefined
32140 behavior", in any case. */
32141 if (INTVAL (amount) <= 0)
32142 emit_insn (gen_movdi (out, in));
32143 else if (INTVAL (amount) >= 64)
32144 {
32145 if (code == ASHIFTRT)
32146 {
32147 rtx const31_rtx = GEN_INT (31);
32148 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32149 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32150 }
32151 else
32152 emit_insn (gen_movdi (out, const0_rtx));
32153 }
32154
32155 /* Now handle valid shifts. */
32156 else if (INTVAL (amount) < 32)
32157 {
32158 /* Shifts by a constant less than 32. */
32159 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32160
32161 /* Clearing the out register in DImode first avoids lots
32162 of spilling and results in less stack usage.
32163 Later this redundant insn is completely removed.
32164 Do that only if "in" and "out" are different registers. */
32165 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32166 emit_insn (SET (out, const0_rtx));
32167 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32168 emit_insn (SET (out_down,
32169 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32170 out_down)));
32171 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32172 }
32173 else
32174 {
32175 /* Shifts by a constant greater than 31. */
32176 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32177
32178 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32179 emit_insn (SET (out, const0_rtx));
32180 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32181 if (code == ASHIFTRT)
32182 emit_insn (gen_ashrsi3 (out_up, in_up,
32183 GEN_INT (31)));
32184 else
32185 emit_insn (SET (out_up, const0_rtx));
32186 }
32187 }
32188 else
32189 {
32190 /* We have a shift-by-register. */
32191 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32192
32193 /* This alternative requires the scratch registers. */
32194 gcc_assert (scratch1 && REG_P (scratch1));
32195 gcc_assert (scratch2 && REG_P (scratch2));
32196
32197 /* We will need the values "amount-32" and "32-amount" later.
32198 Swapping them around now allows the later code to be more general. */
32199 switch (code)
32200 {
32201 case ASHIFT:
32202 emit_insn (SUB_32 (scratch1, amount));
32203 emit_insn (RSB_32 (scratch2, amount));
32204 break;
32205 case ASHIFTRT:
32206 emit_insn (RSB_32 (scratch1, amount));
32207 /* Also set CC = amount > 32. */
32208 emit_insn (SUB_S_32 (scratch2, amount));
32209 break;
32210 case LSHIFTRT:
32211 emit_insn (RSB_32 (scratch1, amount));
32212 emit_insn (SUB_32 (scratch2, amount));
32213 break;
32214 default:
32215 gcc_unreachable ();
32216 }
32217
32218 /* Emit code like this:
32219
32220 arithmetic-left:
32221 out_down = in_down << amount;
32222 out_down = (in_up << (amount - 32)) | out_down;
32223 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32224 out_up = in_up << amount;
32225
32226 arithmetic-right:
32227 out_down = in_down >> amount;
32228 out_down = (in_up << (32 - amount)) | out_down;
32229 if (amount < 32)
32230 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32231 out_up = in_up >> amount;
32232
32233 logical-right:
32234 out_down = in_down >> amount;
32235 out_down = (in_up << (32 - amount)) | out_down;
32236 if (amount < 32)
32237 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32238 out_up = in_up >> amount;
32239
32240 The ARM and Thumb2 variants are the same but implemented slightly
32241 differently. If this were only called during expand we could just
32242 use the Thumb2 case and let combine do the right thing, but this
32243 can also be called from post-reload splitters. */
32244
32245 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32246
32247 if (!TARGET_THUMB2)
32248 {
32249 /* Emit code for ARM mode. */
32250 emit_insn (SET (out_down,
32251 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32252 if (code == ASHIFTRT)
32253 {
32254 rtx_code_label *done_label = gen_label_rtx ();
32255 emit_jump_insn (BRANCH (LT, done_label));
32256 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32257 out_down)));
32258 emit_label (done_label);
32259 }
32260 else
32261 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32262 out_down)));
32263 }
32264 else
32265 {
32266 /* Emit code for Thumb2 mode.
32267 Thumb2 can't do shift and or in one insn. */
32268 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32269 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32270
32271 if (code == ASHIFTRT)
32272 {
32273 rtx_code_label *done_label = gen_label_rtx ();
32274 emit_jump_insn (BRANCH (LT, done_label));
32275 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32276 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32277 emit_label (done_label);
32278 }
32279 else
32280 {
32281 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32282 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32283 }
32284 }
32285
32286 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32287 }
32288
32289 #undef SUB_32
32290 #undef RSB_32
32291 #undef SUB_S_32
32292 #undef SET
32293 #undef SHIFT
32294 #undef LSHIFT
32295 #undef REV_LSHIFT
32296 #undef ORR
32297 #undef BRANCH
32298 }
32299
32300 /* Returns true if the pattern is a valid symbolic address, which is either a
32301 symbol_ref or (symbol_ref + addend).
32302
32303 According to the ARM ELF ABI, the initial addend of REL-type relocations
32304 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32305 literal field of the instruction as a 16-bit signed value in the range
32306 -32768 <= A < 32768.
32307
32308 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32309 unsigned range of 0 <= A < 256 as described in the AAELF32
32310 relocation handling documentation: REL-type relocations are encoded
32311 as unsigned in this case. */
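/* For example, (symbol_ref "sym") and
   (const (plus (symbol_ref "sym") (const_int 256))) are accepted for the
   MOVW/MOVT case, whereas an addend of 0x8000 is rejected; in Thumb-1
   without MOVT only addends in [0, 255] are accepted.  */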
32312
32313 bool
32314 arm_valid_symbolic_address_p (rtx addr)
32315 {
32316 rtx xop0, xop1 = NULL_RTX;
32317 rtx tmp = addr;
32318
32319 if (target_word_relocations)
32320 return false;
32321
32322 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32323 return true;
32324
32325 /* (const (plus: symbol_ref const_int)) */
32326 if (GET_CODE (addr) == CONST)
32327 tmp = XEXP (addr, 0);
32328
32329 if (GET_CODE (tmp) == PLUS)
32330 {
32331 xop0 = XEXP (tmp, 0);
32332 xop1 = XEXP (tmp, 1);
32333
32334 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32335 {
32336 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32337 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32338 else
32339 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32340 }
32341 }
32342
32343 return false;
32344 }
32345
32346 /* Returns true if *COMPARISON is a valid comparison operation, and
32347 puts the operands into a form that is valid. */
32348 bool
32349 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32350 {
32351 enum rtx_code code = GET_CODE (*comparison);
32352 int code_int;
32353 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32354 ? GET_MODE (*op2) : GET_MODE (*op1);
32355
32356 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32357
32358 if (code == UNEQ || code == LTGT)
32359 return false;
32360
32361 code_int = (int)code;
32362 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32363 PUT_CODE (*comparison, (enum rtx_code)code_int);
32364
32365 switch (mode)
32366 {
32367 case E_SImode:
32368 if (!arm_add_operand (*op1, mode))
32369 *op1 = force_reg (mode, *op1);
32370 if (!arm_add_operand (*op2, mode))
32371 *op2 = force_reg (mode, *op2);
32372 return true;
32373
32374 case E_DImode:
32375 /* gen_compare_reg() will sort out any invalid operands. */
32376 return true;
32377
32378 case E_HFmode:
32379 if (!TARGET_VFP_FP16INST)
32380 break;
32381 /* FP16 comparisons are done in SF mode. */
32382 mode = SFmode;
32383 *op1 = convert_to_mode (mode, *op1, 1);
32384 *op2 = convert_to_mode (mode, *op2, 1);
32385 /* Fall through. */
32386 case E_SFmode:
32387 case E_DFmode:
32388 if (!vfp_compare_operand (*op1, mode))
32389 *op1 = force_reg (mode, *op1);
32390 if (!vfp_compare_operand (*op2, mode))
32391 *op2 = force_reg (mode, *op2);
32392 return true;
32393 default:
32394 break;
32395 }
32396
32397 return false;
32398
32399 }
32400
32401 /* Maximum number of instructions to set block of memory. */
32402 static int
32403 arm_block_set_max_insns (void)
32404 {
32405 if (optimize_function_for_size_p (cfun))
32406 return 4;
32407 else
32408 return current_tune->max_insns_inline_memset;
32409 }
32410
32411 /* Return TRUE if it's profitable to set block of memory for
32412 non-vectorized case. VAL is the value to set the memory
32413 with. LENGTH is the number of bytes to set. ALIGN is the
32414 alignment of the destination memory in bytes. UNALIGNED_P
32415 is TRUE if we can only set the memory with instructions
32416 meeting alignment requirements. USE_STRD_P is TRUE if we
32417 can use strd to set the memory. */
32418 static bool
32419 arm_block_set_non_vect_profit_p (rtx val,
32420 unsigned HOST_WIDE_INT length,
32421 unsigned HOST_WIDE_INT align,
32422 bool unaligned_p, bool use_strd_p)
32423 {
32424 int num = 0;
32425 /* For 0-7 leftover bytes, we can set the memory block using
32426 strb/strh/str with the minimum number of instructions. */
32427 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32428
32429 if (unaligned_p)
32430 {
32431 num = arm_const_inline_cost (SET, val);
32432 num += length / align + length % align;
32433 }
32434 else if (use_strd_p)
32435 {
32436 num = arm_const_double_inline_cost (val);
32437 num += (length >> 3) + leftover[length & 7];
32438 }
32439 else
32440 {
32441 num = arm_const_inline_cost (SET, val);
32442 num += (length >> 2) + leftover[length & 3];
32443 }
32444
32445 /* We may be able to combine last pair STRH/STRB into a single STR
32446 by shifting one byte back. */
32447 if (unaligned_access && length > 3 && (length & 3) == 3)
32448 num--;
32449
32450 return (num <= arm_block_set_max_insns ());
32451 }
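/* Worked example: for LENGTH == 15 with word alignment and no strd, the
   stores amount to (15 >> 2) + leftover[15 & 3] == 3 + 2 == 5 insns plus
   the cost of loading VAL; with unaligned access available the trailing
   STRH/STRB pair is merged into a single STR, saving one more insn.  */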
32452
32453 /* Return TRUE if it's profitable to set block of memory for
32454 vectorized case. LENGTH is the number of bytes to set.
32455 ALIGN is the alignment of destination memory in bytes.
32456 MODE is the vector mode used to set the memory. */
32457 static bool
32458 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32459 unsigned HOST_WIDE_INT align,
32460 machine_mode mode)
32461 {
32462 int num;
32463 bool unaligned_p = ((align & 3) != 0);
32464 unsigned int nelt = GET_MODE_NUNITS (mode);
32465
32466 /* Instruction loading constant value. */
32467 num = 1;
32468 /* Instructions storing the memory. */
32469 num += (length + nelt - 1) / nelt;
32470 /* Instructions adjusting the address expression. We only need to
32471 adjust the address expression if it's 4-byte aligned and the
32472 leftover bytes can only be stored by a misaligned store instruction. */
32473 if (!unaligned_p && (length & 3) != 0)
32474 num++;
32475
32476 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32477 if (!unaligned_p && mode == V16QImode)
32478 num--;
32479
32480 return (num <= arm_block_set_max_insns ());
32481 }
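/* Worked example: for LENGTH == 20, a word-aligned destination and
   MODE == V16QImode, we count one insn to load the constant plus two
   vector stores for ceil(20/16) chunks, no address adjustment since
   (20 & 3) == 0, and one insn discounted for the first aligned
   vst1:v16qi, giving num == 2.  */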
32482
32483 /* Set a block of memory using vectorization instructions for the
32484 unaligned case. We fill the first LENGTH bytes of the memory
32485 area starting from DSTBASE with byte constant VALUE. ALIGN is
32486 the alignment requirement of memory. Return TRUE if succeeded. */
32487 static bool
32488 arm_block_set_unaligned_vect (rtx dstbase,
32489 unsigned HOST_WIDE_INT length,
32490 unsigned HOST_WIDE_INT value,
32491 unsigned HOST_WIDE_INT align)
32492 {
32493 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32494 rtx dst, mem;
32495 rtx val_vec, reg;
32496 rtx (*gen_func) (rtx, rtx);
32497 machine_mode mode;
32498 unsigned HOST_WIDE_INT v = value;
32499 unsigned int offset = 0;
32500 gcc_assert ((align & 0x3) != 0);
32501 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32502 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32503 if (length >= nelt_v16)
32504 {
32505 mode = V16QImode;
32506 gen_func = gen_movmisalignv16qi;
32507 }
32508 else
32509 {
32510 mode = V8QImode;
32511 gen_func = gen_movmisalignv8qi;
32512 }
32513 nelt_mode = GET_MODE_NUNITS (mode);
32514 gcc_assert (length >= nelt_mode);
32515 /* Skip if it isn't profitable. */
32516 if (!arm_block_set_vect_profit_p (length, align, mode))
32517 return false;
32518
32519 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32520 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32521
32522 v = sext_hwi (v, BITS_PER_WORD);
32523
32524 reg = gen_reg_rtx (mode);
32525 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32526 /* Emit instruction loading the constant value. */
32527 emit_move_insn (reg, val_vec);
32528
32529 /* Handle nelt_mode bytes in a vector. */
32530 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32531 {
32532 emit_insn ((*gen_func) (mem, reg));
32533 if (i + 2 * nelt_mode <= length)
32534 {
32535 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32536 offset += nelt_mode;
32537 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32538 }
32539 }
32540
32541 /* If at least nelt_v8 bytes are left over, we must be in
32542 V16QImode. */
32543 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32544
32545 /* Handle (8, 16) bytes leftover. */
32546 if (i + nelt_v8 < length)
32547 {
32548 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32549 offset += length - i;
32550 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32551
32552 /* We are shifting bytes back, set the alignment accordingly. */
32553 if ((length & 1) != 0 && align >= 2)
32554 set_mem_align (mem, BITS_PER_UNIT);
32555
32556 emit_insn (gen_movmisalignv16qi (mem, reg));
32557 }
32558 /* Handle (0, 8] bytes leftover. */
32559 else if (i < length && i + nelt_v8 >= length)
32560 {
32561 if (mode == V16QImode)
32562 reg = gen_lowpart (V8QImode, reg);
32563
32564 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32565 + (nelt_mode - nelt_v8))));
32566 offset += (length - i) + (nelt_mode - nelt_v8);
32567 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32568
32569 /* We are shifting bytes back, set the alignment accordingly. */
32570 if ((length & 1) != 0 && align >= 2)
32571 set_mem_align (mem, BITS_PER_UNIT);
32572
32573 emit_insn (gen_movmisalignv8qi (mem, reg));
32574 }
32575
32576 return true;
32577 }
32578
32579 /* Set a block of memory using vectorization instructions for the
32580 aligned case. We fill the first LENGTH bytes of the memory area
32581 starting from DSTBASE with byte constant VALUE. ALIGN is the
32582 alignment requirement of memory. Return TRUE if succeeded. */
32583 static bool
32584 arm_block_set_aligned_vect (rtx dstbase,
32585 unsigned HOST_WIDE_INT length,
32586 unsigned HOST_WIDE_INT value,
32587 unsigned HOST_WIDE_INT align)
32588 {
32589 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32590 rtx dst, addr, mem;
32591 rtx val_vec, reg;
32592 machine_mode mode;
32593 unsigned int offset = 0;
32594
32595 gcc_assert ((align & 0x3) == 0);
32596 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32597 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32598 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32599 mode = V16QImode;
32600 else
32601 mode = V8QImode;
32602
32603 nelt_mode = GET_MODE_NUNITS (mode);
32604 gcc_assert (length >= nelt_mode);
32605 /* Skip if it isn't profitable. */
32606 if (!arm_block_set_vect_profit_p (length, align, mode))
32607 return false;
32608
32609 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32610
32611 reg = gen_reg_rtx (mode);
32612 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32613 /* Emit instruction loading the constant value. */
32614 emit_move_insn (reg, val_vec);
32615
32616 i = 0;
32617 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32618 if (mode == V16QImode)
32619 {
32620 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32621 emit_insn (gen_movmisalignv16qi (mem, reg));
32622 i += nelt_mode;
32623 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32624 if (i + nelt_v8 < length && i + nelt_v16 > length)
32625 {
32626 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32627 offset += length - nelt_mode;
32628 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32629 /* We are shifting bytes back, set the alignment accordingly. */
32630 if ((length & 0x3) == 0)
32631 set_mem_align (mem, BITS_PER_UNIT * 4);
32632 else if ((length & 0x1) == 0)
32633 set_mem_align (mem, BITS_PER_UNIT * 2);
32634 else
32635 set_mem_align (mem, BITS_PER_UNIT);
32636
32637 emit_insn (gen_movmisalignv16qi (mem, reg));
32638 return true;
32639 }
32640 /* Fall through for bytes leftover. */
32641 mode = V8QImode;
32642 nelt_mode = GET_MODE_NUNITS (mode);
32643 reg = gen_lowpart (V8QImode, reg);
32644 }
32645
32646 /* Handle 8 bytes in a vector. */
32647 for (; (i + nelt_mode <= length); i += nelt_mode)
32648 {
32649 addr = plus_constant (Pmode, dst, i);
32650 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32651 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32652 emit_move_insn (mem, reg);
32653 else
32654 emit_insn (gen_unaligned_storev8qi (mem, reg));
32655 }
32656
32657 /* Handle single word leftover by shifting 4 bytes back. We can
32658 use aligned access for this case. */
32659 if (i + UNITS_PER_WORD == length)
32660 {
32661 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32662 offset += i - UNITS_PER_WORD;
32663 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32664 /* We are shifting 4 bytes back, set the alignment accordingly. */
32665 if (align > UNITS_PER_WORD)
32666 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32667
32668 emit_insn (gen_unaligned_storev8qi (mem, reg));
32669 }
32670 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32671 We have to use unaligned access for this case. */
32672 else if (i < length)
32673 {
32674 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32675 offset += length - nelt_mode;
32676 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32677 /* We are shifting bytes back, set the alignment accordingly. */
32678 if ((length & 1) == 0)
32679 set_mem_align (mem, BITS_PER_UNIT * 2);
32680 else
32681 set_mem_align (mem, BITS_PER_UNIT);
32682
32683 emit_insn (gen_movmisalignv8qi (mem, reg));
32684 }
32685
32686 return true;
32687 }
32688
32689 /* Set a block of memory using plain strh/strb instructions, only
32690 using instructions allowed by ALIGN on the processor. We fill the
32691 first LENGTH bytes of the memory area starting from DSTBASE
32692 with byte constant VALUE. ALIGN is the alignment requirement
32693 of memory. */
32694 static bool
32695 arm_block_set_unaligned_non_vect (rtx dstbase,
32696 unsigned HOST_WIDE_INT length,
32697 unsigned HOST_WIDE_INT value,
32698 unsigned HOST_WIDE_INT align)
32699 {
32700 unsigned int i;
32701 rtx dst, addr, mem;
32702 rtx val_exp, val_reg, reg;
32703 machine_mode mode;
32704 HOST_WIDE_INT v = value;
32705
32706 gcc_assert (align == 1 || align == 2);
32707
32708 if (align == 2)
32709 v |= (value << BITS_PER_UNIT);
32710
32711 v = sext_hwi (v, BITS_PER_WORD);
32712 val_exp = GEN_INT (v);
32713 /* Skip if it isn't profitable. */
32714 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32715 align, true, false))
32716 return false;
32717
32718 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32719 mode = (align == 2 ? HImode : QImode);
32720 val_reg = force_reg (SImode, val_exp);
32721 reg = gen_lowpart (mode, val_reg);
32722
32723 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32724 {
32725 addr = plus_constant (Pmode, dst, i);
32726 mem = adjust_automodify_address (dstbase, mode, addr, i);
32727 emit_move_insn (mem, reg);
32728 }
32729
32730 /* Handle single byte leftover. */
32731 if (i + 1 == length)
32732 {
32733 reg = gen_lowpart (QImode, val_reg);
32734 addr = plus_constant (Pmode, dst, i);
32735 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32736 emit_move_insn (mem, reg);
32737 i++;
32738 }
32739
32740 gcc_assert (i == length);
32741 return true;
32742 }
32743
32744 /* Set a block of memory using plain strd/str/strh/strb instructions,
32745 to permit unaligned stores on processors which support unaligned
32746 semantics for those instructions. We fill the first LENGTH bytes
32747 of the memory area starting from DSTBASE with byte constant VALUE.
32748 ALIGN is the alignment requirement of memory. */
32749 static bool
32750 arm_block_set_aligned_non_vect (rtx dstbase,
32751 unsigned HOST_WIDE_INT length,
32752 unsigned HOST_WIDE_INT value,
32753 unsigned HOST_WIDE_INT align)
32754 {
32755 unsigned int i;
32756 rtx dst, addr, mem;
32757 rtx val_exp, val_reg, reg;
32758 unsigned HOST_WIDE_INT v;
32759 bool use_strd_p;
32760
32761 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32762 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32763
32764 v = (value | (value << 8) | (value << 16) | (value << 24));
32765 if (length < UNITS_PER_WORD)
32766 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32767
32768 if (use_strd_p)
32769 v |= (v << BITS_PER_WORD);
32770 else
32771 v = sext_hwi (v, BITS_PER_WORD);
32772
32773 val_exp = GEN_INT (v);
32774 /* Skip if it isn't profitable. */
32775 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32776 align, false, use_strd_p))
32777 {
32778 if (!use_strd_p)
32779 return false;
32780
32781 /* Try without strd. */
32782 v = (v >> BITS_PER_WORD);
32783 v = sext_hwi (v, BITS_PER_WORD);
32784 val_exp = GEN_INT (v);
32785 use_strd_p = false;
32786 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32787 align, false, use_strd_p))
32788 return false;
32789 }
32790
32791 i = 0;
32792 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32793 /* Handle double words using strd if possible. */
32794 if (use_strd_p)
32795 {
32796 val_reg = force_reg (DImode, val_exp);
32797 reg = val_reg;
32798 for (; (i + 8 <= length); i += 8)
32799 {
32800 addr = plus_constant (Pmode, dst, i);
32801 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32802 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32803 emit_move_insn (mem, reg);
32804 else
32805 emit_insn (gen_unaligned_storedi (mem, reg));
32806 }
32807 }
32808 else
32809 val_reg = force_reg (SImode, val_exp);
32810
32811 /* Handle words. */
32812 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32813 for (; (i + 4 <= length); i += 4)
32814 {
32815 addr = plus_constant (Pmode, dst, i);
32816 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32817 if ((align & 3) == 0)
32818 emit_move_insn (mem, reg);
32819 else
32820 emit_insn (gen_unaligned_storesi (mem, reg));
32821 }
32822
32823 /* Merge last pair of STRH and STRB into a STR if possible. */
32824 if (unaligned_access && i > 0 && (i + 3) == length)
32825 {
32826 addr = plus_constant (Pmode, dst, i - 1);
32827 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32828 /* We are shifting one byte back, set the alignment accordingly. */
32829 if ((align & 1) == 0)
32830 set_mem_align (mem, BITS_PER_UNIT);
32831
32832 /* Most likely this is an unaligned access, and we can't tell at
32833 compilation time. */
32834 emit_insn (gen_unaligned_storesi (mem, reg));
32835 return true;
32836 }
32837
32838 /* Handle half word leftover. */
32839 if (i + 2 <= length)
32840 {
32841 reg = gen_lowpart (HImode, val_reg);
32842 addr = plus_constant (Pmode, dst, i);
32843 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32844 if ((align & 1) == 0)
32845 emit_move_insn (mem, reg);
32846 else
32847 emit_insn (gen_unaligned_storehi (mem, reg));
32848
32849 i += 2;
32850 }
32851
32852 /* Handle single byte leftover. */
32853 if (i + 1 == length)
32854 {
32855 reg = gen_lowpart (QImode, val_reg);
32856 addr = plus_constant (Pmode, dst, i);
32857 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32858 emit_move_insn (mem, reg);
32859 }
32860
32861 return true;
32862 }
32863
32864 /* Set a block of memory using vectorization instructions for both
32865 aligned and unaligned cases. We fill the first LENGTH bytes of
32866 the memory area starting from DSTBASE with byte constant VALUE.
32867 ALIGN is the alignment requirement of memory. */
32868 static bool
32869 arm_block_set_vect (rtx dstbase,
32870 unsigned HOST_WIDE_INT length,
32871 unsigned HOST_WIDE_INT value,
32872 unsigned HOST_WIDE_INT align)
32873 {
32874 /* Check whether we need to use unaligned store instruction. */
32875 if (((align & 3) != 0 || (length & 3) != 0)
32876 /* Check whether unaligned store instruction is available. */
32877 && (!unaligned_access || BYTES_BIG_ENDIAN))
32878 return false;
32879
32880 if ((align & 3) == 0)
32881 return arm_block_set_aligned_vect (dstbase, length, value, align);
32882 else
32883 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32884 }
32885
32886 /* Expand a string store operation. First we try to do it using
32887 vectorization instructions, then with ARM unaligned access and
32888 double-word stores if profitable. OPERANDS[0] is the destination,
32889 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32890 initialize the memory with, OPERANDS[3] is the known alignment of the
32891 destination. */
32892 bool
32893 arm_gen_setmem (rtx *operands)
32894 {
32895 rtx dstbase = operands[0];
32896 unsigned HOST_WIDE_INT length;
32897 unsigned HOST_WIDE_INT value;
32898 unsigned HOST_WIDE_INT align;
32899
32900 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32901 return false;
32902
32903 length = UINTVAL (operands[1]);
32904 if (length > 64)
32905 return false;
32906
32907 value = (UINTVAL (operands[2]) & 0xFF);
32908 align = UINTVAL (operands[3]);
32909 if (TARGET_NEON && length >= 8
32910 && current_tune->string_ops_prefer_neon
32911 && arm_block_set_vect (dstbase, length, value, align))
32912 return true;
32913
32914 if (!unaligned_access && (align & 3) != 0)
32915 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32916
32917 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32918 }
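/* For instance, a call such as memset (p, 0xab, 15) with a word-aligned P
   reaches this expander with OPERANDS[1] == 15, OPERANDS[2] == 0xab and
   OPERANDS[3] == 4; lengths above 64 bytes are rejected here and left to
   the generic expansion.  */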
32919
32920
32921 static bool
32922 arm_macro_fusion_p (void)
32923 {
32924 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32925 }
32926
32927 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32928 for MOVW / MOVT macro fusion. */
32929
32930 static bool
32931 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32932 {
32933 /* We are trying to fuse
32934 movw imm / movt imm
32935 instructions as a group that gets scheduled together. */
32936
32937 rtx set_dest = SET_DEST (curr_set);
32938
32939 if (GET_MODE (set_dest) != SImode)
32940 return false;
32941
32942 /* We are trying to match:
32943 prev (movw) == (set (reg r0) (const_int imm16))
32944 curr (movt) == (set (zero_extract (reg r0)
32945 (const_int 16)
32946 (const_int 16))
32947 (const_int imm16_1))
32948 or
32949 prev (movw) == (set (reg r1)
32950 (high (symbol_ref ("SYM"))))
32951 curr (movt) == (set (reg r0)
32952 (lo_sum (reg r1)
32953 (symbol_ref ("SYM")))) */
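  /* In instruction terms this is a back-to-back pair such as

	 movw r0, #:lower16:SYM
	 movt r0, #:upper16:SYM

     (or the two immediate halves of a 32-bit constant).  */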
32954
32955 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32956 {
32957 if (CONST_INT_P (SET_SRC (curr_set))
32958 && CONST_INT_P (SET_SRC (prev_set))
32959 && REG_P (XEXP (set_dest, 0))
32960 && REG_P (SET_DEST (prev_set))
32961 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32962 return true;
32963
32964 }
32965 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32966 && REG_P (SET_DEST (curr_set))
32967 && REG_P (SET_DEST (prev_set))
32968 && GET_CODE (SET_SRC (prev_set)) == HIGH
32969 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32970 return true;
32971
32972 return false;
32973 }
32974
32975 static bool
32976 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32977 {
32978 rtx prev_set = single_set (prev);
32979 rtx curr_set = single_set (curr);
32980
32981 if (!prev_set
32982 || !curr_set)
32983 return false;
32984
32985 if (any_condjump_p (curr))
32986 return false;
32987
32988 if (!arm_macro_fusion_p ())
32989 return false;
32990
32991 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32992 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32993 return true;
32994
32995 return false;
32996 }
32997
32998 /* Return true iff the instruction fusion described by OP is enabled. */
32999 bool
33000 arm_fusion_enabled_p (tune_params::fuse_ops op)
33001 {
33002 return current_tune->fusible_ops & op;
33003 }
33004
33005 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33006 scheduled for speculative execution. Reject the long-running division
33007 and square-root instructions. */
33008
33009 static bool
33010 arm_sched_can_speculate_insn (rtx_insn *insn)
33011 {
33012 switch (get_attr_type (insn))
33013 {
33014 case TYPE_SDIV:
33015 case TYPE_UDIV:
33016 case TYPE_FDIVS:
33017 case TYPE_FDIVD:
33018 case TYPE_FSQRTS:
33019 case TYPE_FSQRTD:
33020 case TYPE_NEON_FP_SQRT_S:
33021 case TYPE_NEON_FP_SQRT_D:
33022 case TYPE_NEON_FP_SQRT_S_Q:
33023 case TYPE_NEON_FP_SQRT_D_Q:
33024 case TYPE_NEON_FP_DIV_S:
33025 case TYPE_NEON_FP_DIV_D:
33026 case TYPE_NEON_FP_DIV_S_Q:
33027 case TYPE_NEON_FP_DIV_D_Q:
33028 return false;
33029 default:
33030 return true;
33031 }
33032 }
33033
33034 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33035
33036 static unsigned HOST_WIDE_INT
33037 arm_asan_shadow_offset (void)
33038 {
33039 return HOST_WIDE_INT_1U << 29;
33040 }
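/* With this offset the usual ASan translation is roughly
   shadow = (addr >> 3) + 0x20000000 within the 32-bit address space.  */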
33041
33042
33043 /* This is a temporary fix for PR60655. Ideally we need
33044 to handle most of these cases in the generic part but
33045 currently we reject minus (..) (sym_ref). We try to
33046 ameliorate the case with minus (sym_ref1) (sym_ref2)
33047 where they are in the same section. */
33048
33049 static bool
33050 arm_const_not_ok_for_debug_p (rtx p)
33051 {
33052 tree decl_op0 = NULL;
33053 tree decl_op1 = NULL;
33054
33055 if (GET_CODE (p) == UNSPEC)
33056 return true;
33057 if (GET_CODE (p) == MINUS)
33058 {
33059 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33060 {
33061 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33062 if (decl_op1
33063 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33064 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33065 {
33066 if ((VAR_P (decl_op1)
33067 || TREE_CODE (decl_op1) == CONST_DECL)
33068 && (VAR_P (decl_op0)
33069 || TREE_CODE (decl_op0) == CONST_DECL))
33070 return (get_variable_section (decl_op1, false)
33071 != get_variable_section (decl_op0, false));
33072
33073 if (TREE_CODE (decl_op1) == LABEL_DECL
33074 && TREE_CODE (decl_op0) == LABEL_DECL)
33075 return (DECL_CONTEXT (decl_op1)
33076 != DECL_CONTEXT (decl_op0));
33077 }
33078
33079 return true;
33080 }
33081 }
33082
33083 return false;
33084 }
33085
33086 /* Return TRUE if X is a reference to a value in a constant pool. */
33087 extern bool
33088 arm_is_constant_pool_ref (rtx x)
33089 {
33090 return (MEM_P (x)
33091 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33092 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33093 }
33094
33095 /* Remember the last target of arm_set_current_function. */
33096 static GTY(()) tree arm_previous_fndecl;
33097
33098 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33099
33100 void
33101 save_restore_target_globals (tree new_tree)
33102 {
33103 /* If we have a previous state, use it. */
33104 if (TREE_TARGET_GLOBALS (new_tree))
33105 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33106 else if (new_tree == target_option_default_node)
33107 restore_target_globals (&default_target_globals);
33108 else
33109 {
33110 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33111 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33112 }
33113
33114 arm_option_params_internal ();
33115 }
33116
33117 /* Invalidate arm_previous_fndecl. */
33118
33119 void
33120 arm_reset_previous_fndecl (void)
33121 {
33122 arm_previous_fndecl = NULL_TREE;
33123 }
33124
33125 /* Establish appropriate back-end context for processing the function
33126 FNDECL. The argument might be NULL to indicate processing at top
33127 level, outside of any function scope. */
33128
33129 static void
33130 arm_set_current_function (tree fndecl)
33131 {
33132 if (!fndecl || fndecl == arm_previous_fndecl)
33133 return;
33134
33135 tree old_tree = (arm_previous_fndecl
33136 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33137 : NULL_TREE);
33138
33139 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33140
33141 /* If current function has no attributes but previous one did,
33142 use the default node. */
33143 if (! new_tree && old_tree)
33144 new_tree = target_option_default_node;
33145
33146 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33147 the default have been handled by save_restore_target_globals from
33148 arm_pragma_target_parse. */
33149 if (old_tree == new_tree)
33150 return;
33151
33152 arm_previous_fndecl = fndecl;
33153
33154 /* First set the target options. */
33155 cl_target_option_restore (&global_options, &global_options_set,
33156 TREE_TARGET_OPTION (new_tree));
33157
33158 save_restore_target_globals (new_tree);
33159
33160 arm_override_options_after_change_1 (&global_options, &global_options_set);
33161 }
33162
33163 /* Implement TARGET_OPTION_PRINT. */
33164
33165 static void
33166 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33167 {
33168 int flags = ptr->x_target_flags;
33169 const char *fpu_name;
33170
33171 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33172 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33173
33174 fprintf (file, "%*sselected isa %s\n", indent, "",
33175 TARGET_THUMB2_P (flags) ? "thumb2" :
33176 TARGET_THUMB_P (flags) ? "thumb1" :
33177 "arm");
33178
33179 if (ptr->x_arm_arch_string)
33180 fprintf (file, "%*sselected architecture %s\n", indent, "",
33181 ptr->x_arm_arch_string);
33182
33183 if (ptr->x_arm_cpu_string)
33184 fprintf (file, "%*sselected CPU %s\n", indent, "",
33185 ptr->x_arm_cpu_string);
33186
33187 if (ptr->x_arm_tune_string)
33188 fprintf (file, "%*sselected tune %s\n", indent, "",
33189 ptr->x_arm_tune_string);
33190
33191 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33192 }
33193
33194 /* Hook to determine if one function can safely inline another. */
33195
33196 static bool
33197 arm_can_inline_p (tree caller, tree callee)
33198 {
33199 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33200 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33201 bool can_inline = true;
33202
33203 struct cl_target_option *caller_opts
33204 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33205 : target_option_default_node);
33206
33207 struct cl_target_option *callee_opts
33208 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33209 : target_option_default_node);
33210
33211 if (callee_opts == caller_opts)
33212 return true;
33213
33214 /* Callee's ISA features should be a subset of the caller's. */
33215 struct arm_build_target caller_target;
33216 struct arm_build_target callee_target;
33217 caller_target.isa = sbitmap_alloc (isa_num_bits);
33218 callee_target.isa = sbitmap_alloc (isa_num_bits);
33219
33220 arm_configure_build_target (&caller_target, caller_opts, false);
33221 arm_configure_build_target (&callee_target, callee_opts, false);
33222 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33223 can_inline = false;
33224
33225 sbitmap_free (caller_target.isa);
33226 sbitmap_free (callee_target.isa);
33227
33228 /* OK to inline between different modes.
33229 Functions with mode-specific instructions, e.g. using asm,
33230 must be explicitly protected with noinline. */
33231 return can_inline;
33232 }
33233
33234 /* Hook to fix function's alignment affected by target attribute. */
33235
33236 static void
33237 arm_relayout_function (tree fndecl)
33238 {
33239 if (DECL_USER_ALIGN (fndecl))
33240 return;
33241
33242 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33243
33244 if (!callee_tree)
33245 callee_tree = target_option_default_node;
33246
33247 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33248 SET_DECL_ALIGN
33249 (fndecl,
33250 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33251 }
33252
33253 /* Inner function to process the attribute((target(...))): take an argument and
33254 set the current options from that argument. If we have a list, recursively
33255 go over the list. */
33256
33257 static bool
33258 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33259 {
33260 if (TREE_CODE (args) == TREE_LIST)
33261 {
33262 bool ret = true;
33263
33264 for (; args; args = TREE_CHAIN (args))
33265 if (TREE_VALUE (args)
33266 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33267 ret = false;
33268 return ret;
33269 }
33270
33271 else if (TREE_CODE (args) != STRING_CST)
33272 {
33273 error ("attribute %<target%> argument not a string");
33274 return false;
33275 }
33276
33277 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33278 char *q;
33279
33280 while ((q = strtok (argstr, ",")) != NULL)
33281 {
33282 argstr = NULL;
33283 if (!strcmp (q, "thumb"))
33284 {
33285 opts->x_target_flags |= MASK_THUMB;
33286 if (TARGET_FDPIC && !arm_arch_thumb2)
33287 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33288 }
33289
33290 else if (!strcmp (q, "arm"))
33291 opts->x_target_flags &= ~MASK_THUMB;
33292
33293 else if (!strcmp (q, "general-regs-only"))
33294 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33295
33296 else if (startswith (q, "fpu="))
33297 {
33298 int fpu_index;
33299 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33300 &fpu_index, CL_TARGET))
33301 {
33302 error ("invalid fpu for target attribute or pragma %qs", q);
33303 return false;
33304 }
33305 if (fpu_index == TARGET_FPU_auto)
33306 {
33307 /* This doesn't really make sense until we support
33308 general dynamic selection of the architecture and all
33309 sub-features. */
33310 sorry ("auto fpu selection not currently permitted here");
33311 return false;
33312 }
33313 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33314 }
33315 else if (startswith (q, "arch="))
33316 {
33317 char *arch = q + 5;
33318 const arch_option *arm_selected_arch
33319 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33320
33321 if (!arm_selected_arch)
33322 {
33323 error ("invalid architecture for target attribute or pragma %qs",
33324 q);
33325 return false;
33326 }
33327
33328 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33329 }
33330 else if (q[0] == '+')
33331 {
33332 opts->x_arm_arch_string
33333 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33334 }
33335 else
33336 {
33337 error ("unknown target attribute or pragma %qs", q);
33338 return false;
33339 }
33340 }
33341
33342 return true;
33343 }
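/* For instance, __attribute__((target("thumb,fpu=vfpv3"))) is split on
   commas here: "thumb" sets MASK_THUMB and "fpu=vfpv3" selects the named
   FPU, while a token starting with '+' (e.g. "+crc") is appended to the
   current architecture string.  */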
33344
33345 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33346
33347 tree
33348 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33349 struct gcc_options *opts_set)
33350 {
33351 struct cl_target_option cl_opts;
33352
33353 if (!arm_valid_target_attribute_rec (args, opts))
33354 return NULL_TREE;
33355
33356 cl_target_option_save (&cl_opts, opts, opts_set);
33357 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33358 arm_option_check_internal (opts);
33359 /* Do any overrides, such as global options arch=xxx.
33360 We do this since arm_active_target was overridden. */
33361 arm_option_reconfigure_globals ();
33362 arm_options_perform_arch_sanity_checks ();
33363 arm_option_override_internal (opts, opts_set);
33364
33365 return build_target_option_node (opts, opts_set);
33366 }
33367
33368 static void
33369 add_attribute (const char * mode, tree *attributes)
33370 {
33371 size_t len = strlen (mode);
33372 tree value = build_string (len, mode);
33373
33374 TREE_TYPE (value) = build_array_type (char_type_node,
33375 build_index_type (size_int (len)));
33376
33377 *attributes = tree_cons (get_identifier ("target"),
33378 build_tree_list (NULL_TREE, value),
33379 *attributes);
33380 }
33381
33382 /* For testing. Insert thumb or arm modes alternately on functions. */
33383
33384 static void
33385 arm_insert_attributes (tree fndecl, tree * attributes)
33386 {
33387 const char *mode;
33388
33389 if (! TARGET_FLIP_THUMB)
33390 return;
33391
33392 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33393 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33394 return;
33395
33396 /* Nested definitions must inherit mode. */
33397 if (current_function_decl)
33398 {
33399 mode = TARGET_THUMB ? "thumb" : "arm";
33400 add_attribute (mode, attributes);
33401 return;
33402 }
33403
33404 /* If there is already a setting don't change it. */
33405 if (lookup_attribute ("target", *attributes) != NULL)
33406 return;
33407
33408 mode = thumb_flipper ? "thumb" : "arm";
33409 add_attribute (mode, attributes);
33410
33411 thumb_flipper = !thumb_flipper;
33412 }
33413
33414 /* Hook to validate attribute((target("string"))). */
33415
33416 static bool
33417 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33418 tree args, int ARG_UNUSED (flags))
33419 {
33420 bool ret = true;
33421 struct gcc_options func_options, func_options_set;
33422 tree cur_tree, new_optimize;
33423 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33424
33425 /* Get the optimization options of the current function. */
33426 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33427
33428 /* If the function changed the optimization levels as well as setting target
33429 options, start with the optimizations specified. */
33430 if (!func_optimize)
33431 func_optimize = optimization_default_node;
33432
33433 /* Init func_options. */
33434 memset (&func_options, 0, sizeof (func_options));
33435 init_options_struct (&func_options, NULL);
33436 lang_hooks.init_options_struct (&func_options);
33437 memset (&func_options_set, 0, sizeof (func_options_set));
33438
33439 /* Initialize func_options to the defaults. */
33440 cl_optimization_restore (&func_options, &func_options_set,
33441 TREE_OPTIMIZATION (func_optimize));
33442
33443 cl_target_option_restore (&func_options, &func_options_set,
33444 TREE_TARGET_OPTION (target_option_default_node));
33445
33446 /* Set func_options flags with new target mode. */
33447 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33448 &func_options_set);
33449
33450 if (cur_tree == NULL_TREE)
33451 ret = false;
33452
33453 new_optimize = build_optimization_node (&func_options, &func_options_set);
33454
33455 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33456
33457 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33458
33459 return ret;
33460 }
33461
33462 /* Match an ISA feature bitmap to a named FPU. We always use the
33463 first entry that exactly matches the feature set, so that we
33464 effectively canonicalize the FPU name for the assembler. */
33465 static const char*
33466 arm_identify_fpu_from_isa (sbitmap isa)
33467 {
33468 auto_sbitmap fpubits (isa_num_bits);
33469 auto_sbitmap cand_fpubits (isa_num_bits);
33470
33471 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33472
33473 /* If there are no ISA feature bits relating to the FPU, we must be
33474 doing soft-float. */
33475 if (bitmap_empty_p (fpubits))
33476 return "softvfp";
33477
33478 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33479 {
33480 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33481 if (bitmap_equal_p (fpubits, cand_fpubits))
33482 return all_fpus[i].name;
33483 }
33484 /* We must find an entry, or things have gone wrong. */
33485 gcc_unreachable ();
33486 }
33487
33488 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33489 by the function fndecl. */
33490 void
33491 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33492 {
33493 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33494
33495 struct cl_target_option *targ_options;
33496 if (target_parts)
33497 targ_options = TREE_TARGET_OPTION (target_parts);
33498 else
33499 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33500 gcc_assert (targ_options);
33501
33502 arm_print_asm_arch_directives (stream, targ_options);
33503
33504 fprintf (stream, "\t.syntax unified\n");
33505
33506 if (TARGET_THUMB)
33507 {
33508 if (is_called_in_ARM_mode (decl)
33509 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33510 && cfun->is_thunk))
33511 fprintf (stream, "\t.code 32\n");
33512 else if (TARGET_THUMB1)
33513 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33514 else
33515 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33516 }
33517 else
33518 fprintf (stream, "\t.arm\n");
33519
33520 if (TARGET_POKE_FUNCTION_NAME)
33521 arm_poke_function_name (stream, (const char *) name);
33522 }
33523
33524 /* If MEM is in the form of [base+offset], extract the two parts
33525 of the address and store them in BASE and OFFSET; otherwise return false
33526 after clearing BASE and OFFSET. */
33527
33528 static bool
33529 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33530 {
33531 rtx addr;
33532
33533 gcc_assert (MEM_P (mem));
33534
33535 addr = XEXP (mem, 0);
33536
33537 /* Strip off const from addresses like (const (addr)). */
33538 if (GET_CODE (addr) == CONST)
33539 addr = XEXP (addr, 0);
33540
33541 if (REG_P (addr))
33542 {
33543 *base = addr;
33544 *offset = const0_rtx;
33545 return true;
33546 }
33547
33548 if (GET_CODE (addr) == PLUS
33549 && GET_CODE (XEXP (addr, 0)) == REG
33550 && CONST_INT_P (XEXP (addr, 1)))
33551 {
33552 *base = XEXP (addr, 0);
33553 *offset = XEXP (addr, 1);
33554 return true;
33555 }
33556
33557 *base = NULL_RTX;
33558 *offset = NULL_RTX;
33559
33560 return false;
33561 }
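/* E.g. for a MEM whose address is (plus (reg r1) (const_int 8)) this sets
   *BASE to the register and *OFFSET to (const_int 8); a bare register
   address yields that register with a zero offset.  */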
33562
33563 /* If INSN is a load or store whose address has the form [base+offset],
33564 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
33565 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33566 otherwise return FALSE. */
33567
33568 static bool
33569 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33570 {
33571 rtx x, dest, src;
33572
33573 gcc_assert (INSN_P (insn));
33574 x = PATTERN (insn);
33575 if (GET_CODE (x) != SET)
33576 return false;
33577
33578 src = SET_SRC (x);
33579 dest = SET_DEST (x);
33580 if (REG_P (src) && MEM_P (dest))
33581 {
33582 *is_load = false;
33583 extract_base_offset_in_addr (dest, base, offset);
33584 }
33585 else if (MEM_P (src) && REG_P (dest))
33586 {
33587 *is_load = true;
33588 extract_base_offset_in_addr (src, base, offset);
33589 }
33590 else
33591 return false;
33592
33593 return (*base != NULL_RTX && *offset != NULL_RTX);
33594 }
33595
33596 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33597
33598 Currently we only support fusing ldr or str instructions, so FUSION_PRI
33599 and PRI are only calculated for these instructions. For other instructions,
33600 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
33601 of instruction fusion can be supported by returning different priorities.
33602
33603 It's important that irrelevant instructions get the largest FUSION_PRI. */
33604
33605 static void
33606 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33607 int *fusion_pri, int *pri)
33608 {
33609 int tmp, off_val;
33610 bool is_load;
33611 rtx base, offset;
33612
33613 gcc_assert (INSN_P (insn));
33614
33615 tmp = max_pri - 1;
33616 if (!fusion_load_store (insn, &base, &offset, &is_load))
33617 {
33618 *pri = tmp;
33619 *fusion_pri = tmp;
33620 return;
33621 }
33622
33623 /* Load goes first. */
33624 if (is_load)
33625 *fusion_pri = tmp - 1;
33626 else
33627 *fusion_pri = tmp - 2;
33628
33629 tmp /= 2;
33630
33631 /* INSN with smaller base register goes first. */
33632 tmp -= ((REGNO (base) & 0xff) << 20);
33633
33634 /* INSN with smaller offset goes first. */
33635 off_val = (int)(INTVAL (offset));
33636 if (off_val >= 0)
33637 tmp -= (off_val & 0xfffff);
33638 else
33639 tmp += ((- off_val) & 0xfffff);
33640
33641 *pri = tmp;
33642 return;
33643 }
33644
33645
33646 /* Construct and return a PARALLEL RTX vector with elements numbering the
33647 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33648 the vector - from the perspective of the architecture. This does not
33649 line up with GCC's perspective on lane numbers, so we end up with
33650 different masks depending on our target endian-ness. The diagram
33651 below may help. We must draw the distinction when building masks
33652 which select one half of the vector. An instruction selecting
33653 architectural low-lanes for a big-endian target, must be described using
33654 a mask selecting GCC high-lanes.
33655
33656 Big-Endian Little-Endian
33657
33658 GCC 0 1 2 3 3 2 1 0
33659 | x | x | x | x | | x | x | x | x |
33660 Architecture 3 2 1 0 3 2 1 0
33661
33662 Low Mask: { 2, 3 } { 0, 1 }
33663 High Mask: { 0, 1 } { 2, 3 }
33664 */
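/* Thus for V4SImode with HIGH == true this returns the PARALLEL
   [2 3] on a little-endian target but [0 1] on a big-endian one.  */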
33665
33666 rtx
33667 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33668 {
33669 int nunits = GET_MODE_NUNITS (mode);
33670 rtvec v = rtvec_alloc (nunits / 2);
33671 int high_base = nunits / 2;
33672 int low_base = 0;
33673 int base;
33674 rtx t1;
33675 int i;
33676
33677 if (BYTES_BIG_ENDIAN)
33678 base = high ? low_base : high_base;
33679 else
33680 base = high ? high_base : low_base;
33681
33682 for (i = 0; i < nunits / 2; i++)
33683 RTVEC_ELT (v, i) = GEN_INT (base + i);
33684
33685 t1 = gen_rtx_PARALLEL (mode, v);
33686 return t1;
33687 }
33688
33689 /* Check OP for validity as a PARALLEL RTX vector with elements
33690 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33691 from the perspective of the architecture. See the diagram above
33692 arm_simd_vect_par_cnst_half for more details. */
33693
33694 bool
33695 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33696 bool high)
33697 {
33698 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33699 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33700 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33701 int i = 0;
33702
33703 if (!VECTOR_MODE_P (mode))
33704 return false;
33705
33706 if (count_op != count_ideal)
33707 return false;
33708
33709 for (i = 0; i < count_ideal; i++)
33710 {
33711 rtx elt_op = XVECEXP (op, 0, i);
33712 rtx elt_ideal = XVECEXP (ideal, 0, i);
33713
33714 if (!CONST_INT_P (elt_op)
33715 || INTVAL (elt_ideal) != INTVAL (elt_op))
33716 return false;
33717 }
33718 return true;
33719 }
33720
33721 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33722 in Thumb1. */
33723 static bool
33724 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33725 const_tree)
33726 {
33727 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33728 if (vcall_offset && TARGET_THUMB1)
33729 return false;
33730
33731 /* Otherwise ok. */
33732 return true;
33733 }
33734
33735 /* Generate RTL for a conditional branch with rtx comparison CODE in
33736 mode CC_MODE. The destination of the unlikely conditional branch
33737 is LABEL_REF. */
33738
33739 void
33740 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33741 rtx label_ref)
33742 {
33743 rtx x;
33744 x = gen_rtx_fmt_ee (code, VOIDmode,
33745 gen_rtx_REG (cc_mode, CC_REGNUM),
33746 const0_rtx);
33747
33748 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33749 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33750 pc_rtx);
33751 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33752 }
33753
33754 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33755
33756 For pure-code sections there is no letter code for this attribute, so
33757 output all the section flags numerically when this is needed. */
33758
33759 static bool
33760 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33761 {
33762
33763 if (flags & SECTION_ARM_PURECODE)
33764 {
33765 *num = 0x20000000;
33766
33767 if (!(flags & SECTION_DEBUG))
33768 *num |= 0x2;
33769 if (flags & SECTION_EXCLUDE)
33770 *num |= 0x80000000;
33771 if (flags & SECTION_WRITE)
33772 *num |= 0x1;
33773 if (flags & SECTION_CODE)
33774 *num |= 0x4;
33775 if (flags & SECTION_MERGE)
33776 *num |= 0x10;
33777 if (flags & SECTION_STRINGS)
33778 *num |= 0x20;
33779 if (flags & SECTION_TLS)
33780 *num |= 0x400;
33781 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33782 *num |= 0x200;
33783
33784 return true;
33785 }
33786
33787 return false;
33788 }
33789
33790 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33791
33792 If pure-code is passed as an option, make sure all functions are in
33793 sections that have the SHF_ARM_PURECODE attribute. */
33794
33795 static section *
33796 arm_function_section (tree decl, enum node_frequency freq,
33797 bool startup, bool exit)
33798 {
33799 const char * section_name;
33800 section * sec;
33801
33802 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33803 return default_function_section (decl, freq, startup, exit);
33804
33805 if (!target_pure_code)
33806 return default_function_section (decl, freq, startup, exit);
33807
33808
33809 section_name = DECL_SECTION_NAME (decl);
33810
33811 /* If a function is not in a named section then it falls under the 'default'
33812 text section, also known as '.text'. We can preserve previous behavior as
33813 the default text section already has the SHF_ARM_PURECODE section
33814 attribute. */
33815 if (!section_name)
33816 {
33817 section *default_sec = default_function_section (decl, freq, startup,
33818 exit);
33819
33820 /* If default_sec is not null, then it must be a special section like for
33821 example .text.startup. We set the pure-code attribute and return the
33822 same section to preserve existing behavior. */
33823 if (default_sec)
33824 default_sec->common.flags |= SECTION_ARM_PURECODE;
33825 return default_sec;
33826 }
33827
33828 /* Otherwise look whether a section has already been created with
33829 'section_name'. */
33830 sec = get_named_section (decl, section_name, 0);
33831 if (!sec)
33832 /* If that is not the case passing NULL as the section's name to
33833 'get_named_section' will create a section with the declaration's
33834 section name. */
33835 sec = get_named_section (decl, NULL, 0);
33836
33837 /* Set the SHF_ARM_PURECODE attribute. */
33838 sec->common.flags |= SECTION_ARM_PURECODE;
33839
33840 return sec;
33841 }
33842
33843 /* Implements the TARGET_SECTION_FLAGS hook.
33844
33845 If DECL is a function declaration and pure-code is passed as an option
33846 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33847 section's name and RELOC indicates whether the declaration's initializer may
33848 contain runtime relocations. */
33849
33850 static unsigned int
33851 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33852 {
33853 unsigned int flags = default_section_type_flags (decl, name, reloc);
33854
33855 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33856 flags |= SECTION_ARM_PURECODE;
33857
33858 return flags;
33859 }
33860
33861 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33862
33863 static void
33864 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33865 rtx op0, rtx op1,
33866 rtx *quot_p, rtx *rem_p)
33867 {
33868 if (mode == SImode)
33869 gcc_assert (!TARGET_IDIV);
33870
33871 scalar_int_mode libval_mode
33872 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33873
33874 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33875 libval_mode, op0, mode, op1, mode);
33876
33877 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33878 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33879 GET_MODE_SIZE (mode));
33880
33881 gcc_assert (quotient);
33882 gcc_assert (remainder);
33883
33884 *quot_p = quotient;
33885 *rem_p = remainder;
33886 }
33887
33888 /* This function checks for the availability of the coprocessor builtin passed
33889 in BUILTIN for the current target. Returns true if it is available and
33890 false otherwise. If a BUILTIN is passed for which this function has not
33891 been implemented, it will cause an internal compiler error. */
33892
33893 bool
33894 arm_coproc_builtin_available (enum unspecv builtin)
33895 {
33896 /* None of these builtins are available in Thumb mode if the target only
33897 supports Thumb-1. */
33898 if (TARGET_THUMB1)
33899 return false;
33900
33901 switch (builtin)
33902 {
33903 case VUNSPEC_CDP:
33904 case VUNSPEC_LDC:
33905 case VUNSPEC_LDCL:
33906 case VUNSPEC_STC:
33907 case VUNSPEC_STCL:
33908 case VUNSPEC_MCR:
33909 case VUNSPEC_MRC:
33910 if (arm_arch4)
33911 return true;
33912 break;
33913 case VUNSPEC_CDP2:
33914 case VUNSPEC_LDC2:
33915 case VUNSPEC_LDC2L:
33916 case VUNSPEC_STC2:
33917 case VUNSPEC_STC2L:
33918 case VUNSPEC_MCR2:
33919 case VUNSPEC_MRC2:
33920 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33921 ARMv8-{A,M}. */
33922 if (arm_arch5t)
33923 return true;
33924 break;
33925 case VUNSPEC_MCRR:
33926 case VUNSPEC_MRRC:
33927 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33928 ARMv8-{A,M}. */
33929 if (arm_arch6 || arm_arch5te)
33930 return true;
33931 break;
33932 case VUNSPEC_MCRR2:
33933 case VUNSPEC_MRRC2:
33934 if (arm_arch6)
33935 return true;
33936 break;
33937 default:
33938 gcc_unreachable ();
33939 }
33940 return false;
33941 }
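
/* As an illustration, the ACLE coprocessor intrinsics __arm_cdp, __arm_mcr
   and __arm_mrc (VUNSPEC_CDP/MCR/MRC) are usable from ARMv4 onwards outside
   Thumb-1, while the *2 variants such as __arm_mcr2 require at least ARMv5T,
   and __arm_mcrr/__arm_mrrc require ARMv5TE or ARMv6. */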
33942
33943 /* This function returns true if OP is a valid memory operand for the ldc and
33944 stc coprocessor instructions and false otherwise. */
33945
33946 bool
33947 arm_coproc_ldc_stc_legitimate_address (rtx op)
33948 {
33949 HOST_WIDE_INT range;
33950 /* Has to be a memory operand. */
33951 if (!MEM_P (op))
33952 return false;
33953
33954 op = XEXP (op, 0);
33955
33956 /* We accept registers. */
33957 if (REG_P (op))
33958 return true;
33959
33960 switch (GET_CODE (op))
33961 {
33962 case PLUS:
33963 {
33964 /* Or registers with an offset. */
33965 if (!REG_P (XEXP (op, 0)))
33966 return false;
33967
33968 op = XEXP (op, 1);
33969
33970 /* The offset must be an immediate though. */
33971 if (!CONST_INT_P (op))
33972 return false;
33973
33974 range = INTVAL (op);
33975
33976 /* Within the range of [-1020,1020]. */
33977 if (!IN_RANGE (range, -1020, 1020))
33978 return false;
33979
33980 /* And a multiple of 4. */
33981 return (range % 4) == 0;
33982 }
33983 case PRE_INC:
33984 case POST_INC:
33985 case PRE_DEC:
33986 case POST_DEC:
33987 return REG_P (XEXP (op, 0));
33988 default:
33989 gcc_unreachable ();
33990 }
33991 return false;
33992 }
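
/* For illustration: register addresses such as [r0] and register-plus-offset
   addresses such as [r1, #8] or [r2, #-1020] are accepted; offsets that are
   not a multiple of four (e.g. [r1, #2]) or that lie outside [-1020, 1020]
   (e.g. [r1, #1024]) are rejected. PRE_INC/POST_INC/PRE_DEC/POST_DEC forms
   are accepted as long as they apply to a plain register. */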
33993
33994 /* Return the diagnostic message string if conversion from FROMTYPE to
33995 TOTYPE is not allowed, NULL otherwise. */
33996
33997 static const char *
33998 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33999 {
34000 if (element_mode (fromtype) != element_mode (totype))
34001 {
34002 /* Do not allow conversions to/from BFmode scalar types. */
34003 if (TYPE_MODE (fromtype) == BFmode)
34004 return N_("invalid conversion from type %<bfloat16_t%>");
34005 if (TYPE_MODE (totype) == BFmode)
34006 return N_("invalid conversion to type %<bfloat16_t%>");
34007 }
34008
34009 /* Conversion allowed. */
34010 return NULL;
34011 }
34012
34013 /* Return the diagnostic message string if the unary operation OP is
34014 not permitted on TYPE, NULL otherwise. */
34015
34016 static const char *
34017 arm_invalid_unary_op (int op, const_tree type)
34018 {
34019 /* Reject all single-operand operations on BFmode except for &. */
34020 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34021 return N_("operation not permitted on type %<bfloat16_t%>");
34022
34023 /* Operation allowed. */
34024 return NULL;
34025 }
34026
34027 /* Return the diagnostic message string if the binary operation OP is
34028 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34029
34030 static const char *
34031 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34032 const_tree type2)
34033 {
34034 /* Reject all 2-operand operations on BFmode. */
34035 if (element_mode (type1) == BFmode
34036 || element_mode (type2) == BFmode)
34037 return N_("operation not permitted on type %<bfloat16_t%>");
34038
34039 /* Operation allowed. */
34040 return NULL;
34041 }
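
/* Taken together with the two hooks above this means that, given
   bfloat16_t x, y, expressions such as x + y or (float) x are rejected
   with the diagnostics above, while taking the address (&x) or simply
   copying the value remains valid. */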
34042
34043 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34044
34045 In VFPv1, VFP registers could only be accessed in the mode they were
34046 set, so subregs would be invalid there. However, we don't support
34047 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34048
34049 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34050 VFP registers in little-endian order. We can't describe that accurately to
34051 GCC, so avoid taking subregs of such values.
34052
34053 The only exception is going from a 128-bit to a 64-bit type. In that
34054 case the data layout happens to be consistent for big-endian, so we
34055 explicitly allow that case. */
34056
34057 static bool
34058 arm_can_change_mode_class (machine_mode from, machine_mode to,
34059 reg_class_t rclass)
34060 {
34061 if (TARGET_BIG_END
34062 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34063 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34064 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34065 && reg_classes_intersect_p (VFP_REGS, rclass))
34066 return false;
34067 return true;
34068 }
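
/* Concretely, on a big-endian target this refuses to take e.g. an SImode
   subreg of a DFmode value that may live in a VFP register, since the two
   words are stored in the opposite order from what GCC's subreg model
   expects; the 128-bit to 64-bit case (e.g. a V2DImode to DImode subreg)
   happens to have a consistent layout and so is still permitted. */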
34069
34070 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34071 strcpy from constants will be faster. */
34072
34073 static HOST_WIDE_INT
34074 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34075 {
34076 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34077 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34078 return MAX (align, BITS_PER_WORD * factor);
34079 return align;
34080 }
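
/* For instance, when not optimizing for size a string literal is aligned
   to at least BITS_PER_WORD (32 bits), or to 64 bits when tuning for
   XScale in ARM state, so that word-at-a-time copies from it stay
   aligned. */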
34081
34082 /* Emit a speculation barrier on target architectures that do not have
34083 DSB/ISB directly. Such systems probably don't need a barrier
34084 themselves, but if the code is ever run on a later architecture, it
34085 might become a problem. */
34086 void
34087 arm_emit_speculation_barrier_function ()
34088 {
34089 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34090 }
34091
34092 /* Have we recorded an explicit access to the Q bit of APSR? */
34093 bool
34094 arm_q_bit_access (void)
34095 {
34096 if (cfun && cfun->decl)
34097 return lookup_attribute ("acle qbit",
34098 DECL_ATTRIBUTES (cfun->decl));
34099 return true;
34100 }
34101
34102 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34103 bool
34104 arm_ge_bits_access (void)
34105 {
34106 if (cfun && cfun->decl)
34107 return lookup_attribute ("acle gebits",
34108 DECL_ATTRIBUTES (cfun->decl));
34109 return true;
34110 }
34111
34112 /* Return NULL if insn INSN is valid within a low-overhead loop.
34113 Otherwise return a string explaining why doloop cannot be applied. */
34114
34115 static const char *
34116 arm_invalid_within_doloop (const rtx_insn *insn)
34117 {
34118 if (!TARGET_HAVE_LOB)
34119 return default_invalid_within_doloop (insn);
34120
34121 if (CALL_P (insn))
34122 return "Function call in the loop.";
34123
34124 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34125 return "LR is used inside loop.";
34126
34127 return NULL;
34128 }
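
/* In other words, on Armv8.1-M targets with the low-overhead-branch
   extension a loop is only turned into a DLS/LE loop when its body neither
   makes calls nor mentions LR, since the LE instruction uses LR as the
   loop counter. */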
34129
34130 bool
34131 arm_target_insn_ok_for_lob (rtx insn)
34132 {
34133 basic_block bb = BLOCK_FOR_INSN (insn);
34134 /* Make sure the basic block of the target insn is a simple latch
34135 whose single predecessor and single successor are the body of the loop
34136 itself. Only simple loops whose body is a single basic block are
34137 supported for low-overhead loops, which guarantees that the LE target is
34138 placed above the LE instruction itself in the generated code. */
34139
34140 return single_succ_p (bb)
34141 && single_pred_p (bb)
34142 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34143 && contains_no_active_insn_p (bb);
34144 }
34145
34146 #if CHECKING_P
34147 namespace selftest {
34148
34149 /* Scan the static data tables generated by parsecpu.awk looking for
34150 potential issues with the data. We primarily check for
34151 inconsistencies in the option extensions at present (extensions
34152 that duplicate others but aren't marked as aliases). Furthermore,
34153 for correct canonicalization later options must never be a subset
34154 of an earlier option. Any extension should also only specify other
34155 feature bits and never an architecture bit. The architecture is inferred
34156 from the declaration of the extension. */
34157 static void
34158 arm_test_cpu_arch_data (void)
34159 {
34160 const arch_option *arch;
34161 const cpu_option *cpu;
34162 auto_sbitmap target_isa (isa_num_bits);
34163 auto_sbitmap isa1 (isa_num_bits);
34164 auto_sbitmap isa2 (isa_num_bits);
34165
34166 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34167 {
34168 const cpu_arch_extension *ext1, *ext2;
34169
34170 if (arch->common.extensions == NULL)
34171 continue;
34172
34173 arm_initialize_isa (target_isa, arch->common.isa_bits);
34174
34175 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34176 {
34177 if (ext1->alias)
34178 continue;
34179
34180 arm_initialize_isa (isa1, ext1->isa_bits);
34181 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34182 {
34183 if (ext2->alias || ext1->remove != ext2->remove)
34184 continue;
34185
34186 arm_initialize_isa (isa2, ext2->isa_bits);
34187 /* If the option is a subset of the parent option, it doesn't
34188 add anything and so isn't useful. */
34189 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34190
34191 /* If the extension specifies any architectural bits then
34192 disallow it. Extensions should only specify feature bits. */
34193 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34194 }
34195 }
34196 }
34197
34198 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34199 {
34200 const cpu_arch_extension *ext1, *ext2;
34201
34202 if (cpu->common.extensions == NULL)
34203 continue;
34204
34205 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34206
34207 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34208 {
34209 if (ext1->alias)
34210 continue;
34211
34212 arm_initialize_isa (isa1, ext1->isa_bits);
34213 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34214 {
34215 if (ext2->alias || ext1->remove != ext2->remove)
34216 continue;
34217
34218 arm_initialize_isa (isa2, ext2->isa_bits);
34219 /* If the option is a subset of the parent option, it doesn't
34220 add anything and so isn't useful. */
34221 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34222
34223 /* If the extension specifies any architectural bits then
34224 disallow it. Extensions should only specify feature bits. */
34225 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34226 }
34227 }
34228 }
34229 }
34230
34231 /* Scan the static data tables generated by parsecpu.awk looking for
34232 potential issues with the data. Here we check for consistency of the
34233 FPU bits; in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34234 a feature bit that is not defined by any FPU. */
34235 static void
34236 arm_test_fpu_data (void)
34237 {
34238 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34239 auto_sbitmap fpubits (isa_num_bits);
34240 auto_sbitmap tmpset (isa_num_bits);
34241
34242 static const enum isa_feature fpu_bitlist_internal[]
34243 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34244 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34245
34246 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34247 {
34248 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34249 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34250 bitmap_clear (isa_all_fpubits_internal);
34251 bitmap_copy (isa_all_fpubits_internal, tmpset);
34252 }
34253
34254 if (!bitmap_empty_p (isa_all_fpubits_internal))
34255 {
34256 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34257 " group that are not defined by any FPU.\n"
34258 " Check your arm-cpus.in.\n");
34259 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34260 }
34261 }
34262
34263 static void
34264 arm_run_selftests (void)
34265 {
34266 arm_test_cpu_arch_data ();
34267 arm_test_fpu_data ();
34268 }
34269 } /* Namespace selftest. */
34270
34271 #undef TARGET_RUN_TARGET_SELFTESTS
34272 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34273 #endif /* CHECKING_P */
34274
34275 /* Implement TARGET_STACK_PROTECT_GUARD. If the guard is a
34276 global variable, use the default implementation; otherwise
34277 return NULL_TREE. */
34278 static tree
34279 arm_stack_protect_guard (void)
34280 {
34281 if (arm_stack_protector_guard == SSP_GLOBAL)
34282 return default_stack_protect_guard ();
34283
34284 return NULL_TREE;
34285 }
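
/* So with the default global guard the generic __stack_chk_guard symbol is
   used, whereas with a TLS-register based guard (e.g. selected with
   -mstack-protector-guard=tls, where supported) the hook returns NULL_TREE
   and the guard is handled directly by the target's stack_protect_*
   patterns. */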
34286
34287 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34288 Unlike the arm version, we do NOT implement asm flag outputs. */
34289
34290 rtx_insn *
34291 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34292 vec<machine_mode> & /*input_modes*/,
34293 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34294 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34295 {
34296 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34297 if (startswith (constraints[i], "=@cc"))
34298 {
34299 sorry ("%<asm%> flags not supported in thumb1 mode");
34300 break;
34301 }
34302 return NULL;
34303 }
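
/* For example, an asm statement using a flag-output constraint such as
   "=@cceq" or "=@cccs" is accepted when compiling for ARM or Thumb-2 but
   reaches the sorry () above when compiling for Thumb-1. */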
34304
34305 /* Generate code to enable conditional branches in functions over 1 MiB.
34306 Parameters are:
34307 operands: the operand list of the asm insn (see arm_cond_branch or
34308 arm_cond_branch_reversed).
34309 pos_label: an index into the operands array; operands[pos_label] is
34310 the asm label of the final jump destination.
34311 dest: a string used to generate the asm label of the intermediate
34312 destination.
34313 branch_format: a string denoting the intermediate branch format, e.g.
34314 "beq", "bne", etc. */
34315
34316 const char *
34317 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34318 const char * branch_format)
34319 {
34320 rtx_code_label * tmp_label = gen_label_rtx ();
34321 char label_buf[256];
34322 char buffer[128];
34323 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34324 CODE_LABEL_NUMBER (tmp_label));
34325 const char *label_ptr = arm_strip_name_encoding (label_buf);
34326 rtx dest_label = operands[pos_label];
34327 operands[pos_label] = tmp_label;
34328
34329 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34330 output_asm_insn (buffer, operands);
34331
34332 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34333 operands[pos_label] = dest_label;
34334 output_asm_insn (buffer, operands);
34335 return "";
34336 }
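
/* The sequence emitted here looks roughly like

	<branch_format> .Lbcond<N>
	b	<final destination>
   .Lbcond<N>:

   i.e. the short conditional branch merely skips over the unconditional
   branch, which has enough range to reach the distant label; callers are
   therefore expected to pass the inverse of the condition they actually
   want to branch on. */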
34337
34338 /* If the given mode matches, loads from memory must use a base register
34339 in LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34340 enum reg_class
34341 arm_mode_base_reg_class (machine_mode mode)
34342 {
34343 if (TARGET_HAVE_MVE
34344 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34345 return LO_REGS;
34346
34347 return MODE_BASE_REG_REG_CLASS (mode);
34348 }
34349
34350 struct gcc_target targetm = TARGET_INITIALIZER;
34351
34352 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34353
34354 opt_machine_mode
34355 arm_get_mask_mode (machine_mode mode)
34356 {
34357 if (TARGET_HAVE_MVE)
34358 return arm_mode_to_pred_mode (mode);
34359
34360 return default_get_mask_mode (mode);
34361 }
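
/* For instance, with MVE enabled the mask mode for V4SImode is the
   V4BImode predicate mode, whereas without MVE the default choice (an
   integer vector mode of the same shape) is used. */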
34362
34363 #include "gt-arm.h"