1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t,
191 const function_arg_info &);
192 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
193 static void arm_function_arg_advance (cumulative_args_t,
194 const function_arg_info &);
195 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
196 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
197 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
198 const_tree);
199 static rtx aapcs_libcall_value (machine_mode);
200 static int aapcs_select_return_coproc (const_tree, const_tree);
201
202 #ifdef OBJECT_FORMAT_ELF
203 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
204 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
205 #endif
206 #ifndef ARM_PE
207 static void arm_encode_section_info (tree, rtx, int);
208 #endif
209
210 static void arm_file_end (void);
211 static void arm_file_start (void);
212 static void arm_insert_attributes (tree, tree *);
213
214 static void arm_setup_incoming_varargs (cumulative_args_t,
215 const function_arg_info &, int *, int);
216 static bool arm_pass_by_reference (cumulative_args_t,
217 const function_arg_info &);
218 static bool arm_promote_prototypes (const_tree);
219 static bool arm_default_short_enums (void);
220 static bool arm_align_anon_bitfield (void);
221 static bool arm_return_in_msb (const_tree);
222 static bool arm_must_pass_in_stack (const function_arg_info &);
223 static bool arm_return_in_memory (const_tree, const_tree);
224 #if ARM_UNWIND_INFO
225 static void arm_unwind_emit (FILE *, rtx_insn *);
226 static bool arm_output_ttype (rtx);
227 static void arm_asm_emit_except_personality (rtx);
228 #endif
229 static void arm_asm_init_sections (void);
230 static rtx arm_dwarf_register_span (rtx);
231
232 static tree arm_cxx_guard_type (void);
233 static bool arm_cxx_guard_mask_bit (void);
234 static tree arm_get_cookie_size (tree);
235 static bool arm_cookie_has_size (void);
236 static bool arm_cxx_cdtor_returns_this (void);
237 static bool arm_cxx_key_method_may_be_inline (void);
238 static void arm_cxx_determine_class_data_visibility (tree);
239 static bool arm_cxx_class_data_always_comdat (void);
240 static bool arm_cxx_use_aeabi_atexit (void);
241 static void arm_init_libfuncs (void);
242 static tree arm_build_builtin_va_list (void);
243 static void arm_expand_builtin_va_start (tree, rtx);
244 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
245 static void arm_option_override (void);
246 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
247 static void arm_option_restore (struct gcc_options *,
248 struct cl_target_option *);
249 static void arm_override_options_after_change (void);
250 static void arm_option_print (FILE *, int, struct cl_target_option *);
251 static void arm_set_current_function (tree);
252 static bool arm_can_inline_p (tree, tree);
253 static void arm_relayout_function (tree);
254 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
255 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
256 static bool arm_sched_can_speculate_insn (rtx_insn *);
257 static bool arm_macro_fusion_p (void);
258 static bool arm_cannot_copy_insn_p (rtx_insn *);
259 static int arm_issue_rate (void);
260 static int arm_first_cycle_multipass_dfa_lookahead (void);
261 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
262 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
263 static bool arm_output_addr_const_extra (FILE *, rtx);
264 static bool arm_allocate_stack_slots_for_args (void);
265 static bool arm_warn_func_return (tree);
266 static tree arm_promoted_type (const_tree t);
267 static bool arm_scalar_mode_supported_p (scalar_mode);
268 static bool arm_frame_pointer_required (void);
269 static bool arm_can_eliminate (const int, const int);
270 static void arm_asm_trampoline_template (FILE *);
271 static void arm_trampoline_init (rtx, tree, rtx);
272 static rtx arm_trampoline_adjust_address (rtx);
273 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
274 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
275 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool arm_array_mode_supported_p (machine_mode,
278 unsigned HOST_WIDE_INT);
279 static machine_mode arm_preferred_simd_mode (scalar_mode);
280 static bool arm_class_likely_spilled_p (reg_class_t);
281 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
282 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
283 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
284 const_tree type,
285 int misalignment,
286 bool is_packed);
287 static void arm_conditional_register_usage (void);
288 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
289 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
290 static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
291 static int arm_default_branch_cost (bool, bool);
292 static int arm_cortex_a5_branch_cost (bool, bool);
293 static int arm_cortex_m_branch_cost (bool, bool);
294 static int arm_cortex_m7_branch_cost (bool, bool);
295
296 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
297 const vec_perm_indices &);
298
299 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
300
301 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
302 tree vectype,
303 int misalign ATTRIBUTE_UNUSED);
304 static unsigned arm_add_stmt_cost (void *data, int count,
305 enum vect_cost_for_stmt kind,
306 struct _stmt_vec_info *stmt_info,
307 int misalign,
308 enum vect_cost_model_location where);
309
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 \f
328 /* Table of machine attributes. */
329 static const struct attribute_spec arm_attribute_table[] =
330 {
331 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
332 affects_type_identity, handler, exclude } */
333 /* Function calls made to this symbol must be done indirectly, because
334 it may lie outside of the 26 bit addressing range of a normal function
335 call. */
336 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
337 /* Whereas these functions are always known to reside within the 26 bit
338 addressing range. */
339 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
340 /* Specify the procedure call conventions for a function. */
341 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
342 NULL },
343 /* Interrupt Service Routines have special prologue and epilogue requirements. */
344 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
345 NULL },
346 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
347 NULL },
348 { "naked", 0, 0, true, false, false, false,
349 arm_handle_fndecl_attribute, NULL },
350 #ifdef ARM_PE
351 /* ARM/PE has three new attributes:
352 interfacearm - ?
353 dllexport - for exporting a function/variable that will live in a dll
354 dllimport - for importing a function/variable from a dll
355
356 Microsoft allows multiple declspecs in one __declspec, separating
357 them with spaces. We do NOT support this. Instead, use __declspec
358 multiple times.
359 */
360 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
361 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
362 { "interfacearm", 0, 0, true, false, false, false,
363 arm_handle_fndecl_attribute, NULL },
364 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
365 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
366 NULL },
367 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
368 NULL },
369 { "notshared", 0, 0, false, true, false, false,
370 arm_handle_notshared_attribute, NULL },
371 #endif
372 /* ARMv8-M Security Extensions support. */
373 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
374 arm_handle_cmse_nonsecure_entry, NULL },
375 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
376 arm_handle_cmse_nonsecure_call, NULL },
377 { NULL, 0, 0, false, false, false, false, NULL, NULL }
378 };
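/* Illustrative usage only (editorial note, not part of GCC): user code
   would typically request the attributes in the table above like so,
   assuming a target on which they are accepted:

     void far_fn (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));
     void entry_fn (void) __attribute__ ((cmse_nonsecure_entry));

   The handler functions named in the table validate the attribute
   arguments when such declarations are parsed.  */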
379 \f
380 /* Initialize the GCC target structure. */
381 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
382 #undef TARGET_MERGE_DECL_ATTRIBUTES
383 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
384 #endif
385
386 #undef TARGET_LEGITIMIZE_ADDRESS
387 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
388
389 #undef TARGET_ATTRIBUTE_TABLE
390 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
391
392 #undef TARGET_INSERT_ATTRIBUTES
393 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
394
395 #undef TARGET_ASM_FILE_START
396 #define TARGET_ASM_FILE_START arm_file_start
397 #undef TARGET_ASM_FILE_END
398 #define TARGET_ASM_FILE_END arm_file_end
399
400 #undef TARGET_ASM_ALIGNED_SI_OP
401 #define TARGET_ASM_ALIGNED_SI_OP NULL
402 #undef TARGET_ASM_INTEGER
403 #define TARGET_ASM_INTEGER arm_assemble_integer
404
405 #undef TARGET_PRINT_OPERAND
406 #define TARGET_PRINT_OPERAND arm_print_operand
407 #undef TARGET_PRINT_OPERAND_ADDRESS
408 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
409 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
410 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
411
412 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
413 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
414
415 #undef TARGET_ASM_FUNCTION_PROLOGUE
416 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
417
418 #undef TARGET_ASM_FUNCTION_EPILOGUE
419 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
420
421 #undef TARGET_CAN_INLINE_P
422 #define TARGET_CAN_INLINE_P arm_can_inline_p
423
424 #undef TARGET_RELAYOUT_FUNCTION
425 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
426
427 #undef TARGET_OPTION_OVERRIDE
428 #define TARGET_OPTION_OVERRIDE arm_option_override
429
430 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
431 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
432
433 #undef TARGET_OPTION_SAVE
434 #define TARGET_OPTION_SAVE arm_option_save
435
436 #undef TARGET_OPTION_RESTORE
437 #define TARGET_OPTION_RESTORE arm_option_restore
438
439 #undef TARGET_OPTION_PRINT
440 #define TARGET_OPTION_PRINT arm_option_print
441
442 #undef TARGET_COMP_TYPE_ATTRIBUTES
443 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
444
445 #undef TARGET_SCHED_CAN_SPECULATE_INSN
446 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
447
448 #undef TARGET_SCHED_MACRO_FUSION_P
449 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
450
451 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
452 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
453
454 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
455 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
456
457 #undef TARGET_SCHED_ADJUST_COST
458 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
459
460 #undef TARGET_SET_CURRENT_FUNCTION
461 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
462
463 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
464 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
465
466 #undef TARGET_SCHED_REORDER
467 #define TARGET_SCHED_REORDER arm_sched_reorder
468
469 #undef TARGET_REGISTER_MOVE_COST
470 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
471
472 #undef TARGET_MEMORY_MOVE_COST
473 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
474
475 #undef TARGET_ENCODE_SECTION_INFO
476 #ifdef ARM_PE
477 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
478 #else
479 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
480 #endif
481
482 #undef TARGET_STRIP_NAME_ENCODING
483 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
484
485 #undef TARGET_ASM_INTERNAL_LABEL
486 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
487
488 #undef TARGET_FLOATN_MODE
489 #define TARGET_FLOATN_MODE arm_floatn_mode
490
491 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
492 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
493
494 #undef TARGET_FUNCTION_VALUE
495 #define TARGET_FUNCTION_VALUE arm_function_value
496
497 #undef TARGET_LIBCALL_VALUE
498 #define TARGET_LIBCALL_VALUE arm_libcall_value
499
500 #undef TARGET_FUNCTION_VALUE_REGNO_P
501 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
502
503 #undef TARGET_ASM_OUTPUT_MI_THUNK
504 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
505 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
506 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
507
508 #undef TARGET_RTX_COSTS
509 #define TARGET_RTX_COSTS arm_rtx_costs
510 #undef TARGET_ADDRESS_COST
511 #define TARGET_ADDRESS_COST arm_address_cost
512
513 #undef TARGET_SHIFT_TRUNCATION_MASK
514 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
515 #undef TARGET_VECTOR_MODE_SUPPORTED_P
516 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
517 #undef TARGET_ARRAY_MODE_SUPPORTED_P
518 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
519 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
520 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
521 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
522 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
523 arm_autovectorize_vector_sizes
524
525 #undef TARGET_MACHINE_DEPENDENT_REORG
526 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
527
528 #undef TARGET_INIT_BUILTINS
529 #define TARGET_INIT_BUILTINS arm_init_builtins
530 #undef TARGET_EXPAND_BUILTIN
531 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
532 #undef TARGET_BUILTIN_DECL
533 #define TARGET_BUILTIN_DECL arm_builtin_decl
534
535 #undef TARGET_INIT_LIBFUNCS
536 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
537
538 #undef TARGET_PROMOTE_FUNCTION_MODE
539 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
540 #undef TARGET_PROMOTE_PROTOTYPES
541 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
542 #undef TARGET_PASS_BY_REFERENCE
543 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
544 #undef TARGET_ARG_PARTIAL_BYTES
545 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
546 #undef TARGET_FUNCTION_ARG
547 #define TARGET_FUNCTION_ARG arm_function_arg
548 #undef TARGET_FUNCTION_ARG_ADVANCE
549 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
550 #undef TARGET_FUNCTION_ARG_PADDING
551 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
552 #undef TARGET_FUNCTION_ARG_BOUNDARY
553 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
554
555 #undef TARGET_SETUP_INCOMING_VARARGS
556 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
557
558 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
559 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
560
561 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
562 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
563 #undef TARGET_TRAMPOLINE_INIT
564 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
565 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
566 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
567
568 #undef TARGET_WARN_FUNC_RETURN
569 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
570
571 #undef TARGET_DEFAULT_SHORT_ENUMS
572 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
573
574 #undef TARGET_ALIGN_ANON_BITFIELD
575 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
576
577 #undef TARGET_NARROW_VOLATILE_BITFIELD
578 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
579
580 #undef TARGET_CXX_GUARD_TYPE
581 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
582
583 #undef TARGET_CXX_GUARD_MASK_BIT
584 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
585
586 #undef TARGET_CXX_GET_COOKIE_SIZE
587 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
588
589 #undef TARGET_CXX_COOKIE_HAS_SIZE
590 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
591
592 #undef TARGET_CXX_CDTOR_RETURNS_THIS
593 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
594
595 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
596 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
597
598 #undef TARGET_CXX_USE_AEABI_ATEXIT
599 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
600
601 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
602 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
603 arm_cxx_determine_class_data_visibility
604
605 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
606 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
607
608 #undef TARGET_RETURN_IN_MSB
609 #define TARGET_RETURN_IN_MSB arm_return_in_msb
610
611 #undef TARGET_RETURN_IN_MEMORY
612 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
613
614 #undef TARGET_MUST_PASS_IN_STACK
615 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
616
617 #if ARM_UNWIND_INFO
618 #undef TARGET_ASM_UNWIND_EMIT
619 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
620
621 /* EABI unwinding tables use a different format for the typeinfo tables. */
622 #undef TARGET_ASM_TTYPE
623 #define TARGET_ASM_TTYPE arm_output_ttype
624
625 #undef TARGET_ARM_EABI_UNWINDER
626 #define TARGET_ARM_EABI_UNWINDER true
627
628 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
629 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
630
631 #endif /* ARM_UNWIND_INFO */
632
633 #undef TARGET_ASM_INIT_SECTIONS
634 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
635
636 #undef TARGET_DWARF_REGISTER_SPAN
637 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
638
639 #undef TARGET_CANNOT_COPY_INSN_P
640 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
641
642 #ifdef HAVE_AS_TLS
643 #undef TARGET_HAVE_TLS
644 #define TARGET_HAVE_TLS true
645 #endif
646
647 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
648 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
649
650 #undef TARGET_LEGITIMATE_CONSTANT_P
651 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
652
653 #undef TARGET_CANNOT_FORCE_CONST_MEM
654 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
655
656 #undef TARGET_MAX_ANCHOR_OFFSET
657 #define TARGET_MAX_ANCHOR_OFFSET 4095
658
 659 /* The minimum is set such that the block for a particular anchor
 660    spans offsets -4088 through 4095, i.e. 4088 + 1 + 4095 = 8184 bytes,
 661    which is divisible by eight, ensuring natural spacing of anchors. */
662 #undef TARGET_MIN_ANCHOR_OFFSET
663 #define TARGET_MIN_ANCHOR_OFFSET -4088
664
665 #undef TARGET_SCHED_ISSUE_RATE
666 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
667
668 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
669 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
670 arm_first_cycle_multipass_dfa_lookahead
671
672 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
673 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
674 arm_first_cycle_multipass_dfa_lookahead_guard
675
676 #undef TARGET_MANGLE_TYPE
677 #define TARGET_MANGLE_TYPE arm_mangle_type
678
679 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
680 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
681
682 #undef TARGET_BUILD_BUILTIN_VA_LIST
683 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
684 #undef TARGET_EXPAND_BUILTIN_VA_START
685 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
686 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
687 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
688
689 #ifdef HAVE_AS_TLS
690 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
691 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
692 #endif
693
694 #undef TARGET_LEGITIMATE_ADDRESS_P
695 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
696
697 #undef TARGET_PREFERRED_RELOAD_CLASS
698 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
699
700 #undef TARGET_PROMOTED_TYPE
701 #define TARGET_PROMOTED_TYPE arm_promoted_type
702
703 #undef TARGET_SCALAR_MODE_SUPPORTED_P
704 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
705
706 #undef TARGET_COMPUTE_FRAME_LAYOUT
707 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
708
709 #undef TARGET_FRAME_POINTER_REQUIRED
710 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
711
712 #undef TARGET_CAN_ELIMINATE
713 #define TARGET_CAN_ELIMINATE arm_can_eliminate
714
715 #undef TARGET_CONDITIONAL_REGISTER_USAGE
716 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
717
718 #undef TARGET_CLASS_LIKELY_SPILLED_P
719 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
720
721 #undef TARGET_VECTORIZE_BUILTINS
722 #define TARGET_VECTORIZE_BUILTINS
723
724 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
725 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
726 arm_builtin_vectorized_function
727
728 #undef TARGET_VECTOR_ALIGNMENT
729 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
730
731 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
732 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
733 arm_vector_alignment_reachable
734
735 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
736 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
737 arm_builtin_support_vector_misalignment
738
739 #undef TARGET_PREFERRED_RENAME_CLASS
740 #define TARGET_PREFERRED_RENAME_CLASS \
741 arm_preferred_rename_class
742
743 #undef TARGET_VECTORIZE_VEC_PERM_CONST
744 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
745
746 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
747 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
748 arm_builtin_vectorization_cost
749 #undef TARGET_VECTORIZE_ADD_STMT_COST
750 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
751
752 #undef TARGET_CANONICALIZE_COMPARISON
753 #define TARGET_CANONICALIZE_COMPARISON \
754 arm_canonicalize_comparison
755
756 #undef TARGET_ASAN_SHADOW_OFFSET
757 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
758
759 #undef MAX_INSN_PER_IT_BLOCK
760 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
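/* Illustrative only: without -mrestrict-it a single IT block may cover
   up to four conditional instructions, e.g.

     ittee  eq
     moveq  r0, #1
     addeq  r1, r1, #1
     movne  r0, #0
     subne  r1, r1, #1

   whereas with -mrestrict-it each IT block is limited to a single
   instruction.  (Example assembly, not emitted verbatim by this file.)  */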
761
762 #undef TARGET_CAN_USE_DOLOOP_P
763 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
764
765 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
766 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
767
768 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
769 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
770
771 #undef TARGET_SCHED_FUSION_PRIORITY
772 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
773
774 #undef TARGET_ASM_FUNCTION_SECTION
775 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
776
777 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
778 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
779
780 #undef TARGET_SECTION_TYPE_FLAGS
781 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
782
783 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
784 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
785
786 #undef TARGET_C_EXCESS_PRECISION
787 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
788
789 /* Although the architecture reserves bits 0 and 1, only the former is
790 used for ARM/Thumb ISA selection in v7 and earlier versions. */
791 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
792 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
793
794 #undef TARGET_FIXED_CONDITION_CODE_REGS
795 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
796
797 #undef TARGET_HARD_REGNO_NREGS
798 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
799 #undef TARGET_HARD_REGNO_MODE_OK
800 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
801
802 #undef TARGET_MODES_TIEABLE_P
803 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
804
805 #undef TARGET_CAN_CHANGE_MODE_CLASS
806 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
807
808 #undef TARGET_CONSTANT_ALIGNMENT
809 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
810 \f
811 /* Obstack for minipool constant handling. */
812 static struct obstack minipool_obstack;
813 static char * minipool_startobj;
814
815 /* The maximum number of insns skipped which
816 will be conditionalised if possible. */
817 static int max_insns_skipped = 5;
818
819 extern FILE * asm_out_file;
820
821 /* True if we are currently building a constant table. */
822 int making_const_table;
823
824 /* The processor for which instructions should be scheduled. */
825 enum processor_type arm_tune = TARGET_CPU_arm_none;
826
827 /* The current tuning set. */
828 const struct tune_params *current_tune;
829
830 /* Which floating point hardware to schedule for. */
831 int arm_fpu_attr;
832
833 /* Used for Thumb call_via trampolines. */
834 rtx thumb_call_via_label[14];
835 static int thumb_call_reg_needed;
836
837 /* The bits in this mask specify which instruction scheduling options should
838 be used. */
839 unsigned int tune_flags = 0;
840
841 /* The highest ARM architecture version supported by the
842 target. */
843 enum base_architecture arm_base_arch = BASE_ARCH_0;
844
845 /* Active target architecture and tuning. */
846
847 struct arm_build_target arm_active_target;
848
849 /* The following are used in the arm.md file as equivalents to bits
850 in the above two flag variables. */
851
852 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
853 int arm_arch4 = 0;
854
855 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
856 int arm_arch4t = 0;
857
858 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
859 int arm_arch5t = 0;
860
861 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
862 int arm_arch5te = 0;
863
864 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
865 int arm_arch6 = 0;
866
867 /* Nonzero if this chip supports the ARM 6K extensions. */
868 int arm_arch6k = 0;
869
870 /* Nonzero if this chip supports the ARM 6KZ extensions. */
871 int arm_arch6kz = 0;
872
873 /* Nonzero if instructions present in ARMv6-M can be used. */
874 int arm_arch6m = 0;
875
876 /* Nonzero if this chip supports the ARM 7 extensions. */
877 int arm_arch7 = 0;
878
879 /* Nonzero if this chip supports the Large Physical Address Extension. */
880 int arm_arch_lpae = 0;
881
882 /* Nonzero if instructions not present in the 'M' profile can be used. */
883 int arm_arch_notm = 0;
884
885 /* Nonzero if instructions present in ARMv7E-M can be used. */
886 int arm_arch7em = 0;
887
888 /* Nonzero if instructions present in ARMv8 can be used. */
889 int arm_arch8 = 0;
890
891 /* Nonzero if this chip supports the ARMv8.1 extensions. */
892 int arm_arch8_1 = 0;
893
894 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
895 int arm_arch8_2 = 0;
896
897 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
898 int arm_arch8_3 = 0;
899
900 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
901 int arm_arch8_4 = 0;
902
903 /* Nonzero if this chip supports the FP16 instructions extension of ARM
904 Architecture 8.2. */
905 int arm_fp16_inst = 0;
906
907 /* Nonzero if this chip can benefit from load scheduling. */
908 int arm_ld_sched = 0;
909
910 /* Nonzero if this chip is a StrongARM. */
911 int arm_tune_strongarm = 0;
912
913 /* Nonzero if this chip supports Intel Wireless MMX technology. */
914 int arm_arch_iwmmxt = 0;
915
916 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
917 int arm_arch_iwmmxt2 = 0;
918
919 /* Nonzero if this chip is an XScale. */
920 int arm_arch_xscale = 0;
921
922 /* Nonzero if tuning for XScale */
923 int arm_tune_xscale = 0;
924
925 /* Nonzero if we want to tune for stores that access the write-buffer.
926 This typically means an ARM6 or ARM7 with MMU or MPU. */
927 int arm_tune_wbuf = 0;
928
929 /* Nonzero if tuning for Cortex-A9. */
930 int arm_tune_cortex_a9 = 0;
931
932 /* Nonzero if we should define __THUMB_INTERWORK__ in the
933 preprocessor.
934 XXX This is a bit of a hack, it's intended to help work around
935 problems in GLD which doesn't understand that armv5t code is
936 interworking clean. */
937 int arm_cpp_interwork = 0;
938
939 /* Nonzero if chip supports Thumb 1. */
940 int arm_arch_thumb1;
941
942 /* Nonzero if chip supports Thumb 2. */
943 int arm_arch_thumb2;
944
945 /* Nonzero if chip supports integer division instruction. */
946 int arm_arch_arm_hwdiv;
947 int arm_arch_thumb_hwdiv;
948
949 /* Nonzero if chip disallows volatile memory access in IT block. */
950 int arm_arch_no_volatile_ce;
951
952 /* Nonzero if we shouldn't use literal pools. */
953 bool arm_disable_literal_pool = false;
954
955 /* The register number to be used for the PIC offset register. */
956 unsigned arm_pic_register = INVALID_REGNUM;
957
958 enum arm_pcs arm_pcs_default;
959
960 /* For an explanation of these variables, see final_prescan_insn below. */
961 int arm_ccfsm_state;
962 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
963 enum arm_cond_code arm_current_cc;
964
965 rtx arm_target_insn;
966 int arm_target_label;
967 /* The number of conditionally executed insns, including the current insn. */
968 int arm_condexec_count = 0;
969 /* A bitmask specifying the patterns for the IT block.
970 Zero means do not output an IT block before this insn. */
971 int arm_condexec_mask = 0;
972 /* The number of bits used in arm_condexec_mask. */
973 int arm_condexec_masklen = 0;
974
975 /* Nonzero if chip supports the ARMv8 CRC instructions. */
976 int arm_arch_crc = 0;
977
978 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
979 int arm_arch_dotprod = 0;
980
981 /* Nonzero if chip supports the ARMv8-M security extensions. */
982 int arm_arch_cmse = 0;
983
984 /* Nonzero if the core has a very small, high-latency, multiply unit. */
985 int arm_m_profile_small_mul = 0;
986
987 /* The condition codes of the ARM, and the inverse function. */
988 static const char * const arm_condition_codes[] =
989 {
990 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
991 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
992 };
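/* Editorial note: the table is ordered so that the inverse of the
   condition at index i sits at index i ^ 1 ("eq"/"ne", "cs"/"cc",
   "hi"/"ls", ...), which is how the inverse lookup is expected to be
   performed.  */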
993
994 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
995 int arm_regs_in_sequence[] =
996 {
997 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
998 };
999
1000 #define ARM_LSL_NAME "lsl"
1001 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1002
1003 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1004 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1005 | (1 << PIC_OFFSET_TABLE_REGNUM)))
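/* Illustrative only, assuming the Thumb hard frame pointer is r7 and the
   PIC register (if any) is not one of r0-r7: SP and PC already lie
   outside the 0xff low-register mask, so THUMB2_WORK_REGS would work out
   to 0x7f, i.e. r0-r6.  The actual register numbers come from arm.h.  */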
1006 \f
1007 /* Initialization code. */
1008
1009 struct cpu_tune
1010 {
1011 enum processor_type scheduler;
1012 unsigned int tune_flags;
1013 const struct tune_params *tune;
1014 };
1015
1016 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1017 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1018 { \
1019 num_slots, \
1020 l1_size, \
1021 l1_line_size \
1022 }
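/* Illustrative expansion with made-up values: a tuning entry written as
   ARM_PREFETCH_BENEFICIAL (4, 32768, 64) simply expands to the
   initializer { 4, 32768, 64 }, i.e. four prefetch slots, a 32K L1 cache
   and 64-byte cache lines, while ARM_PREFETCH_NOT_BENEFICIAL leaves the
   cache parameters unknown ({ 0, -1, -1 }).  */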
1023
1024 /* arm generic vectorizer costs. */
1025 static const
1026 struct cpu_vec_costs arm_default_vec_cost = {
1027 1, /* scalar_stmt_cost. */
1028 1, /* scalar load_cost. */
1029 1, /* scalar_store_cost. */
1030 1, /* vec_stmt_cost. */
1031 1, /* vec_to_scalar_cost. */
1032 1, /* scalar_to_vec_cost. */
1033 1, /* vec_align_load_cost. */
1034 1, /* vec_unalign_load_cost. */
1035 1, /* vec_unalign_store_cost. */
1036 1, /* vec_store_cost. */
1037 3, /* cond_taken_branch_cost. */
1038 1, /* cond_not_taken_branch_cost. */
1039 };
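/* Editorial note: read as relative weights, this generic table charges a
   taken conditional branch (3) three times as much as an ordinary scalar
   or vector statement (1); all other operations are treated as equally
   cheap unless a core provides a more specific table.  */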
1040
1041 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1042 #include "aarch-cost-tables.h"
1043
1044
1045
1046 const struct cpu_cost_table cortexa9_extra_costs =
1047 {
1048 /* ALU */
1049 {
1050 0, /* arith. */
1051 0, /* logical. */
1052 0, /* shift. */
1053 COSTS_N_INSNS (1), /* shift_reg. */
1054 COSTS_N_INSNS (1), /* arith_shift. */
1055 COSTS_N_INSNS (2), /* arith_shift_reg. */
1056 0, /* log_shift. */
1057 COSTS_N_INSNS (1), /* log_shift_reg. */
1058 COSTS_N_INSNS (1), /* extend. */
1059 COSTS_N_INSNS (2), /* extend_arith. */
1060 COSTS_N_INSNS (1), /* bfi. */
1061 COSTS_N_INSNS (1), /* bfx. */
1062 0, /* clz. */
1063 0, /* rev. */
1064 0, /* non_exec. */
1065 true /* non_exec_costs_exec. */
1066 },
1067 {
1068 /* MULT SImode */
1069 {
1070 COSTS_N_INSNS (3), /* simple. */
1071 COSTS_N_INSNS (3), /* flag_setting. */
1072 COSTS_N_INSNS (2), /* extend. */
1073 COSTS_N_INSNS (3), /* add. */
1074 COSTS_N_INSNS (2), /* extend_add. */
1075 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1076 },
1077 /* MULT DImode */
1078 {
1079 0, /* simple (N/A). */
1080 0, /* flag_setting (N/A). */
1081 COSTS_N_INSNS (4), /* extend. */
1082 0, /* add (N/A). */
1083 COSTS_N_INSNS (4), /* extend_add. */
1084 0 /* idiv (N/A). */
1085 }
1086 },
1087 /* LD/ST */
1088 {
1089 COSTS_N_INSNS (2), /* load. */
1090 COSTS_N_INSNS (2), /* load_sign_extend. */
1091 COSTS_N_INSNS (2), /* ldrd. */
1092 COSTS_N_INSNS (2), /* ldm_1st. */
1093 1, /* ldm_regs_per_insn_1st. */
1094 2, /* ldm_regs_per_insn_subsequent. */
1095 COSTS_N_INSNS (5), /* loadf. */
1096 COSTS_N_INSNS (5), /* loadd. */
1097 COSTS_N_INSNS (1), /* load_unaligned. */
1098 COSTS_N_INSNS (2), /* store. */
1099 COSTS_N_INSNS (2), /* strd. */
1100 COSTS_N_INSNS (2), /* stm_1st. */
1101 1, /* stm_regs_per_insn_1st. */
1102 2, /* stm_regs_per_insn_subsequent. */
1103 COSTS_N_INSNS (1), /* storef. */
1104 COSTS_N_INSNS (1), /* stored. */
1105 COSTS_N_INSNS (1), /* store_unaligned. */
1106 COSTS_N_INSNS (1), /* loadv. */
1107 COSTS_N_INSNS (1) /* storev. */
1108 },
1109 {
1110 /* FP SFmode */
1111 {
1112 COSTS_N_INSNS (14), /* div. */
1113 COSTS_N_INSNS (4), /* mult. */
1114 COSTS_N_INSNS (7), /* mult_addsub. */
1115 COSTS_N_INSNS (30), /* fma. */
1116 COSTS_N_INSNS (3), /* addsub. */
1117 COSTS_N_INSNS (1), /* fpconst. */
1118 COSTS_N_INSNS (1), /* neg. */
1119 COSTS_N_INSNS (3), /* compare. */
1120 COSTS_N_INSNS (3), /* widen. */
1121 COSTS_N_INSNS (3), /* narrow. */
1122 COSTS_N_INSNS (3), /* toint. */
1123 COSTS_N_INSNS (3), /* fromint. */
1124 COSTS_N_INSNS (3) /* roundint. */
1125 },
1126 /* FP DFmode */
1127 {
1128 COSTS_N_INSNS (24), /* div. */
1129 COSTS_N_INSNS (5), /* mult. */
1130 COSTS_N_INSNS (8), /* mult_addsub. */
1131 COSTS_N_INSNS (30), /* fma. */
1132 COSTS_N_INSNS (3), /* addsub. */
1133 COSTS_N_INSNS (1), /* fpconst. */
1134 COSTS_N_INSNS (1), /* neg. */
1135 COSTS_N_INSNS (3), /* compare. */
1136 COSTS_N_INSNS (3), /* widen. */
1137 COSTS_N_INSNS (3), /* narrow. */
1138 COSTS_N_INSNS (3), /* toint. */
1139 COSTS_N_INSNS (3), /* fromint. */
1140 COSTS_N_INSNS (3) /* roundint. */
1141 }
1142 },
1143 /* Vector */
1144 {
1145 COSTS_N_INSNS (1) /* alu. */
1146 }
1147 };
1148
1149 const struct cpu_cost_table cortexa8_extra_costs =
1150 {
1151 /* ALU */
1152 {
1153 0, /* arith. */
1154 0, /* logical. */
1155 COSTS_N_INSNS (1), /* shift. */
1156 0, /* shift_reg. */
1157 COSTS_N_INSNS (1), /* arith_shift. */
1158 0, /* arith_shift_reg. */
1159 COSTS_N_INSNS (1), /* log_shift. */
1160 0, /* log_shift_reg. */
1161 0, /* extend. */
1162 0, /* extend_arith. */
1163 0, /* bfi. */
1164 0, /* bfx. */
1165 0, /* clz. */
1166 0, /* rev. */
1167 0, /* non_exec. */
1168 true /* non_exec_costs_exec. */
1169 },
1170 {
1171 /* MULT SImode */
1172 {
1173 COSTS_N_INSNS (1), /* simple. */
1174 COSTS_N_INSNS (1), /* flag_setting. */
1175 COSTS_N_INSNS (1), /* extend. */
1176 COSTS_N_INSNS (1), /* add. */
1177 COSTS_N_INSNS (1), /* extend_add. */
1178 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1179 },
1180 /* MULT DImode */
1181 {
1182 0, /* simple (N/A). */
1183 0, /* flag_setting (N/A). */
1184 COSTS_N_INSNS (2), /* extend. */
1185 0, /* add (N/A). */
1186 COSTS_N_INSNS (2), /* extend_add. */
1187 0 /* idiv (N/A). */
1188 }
1189 },
1190 /* LD/ST */
1191 {
1192 COSTS_N_INSNS (1), /* load. */
1193 COSTS_N_INSNS (1), /* load_sign_extend. */
1194 COSTS_N_INSNS (1), /* ldrd. */
1195 COSTS_N_INSNS (1), /* ldm_1st. */
1196 1, /* ldm_regs_per_insn_1st. */
1197 2, /* ldm_regs_per_insn_subsequent. */
1198 COSTS_N_INSNS (1), /* loadf. */
1199 COSTS_N_INSNS (1), /* loadd. */
1200 COSTS_N_INSNS (1), /* load_unaligned. */
1201 COSTS_N_INSNS (1), /* store. */
1202 COSTS_N_INSNS (1), /* strd. */
1203 COSTS_N_INSNS (1), /* stm_1st. */
1204 1, /* stm_regs_per_insn_1st. */
1205 2, /* stm_regs_per_insn_subsequent. */
1206 COSTS_N_INSNS (1), /* storef. */
1207 COSTS_N_INSNS (1), /* stored. */
1208 COSTS_N_INSNS (1), /* store_unaligned. */
1209 COSTS_N_INSNS (1), /* loadv. */
1210 COSTS_N_INSNS (1) /* storev. */
1211 },
1212 {
1213 /* FP SFmode */
1214 {
1215 COSTS_N_INSNS (36), /* div. */
1216 COSTS_N_INSNS (11), /* mult. */
1217 COSTS_N_INSNS (20), /* mult_addsub. */
1218 COSTS_N_INSNS (30), /* fma. */
1219 COSTS_N_INSNS (9), /* addsub. */
1220 COSTS_N_INSNS (3), /* fpconst. */
1221 COSTS_N_INSNS (3), /* neg. */
1222 COSTS_N_INSNS (6), /* compare. */
1223 COSTS_N_INSNS (4), /* widen. */
1224 COSTS_N_INSNS (4), /* narrow. */
1225 COSTS_N_INSNS (8), /* toint. */
1226 COSTS_N_INSNS (8), /* fromint. */
1227 COSTS_N_INSNS (8) /* roundint. */
1228 },
1229 /* FP DFmode */
1230 {
1231 COSTS_N_INSNS (64), /* div. */
1232 COSTS_N_INSNS (16), /* mult. */
1233 COSTS_N_INSNS (25), /* mult_addsub. */
1234 COSTS_N_INSNS (30), /* fma. */
1235 COSTS_N_INSNS (9), /* addsub. */
1236 COSTS_N_INSNS (3), /* fpconst. */
1237 COSTS_N_INSNS (3), /* neg. */
1238 COSTS_N_INSNS (6), /* compare. */
1239 COSTS_N_INSNS (6), /* widen. */
1240 COSTS_N_INSNS (6), /* narrow. */
1241 COSTS_N_INSNS (8), /* toint. */
1242 COSTS_N_INSNS (8), /* fromint. */
1243 COSTS_N_INSNS (8) /* roundint. */
1244 }
1245 },
1246 /* Vector */
1247 {
1248 COSTS_N_INSNS (1) /* alu. */
1249 }
1250 };
1251
1252 const struct cpu_cost_table cortexa5_extra_costs =
1253 {
1254 /* ALU */
1255 {
1256 0, /* arith. */
1257 0, /* logical. */
1258 COSTS_N_INSNS (1), /* shift. */
1259 COSTS_N_INSNS (1), /* shift_reg. */
1260 COSTS_N_INSNS (1), /* arith_shift. */
1261 COSTS_N_INSNS (1), /* arith_shift_reg. */
1262 COSTS_N_INSNS (1), /* log_shift. */
1263 COSTS_N_INSNS (1), /* log_shift_reg. */
1264 COSTS_N_INSNS (1), /* extend. */
1265 COSTS_N_INSNS (1), /* extend_arith. */
1266 COSTS_N_INSNS (1), /* bfi. */
1267 COSTS_N_INSNS (1), /* bfx. */
1268 COSTS_N_INSNS (1), /* clz. */
1269 COSTS_N_INSNS (1), /* rev. */
1270 0, /* non_exec. */
1271 true /* non_exec_costs_exec. */
1272 },
1273
1274 {
1275 /* MULT SImode */
1276 {
1277 0, /* simple. */
1278 COSTS_N_INSNS (1), /* flag_setting. */
1279 COSTS_N_INSNS (1), /* extend. */
1280 COSTS_N_INSNS (1), /* add. */
1281 COSTS_N_INSNS (1), /* extend_add. */
1282 COSTS_N_INSNS (7) /* idiv. */
1283 },
1284 /* MULT DImode */
1285 {
1286 0, /* simple (N/A). */
1287 0, /* flag_setting (N/A). */
1288 COSTS_N_INSNS (1), /* extend. */
1289 0, /* add. */
1290 COSTS_N_INSNS (2), /* extend_add. */
1291 0 /* idiv (N/A). */
1292 }
1293 },
1294 /* LD/ST */
1295 {
1296 COSTS_N_INSNS (1), /* load. */
1297 COSTS_N_INSNS (1), /* load_sign_extend. */
1298 COSTS_N_INSNS (6), /* ldrd. */
1299 COSTS_N_INSNS (1), /* ldm_1st. */
1300 1, /* ldm_regs_per_insn_1st. */
1301 2, /* ldm_regs_per_insn_subsequent. */
1302 COSTS_N_INSNS (2), /* loadf. */
1303 COSTS_N_INSNS (4), /* loadd. */
1304 COSTS_N_INSNS (1), /* load_unaligned. */
1305 COSTS_N_INSNS (1), /* store. */
1306 COSTS_N_INSNS (3), /* strd. */
1307 COSTS_N_INSNS (1), /* stm_1st. */
1308 1, /* stm_regs_per_insn_1st. */
1309 2, /* stm_regs_per_insn_subsequent. */
1310 COSTS_N_INSNS (2), /* storef. */
1311 COSTS_N_INSNS (2), /* stored. */
1312 COSTS_N_INSNS (1), /* store_unaligned. */
1313 COSTS_N_INSNS (1), /* loadv. */
1314 COSTS_N_INSNS (1) /* storev. */
1315 },
1316 {
1317 /* FP SFmode */
1318 {
1319 COSTS_N_INSNS (15), /* div. */
1320 COSTS_N_INSNS (3), /* mult. */
1321 COSTS_N_INSNS (7), /* mult_addsub. */
1322 COSTS_N_INSNS (7), /* fma. */
1323 COSTS_N_INSNS (3), /* addsub. */
1324 COSTS_N_INSNS (3), /* fpconst. */
1325 COSTS_N_INSNS (3), /* neg. */
1326 COSTS_N_INSNS (3), /* compare. */
1327 COSTS_N_INSNS (3), /* widen. */
1328 COSTS_N_INSNS (3), /* narrow. */
1329 COSTS_N_INSNS (3), /* toint. */
1330 COSTS_N_INSNS (3), /* fromint. */
1331 COSTS_N_INSNS (3) /* roundint. */
1332 },
1333 /* FP DFmode */
1334 {
1335 COSTS_N_INSNS (30), /* div. */
1336 COSTS_N_INSNS (6), /* mult. */
1337 COSTS_N_INSNS (10), /* mult_addsub. */
1338 COSTS_N_INSNS (7), /* fma. */
1339 COSTS_N_INSNS (3), /* addsub. */
1340 COSTS_N_INSNS (3), /* fpconst. */
1341 COSTS_N_INSNS (3), /* neg. */
1342 COSTS_N_INSNS (3), /* compare. */
1343 COSTS_N_INSNS (3), /* widen. */
1344 COSTS_N_INSNS (3), /* narrow. */
1345 COSTS_N_INSNS (3), /* toint. */
1346 COSTS_N_INSNS (3), /* fromint. */
1347 COSTS_N_INSNS (3) /* roundint. */
1348 }
1349 },
1350 /* Vector */
1351 {
1352 COSTS_N_INSNS (1) /* alu. */
1353 }
1354 };
1355
1356
1357 const struct cpu_cost_table cortexa7_extra_costs =
1358 {
1359 /* ALU */
1360 {
1361 0, /* arith. */
1362 0, /* logical. */
1363 COSTS_N_INSNS (1), /* shift. */
1364 COSTS_N_INSNS (1), /* shift_reg. */
1365 COSTS_N_INSNS (1), /* arith_shift. */
1366 COSTS_N_INSNS (1), /* arith_shift_reg. */
1367 COSTS_N_INSNS (1), /* log_shift. */
1368 COSTS_N_INSNS (1), /* log_shift_reg. */
1369 COSTS_N_INSNS (1), /* extend. */
1370 COSTS_N_INSNS (1), /* extend_arith. */
1371 COSTS_N_INSNS (1), /* bfi. */
1372 COSTS_N_INSNS (1), /* bfx. */
1373 COSTS_N_INSNS (1), /* clz. */
1374 COSTS_N_INSNS (1), /* rev. */
1375 0, /* non_exec. */
1376 true /* non_exec_costs_exec. */
1377 },
1378
1379 {
1380 /* MULT SImode */
1381 {
1382 0, /* simple. */
1383 COSTS_N_INSNS (1), /* flag_setting. */
1384 COSTS_N_INSNS (1), /* extend. */
1385 COSTS_N_INSNS (1), /* add. */
1386 COSTS_N_INSNS (1), /* extend_add. */
1387 COSTS_N_INSNS (7) /* idiv. */
1388 },
1389 /* MULT DImode */
1390 {
1391 0, /* simple (N/A). */
1392 0, /* flag_setting (N/A). */
1393 COSTS_N_INSNS (1), /* extend. */
1394 0, /* add. */
1395 COSTS_N_INSNS (2), /* extend_add. */
1396 0 /* idiv (N/A). */
1397 }
1398 },
1399 /* LD/ST */
1400 {
1401 COSTS_N_INSNS (1), /* load. */
1402 COSTS_N_INSNS (1), /* load_sign_extend. */
1403 COSTS_N_INSNS (3), /* ldrd. */
1404 COSTS_N_INSNS (1), /* ldm_1st. */
1405 1, /* ldm_regs_per_insn_1st. */
1406 2, /* ldm_regs_per_insn_subsequent. */
1407 COSTS_N_INSNS (2), /* loadf. */
1408 COSTS_N_INSNS (2), /* loadd. */
1409 COSTS_N_INSNS (1), /* load_unaligned. */
1410 COSTS_N_INSNS (1), /* store. */
1411 COSTS_N_INSNS (3), /* strd. */
1412 COSTS_N_INSNS (1), /* stm_1st. */
1413 1, /* stm_regs_per_insn_1st. */
1414 2, /* stm_regs_per_insn_subsequent. */
1415 COSTS_N_INSNS (2), /* storef. */
1416 COSTS_N_INSNS (2), /* stored. */
1417 COSTS_N_INSNS (1), /* store_unaligned. */
1418 COSTS_N_INSNS (1), /* loadv. */
1419 COSTS_N_INSNS (1) /* storev. */
1420 },
1421 {
1422 /* FP SFmode */
1423 {
1424 COSTS_N_INSNS (15), /* div. */
1425 COSTS_N_INSNS (3), /* mult. */
1426 COSTS_N_INSNS (7), /* mult_addsub. */
1427 COSTS_N_INSNS (7), /* fma. */
1428 COSTS_N_INSNS (3), /* addsub. */
1429 COSTS_N_INSNS (3), /* fpconst. */
1430 COSTS_N_INSNS (3), /* neg. */
1431 COSTS_N_INSNS (3), /* compare. */
1432 COSTS_N_INSNS (3), /* widen. */
1433 COSTS_N_INSNS (3), /* narrow. */
1434 COSTS_N_INSNS (3), /* toint. */
1435 COSTS_N_INSNS (3), /* fromint. */
1436 COSTS_N_INSNS (3) /* roundint. */
1437 },
1438 /* FP DFmode */
1439 {
1440 COSTS_N_INSNS (30), /* div. */
1441 COSTS_N_INSNS (6), /* mult. */
1442 COSTS_N_INSNS (10), /* mult_addsub. */
1443 COSTS_N_INSNS (7), /* fma. */
1444 COSTS_N_INSNS (3), /* addsub. */
1445 COSTS_N_INSNS (3), /* fpconst. */
1446 COSTS_N_INSNS (3), /* neg. */
1447 COSTS_N_INSNS (3), /* compare. */
1448 COSTS_N_INSNS (3), /* widen. */
1449 COSTS_N_INSNS (3), /* narrow. */
1450 COSTS_N_INSNS (3), /* toint. */
1451 COSTS_N_INSNS (3), /* fromint. */
1452 COSTS_N_INSNS (3) /* roundint. */
1453 }
1454 },
1455 /* Vector */
1456 {
1457 COSTS_N_INSNS (1) /* alu. */
1458 }
1459 };
1460
1461 const struct cpu_cost_table cortexa12_extra_costs =
1462 {
1463 /* ALU */
1464 {
1465 0, /* arith. */
1466 0, /* logical. */
1467 0, /* shift. */
1468 COSTS_N_INSNS (1), /* shift_reg. */
1469 COSTS_N_INSNS (1), /* arith_shift. */
1470 COSTS_N_INSNS (1), /* arith_shift_reg. */
1471 COSTS_N_INSNS (1), /* log_shift. */
1472 COSTS_N_INSNS (1), /* log_shift_reg. */
1473 0, /* extend. */
1474 COSTS_N_INSNS (1), /* extend_arith. */
1475 0, /* bfi. */
1476 COSTS_N_INSNS (1), /* bfx. */
1477 COSTS_N_INSNS (1), /* clz. */
1478 COSTS_N_INSNS (1), /* rev. */
1479 0, /* non_exec. */
1480 true /* non_exec_costs_exec. */
1481 },
1482 /* MULT SImode */
1483 {
1484 {
1485 COSTS_N_INSNS (2), /* simple. */
1486 COSTS_N_INSNS (3), /* flag_setting. */
1487 COSTS_N_INSNS (2), /* extend. */
1488 COSTS_N_INSNS (3), /* add. */
1489 COSTS_N_INSNS (2), /* extend_add. */
1490 COSTS_N_INSNS (18) /* idiv. */
1491 },
1492 /* MULT DImode */
1493 {
1494 0, /* simple (N/A). */
1495 0, /* flag_setting (N/A). */
1496 COSTS_N_INSNS (3), /* extend. */
1497 0, /* add (N/A). */
1498 COSTS_N_INSNS (3), /* extend_add. */
1499 0 /* idiv (N/A). */
1500 }
1501 },
1502 /* LD/ST */
1503 {
1504 COSTS_N_INSNS (3), /* load. */
1505 COSTS_N_INSNS (3), /* load_sign_extend. */
1506 COSTS_N_INSNS (3), /* ldrd. */
1507 COSTS_N_INSNS (3), /* ldm_1st. */
1508 1, /* ldm_regs_per_insn_1st. */
1509 2, /* ldm_regs_per_insn_subsequent. */
1510 COSTS_N_INSNS (3), /* loadf. */
1511 COSTS_N_INSNS (3), /* loadd. */
1512 0, /* load_unaligned. */
1513 0, /* store. */
1514 0, /* strd. */
1515 0, /* stm_1st. */
1516 1, /* stm_regs_per_insn_1st. */
1517 2, /* stm_regs_per_insn_subsequent. */
1518 COSTS_N_INSNS (2), /* storef. */
1519 COSTS_N_INSNS (2), /* stored. */
1520 0, /* store_unaligned. */
1521 COSTS_N_INSNS (1), /* loadv. */
1522 COSTS_N_INSNS (1) /* storev. */
1523 },
1524 {
1525 /* FP SFmode */
1526 {
1527 COSTS_N_INSNS (17), /* div. */
1528 COSTS_N_INSNS (4), /* mult. */
1529 COSTS_N_INSNS (8), /* mult_addsub. */
1530 COSTS_N_INSNS (8), /* fma. */
1531 COSTS_N_INSNS (4), /* addsub. */
1532 COSTS_N_INSNS (2), /* fpconst. */
1533 COSTS_N_INSNS (2), /* neg. */
1534 COSTS_N_INSNS (2), /* compare. */
1535 COSTS_N_INSNS (4), /* widen. */
1536 COSTS_N_INSNS (4), /* narrow. */
1537 COSTS_N_INSNS (4), /* toint. */
1538 COSTS_N_INSNS (4), /* fromint. */
1539 COSTS_N_INSNS (4) /* roundint. */
1540 },
1541 /* FP DFmode */
1542 {
1543 COSTS_N_INSNS (31), /* div. */
1544 COSTS_N_INSNS (4), /* mult. */
1545 COSTS_N_INSNS (8), /* mult_addsub. */
1546 COSTS_N_INSNS (8), /* fma. */
1547 COSTS_N_INSNS (4), /* addsub. */
1548 COSTS_N_INSNS (2), /* fpconst. */
1549 COSTS_N_INSNS (2), /* neg. */
1550 COSTS_N_INSNS (2), /* compare. */
1551 COSTS_N_INSNS (4), /* widen. */
1552 COSTS_N_INSNS (4), /* narrow. */
1553 COSTS_N_INSNS (4), /* toint. */
1554 COSTS_N_INSNS (4), /* fromint. */
1555 COSTS_N_INSNS (4) /* roundint. */
1556 }
1557 },
1558 /* Vector */
1559 {
1560 COSTS_N_INSNS (1) /* alu. */
1561 }
1562 };
1563
1564 const struct cpu_cost_table cortexa15_extra_costs =
1565 {
1566 /* ALU */
1567 {
1568 0, /* arith. */
1569 0, /* logical. */
1570 0, /* shift. */
1571 0, /* shift_reg. */
1572 COSTS_N_INSNS (1), /* arith_shift. */
1573 COSTS_N_INSNS (1), /* arith_shift_reg. */
1574 COSTS_N_INSNS (1), /* log_shift. */
1575 COSTS_N_INSNS (1), /* log_shift_reg. */
1576 0, /* extend. */
1577 COSTS_N_INSNS (1), /* extend_arith. */
1578 COSTS_N_INSNS (1), /* bfi. */
1579 0, /* bfx. */
1580 0, /* clz. */
1581 0, /* rev. */
1582 0, /* non_exec. */
1583 true /* non_exec_costs_exec. */
1584 },
1585 /* MULT SImode */
1586 {
1587 {
1588 COSTS_N_INSNS (2), /* simple. */
1589 COSTS_N_INSNS (3), /* flag_setting. */
1590 COSTS_N_INSNS (2), /* extend. */
1591 COSTS_N_INSNS (2), /* add. */
1592 COSTS_N_INSNS (2), /* extend_add. */
1593 COSTS_N_INSNS (18) /* idiv. */
1594 },
1595 /* MULT DImode */
1596 {
1597 0, /* simple (N/A). */
1598 0, /* flag_setting (N/A). */
1599 COSTS_N_INSNS (3), /* extend. */
1600 0, /* add (N/A). */
1601 COSTS_N_INSNS (3), /* extend_add. */
1602 0 /* idiv (N/A). */
1603 }
1604 },
1605 /* LD/ST */
1606 {
1607 COSTS_N_INSNS (3), /* load. */
1608 COSTS_N_INSNS (3), /* load_sign_extend. */
1609 COSTS_N_INSNS (3), /* ldrd. */
1610 COSTS_N_INSNS (4), /* ldm_1st. */
1611 1, /* ldm_regs_per_insn_1st. */
1612 2, /* ldm_regs_per_insn_subsequent. */
1613 COSTS_N_INSNS (4), /* loadf. */
1614 COSTS_N_INSNS (4), /* loadd. */
1615 0, /* load_unaligned. */
1616 0, /* store. */
1617 0, /* strd. */
1618 COSTS_N_INSNS (1), /* stm_1st. */
1619 1, /* stm_regs_per_insn_1st. */
1620 2, /* stm_regs_per_insn_subsequent. */
1621 0, /* storef. */
1622 0, /* stored. */
1623 0, /* store_unaligned. */
1624 COSTS_N_INSNS (1), /* loadv. */
1625 COSTS_N_INSNS (1) /* storev. */
1626 },
1627 {
1628 /* FP SFmode */
1629 {
1630 COSTS_N_INSNS (17), /* div. */
1631 COSTS_N_INSNS (4), /* mult. */
1632 COSTS_N_INSNS (8), /* mult_addsub. */
1633 COSTS_N_INSNS (8), /* fma. */
1634 COSTS_N_INSNS (4), /* addsub. */
1635 COSTS_N_INSNS (2), /* fpconst. */
1636 COSTS_N_INSNS (2), /* neg. */
1637 COSTS_N_INSNS (5), /* compare. */
1638 COSTS_N_INSNS (4), /* widen. */
1639 COSTS_N_INSNS (4), /* narrow. */
1640 COSTS_N_INSNS (4), /* toint. */
1641 COSTS_N_INSNS (4), /* fromint. */
1642 COSTS_N_INSNS (4) /* roundint. */
1643 },
1644 /* FP DFmode */
1645 {
1646 COSTS_N_INSNS (31), /* div. */
1647 COSTS_N_INSNS (4), /* mult. */
1648 COSTS_N_INSNS (8), /* mult_addsub. */
1649 COSTS_N_INSNS (8), /* fma. */
1650 COSTS_N_INSNS (4), /* addsub. */
1651 COSTS_N_INSNS (2), /* fpconst. */
1652 COSTS_N_INSNS (2), /* neg. */
1653 COSTS_N_INSNS (2), /* compare. */
1654 COSTS_N_INSNS (4), /* widen. */
1655 COSTS_N_INSNS (4), /* narrow. */
1656 COSTS_N_INSNS (4), /* toint. */
1657 COSTS_N_INSNS (4), /* fromint. */
1658 COSTS_N_INSNS (4) /* roundint. */
1659 }
1660 },
1661 /* Vector */
1662 {
1663 COSTS_N_INSNS (1) /* alu. */
1664 }
1665 };
1666
1667 const struct cpu_cost_table v7m_extra_costs =
1668 {
1669 /* ALU */
1670 {
1671 0, /* arith. */
1672 0, /* logical. */
1673 0, /* shift. */
1674 0, /* shift_reg. */
1675 0, /* arith_shift. */
1676 COSTS_N_INSNS (1), /* arith_shift_reg. */
1677 0, /* log_shift. */
1678 COSTS_N_INSNS (1), /* log_shift_reg. */
1679 0, /* extend. */
1680 COSTS_N_INSNS (1), /* extend_arith. */
1681 0, /* bfi. */
1682 0, /* bfx. */
1683 0, /* clz. */
1684 0, /* rev. */
1685 COSTS_N_INSNS (1), /* non_exec. */
1686 false /* non_exec_costs_exec. */
1687 },
1688 {
1689 /* MULT SImode */
1690 {
1691 COSTS_N_INSNS (1), /* simple. */
1692 COSTS_N_INSNS (1), /* flag_setting. */
1693 COSTS_N_INSNS (2), /* extend. */
1694 COSTS_N_INSNS (1), /* add. */
1695 COSTS_N_INSNS (3), /* extend_add. */
1696 COSTS_N_INSNS (8) /* idiv. */
1697 },
1698 /* MULT DImode */
1699 {
1700 0, /* simple (N/A). */
1701 0, /* flag_setting (N/A). */
1702 COSTS_N_INSNS (2), /* extend. */
1703 0, /* add (N/A). */
1704 COSTS_N_INSNS (3), /* extend_add. */
1705 0 /* idiv (N/A). */
1706 }
1707 },
1708 /* LD/ST */
1709 {
1710 COSTS_N_INSNS (2), /* load. */
1711 0, /* load_sign_extend. */
1712 COSTS_N_INSNS (3), /* ldrd. */
1713 COSTS_N_INSNS (2), /* ldm_1st. */
1714 1, /* ldm_regs_per_insn_1st. */
1715 1, /* ldm_regs_per_insn_subsequent. */
1716 COSTS_N_INSNS (2), /* loadf. */
1717 COSTS_N_INSNS (3), /* loadd. */
1718 COSTS_N_INSNS (1), /* load_unaligned. */
1719 COSTS_N_INSNS (2), /* store. */
1720 COSTS_N_INSNS (3), /* strd. */
1721 COSTS_N_INSNS (2), /* stm_1st. */
1722 1, /* stm_regs_per_insn_1st. */
1723 1, /* stm_regs_per_insn_subsequent. */
1724 COSTS_N_INSNS (2), /* storef. */
1725 COSTS_N_INSNS (3), /* stored. */
1726 COSTS_N_INSNS (1), /* store_unaligned. */
1727 COSTS_N_INSNS (1), /* loadv. */
1728 COSTS_N_INSNS (1) /* storev. */
1729 },
1730 {
1731 /* FP SFmode */
1732 {
1733 COSTS_N_INSNS (7), /* div. */
1734 COSTS_N_INSNS (2), /* mult. */
1735 COSTS_N_INSNS (5), /* mult_addsub. */
1736 COSTS_N_INSNS (3), /* fma. */
1737 COSTS_N_INSNS (1), /* addsub. */
1738 0, /* fpconst. */
1739 0, /* neg. */
1740 0, /* compare. */
1741 0, /* widen. */
1742 0, /* narrow. */
1743 0, /* toint. */
1744 0, /* fromint. */
1745 0 /* roundint. */
1746 },
1747 /* FP DFmode */
1748 {
1749 COSTS_N_INSNS (15), /* div. */
1750 COSTS_N_INSNS (5), /* mult. */
1751 COSTS_N_INSNS (7), /* mult_addsub. */
1752 COSTS_N_INSNS (7), /* fma. */
1753 COSTS_N_INSNS (3), /* addsub. */
1754 0, /* fpconst. */
1755 0, /* neg. */
1756 0, /* compare. */
1757 0, /* widen. */
1758 0, /* narrow. */
1759 0, /* toint. */
1760 0, /* fromint. */
1761 0 /* roundint. */
1762 }
1763 },
1764 /* Vector */
1765 {
1766 COSTS_N_INSNS (1) /* alu. */
1767 }
1768 };
1769
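/* Editorial note: the generic addressing-mode table below is all
   COSTS_N_INSNS (0), i.e. no addressing mode is treated as more expensive
   than any other; a CPU-specific tuning may point at its own table.  */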
1770 const struct addr_mode_cost_table generic_addr_mode_costs =
1771 {
1772 /* int. */
1773 {
1774 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1775 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1776 COSTS_N_INSNS (0) /* AMO_WB. */
1777 },
1778 /* float. */
1779 {
1780 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1781 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1782 COSTS_N_INSNS (0) /* AMO_WB. */
1783 },
1784 /* vector. */
1785 {
1786 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1787 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1788 COSTS_N_INSNS (0) /* AMO_WB. */
1789 }
1790 };
1791
1792 const struct tune_params arm_slowmul_tune =
1793 {
1794 &generic_extra_costs, /* Insn extra costs. */
1795 &generic_addr_mode_costs, /* Addressing mode costs. */
1796 NULL, /* Sched adj cost. */
1797 arm_default_branch_cost,
1798 &arm_default_vec_cost,
1799 3, /* Constant limit. */
1800 5, /* Max cond insns. */
1801 8, /* Memset max inline. */
1802 1, /* Issue rate. */
1803 ARM_PREFETCH_NOT_BENEFICIAL,
1804 tune_params::PREF_CONST_POOL_TRUE,
1805 tune_params::PREF_LDRD_FALSE,
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1807 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1808 tune_params::DISPARAGE_FLAGS_NEITHER,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_STRINGOPS_FALSE,
1832 tune_params::FUSE_NOTHING,
1833 tune_params::SCHED_AUTOPREF_OFF
1834 };
1835
1836 /* StrongARM has early execution of branches, so a sequence that is worth
1837 skipping is shorter. Set max_insns_skipped to a lower value. */
1838
1839 const struct tune_params arm_strongarm_tune =
1840 {
1841 &generic_extra_costs, /* Insn extra costs. */
1842 &generic_addr_mode_costs, /* Addressing mode costs. */
1843 NULL, /* Sched adj cost. */
1844 arm_default_branch_cost,
1845 &arm_default_vec_cost,
1846 1, /* Constant limit. */
1847 3, /* Max cond insns. */
1848 8, /* Memset max inline. */
1849 1, /* Issue rate. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 tune_params::PREF_CONST_POOL_TRUE,
1852 tune_params::PREF_LDRD_FALSE,
1853 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1855 tune_params::DISPARAGE_FLAGS_NEITHER,
1856 tune_params::PREF_NEON_STRINGOPS_FALSE,
1857 tune_params::FUSE_NOTHING,
1858 tune_params::SCHED_AUTOPREF_OFF
1859 };
1860
1861 const struct tune_params arm_xscale_tune =
1862 {
1863 &generic_extra_costs, /* Insn extra costs. */
1864 &generic_addr_mode_costs, /* Addressing mode costs. */
1865 xscale_sched_adjust_cost,
1866 arm_default_branch_cost,
1867 &arm_default_vec_cost,
1868 2, /* Constant limit. */
1869 3, /* Max cond insns. */
1870 8, /* Memset max inline. */
1871 1, /* Issue rate. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 tune_params::PREF_CONST_POOL_TRUE,
1874 tune_params::PREF_LDRD_FALSE,
1875 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1876 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1877 tune_params::DISPARAGE_FLAGS_NEITHER,
1878 tune_params::PREF_NEON_STRINGOPS_FALSE,
1879 tune_params::FUSE_NOTHING,
1880 tune_params::SCHED_AUTOPREF_OFF
1881 };
1882
1883 const struct tune_params arm_9e_tune =
1884 {
1885 &generic_extra_costs, /* Insn extra costs. */
1886 &generic_addr_mode_costs, /* Addressing mode costs. */
1887 NULL, /* Sched adj cost. */
1888 arm_default_branch_cost,
1889 &arm_default_vec_cost,
1890 1, /* Constant limit. */
1891 5, /* Max cond insns. */
1892 8, /* Memset max inline. */
1893 1, /* Issue rate. */
1894 ARM_PREFETCH_NOT_BENEFICIAL,
1895 tune_params::PREF_CONST_POOL_TRUE,
1896 tune_params::PREF_LDRD_FALSE,
1897 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1898 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1899 tune_params::DISPARAGE_FLAGS_NEITHER,
1900 tune_params::PREF_NEON_STRINGOPS_FALSE,
1901 tune_params::FUSE_NOTHING,
1902 tune_params::SCHED_AUTOPREF_OFF
1903 };
1904
1905 const struct tune_params arm_marvell_pj4_tune =
1906 {
1907 &generic_extra_costs, /* Insn extra costs. */
1908 &generic_addr_mode_costs, /* Addressing mode costs. */
1909 NULL, /* Sched adj cost. */
1910 arm_default_branch_cost,
1911 &arm_default_vec_cost,
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 8, /* Memset max inline. */
1915 2, /* Issue rate. */
1916 ARM_PREFETCH_NOT_BENEFICIAL,
1917 tune_params::PREF_CONST_POOL_TRUE,
1918 tune_params::PREF_LDRD_FALSE,
1919 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1920 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1921 tune_params::DISPARAGE_FLAGS_NEITHER,
1922 tune_params::PREF_NEON_STRINGOPS_FALSE,
1923 tune_params::FUSE_NOTHING,
1924 tune_params::SCHED_AUTOPREF_OFF
1925 };
1926
1927 const struct tune_params arm_v6t2_tune =
1928 {
1929 &generic_extra_costs, /* Insn extra costs. */
1930 &generic_addr_mode_costs, /* Addressing mode costs. */
1931 NULL, /* Sched adj cost. */
1932 arm_default_branch_cost,
1933 &arm_default_vec_cost,
1934 1, /* Constant limit. */
1935 5, /* Max cond insns. */
1936 8, /* Memset max inline. */
1937 1, /* Issue rate. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 tune_params::PREF_CONST_POOL_FALSE,
1940 tune_params::PREF_LDRD_FALSE,
1941 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1942 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1943 tune_params::DISPARAGE_FLAGS_NEITHER,
1944 tune_params::PREF_NEON_STRINGOPS_FALSE,
1945 tune_params::FUSE_NOTHING,
1946 tune_params::SCHED_AUTOPREF_OFF
1947 };
1948
1949
1950 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1951 const struct tune_params arm_cortex_tune =
1952 {
1953 &generic_extra_costs,
1954 &generic_addr_mode_costs, /* Addressing mode costs. */
1955 NULL, /* Sched adj cost. */
1956 arm_default_branch_cost,
1957 &arm_default_vec_cost,
1958 1, /* Constant limit. */
1959 5, /* Max cond insns. */
1960 8, /* Memset max inline. */
1961 2, /* Issue rate. */
1962 ARM_PREFETCH_NOT_BENEFICIAL,
1963 tune_params::PREF_CONST_POOL_FALSE,
1964 tune_params::PREF_LDRD_FALSE,
1965 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1966 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1967 tune_params::DISPARAGE_FLAGS_NEITHER,
1968 tune_params::PREF_NEON_STRINGOPS_FALSE,
1969 tune_params::FUSE_NOTHING,
1970 tune_params::SCHED_AUTOPREF_OFF
1971 };
1972
1973 const struct tune_params arm_cortex_a8_tune =
1974 {
1975 &cortexa8_extra_costs,
1976 &generic_addr_mode_costs, /* Addressing mode costs. */
1977 NULL, /* Sched adj cost. */
1978 arm_default_branch_cost,
1979 &arm_default_vec_cost,
1980 1, /* Constant limit. */
1981 5, /* Max cond insns. */
1982 8, /* Memset max inline. */
1983 2, /* Issue rate. */
1984 ARM_PREFETCH_NOT_BENEFICIAL,
1985 tune_params::PREF_CONST_POOL_FALSE,
1986 tune_params::PREF_LDRD_FALSE,
1987 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1988 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1989 tune_params::DISPARAGE_FLAGS_NEITHER,
1990 tune_params::PREF_NEON_STRINGOPS_TRUE,
1991 tune_params::FUSE_NOTHING,
1992 tune_params::SCHED_AUTOPREF_OFF
1993 };
1994
1995 const struct tune_params arm_cortex_a7_tune =
1996 {
1997 &cortexa7_extra_costs,
1998 &generic_addr_mode_costs, /* Addressing mode costs. */
1999 NULL, /* Sched adj cost. */
2000 arm_default_branch_cost,
2001 &arm_default_vec_cost,
2002 1, /* Constant limit. */
2003 5, /* Max cond insns. */
2004 8, /* Memset max inline. */
2005 2, /* Issue rate. */
2006 ARM_PREFETCH_NOT_BENEFICIAL,
2007 tune_params::PREF_CONST_POOL_FALSE,
2008 tune_params::PREF_LDRD_FALSE,
2009 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2010 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2011 tune_params::DISPARAGE_FLAGS_NEITHER,
2012 tune_params::PREF_NEON_STRINGOPS_TRUE,
2013 tune_params::FUSE_NOTHING,
2014 tune_params::SCHED_AUTOPREF_OFF
2015 };
2016
2017 const struct tune_params arm_cortex_a15_tune =
2018 {
2019 &cortexa15_extra_costs,
2020 &generic_addr_mode_costs, /* Addressing mode costs. */
2021 NULL, /* Sched adj cost. */
2022 arm_default_branch_cost,
2023 &arm_default_vec_cost,
2024 1, /* Constant limit. */
2025 2, /* Max cond insns. */
2026 8, /* Memset max inline. */
2027 3, /* Issue rate. */
2028 ARM_PREFETCH_NOT_BENEFICIAL,
2029 tune_params::PREF_CONST_POOL_FALSE,
2030 tune_params::PREF_LDRD_TRUE,
2031 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2032 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2033 tune_params::DISPARAGE_FLAGS_ALL,
2034 tune_params::PREF_NEON_STRINGOPS_TRUE,
2035 tune_params::FUSE_NOTHING,
2036 tune_params::SCHED_AUTOPREF_FULL
2037 };
2038
2039 const struct tune_params arm_cortex_a35_tune =
2040 {
2041 &cortexa53_extra_costs,
2042 &generic_addr_mode_costs, /* Addressing mode costs. */
2043 NULL, /* Sched adj cost. */
2044 arm_default_branch_cost,
2045 &arm_default_vec_cost,
2046 1, /* Constant limit. */
2047 5, /* Max cond insns. */
2048 8, /* Memset max inline. */
2049 1, /* Issue rate. */
2050 ARM_PREFETCH_NOT_BENEFICIAL,
2051 tune_params::PREF_CONST_POOL_FALSE,
2052 tune_params::PREF_LDRD_FALSE,
2053 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2054 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2055 tune_params::DISPARAGE_FLAGS_NEITHER,
2056 tune_params::PREF_NEON_STRINGOPS_TRUE,
2057 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2058 tune_params::SCHED_AUTOPREF_OFF
2059 };
2060
2061 const struct tune_params arm_cortex_a53_tune =
2062 {
2063 &cortexa53_extra_costs,
2064 &generic_addr_mode_costs, /* Addressing mode costs. */
2065 NULL, /* Sched adj cost. */
2066 arm_default_branch_cost,
2067 &arm_default_vec_cost,
2068 1, /* Constant limit. */
2069 5, /* Max cond insns. */
2070 8, /* Memset max inline. */
2071 2, /* Issue rate. */
2072 ARM_PREFETCH_NOT_BENEFICIAL,
2073 tune_params::PREF_CONST_POOL_FALSE,
2074 tune_params::PREF_LDRD_FALSE,
2075 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2076 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2077 tune_params::DISPARAGE_FLAGS_NEITHER,
2078 tune_params::PREF_NEON_STRINGOPS_TRUE,
2079 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2080 tune_params::SCHED_AUTOPREF_OFF
2081 };
2082
2083 const struct tune_params arm_cortex_a57_tune =
2084 {
2085 &cortexa57_extra_costs,
2086 &generic_addr_mode_costs, /* Addressing mode costs. */
2087 NULL, /* Sched adj cost. */
2088 arm_default_branch_cost,
2089 &arm_default_vec_cost,
2090 1, /* Constant limit. */
2091 2, /* Max cond insns. */
2092 8, /* Memset max inline. */
2093 3, /* Issue rate. */
2094 ARM_PREFETCH_NOT_BENEFICIAL,
2095 tune_params::PREF_CONST_POOL_FALSE,
2096 tune_params::PREF_LDRD_TRUE,
2097 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2098 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2099 tune_params::DISPARAGE_FLAGS_ALL,
2100 tune_params::PREF_NEON_STRINGOPS_TRUE,
2101 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2102 tune_params::SCHED_AUTOPREF_FULL
2103 };
2104
2105 const struct tune_params arm_exynosm1_tune =
2106 {
2107 &exynosm1_extra_costs,
2108 &generic_addr_mode_costs, /* Addressing mode costs. */
2109 NULL, /* Sched adj cost. */
2110 arm_default_branch_cost,
2111 &arm_default_vec_cost,
2112 1, /* Constant limit. */
2113 2, /* Max cond insns. */
2114 8, /* Memset max inline. */
2115 3, /* Issue rate. */
2116 ARM_PREFETCH_NOT_BENEFICIAL,
2117 tune_params::PREF_CONST_POOL_FALSE,
2118 tune_params::PREF_LDRD_TRUE,
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2120 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2121 tune_params::DISPARAGE_FLAGS_ALL,
2122 tune_params::PREF_NEON_STRINGOPS_TRUE,
2123 tune_params::FUSE_NOTHING,
2124 tune_params::SCHED_AUTOPREF_OFF
2125 };
2126
2127 const struct tune_params arm_xgene1_tune =
2128 {
2129 &xgene1_extra_costs,
2130 &generic_addr_mode_costs, /* Addressing mode costs. */
2131 NULL, /* Sched adj cost. */
2132 arm_default_branch_cost,
2133 &arm_default_vec_cost,
2134 1, /* Constant limit. */
2135 2, /* Max cond insns. */
2136 32, /* Memset max inline. */
2137 4, /* Issue rate. */
2138 ARM_PREFETCH_NOT_BENEFICIAL,
2139 tune_params::PREF_CONST_POOL_FALSE,
2140 tune_params::PREF_LDRD_TRUE,
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2143 tune_params::DISPARAGE_FLAGS_ALL,
2144 tune_params::PREF_NEON_STRINGOPS_FALSE,
2145 tune_params::FUSE_NOTHING,
2146 tune_params::SCHED_AUTOPREF_OFF
2147 };
2148
2149 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2150 less appealing. Set max_insns_skipped to a low value. */
2151
2152 const struct tune_params arm_cortex_a5_tune =
2153 {
2154 &cortexa5_extra_costs,
2155 &generic_addr_mode_costs, /* Addressing mode costs. */
2156 NULL, /* Sched adj cost. */
2157 arm_cortex_a5_branch_cost,
2158 &arm_default_vec_cost,
2159 1, /* Constant limit. */
2160 1, /* Max cond insns. */
2161 8, /* Memset max inline. */
2162 2, /* Issue rate. */
2163 ARM_PREFETCH_NOT_BENEFICIAL,
2164 tune_params::PREF_CONST_POOL_FALSE,
2165 tune_params::PREF_LDRD_FALSE,
2166 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2167 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2168 tune_params::DISPARAGE_FLAGS_NEITHER,
2169 tune_params::PREF_NEON_STRINGOPS_TRUE,
2170 tune_params::FUSE_NOTHING,
2171 tune_params::SCHED_AUTOPREF_OFF
2172 };
2173
2174 const struct tune_params arm_cortex_a9_tune =
2175 {
2176 &cortexa9_extra_costs,
2177 &generic_addr_mode_costs, /* Addressing mode costs. */
2178 cortex_a9_sched_adjust_cost,
2179 arm_default_branch_cost,
2180 &arm_default_vec_cost,
2181 1, /* Constant limit. */
2182 5, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 2, /* Issue rate. */
2185 ARM_PREFETCH_BENEFICIAL(4,32,32),
2186 tune_params::PREF_CONST_POOL_FALSE,
2187 tune_params::PREF_LDRD_FALSE,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER,
2191 tune_params::PREF_NEON_STRINGOPS_FALSE,
2192 tune_params::FUSE_NOTHING,
2193 tune_params::SCHED_AUTOPREF_OFF
2194 };
2195
2196 const struct tune_params arm_cortex_a12_tune =
2197 {
2198 &cortexa12_extra_costs,
2199 &generic_addr_mode_costs, /* Addressing mode costs. */
2200 NULL, /* Sched adj cost. */
2201 arm_default_branch_cost,
2202 &arm_default_vec_cost, /* Vectorizer costs. */
2203 1, /* Constant limit. */
2204 2, /* Max cond insns. */
2205 8, /* Memset max inline. */
2206 2, /* Issue rate. */
2207 ARM_PREFETCH_NOT_BENEFICIAL,
2208 tune_params::PREF_CONST_POOL_FALSE,
2209 tune_params::PREF_LDRD_TRUE,
2210 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2212 tune_params::DISPARAGE_FLAGS_ALL,
2213 tune_params::PREF_NEON_STRINGOPS_TRUE,
2214 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2215 tune_params::SCHED_AUTOPREF_OFF
2216 };
2217
2218 const struct tune_params arm_cortex_a73_tune =
2219 {
2220 &cortexa57_extra_costs,
2221 &generic_addr_mode_costs, /* Addressing mode costs. */
2222 NULL, /* Sched adj cost. */
2223 arm_default_branch_cost,
2224 &arm_default_vec_cost, /* Vectorizer costs. */
2225 1, /* Constant limit. */
2226 2, /* Max cond insns. */
2227 8, /* Memset max inline. */
2228 2, /* Issue rate. */
2229 ARM_PREFETCH_NOT_BENEFICIAL,
2230 tune_params::PREF_CONST_POOL_FALSE,
2231 tune_params::PREF_LDRD_TRUE,
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2234 tune_params::DISPARAGE_FLAGS_ALL,
2235 tune_params::PREF_NEON_STRINGOPS_TRUE,
2236 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2237 tune_params::SCHED_AUTOPREF_FULL
2238 };
2239
2240 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2241 cycle to execute each. An LDR from the constant pool also takes two cycles
2242 to execute, but mildly increases pipelining opportunity (consecutive
2243 loads/stores can be pipelined together, saving one cycle), and may also
2244 improve icache utilisation. Hence we prefer the constant pool for such
2245 processors. */
2246
2247 const struct tune_params arm_v7m_tune =
2248 {
2249 &v7m_extra_costs,
2250 &generic_addr_mode_costs, /* Addressing mode costs. */
2251 NULL, /* Sched adj cost. */
2252 arm_cortex_m_branch_cost,
2253 &arm_default_vec_cost,
2254 1, /* Constant limit. */
2255 2, /* Max cond insns. */
2256 8, /* Memset max inline. */
2257 1, /* Issue rate. */
2258 ARM_PREFETCH_NOT_BENEFICIAL,
2259 tune_params::PREF_CONST_POOL_TRUE,
2260 tune_params::PREF_LDRD_FALSE,
2261 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2263 tune_params::DISPARAGE_FLAGS_NEITHER,
2264 tune_params::PREF_NEON_STRINGOPS_FALSE,
2265 tune_params::FUSE_NOTHING,
2266 tune_params::SCHED_AUTOPREF_OFF
2267 };
2268
2269 /* Cortex-M7 tuning. */
2270
2271 const struct tune_params arm_cortex_m7_tune =
2272 {
2273 &v7m_extra_costs,
2274 &generic_addr_mode_costs, /* Addressing mode costs. */
2275 NULL, /* Sched adj cost. */
2276 arm_cortex_m7_branch_cost,
2277 &arm_default_vec_cost,
2278 0, /* Constant limit. */
2279 1, /* Max cond insns. */
2280 8, /* Memset max inline. */
2281 2, /* Issue rate. */
2282 ARM_PREFETCH_NOT_BENEFICIAL,
2283 tune_params::PREF_CONST_POOL_TRUE,
2284 tune_params::PREF_LDRD_FALSE,
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2287 tune_params::DISPARAGE_FLAGS_NEITHER,
2288 tune_params::PREF_NEON_STRINGOPS_FALSE,
2289 tune_params::FUSE_NOTHING,
2290 tune_params::SCHED_AUTOPREF_OFF
2291 };
2292
2293 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2294 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2295 cortex-m23. */
2296 const struct tune_params arm_v6m_tune =
2297 {
2298 &generic_extra_costs, /* Insn extra costs. */
2299 &generic_addr_mode_costs, /* Addressing mode costs. */
2300 NULL, /* Sched adj cost. */
2301 arm_default_branch_cost,
2302 &arm_default_vec_cost, /* Vectorizer costs. */
2303 1, /* Constant limit. */
2304 5, /* Max cond insns. */
2305 8, /* Memset max inline. */
2306 1, /* Issue rate. */
2307 ARM_PREFETCH_NOT_BENEFICIAL,
2308 tune_params::PREF_CONST_POOL_FALSE,
2309 tune_params::PREF_LDRD_FALSE,
2310 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2312 tune_params::DISPARAGE_FLAGS_NEITHER,
2313 tune_params::PREF_NEON_STRINGOPS_FALSE,
2314 tune_params::FUSE_NOTHING,
2315 tune_params::SCHED_AUTOPREF_OFF
2316 };
2317
2318 const struct tune_params arm_fa726te_tune =
2319 {
2320 &generic_extra_costs, /* Insn extra costs. */
2321 &generic_addr_mode_costs, /* Addressing mode costs. */
2322 fa726te_sched_adjust_cost,
2323 arm_default_branch_cost,
2324 &arm_default_vec_cost,
2325 1, /* Constant limit. */
2326 5, /* Max cond insns. */
2327 8, /* Memset max inline. */
2328 2, /* Issue rate. */
2329 ARM_PREFETCH_NOT_BENEFICIAL,
2330 tune_params::PREF_CONST_POOL_TRUE,
2331 tune_params::PREF_LDRD_FALSE,
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2333 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2334 tune_params::DISPARAGE_FLAGS_NEITHER,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 /* Auto-generated CPU, FPU and architecture tables. */
2341 #include "arm-cpu-data.h"
2342
2343 /* The name of the preprocessor macro to define for this architecture. PROFILE
2344 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2345 is thus chosen to be big enough to hold the longest architecture name. */
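/* Substituting the profile string as described above, an armv8-a target, for
   example, ends up with the macro name __ARM_ARCH_8A__.  */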
2346
2347 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2348
2349 /* Supported TLS relocations. */
2350
2351 enum tls_reloc {
2352 TLS_GD32,
2353 TLS_LDM32,
2354 TLS_LDO32,
2355 TLS_IE32,
2356 TLS_LE32,
2357 TLS_DESCSEQ /* GNU scheme */
2358 };
2359
2360 /* The maximum number of insns to be used when loading a constant. */
2361 inline static int
2362 arm_constant_limit (bool size_p)
2363 {
2364 return size_p ? 1 : current_tune->constant_limit;
2365 }
2366
2367 /* Emit an insn that's a simple single-set. Both the operands must be known
2368 to be valid. */
2369 inline static rtx_insn *
2370 emit_set_insn (rtx x, rtx y)
2371 {
2372 return emit_insn (gen_rtx_SET (x, y));
2373 }
2374
2375 /* Return the number of bits set in VALUE. */
2376 static unsigned
2377 bit_count (unsigned long value)
2378 {
2379 unsigned long count = 0;
2380
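/* Each iteration clears the lowest set bit, so the loop body runs once per
   set bit of VALUE rather than once per bit position.  */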
2381 while (value)
2382 {
2383 count++;
2384 value &= value - 1; /* Clear the least-significant set bit. */
2385 }
2386
2387 return count;
2388 }
2389
2390 /* Return the number of bits set in BMAP. */
2391 static unsigned
2392 bitmap_popcount (const sbitmap bmap)
2393 {
2394 unsigned int count = 0;
2395 unsigned int n = 0;
2396 sbitmap_iterator sbi;
2397
2398 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2399 count++;
2400 return count;
2401 }
2402
2403 typedef struct
2404 {
2405 machine_mode mode;
2406 const char *name;
2407 } arm_fixed_mode_set;
2408
2409 /* A small helper for setting fixed-point library libfuncs. */
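/* For example, arm_init_libfuncs below calls this as
   (add_optab, E_QQmode, "add", "qq", 3), which registers the libcall name
   "__gnu_addqq3"; a NUM_SUFFIX of zero omits the trailing digit.  */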
2410
2411 static void
2412 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2413 const char *funcname, const char *modename,
2414 int num_suffix)
2415 {
2416 char buffer[50];
2417
2418 if (num_suffix == 0)
2419 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2420 else
2421 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2422
2423 set_optab_libfunc (optable, mode, buffer);
2424 }
2425
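/* Like arm_set_fixed_optab_libfunc, but for conversions between two modes.
   Given the sprintf below, a QQmode-to-HQmode fract conversion (both signed
   fract modes) is registered as "__gnu_fractqqhq2", while an SImode-to-SQmode
   conversion takes no "2" suffix and becomes "__gnu_fractsisq".  */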
2426 static void
2427 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2428 machine_mode from, const char *funcname,
2429 const char *toname, const char *fromname)
2430 {
2431 char buffer[50];
2432 const char *maybe_suffix_2 = "";
2433
2434 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2435 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2436 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2437 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2438 maybe_suffix_2 = "2";
2439
2440 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2441 maybe_suffix_2);
2442
2443 set_conv_libfunc (optable, to, from, buffer);
2444 }
2445
2446 static GTY(()) rtx speculation_barrier_libfunc;
2447
2448 /* Set up library functions unique to ARM. */
2449 static void
2450 arm_init_libfuncs (void)
2451 {
2452 /* For Linux, we have access to kernel support for atomic operations. */
2453 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2454 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2455
2456 /* There are no special library functions unless we are using the
2457 ARM BPABI. */
2458 if (!TARGET_BPABI)
2459 return;
2460
2461 /* The functions below are described in Section 4 of the "Run-Time
2462 ABI for the ARM architecture", Version 1.0. */
2463
2464 /* Double-precision floating-point arithmetic. Table 2. */
2465 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2466 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2467 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2468 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2469 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2470
2471 /* Double-precision comparisons. Table 3. */
2472 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2473 set_optab_libfunc (ne_optab, DFmode, NULL);
2474 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2475 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2476 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2477 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2478 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2479
2480 /* Single-precision floating-point arithmetic. Table 4. */
2481 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2482 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2483 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2484 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2485 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2486
2487 /* Single-precision comparisons. Table 5. */
2488 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2489 set_optab_libfunc (ne_optab, SFmode, NULL);
2490 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2491 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2492 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2493 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2494 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2495
2496 /* Floating-point to integer conversions. Table 6. */
2497 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2498 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2499 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2500 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2501 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2502 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2503 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2504 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2505
2506 /* Conversions between floating types. Table 7. */
2507 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2508 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2509
2510 /* Integer to floating-point conversions. Table 8. */
2511 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2512 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2513 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2514 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2515 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2516 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2517 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2518 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2519
2520 /* Long long. Table 9. */
2521 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2522 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2523 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2524 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2525 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2526 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2527 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2528 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2529
2530 /* Integer (32/32->32) division. \S 4.3.1. */
2531 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2532 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2533
2534 /* The divmod functions are designed so that they can be used for
2535 plain division, even though they return both the quotient and the
2536 remainder. The quotient is returned in the usual location (i.e.,
2537 r0 for SImode, {r0, r1} for DImode), just as would be expected
2538 for an ordinary division routine. Because the AAPCS calling
2539 conventions specify that all of { r0, r1, r2, r3 } are
2540 call-clobbered registers, there is no need to tell the compiler
2541 explicitly that those registers are clobbered by these
2542 routines. */
2543 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2544 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2545
2546 /* For SImode division the ABI provides div-without-mod routines,
2547 which are faster. */
2548 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2549 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2550
2551 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2552 divmod libcalls instead. */
2553 set_optab_libfunc (smod_optab, DImode, NULL);
2554 set_optab_libfunc (umod_optab, DImode, NULL);
2555 set_optab_libfunc (smod_optab, SImode, NULL);
2556 set_optab_libfunc (umod_optab, SImode, NULL);
2557
2558 /* Half-precision float operations. The compiler handles all operations
2559 with NULL libfuncs by converting to SFmode. */
2560 switch (arm_fp16_format)
2561 {
2562 case ARM_FP16_FORMAT_IEEE:
2563 case ARM_FP16_FORMAT_ALTERNATIVE:
2564
2565 /* Conversions. */
2566 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2567 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2568 ? "__gnu_f2h_ieee"
2569 : "__gnu_f2h_alternative"));
2570 set_conv_libfunc (sext_optab, SFmode, HFmode,
2571 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2572 ? "__gnu_h2f_ieee"
2573 : "__gnu_h2f_alternative"));
2574
2575 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2576 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2577 ? "__gnu_d2h_ieee"
2578 : "__gnu_d2h_alternative"));
2579
2580 /* Arithmetic. */
2581 set_optab_libfunc (add_optab, HFmode, NULL);
2582 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2583 set_optab_libfunc (smul_optab, HFmode, NULL);
2584 set_optab_libfunc (neg_optab, HFmode, NULL);
2585 set_optab_libfunc (sub_optab, HFmode, NULL);
2586
2587 /* Comparisons. */
2588 set_optab_libfunc (eq_optab, HFmode, NULL);
2589 set_optab_libfunc (ne_optab, HFmode, NULL);
2590 set_optab_libfunc (lt_optab, HFmode, NULL);
2591 set_optab_libfunc (le_optab, HFmode, NULL);
2592 set_optab_libfunc (ge_optab, HFmode, NULL);
2593 set_optab_libfunc (gt_optab, HFmode, NULL);
2594 set_optab_libfunc (unord_optab, HFmode, NULL);
2595 break;
2596
2597 default:
2598 break;
2599 }
2600
2601 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2602 {
2603 const arm_fixed_mode_set fixed_arith_modes[] =
2604 {
2605 { E_QQmode, "qq" },
2606 { E_UQQmode, "uqq" },
2607 { E_HQmode, "hq" },
2608 { E_UHQmode, "uhq" },
2609 { E_SQmode, "sq" },
2610 { E_USQmode, "usq" },
2611 { E_DQmode, "dq" },
2612 { E_UDQmode, "udq" },
2613 { E_TQmode, "tq" },
2614 { E_UTQmode, "utq" },
2615 { E_HAmode, "ha" },
2616 { E_UHAmode, "uha" },
2617 { E_SAmode, "sa" },
2618 { E_USAmode, "usa" },
2619 { E_DAmode, "da" },
2620 { E_UDAmode, "uda" },
2621 { E_TAmode, "ta" },
2622 { E_UTAmode, "uta" }
2623 };
2624 const arm_fixed_mode_set fixed_conv_modes[] =
2625 {
2626 { E_QQmode, "qq" },
2627 { E_UQQmode, "uqq" },
2628 { E_HQmode, "hq" },
2629 { E_UHQmode, "uhq" },
2630 { E_SQmode, "sq" },
2631 { E_USQmode, "usq" },
2632 { E_DQmode, "dq" },
2633 { E_UDQmode, "udq" },
2634 { E_TQmode, "tq" },
2635 { E_UTQmode, "utq" },
2636 { E_HAmode, "ha" },
2637 { E_UHAmode, "uha" },
2638 { E_SAmode, "sa" },
2639 { E_USAmode, "usa" },
2640 { E_DAmode, "da" },
2641 { E_UDAmode, "uda" },
2642 { E_TAmode, "ta" },
2643 { E_UTAmode, "uta" },
2644 { E_QImode, "qi" },
2645 { E_HImode, "hi" },
2646 { E_SImode, "si" },
2647 { E_DImode, "di" },
2648 { E_TImode, "ti" },
2649 { E_SFmode, "sf" },
2650 { E_DFmode, "df" }
2651 };
2652 unsigned int i, j;
2653
2654 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2655 {
2656 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2657 "add", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2659 "ssadd", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2661 "usadd", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2663 "sub", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2665 "sssub", fixed_arith_modes[i].name, 3);
2666 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2667 "ussub", fixed_arith_modes[i].name, 3);
2668 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2669 "mul", fixed_arith_modes[i].name, 3);
2670 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2671 "ssmul", fixed_arith_modes[i].name, 3);
2672 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2673 "usmul", fixed_arith_modes[i].name, 3);
2674 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2675 "div", fixed_arith_modes[i].name, 3);
2676 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2677 "udiv", fixed_arith_modes[i].name, 3);
2678 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2679 "ssdiv", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2681 "usdiv", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2683 "neg", fixed_arith_modes[i].name, 2);
2684 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2685 "ssneg", fixed_arith_modes[i].name, 2);
2686 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2687 "usneg", fixed_arith_modes[i].name, 2);
2688 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2689 "ashl", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2691 "ashr", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2693 "lshr", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2695 "ssashl", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2697 "usashl", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2699 "cmp", fixed_arith_modes[i].name, 2);
2700 }
2701
2702 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2703 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2704 {
2705 if (i == j
2706 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2707 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2708 continue;
2709
2710 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2711 fixed_conv_modes[j].mode, "fract",
2712 fixed_conv_modes[i].name,
2713 fixed_conv_modes[j].name);
2714 arm_set_fixed_conv_libfunc (satfract_optab,
2715 fixed_conv_modes[i].mode,
2716 fixed_conv_modes[j].mode, "satfract",
2717 fixed_conv_modes[i].name,
2718 fixed_conv_modes[j].name);
2719 arm_set_fixed_conv_libfunc (fractuns_optab,
2720 fixed_conv_modes[i].mode,
2721 fixed_conv_modes[j].mode, "fractuns",
2722 fixed_conv_modes[i].name,
2723 fixed_conv_modes[j].name);
2724 arm_set_fixed_conv_libfunc (satfractuns_optab,
2725 fixed_conv_modes[i].mode,
2726 fixed_conv_modes[j].mode, "satfractuns",
2727 fixed_conv_modes[i].name,
2728 fixed_conv_modes[j].name);
2729 }
2730 }
2731
2732 if (TARGET_AAPCS_BASED)
2733 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2734
2735 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2736 }
2737
2738 /* On AAPCS systems, this is the "struct __va_list". */
2739 static GTY(()) tree va_list_type;
2740
2741 /* Return the type to use as __builtin_va_list. */
2742 static tree
2743 arm_build_builtin_va_list (void)
2744 {
2745 tree va_list_name;
2746 tree ap_field;
2747
2748 if (!TARGET_AAPCS_BASED)
2749 return std_build_builtin_va_list ();
2750
2751 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2752 defined as:
2753
2754 struct __va_list
2755 {
2756 void *__ap;
2757 };
2758
2759 The C Library ABI further reinforces this definition in \S
2760 4.1.
2761
2762 We must follow this definition exactly. The structure tag
2763 name is visible in C++ mangled names, and thus forms a part
2764 of the ABI. The field name may be used by people who
2765 #include <stdarg.h>. */
2766 /* Create the type. */
2767 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2768 /* Give it the required name. */
2769 va_list_name = build_decl (BUILTINS_LOCATION,
2770 TYPE_DECL,
2771 get_identifier ("__va_list"),
2772 va_list_type);
2773 DECL_ARTIFICIAL (va_list_name) = 1;
2774 TYPE_NAME (va_list_type) = va_list_name;
2775 TYPE_STUB_DECL (va_list_type) = va_list_name;
2776 /* Create the __ap field. */
2777 ap_field = build_decl (BUILTINS_LOCATION,
2778 FIELD_DECL,
2779 get_identifier ("__ap"),
2780 ptr_type_node);
2781 DECL_ARTIFICIAL (ap_field) = 1;
2782 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2783 TYPE_FIELDS (va_list_type) = ap_field;
2784 /* Compute its layout. */
2785 layout_type (va_list_type);
2786
2787 return va_list_type;
2788 }
2789
2790 /* Return an expression of type "void *" pointing to the next
2791 available argument in a variable-argument list. VALIST is the
2792 user-level va_list object, of type __builtin_va_list. */
2793 static tree
2794 arm_extract_valist_ptr (tree valist)
2795 {
2796 if (TREE_TYPE (valist) == error_mark_node)
2797 return error_mark_node;
2798
2799 /* On an AAPCS target, the pointer is stored within "struct
2800 va_list". */
2801 if (TARGET_AAPCS_BASED)
2802 {
2803 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2804 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2805 valist, ap_field, NULL_TREE);
2806 }
2807
2808 return valist;
2809 }
2810
2811 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2812 static void
2813 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2814 {
2815 valist = arm_extract_valist_ptr (valist);
2816 std_expand_builtin_va_start (valist, nextarg);
2817 }
2818
2819 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2820 static tree
2821 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2822 gimple_seq *post_p)
2823 {
2824 valist = arm_extract_valist_ptr (valist);
2825 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2826 }
2827
2828 /* Check any incompatible options that the user has specified. */
2829 static void
2830 arm_option_check_internal (struct gcc_options *opts)
2831 {
2832 int flags = opts->x_target_flags;
2833
2834 /* iWMMXt and NEON are incompatible. */
2835 if (TARGET_IWMMXT
2836 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2837 error ("iWMMXt and NEON are incompatible");
2838
2839 /* Make sure that the processor choice does not conflict with any of the
2840 other command line choices. */
2841 if (TARGET_ARM_P (flags)
2842 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2843 error ("target CPU does not support ARM mode");
2844
2845 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2846 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2847 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2848
2849 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2850 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2851
2852 /* If this target is normally configured to use APCS frames, warn if they
2853 are turned off and debugging is turned on. */
2854 if (TARGET_ARM_P (flags)
2855 && write_symbols != NO_DEBUG
2856 && !TARGET_APCS_FRAME
2857 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2858 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2859 "debugging");
2860
2861 /* iWMMXt unsupported under Thumb mode. */
2862 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2863 error ("iWMMXt unsupported under Thumb mode");
2864
2865 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2866 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2867
2868 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2869 {
2870 error ("RTP PIC is incompatible with Thumb");
2871 flag_pic = 0;
2872 }
2873
2874 if (target_pure_code || target_slow_flash_data)
2875 {
2876 const char *flag = (target_pure_code ? "-mpure-code" :
2877 "-mslow-flash-data");
2878
2879 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2880 with MOVT. */
2881 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2882 error ("%s only supports non-pic code on M-profile targets with the "
2883 "MOVT instruction", flag);
2884
2885 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2886 -mword-relocations forbids relocation of MOVT/MOVW. */
2887 if (target_word_relocations)
2888 error ("%s incompatible with %<-mword-relocations%>", flag);
2889 }
2890 }
2891
2892 /* Recompute the global settings depending on target attribute options. */
2893
2894 static void
2895 arm_option_params_internal (void)
2896 {
2897 /* If we are not using the default (ARM mode) section anchor offset
2898 ranges, then set the correct ranges now. */
2899 if (TARGET_THUMB1)
2900 {
2901 /* Thumb-1 LDR instructions cannot have negative offsets.
2902 Permissible positive offset ranges are 5-bit (for byte loads),
2903 6-bit (for halfword loads), or 7-bit (for word loads).
2904 Empirical results suggest a 7-bit anchor range gives the best
2905 overall code size. */
2906 targetm.min_anchor_offset = 0;
2907 targetm.max_anchor_offset = 127;
2908 }
2909 else if (TARGET_THUMB2)
2910 {
2911 /* The minimum is set such that the total size of the block
2912 for a particular anchor is 248 + 1 + 4095 bytes, which is
2913 divisible by eight, ensuring natural spacing of anchors. */
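/* (248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.)  */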
2914 targetm.min_anchor_offset = -248;
2915 targetm.max_anchor_offset = 4095;
2916 }
2917 else
2918 {
2919 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2920 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2921 }
2922
2923 /* Increase the number of conditional instructions with -Os. */
2924 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2925
2926 /* For THUMB2, we limit the conditional sequence to one IT block. */
2927 if (TARGET_THUMB2)
2928 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2929 }
2930
2931 /* True if -mflip-thumb should next add an attribute for the default
2932 mode, false if it should next add an attribute for the opposite mode. */
2933 static GTY(()) bool thumb_flipper;
2934
2935 /* Options after initial target override. */
2936 static GTY(()) tree init_optimize;
2937
2938 static void
2939 arm_override_options_after_change_1 (struct gcc_options *opts)
2940 {
2941 /* -falign-functions without argument: supply one. */
2942 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2943 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2944 && opts->x_optimize_size ? "2" : "4";
2945 }
2946
2947 /* Implement targetm.override_options_after_change. */
2948
2949 static void
2950 arm_override_options_after_change (void)
2951 {
2952 arm_configure_build_target (&arm_active_target,
2953 TREE_TARGET_OPTION (target_option_default_node),
2954 &global_options_set, false);
2955
2956 arm_override_options_after_change_1 (&global_options);
2957 }
2958
2959 /* Implement TARGET_OPTION_SAVE. */
2960 static void
2961 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2962 {
2963 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2964 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2965 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2966 }
2967
2968 /* Implement TARGET_OPTION_RESTORE. */
2969 static void
2970 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2971 {
2972 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2973 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2974 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2975 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2976 false);
2977 }
2978
2979 /* Reset options between modes that the user has specified. */
2980 static void
2981 arm_option_override_internal (struct gcc_options *opts,
2982 struct gcc_options *opts_set)
2983 {
2984 arm_override_options_after_change_1 (opts);
2985
2986 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2987 {
2988 /* The default is to enable interworking, so this warning message would
2989 be confusing to users who have just compiled with
2990 e.g., -march=armv4. */
2991 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2992 opts->x_target_flags &= ~MASK_INTERWORK;
2993 }
2994
2995 if (TARGET_THUMB_P (opts->x_target_flags)
2996 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2997 {
2998 warning (0, "target CPU does not support THUMB instructions");
2999 opts->x_target_flags &= ~MASK_THUMB;
3000 }
3001
3002 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3003 {
3004 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3005 opts->x_target_flags &= ~MASK_APCS_FRAME;
3006 }
3007
3008 /* Callee super interworking implies thumb interworking. Adding
3009 this to the flags here simplifies the logic elsewhere. */
3010 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3011 opts->x_target_flags |= MASK_INTERWORK;
3012
3013 /* Need to remember initial values so combinations of options like
3014 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3015 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3016
3017 if (! opts_set->x_arm_restrict_it)
3018 opts->x_arm_restrict_it = arm_arch8;
3019
3020 /* ARM execution state and M profile don't have [restrict] IT. */
3021 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3022 opts->x_arm_restrict_it = 0;
3023
3024 /* Enable -munaligned-access by default for
3025 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3026 i.e. Thumb2 and ARM state only.
3027 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3028 - ARMv8 architecture-based processors.
3029
3030 Disable -munaligned-access by default for
3031 - all pre-ARMv6 architecture-based processors
3032 - ARMv6-M architecture-based processors
3033 - ARMv8-M Baseline processors. */
3034
3035 if (! opts_set->x_unaligned_access)
3036 {
3037 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3038 && arm_arch6 && (arm_arch_notm || arm_arch7));
3039 }
3040 else if (opts->x_unaligned_access == 1
3041 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3042 {
3043 warning (0, "target CPU does not support unaligned accesses");
3044 opts->x_unaligned_access = 0;
3045 }
3046
3047 /* Don't warn since it's on by default in -O2. */
3048 if (TARGET_THUMB1_P (opts->x_target_flags))
3049 opts->x_flag_schedule_insns = 0;
3050 else
3051 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3052
3053 /* Disable shrink-wrap when optimizing function for size, since it tends to
3054 generate additional returns. */
3055 if (optimize_function_for_size_p (cfun)
3056 && TARGET_THUMB2_P (opts->x_target_flags))
3057 opts->x_flag_shrink_wrap = false;
3058 else
3059 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3060
3061 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3062 - epilogue_insns - does not accurately model the corresponding insns
3063 emitted in the asm file. In particular, see the comment in thumb_exit
3064 'Find out how many of the (return) argument registers we can corrupt'.
3065 As a consequence, the epilogue may clobber registers without fipa-ra
3066 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3067 TODO: Accurately model clobbers for epilogue_insns and reenable
3068 fipa-ra. */
3069 if (TARGET_THUMB1_P (opts->x_target_flags))
3070 opts->x_flag_ipa_ra = 0;
3071 else
3072 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3073
3074 /* Thumb2 inline assembly code should always use unified syntax.
3075 This will apply to ARM and Thumb1 eventually. */
3076 if (TARGET_THUMB2_P (opts->x_target_flags))
3077 opts->x_inline_asm_unified = true;
3078
3079 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3080 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3081 #endif
3082 }
3083
3084 static sbitmap isa_all_fpubits;
3085 static sbitmap isa_quirkbits;
3086
3087 /* Configure a build target TARGET from the user-specified options OPTS and
3088 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3089 architecture have been specified, but the two are not identical. */
3090 void
3091 arm_configure_build_target (struct arm_build_target *target,
3092 struct cl_target_option *opts,
3093 struct gcc_options *opts_set,
3094 bool warn_compatible)
3095 {
3096 const cpu_option *arm_selected_tune = NULL;
3097 const arch_option *arm_selected_arch = NULL;
3098 const cpu_option *arm_selected_cpu = NULL;
3099 const arm_fpu_desc *arm_selected_fpu = NULL;
3100 const char *tune_opts = NULL;
3101 const char *arch_opts = NULL;
3102 const char *cpu_opts = NULL;
3103
3104 bitmap_clear (target->isa);
3105 target->core_name = NULL;
3106 target->arch_name = NULL;
3107
3108 if (opts_set->x_arm_arch_string)
3109 {
3110 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3111 "-march",
3112 opts->x_arm_arch_string);
3113 arch_opts = strchr (opts->x_arm_arch_string, '+');
3114 }
3115
3116 if (opts_set->x_arm_cpu_string)
3117 {
3118 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3119 opts->x_arm_cpu_string);
3120 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3121 arm_selected_tune = arm_selected_cpu;
3122 /* If taking the tuning from -mcpu, we don't need to rescan the
3123 options for tuning. */
3124 }
3125
3126 if (opts_set->x_arm_tune_string)
3127 {
3128 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3129 opts->x_arm_tune_string);
3130 tune_opts = strchr (opts->x_arm_tune_string, '+');
3131 }
3132
3133 if (arm_selected_arch)
3134 {
3135 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3136 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3137 arch_opts);
3138
3139 if (arm_selected_cpu)
3140 {
3141 auto_sbitmap cpu_isa (isa_num_bits);
3142 auto_sbitmap isa_delta (isa_num_bits);
3143
3144 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3145 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3146 cpu_opts);
3147 bitmap_xor (isa_delta, cpu_isa, target->isa);
3148 /* Ignore any bits that are quirk bits. */
3149 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3150 /* Ignore (for now) any bits that might be set by -mfpu. */
3151 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3152
3153 if (!bitmap_empty_p (isa_delta))
3154 {
3155 if (warn_compatible)
3156 warning (0, "switch %<-mcpu=%s%> conflicts "
3157 "with %<-march=%s%> switch",
3158 arm_selected_cpu->common.name,
3159 arm_selected_arch->common.name);
3160 /* -march wins for code generation.
3161 -mcpu wins for default tuning. */
3162 if (!arm_selected_tune)
3163 arm_selected_tune = arm_selected_cpu;
3164
3165 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3166 target->arch_name = arm_selected_arch->common.name;
3167 }
3168 else
3169 {
3170 /* Architecture and CPU are essentially the same.
3171 Prefer the CPU setting. */
3172 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3173 target->core_name = arm_selected_cpu->common.name;
3174 /* Copy the CPU's capabilities, so that we inherit the
3175 appropriate extensions and quirks. */
3176 bitmap_copy (target->isa, cpu_isa);
3177 }
3178 }
3179 else
3180 {
3181 /* Pick a CPU based on the architecture. */
3182 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3183 target->arch_name = arm_selected_arch->common.name;
3184 /* Note: target->core_name is left unset in this path. */
3185 }
3186 }
3187 else if (arm_selected_cpu)
3188 {
3189 target->core_name = arm_selected_cpu->common.name;
3190 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3191 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3192 cpu_opts);
3193 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3194 }
3195 /* If the user did not specify a processor or architecture, choose
3196 one for them. */
3197 else
3198 {
3199 const cpu_option *sel;
3200 auto_sbitmap sought_isa (isa_num_bits);
3201 bitmap_clear (sought_isa);
3202 auto_sbitmap default_isa (isa_num_bits);
3203
3204 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3205 TARGET_CPU_DEFAULT);
3206 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3207 gcc_assert (arm_selected_cpu->common.name);
3208
3209 /* RWE: All of the selection logic below (to the end of this
3210 'if' clause) looks somewhat suspect. It appears to be mostly
3211 there to support forcing thumb support when the default CPU
3212 does not have thumb (somewhat dubious in terms of what the
3213 user might be expecting). I think it should be removed once
3214 support for the pre-thumb era cores is removed. */
3215 sel = arm_selected_cpu;
3216 arm_initialize_isa (default_isa, sel->common.isa_bits);
3217 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3218 cpu_opts);
3219
3220 /* Now check to see if the user has specified any command line
3221 switches that require certain abilities from the cpu. */
3222
3223 if (TARGET_INTERWORK || TARGET_THUMB)
3224 bitmap_set_bit (sought_isa, isa_bit_thumb);
3225
3226 /* If there are such requirements and the default CPU does not
3227 satisfy them, we need to run over the complete list of
3228 cores looking for one that is satisfactory. */
3229 if (!bitmap_empty_p (sought_isa)
3230 && !bitmap_subset_p (sought_isa, default_isa))
3231 {
3232 auto_sbitmap candidate_isa (isa_num_bits);
3233 /* We're only interested in a CPU with at least the
3234 capabilities of the default CPU and the required
3235 additional features. */
3236 bitmap_ior (default_isa, default_isa, sought_isa);
3237
3238 /* Try to locate a CPU type that supports all of the abilities
3239 of the default CPU, plus the extra abilities requested by
3240 the user. */
3241 for (sel = all_cores; sel->common.name != NULL; sel++)
3242 {
3243 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3244 /* An exact match? */
3245 if (bitmap_equal_p (default_isa, candidate_isa))
3246 break;
3247 }
3248
3249 if (sel->common.name == NULL)
3250 {
3251 unsigned current_bit_count = isa_num_bits;
3252 const cpu_option *best_fit = NULL;
3253
3254 /* Ideally we would like to issue an error message here
3255 saying that it was not possible to find a CPU compatible
3256 with the default CPU, but which also supports the command
3257 line options specified by the programmer, and so they
3258 ought to use the -mcpu=<name> command line option to
3259 override the default CPU type.
3260
3261 If we cannot find a CPU that has exactly the
3262 characteristics of the default CPU and the given
3263 command line options we scan the array again looking
3264 for a best match. The best match must have at least
3265 the capabilities of the perfect match. */
3266 for (sel = all_cores; sel->common.name != NULL; sel++)
3267 {
3268 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3269
3270 if (bitmap_subset_p (default_isa, candidate_isa))
3271 {
3272 unsigned count;
3273
3274 bitmap_and_compl (candidate_isa, candidate_isa,
3275 default_isa);
3276 count = bitmap_popcount (candidate_isa);
3277
3278 if (count < current_bit_count)
3279 {
3280 best_fit = sel;
3281 current_bit_count = count;
3282 }
3283 }
3284
3285 gcc_assert (best_fit);
3286 sel = best_fit;
3287 }
3288 }
3289 arm_selected_cpu = sel;
3290 }
3291
3292 /* Now we know the CPU, we can finally initialize the target
3293 structure. */
3294 target->core_name = arm_selected_cpu->common.name;
3295 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3296 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3297 cpu_opts);
3298 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3299 }
3300
3301 gcc_assert (arm_selected_cpu);
3302 gcc_assert (arm_selected_arch);
3303
3304 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3305 {
3306 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3307 auto_sbitmap fpu_bits (isa_num_bits);
3308
3309 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3310 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3311 bitmap_ior (target->isa, target->isa, fpu_bits);
3312 }
3313
3314 if (!arm_selected_tune)
3315 arm_selected_tune = arm_selected_cpu;
3316 else /* Validate the features passed to -mtune. */
3317 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3318
3319 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3320
3321 /* Finish initializing the target structure. */
3322 target->arch_pp_name = arm_selected_arch->arch;
3323 target->base_arch = arm_selected_arch->base_arch;
3324 target->profile = arm_selected_arch->profile;
3325
3326 target->tune_flags = tune_data->tune_flags;
3327 target->tune = tune_data->tune;
3328 target->tune_core = tune_data->scheduler;
3329 arm_option_reconfigure_globals ();
3330 }
3331
3332 /* Fix up any incompatible options that the user has specified. */
3333 static void
3334 arm_option_override (void)
3335 {
3336 static const enum isa_feature fpu_bitlist[]
3337 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3338 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3339 cl_target_option opts;
3340
3341 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3342 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3343
3344 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3345 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3346
3347 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3348
3349 if (!global_options_set.x_arm_fpu_index)
3350 {
3351 bool ok;
3352 int fpu_index;
3353
3354 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3355 CL_TARGET);
3356 gcc_assert (ok);
3357 arm_fpu_index = (enum fpu_type) fpu_index;
3358 }
3359
3360 cl_target_option_save (&opts, &global_options);
3361 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3362 true);
3363
3364 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3365 SUBTARGET_OVERRIDE_OPTIONS;
3366 #endif
3367
3368 /* Initialize boolean versions of the architectural flags, for use
3369 in the arm.md file and for enabling feature flags. */
3370 arm_option_reconfigure_globals ();
3371
3372 arm_tune = arm_active_target.tune_core;
3373 tune_flags = arm_active_target.tune_flags;
3374 current_tune = arm_active_target.tune;
3375
3376 /* TBD: Dwarf info for apcs frame is not handled yet. */
3377 if (TARGET_APCS_FRAME)
3378 flag_shrink_wrap = false;
3379
3380 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3381 {
3382 warning (0, "%<-mapcs-stack-check%> incompatible with "
3383 "%<-mno-apcs-frame%>");
3384 target_flags |= MASK_APCS_FRAME;
3385 }
3386
3387 if (TARGET_POKE_FUNCTION_NAME)
3388 target_flags |= MASK_APCS_FRAME;
3389
3390 if (TARGET_APCS_REENT && flag_pic)
3391 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3392
3393 if (TARGET_APCS_REENT)
3394 warning (0, "APCS reentrant code not supported. Ignored");
3395
3396 /* Set up some tuning parameters. */
3397 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3398 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3399 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3400 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3401 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3402 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3403
3404 /* For arm2/3 there is no need to do any scheduling if we are doing
3405 software floating-point. */
3406 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3407 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3408
3409 /* Override the default structure alignment for AAPCS ABI. */
3410 if (!global_options_set.x_arm_structure_size_boundary)
3411 {
3412 if (TARGET_AAPCS_BASED)
3413 arm_structure_size_boundary = 8;
3414 }
3415 else
3416 {
3417 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3418
3419 if (arm_structure_size_boundary != 8
3420 && arm_structure_size_boundary != 32
3421 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3422 {
3423 if (ARM_DOUBLEWORD_ALIGN)
3424 warning (0,
3425 "structure size boundary can only be set to 8, 32 or 64");
3426 else
3427 warning (0, "structure size boundary can only be set to 8 or 32");
3428 arm_structure_size_boundary
3429 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3430 }
3431 }
3432
3433 if (TARGET_VXWORKS_RTP)
3434 {
3435 if (!global_options_set.x_arm_pic_data_is_text_relative)
3436 arm_pic_data_is_text_relative = 0;
3437 }
3438 else if (flag_pic
3439 && !arm_pic_data_is_text_relative
3440 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3441 /* When text & data segments don't have a fixed displacement, the
3442 intended use is with a single, read-only PIC base register.
3443 Unless the user explicitly requested not to do that, set
3444 it. */
3445 target_flags |= MASK_SINGLE_PIC_BASE;
3446
3447 /* If stack checking is disabled, we can use r10 as the PIC register,
3448 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3449 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3450 {
3451 if (TARGET_VXWORKS_RTP)
3452 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3453 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3454 }
3455
3456 if (flag_pic && TARGET_VXWORKS_RTP)
3457 arm_pic_register = 9;
3458
3459 if (arm_pic_register_string != NULL)
3460 {
3461 int pic_register = decode_reg_name (arm_pic_register_string);
3462
3463 if (!flag_pic)
3464 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3465
3466 /* Prevent the user from choosing an obviously stupid PIC register. */
3467 else if (pic_register < 0 || call_used_regs[pic_register]
3468 || pic_register == HARD_FRAME_POINTER_REGNUM
3469 || pic_register == STACK_POINTER_REGNUM
3470 || pic_register >= PC_REGNUM
3471 || (TARGET_VXWORKS_RTP
3472 && (unsigned int) pic_register != arm_pic_register))
3473 error ("unable to use %qs for PIC register", arm_pic_register_string);
3474 else
3475 arm_pic_register = pic_register;
3476 }
3477
3478 if (flag_pic)
3479 target_word_relocations = 1;
3480
3481 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3482 if (fix_cm3_ldrd == 2)
3483 {
3484 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3485 fix_cm3_ldrd = 1;
3486 else
3487 fix_cm3_ldrd = 0;
3488 }
3489
3490 /* Hot/Cold partitioning is not currently supported, since we can't
3491 handle literal pool placement in that case. */
3492 if (flag_reorder_blocks_and_partition)
3493 {
3494 inform (input_location,
3495 "%<-freorder-blocks-and-partition%> not supported "
3496 "on this architecture");
3497 flag_reorder_blocks_and_partition = 0;
3498 flag_reorder_blocks = 1;
3499 }
3500
3501 if (flag_pic)
3502 /* Hoisting PIC address calculations more aggressively provides a small,
3503 but measurable, size reduction for PIC code. Therefore, we decrease
3504 the bar for unrestricted expression hoisting to the cost of PIC address
3505 calculation, which is 2 instructions. */
3506 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3507 global_options.x_param_values,
3508 global_options_set.x_param_values);
3509
3510 /* ARM EABI defaults to strict volatile bitfields. */
3511 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3512 && abi_version_at_least(2))
3513 flag_strict_volatile_bitfields = 1;
3514
3515 /* Enable software prefetching at -O3 for CPUs that have prefetch and for
3516 which we have deemed it beneficial (signified by setting
3517 prefetch.num_slots to 1 or more). */
3518 if (flag_prefetch_loop_arrays < 0
3519 && HAVE_prefetch
3520 && optimize >= 3
3521 && current_tune->prefetch.num_slots > 0)
3522 flag_prefetch_loop_arrays = 1;
3523
3524 /* Set up parameters to be used in prefetching algorithm. Do not
3525 override the defaults unless we are tuning for a core we have
3526 researched values for. */
3527 if (current_tune->prefetch.num_slots > 0)
3528 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3529 current_tune->prefetch.num_slots,
3530 global_options.x_param_values,
3531 global_options_set.x_param_values);
3532 if (current_tune->prefetch.l1_cache_line_size >= 0)
3533 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3534 current_tune->prefetch.l1_cache_line_size,
3535 global_options.x_param_values,
3536 global_options_set.x_param_values);
3537 if (current_tune->prefetch.l1_cache_size >= 0)
3538 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3539 current_tune->prefetch.l1_cache_size,
3540 global_options.x_param_values,
3541 global_options_set.x_param_values);
3542
3543 /* Use the alternative scheduling-pressure algorithm by default. */
3544 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3545 global_options.x_param_values,
3546 global_options_set.x_param_values);
3547
3548 /* Look through ready list and all of queue for instructions
3549 relevant for L2 auto-prefetcher. */
3550 int param_sched_autopref_queue_depth;
3551
3552 switch (current_tune->sched_autopref)
3553 {
3554 case tune_params::SCHED_AUTOPREF_OFF:
3555 param_sched_autopref_queue_depth = -1;
3556 break;
3557
3558 case tune_params::SCHED_AUTOPREF_RANK:
3559 param_sched_autopref_queue_depth = 0;
3560 break;
3561
3562 case tune_params::SCHED_AUTOPREF_FULL:
3563 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3564 break;
3565
3566 default:
3567 gcc_unreachable ();
3568 }
3569
3570 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3571 param_sched_autopref_queue_depth,
3572 global_options.x_param_values,
3573 global_options_set.x_param_values);
3574
3575 /* Currently, for slow flash data, we just disable literal pools. We also
3576 disable it for pure-code. */
3577 if (target_slow_flash_data || target_pure_code)
3578 arm_disable_literal_pool = true;
3579
3580 /* Disable scheduling fusion by default if the target is not an armv7
3581 processor or does not prefer ldrd/strd. */
3582 if (flag_schedule_fusion == 2
3583 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3584 flag_schedule_fusion = 0;
3585
3586 /* Need to remember initial options before they are overridden. */
3587 init_optimize = build_optimization_node (&global_options);
3588
3589 arm_options_perform_arch_sanity_checks ();
3590 arm_option_override_internal (&global_options, &global_options_set);
3591 arm_option_check_internal (&global_options);
3592 arm_option_params_internal ();
3593
3594 /* Create the default target_options structure. */
3595 target_option_default_node = target_option_current_node
3596 = build_target_option_node (&global_options);
3597
3598 /* Register global variables with the garbage collector. */
3599 arm_add_gc_roots ();
3600
3601 /* Init initial mode for testing. */
3602 thumb_flipper = TARGET_THUMB;
3603 }
3604
3605
3606 /* Reconfigure global status flags from the active_target.isa. */
3607 void
3608 arm_option_reconfigure_globals (void)
3609 {
3610 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
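/* For example, an arch_pp_name of "7A" yields "__ARM_ARCH_7A__", which is
   later defined as a preprocessor macro for the selected architecture.  */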
3611 arm_base_arch = arm_active_target.base_arch;
3612
3613 /* Initialize boolean versions of the architectural flags, for use
3614 in the arm.md file. */
3615 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3616 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3617 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3618 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3619 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3620 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3621 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3622 arm_arch6m = arm_arch6 && !arm_arch_notm;
3623 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3624 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3625 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3626 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3627 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3628 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3629 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3630 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3631 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3632 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3633 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3634 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3635 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3636 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3637 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3638 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3639 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3640 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3641 if (arm_fp16_inst)
3642 {
3643 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3644 error ("selected fp16 options are incompatible");
3645 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3646 }
3647
3648 /* And finally, set up some quirks. */
3649 arm_arch_no_volatile_ce
3650 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3651 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3652 isa_bit_quirk_armv6kz);
3653
3654 /* Use the cp15 method if it is available. */
3655 if (target_thread_pointer == TP_AUTO)
3656 {
3657 if (arm_arch6k && !TARGET_THUMB1)
3658 target_thread_pointer = TP_CP15;
3659 else
3660 target_thread_pointer = TP_SOFT;
3661 }
3662 }
3663
3664 /* Perform some validation between the desired architecture and the rest of the
3665 options. */
3666 void
3667 arm_options_perform_arch_sanity_checks (void)
3668 {
3669 /* V5T code we generate is completely interworking capable, so we turn off
3670 TARGET_INTERWORK here to avoid many tests later on. */
3671
3672 /* XXX However, we must pass the right pre-processor defines to CPP
3673 or GLD can get confused. This is a hack. */
3674 if (TARGET_INTERWORK)
3675 arm_cpp_interwork = 1;
3676
3677 if (arm_arch5t)
3678 target_flags &= ~MASK_INTERWORK;
3679
3680 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3681 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3682
3683 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3684 error ("iwmmxt abi requires an iwmmxt capable cpu");
3685
3686 /* BPABI targets use linker tricks to allow interworking on cores
3687 without thumb support. */
3688 if (TARGET_INTERWORK
3689 && !TARGET_BPABI
3690 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3691 {
3692 warning (0, "target CPU does not support interworking" );
3693 target_flags &= ~MASK_INTERWORK;
3694 }
3695
3696 /* If soft-float is specified then don't use FPU. */
3697 if (TARGET_SOFT_FLOAT)
3698 arm_fpu_attr = FPU_NONE;
3699 else
3700 arm_fpu_attr = FPU_VFP;
3701
3702 if (TARGET_AAPCS_BASED)
3703 {
3704 if (TARGET_CALLER_INTERWORKING)
3705 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3706 else
3707 if (TARGET_CALLEE_INTERWORKING)
3708 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3709 }
3710
3711 /* __fp16 support currently assumes the core has ldrh. */
3712 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3713 sorry ("__fp16 and no ldrh");
3714
3715 if (use_cmse && !arm_arch_cmse)
3716 error ("target CPU does not support ARMv8-M Security Extensions");
3717
3718 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3719 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3720 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3721 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3722
3723
3724 if (TARGET_AAPCS_BASED)
3725 {
3726 if (arm_abi == ARM_ABI_IWMMXT)
3727 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3728 else if (TARGET_HARD_FLOAT_ABI)
3729 {
3730 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3731 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3732 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3733 }
3734 else
3735 arm_pcs_default = ARM_PCS_AAPCS;
3736 }
3737 else
3738 {
3739 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3740 sorry ("%<-mfloat-abi=hard%> and VFP");
3741
3742 if (arm_abi == ARM_ABI_APCS)
3743 arm_pcs_default = ARM_PCS_APCS;
3744 else
3745 arm_pcs_default = ARM_PCS_ATPCS;
3746 }
3747 }
3748
3749 static void
3750 arm_add_gc_roots (void)
3751 {
3752 gcc_obstack_init(&minipool_obstack);
3753 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3754 }
3755 \f
3756 /* A table of known ARM exception types.
3757 For use with the interrupt function attribute. */
3758
3759 typedef struct
3760 {
3761 const char *const arg;
3762 const unsigned long return_value;
3763 }
3764 isr_attribute_arg;
3765
3766 static const isr_attribute_arg isr_attribute_args [] =
3767 {
3768 { "IRQ", ARM_FT_ISR },
3769 { "irq", ARM_FT_ISR },
3770 { "FIQ", ARM_FT_FIQ },
3771 { "fiq", ARM_FT_FIQ },
3772 { "ABORT", ARM_FT_ISR },
3773 { "abort", ARM_FT_ISR },
3774 { "ABORT", ARM_FT_ISR },
3775 { "abort", ARM_FT_ISR },
3776 { "UNDEF", ARM_FT_EXCEPTION },
3777 { "undef", ARM_FT_EXCEPTION },
3778 { "SWI", ARM_FT_EXCEPTION },
3779 { "swi", ARM_FT_EXCEPTION },
3780 { NULL, ARM_FT_NORMAL }
3781 };
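/* For example, a handler declared as
   void handler (void) __attribute__ ((interrupt ("IRQ")));
   is mapped to ARM_FT_ISR through this table by arm_isr_value below.  */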
3782
3783 /* Returns the (interrupt) function type of the current
3784 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3785
3786 static unsigned long
3787 arm_isr_value (tree argument)
3788 {
3789 const isr_attribute_arg * ptr;
3790 const char * arg;
3791
3792 if (!arm_arch_notm)
3793 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3794
3795 /* No argument - default to IRQ. */
3796 if (argument == NULL_TREE)
3797 return ARM_FT_ISR;
3798
3799 /* Get the value of the argument. */
3800 if (TREE_VALUE (argument) == NULL_TREE
3801 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3802 return ARM_FT_UNKNOWN;
3803
3804 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3805
3806 /* Check it against the list of known arguments. */
3807 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3808 if (streq (arg, ptr->arg))
3809 return ptr->return_value;
3810
3811 /* An unrecognized interrupt type. */
3812 return ARM_FT_UNKNOWN;
3813 }
3814
3815 /* Computes the type of the current function. */
3816
3817 static unsigned long
3818 arm_compute_func_type (void)
3819 {
3820 unsigned long type = ARM_FT_UNKNOWN;
3821 tree a;
3822 tree attr;
3823
3824 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3825
3826 /* Decide if the current function is volatile. Such functions
3827 never return, and many memory cycles can be saved by not storing
3828 register values that will never be needed again. This optimization
3829 was added to speed up context switching in a kernel application. */
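/* For instance, when optimizing, a function declared
   __attribute__ ((noreturn)) (which sets TREE_THIS_VOLATILE on its decl)
   is marked ARM_FT_VOLATILE here, provided unwind information does not
   require the saves; its prologue can then omit storing call-saved
   registers that it clobbers.  */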
3830 if (optimize > 0
3831 && (TREE_NOTHROW (current_function_decl)
3832 || !(flag_unwind_tables
3833 || (flag_exceptions
3834 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3835 && TREE_THIS_VOLATILE (current_function_decl))
3836 type |= ARM_FT_VOLATILE;
3837
3838 if (cfun->static_chain_decl != NULL)
3839 type |= ARM_FT_NESTED;
3840
3841 attr = DECL_ATTRIBUTES (current_function_decl);
3842
3843 a = lookup_attribute ("naked", attr);
3844 if (a != NULL_TREE)
3845 type |= ARM_FT_NAKED;
3846
3847 a = lookup_attribute ("isr", attr);
3848 if (a == NULL_TREE)
3849 a = lookup_attribute ("interrupt", attr);
3850
3851 if (a == NULL_TREE)
3852 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3853 else
3854 type |= arm_isr_value (TREE_VALUE (a));
3855
3856 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3857 type |= ARM_FT_CMSE_ENTRY;
3858
3859 return type;
3860 }
3861
3862 /* Returns the type of the current function. */
3863
3864 unsigned long
3865 arm_current_func_type (void)
3866 {
3867 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3868 cfun->machine->func_type = arm_compute_func_type ();
3869
3870 return cfun->machine->func_type;
3871 }
3872
3873 bool
3874 arm_allocate_stack_slots_for_args (void)
3875 {
3876 /* Naked functions should not allocate stack slots for arguments. */
3877 return !IS_NAKED (arm_current_func_type ());
3878 }
3879
3880 static bool
3881 arm_warn_func_return (tree decl)
3882 {
3883 /* Naked functions are implemented entirely in assembly, including the
3884 return sequence, so suppress warnings about this. */
3885 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3886 }
3887
3888 \f
3889 /* Output assembler code for a block containing the constant parts
3890 of a trampoline, leaving space for the variable parts.
3891
3892 On the ARM, (if r8 is the static chain regnum, and remembering that
3893 referencing pc adds an offset of 8) the trampoline looks like:
3894 ldr r8, [pc, #0]
3895 ldr pc, [pc]
3896 .word static chain value
3897 .word function's address
3898 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3899
3900 static void
3901 arm_asm_trampoline_template (FILE *f)
3902 {
3903 fprintf (f, "\t.syntax unified\n");
3904
3905 if (TARGET_ARM)
3906 {
3907 fprintf (f, "\t.arm\n");
3908 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3909 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3910 }
3911 else if (TARGET_THUMB2)
3912 {
3913 fprintf (f, "\t.thumb\n");
3914 /* The Thumb-2 trampoline is similar to the ARM implementation.
3915 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3916 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3917 STATIC_CHAIN_REGNUM, PC_REGNUM);
3918 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3919 }
3920 else
3921 {
3922 ASM_OUTPUT_ALIGN (f, 2);
3923 fprintf (f, "\t.code\t16\n");
3924 fprintf (f, ".Ltrampoline_start:\n");
3925 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3926 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3927 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3928 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3929 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3930 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3931 }
3932 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3933 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3934 }
3935
3936 /* Emit RTL insns to initialize the variable parts of a trampoline. */
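/* (The static chain value and the function address are stored at byte
   offsets 8 and 12 for 32-bit targets, and 12 and 16 for Thumb-1,
   matching the template emitted above.)  */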
3937
3938 static void
3939 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3940 {
3941 rtx fnaddr, mem, a_tramp;
3942
3943 emit_block_move (m_tramp, assemble_trampoline_template (),
3944 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3945
3946 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3947 emit_move_insn (mem, chain_value);
3948
3949 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3950 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3951 emit_move_insn (mem, fnaddr);
3952
3953 a_tramp = XEXP (m_tramp, 0);
3954 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3955 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3956 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3957 }
3958
3959 /* Thumb trampolines should be entered in thumb mode, so set
3960 the bottom bit of the address. */
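/* For example, a trampoline placed at 0x20000 is returned as 0x20001 so
   that a bx/blx to it enters Thumb state.  */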
3961
3962 static rtx
3963 arm_trampoline_adjust_address (rtx addr)
3964 {
3965 if (TARGET_THUMB)
3966 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3967 NULL, 0, OPTAB_LIB_WIDEN);
3968 return addr;
3969 }
3970 \f
3971 /* Return 1 if it is possible to return using a single instruction.
3972 If SIBLING is non-null, this is a test for a return before a sibling
3973 call. SIBLING is the call insn, so we can examine its register usage. */
3974
3975 int
3976 use_return_insn (int iscond, rtx sibling)
3977 {
3978 int regno;
3979 unsigned int func_type;
3980 unsigned long saved_int_regs;
3981 unsigned HOST_WIDE_INT stack_adjust;
3982 arm_stack_offsets *offsets;
3983
3984 /* Never use a return instruction before reload has run. */
3985 if (!reload_completed)
3986 return 0;
3987
3988 func_type = arm_current_func_type ();
3989
3990 /* Naked, volatile and stack alignment functions need special
3991 consideration. */
3992 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3993 return 0;
3994
3995 /* So do interrupt functions that use the frame pointer and Thumb
3996 interrupt functions. */
3997 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3998 return 0;
3999
4000 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4001 && !optimize_function_for_size_p (cfun))
4002 return 0;
4003
4004 offsets = arm_get_frame_offsets ();
4005 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4006
4007 /* As do variadic functions. */
4008 if (crtl->args.pretend_args_size
4009 || cfun->machine->uses_anonymous_args
4010 /* Or if the function calls __builtin_eh_return () */
4011 || crtl->calls_eh_return
4012 /* Or if the function calls alloca */
4013 || cfun->calls_alloca
4014 /* Or if there is a stack adjustment. However, if the stack pointer
4015 is saved on the stack, we can use a pre-incrementing stack load. */
4016 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4017 && stack_adjust == 4))
4018 /* Or if the static chain register was saved above the frame, under the
4019 assumption that the stack pointer isn't saved on the stack. */
4020 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4021 && arm_compute_static_chain_stack_bytes() != 0))
4022 return 0;
4023
4024 saved_int_regs = offsets->saved_regs_mask;
4025
4026 /* Unfortunately, the insn
4027
4028 ldmib sp, {..., sp, ...}
4029
4030 triggers a bug on most SA-110 based devices, such that the stack
4031 pointer won't be correctly restored if the instruction takes a
4032 page fault. We work around this problem by popping r3 along with
4033 the other registers, since that is never slower than executing
4034 another instruction.
4035
4036 We test for !arm_arch5t here, because code for any architecture
4037 less than this could potentially be run on one of the buggy
4038 chips. */
4039 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4040 {
4041 /* Validate that r3 is a call-clobbered register (always true in
4042 the default abi) ... */
4043 if (!call_used_regs[3])
4044 return 0;
4045
4046 /* ... that it isn't being used for a return value ... */
4047 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4048 return 0;
4049
4050 /* ... or for a tail-call argument ... */
4051 if (sibling)
4052 {
4053 gcc_assert (CALL_P (sibling));
4054
4055 if (find_regno_fusage (sibling, USE, 3))
4056 return 0;
4057 }
4058
4059 /* ... and that there are no call-saved registers in r0-r2
4060 (always true in the default ABI). */
4061 if (saved_int_regs & 0x7)
4062 return 0;
4063 }
4064
4065 /* Can't be done if interworking with Thumb, and any registers have been
4066 stacked. */
4067 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4068 return 0;
4069
4070 /* On StrongARM, conditional returns are expensive if they aren't
4071 taken and multiple registers have been stacked. */
4072 if (iscond && arm_tune_strongarm)
4073 {
4074 /* Conditional return when just the LR is stored is a simple
4075 conditional-load instruction, that's not expensive. */
4076 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4077 return 0;
4078
4079 if (flag_pic
4080 && arm_pic_register != INVALID_REGNUM
4081 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4082 return 0;
4083 }
4084
4085 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4086 several instructions if anything needs to be popped. */
4087 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4088 return 0;
4089
4090 /* If there are saved registers but the LR isn't saved, then we need
4091 two instructions for the return. */
4092 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4093 return 0;
4094
4095 /* Can't be done if any of the VFP regs are pushed,
4096 since this also requires an insn. */
4097 if (TARGET_HARD_FLOAT)
4098 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4099 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4100 return 0;
4101
4102 if (TARGET_REALLY_IWMMXT)
4103 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4104 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4105 return 0;
4106
4107 return 1;
4108 }
4109
4110 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4111 shrink-wrapping if possible. This is the case if we need to emit a
4112 prologue, which we can test by looking at the offsets. */
4113 bool
4114 use_simple_return_p (void)
4115 {
4116 arm_stack_offsets *offsets;
4117
4118 /* Note this function can be called before or after reload. */
4119 if (!reload_completed)
4120 arm_compute_frame_layout ();
4121
4122 offsets = arm_get_frame_offsets ();
4123 return offsets->outgoing_args != 0;
4124 }
4125
4126 /* Return TRUE if int I is a valid immediate ARM constant. */
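/* For illustration: 0xff, 0xff0 and 0xff000000 are valid ARM immediates
   (an 8-bit value rotated right by an even amount), whereas 0x101 is not.
   0x1fe needs an odd shift, so it is rejected in ARM state but accepted
   for Thumb-2; 0x00ff00ff is accepted for Thumb-2 as a replicated
   0x00XY00XY pattern.  */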
4127
4128 int
4129 const_ok_for_arm (HOST_WIDE_INT i)
4130 {
4131 int lowbit;
4132
4133 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4134 be all zero, or all one. */
4135 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4136 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4137 != ((~(unsigned HOST_WIDE_INT) 0)
4138 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4139 return FALSE;
4140
4141 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4142
4143 /* Fast return for 0 and small values. We must do this for zero, since
4144 the code below can't handle that one case. */
4145 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4146 return TRUE;
4147
4148 /* Get the number of trailing zeros. */
4149 lowbit = ffs((int) i) - 1;
4150
4151 /* Only even shifts are allowed in ARM mode so round down to the
4152 nearest even number. */
4153 if (TARGET_ARM)
4154 lowbit &= ~1;
4155
4156 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4157 return TRUE;
4158
4159 if (TARGET_ARM)
4160 {
4161 /* Allow rotated constants in ARM mode. */
4162 if (lowbit <= 4
4163 && ((i & ~0xc000003f) == 0
4164 || (i & ~0xf000000f) == 0
4165 || (i & ~0xfc000003) == 0))
4166 return TRUE;
4167 }
4168 else if (TARGET_THUMB2)
4169 {
4170 HOST_WIDE_INT v;
4171
4172 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4173 v = i & 0xff;
4174 v |= v << 16;
4175 if (i == v || i == (v | (v << 8)))
4176 return TRUE;
4177
4178 /* Allow repeated pattern 0xXY00XY00. */
4179 v = i & 0xff00;
4180 v |= v << 16;
4181 if (i == v)
4182 return TRUE;
4183 }
4184 else if (TARGET_HAVE_MOVT)
4185 {
4186 /* Thumb-1 Targets with MOVT. */
4187 if (i > 0xffff)
4188 return FALSE;
4189 else
4190 return TRUE;
4191 }
4192
4193 return FALSE;
4194 }
4195
4196 /* Return true if I is a valid constant for the operation CODE. */
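/* For example, in ARM state (PLUS, -1) succeeds because the negated
   value, 1, is a valid immediate (the add becomes a sub), and on Thumb-2
   (PLUS, 0xfff) succeeds via the 12-bit addw/subw check even though 0xfff
   is not otherwise encodable.  */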
4197 int
4198 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4199 {
4200 if (const_ok_for_arm (i))
4201 return 1;
4202
4203 switch (code)
4204 {
4205 case SET:
4206 /* See if we can use movw. */
4207 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4208 return 1;
4209 else
4210 /* Otherwise, try mvn. */
4211 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4212
4213 case PLUS:
4214 /* See if we can use addw or subw. */
4215 if (TARGET_THUMB2
4216 && ((i & 0xfffff000) == 0
4217 || ((-i) & 0xfffff000) == 0))
4218 return 1;
4219 /* Fall through. */
4220 case COMPARE:
4221 case EQ:
4222 case NE:
4223 case GT:
4224 case LE:
4225 case LT:
4226 case GE:
4227 case GEU:
4228 case LTU:
4229 case GTU:
4230 case LEU:
4231 case UNORDERED:
4232 case ORDERED:
4233 case UNEQ:
4234 case UNGE:
4235 case UNLT:
4236 case UNGT:
4237 case UNLE:
4238 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4239
4240 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4241 case XOR:
4242 return 0;
4243
4244 case IOR:
4245 if (TARGET_THUMB2)
4246 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4247 return 0;
4248
4249 case AND:
4250 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4251
4252 default:
4253 gcc_unreachable ();
4254 }
4255 }
4256
4257 /* Return true if I is a valid di mode constant for the operation CODE. */
4258 int
4259 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4260 {
4261 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4262 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4263 rtx hi = GEN_INT (hi_val);
4264 rtx lo = GEN_INT (lo_val);
4265
4266 if (TARGET_THUMB1)
4267 return 0;
4268
4269 switch (code)
4270 {
4271 case AND:
4272 case IOR:
4273 case XOR:
4274 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4275 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4276 case PLUS:
4277 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4278
4279 default:
4280 return 0;
4281 }
4282 }
4283
4284 /* Emit a sequence of insns to handle a large constant.
4285 CODE is the code of the operation required, it can be any of SET, PLUS,
4286 IOR, AND, XOR, MINUS;
4287 MODE is the mode in which the operation is being performed;
4288 VAL is the integer to operate on;
4289 SOURCE is the other operand (a register, or a null-pointer for SET);
4290 SUBTARGETS means it is safe to create scratch registers if that will
4291 either produce a simpler sequence, or we will want to cse the values.
4292 Return value is the number of insns emitted. */
4293
4294 /* ??? Tweak this for thumb2. */
4295 int
4296 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4297 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4298 {
4299 rtx cond;
4300
4301 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4302 cond = COND_EXEC_TEST (PATTERN (insn));
4303 else
4304 cond = NULL_RTX;
4305
4306 if (subtargets || code == SET
4307 || (REG_P (target) && REG_P (source)
4308 && REGNO (target) != REGNO (source)))
4309 {
4310 /* After arm_reorg has been called, we can't fix up expensive
4311 constants by pushing them into memory so we must synthesize
4312 them in-line, regardless of the cost. This is only likely to
4313 be more costly on chips that have load delay slots and we are
4314 compiling without running the scheduler (so no splitting
4315 occurred before the final instruction emission).
4316
4317 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4318 */
4319 if (!cfun->machine->after_arm_reorg
4320 && !cond
4321 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4322 1, 0)
4323 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4324 + (code != SET))))
4325 {
4326 if (code == SET)
4327 {
4328 /* Currently SET is the only monadic value for CODE, all
4329 the rest are dyadic. */
4330 if (TARGET_USE_MOVT)
4331 arm_emit_movpair (target, GEN_INT (val));
4332 else
4333 emit_set_insn (target, GEN_INT (val));
4334
4335 return 1;
4336 }
4337 else
4338 {
4339 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4340
4341 if (TARGET_USE_MOVT)
4342 arm_emit_movpair (temp, GEN_INT (val));
4343 else
4344 emit_set_insn (temp, GEN_INT (val));
4345
4346 /* For MINUS, the value is subtracted from, since we never
4347 have subtraction of a constant. */
4348 if (code == MINUS)
4349 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4350 else
4351 emit_set_insn (target,
4352 gen_rtx_fmt_ee (code, mode, source, temp));
4353 return 2;
4354 }
4355 }
4356 }
4357
4358 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4359 1);
4360 }
4361
4362 /* Return, in RETURN_SEQUENCE, a sequence of integers that fit into
4363 ARM/THUMB2 immediates and add up to VAL.
4364 The function return value gives the number of insns required. */
4365 static int
4366 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4367 struct four_ints *return_sequence)
4368 {
4369 int best_consecutive_zeros = 0;
4370 int i;
4371 int best_start = 0;
4372 int insns1, insns2;
4373 struct four_ints tmp_sequence;
4374
4375 /* If we aren't targeting ARM, the best place to start is always at
4376 the bottom, otherwise look more closely. */
4377 if (TARGET_ARM)
4378 {
4379 for (i = 0; i < 32; i += 2)
4380 {
4381 int consecutive_zeros = 0;
4382
4383 if (!(val & (3 << i)))
4384 {
4385 while ((i < 32) && !(val & (3 << i)))
4386 {
4387 consecutive_zeros += 2;
4388 i += 2;
4389 }
4390 if (consecutive_zeros > best_consecutive_zeros)
4391 {
4392 best_consecutive_zeros = consecutive_zeros;
4393 best_start = i - consecutive_zeros;
4394 }
4395 i -= 2;
4396 }
4397 }
4398 }
4399
4400 /* So long as it won't require any more insns to do so, it's
4401 desirable to emit a small constant (in bits 0...9) in the last
4402 insn. This way there is more chance that it can be combined with
4403 a later addressing insn to form a pre-indexed load or store
4404 operation. Consider:
4405
4406 *((volatile int *)0xe0000100) = 1;
4407 *((volatile int *)0xe0000110) = 2;
4408
4409 We want this to wind up as:
4410
4411 mov rA, #0xe0000000
4412 mov rB, #1
4413 str rB, [rA, #0x100]
4414 mov rB, #2
4415 str rB, [rA, #0x110]
4416
4417 rather than having to synthesize both large constants from scratch.
4418
4419 Therefore, we calculate how many insns would be required to emit
4420 the constant starting from `best_start', and also starting from
4421 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4422 yield a shorter sequence, we may as well use zero. */
4423 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4424 if (best_start != 0
4425 && ((HOST_WIDE_INT_1U << best_start) < val))
4426 {
4427 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4428 if (insns2 <= insns1)
4429 {
4430 *return_sequence = tmp_sequence;
4431 insns1 = insns2;
4432 }
4433 }
4434
4435 return insns1;
4436 }
4437
4438 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4439 static int
4440 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4441 struct four_ints *return_sequence, int i)
4442 {
4443 int remainder = val & 0xffffffff;
4444 int insns = 0;
4445
4446 /* Try to find a way of doing the job in either two or three
4447 instructions.
4448
4449 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4450 location. We start at position I. This may be the MSB, or
4451 optimal_immediate_sequence may have positioned it at the largest block
4452 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4453 wrapping around to the top of the word when we drop off the bottom.
4454 In the worst case this code should produce no more than four insns.
4455
4456 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4457 constants, shifted to any arbitrary location. We should always start
4458 at the MSB. */
4459 do
4460 {
4461 int end;
4462 unsigned int b1, b2, b3, b4;
4463 unsigned HOST_WIDE_INT result;
4464 int loc;
4465
4466 gcc_assert (insns < 4);
4467
4468 if (i <= 0)
4469 i += 32;
4470
4471 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4472 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4473 {
4474 loc = i;
4475 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4476 /* We can use addw/subw for the last 12 bits. */
4477 result = remainder;
4478 else
4479 {
4480 /* Use an 8-bit shifted/rotated immediate. */
4481 end = i - 8;
4482 if (end < 0)
4483 end += 32;
4484 result = remainder & ((0x0ff << end)
4485 | ((i < end) ? (0xff >> (32 - end))
4486 : 0));
4487 i -= 8;
4488 }
4489 }
4490 else
4491 {
4492 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4493 arbitrary shifts. */
4494 i -= TARGET_ARM ? 2 : 1;
4495 continue;
4496 }
4497
4498 /* Next, see if we can do a better job with a thumb2 replicated
4499 constant.
4500
4501 We do it this way around to catch the cases like 0x01F001E0 where
4502 two 8-bit immediates would work, but a replicated constant would
4503 make it worse.
4504
4505 TODO: 16-bit constants that don't clear all the bits, but still win.
4506 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4507 if (TARGET_THUMB2)
4508 {
4509 b1 = (remainder & 0xff000000) >> 24;
4510 b2 = (remainder & 0x00ff0000) >> 16;
4511 b3 = (remainder & 0x0000ff00) >> 8;
4512 b4 = remainder & 0xff;
4513
4514 if (loc > 24)
4515 {
4516 /* The 8-bit immediate already found clears b1 (and maybe b2),
4517 but must leave b3 and b4 alone. */
4518
4519 /* First try to find a 32-bit replicated constant that clears
4520 almost everything. We can assume that we can't do it in one,
4521 or else we wouldn't be here. */
4522 unsigned int tmp = b1 & b2 & b3 & b4;
4523 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4524 + (tmp << 24);
4525 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4526 + (tmp == b3) + (tmp == b4);
4527 if (tmp
4528 && (matching_bytes >= 3
4529 || (matching_bytes == 2
4530 && const_ok_for_op (remainder & ~tmp2, code))))
4531 {
4532 /* At least 3 of the bytes match, and the fourth has at
4533 least as many bits set, or two of the bytes match
4534 and it will only require one more insn to finish. */
4535 result = tmp2;
4536 i = tmp != b1 ? 32
4537 : tmp != b2 ? 24
4538 : tmp != b3 ? 16
4539 : 8;
4540 }
4541
4542 /* Second, try to find a 16-bit replicated constant that can
4543 leave three of the bytes clear. If b2 or b4 is already
4544 zero, then we can. If the 8-bit from above would not
4545 clear b2 anyway, then we still win. */
4546 else if (b1 == b3 && (!b2 || !b4
4547 || (remainder & 0x00ff0000 & ~result)))
4548 {
4549 result = remainder & 0xff00ff00;
4550 i = 24;
4551 }
4552 }
4553 else if (loc > 16)
4554 {
4555 /* The 8-bit immediate already found clears b2 (and maybe b3)
4556 and we don't get here unless b1 is already clear, but it will
4557 leave b4 unchanged. */
4558
4559 /* If we can clear b2 and b4 at once, then we win, since the
4560 8-bits couldn't possibly reach that far. */
4561 if (b2 == b4)
4562 {
4563 result = remainder & 0x00ff00ff;
4564 i = 16;
4565 }
4566 }
4567 }
4568
4569 return_sequence->i[insns++] = result;
4570 remainder &= ~result;
4571
4572 if (code == SET || code == MINUS)
4573 code = PLUS;
4574 }
4575 while (remainder);
4576
4577 return insns;
4578 }
4579
4580 /* Emit an instruction with the indicated PATTERN. If COND is
4581 non-NULL, conditionalize the execution of the instruction on COND
4582 being true. */
4583
4584 static void
4585 emit_constant_insn (rtx cond, rtx pattern)
4586 {
4587 if (cond)
4588 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4589 emit_insn (pattern);
4590 }
4591
4592 /* As above, but extra parameter GENERATE which, if clear, suppresses
4593 RTL generation. */
4594
4595 static int
4596 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4597 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4598 int subtargets, int generate)
4599 {
4600 int can_invert = 0;
4601 int can_negate = 0;
4602 int final_invert = 0;
4603 int i;
4604 int set_sign_bit_copies = 0;
4605 int clear_sign_bit_copies = 0;
4606 int clear_zero_bit_copies = 0;
4607 int set_zero_bit_copies = 0;
4608 int insns = 0, neg_insns, inv_insns;
4609 unsigned HOST_WIDE_INT temp1, temp2;
4610 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4611 struct four_ints *immediates;
4612 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4613
4614 /* Find out which operations are safe for a given CODE. Also do a quick
4615 check for degenerate cases; these can occur when DImode operations
4616 are split. */
4617 switch (code)
4618 {
4619 case SET:
4620 can_invert = 1;
4621 break;
4622
4623 case PLUS:
4624 can_negate = 1;
4625 break;
4626
4627 case IOR:
4628 if (remainder == 0xffffffff)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 GEN_INT (ARM_SIGN_EXTEND (val))));
4634 return 1;
4635 }
4636
4637 if (remainder == 0)
4638 {
4639 if (reload_completed && rtx_equal_p (target, source))
4640 return 0;
4641
4642 if (generate)
4643 emit_constant_insn (cond, gen_rtx_SET (target, source));
4644 return 1;
4645 }
4646 break;
4647
4648 case AND:
4649 if (remainder == 0)
4650 {
4651 if (generate)
4652 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4653 return 1;
4654 }
4655 if (remainder == 0xffffffff)
4656 {
4657 if (reload_completed && rtx_equal_p (target, source))
4658 return 0;
4659 if (generate)
4660 emit_constant_insn (cond, gen_rtx_SET (target, source));
4661 return 1;
4662 }
4663 can_invert = 1;
4664 break;
4665
4666 case XOR:
4667 if (remainder == 0)
4668 {
4669 if (reload_completed && rtx_equal_p (target, source))
4670 return 0;
4671 if (generate)
4672 emit_constant_insn (cond, gen_rtx_SET (target, source));
4673 return 1;
4674 }
4675
4676 if (remainder == 0xffffffff)
4677 {
4678 if (generate)
4679 emit_constant_insn (cond,
4680 gen_rtx_SET (target,
4681 gen_rtx_NOT (mode, source)));
4682 return 1;
4683 }
4684 final_invert = 1;
4685 break;
4686
4687 case MINUS:
4688 /* We treat MINUS as (val - source), since (source - val) is always
4689 passed as (source + (-val)). */
4690 if (remainder == 0)
4691 {
4692 if (generate)
4693 emit_constant_insn (cond,
4694 gen_rtx_SET (target,
4695 gen_rtx_NEG (mode, source)));
4696 return 1;
4697 }
4698 if (const_ok_for_arm (val))
4699 {
4700 if (generate)
4701 emit_constant_insn (cond,
4702 gen_rtx_SET (target,
4703 gen_rtx_MINUS (mode, GEN_INT (val),
4704 source)));
4705 return 1;
4706 }
4707
4708 break;
4709
4710 default:
4711 gcc_unreachable ();
4712 }
4713
4714 /* If we can do it in one insn get out quickly. */
4715 if (const_ok_for_op (val, code))
4716 {
4717 if (generate)
4718 emit_constant_insn (cond,
4719 gen_rtx_SET (target,
4720 (source
4721 ? gen_rtx_fmt_ee (code, mode, source,
4722 GEN_INT (val))
4723 : GEN_INT (val))));
4724 return 1;
4725 }
4726
4727 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4728 insn. */
4729 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4730 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4731 {
4732 if (generate)
4733 {
4734 if (mode == SImode && i == 16)
4735 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4736 smaller insn. */
4737 emit_constant_insn (cond,
4738 gen_zero_extendhisi2
4739 (target, gen_lowpart (HImode, source)));
4740 else
4741 /* Extz only supports SImode, but we can coerce the operands
4742 into that mode. */
4743 emit_constant_insn (cond,
4744 gen_extzv_t2 (gen_lowpart (SImode, target),
4745 gen_lowpart (SImode, source),
4746 GEN_INT (i), const0_rtx));
4747 }
4748
4749 return 1;
4750 }
4751
4752 /* Calculate a few attributes that may be useful for specific
4753 optimizations. */
4754 /* Count number of leading zeros. */
4755 for (i = 31; i >= 0; i--)
4756 {
4757 if ((remainder & (1 << i)) == 0)
4758 clear_sign_bit_copies++;
4759 else
4760 break;
4761 }
4762
4763 /* Count number of leading 1's. */
4764 for (i = 31; i >= 0; i--)
4765 {
4766 if ((remainder & (1 << i)) != 0)
4767 set_sign_bit_copies++;
4768 else
4769 break;
4770 }
4771
4772 /* Count number of trailing zero's. */
4773 for (i = 0; i <= 31; i++)
4774 {
4775 if ((remainder & (1 << i)) == 0)
4776 clear_zero_bit_copies++;
4777 else
4778 break;
4779 }
4780
4781 /* Count number of trailing 1's. */
4782 for (i = 0; i <= 31; i++)
4783 {
4784 if ((remainder & (1 << i)) != 0)
4785 set_zero_bit_copies++;
4786 else
4787 break;
4788 }
4789
4790 switch (code)
4791 {
4792 case SET:
4793 /* See if we can do this by sign_extending a constant that is known
4794 to be negative. This is a good way of doing it, since the shift
4795 may well merge into a subsequent insn. */
4796 if (set_sign_bit_copies > 1)
4797 {
4798 if (const_ok_for_arm
4799 (temp1 = ARM_SIGN_EXTEND (remainder
4800 << (set_sign_bit_copies - 1))))
4801 {
4802 if (generate)
4803 {
4804 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4805 emit_constant_insn (cond,
4806 gen_rtx_SET (new_src, GEN_INT (temp1)));
4807 emit_constant_insn (cond,
4808 gen_ashrsi3 (target, new_src,
4809 GEN_INT (set_sign_bit_copies - 1)));
4810 }
4811 return 2;
4812 }
4813 /* For an inverted constant, we will need to set the low bits;
4814 these will be shifted out of harm's way. */
4815 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4816 if (const_ok_for_arm (~temp1))
4817 {
4818 if (generate)
4819 {
4820 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4821 emit_constant_insn (cond,
4822 gen_rtx_SET (new_src, GEN_INT (temp1)));
4823 emit_constant_insn (cond,
4824 gen_ashrsi3 (target, new_src,
4825 GEN_INT (set_sign_bit_copies - 1)));
4826 }
4827 return 2;
4828 }
4829 }
4830
4831 /* See if we can calculate the value as the difference between two
4832 valid immediates. */
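/* Worked example: remainder 0x000fffff gives topshift 12; temp1 initially
   computes to zero and is corrected to 0x00100000, temp2 becomes 1, so the
   value is built as a mov of #0x100000 followed by an add of -1 (emitted
   as sub #1), i.e. two insns.  */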
4833 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4834 {
4835 int topshift = clear_sign_bit_copies & ~1;
4836
4837 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4838 & (0xff000000 >> topshift));
4839
4840 /* If temp1 is zero, then that means the 9 most significant
4841 bits of remainder were 1 and we've caused it to overflow.
4842 When topshift is 0 we don't need to do anything since we
4843 can borrow from 'bit 32'. */
4844 if (temp1 == 0 && topshift != 0)
4845 temp1 = 0x80000000 >> (topshift - 1);
4846
4847 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4848
4849 if (const_ok_for_arm (temp2))
4850 {
4851 if (generate)
4852 {
4853 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4854 emit_constant_insn (cond,
4855 gen_rtx_SET (new_src, GEN_INT (temp1)));
4856 emit_constant_insn (cond,
4857 gen_addsi3 (target, new_src,
4858 GEN_INT (-temp2)));
4859 }
4860
4861 return 2;
4862 }
4863 }
4864
4865 /* See if we can generate this by setting the bottom (or the top)
4866 16 bits, and then shifting these into the other half of the
4867 word. We only look for the simplest cases, to do more would cost
4868 too much. Be careful, however, not to generate this when the
4869 alternative would take fewer insns. */
4870 if (val & 0xffff0000)
4871 {
4872 temp1 = remainder & 0xffff0000;
4873 temp2 = remainder & 0x0000ffff;
4874
4875 /* Overlaps outside this range are best done using other methods. */
4876 for (i = 9; i < 24; i++)
4877 {
4878 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4879 && !const_ok_for_arm (temp2))
4880 {
4881 rtx new_src = (subtargets
4882 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4883 : target);
4884 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4885 source, subtargets, generate);
4886 source = new_src;
4887 if (generate)
4888 emit_constant_insn
4889 (cond,
4890 gen_rtx_SET
4891 (target,
4892 gen_rtx_IOR (mode,
4893 gen_rtx_ASHIFT (mode, source,
4894 GEN_INT (i)),
4895 source)));
4896 return insns + 1;
4897 }
4898 }
4899
4900 /* Don't duplicate cases already considered. */
4901 for (i = 17; i < 24; i++)
4902 {
4903 if (((temp1 | (temp1 >> i)) == remainder)
4904 && !const_ok_for_arm (temp1))
4905 {
4906 rtx new_src = (subtargets
4907 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4908 : target);
4909 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4910 source, subtargets, generate);
4911 source = new_src;
4912 if (generate)
4913 emit_constant_insn
4914 (cond,
4915 gen_rtx_SET (target,
4916 gen_rtx_IOR
4917 (mode,
4918 gen_rtx_LSHIFTRT (mode, source,
4919 GEN_INT (i)),
4920 source)));
4921 return insns + 1;
4922 }
4923 }
4924 }
4925 break;
4926
4927 case IOR:
4928 case XOR:
4929 /* If we have IOR or XOR, and the constant can be loaded in a
4930 single instruction, and we can find a temporary to put it in,
4931 then this can be done in two instructions instead of 3-4. */
4932 if (subtargets
4933 /* TARGET can't be NULL if SUBTARGETS is 0 */
4934 || (reload_completed && !reg_mentioned_p (target, source)))
4935 {
4936 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4937 {
4938 if (generate)
4939 {
4940 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4941
4942 emit_constant_insn (cond,
4943 gen_rtx_SET (sub, GEN_INT (val)));
4944 emit_constant_insn (cond,
4945 gen_rtx_SET (target,
4946 gen_rtx_fmt_ee (code, mode,
4947 source, sub)));
4948 }
4949 return 2;
4950 }
4951 }
4952
4953 if (code == XOR)
4954 break;
4955
4956 /* Convert.
4957 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4958 followed by 0s, e.g. 0xfff00000)
4959 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4960
4961 This can be done in 2 instructions by using shifts with mov or mvn.
4962 e.g. for
4963 x = x | 0xfff00000;
4964 we generate.
4965 mvn r0, r0, asl #12
4966 mvn r0, r0, lsr #12 */
4967 if (set_sign_bit_copies > 8
4968 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4969 {
4970 if (generate)
4971 {
4972 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4973 rtx shift = GEN_INT (set_sign_bit_copies);
4974
4975 emit_constant_insn
4976 (cond,
4977 gen_rtx_SET (sub,
4978 gen_rtx_NOT (mode,
4979 gen_rtx_ASHIFT (mode,
4980 source,
4981 shift))));
4982 emit_constant_insn
4983 (cond,
4984 gen_rtx_SET (target,
4985 gen_rtx_NOT (mode,
4986 gen_rtx_LSHIFTRT (mode, sub,
4987 shift))));
4988 }
4989 return 2;
4990 }
4991
4992 /* Convert
4993 x = y | constant (which has set_zero_bit_copies number of trailing ones)
4994 to
4995 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4996 
4997 E.g. for r0 = r0 | 0xfff we generate
4998 mvn r0, r0, lsr #12
4999 mvn r0, r0, asl #12
5000 
5001 */
5002 if (set_zero_bit_copies > 8
5003 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5004 {
5005 if (generate)
5006 {
5007 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5008 rtx shift = GEN_INT (set_zero_bit_copies);
5009
5010 emit_constant_insn
5011 (cond,
5012 gen_rtx_SET (sub,
5013 gen_rtx_NOT (mode,
5014 gen_rtx_LSHIFTRT (mode,
5015 source,
5016 shift))));
5017 emit_constant_insn
5018 (cond,
5019 gen_rtx_SET (target,
5020 gen_rtx_NOT (mode,
5021 gen_rtx_ASHIFT (mode, sub,
5022 shift))));
5023 }
5024 return 2;
5025 }
5026
5027 /* This will never be reached for Thumb2 because orn is a valid
5028 instruction. This is for Thumb1 and the ARM 32 bit cases.
5029
5030 x = y | constant (such that ~constant is a valid constant)
5031 Transform this to
5032 x = ~(~y & ~constant).
5033 */
5034 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5035 {
5036 if (generate)
5037 {
5038 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5039 emit_constant_insn (cond,
5040 gen_rtx_SET (sub,
5041 gen_rtx_NOT (mode, source)));
5042 source = sub;
5043 if (subtargets)
5044 sub = gen_reg_rtx (mode);
5045 emit_constant_insn (cond,
5046 gen_rtx_SET (sub,
5047 gen_rtx_AND (mode, source,
5048 GEN_INT (temp1))));
5049 emit_constant_insn (cond,
5050 gen_rtx_SET (target,
5051 gen_rtx_NOT (mode, sub)));
5052 }
5053 return 3;
5054 }
5055 break;
5056
5057 case AND:
5058 /* See if two shifts will do two or more insns' worth of work.  */
5059 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5060 {
5061 HOST_WIDE_INT shift_mask = ((0xffffffff
5062 << (32 - clear_sign_bit_copies))
5063 & 0xffffffff);
5064
5065 if ((remainder | shift_mask) != 0xffffffff)
5066 {
5067 HOST_WIDE_INT new_val
5068 = ARM_SIGN_EXTEND (remainder | shift_mask);
5069
5070 if (generate)
5071 {
5072 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5073 insns = arm_gen_constant (AND, SImode, cond, new_val,
5074 new_src, source, subtargets, 1);
5075 source = new_src;
5076 }
5077 else
5078 {
5079 rtx targ = subtargets ? NULL_RTX : target;
5080 insns = arm_gen_constant (AND, mode, cond, new_val,
5081 targ, source, subtargets, 0);
5082 }
5083 }
5084
5085 if (generate)
5086 {
5087 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5088 rtx shift = GEN_INT (clear_sign_bit_copies);
5089
5090 emit_insn (gen_ashlsi3 (new_src, source, shift));
5091 emit_insn (gen_lshrsi3 (target, new_src, shift));
5092 }
5093
5094 return insns + 2;
5095 }
5096
5097 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5098 {
5099 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5100
5101 if ((remainder | shift_mask) != 0xffffffff)
5102 {
5103 HOST_WIDE_INT new_val
5104 = ARM_SIGN_EXTEND (remainder | shift_mask);
5105 if (generate)
5106 {
5107 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5108
5109 insns = arm_gen_constant (AND, mode, cond, new_val,
5110 new_src, source, subtargets, 1);
5111 source = new_src;
5112 }
5113 else
5114 {
5115 rtx targ = subtargets ? NULL_RTX : target;
5116
5117 insns = arm_gen_constant (AND, mode, cond, new_val,
5118 targ, source, subtargets, 0);
5119 }
5120 }
5121
5122 if (generate)
5123 {
5124 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5125 rtx shift = GEN_INT (clear_zero_bit_copies);
5126
5127 emit_insn (gen_lshrsi3 (new_src, source, shift));
5128 emit_insn (gen_ashlsi3 (target, new_src, shift));
5129 }
5130
5131 return insns + 2;
5132 }
5133
5134 break;
5135
5136 default:
5137 break;
5138 }
5139
5140 /* Calculate what the instruction sequences would be if we generated it
5141 normally, negated, or inverted. */
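/* Worked example (illustrative only): for x = y & 0xffff00ff the constant
   itself is not a valid ARM immediate, but its inverse 0x0000ff00 is, so
   the inverted sequence below costs a single insn and the operation can
   be emitted as BIC rd, rs, #0x0000ff00.  */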
5142 if (code == AND)
5143 /* AND cannot be split into multiple insns, so invert and use BIC. */
5144 insns = 99;
5145 else
5146 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5147
5148 if (can_negate)
5149 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5150 &neg_immediates);
5151 else
5152 neg_insns = 99;
5153
5154 if (can_invert || final_invert)
5155 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5156 &inv_immediates);
5157 else
5158 inv_insns = 99;
5159
5160 immediates = &pos_immediates;
5161
5162 /* Is the negated immediate sequence more efficient? */
5163 if (neg_insns < insns && neg_insns <= inv_insns)
5164 {
5165 insns = neg_insns;
5166 immediates = &neg_immediates;
5167 }
5168 else
5169 can_negate = 0;
5170
5171 /* Is the inverted immediate sequence more efficient?
5172 We must allow for an extra NOT instruction for XOR operations, although
5173 there is some chance that the final 'mvn' will get optimized later. */
5174 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5175 {
5176 insns = inv_insns;
5177 immediates = &inv_immediates;
5178 }
5179 else
5180 {
5181 can_invert = 0;
5182 final_invert = 0;
5183 }
5184
5185 /* Now output the chosen sequence as instructions. */
5186 if (generate)
5187 {
5188 for (i = 0; i < insns; i++)
5189 {
5190 rtx new_src, temp1_rtx;
5191
5192 temp1 = immediates->i[i];
5193
5194 if (code == SET || code == MINUS)
5195 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5196 else if ((final_invert || i < (insns - 1)) && subtargets)
5197 new_src = gen_reg_rtx (mode);
5198 else
5199 new_src = target;
5200
5201 if (can_invert)
5202 temp1 = ~temp1;
5203 else if (can_negate)
5204 temp1 = -temp1;
5205
5206 temp1 = trunc_int_for_mode (temp1, mode);
5207 temp1_rtx = GEN_INT (temp1);
5208
5209 if (code == SET)
5210 ;
5211 else if (code == MINUS)
5212 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5213 else
5214 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5215
5216 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5217 source = new_src;
5218
5219 if (code == SET)
5220 {
5221 can_negate = can_invert;
5222 can_invert = 0;
5223 code = PLUS;
5224 }
5225 else if (code == MINUS)
5226 code = PLUS;
5227 }
5228 }
5229
5230 if (final_invert)
5231 {
5232 if (generate)
5233 emit_constant_insn (cond, gen_rtx_SET (target,
5234 gen_rtx_NOT (mode, source)));
5235 insns++;
5236 }
5237
5238 return insns;
5239 }
5240
5241 /* Canonicalize a comparison so that we are more likely to recognize it.
5242 This can be done for a few constant compares, where we can make the
5243 immediate value easier to load. */
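/* Worked example (illustrative only): for a signed SImode comparison
   x > 0xffffff the constant 0x00ffffff cannot be encoded as an ARM
   immediate, but 0x01000000 can, so the GT/LE case below rewrites the
   test as x >= 0x1000000 and no constant-loading insns are needed.  */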
5244
5245 static void
5246 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5247 bool op0_preserve_value)
5248 {
5249 machine_mode mode;
5250 unsigned HOST_WIDE_INT i, maxval;
5251
5252 mode = GET_MODE (*op0);
5253 if (mode == VOIDmode)
5254 mode = GET_MODE (*op1);
5255
5256 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5257
5258 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5259 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5260 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5261 for GTU/LEU in Thumb mode. */
5262 if (mode == DImode)
5263 {
5264
5265 if (*code == GT || *code == LE
5266 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5267 {
5268 /* Missing comparison. First try to use an available
5269 comparison. */
5270 if (CONST_INT_P (*op1))
5271 {
5272 i = INTVAL (*op1);
5273 switch (*code)
5274 {
5275 case GT:
5276 case LE:
5277 if (i != maxval
5278 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5279 {
5280 *op1 = GEN_INT (i + 1);
5281 *code = *code == GT ? GE : LT;
5282 return;
5283 }
5284 break;
5285 case GTU:
5286 case LEU:
5287 if (i != ~((unsigned HOST_WIDE_INT) 0)
5288 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5289 {
5290 *op1 = GEN_INT (i + 1);
5291 *code = *code == GTU ? GEU : LTU;
5292 return;
5293 }
5294 break;
5295 default:
5296 gcc_unreachable ();
5297 }
5298 }
5299
5300 /* If that did not work, reverse the condition. */
5301 if (!op0_preserve_value)
5302 {
5303 std::swap (*op0, *op1);
5304 *code = (int)swap_condition ((enum rtx_code)*code);
5305 }
5306 }
5307 return;
5308 }
5309
5310 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5311 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5312 to facilitate possible combining with a cmp into 'ands'. */
5313 if (mode == SImode
5314 && GET_CODE (*op0) == ZERO_EXTEND
5315 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5316 && GET_MODE (XEXP (*op0, 0)) == QImode
5317 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5318 && subreg_lowpart_p (XEXP (*op0, 0))
5319 && *op1 == const0_rtx)
5320 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5321 GEN_INT (255));
5322
5323 /* Comparisons smaller than DImode. Only adjust comparisons against
5324 an out-of-range constant. */
5325 if (!CONST_INT_P (*op1)
5326 || const_ok_for_arm (INTVAL (*op1))
5327 || const_ok_for_arm (- INTVAL (*op1)))
5328 return;
5329
5330 i = INTVAL (*op1);
5331
5332 switch (*code)
5333 {
5334 case EQ:
5335 case NE:
5336 return;
5337
5338 case GT:
5339 case LE:
5340 if (i != maxval
5341 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5342 {
5343 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5344 *code = *code == GT ? GE : LT;
5345 return;
5346 }
5347 break;
5348
5349 case GE:
5350 case LT:
5351 if (i != ~maxval
5352 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5353 {
5354 *op1 = GEN_INT (i - 1);
5355 *code = *code == GE ? GT : LE;
5356 return;
5357 }
5358 break;
5359
5360 case GTU:
5361 case LEU:
5362 if (i != ~((unsigned HOST_WIDE_INT) 0)
5363 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5364 {
5365 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5366 *code = *code == GTU ? GEU : LTU;
5367 return;
5368 }
5369 break;
5370
5371 case GEU:
5372 case LTU:
5373 if (i != 0
5374 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5375 {
5376 *op1 = GEN_INT (i - 1);
5377 *code = *code == GEU ? GTU : LEU;
5378 return;
5379 }
5380 break;
5381
5382 default:
5383 gcc_unreachable ();
5384 }
5385 }
5386
5387
5388 /* Define how to find the value returned by a function. */
5389
5390 static rtx
5391 arm_function_value(const_tree type, const_tree func,
5392 bool outgoing ATTRIBUTE_UNUSED)
5393 {
5394 machine_mode mode;
5395 int unsignedp ATTRIBUTE_UNUSED;
5396 rtx r ATTRIBUTE_UNUSED;
5397
5398 mode = TYPE_MODE (type);
5399
5400 if (TARGET_AAPCS_BASED)
5401 return aapcs_allocate_return_reg (mode, type, func);
5402
5403 /* Promote integer types. */
5404 if (INTEGRAL_TYPE_P (type))
5405 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5406
5407 /* Promote small structs returned in a register to full-word size
5408 for big-endian AAPCS. */
5409 if (arm_return_in_msb (type))
5410 {
5411 HOST_WIDE_INT size = int_size_in_bytes (type);
5412 if (size % UNITS_PER_WORD != 0)
5413 {
5414 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5415 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5416 }
5417 }
5418
5419 return arm_libcall_value_1 (mode);
5420 }
5421
5422 /* libcall hashtable helpers. */
5423
5424 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5425 {
5426 static inline hashval_t hash (const rtx_def *);
5427 static inline bool equal (const rtx_def *, const rtx_def *);
5428 static inline void remove (rtx_def *);
5429 };
5430
5431 inline bool
5432 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5433 {
5434 return rtx_equal_p (p1, p2);
5435 }
5436
5437 inline hashval_t
5438 libcall_hasher::hash (const rtx_def *p1)
5439 {
5440 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5441 }
5442
5443 typedef hash_table<libcall_hasher> libcall_table_type;
5444
5445 static void
5446 add_libcall (libcall_table_type *htab, rtx libcall)
5447 {
5448 *htab->find_slot (libcall, INSERT) = libcall;
5449 }
5450
5451 static bool
5452 arm_libcall_uses_aapcs_base (const_rtx libcall)
5453 {
5454 static bool init_done = false;
5455 static libcall_table_type *libcall_htab = NULL;
5456
5457 if (!init_done)
5458 {
5459 init_done = true;
5460
5461 libcall_htab = new libcall_table_type (31);
5462 add_libcall (libcall_htab,
5463 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5464 add_libcall (libcall_htab,
5465 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5466 add_libcall (libcall_htab,
5467 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5468 add_libcall (libcall_htab,
5469 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5470
5471 add_libcall (libcall_htab,
5472 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5473 add_libcall (libcall_htab,
5474 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5475 add_libcall (libcall_htab,
5476 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5477 add_libcall (libcall_htab,
5478 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5479
5480 add_libcall (libcall_htab,
5481 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5482 add_libcall (libcall_htab,
5483 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5484 add_libcall (libcall_htab,
5485 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5492 add_libcall (libcall_htab,
5493 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5494 add_libcall (libcall_htab,
5495 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5496
5497 /* Values from double-precision helper functions are returned in core
5498 registers if the selected core only supports single-precision
5499 arithmetic, even if we are using the hard-float ABI. The same is
5500 true for single-precision helpers, but we will never be using the
5501 hard-float ABI on a CPU which doesn't support single-precision
5502 operations in hardware. */
5503 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5504 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5505 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5506 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5507 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5508 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5509 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5510 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5511 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5512 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5513 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5514 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5515 SFmode));
5516 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5517 DFmode));
5518 add_libcall (libcall_htab,
5519 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5520 }
5521
5522 return libcall && libcall_htab->find (libcall) != NULL;
5523 }
5524
5525 static rtx
5526 arm_libcall_value_1 (machine_mode mode)
5527 {
5528 if (TARGET_AAPCS_BASED)
5529 return aapcs_libcall_value (mode);
5530 else if (TARGET_IWMMXT_ABI
5531 && arm_vector_mode_supported_p (mode))
5532 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5533 else
5534 return gen_rtx_REG (mode, ARG_REGISTER (1));
5535 }
5536
5537 /* Define how to find the value returned by a library function
5538 assuming the value has mode MODE. */
5539
5540 static rtx
5541 arm_libcall_value (machine_mode mode, const_rtx libcall)
5542 {
5543 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5544 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5545 {
5546 /* The following libcalls return their result in integer registers,
5547 even though they return a floating point value. */
5548 if (arm_libcall_uses_aapcs_base (libcall))
5549 return gen_rtx_REG (mode, ARG_REGISTER(1));
5550
5551 }
5552
5553 return arm_libcall_value_1 (mode);
5554 }
5555
5556 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5557
5558 static bool
5559 arm_function_value_regno_p (const unsigned int regno)
5560 {
5561 if (regno == ARG_REGISTER (1)
5562 || (TARGET_32BIT
5563 && TARGET_AAPCS_BASED
5564 && TARGET_HARD_FLOAT
5565 && regno == FIRST_VFP_REGNUM)
5566 || (TARGET_IWMMXT_ABI
5567 && regno == FIRST_IWMMXT_REGNUM))
5568 return true;
5569
5570 return false;
5571 }
5572
5573 /* Determine the amount of memory needed to store the possible return
5574 registers of an untyped call. */
5575 int
5576 arm_apply_result_size (void)
5577 {
5578 int size = 16;
5579
5580 if (TARGET_32BIT)
5581 {
5582 if (TARGET_HARD_FLOAT_ABI)
5583 size += 32;
5584 if (TARGET_IWMMXT_ABI)
5585 size += 8;
5586 }
5587
5588 return size;
5589 }
5590
5591 /* Decide whether TYPE should be returned in memory (true)
5592 or in a register (false). FNTYPE is the type of the function making
5593 the call. */
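/* Worked example (illustrative only): under AAPCS a 4-byte struct such
   as struct { int i; } fits in one word and is returned in r0 (false
   below), whereas a struct of two or more ints is larger than a word
   and, not being a co-processor candidate, is returned in memory
   (true).  */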
5594 static bool
5595 arm_return_in_memory (const_tree type, const_tree fntype)
5596 {
5597 HOST_WIDE_INT size;
5598
5599 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5600
5601 if (TARGET_AAPCS_BASED)
5602 {
5603 /* Simple, non-aggregate types (i.e. not including vectors and
5604 complex) are always returned in a register (or registers).
5605 We don't care about which register here, so we can short-cut
5606 some of the detail. */
5607 if (!AGGREGATE_TYPE_P (type)
5608 && TREE_CODE (type) != VECTOR_TYPE
5609 && TREE_CODE (type) != COMPLEX_TYPE)
5610 return false;
5611
5612 /* Any return value that is no larger than one word can be
5613 returned in r0. */
5614 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5615 return false;
5616
5617 /* Check any available co-processors to see if they accept the
5618 type as a register candidate (VFP, for example, can return
5619 some aggregates in consecutive registers). These aren't
5620 available if the call is variadic. */
5621 if (aapcs_select_return_coproc (type, fntype) >= 0)
5622 return false;
5623
5624 /* Vector values should be returned using ARM registers, not
5625 memory (unless they're over 16 bytes, which will break since
5626 we only have four call-clobbered registers to play with). */
5627 if (TREE_CODE (type) == VECTOR_TYPE)
5628 return (size < 0 || size > (4 * UNITS_PER_WORD));
5629
5630 /* The rest go in memory. */
5631 return true;
5632 }
5633
5634 if (TREE_CODE (type) == VECTOR_TYPE)
5635 return (size < 0 || size > (4 * UNITS_PER_WORD));
5636
5637 if (!AGGREGATE_TYPE_P (type)
5638 && TREE_CODE (type) != VECTOR_TYPE)
5639 /* All simple types are returned in registers. */
5640 return false;
5641
5642 if (arm_abi != ARM_ABI_APCS)
5643 {
5644 /* ATPCS and later return aggregate types in memory only if they are
5645 larger than a word (or are variable size). */
5646 return (size < 0 || size > UNITS_PER_WORD);
5647 }
5648
5649 /* For the arm-wince targets we choose to be compatible with Microsoft's
5650 ARM and Thumb compilers, which always return aggregates in memory. */
5651 #ifndef ARM_WINCE
5652 /* All structures/unions bigger than one word are returned in memory.
5653 Also catch the case where int_size_in_bytes returns -1. In this case
5654 the aggregate is either huge or of variable size, and in either case
5655 we will want to return it via memory and not in a register. */
5656 if (size < 0 || size > UNITS_PER_WORD)
5657 return true;
5658
5659 if (TREE_CODE (type) == RECORD_TYPE)
5660 {
5661 tree field;
5662
5663 /* For a struct the APCS says that we only return in a register
5664 if the type is 'integer like' and every addressable element
5665 has an offset of zero. For practical purposes this means
5666 that the structure can have at most one non bit-field element
5667 and that this element must be the first one in the structure. */
5668
5669 /* Find the first field, ignoring non FIELD_DECL things which will
5670 have been created by C++. */
5671 for (field = TYPE_FIELDS (type);
5672 field && TREE_CODE (field) != FIELD_DECL;
5673 field = DECL_CHAIN (field))
5674 continue;
5675
5676 if (field == NULL)
5677 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5678
5679 /* Check that the first field is valid for returning in a register. */
5680
5681 /* ... Floats are not allowed.  */
5682 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5683 return true;
5684
5685 /* ... Aggregates that are not themselves valid for returning in
5686 a register are not allowed. */
5687 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5688 return true;
5689
5690 /* Now check the remaining fields, if any. Only bitfields are allowed,
5691 since they are not addressable. */
5692 for (field = DECL_CHAIN (field);
5693 field;
5694 field = DECL_CHAIN (field))
5695 {
5696 if (TREE_CODE (field) != FIELD_DECL)
5697 continue;
5698
5699 if (!DECL_BIT_FIELD_TYPE (field))
5700 return true;
5701 }
5702
5703 return false;
5704 }
5705
5706 if (TREE_CODE (type) == UNION_TYPE)
5707 {
5708 tree field;
5709
5710 /* Unions can be returned in registers if every element is
5711 integral, or can be returned in an integer register. */
5712 for (field = TYPE_FIELDS (type);
5713 field;
5714 field = DECL_CHAIN (field))
5715 {
5716 if (TREE_CODE (field) != FIELD_DECL)
5717 continue;
5718
5719 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5720 return true;
5721
5722 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5723 return true;
5724 }
5725
5726 return false;
5727 }
5728 #endif /* not ARM_WINCE */
5729
5730 /* Return all other types in memory. */
5731 return true;
5732 }
5733
5734 const struct pcs_attribute_arg
5735 {
5736 const char *arg;
5737 enum arm_pcs value;
5738 } pcs_attribute_args[] =
5739 {
5740 {"aapcs", ARM_PCS_AAPCS},
5741 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5742 #if 0
5743 /* We could recognize these, but changes would be needed elsewhere
5744 * to implement them. */
5745 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5746 {"atpcs", ARM_PCS_ATPCS},
5747 {"apcs", ARM_PCS_APCS},
5748 #endif
5749 {NULL, ARM_PCS_UNKNOWN}
5750 };
5751
5752 static enum arm_pcs
5753 arm_pcs_from_attribute (tree attr)
5754 {
5755 const struct pcs_attribute_arg *ptr;
5756 const char *arg;
5757
5758 /* Get the value of the argument. */
5759 if (TREE_VALUE (attr) == NULL_TREE
5760 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5761 return ARM_PCS_UNKNOWN;
5762
5763 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5764
5765 /* Check it against the list of known arguments. */
5766 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5767 if (streq (arg, ptr->arg))
5768 return ptr->value;
5769
5770 /* An unrecognized PCS name.  */
5771 return ARM_PCS_UNKNOWN;
5772 }
5773
5774 /* Get the PCS variant to use for this call. TYPE is the function's type
5775 specification, DECL is the specific declaration. DECL may be null if
5776 the call could be indirect or if this is a library call. */
5777 static enum arm_pcs
5778 arm_get_pcs_model (const_tree type, const_tree decl)
5779 {
5780 bool user_convention = false;
5781 enum arm_pcs user_pcs = arm_pcs_default;
5782 tree attr;
5783
5784 gcc_assert (type);
5785
5786 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5787 if (attr)
5788 {
5789 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5790 user_convention = true;
5791 }
5792
5793 if (TARGET_AAPCS_BASED)
5794 {
5795 /* Detect varargs functions. These always use the base rules
5796 (no argument is ever a candidate for a co-processor
5797 register). */
5798 bool base_rules = stdarg_p (type);
5799
5800 if (user_convention)
5801 {
5802 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5803 sorry ("non-AAPCS derived PCS variant");
5804 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5805 error ("variadic functions must use the base AAPCS variant");
5806 }
5807
5808 if (base_rules)
5809 return ARM_PCS_AAPCS;
5810 else if (user_convention)
5811 return user_pcs;
5812 else if (decl && flag_unit_at_a_time)
5813 {
5814 /* Local functions never leak outside this compilation unit,
5815 so we are free to use whatever conventions are
5816 appropriate. */
5817 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5818 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5819 if (i && i->local)
5820 return ARM_PCS_AAPCS_LOCAL;
5821 }
5822 }
5823 else if (user_convention && user_pcs != arm_pcs_default)
5824 sorry ("PCS variant");
5825
5826 /* For everything else we use the target's default. */
5827 return arm_pcs_default;
5828 }
5829
5830
5831 static void
5832 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5833 const_tree fntype ATTRIBUTE_UNUSED,
5834 rtx libcall ATTRIBUTE_UNUSED,
5835 const_tree fndecl ATTRIBUTE_UNUSED)
5836 {
5837 /* Record the unallocated VFP registers. */
5838 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5839 pcum->aapcs_vfp_reg_alloc = 0;
5840 }
5841
5842 /* Walk down the type tree of TYPE counting consecutive base elements.
5843 If *MODEP is VOIDmode, then set it to the first valid floating point
5844 type. If a non-floating point type is found, or if a floating point
5845 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5846 otherwise return the count in the sub-tree. */
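/* Worked example (illustrative only): for a hypothetical
       struct vec3 { float x, y, z; };
   the walk below sets *MODEP to SFmode and returns 3, i.e. the struct is
   a homogeneous aggregate of three single-precision elements and can use
   s0-s2 under the VFP PCS.  Adding an int member would hit the default
   case and make the whole record return -1.  */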
5847 static int
5848 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5849 {
5850 machine_mode mode;
5851 HOST_WIDE_INT size;
5852
5853 switch (TREE_CODE (type))
5854 {
5855 case REAL_TYPE:
5856 mode = TYPE_MODE (type);
5857 if (mode != DFmode && mode != SFmode && mode != HFmode)
5858 return -1;
5859
5860 if (*modep == VOIDmode)
5861 *modep = mode;
5862
5863 if (*modep == mode)
5864 return 1;
5865
5866 break;
5867
5868 case COMPLEX_TYPE:
5869 mode = TYPE_MODE (TREE_TYPE (type));
5870 if (mode != DFmode && mode != SFmode)
5871 return -1;
5872
5873 if (*modep == VOIDmode)
5874 *modep = mode;
5875
5876 if (*modep == mode)
5877 return 2;
5878
5879 break;
5880
5881 case VECTOR_TYPE:
5882 /* Use V2SImode and V4SImode as representatives of all 64-bit
5883 and 128-bit vector types, whether or not those modes are
5884 supported with the present options. */
5885 size = int_size_in_bytes (type);
5886 switch (size)
5887 {
5888 case 8:
5889 mode = V2SImode;
5890 break;
5891 case 16:
5892 mode = V4SImode;
5893 break;
5894 default:
5895 return -1;
5896 }
5897
5898 if (*modep == VOIDmode)
5899 *modep = mode;
5900
5901 /* Vector modes are considered to be opaque: two vectors are
5902 equivalent for the purposes of being homogeneous aggregates
5903 if they are the same size. */
5904 if (*modep == mode)
5905 return 1;
5906
5907 break;
5908
5909 case ARRAY_TYPE:
5910 {
5911 int count;
5912 tree index = TYPE_DOMAIN (type);
5913
5914 /* Can't handle incomplete types nor sizes that are not
5915 fixed. */
5916 if (!COMPLETE_TYPE_P (type)
5917 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5918 return -1;
5919
5920 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5921 if (count == -1
5922 || !index
5923 || !TYPE_MAX_VALUE (index)
5924 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5925 || !TYPE_MIN_VALUE (index)
5926 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5927 || count < 0)
5928 return -1;
5929
5930 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5931 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5932
5933 /* There must be no padding. */
5934 if (wi::to_wide (TYPE_SIZE (type))
5935 != count * GET_MODE_BITSIZE (*modep))
5936 return -1;
5937
5938 return count;
5939 }
5940
5941 case RECORD_TYPE:
5942 {
5943 int count = 0;
5944 int sub_count;
5945 tree field;
5946
5947 /* Can't handle incomplete types nor sizes that are not
5948 fixed. */
5949 if (!COMPLETE_TYPE_P (type)
5950 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5951 return -1;
5952
5953 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5954 {
5955 if (TREE_CODE (field) != FIELD_DECL)
5956 continue;
5957
5958 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5959 if (sub_count < 0)
5960 return -1;
5961 count += sub_count;
5962 }
5963
5964 /* There must be no padding. */
5965 if (wi::to_wide (TYPE_SIZE (type))
5966 != count * GET_MODE_BITSIZE (*modep))
5967 return -1;
5968
5969 return count;
5970 }
5971
5972 case UNION_TYPE:
5973 case QUAL_UNION_TYPE:
5974 {
5975 /* These aren't very interesting except in a degenerate case. */
5976 int count = 0;
5977 int sub_count;
5978 tree field;
5979
5980 /* Can't handle incomplete types nor sizes that are not
5981 fixed. */
5982 if (!COMPLETE_TYPE_P (type)
5983 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5984 return -1;
5985
5986 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5987 {
5988 if (TREE_CODE (field) != FIELD_DECL)
5989 continue;
5990
5991 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5992 if (sub_count < 0)
5993 return -1;
5994 count = count > sub_count ? count : sub_count;
5995 }
5996
5997 /* There must be no padding. */
5998 if (wi::to_wide (TYPE_SIZE (type))
5999 != count * GET_MODE_BITSIZE (*modep))
6000 return -1;
6001
6002 return count;
6003 }
6004
6005 default:
6006 break;
6007 }
6008
6009 return -1;
6010 }
6011
6012 /* Return true if PCS_VARIANT should use VFP registers. */
6013 static bool
6014 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6015 {
6016 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6017 {
6018 static bool seen_thumb1_vfp = false;
6019
6020 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6021 {
6022 sorry ("Thumb-1 hard-float VFP ABI");
6023 /* sorry() is not immediately fatal, so only display this once. */
6024 seen_thumb1_vfp = true;
6025 }
6026
6027 return true;
6028 }
6029
6030 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6031 return false;
6032
6033 return (TARGET_32BIT && TARGET_HARD_FLOAT
6034 && (TARGET_VFP_DOUBLE || !is_double));
6035 }
6036
6037 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6038 suitable for passing or returning in VFP registers for the PCS
6039 variant selected. If it is, then *BASE_MODE is updated to contain
6040 a machine mode describing each element of the argument's type and
6041 *COUNT to hold the number of such elements. */
6042 static bool
6043 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6044 machine_mode mode, const_tree type,
6045 machine_mode *base_mode, int *count)
6046 {
6047 machine_mode new_mode = VOIDmode;
6048
6049 /* If we have the type information, prefer that to working things
6050 out from the mode. */
6051 if (type)
6052 {
6053 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6054
6055 if (ag_count > 0 && ag_count <= 4)
6056 *count = ag_count;
6057 else
6058 return false;
6059 }
6060 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6061 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6062 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6063 {
6064 *count = 1;
6065 new_mode = mode;
6066 }
6067 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6068 {
6069 *count = 2;
6070 new_mode = (mode == DCmode ? DFmode : SFmode);
6071 }
6072 else
6073 return false;
6074
6075
6076 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6077 return false;
6078
6079 *base_mode = new_mode;
6080
6081 if (TARGET_GENERAL_REGS_ONLY)
6082 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6083 type);
6084
6085 return true;
6086 }
6087
6088 static bool
6089 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6090 machine_mode mode, const_tree type)
6091 {
6092 int count ATTRIBUTE_UNUSED;
6093 machine_mode ag_mode ATTRIBUTE_UNUSED;
6094
6095 if (!use_vfp_abi (pcs_variant, false))
6096 return false;
6097 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6098 &ag_mode, &count);
6099 }
6100
6101 static bool
6102 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6103 const_tree type)
6104 {
6105 if (!use_vfp_abi (pcum->pcs_variant, false))
6106 return false;
6107
6108 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6109 &pcum->aapcs_vfp_rmode,
6110 &pcum->aapcs_vfp_rcount);
6111 }
6112
6113 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6114 for the behaviour of this function. */
6115
6116 static bool
6117 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6118 const_tree type ATTRIBUTE_UNUSED)
6119 {
6120 int rmode_size
6121 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6122 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6123 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6124 int regno;
6125
6126 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6127 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6128 {
6129 pcum->aapcs_vfp_reg_alloc = mask << regno;
6130 if (mode == BLKmode
6131 || (mode == TImode && ! TARGET_NEON)
6132 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6133 {
6134 int i;
6135 int rcount = pcum->aapcs_vfp_rcount;
6136 int rshift = shift;
6137 machine_mode rmode = pcum->aapcs_vfp_rmode;
6138 rtx par;
6139 if (!TARGET_NEON)
6140 {
6141 /* Avoid using unsupported vector modes. */
6142 if (rmode == V2SImode)
6143 rmode = DImode;
6144 else if (rmode == V4SImode)
6145 {
6146 rmode = DImode;
6147 rcount *= 2;
6148 rshift /= 2;
6149 }
6150 }
6151 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6152 for (i = 0; i < rcount; i++)
6153 {
6154 rtx tmp = gen_rtx_REG (rmode,
6155 FIRST_VFP_REGNUM + regno + i * rshift);
6156 tmp = gen_rtx_EXPR_LIST
6157 (VOIDmode, tmp,
6158 GEN_INT (i * GET_MODE_SIZE (rmode)));
6159 XVECEXP (par, 0, i) = tmp;
6160 }
6161
6162 pcum->aapcs_reg = par;
6163 }
6164 else
6165 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6166 return true;
6167 }
6168 return false;
6169 }
6170
6171 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6172 comment there for the behaviour of this function. */
6173
6174 static rtx
6175 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6176 machine_mode mode,
6177 const_tree type ATTRIBUTE_UNUSED)
6178 {
6179 if (!use_vfp_abi (pcs_variant, false))
6180 return NULL;
6181
6182 if (mode == BLKmode
6183 || (GET_MODE_CLASS (mode) == MODE_INT
6184 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6185 && !TARGET_NEON))
6186 {
6187 int count;
6188 machine_mode ag_mode;
6189 int i;
6190 rtx par;
6191 int shift;
6192
6193 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6194 &ag_mode, &count);
6195
6196 if (!TARGET_NEON)
6197 {
6198 if (ag_mode == V2SImode)
6199 ag_mode = DImode;
6200 else if (ag_mode == V4SImode)
6201 {
6202 ag_mode = DImode;
6203 count *= 2;
6204 }
6205 }
6206 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6207 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6208 for (i = 0; i < count; i++)
6209 {
6210 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6211 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6212 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6213 XVECEXP (par, 0, i) = tmp;
6214 }
6215
6216 return par;
6217 }
6218
6219 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6220 }
6221
6222 static void
6223 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6224 machine_mode mode ATTRIBUTE_UNUSED,
6225 const_tree type ATTRIBUTE_UNUSED)
6226 {
6227 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6228 pcum->aapcs_vfp_reg_alloc = 0;
6229 return;
6230 }
6231
6232 #define AAPCS_CP(X) \
6233 { \
6234 aapcs_ ## X ## _cum_init, \
6235 aapcs_ ## X ## _is_call_candidate, \
6236 aapcs_ ## X ## _allocate, \
6237 aapcs_ ## X ## _is_return_candidate, \
6238 aapcs_ ## X ## _allocate_return_reg, \
6239 aapcs_ ## X ## _advance \
6240 }
6241
6242 /* Table of co-processors that can be used to pass arguments in
6243 registers. Ideally no argument should be a candidate for more than
6244 one co-processor table entry, but the table is processed in order
6245 and stops after the first match. If that entry then fails to put
6246 the argument into a co-processor register, the argument will go on
6247 the stack. */
6248 static struct
6249 {
6250 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6251 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6252
6253 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6254 BLKmode) is a candidate for this co-processor's registers; this
6255 function should ignore any position-dependent state in
6256 CUMULATIVE_ARGS and only use call-type dependent information. */
6257 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6258
6259 /* Return true if the argument does get a co-processor register; it
6260 should set aapcs_reg to an RTX of the register allocated as is
6261 required for a return from FUNCTION_ARG. */
6262 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6263
6264 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6265 be returned in this co-processor's registers. */
6266 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6267
6268 /* Allocate and return an RTX element to hold the return type of a call. This
6269 routine must not fail and will only be called if is_return_candidate
6270 returned true with the same parameters. */
6271 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6272
6273 /* Finish processing this argument and prepare to start processing
6274 the next one. */
6275 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6276 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6277 {
6278 AAPCS_CP(vfp)
6279 };
6280
6281 #undef AAPCS_CP
6282
6283 static int
6284 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6285 const_tree type)
6286 {
6287 int i;
6288
6289 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6290 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6291 return i;
6292
6293 return -1;
6294 }
6295
6296 static int
6297 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6298 {
6299 /* We aren't passed a decl, so we can't check that a call is local.
6300 However, it isn't clear that that would be a win anyway, since it
6301 might limit some tail-calling opportunities. */
6302 enum arm_pcs pcs_variant;
6303
6304 if (fntype)
6305 {
6306 const_tree fndecl = NULL_TREE;
6307
6308 if (TREE_CODE (fntype) == FUNCTION_DECL)
6309 {
6310 fndecl = fntype;
6311 fntype = TREE_TYPE (fntype);
6312 }
6313
6314 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6315 }
6316 else
6317 pcs_variant = arm_pcs_default;
6318
6319 if (pcs_variant != ARM_PCS_AAPCS)
6320 {
6321 int i;
6322
6323 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6324 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6325 TYPE_MODE (type),
6326 type))
6327 return i;
6328 }
6329 return -1;
6330 }
6331
6332 static rtx
6333 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6334 const_tree fntype)
6335 {
6336 /* We aren't passed a decl, so we can't check that a call is local.
6337 However, it isn't clear that that would be a win anyway, since it
6338 might limit some tail-calling opportunities. */
6339 enum arm_pcs pcs_variant;
6340 int unsignedp ATTRIBUTE_UNUSED;
6341
6342 if (fntype)
6343 {
6344 const_tree fndecl = NULL_TREE;
6345
6346 if (TREE_CODE (fntype) == FUNCTION_DECL)
6347 {
6348 fndecl = fntype;
6349 fntype = TREE_TYPE (fntype);
6350 }
6351
6352 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6353 }
6354 else
6355 pcs_variant = arm_pcs_default;
6356
6357 /* Promote integer types. */
6358 if (type && INTEGRAL_TYPE_P (type))
6359 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6360
6361 if (pcs_variant != ARM_PCS_AAPCS)
6362 {
6363 int i;
6364
6365 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6366 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6367 type))
6368 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6369 mode, type);
6370 }
6371
6372 /* Promote small structs returned in a register to full-word size
6373 for big-endian AAPCS. */
6374 if (type && arm_return_in_msb (type))
6375 {
6376 HOST_WIDE_INT size = int_size_in_bytes (type);
6377 if (size % UNITS_PER_WORD != 0)
6378 {
6379 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6380 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6381 }
6382 }
6383
6384 return gen_rtx_REG (mode, R0_REGNUM);
6385 }
6386
6387 static rtx
6388 aapcs_libcall_value (machine_mode mode)
6389 {
6390 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6391 && GET_MODE_SIZE (mode) <= 4)
6392 mode = SImode;
6393
6394 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6395 }
6396
6397 /* Lay out a function argument using the AAPCS rules. The rule
6398 numbers referred to here are those in the AAPCS. */
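/* Worked example (illustrative only): for a hypothetical call
   f (int a, long long b), A lands in r0 leaving NCRN == 1; B needs
   doubleword alignment, so rule C3 below rounds NCRN up to 2 and rule
   C4 then places B in the r2/r3 pair with nothing spilled to the
   stack.  */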
6399 static void
6400 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6401 const_tree type, bool named)
6402 {
6403 int nregs, nregs2;
6404 int ncrn;
6405
6406 /* We only need to do this once per argument. */
6407 if (pcum->aapcs_arg_processed)
6408 return;
6409
6410 pcum->aapcs_arg_processed = true;
6411
6412 /* Special case: if named is false then we are handling an incoming
6413 anonymous argument which is on the stack. */
6414 if (!named)
6415 return;
6416
6417 /* Is this a potential co-processor register candidate? */
6418 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6419 {
6420 int slot = aapcs_select_call_coproc (pcum, mode, type);
6421 pcum->aapcs_cprc_slot = slot;
6422
6423 /* We don't have to apply any of the rules from part B of the
6424 preparation phase; these are handled elsewhere in the
6425 compiler. */
6426
6427 if (slot >= 0)
6428 {
6429 /* A Co-processor register candidate goes either in its own
6430 class of registers or on the stack. */
6431 if (!pcum->aapcs_cprc_failed[slot])
6432 {
6433 /* C1.cp - Try to allocate the argument to co-processor
6434 registers. */
6435 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6436 return;
6437
6438 /* C2.cp - Put the argument on the stack and note that we
6439 can't assign any more candidates in this slot. We also
6440 need to note that we have allocated stack space, so that
6441 we won't later try to split a non-cprc candidate between
6442 core registers and the stack. */
6443 pcum->aapcs_cprc_failed[slot] = true;
6444 pcum->can_split = false;
6445 }
6446
6447 /* We didn't get a register, so this argument goes on the
6448 stack. */
6449 gcc_assert (pcum->can_split == false);
6450 return;
6451 }
6452 }
6453
6454 /* C3 - For double-word aligned arguments, round the NCRN up to the
6455 next even number. */
6456 ncrn = pcum->aapcs_ncrn;
6457 if (ncrn & 1)
6458 {
6459 int res = arm_needs_doubleword_align (mode, type);
6460 /* Only warn during RTL expansion of call stmts, otherwise we would
6461 warn e.g. during gimplification even on functions that will be
6462 always inlined, and we'd warn multiple times. Don't warn when
6463 called in expand_function_start either, as we warn instead in
6464 arm_function_arg_boundary in that case. */
6465 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6466 inform (input_location, "parameter passing for argument of type "
6467 "%qT changed in GCC 7.1", type);
6468 else if (res > 0)
6469 ncrn++;
6470 }
6471
6472 nregs = ARM_NUM_REGS2(mode, type);
6473
6474 /* Sigh, this test should really assert that nregs > 0, but a GCC
6475 extension allows empty structs and then gives them empty size; it
6476 then allows such a structure to be passed by value. For some of
6477 the code below we have to pretend that such an argument has
6478 non-zero size so that we 'locate' it correctly either in
6479 registers or on the stack. */
6480 gcc_assert (nregs >= 0);
6481
6482 nregs2 = nregs ? nregs : 1;
6483
6484 /* C4 - Argument fits entirely in core registers. */
6485 if (ncrn + nregs2 <= NUM_ARG_REGS)
6486 {
6487 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6488 pcum->aapcs_next_ncrn = ncrn + nregs;
6489 return;
6490 }
6491
6492 /* C5 - Some core registers left and there are no arguments already
6493 on the stack: split this argument between the remaining core
6494 registers and the stack. */
6495 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6496 {
6497 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6498 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6499 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6500 return;
6501 }
6502
6503 /* C6 - NCRN is set to 4. */
6504 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6505
6506 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6507 return;
6508 }
6509
6510 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6511 for a call to a function whose data type is FNTYPE.
6512 For a library call, FNTYPE is NULL. */
6513 void
6514 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6515 rtx libname,
6516 tree fndecl ATTRIBUTE_UNUSED)
6517 {
6518 /* Long call handling. */
6519 if (fntype)
6520 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6521 else
6522 pcum->pcs_variant = arm_pcs_default;
6523
6524 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6525 {
6526 if (arm_libcall_uses_aapcs_base (libname))
6527 pcum->pcs_variant = ARM_PCS_AAPCS;
6528
6529 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6530 pcum->aapcs_reg = NULL_RTX;
6531 pcum->aapcs_partial = 0;
6532 pcum->aapcs_arg_processed = false;
6533 pcum->aapcs_cprc_slot = -1;
6534 pcum->can_split = true;
6535
6536 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6537 {
6538 int i;
6539
6540 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6541 {
6542 pcum->aapcs_cprc_failed[i] = false;
6543 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6544 }
6545 }
6546 return;
6547 }
6548
6549 /* Legacy ABIs */
6550
6551 /* On the ARM, the offset starts at 0. */
6552 pcum->nregs = 0;
6553 pcum->iwmmxt_nregs = 0;
6554 pcum->can_split = true;
6555
6556 /* Varargs vectors are treated the same as long long.
6557 named_count avoids having to change the way arm handles 'named' */
6558 pcum->named_count = 0;
6559 pcum->nargs = 0;
6560
6561 if (TARGET_REALLY_IWMMXT && fntype)
6562 {
6563 tree fn_arg;
6564
6565 for (fn_arg = TYPE_ARG_TYPES (fntype);
6566 fn_arg;
6567 fn_arg = TREE_CHAIN (fn_arg))
6568 pcum->named_count += 1;
6569
6570 if (! pcum->named_count)
6571 pcum->named_count = INT_MAX;
6572 }
6573 }
6574
6575 /* Return 2 if double word alignment is required for argument passing,
6576 but wasn't required before the fix for PR88469.
6577 Return 1 if double word alignment is required for argument passing.
6578 Return -1 if double word alignment used to be required for argument
6579 passing before PR77728 ABI fix, but is not required anymore.
6580 Return 0 if double word alignment is not required and wasn't required
6581 before either. */
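/* Worked example (illustrative only): a plain int is 32-bit aligned,
   which does not exceed PARM_BOUNDARY, so 0 is returned; a hypothetical
   struct { long long x; } contains a 64-bit aligned FIELD_DECL and
   returns 1, which callers use to start the argument on an even
   register or a doubleword-aligned stack slot.  */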
6582 static int
6583 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6584 {
6585 if (!type)
6586 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6587
6588 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6589 if (!AGGREGATE_TYPE_P (type))
6590 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6591
6592 /* Array types: Use member alignment of element type. */
6593 if (TREE_CODE (type) == ARRAY_TYPE)
6594 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6595
6596 int ret = 0;
6597 int ret2 = 0;
6598 /* Record/aggregate types: Use greatest member alignment of any member. */
6599 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6600 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6601 {
6602 if (TREE_CODE (field) == FIELD_DECL)
6603 return 1;
6604 else
6605 /* Before PR77728 fix, we were incorrectly considering also
6606 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6607 Make sure we can warn about that with -Wpsabi. */
6608 ret = -1;
6609 }
6610 else if (TREE_CODE (field) == FIELD_DECL
6611 && DECL_BIT_FIELD_TYPE (field)
6612 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
6613 ret2 = 1;
6614
6615 if (ret2)
6616 return 2;
6617
6618 return ret;
6619 }
6620
6621
6622 /* Determine where to put an argument to a function.
6623 Value is zero to push the argument on the stack,
6624 or a hard register in which to store the argument.
6625
6626 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6627 the preceding args and about the function being called.
6628 ARG is a description of the argument.
6629
6630 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6631 other arguments are passed on the stack. If (NAMED == 0) (which happens
6632 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6633 defined), say it is passed on the stack (function_prologue will
6634 indeed make it pass on the stack if necessary). */
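/* Worked example (illustrative only): for a hypothetical call
   f (int a, int b, int c, int d, int e) the first four ints occupy
   r0-r3; E no longer fits in NUM_ARG_REGS, so the code below returns
   NULL_RTX for it and it is passed on the stack.  */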
6635
6636 static rtx
6637 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
6638 {
6639 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6640 int nregs;
6641
6642 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6643 a call insn (op3 of a call_value insn). */
6644 if (arg.end_marker_p ())
6645 return const0_rtx;
6646
6647 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6648 {
6649 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6650 return pcum->aapcs_reg;
6651 }
6652
6653 /* Varargs vectors are treated the same as long long.
6654 named_count avoids having to change the way arm handles 'named' */
6655 if (TARGET_IWMMXT_ABI
6656 && arm_vector_mode_supported_p (arg.mode)
6657 && pcum->named_count > pcum->nargs + 1)
6658 {
6659 if (pcum->iwmmxt_nregs <= 9)
6660 return gen_rtx_REG (arg.mode,
6661 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6662 else
6663 {
6664 pcum->can_split = false;
6665 return NULL_RTX;
6666 }
6667 }
6668
6669 /* Put doubleword aligned quantities in even register pairs. */
6670 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6671 {
6672 int res = arm_needs_doubleword_align (arg.mode, arg.type);
6673 if (res < 0 && warn_psabi)
6674 inform (input_location, "parameter passing for argument of type "
6675 "%qT changed in GCC 7.1", arg.type);
6676 else if (res > 0)
6677 {
6678 pcum->nregs++;
6679 if (res > 1 && warn_psabi)
6680 inform (input_location, "parameter passing for argument of type "
6681 "%qT changed in GCC 9.1", arg.type);
6682 }
6683 }
6684
6685 /* Only allow splitting an arg between regs and memory if all preceding
6686 args were allocated to regs. For args passed by reference we only count
6687 the reference pointer. */
6688 if (pcum->can_split)
6689 nregs = 1;
6690 else
6691 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
6692
6693 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
6694 return NULL_RTX;
6695
6696 return gen_rtx_REG (arg.mode, pcum->nregs);
6697 }
6698
6699 static unsigned int
6700 arm_function_arg_boundary (machine_mode mode, const_tree type)
6701 {
6702 if (!ARM_DOUBLEWORD_ALIGN)
6703 return PARM_BOUNDARY;
6704
6705 int res = arm_needs_doubleword_align (mode, type);
6706 if (res < 0 && warn_psabi)
6707 inform (input_location, "parameter passing for argument of type %qT "
6708 "changed in GCC 7.1", type);
6709 if (res > 1 && warn_psabi)
6710 inform (input_location, "parameter passing for argument of type "
6711 "%qT changed in GCC 9.1", type);
6712
6713 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6714 }
6715
6716 static int
6717 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
6718 {
6719 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6720 int nregs = pcum->nregs;
6721
6722 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6723 {
6724 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6725 return pcum->aapcs_partial;
6726 }
6727
6728 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
6729 return 0;
6730
6731 if (NUM_ARG_REGS > nregs
6732 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
6733 && pcum->can_split)
6734 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6735
6736 return 0;
6737 }
6738
6739 /* Update the data in PCUM to advance over argument ARG. */
6740
6741 static void
6742 arm_function_arg_advance (cumulative_args_t pcum_v,
6743 const function_arg_info &arg)
6744 {
6745 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6746
6747 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6748 {
6749 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6750
6751 if (pcum->aapcs_cprc_slot >= 0)
6752 {
6753 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
6754 arg.type);
6755 pcum->aapcs_cprc_slot = -1;
6756 }
6757
6758 /* Generic stuff. */
6759 pcum->aapcs_arg_processed = false;
6760 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6761 pcum->aapcs_reg = NULL_RTX;
6762 pcum->aapcs_partial = 0;
6763 }
6764 else
6765 {
6766 pcum->nargs += 1;
6767 if (arm_vector_mode_supported_p (arg.mode)
6768 && pcum->named_count > pcum->nargs
6769 && TARGET_IWMMXT_ABI)
6770 pcum->iwmmxt_nregs += 1;
6771 else
6772 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
6773 }
6774 }
6775
6776 /* Variable sized types are passed by reference. This is a GCC
6777 extension to the ARM ABI. */
6778
6779 static bool
6780 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6781 {
6782 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
6783 }
6784 \f
6785 /* Encode the current state of the #pragma [no_]long_calls. */
6786 typedef enum
6787 {
6788 OFF, /* No #pragma [no_]long_calls is in effect. */
6789 LONG, /* #pragma long_calls is in effect. */
6790 SHORT /* #pragma no_long_calls is in effect. */
6791 } arm_pragma_enum;
6792
6793 static arm_pragma_enum arm_pragma_long_calls = OFF;
6794
6795 void
6796 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6797 {
6798 arm_pragma_long_calls = LONG;
6799 }
6800
6801 void
6802 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6803 {
6804 arm_pragma_long_calls = SHORT;
6805 }
6806
6807 void
6808 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6809 {
6810 arm_pragma_long_calls = OFF;
6811 }
6812 \f
6813 /* Handle an attribute requiring a FUNCTION_DECL;
6814 arguments as in struct attribute_spec.handler. */
6815 static tree
6816 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6817 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6818 {
6819 if (TREE_CODE (*node) != FUNCTION_DECL)
6820 {
6821 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6822 name);
6823 *no_add_attrs = true;
6824 }
6825
6826 return NULL_TREE;
6827 }
6828
6829 /* Handle an "interrupt" or "isr" attribute;
6830 arguments as in struct attribute_spec.handler. */
6831 static tree
6832 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6833 bool *no_add_attrs)
6834 {
6835 if (DECL_P (*node))
6836 {
6837 if (TREE_CODE (*node) != FUNCTION_DECL)
6838 {
6839 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6840 name);
6841 *no_add_attrs = true;
6842 }
6843 /* FIXME: the argument if any is checked for type attributes;
6844 should it be checked for decl ones? */
6845 }
6846 else
6847 {
6848 if (TREE_CODE (*node) == FUNCTION_TYPE
6849 || TREE_CODE (*node) == METHOD_TYPE)
6850 {
6851 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6852 {
6853 warning (OPT_Wattributes, "%qE attribute ignored",
6854 name);
6855 *no_add_attrs = true;
6856 }
6857 }
6858 else if (TREE_CODE (*node) == POINTER_TYPE
6859 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6860 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6861 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6862 {
6863 *node = build_variant_type_copy (*node);
6864 TREE_TYPE (*node) = build_type_attribute_variant
6865 (TREE_TYPE (*node),
6866 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6867 *no_add_attrs = true;
6868 }
6869 else
6870 {
6871 /* Possibly pass this attribute on from the type to a decl. */
6872 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6873 | (int) ATTR_FLAG_FUNCTION_NEXT
6874 | (int) ATTR_FLAG_ARRAY_NEXT))
6875 {
6876 *no_add_attrs = true;
6877 return tree_cons (name, args, NULL_TREE);
6878 }
6879 else
6880 {
6881 warning (OPT_Wattributes, "%qE attribute ignored",
6882 name);
6883 }
6884 }
6885 }
6886
6887 return NULL_TREE;
6888 }
6889
6890 /* Handle a "pcs" attribute; arguments as in struct
6891 attribute_spec.handler. */
6892 static tree
6893 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6894 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6895 {
6896 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6897 {
6898 warning (OPT_Wattributes, "%qE attribute ignored", name);
6899 *no_add_attrs = true;
6900 }
6901 return NULL_TREE;
6902 }
6903
6904 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6905 /* Handle the "notshared" attribute. This attribute is another way of
6906 requesting hidden visibility. ARM's compiler supports
6907 "__declspec(notshared)"; we support the same thing via an
6908 attribute. */
6909
6910 static tree
6911 arm_handle_notshared_attribute (tree *node,
6912 tree name ATTRIBUTE_UNUSED,
6913 tree args ATTRIBUTE_UNUSED,
6914 int flags ATTRIBUTE_UNUSED,
6915 bool *no_add_attrs)
6916 {
6917 tree decl = TYPE_NAME (*node);
6918
6919 if (decl)
6920 {
6921 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6922 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6923 *no_add_attrs = false;
6924 }
6925 return NULL_TREE;
6926 }
6927 #endif
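
/* Editor's illustrative sketch (assumption: C++ usage of the ARM "notshared"
   type attribute as described above; not part of the original sources):

       class __attribute__ ((notshared)) Widget
       {
         virtual void f ();   // type information gets hidden visibility
       };
*/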
6928
6929 /* This function returns true if a function with declaration FNDECL and type
6930 FNTYPE uses the stack to pass arguments or to return a value, and false
6931 otherwise. This is used for functions with the attributes
6932 'cmse_nonsecure_call' or 'cmse_nonsecure_entry'; this function will issue
6933 diagnostic messages if the stack is used. NAME is the name of the
6934 attribute used. */
6935
6936 static bool
6937 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6938 {
6939 function_args_iterator args_iter;
6940 CUMULATIVE_ARGS args_so_far_v;
6941 cumulative_args_t args_so_far;
6942 bool first_param = true;
6943 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6944
6945 /* Error out if any argument is passed on the stack. */
6946 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6947 args_so_far = pack_cumulative_args (&args_so_far_v);
6948 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6949 {
6950 rtx arg_rtx;
6951
6952 prev_arg_type = arg_type;
6953 if (VOID_TYPE_P (arg_type))
6954 continue;
6955
6956 function_arg_info arg (arg_type, /*named=*/true);
6957 if (!first_param)
6958 /* ??? We should advance after processing the argument and pass
6959 the argument we're advancing past. */
6960 arm_function_arg_advance (args_so_far, arg);
6961 arg_rtx = arm_function_arg (args_so_far, arg);
6962 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
6963 {
6964 error ("%qE attribute not available to functions with arguments "
6965 "passed on the stack", name);
6966 return true;
6967 }
6968 first_param = false;
6969 }
6970
6971 /* Error out for variadic functions since we cannot control how many
6972 arguments will be passed and thus the stack could be used. stdarg_p () is
6973 not used for the check to avoid walking the argument list twice. */
6974 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6975 {
6976 error ("%qE attribute not available to functions with variable number "
6977 "of arguments", name);
6978 return true;
6979 }
6980
6981 /* Error out if return value is passed on the stack. */
6982 ret_type = TREE_TYPE (fntype);
6983 if (arm_return_in_memory (ret_type, fntype))
6984 {
6985 error ("%qE attribute not available to functions that return value on "
6986 "the stack", name);
6987 return true;
6988 }
6989 return false;
6990 }
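
/* Editor's worked example (illustrative only, not part of the original
   sources): under the AAPCS the first four integer arguments travel in
   r0-r3, so a prototype such as

       void __attribute__ ((cmse_nonsecure_entry))
       f (int a, int b, int c, int d, int e);

   would place E on the stack and is rejected by the check above.  */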
6991
6992 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6993 function will check whether the attribute is allowed here and will add the
6994 attribute to the function declaration tree or otherwise issue a warning. */
6995
6996 static tree
6997 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6998 tree /* args */,
6999 int /* flags */,
7000 bool *no_add_attrs)
7001 {
7002 tree fndecl;
7003
7004 if (!use_cmse)
7005 {
7006 *no_add_attrs = true;
7007 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7008 "option.", name);
7009 return NULL_TREE;
7010 }
7011
7012 /* Ignore attribute for function types. */
7013 if (TREE_CODE (*node) != FUNCTION_DECL)
7014 {
7015 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7016 name);
7017 *no_add_attrs = true;
7018 return NULL_TREE;
7019 }
7020
7021 fndecl = *node;
7022
7023 /* Warn for static linkage functions. */
7024 if (!TREE_PUBLIC (fndecl))
7025 {
7026 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7027 "with static linkage", name);
7028 *no_add_attrs = true;
7029 return NULL_TREE;
7030 }
7031
7032 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7033 TREE_TYPE (fndecl));
7034 return NULL_TREE;
7035 }
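
/* Editor's illustrative sketch (assumes -mcmse on an ARMv8-M target; not
   part of the original sources):

       int __attribute__ ((cmse_nonsecure_entry))
       secure_counter (void)
       {
         static int n;
         return ++n;
       }

   Without -mcmse, or on anything other than a FUNCTION_DECL, the handler
   above drops the attribute with a warning.  */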
7036
7037
7038 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7039 function will check whether the attribute is allowed here and will add the
7040 attribute to the function type tree or otherwise issue a diagnostic. The
7041 reason we check this at declaration time is to only allow the use of the
7042 attribute with declarations of function pointers and not function
7043 declarations. This function checks NODE is of the expected type and issues
7044 diagnostics otherwise using NAME. If it is not of the expected type
7045 *NO_ADD_ATTRS will be set to true. */
7046
7047 static tree
7048 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7049 tree /* args */,
7050 int /* flags */,
7051 bool *no_add_attrs)
7052 {
7053 tree decl = NULL_TREE, fntype = NULL_TREE;
7054 tree type;
7055
7056 if (!use_cmse)
7057 {
7058 *no_add_attrs = true;
7059 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7060 "option.", name);
7061 return NULL_TREE;
7062 }
7063
7064 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7065 {
7066 decl = *node;
7067 fntype = TREE_TYPE (decl);
7068 }
7069
7070 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7071 fntype = TREE_TYPE (fntype);
7072
7073 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7074 {
7075 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7076 "function pointer", name);
7077 *no_add_attrs = true;
7078 return NULL_TREE;
7079 }
7080
7081 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7082
7083 if (*no_add_attrs)
7084 return NULL_TREE;
7085
7086 /* Prevent trees from being shared among function types with and without
7087 the cmse_nonsecure_call attribute. */
7088 type = TREE_TYPE (decl);
7089
7090 type = build_distinct_type_copy (type);
7091 TREE_TYPE (decl) = type;
7092 fntype = type;
7093
7094 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7095 {
7096 type = fntype;
7097 fntype = TREE_TYPE (fntype);
7098 fntype = build_distinct_type_copy (fntype);
7099 TREE_TYPE (type) = fntype;
7100 }
7101
7102 /* Construct a type attribute and add it to the function type. */
7103 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7104 TYPE_ATTRIBUTES (fntype));
7105 TYPE_ATTRIBUTES (fntype) = attrs;
7106 return NULL_TREE;
7107 }
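
/* Editor's illustrative sketch (assumes -mcmse; not part of the original
   sources): the attribute is accepted on the function type reached through
   a pointer declaration, not on a plain function declaration.

       void __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);

   Calls made through ns_callback then use the non-secure call sequence.  */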
7108
7109 /* Return 0 if the attributes for two types are incompatible, 1 if they
7110 are compatible, and 2 if they are nearly compatible (which causes a
7111 warning to be generated). */
7112 static int
7113 arm_comp_type_attributes (const_tree type1, const_tree type2)
7114 {
7115 int l1, l2, s1, s2;
7116
7117 /* Check for mismatch of non-default calling convention. */
7118 if (TREE_CODE (type1) != FUNCTION_TYPE)
7119 return 1;
7120
7121 /* Check for mismatched call attributes. */
7122 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7123 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7124 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7125 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7126
7127 /* Only bother to check if an attribute is defined. */
7128 if (l1 | l2 | s1 | s2)
7129 {
7130 /* If one type has an attribute, the other must have the same attribute. */
7131 if ((l1 != l2) || (s1 != s2))
7132 return 0;
7133
7134 /* Disallow mixed attributes. */
7135 if ((l1 & s2) || (l2 & s1))
7136 return 0;
7137 }
7138
7139 /* Check for mismatched ISR attribute. */
7140 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7141 if (! l1)
7142 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7143 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7144 if (! l2)
7145 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7146 if (l1 != l2)
7147 return 0;
7148
7149 l1 = lookup_attribute ("cmse_nonsecure_call",
7150 TYPE_ATTRIBUTES (type1)) != NULL;
7151 l2 = lookup_attribute ("cmse_nonsecure_call",
7152 TYPE_ATTRIBUTES (type2)) != NULL;
7153
7154 if (l1 != l2)
7155 return 0;
7156
7157 return 1;
7158 }
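
/* Editor's illustrative sketch (not part of the original sources): two
   function types that differ in these attributes compare as incompatible,
   e.g.

       extern void __attribute__ ((long_call)) f (void);
       void (*p) (void) = f;   // "long_call" vs. plain type: returns 0 above
*/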
7159
7160 /* Assign default attributes to a newly defined type. This is used to
7161 set the short_call/long_call attributes for function types of
7162 functions defined inside the corresponding #pragma scopes. */
7163 static void
7164 arm_set_default_type_attributes (tree type)
7165 {
7166 /* Add __attribute__ ((long_call)) to all functions when inside
7167 #pragma long_calls, or __attribute__ ((short_call)) when inside
7168 #pragma no_long_calls. */
7169 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7170 {
7171 tree type_attr_list, attr_name;
7172 type_attr_list = TYPE_ATTRIBUTES (type);
7173
7174 if (arm_pragma_long_calls == LONG)
7175 attr_name = get_identifier ("long_call");
7176 else if (arm_pragma_long_calls == SHORT)
7177 attr_name = get_identifier ("short_call");
7178 else
7179 return;
7180
7181 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7182 TYPE_ATTRIBUTES (type) = type_attr_list;
7183 }
7184 }
7185 \f
7186 /* Return true if DECL is known to be linked into section SECTION. */
7187
7188 static bool
7189 arm_function_in_section_p (tree decl, section *section)
7190 {
7191 /* We can only be certain about the prevailing symbol definition. */
7192 if (!decl_binds_to_current_def_p (decl))
7193 return false;
7194
7195 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7196 if (!DECL_SECTION_NAME (decl))
7197 {
7198 /* Make sure that we will not create a unique section for DECL. */
7199 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7200 return false;
7201 }
7202
7203 return function_section (decl) == section;
7204 }
7205
7206 /* Return nonzero if a 32-bit "long_call" should be generated for
7207 a call from the current function to DECL. We generate a long_call
7208 if the function:
7209
7210 a. has an __attribute__ ((long_call))
7211 or b. is within the scope of a #pragma long_calls
7212 or c. the -mlong-calls command line switch has been specified
7213
7214 However we do not generate a long call if the function:
7215
7216 d. has an __attribute__ ((short_call))
7217 or e. is inside the scope of a #pragma no_long_calls
7218 or f. is defined in the same section as the current function. */
7219
7220 bool
7221 arm_is_long_call_p (tree decl)
7222 {
7223 tree attrs;
7224
7225 if (!decl)
7226 return TARGET_LONG_CALLS;
7227
7228 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7229 if (lookup_attribute ("short_call", attrs))
7230 return false;
7231
7232 /* For "f", be conservative, and only cater for cases in which the
7233 whole of the current function is placed in the same section. */
7234 if (!flag_reorder_blocks_and_partition
7235 && TREE_CODE (decl) == FUNCTION_DECL
7236 && arm_function_in_section_p (decl, current_function_section ()))
7237 return false;
7238
7239 if (lookup_attribute ("long_call", attrs))
7240 return true;
7241
7242 return TARGET_LONG_CALLS;
7243 }
7244
7245 /* Return nonzero if it is ok to make a tail-call to DECL. */
7246 static bool
7247 arm_function_ok_for_sibcall (tree decl, tree exp)
7248 {
7249 unsigned long func_type;
7250
7251 if (cfun->machine->sibcall_blocked)
7252 return false;
7253
7254 /* Never tailcall something if we are generating code for Thumb-1. */
7255 if (TARGET_THUMB1)
7256 return false;
7257
7258 /* The PIC register is live on entry to VxWorks PLT entries, so we
7259 must make the call before restoring the PIC register. */
7260 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7261 return false;
7262
7263 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7264 may be used both as target of the call and base register for restoring
7265 the VFP registers */
7266 if (TARGET_APCS_FRAME && TARGET_ARM
7267 && TARGET_HARD_FLOAT
7268 && decl && arm_is_long_call_p (decl))
7269 return false;
7270
7271 /* If we are interworking and the function is not declared static
7272 then we can't tail-call it unless we know that it exists in this
7273 compilation unit (since it might be a Thumb routine). */
7274 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7275 && !TREE_ASM_WRITTEN (decl))
7276 return false;
7277
7278 func_type = arm_current_func_type ();
7279 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7280 if (IS_INTERRUPT (func_type))
7281 return false;
7282
7283 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7284 generated for entry functions themselves. */
7285 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7286 return false;
7287
7288 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7289 this would complicate matters for later code generation. */
7290 if (TREE_CODE (exp) == CALL_EXPR)
7291 {
7292 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7293 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7294 return false;
7295 }
7296
7297 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7298 {
7299 /* Check that the return value locations are the same. For
7300 example that we aren't returning a value from the sibling in
7301 a VFP register but then need to transfer it to a core
7302 register. */
7303 rtx a, b;
7304 tree decl_or_type = decl;
7305
7306 /* If it is an indirect function pointer, get the function type. */
7307 if (!decl)
7308 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7309
7310 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7311 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7312 cfun->decl, false);
7313 if (!rtx_equal_p (a, b))
7314 return false;
7315 }
7316
7317 /* Never tailcall if function may be called with a misaligned SP. */
7318 if (IS_STACKALIGN (func_type))
7319 return false;
7320
7321 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7322 references should become a NOP. Don't convert such calls into
7323 sibling calls. */
7324 if (TARGET_AAPCS_BASED
7325 && arm_abi == ARM_ABI_AAPCS
7326 && decl
7327 && DECL_WEAK (decl))
7328 return false;
7329
7330 /* We cannot do a tailcall for an indirect call by descriptor if all the
7331 argument registers are used because the only register left to load the
7332 address is IP and it will already contain the static chain. */
7333 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7334 {
7335 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7336 CUMULATIVE_ARGS cum;
7337 cumulative_args_t cum_v;
7338
7339 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7340 cum_v = pack_cumulative_args (&cum);
7341
7342 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7343 {
7344 tree type = TREE_VALUE (t);
7345 if (!VOID_TYPE_P (type))
7346 {
7347 function_arg_info arg (type, /*named=*/true);
7348 arm_function_arg_advance (cum_v, arg);
7349 }
7350 }
7351
7352 function_arg_info arg (integer_type_node, /*named=*/true);
7353 if (!arm_function_arg (cum_v, arg))
7354 return false;
7355 }
7356
7357 /* Everything else is ok. */
7358 return true;
7359 }
7360
7361 \f
7362 /* Addressing mode support functions. */
7363
7364 /* Return nonzero if X is a legitimate immediate operand when compiling
7365 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7366 int
7367 legitimate_pic_operand_p (rtx x)
7368 {
7369 if (GET_CODE (x) == SYMBOL_REF
7370 || (GET_CODE (x) == CONST
7371 && GET_CODE (XEXP (x, 0)) == PLUS
7372 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7373 return 0;
7374
7375 return 1;
7376 }
7377
7378 /* Record that the current function needs a PIC register. If PIC_REG is null,
7379 a new pseudo is allocated as the PIC register, otherwise PIC_REG is used.
7380 In both cases cfun->machine->pic_reg is initialized if we have not already
7381 done so. COMPUTE_NOW decides whether and where to set the PIC register. If
7382 true, the PIC register is reloaded at the current position in the
7383 instruction stream regardless of whether it was loaded before. Otherwise,
7384 it is only loaded if not already done so (crtl->uses_pic_offset_table is
7385 null). Note that a nonnull PIC_REG is only supported iff COMPUTE_NOW is
7386 true and a null PIC_REG is only supported iff COMPUTE_NOW is false. */
7387
7388 static void
7389 require_pic_register (rtx pic_reg, bool compute_now)
7390 {
7391 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7392
7393 /* A lot of the logic here is made obscure by the fact that this
7394 routine gets called as part of the rtx cost estimation process.
7395 We don't want those calls to affect any assumptions about the real
7396 function; and further, we can't call entry_of_function() until we
7397 start the real expansion process. */
7398 if (!crtl->uses_pic_offset_table || compute_now)
7399 {
7400 gcc_assert (can_create_pseudo_p ()
7401 || (pic_reg != NULL_RTX
7402 && REG_P (pic_reg)
7403 && GET_MODE (pic_reg) == Pmode));
7404 if (arm_pic_register != INVALID_REGNUM
7405 && !compute_now
7406 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7407 {
7408 if (!cfun->machine->pic_reg)
7409 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7410
7411 /* Play games to avoid marking the function as needing pic
7412 if we are being called as part of the cost-estimation
7413 process. */
7414 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7415 crtl->uses_pic_offset_table = 1;
7416 }
7417 else
7418 {
7419 rtx_insn *seq, *insn;
7420
7421 if (pic_reg == NULL_RTX)
7422 pic_reg = gen_reg_rtx (Pmode);
7423 if (!cfun->machine->pic_reg)
7424 cfun->machine->pic_reg = pic_reg;
7425
7426 /* Play games to avoid marking the function as needing pic
7427 if we are being called as part of the cost-estimation
7428 process. */
7429 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7430 {
7431 crtl->uses_pic_offset_table = 1;
7432 start_sequence ();
7433
7434 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7435 && arm_pic_register > LAST_LO_REGNUM
7436 && !compute_now)
7437 emit_move_insn (cfun->machine->pic_reg,
7438 gen_rtx_REG (Pmode, arm_pic_register));
7439 else
7440 arm_load_pic_register (0UL, pic_reg);
7441
7442 seq = get_insns ();
7443 end_sequence ();
7444
7445 for (insn = seq; insn; insn = NEXT_INSN (insn))
7446 if (INSN_P (insn))
7447 INSN_LOCATION (insn) = prologue_location;
7448
7449 /* We can be called during expansion of PHI nodes, where
7450 we can't yet emit instructions directly in the final
7451 insn stream. Queue the insns on the entry edge, they will
7452 be committed after everything else is expanded. */
7453 if (currently_expanding_to_rtl)
7454 insert_insn_on_edge (seq,
7455 single_succ_edge
7456 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7457 else
7458 emit_insn (seq);
7459 }
7460 }
7461 }
7462 }
7463
7464 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7465 created to hold the result of the load. If not NULL, PIC_REG indicates
7466 which register to use as the PIC register; otherwise the register allocator
7467 decides. COMPUTE_NOW forces the PIC register to be loaded at the current
7468 location in the instruction stream, regardless of whether it was loaded
7469 previously. Note that a nonnull PIC_REG is only supported iff COMPUTE_NOW
7470 is true and a null PIC_REG is only supported iff COMPUTE_NOW is false.
7471
7472 Returns the register REG into which the PIC load is performed. */
7473
7474 rtx
7475 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7476 bool compute_now)
7477 {
7478 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7479
7480 if (GET_CODE (orig) == SYMBOL_REF
7481 || GET_CODE (orig) == LABEL_REF)
7482 {
7483 if (reg == 0)
7484 {
7485 gcc_assert (can_create_pseudo_p ());
7486 reg = gen_reg_rtx (Pmode);
7487 }
7488
7489 /* VxWorks does not impose a fixed gap between segments; the run-time
7490 gap can be different from the object-file gap. We therefore can't
7491 use GOTOFF unless we are absolutely sure that the symbol is in the
7492 same segment as the GOT. Unfortunately, the flexibility of linker
7493 scripts means that we can't be sure of that in general, so assume
7494 that GOTOFF is never valid on VxWorks. */
7495 /* References to weak symbols cannot be resolved locally: they
7496 may be overridden by a non-weak definition at link time. */
7497 rtx_insn *insn;
7498 if ((GET_CODE (orig) == LABEL_REF
7499 || (GET_CODE (orig) == SYMBOL_REF
7500 && SYMBOL_REF_LOCAL_P (orig)
7501 && (SYMBOL_REF_DECL (orig)
7502 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7503 && NEED_GOT_RELOC
7504 && arm_pic_data_is_text_relative)
7505 insn = arm_pic_static_addr (orig, reg);
7506 else
7507 {
7508 rtx pat;
7509 rtx mem;
7510
7511 /* If this function doesn't have a pic register, create one now. */
7512 require_pic_register (pic_reg, compute_now);
7513
7514 if (pic_reg == NULL_RTX)
7515 pic_reg = cfun->machine->pic_reg;
7516
7517 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7518
7519 /* Make the MEM as close to a constant as possible. */
7520 mem = SET_SRC (pat);
7521 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7522 MEM_READONLY_P (mem) = 1;
7523 MEM_NOTRAP_P (mem) = 1;
7524
7525 insn = emit_insn (pat);
7526 }
7527
7528 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7529 by the loop optimizer. */
7530 set_unique_reg_note (insn, REG_EQUAL, orig);
7531
7532 return reg;
7533 }
7534 else if (GET_CODE (orig) == CONST)
7535 {
7536 rtx base, offset;
7537
7538 if (GET_CODE (XEXP (orig, 0)) == PLUS
7539 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7540 return orig;
7541
7542 /* Handle the case where we have: const (UNSPEC_TLS). */
7543 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7544 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7545 return orig;
7546
7547 /* Handle the case where we have:
7548 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7549 CONST_INT. */
7550 if (GET_CODE (XEXP (orig, 0)) == PLUS
7551 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7552 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7553 {
7554 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7555 return orig;
7556 }
7557
7558 if (reg == 0)
7559 {
7560 gcc_assert (can_create_pseudo_p ());
7561 reg = gen_reg_rtx (Pmode);
7562 }
7563
7564 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7565
7566 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7567 pic_reg, compute_now);
7568 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7569 base == reg ? 0 : reg, pic_reg,
7570 compute_now);
7571
7572 if (CONST_INT_P (offset))
7573 {
7574 /* The base register doesn't really matter, we only want to
7575 test the index for the appropriate mode. */
7576 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7577 {
7578 gcc_assert (can_create_pseudo_p ());
7579 offset = force_reg (Pmode, offset);
7580 }
7581
7582 if (CONST_INT_P (offset))
7583 return plus_constant (Pmode, base, INTVAL (offset));
7584 }
7585
7586 if (GET_MODE_SIZE (mode) > 4
7587 && (GET_MODE_CLASS (mode) == MODE_INT
7588 || TARGET_SOFT_FLOAT))
7589 {
7590 emit_insn (gen_addsi3 (reg, base, offset));
7591 return reg;
7592 }
7593
7594 return gen_rtx_PLUS (Pmode, base, offset);
7595 }
7596
7597 return orig;
7598 }
7599
7600
7601 /* Whether a register is callee-saved or not. This is necessary because, on
7602 Thumb-1 targets, high registers are marked as caller-saved when optimizing
7603 for size, despite being callee-saved, in order to avoid using them. */
7604 #define callee_saved_reg_p(reg) \
7605 (!call_used_regs[reg] \
7606 || (TARGET_THUMB1 && optimize_size \
7607 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7608
7609 /* Return a mask for the call-clobbered low registers that are unused
7610 at the end of the prologue. */
7611 static unsigned long
7612 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7613 {
7614 unsigned long mask = 0;
7615 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7616
7617 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7618 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
7619 mask |= 1 << (reg - FIRST_LO_REGNUM);
7620 return mask;
7621 }
7622
7623 /* Similarly for the start of the epilogue. */
7624 static unsigned long
7625 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7626 {
7627 unsigned long mask = 0;
7628 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
7629
7630 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7631 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
7632 mask |= 1 << (reg - FIRST_LO_REGNUM);
7633 return mask;
7634 }
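
/* Editor's worked example (illustrative only): FIRST_LO_REGNUM is r0, so if
   the prologue (or epilogue) finds r0 and r2 both call-clobbered and dead,
   the helpers above return (1 << 0) | (1 << 2) == 0x5.  */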
7635
7636 /* Find a spare register to use during the prologue of a function. */
7637
7638 static int
7639 thumb_find_work_register (unsigned long pushed_regs_mask)
7640 {
7641 int reg;
7642
7643 unsigned long unused_regs
7644 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7645
7646 /* Check the argument registers first as these are call-used. The
7647 register allocation order means that sometimes r3 might be used
7648 but earlier argument registers might not, so check them all. */
7649 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
7650 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
7651 return reg;
7652
7653 /* Otherwise look for a call-saved register that is going to be pushed. */
7654 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7655 if (pushed_regs_mask & (1 << reg))
7656 return reg;
7657
7658 if (TARGET_THUMB2)
7659 {
7660 /* Thumb-2 can use high regs. */
7661 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7662 if (pushed_regs_mask & (1 << reg))
7663 return reg;
7664 }
7665 /* Something went wrong - thumb_compute_save_reg_mask()
7666 should have arranged for a suitable register to be pushed. */
7667 gcc_unreachable ();
7668 }
7669
7670 static GTY(()) int pic_labelno;
7671
7672 /* Generate code to load the PIC register PIC_REG. In Thumb-1 mode,
7673 SAVED_REGS is used to find a spare low register to work with. */
7674
7675 void
7676 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7677 {
7678 rtx l1, labelno, pic_tmp, pic_rtx;
7679
7680 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7681 return;
7682
7683 gcc_assert (flag_pic);
7684
7685 if (pic_reg == NULL_RTX)
7686 pic_reg = cfun->machine->pic_reg;
7687 if (TARGET_VXWORKS_RTP)
7688 {
7689 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7690 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7691 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7692
7693 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7694
7695 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7696 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7697 }
7698 else
7699 {
7700 /* We use an UNSPEC rather than a LABEL_REF because this label
7701 never appears in the code stream. */
7702
7703 labelno = GEN_INT (pic_labelno++);
7704 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7705 l1 = gen_rtx_CONST (VOIDmode, l1);
7706
7707 /* On the ARM the PC register contains 'dot + 8' at the time of the
7708 addition, on the Thumb it is 'dot + 4'. */
7709 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7710 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7711 UNSPEC_GOTSYM_OFF);
7712 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7713
7714 if (TARGET_32BIT)
7715 {
7716 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7717 }
7718 else /* TARGET_THUMB1 */
7719 {
7720 if (arm_pic_register != INVALID_REGNUM
7721 && REGNO (pic_reg) > LAST_LO_REGNUM)
7722 {
7723 /* We will have pushed the pic register, so we should always be
7724 able to find a work register. */
7725 pic_tmp = gen_rtx_REG (SImode,
7726 thumb_find_work_register (saved_regs));
7727 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7728 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7729 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7730 }
7731 else if (arm_pic_register != INVALID_REGNUM
7732 && arm_pic_register > LAST_LO_REGNUM
7733 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7734 {
7735 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7736 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7737 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7738 }
7739 else
7740 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7741 }
7742 }
7743
7744 /* Need to emit this whether or not we obey regdecls,
7745 since setjmp/longjmp can cause life info to screw up. */
7746 emit_use (pic_reg);
7747 }
7748
7749 /* Generate code to load the address of a static var when flag_pic is set. */
7750 static rtx_insn *
7751 arm_pic_static_addr (rtx orig, rtx reg)
7752 {
7753 rtx l1, labelno, offset_rtx;
7754
7755 gcc_assert (flag_pic);
7756
7757 /* We use an UNSPEC rather than a LABEL_REF because this label
7758 never appears in the code stream. */
7759 labelno = GEN_INT (pic_labelno++);
7760 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7761 l1 = gen_rtx_CONST (VOIDmode, l1);
7762
7763 /* On the ARM the PC register contains 'dot + 8' at the time of the
7764 addition, on the Thumb it is 'dot + 4'. */
7765 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7766 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7767 UNSPEC_SYMBOL_OFFSET);
7768 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7769
7770 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7771 }
7772
7773 /* Return nonzero if X is valid as an ARM state addressing register. */
7774 static int
7775 arm_address_register_rtx_p (rtx x, int strict_p)
7776 {
7777 int regno;
7778
7779 if (!REG_P (x))
7780 return 0;
7781
7782 regno = REGNO (x);
7783
7784 if (strict_p)
7785 return ARM_REGNO_OK_FOR_BASE_P (regno);
7786
7787 return (regno <= LAST_ARM_REGNUM
7788 || regno >= FIRST_PSEUDO_REGISTER
7789 || regno == FRAME_POINTER_REGNUM
7790 || regno == ARG_POINTER_REGNUM);
7791 }
7792
7793 /* Return TRUE if this rtx is the difference of a symbol and a label,
7794 and will reduce to a PC-relative relocation in the object file.
7795 Expressions like this can be left alone when generating PIC, rather
7796 than forced through the GOT. */
7797 static int
7798 pcrel_constant_p (rtx x)
7799 {
7800 if (GET_CODE (x) == MINUS)
7801 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7802
7803 return FALSE;
7804 }
7805
7806 /* Return true if X will surely end up in an index register after the
7807 next splitting pass. */
7808 static bool
7809 will_be_in_index_register (const_rtx x)
7810 {
7811 /* arm.md: calculate_pic_address will split this into a register. */
7812 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7813 }
7814
7815 /* Return nonzero if X is a valid ARM state address operand. */
7816 int
7817 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7818 int strict_p)
7819 {
7820 bool use_ldrd;
7821 enum rtx_code code = GET_CODE (x);
7822
7823 if (arm_address_register_rtx_p (x, strict_p))
7824 return 1;
7825
7826 use_ldrd = (TARGET_LDRD
7827 && (mode == DImode || mode == DFmode));
7828
7829 if (code == POST_INC || code == PRE_DEC
7830 || ((code == PRE_INC || code == POST_DEC)
7831 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7832 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7833
7834 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7835 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7836 && GET_CODE (XEXP (x, 1)) == PLUS
7837 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7838 {
7839 rtx addend = XEXP (XEXP (x, 1), 1);
7840
7841 /* Don't allow ldrd post-increment by register because it's hard
7842 to fix up invalid register choices. */
7843 if (use_ldrd
7844 && GET_CODE (x) == POST_MODIFY
7845 && REG_P (addend))
7846 return 0;
7847
7848 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7849 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7850 }
7851
7852 /* After reload constants split into minipools will have addresses
7853 from a LABEL_REF. */
7854 else if (reload_completed
7855 && (code == LABEL_REF
7856 || (code == CONST
7857 && GET_CODE (XEXP (x, 0)) == PLUS
7858 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7859 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7860 return 1;
7861
7862 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7863 return 0;
7864
7865 else if (code == PLUS)
7866 {
7867 rtx xop0 = XEXP (x, 0);
7868 rtx xop1 = XEXP (x, 1);
7869
7870 return ((arm_address_register_rtx_p (xop0, strict_p)
7871 && ((CONST_INT_P (xop1)
7872 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7873 || (!strict_p && will_be_in_index_register (xop1))))
7874 || (arm_address_register_rtx_p (xop1, strict_p)
7875 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7876 }
7877
7878 #if 0
7879 /* Reload currently can't handle MINUS, so disable this for now */
7880 else if (GET_CODE (x) == MINUS)
7881 {
7882 rtx xop0 = XEXP (x, 0);
7883 rtx xop1 = XEXP (x, 1);
7884
7885 return (arm_address_register_rtx_p (xop0, strict_p)
7886 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7887 }
7888 #endif
7889
7890 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7891 && code == SYMBOL_REF
7892 && CONSTANT_POOL_ADDRESS_P (x)
7893 && ! (flag_pic
7894 && symbol_mentioned_p (get_pool_constant (x))
7895 && ! pcrel_constant_p (get_pool_constant (x))))
7896 return 1;
7897
7898 return 0;
7899 }
7900
7901 /* Return true if we can avoid creating a constant pool entry for x. */
7902 static bool
7903 can_avoid_literal_pool_for_label_p (rtx x)
7904 {
7905 /* Normally we can assign constant values to target registers without
7906 the help of the constant pool. But there are cases where we have to
7907 use the constant pool, such as:
7908 1) assigning a label to a register;
7909 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7910
7911 A constant pool access of the form
7912 (set (reg r0) (mem (symbol_ref (".LC0"))))
7913 will cause the use of the literal pool (later, in arm_reorg).
7914 So here we mark such a form as invalid, and the compiler will then
7915 adjust it into:
7916 (set (reg r0) (symbol_ref (".LC0")))
7917 (set (reg r0) (mem (reg r0))).
7918 No extra register is required, and (mem (reg r0)) won't cause the use
7919 of literal pools. */
7920 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7921 && CONSTANT_POOL_ADDRESS_P (x))
7922 return 1;
7923 return 0;
7924 }
7925
7926
7927 /* Return nonzero if X is a valid Thumb-2 address operand. */
7928 static int
7929 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7930 {
7931 bool use_ldrd;
7932 enum rtx_code code = GET_CODE (x);
7933
7934 if (arm_address_register_rtx_p (x, strict_p))
7935 return 1;
7936
7937 use_ldrd = (TARGET_LDRD
7938 && (mode == DImode || mode == DFmode));
7939
7940 if (code == POST_INC || code == PRE_DEC
7941 || ((code == PRE_INC || code == POST_DEC)
7942 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7943 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7944
7945 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7946 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7947 && GET_CODE (XEXP (x, 1)) == PLUS
7948 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7949 {
7950 /* Thumb-2 only has autoincrement by constant. */
7951 rtx addend = XEXP (XEXP (x, 1), 1);
7952 HOST_WIDE_INT offset;
7953
7954 if (!CONST_INT_P (addend))
7955 return 0;
7956
7957 offset = INTVAL (addend);
7958 if (GET_MODE_SIZE (mode) <= 4)
7959 return (offset > -256 && offset < 256);
7960
7961 return (use_ldrd && offset > -1024 && offset < 1024
7962 && (offset & 3) == 0);
7963 }
7964
7965 /* After reload constants split into minipools will have addresses
7966 from a LABEL_REF. */
7967 else if (reload_completed
7968 && (code == LABEL_REF
7969 || (code == CONST
7970 && GET_CODE (XEXP (x, 0)) == PLUS
7971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7972 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7973 return 1;
7974
7975 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7976 return 0;
7977
7978 else if (code == PLUS)
7979 {
7980 rtx xop0 = XEXP (x, 0);
7981 rtx xop1 = XEXP (x, 1);
7982
7983 return ((arm_address_register_rtx_p (xop0, strict_p)
7984 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7985 || (!strict_p && will_be_in_index_register (xop1))))
7986 || (arm_address_register_rtx_p (xop1, strict_p)
7987 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7988 }
7989
7990 else if (can_avoid_literal_pool_for_label_p (x))
7991 return 0;
7992
7993 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7994 && code == SYMBOL_REF
7995 && CONSTANT_POOL_ADDRESS_P (x)
7996 && ! (flag_pic
7997 && symbol_mentioned_p (get_pool_constant (x))
7998 && ! pcrel_constant_p (get_pool_constant (x))))
7999 return 1;
8000
8001 return 0;
8002 }
8003
8004 /* Return nonzero if INDEX is valid for an address index operand in
8005 ARM state. */
8006 static int
8007 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8008 int strict_p)
8009 {
8010 HOST_WIDE_INT range;
8011 enum rtx_code code = GET_CODE (index);
8012
8013 /* Standard coprocessor addressing modes. */
8014 if (TARGET_HARD_FLOAT
8015 && (mode == SFmode || mode == DFmode))
8016 return (code == CONST_INT && INTVAL (index) < 1024
8017 && INTVAL (index) > -1024
8018 && (INTVAL (index) & 3) == 0);
8019
8020 /* For quad modes, we restrict the constant offset to be slightly less
8021 than what the instruction format permits. We do this because for
8022 quad mode moves, we will actually decompose them into two separate
8023 double-mode reads or writes. INDEX must therefore be a valid
8024 (double-mode) offset and so should INDEX+8. */
8025 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8026 return (code == CONST_INT
8027 && INTVAL (index) < 1016
8028 && INTVAL (index) > -1024
8029 && (INTVAL (index) & 3) == 0);
8030
8031 /* We have no such constraint on double mode offsets, so we permit the
8032 full range of the instruction format. */
8033 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8034 return (code == CONST_INT
8035 && INTVAL (index) < 1024
8036 && INTVAL (index) > -1024
8037 && (INTVAL (index) & 3) == 0);
8038
8039 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8040 return (code == CONST_INT
8041 && INTVAL (index) < 1024
8042 && INTVAL (index) > -1024
8043 && (INTVAL (index) & 3) == 0);
8044
8045 if (arm_address_register_rtx_p (index, strict_p)
8046 && (GET_MODE_SIZE (mode) <= 4))
8047 return 1;
8048
8049 if (mode == DImode || mode == DFmode)
8050 {
8051 if (code == CONST_INT)
8052 {
8053 HOST_WIDE_INT val = INTVAL (index);
8054
8055 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8056 If vldr is selected it uses arm_coproc_mem_operand. */
8057 if (TARGET_LDRD)
8058 return val > -256 && val < 256;
8059 else
8060 return val > -4096 && val < 4092;
8061 }
8062
8063 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8064 }
8065
8066 if (GET_MODE_SIZE (mode) <= 4
8067 && ! (arm_arch4
8068 && (mode == HImode
8069 || mode == HFmode
8070 || (mode == QImode && outer == SIGN_EXTEND))))
8071 {
8072 if (code == MULT)
8073 {
8074 rtx xiop0 = XEXP (index, 0);
8075 rtx xiop1 = XEXP (index, 1);
8076
8077 return ((arm_address_register_rtx_p (xiop0, strict_p)
8078 && power_of_two_operand (xiop1, SImode))
8079 || (arm_address_register_rtx_p (xiop1, strict_p)
8080 && power_of_two_operand (xiop0, SImode)));
8081 }
8082 else if (code == LSHIFTRT || code == ASHIFTRT
8083 || code == ASHIFT || code == ROTATERT)
8084 {
8085 rtx op = XEXP (index, 1);
8086
8087 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8088 && CONST_INT_P (op)
8089 && INTVAL (op) > 0
8090 && INTVAL (op) <= 31);
8091 }
8092 }
8093
8094 /* For ARM v4 we may be doing a sign-extend operation during the
8095 load. */
8096 if (arm_arch4)
8097 {
8098 if (mode == HImode
8099 || mode == HFmode
8100 || (outer == SIGN_EXTEND && mode == QImode))
8101 range = 256;
8102 else
8103 range = 4096;
8104 }
8105 else
8106 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8107
8108 return (code == CONST_INT
8109 && INTVAL (index) < range
8110 && INTVAL (index) > -range);
8111 }
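
/* Editor's worked example (illustrative only): with arm_arch4, an SImode
   index accepts -4095..4095 (range == 4096, tested exclusively), while
   HImode, HFmode and sign-extended QImode accept -255..255 (range == 256),
   matching the 12-bit LDR/STR and 8-bit LDRH/LDRSB offset fields.  */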
8112
8113 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8114 index operand, i.e. 1, 2, 4 or 8. */
8115 static bool
8116 thumb2_index_mul_operand (rtx op)
8117 {
8118 HOST_WIDE_INT val;
8119
8120 if (!CONST_INT_P (op))
8121 return false;
8122
8123 val = INTVAL (op);
8124 return (val == 1 || val == 2 || val == 4 || val == 8);
8125 }
8126
8127 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8128 static int
8129 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8130 {
8131 enum rtx_code code = GET_CODE (index);
8132
8133 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8134 /* Standard coprocessor addressing modes. */
8135 if (TARGET_HARD_FLOAT
8136 && (mode == SFmode || mode == DFmode))
8137 return (code == CONST_INT && INTVAL (index) < 1024
8138 /* Thumb-2 allows only > -256 index range for its core register
8139 load/stores. Since we allow SF/DF in core registers, we have
8140 to use the intersection between -256~4096 (core) and -1024~1024
8141 (coprocessor). */
8142 && INTVAL (index) > -256
8143 && (INTVAL (index) & 3) == 0);
8144
8145 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8146 {
8147 /* For DImode assume values will usually live in core regs
8148 and only allow LDRD addressing modes. */
8149 if (!TARGET_LDRD || mode != DImode)
8150 return (code == CONST_INT
8151 && INTVAL (index) < 1024
8152 && INTVAL (index) > -1024
8153 && (INTVAL (index) & 3) == 0);
8154 }
8155
8156 /* For quad modes, we restrict the constant offset to be slightly less
8157 than what the instruction format permits. We do this because for
8158 quad mode moves, we will actually decompose them into two separate
8159 double-mode reads or writes. INDEX must therefore be a valid
8160 (double-mode) offset and so should INDEX+8. */
8161 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8162 return (code == CONST_INT
8163 && INTVAL (index) < 1016
8164 && INTVAL (index) > -1024
8165 && (INTVAL (index) & 3) == 0);
8166
8167 /* We have no such constraint on double mode offsets, so we permit the
8168 full range of the instruction format. */
8169 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8170 return (code == CONST_INT
8171 && INTVAL (index) < 1024
8172 && INTVAL (index) > -1024
8173 && (INTVAL (index) & 3) == 0);
8174
8175 if (arm_address_register_rtx_p (index, strict_p)
8176 && (GET_MODE_SIZE (mode) <= 4))
8177 return 1;
8178
8179 if (mode == DImode || mode == DFmode)
8180 {
8181 if (code == CONST_INT)
8182 {
8183 HOST_WIDE_INT val = INTVAL (index);
8184 /* Thumb-2 ldrd only has reg+const addressing modes.
8185 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8186 If vldr is selected it uses arm_coproc_mem_operand. */
8187 if (TARGET_LDRD)
8188 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8189 else
8190 return IN_RANGE (val, -255, 4095 - 4);
8191 }
8192 else
8193 return 0;
8194 }
8195
8196 if (code == MULT)
8197 {
8198 rtx xiop0 = XEXP (index, 0);
8199 rtx xiop1 = XEXP (index, 1);
8200
8201 return ((arm_address_register_rtx_p (xiop0, strict_p)
8202 && thumb2_index_mul_operand (xiop1))
8203 || (arm_address_register_rtx_p (xiop1, strict_p)
8204 && thumb2_index_mul_operand (xiop0)));
8205 }
8206 else if (code == ASHIFT)
8207 {
8208 rtx op = XEXP (index, 1);
8209
8210 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8211 && CONST_INT_P (op)
8212 && INTVAL (op) > 0
8213 && INTVAL (op) <= 3);
8214 }
8215
8216 return (code == CONST_INT
8217 && INTVAL (index) < 4096
8218 && INTVAL (index) > -256);
8219 }
8220
8221 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8222 static int
8223 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8224 {
8225 int regno;
8226
8227 if (!REG_P (x))
8228 return 0;
8229
8230 regno = REGNO (x);
8231
8232 if (strict_p)
8233 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8234
8235 return (regno <= LAST_LO_REGNUM
8236 || regno > LAST_VIRTUAL_REGISTER
8237 || regno == FRAME_POINTER_REGNUM
8238 || (GET_MODE_SIZE (mode) >= 4
8239 && (regno == STACK_POINTER_REGNUM
8240 || regno >= FIRST_PSEUDO_REGISTER
8241 || x == hard_frame_pointer_rtx
8242 || x == arg_pointer_rtx)));
8243 }
8244
8245 /* Return nonzero if x is a legitimate index register. This is the case
8246 for any base register that can access a QImode object. */
8247 inline static int
8248 thumb1_index_register_rtx_p (rtx x, int strict_p)
8249 {
8250 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8251 }
8252
8253 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8254
8255 The AP may be eliminated to either the SP or the FP, so we use the
8256 least common denominator, e.g. SImode, and offsets from 0 to 64.
8257
8258 ??? Verify whether the above is the right approach.
8259
8260 ??? Also, the FP may be eliminated to the SP, so perhaps that
8261 needs special handling also.
8262
8263 ??? Look at how the mips16 port solves this problem. It probably uses
8264 better ways to solve some of these problems.
8265
8266 Although it is not incorrect, we don't accept QImode and HImode
8267 addresses based on the frame pointer or arg pointer until the
8268 reload pass starts. This is so that eliminating such addresses
8269 into stack based ones won't produce impossible code. */
8270 int
8271 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8272 {
8273 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8274 return 0;
8275
8276 /* ??? Not clear if this is right. Experiment. */
8277 if (GET_MODE_SIZE (mode) < 4
8278 && !(reload_in_progress || reload_completed)
8279 && (reg_mentioned_p (frame_pointer_rtx, x)
8280 || reg_mentioned_p (arg_pointer_rtx, x)
8281 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8282 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8283 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8284 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8285 return 0;
8286
8287 /* Accept any base register. SP only in SImode or larger. */
8288 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8289 return 1;
8290
8291 /* This is PC relative data before arm_reorg runs. */
8292 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8293 && GET_CODE (x) == SYMBOL_REF
8294 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8295 return 1;
8296
8297 /* This is PC relative data after arm_reorg runs. */
8298 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8299 && reload_completed
8300 && (GET_CODE (x) == LABEL_REF
8301 || (GET_CODE (x) == CONST
8302 && GET_CODE (XEXP (x, 0)) == PLUS
8303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8304 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8305 return 1;
8306
8307 /* Post-inc indexing only supported for SImode and larger. */
8308 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8309 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8310 return 1;
8311
8312 else if (GET_CODE (x) == PLUS)
8313 {
8314 /* REG+REG address can be any two index registers. */
8315 /* We disallow FRAME+REG addressing since we know that FRAME
8316 will be replaced with STACK, and SP relative addressing only
8317 permits SP+OFFSET. */
8318 if (GET_MODE_SIZE (mode) <= 4
8319 && XEXP (x, 0) != frame_pointer_rtx
8320 && XEXP (x, 1) != frame_pointer_rtx
8321 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8322 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8323 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8324 return 1;
8325
8326 /* REG+const has 5-7 bit offset for non-SP registers. */
8327 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8328 || XEXP (x, 0) == arg_pointer_rtx)
8329 && CONST_INT_P (XEXP (x, 1))
8330 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8331 return 1;
8332
8333 /* REG+const has 10-bit offset for SP, but only SImode and
8334 larger is supported. */
8335 /* ??? Should probably check for DI/DFmode overflow here
8336 just like GO_IF_LEGITIMATE_OFFSET does. */
8337 else if (REG_P (XEXP (x, 0))
8338 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8339 && GET_MODE_SIZE (mode) >= 4
8340 && CONST_INT_P (XEXP (x, 1))
8341 && INTVAL (XEXP (x, 1)) >= 0
8342 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8343 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8344 return 1;
8345
8346 else if (REG_P (XEXP (x, 0))
8347 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8348 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8349 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8350 && REGNO (XEXP (x, 0))
8351 <= LAST_VIRTUAL_POINTER_REGISTER))
8352 && GET_MODE_SIZE (mode) >= 4
8353 && CONST_INT_P (XEXP (x, 1))
8354 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8355 return 1;
8356 }
8357
8358 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8359 && GET_MODE_SIZE (mode) == 4
8360 && GET_CODE (x) == SYMBOL_REF
8361 && CONSTANT_POOL_ADDRESS_P (x)
8362 && ! (flag_pic
8363 && symbol_mentioned_p (get_pool_constant (x))
8364 && ! pcrel_constant_p (get_pool_constant (x))))
8365 return 1;
8366
8367 return 0;
8368 }
8369
8370 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8371 instruction of mode MODE. */
8372 int
8373 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8374 {
8375 switch (GET_MODE_SIZE (mode))
8376 {
8377 case 1:
8378 return val >= 0 && val < 32;
8379
8380 case 2:
8381 return val >= 0 && val < 64 && (val & 1) == 0;
8382
8383 default:
8384 return (val >= 0
8385 && (val + GET_MODE_SIZE (mode)) <= 128
8386 && (val & 3) == 0);
8387 }
8388 }
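
/* Editor's worked example (illustrative only): a QImode access accepts
   offsets 0..31, an HImode access accepts even offsets 0..62, and an SImode
   access accepts word-aligned offsets 0..124 (val + 4 <= 128), matching the
   scaled 5-bit immediates of the 16-bit LDRB/LDRH/LDR encodings.  */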
8389
8390 bool
8391 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8392 {
8393 if (TARGET_ARM)
8394 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8395 else if (TARGET_THUMB2)
8396 return thumb2_legitimate_address_p (mode, x, strict_p);
8397 else /* if (TARGET_THUMB1) */
8398 return thumb1_legitimate_address_p (mode, x, strict_p);
8399 }
8400
8401 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8402
8403 Given an rtx X being reloaded into a reg required to be
8404 in class CLASS, return the class of reg to actually use.
8405 In general this is just CLASS, but for the Thumb core registers and
8406 immediate constants we prefer a LO_REGS class or a subset. */
8407
8408 static reg_class_t
8409 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8410 {
8411 if (TARGET_32BIT)
8412 return rclass;
8413 else
8414 {
8415 if (rclass == GENERAL_REGS)
8416 return LO_REGS;
8417 else
8418 return rclass;
8419 }
8420 }
8421
8422 /* Build the SYMBOL_REF for __tls_get_addr. */
8423
8424 static GTY(()) rtx tls_get_addr_libfunc;
8425
8426 static rtx
8427 get_tls_get_addr (void)
8428 {
8429 if (!tls_get_addr_libfunc)
8430 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8431 return tls_get_addr_libfunc;
8432 }
8433
8434 rtx
8435 arm_load_tp (rtx target)
8436 {
8437 if (!target)
8438 target = gen_reg_rtx (SImode);
8439
8440 if (TARGET_HARD_TP)
8441 {
8442 /* Can return in any reg. */
8443 emit_insn (gen_load_tp_hard (target));
8444 }
8445 else
8446 {
8447 /* Always returned in r0. Immediately copy the result into a pseudo,
8448 otherwise other uses of r0 (e.g. setting up function arguments) may
8449 clobber the value. */
8450
8451 rtx tmp;
8452
8453 emit_insn (gen_load_tp_soft ());
8454
8455 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8456 emit_move_insn (target, tmp);
8457 }
8458 return target;
8459 }
8460
8461 static rtx
8462 load_tls_operand (rtx x, rtx reg)
8463 {
8464 rtx tmp;
8465
8466 if (reg == NULL_RTX)
8467 reg = gen_reg_rtx (SImode);
8468
8469 tmp = gen_rtx_CONST (SImode, x);
8470
8471 emit_move_insn (reg, tmp);
8472
8473 return reg;
8474 }
8475
8476 static rtx_insn *
8477 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8478 {
8479 rtx label, labelno, sum;
8480
8481 gcc_assert (reloc != TLS_DESCSEQ);
8482 start_sequence ();
8483
8484 labelno = GEN_INT (pic_labelno++);
8485 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8486 label = gen_rtx_CONST (VOIDmode, label);
8487
8488 sum = gen_rtx_UNSPEC (Pmode,
8489 gen_rtvec (4, x, GEN_INT (reloc), label,
8490 GEN_INT (TARGET_ARM ? 8 : 4)),
8491 UNSPEC_TLS);
8492 reg = load_tls_operand (sum, reg);
8493
8494 if (TARGET_ARM)
8495 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8496 else
8497 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8498
8499 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8500 LCT_PURE, /* LCT_CONST? */
8501 Pmode, reg, Pmode);
8502
8503 rtx_insn *insns = get_insns ();
8504 end_sequence ();
8505
8506 return insns;
8507 }
8508
8509 static rtx
8510 arm_tls_descseq_addr (rtx x, rtx reg)
8511 {
8512 rtx labelno = GEN_INT (pic_labelno++);
8513 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8514 rtx sum = gen_rtx_UNSPEC (Pmode,
8515 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8516 gen_rtx_CONST (VOIDmode, label),
8517 GEN_INT (!TARGET_ARM)),
8518 UNSPEC_TLS);
8519 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8520
8521 emit_insn (gen_tlscall (x, labelno));
8522 if (!reg)
8523 reg = gen_reg_rtx (SImode);
8524 else
8525 gcc_assert (REGNO (reg) != R0_REGNUM);
8526
8527 emit_move_insn (reg, reg0);
8528
8529 return reg;
8530 }
8531
8532 rtx
8533 legitimize_tls_address (rtx x, rtx reg)
8534 {
8535 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8536 rtx_insn *insns;
8537 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8538
8539 switch (model)
8540 {
8541 case TLS_MODEL_GLOBAL_DYNAMIC:
8542 if (TARGET_GNU2_TLS)
8543 {
8544 reg = arm_tls_descseq_addr (x, reg);
8545
8546 tp = arm_load_tp (NULL_RTX);
8547
8548 dest = gen_rtx_PLUS (Pmode, tp, reg);
8549 }
8550 else
8551 {
8552 /* Original scheme */
8553 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8554 dest = gen_reg_rtx (Pmode);
8555 emit_libcall_block (insns, dest, ret, x);
8556 }
8557 return dest;
8558
8559 case TLS_MODEL_LOCAL_DYNAMIC:
8560 if (TARGET_GNU2_TLS)
8561 {
8562 reg = arm_tls_descseq_addr (x, reg);
8563
8564 tp = arm_load_tp (NULL_RTX);
8565
8566 dest = gen_rtx_PLUS (Pmode, tp, reg);
8567 }
8568 else
8569 {
8570 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8571
8572 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8573 share the LDM result with other LD model accesses. */
8574 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8575 UNSPEC_TLS);
8576 dest = gen_reg_rtx (Pmode);
8577 emit_libcall_block (insns, dest, ret, eqv);
8578
8579 /* Load the addend. */
8580 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8581 GEN_INT (TLS_LDO32)),
8582 UNSPEC_TLS);
8583 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8584 dest = gen_rtx_PLUS (Pmode, dest, addend);
8585 }
8586 return dest;
8587
8588 case TLS_MODEL_INITIAL_EXEC:
8589 labelno = GEN_INT (pic_labelno++);
8590 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8591 label = gen_rtx_CONST (VOIDmode, label);
8592 sum = gen_rtx_UNSPEC (Pmode,
8593 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8594 GEN_INT (TARGET_ARM ? 8 : 4)),
8595 UNSPEC_TLS);
8596 reg = load_tls_operand (sum, reg);
8597
8598 if (TARGET_ARM)
8599 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8600 else if (TARGET_THUMB2)
8601 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8602 else
8603 {
8604 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8605 emit_move_insn (reg, gen_const_mem (SImode, reg));
8606 }
8607
8608 tp = arm_load_tp (NULL_RTX);
8609
8610 return gen_rtx_PLUS (Pmode, tp, reg);
8611
8612 case TLS_MODEL_LOCAL_EXEC:
8613 tp = arm_load_tp (NULL_RTX);
8614
8615 reg = gen_rtx_UNSPEC (Pmode,
8616 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8617 UNSPEC_TLS);
8618 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8619
8620 return gen_rtx_PLUS (Pmode, tp, reg);
8621
8622 default:
8623 abort ();
8624 }
8625 }
8626
8627 /* Try machine-dependent ways of modifying an illegitimate address
8628 to be legitimate. If we find one, return the new, valid address. */
8629 rtx
8630 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8631 {
8632 if (arm_tls_referenced_p (x))
8633 {
8634 rtx addend = NULL;
8635
8636 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8637 {
8638 addend = XEXP (XEXP (x, 0), 1);
8639 x = XEXP (XEXP (x, 0), 0);
8640 }
8641
8642 if (GET_CODE (x) != SYMBOL_REF)
8643 return x;
8644
8645 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8646
8647 x = legitimize_tls_address (x, NULL_RTX);
8648
8649 if (addend)
8650 {
8651 x = gen_rtx_PLUS (SImode, x, addend);
8652 orig_x = x;
8653 }
8654 else
8655 return x;
8656 }
8657
8658 if (!TARGET_ARM)
8659 {
8660 /* TODO: legitimize_address for Thumb2. */
8661 if (TARGET_THUMB2)
8662 return x;
8663 return thumb_legitimize_address (x, orig_x, mode);
8664 }
8665
8666 if (GET_CODE (x) == PLUS)
8667 {
8668 rtx xop0 = XEXP (x, 0);
8669 rtx xop1 = XEXP (x, 1);
8670
8671 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8672 xop0 = force_reg (SImode, xop0);
8673
8674 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8675 && !symbol_mentioned_p (xop1))
8676 xop1 = force_reg (SImode, xop1);
8677
8678 if (ARM_BASE_REGISTER_RTX_P (xop0)
8679 && CONST_INT_P (xop1))
8680 {
8681 HOST_WIDE_INT n, low_n;
8682 rtx base_reg, val;
8683 n = INTVAL (xop1);
8684
8685 /* VFP addressing modes actually allow greater offsets, but for
8686 now we just stick with the lowest common denominator. */
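/* Worked example (illustrative): for a DFmode access at xop0 + 27,
   low_n = 27 & 0x0f = 11 > 4, so the offset is rebiased below to
   n = 32 and low_n = -5; we then materialize base_reg = xop0 + 32
   and address base_reg - 5, keeping the residual offset small.  */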
8687 if (mode == DImode || mode == DFmode)
8688 {
8689 low_n = n & 0x0f;
8690 n &= ~0x0f;
8691 if (low_n > 4)
8692 {
8693 n += 16;
8694 low_n -= 16;
8695 }
8696 }
8697 else
8698 {
8699 low_n = ((mode) == TImode ? 0
8700 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8701 n -= low_n;
8702 }
8703
8704 base_reg = gen_reg_rtx (SImode);
8705 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8706 emit_move_insn (base_reg, val);
8707 x = plus_constant (Pmode, base_reg, low_n);
8708 }
8709 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8710 x = gen_rtx_PLUS (SImode, xop0, xop1);
8711 }
8712
8713 /* XXX We don't allow MINUS any more -- see comment in
8714 arm_legitimate_address_outer_p (). */
8715 else if (GET_CODE (x) == MINUS)
8716 {
8717 rtx xop0 = XEXP (x, 0);
8718 rtx xop1 = XEXP (x, 1);
8719
8720 if (CONSTANT_P (xop0))
8721 xop0 = force_reg (SImode, xop0);
8722
8723 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8724 xop1 = force_reg (SImode, xop1);
8725
8726 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8727 x = gen_rtx_MINUS (SImode, xop0, xop1);
8728 }
8729
8730 /* Make sure to take full advantage of the pre-indexed addressing mode
8731 with absolute addresses, which often allows the base register to
8732 be factored out for multiple adjacent memory references, and might
8733 even allow the minipool to be avoided entirely. */
8734 else if (CONST_INT_P (x) && optimize > 0)
8735 {
8736 unsigned int bits;
8737 HOST_WIDE_INT mask, base, index;
8738 rtx base_reg;
8739
8740 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8741 use an 8-bit index. So let's use a 12-bit index for SImode only and
8742 hope that arm_gen_constant will enable ldrb to use more bits. */
8743 bits = (mode == SImode) ? 12 : 8;
8744 mask = (1 << bits) - 1;
8745 base = INTVAL (x) & ~mask;
8746 index = INTVAL (x) & mask;
8747 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8748 {
8749 /* It'll most probably be more efficient to generate the base
8750 with more bits set and use a negative index instead. */
8751 base |= mask;
8752 index -= mask;
8753 }
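/* Worked examples (illustrative): for SImode and x = 0x2f04,
   base = 0x2000 and index = 0xf04; 0x2000 has only one bit set, so it
   is kept as is.  For x = 0x7ffff123, base = 0x7ffff000 has 19 bits
   set, so the adjustment above prefers base = 0x7fffffff (a single
   MVN of #0x80000000) with the negative index 0x123 - 0xfff = -3804.  */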
8754 base_reg = force_reg (SImode, GEN_INT (base));
8755 x = plus_constant (Pmode, base_reg, index);
8756 }
8757
8758 if (flag_pic)
8759 {
8760 /* We need to find and carefully transform any SYMBOL and LABEL
8761 references; so go back to the original address expression. */
8762 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8763 false /*compute_now*/);
8764
8765 if (new_x != orig_x)
8766 x = new_x;
8767 }
8768
8769 return x;
8770 }
8771
8772
8773 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8774 to be legitimate. If we find one, return the new, valid address. */
8775 rtx
8776 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8777 {
8778 if (GET_CODE (x) == PLUS
8779 && CONST_INT_P (XEXP (x, 1))
8780 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8781 || INTVAL (XEXP (x, 1)) < 0))
8782 {
8783 rtx xop0 = XEXP (x, 0);
8784 rtx xop1 = XEXP (x, 1);
8785 HOST_WIDE_INT offset = INTVAL (xop1);
8786
8787 /* Try to fold the offset into a biasing of the base register and
8788 then offsetting that. Don't do this when optimizing for space
8789 since it can cause too many CSEs. */
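/* Worked example (illustrative): for an SImode access at base + 300,
   delta = 300 - (256 - 4) = 48, so the code below biases the base
   register by 252 (an 8-bit immediate add) and leaves a residual
   offset of 48, which fits the 5-bit scaled Thumb-1 word-offset
   range of 0..124.  */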
8790 if (optimize_size && offset >= 0
8791 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8792 {
8793 HOST_WIDE_INT delta;
8794
8795 if (offset >= 256)
8796 delta = offset - (256 - GET_MODE_SIZE (mode));
8797 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8798 delta = 31 * GET_MODE_SIZE (mode);
8799 else
8800 delta = offset & (~31 * GET_MODE_SIZE (mode));
8801
8802 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8803 NULL_RTX);
8804 x = plus_constant (Pmode, xop0, delta);
8805 }
8806 else if (offset < 0 && offset > -256)
8807 /* Small negative offsets are best done with a subtract before the
8808 dereference; forcing these into a register normally takes two
8809 instructions. */
8810 x = force_operand (x, NULL_RTX);
8811 else
8812 {
8813 /* For the remaining cases, force the constant into a register. */
8814 xop1 = force_reg (SImode, xop1);
8815 x = gen_rtx_PLUS (SImode, xop0, xop1);
8816 }
8817 }
8818 else if (GET_CODE (x) == PLUS
8819 && s_register_operand (XEXP (x, 1), SImode)
8820 && !s_register_operand (XEXP (x, 0), SImode))
8821 {
8822 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8823
8824 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8825 }
8826
8827 if (flag_pic)
8828 {
8829 /* We need to find and carefully transform any SYMBOL and LABEL
8830 references; so go back to the original address expression. */
8831 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8832 false /*compute_now*/);
8833
8834 if (new_x != orig_x)
8835 x = new_x;
8836 }
8837
8838 return x;
8839 }
8840
8841 /* Return TRUE if X contains any TLS symbol references. */
8842
8843 bool
8844 arm_tls_referenced_p (rtx x)
8845 {
8846 if (! TARGET_HAVE_TLS)
8847 return false;
8848
8849 subrtx_iterator::array_type array;
8850 FOR_EACH_SUBRTX (iter, array, x, ALL)
8851 {
8852 const_rtx x = *iter;
8853 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8854 {
8855 /* ARM currently does not provide relocations to encode TLS variables
8856 into AArch32 instructions, only into data, so there is currently no
8857 way to implement these if the literal pool is disabled. */
8858 if (arm_disable_literal_pool)
8859 sorry ("accessing thread-local storage is not currently supported "
8860 "with %<-mpure-code%> or %<-mslow-flash-data%>");
8861
8862 return true;
8863 }
8864
8865 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8866 TLS offsets, not real symbol references. */
8867 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8868 iter.skip_subrtxes ();
8869 }
8870 return false;
8871 }
8872
8873 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8874
8875 On the ARM, allow any integer (invalid ones are removed later by insn
8876 patterns), nice doubles, and symbol_refs that refer to the function's
8877 constant pool XXX.
8878
8879 When generating PIC, allow anything. */
8880
8881 static bool
8882 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8883 {
8884 return flag_pic || !label_mentioned_p (x);
8885 }
8886
8887 static bool
8888 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8889 {
8890 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8891 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8892 for ARMv8-M Baseline or later, the result is valid. */
8893 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8894 x = XEXP (x, 0);
8895
8896 return (CONST_INT_P (x)
8897 || CONST_DOUBLE_P (x)
8898 || CONSTANT_ADDRESS_P (x)
8899 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8900 || flag_pic);
8901 }
8902
8903 static bool
8904 arm_legitimate_constant_p (machine_mode mode, rtx x)
8905 {
8906 return (!arm_cannot_force_const_mem (mode, x)
8907 && (TARGET_32BIT
8908 ? arm_legitimate_constant_p_1 (mode, x)
8909 : thumb_legitimate_constant_p (mode, x)));
8910 }
8911
8912 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8913
8914 static bool
8915 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8916 {
8917 rtx base, offset;
8918 split_const (x, &base, &offset);
8919
8920 if (SYMBOL_REF_P (base))
8921 {
8922 /* Function symbols cannot have an offset due to the Thumb bit. */
8923 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
8924 && INTVAL (offset) != 0)
8925 return true;
8926
8927 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
8928 && !offset_within_block_p (base, INTVAL (offset)))
8929 return true;
8930 }
8931 return arm_tls_referenced_p (x);
8932 }
8933 \f
8934 #define REG_OR_SUBREG_REG(X) \
8935 (REG_P (X) \
8936 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8937
8938 #define REG_OR_SUBREG_RTX(X) \
8939 (REG_P (X) ? (X) : SUBREG_REG (X))
8940
8941 static inline int
8942 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8943 {
8944 machine_mode mode = GET_MODE (x);
8945 int total, words;
8946
8947 switch (code)
8948 {
8949 case ASHIFT:
8950 case ASHIFTRT:
8951 case LSHIFTRT:
8952 case ROTATERT:
8953 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8954
8955 case PLUS:
8956 case MINUS:
8957 case COMPARE:
8958 case NEG:
8959 case NOT:
8960 return COSTS_N_INSNS (1);
8961
8962 case MULT:
8963 if (arm_arch6m && arm_m_profile_small_mul)
8964 return COSTS_N_INSNS (32);
8965
8966 if (CONST_INT_P (XEXP (x, 1)))
8967 {
8968 int cycles = 0;
8969 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8970
8971 while (i)
8972 {
8973 i >>= 2;
8974 cycles++;
8975 }
8976 return COSTS_N_INSNS (2) + cycles;
8977 }
8978 return COSTS_N_INSNS (1) + 16;
8979
8980 case SET:
8981 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8982 the mode. */
8983 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8984 return (COSTS_N_INSNS (words)
8985 + 4 * ((MEM_P (SET_SRC (x)))
8986 + MEM_P (SET_DEST (x))));
8987
8988 case CONST_INT:
8989 if (outer == SET)
8990 {
8991 if (UINTVAL (x) < 256
8992 /* 16-bit constant. */
8993 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8994 return 0;
8995 if (thumb_shiftable_const (INTVAL (x)))
8996 return COSTS_N_INSNS (2);
8997 return COSTS_N_INSNS (3);
8998 }
8999 else if ((outer == PLUS || outer == COMPARE)
9000 && INTVAL (x) < 256 && INTVAL (x) > -256)
9001 return 0;
9002 else if ((outer == IOR || outer == XOR || outer == AND)
9003 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9004 return COSTS_N_INSNS (1);
9005 else if (outer == AND)
9006 {
9007 int i;
9008 /* This duplicates the tests in the andsi3 expander. */
9009 for (i = 9; i <= 31; i++)
9010 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9011 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9012 return COSTS_N_INSNS (2);
9013 }
9014 else if (outer == ASHIFT || outer == ASHIFTRT
9015 || outer == LSHIFTRT)
9016 return 0;
9017 return COSTS_N_INSNS (2);
9018
9019 case CONST:
9020 case CONST_DOUBLE:
9021 case LABEL_REF:
9022 case SYMBOL_REF:
9023 return COSTS_N_INSNS (3);
9024
9025 case UDIV:
9026 case UMOD:
9027 case DIV:
9028 case MOD:
9029 return 100;
9030
9031 case TRUNCATE:
9032 return 99;
9033
9034 case AND:
9035 case XOR:
9036 case IOR:
9037 /* XXX guess. */
9038 return 8;
9039
9040 case MEM:
9041 /* XXX another guess. */
9042 /* Memory costs quite a lot for the first word, but subsequent words
9043 load at the equivalent of a single insn each. */
9044 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9045 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9046 ? 4 : 0));
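/* For example (illustrative): with UNITS_PER_WORD == 4, a DImode load
   costs 10 + 4 * ((8 - 1) / 4) = 14, plus 4 more if it is a load from
   the constant pool.  */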
9047
9048 case IF_THEN_ELSE:
9049 /* XXX a guess. */
9050 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9051 return 14;
9052 return 2;
9053
9054 case SIGN_EXTEND:
9055 case ZERO_EXTEND:
9056 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9057 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9058
9059 if (mode == SImode)
9060 return total;
9061
9062 if (arm_arch6)
9063 return total + COSTS_N_INSNS (1);
9064
9065 /* Assume a two-shift sequence. Increase the cost slightly so
9066 we prefer actual shifts over an extend operation. */
9067 return total + 1 + COSTS_N_INSNS (2);
9068
9069 default:
9070 return 99;
9071 }
9072 }
9073
9074 /* Estimates the size cost of thumb1 instructions.
9075 For now most of the code is copied from thumb1_rtx_costs. We need more
9076 fine-grained tuning when we have more related test cases. */
9077 static inline int
9078 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9079 {
9080 machine_mode mode = GET_MODE (x);
9081 int words, cost;
9082
9083 switch (code)
9084 {
9085 case ASHIFT:
9086 case ASHIFTRT:
9087 case LSHIFTRT:
9088 case ROTATERT:
9089 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9090
9091 case PLUS:
9092 case MINUS:
9093 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9094 patterns defined by RTL expansion, especially for the expansion of
9095 multiplication. */
9096 if ((GET_CODE (XEXP (x, 0)) == MULT
9097 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9098 || (GET_CODE (XEXP (x, 1)) == MULT
9099 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9100 return COSTS_N_INSNS (2);
9101 /* Fall through. */
9102 case COMPARE:
9103 case NEG:
9104 case NOT:
9105 return COSTS_N_INSNS (1);
9106
9107 case MULT:
9108 if (CONST_INT_P (XEXP (x, 1)))
9109 {
9110 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9111 into a register first. */
9112 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9113 /* For targets which have a very small and high-latency multiply
9114 unit, we prefer to synthesize the mult with up to 5 instructions,
9115 giving a good balance between size and performance. */
9116 if (arm_arch6m && arm_m_profile_small_mul)
9117 return COSTS_N_INSNS (5);
9118 else
9119 return COSTS_N_INSNS (1) + const_size;
9120 }
9121 return COSTS_N_INSNS (1);
9122
9123 case SET:
9124 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9125 the mode. */
9126 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9127 cost = COSTS_N_INSNS (words);
9128 if (satisfies_constraint_J (SET_SRC (x))
9129 || satisfies_constraint_K (SET_SRC (x))
9130 /* Too big an immediate for a 2-byte mov, using MOVT. */
9131 || (CONST_INT_P (SET_SRC (x))
9132 && UINTVAL (SET_SRC (x)) >= 256
9133 && TARGET_HAVE_MOVT
9134 && satisfies_constraint_j (SET_SRC (x)))
9135 /* thumb1_movdi_insn. */
9136 || ((words > 1) && MEM_P (SET_SRC (x))))
9137 cost += COSTS_N_INSNS (1);
9138 return cost;
9139
9140 case CONST_INT:
9141 if (outer == SET)
9142 {
9143 if (UINTVAL (x) < 256)
9144 return COSTS_N_INSNS (1);
9145 /* movw is 4 bytes long. */
9146 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9147 return COSTS_N_INSNS (2);
9148 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9149 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9150 return COSTS_N_INSNS (2);
9151 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9152 if (thumb_shiftable_const (INTVAL (x)))
9153 return COSTS_N_INSNS (2);
9154 return COSTS_N_INSNS (3);
9155 }
9156 else if ((outer == PLUS || outer == COMPARE)
9157 && INTVAL (x) < 256 && INTVAL (x) > -256)
9158 return 0;
9159 else if ((outer == IOR || outer == XOR || outer == AND)
9160 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9161 return COSTS_N_INSNS (1);
9162 else if (outer == AND)
9163 {
9164 int i;
9165 /* This duplicates the tests in the andsi3 expander. */
9166 for (i = 9; i <= 31; i++)
9167 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9168 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9169 return COSTS_N_INSNS (2);
9170 }
9171 else if (outer == ASHIFT || outer == ASHIFTRT
9172 || outer == LSHIFTRT)
9173 return 0;
9174 return COSTS_N_INSNS (2);
9175
9176 case CONST:
9177 case CONST_DOUBLE:
9178 case LABEL_REF:
9179 case SYMBOL_REF:
9180 return COSTS_N_INSNS (3);
9181
9182 case UDIV:
9183 case UMOD:
9184 case DIV:
9185 case MOD:
9186 return 100;
9187
9188 case TRUNCATE:
9189 return 99;
9190
9191 case AND:
9192 case XOR:
9193 case IOR:
9194 return COSTS_N_INSNS (1);
9195
9196 case MEM:
9197 return (COSTS_N_INSNS (1)
9198 + COSTS_N_INSNS (1)
9199 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9200 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9201 ? COSTS_N_INSNS (1) : 0));
9202
9203 case IF_THEN_ELSE:
9204 /* XXX a guess. */
9205 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9206 return 14;
9207 return 2;
9208
9209 case ZERO_EXTEND:
9210 /* XXX still guessing. */
9211 switch (GET_MODE (XEXP (x, 0)))
9212 {
9213 case E_QImode:
9214 return (1 + (mode == DImode ? 4 : 0)
9215 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9216
9217 case E_HImode:
9218 return (4 + (mode == DImode ? 4 : 0)
9219 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9220
9221 case E_SImode:
9222 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9223
9224 default:
9225 return 99;
9226 }
9227
9228 default:
9229 return 99;
9230 }
9231 }
9232
9233 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9234 operand, then return the operand that is being shifted. If the shift
9235 is not by a constant, then set SHIFT_REG to point to the operand.
9236 Return NULL if OP is not a shifter operand. */
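/* For example (illustrative): for (mult (reg:SI r1) (const_int 4)), which
   the ARM backend uses to represent a left shift by two, the operand
   returned is r1 and *SHIFT_REG is left untouched; for
   (ashift (reg:SI r1) (reg:SI r2)) the operand returned is r1 and
   *SHIFT_REG is set to r2.  */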
9237 static rtx
9238 shifter_op_p (rtx op, rtx *shift_reg)
9239 {
9240 enum rtx_code code = GET_CODE (op);
9241
9242 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9243 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9244 return XEXP (op, 0);
9245 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9246 return XEXP (op, 0);
9247 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9248 || code == ASHIFTRT)
9249 {
9250 if (!CONST_INT_P (XEXP (op, 1)))
9251 *shift_reg = XEXP (op, 1);
9252 return XEXP (op, 0);
9253 }
9254
9255 return NULL;
9256 }
9257
9258 static bool
9259 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9260 {
9261 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9262 rtx_code code = GET_CODE (x);
9263 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9264
9265 switch (XINT (x, 1))
9266 {
9267 case UNSPEC_UNALIGNED_LOAD:
9268 /* We can only do unaligned loads into the integer unit, and we can't
9269 use LDM or LDRD. */
9270 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9271 if (speed_p)
9272 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9273 + extra_cost->ldst.load_unaligned);
9274
9275 #ifdef NOT_YET
9276 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9277 ADDR_SPACE_GENERIC, speed_p);
9278 #endif
9279 return true;
9280
9281 case UNSPEC_UNALIGNED_STORE:
9282 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9283 if (speed_p)
9284 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9285 + extra_cost->ldst.store_unaligned);
9286
9287 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9288 #ifdef NOT_YET
9289 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9290 ADDR_SPACE_GENERIC, speed_p);
9291 #endif
9292 return true;
9293
9294 case UNSPEC_VRINTZ:
9295 case UNSPEC_VRINTP:
9296 case UNSPEC_VRINTM:
9297 case UNSPEC_VRINTR:
9298 case UNSPEC_VRINTX:
9299 case UNSPEC_VRINTA:
9300 if (speed_p)
9301 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9302
9303 return true;
9304 default:
9305 *cost = COSTS_N_INSNS (2);
9306 break;
9307 }
9308 return true;
9309 }
9310
9311 /* Cost of a libcall. We assume one insn per argument, an amount for the
9312 call (one insn for -Os) and then one for processing the result. */
9313 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
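/* For example (illustrative): LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4) when
   optimizing for size, i.e. two argument insns plus the assumed call
   overhead of 18 (speed) or 2 (size) insns.  */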
9314
9315 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9316 do \
9317 { \
9318 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9319 if (shift_op != NULL \
9320 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9321 { \
9322 if (shift_reg) \
9323 { \
9324 if (speed_p) \
9325 *cost += extra_cost->alu.arith_shift_reg; \
9326 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9327 ASHIFT, 1, speed_p); \
9328 } \
9329 else if (speed_p) \
9330 *cost += extra_cost->alu.arith_shift; \
9331 \
9332 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9333 ASHIFT, 0, speed_p) \
9334 + rtx_cost (XEXP (x, 1 - IDX), \
9335 GET_MODE (shift_op), \
9336 OP, 1, speed_p)); \
9337 return true; \
9338 } \
9339 } \
9340 while (0)
9341
9342 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9343 considering the costs of the addressing mode and memory access
9344 separately. */
9345 static bool
9346 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9347 int *cost, bool speed_p)
9348 {
9349 machine_mode mode = GET_MODE (x);
9350
9351 *cost = COSTS_N_INSNS (1);
9352
9353 if (flag_pic
9354 && GET_CODE (XEXP (x, 0)) == PLUS
9355 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9356 /* This will be split into two instructions. Add the cost of the
9357 additional instruction here. The cost of the memory access is computed
9358 below. See arm.md:calculate_pic_address. */
9359 *cost += COSTS_N_INSNS (1);
9360
9361 /* Calculate cost of the addressing mode. */
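/* For example (illustrative): (mem (reg)) is classified by the switch
   below as AMO_DEFAULT, (mem (plus (reg) (const_int 8))) as AMO_NO_WB,
   and (mem (post_inc (reg))) as AMO_WB.  */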
9362 if (speed_p)
9363 {
9364 arm_addr_mode_op op_type;
9365 switch (GET_CODE (XEXP (x, 0)))
9366 {
9367 default:
9368 case REG:
9369 op_type = AMO_DEFAULT;
9370 break;
9371 case MINUS:
9372 /* MINUS does not appear in RTL, but the architecture supports it,
9373 so handle this case defensively. */
9374 /* fall through */
9375 case PLUS:
9376 op_type = AMO_NO_WB;
9377 break;
9378 case PRE_INC:
9379 case PRE_DEC:
9380 case POST_INC:
9381 case POST_DEC:
9382 case PRE_MODIFY:
9383 case POST_MODIFY:
9384 op_type = AMO_WB;
9385 break;
9386 }
9387
9388 if (VECTOR_MODE_P (mode))
9389 *cost += current_tune->addr_mode_costs->vector[op_type];
9390 else if (FLOAT_MODE_P (mode))
9391 *cost += current_tune->addr_mode_costs->fp[op_type];
9392 else
9393 *cost += current_tune->addr_mode_costs->integer[op_type];
9394 }
9395
9396 /* Calculate cost of memory access. */
9397 if (speed_p)
9398 {
9399 if (FLOAT_MODE_P (mode))
9400 {
9401 if (GET_MODE_SIZE (mode) == 8)
9402 *cost += extra_cost->ldst.loadd;
9403 else
9404 *cost += extra_cost->ldst.loadf;
9405 }
9406 else if (VECTOR_MODE_P (mode))
9407 *cost += extra_cost->ldst.loadv;
9408 else
9409 {
9410 /* Integer modes */
9411 if (GET_MODE_SIZE (mode) == 8)
9412 *cost += extra_cost->ldst.ldrd;
9413 else
9414 *cost += extra_cost->ldst.load;
9415 }
9416 }
9417
9418 return true;
9419 }
9420
9421 /* RTX costs. Make an estimate of the cost of executing the operation
9422 X, which is contained within an operation with code OUTER_CODE.
9423 SPEED_P indicates whether the cost desired is the performance cost,
9424 or the size cost. The estimate is stored in COST and the return
9425 value is TRUE if the cost calculation is final, or FALSE if the
9426 caller should recurse through the operands of X to add additional
9427 costs.
9428
9429 We currently make no attempt to model the size savings of Thumb-2
9430 16-bit instructions. At the normal points in compilation where
9431 this code is called we have no measure of whether the condition
9432 flags are live or not, and thus no realistic way to determine what
9433 the size will eventually be. */
9434 static bool
9435 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9436 const struct cpu_cost_table *extra_cost,
9437 int *cost, bool speed_p)
9438 {
9439 machine_mode mode = GET_MODE (x);
9440
9441 *cost = COSTS_N_INSNS (1);
9442
9443 if (TARGET_THUMB1)
9444 {
9445 if (speed_p)
9446 *cost = thumb1_rtx_costs (x, code, outer_code);
9447 else
9448 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9449 return true;
9450 }
9451
9452 switch (code)
9453 {
9454 case SET:
9455 *cost = 0;
9456 /* SET RTXs don't have a mode so we get it from the destination. */
9457 mode = GET_MODE (SET_DEST (x));
9458
9459 if (REG_P (SET_SRC (x))
9460 && REG_P (SET_DEST (x)))
9461 {
9462 /* Assume that most copies can be done with a single insn,
9463 unless we don't have HW FP, in which case everything
9464 larger than word mode will require two insns. */
9465 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9466 && GET_MODE_SIZE (mode) > 4)
9467 || mode == DImode)
9468 ? 2 : 1);
9469 /* Conditional register moves can be encoded
9470 in 16 bits in Thumb mode. */
9471 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9472 *cost >>= 1;
9473
9474 return true;
9475 }
9476
9477 if (CONST_INT_P (SET_SRC (x)))
9478 {
9479 /* Handle CONST_INT here, since the value doesn't have a mode
9480 and we would otherwise be unable to work out the true cost. */
9481 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9482 0, speed_p);
9483 outer_code = SET;
9484 /* Slightly lower the cost of setting a core reg to a constant.
9485 This helps break up chains and allows for better scheduling. */
9486 if (REG_P (SET_DEST (x))
9487 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9488 *cost -= 1;
9489 x = SET_SRC (x);
9490 /* Immediate moves with an immediate in the range [0, 255] can be
9491 encoded in 16 bits in Thumb mode. */
9492 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9493 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9494 *cost >>= 1;
9495 goto const_int_cost;
9496 }
9497
9498 return false;
9499
9500 case MEM:
9501 return arm_mem_costs (x, extra_cost, cost, speed_p);
9502
9503 case PARALLEL:
9504 {
9505 /* Calculations of LDM costs are complex. We assume an initial cost
9506 (ldm_1st) which will load the number of registers mentioned in
9507 ldm_regs_per_insn_1st registers; then each additional
9508 ldm_regs_per_insn_subsequent registers cost one more insn. The
9509 formula for N regs is thus:
9510
9511 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9512 + ldm_regs_per_insn_subsequent - 1)
9513 / ldm_regs_per_insn_subsequent).
9514
9515 Additional costs may also be added for addressing. A similar
9516 formula is used for STM. */
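/* Worked example (illustrative): with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2 (tuning-dependent values), a
   five-register LDM adds (MAX (5 - 2, 0) + 2 - 1) / 2 == 2 extra
   insns on top of the first-insn cost.  */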
9517
9518 bool is_ldm = load_multiple_operation (x, SImode);
9519 bool is_stm = store_multiple_operation (x, SImode);
9520
9521 if (is_ldm || is_stm)
9522 {
9523 if (speed_p)
9524 {
9525 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9526 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9527 ? extra_cost->ldst.ldm_regs_per_insn_1st
9528 : extra_cost->ldst.stm_regs_per_insn_1st;
9529 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9530 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9531 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9532
9533 *cost += regs_per_insn_1st
9534 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9535 + regs_per_insn_sub - 1)
9536 / regs_per_insn_sub);
9537 return true;
9538 }
9539
9540 }
9541 return false;
9542 }
9543 case DIV:
9544 case UDIV:
9545 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9546 && (mode == SFmode || !TARGET_VFP_SINGLE))
9547 *cost += COSTS_N_INSNS (speed_p
9548 ? extra_cost->fp[mode != SFmode].div : 0);
9549 else if (mode == SImode && TARGET_IDIV)
9550 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9551 else
9552 *cost = LIBCALL_COST (2);
9553
9554 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9555 are possible, udiv is preferred. */
9556 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9557 return false; /* All arguments must be in registers. */
9558
9559 case MOD:
9560 /* MOD by a power of 2 can be expanded as:
9561 rsbs r1, r0, #0
9562 and r0, r0, #(n - 1)
9563 and r1, r1, #(n - 1)
9564 rsbpl r0, r1, #0. */
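/* For example (illustrative), this covers a signed SImode "x % 8"
   (n == 8, n - 1 == 7); together with the base cost of one insn, the
   extra COSTS_N_INSNS (3) below matches the four-instruction sequence
   shown above.  */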
9565 if (CONST_INT_P (XEXP (x, 1))
9566 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9567 && mode == SImode)
9568 {
9569 *cost += COSTS_N_INSNS (3);
9570
9571 if (speed_p)
9572 *cost += 2 * extra_cost->alu.logical
9573 + extra_cost->alu.arith;
9574 return true;
9575 }
9576
9577 /* Fall-through. */
9578 case UMOD:
9579 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9580 are possible, udiv is preferred. */
9581 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9582 return false; /* All arguments must be in registers. */
9583
9584 case ROTATE:
9585 if (mode == SImode && REG_P (XEXP (x, 1)))
9586 {
9587 *cost += (COSTS_N_INSNS (1)
9588 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9589 if (speed_p)
9590 *cost += extra_cost->alu.shift_reg;
9591 return true;
9592 }
9593 /* Fall through */
9594 case ROTATERT:
9595 case ASHIFT:
9596 case LSHIFTRT:
9597 case ASHIFTRT:
9598 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9599 {
9600 *cost += (COSTS_N_INSNS (2)
9601 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9602 if (speed_p)
9603 *cost += 2 * extra_cost->alu.shift;
9604 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9605 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9606 *cost += 1;
9607 return true;
9608 }
9609 else if (mode == SImode)
9610 {
9611 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9612 /* Slightly disparage register shifts at -Os, but not by much. */
9613 if (!CONST_INT_P (XEXP (x, 1)))
9614 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9615 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9616 return true;
9617 }
9618 else if (GET_MODE_CLASS (mode) == MODE_INT
9619 && GET_MODE_SIZE (mode) < 4)
9620 {
9621 if (code == ASHIFT)
9622 {
9623 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9624 /* Slightly disparage register shifts at -Os, but not by
9625 much. */
9626 if (!CONST_INT_P (XEXP (x, 1)))
9627 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9628 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9629 }
9630 else if (code == LSHIFTRT || code == ASHIFTRT)
9631 {
9632 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9633 {
9634 /* Can use SBFX/UBFX. */
9635 if (speed_p)
9636 *cost += extra_cost->alu.bfx;
9637 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9638 }
9639 else
9640 {
9641 *cost += COSTS_N_INSNS (1);
9642 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9643 if (speed_p)
9644 {
9645 if (CONST_INT_P (XEXP (x, 1)))
9646 *cost += 2 * extra_cost->alu.shift;
9647 else
9648 *cost += (extra_cost->alu.shift
9649 + extra_cost->alu.shift_reg);
9650 }
9651 else
9652 /* Slightly disparage register shifts. */
9653 *cost += !CONST_INT_P (XEXP (x, 1));
9654 }
9655 }
9656 else /* Rotates. */
9657 {
9658 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9659 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9660 if (speed_p)
9661 {
9662 if (CONST_INT_P (XEXP (x, 1)))
9663 *cost += (2 * extra_cost->alu.shift
9664 + extra_cost->alu.log_shift);
9665 else
9666 *cost += (extra_cost->alu.shift
9667 + extra_cost->alu.shift_reg
9668 + extra_cost->alu.log_shift_reg);
9669 }
9670 }
9671 return true;
9672 }
9673
9674 *cost = LIBCALL_COST (2);
9675 return false;
9676
9677 case BSWAP:
9678 if (arm_arch6)
9679 {
9680 if (mode == SImode)
9681 {
9682 if (speed_p)
9683 *cost += extra_cost->alu.rev;
9684
9685 return false;
9686 }
9687 }
9688 else
9689 {
9690 /* No rev instruction available. Look at arm_legacy_rev
9691 and thumb_legacy_rev for the form of RTL used then. */
9692 if (TARGET_THUMB)
9693 {
9694 *cost += COSTS_N_INSNS (9);
9695
9696 if (speed_p)
9697 {
9698 *cost += 6 * extra_cost->alu.shift;
9699 *cost += 3 * extra_cost->alu.logical;
9700 }
9701 }
9702 else
9703 {
9704 *cost += COSTS_N_INSNS (4);
9705
9706 if (speed_p)
9707 {
9708 *cost += 2 * extra_cost->alu.shift;
9709 *cost += extra_cost->alu.arith_shift;
9710 *cost += 2 * extra_cost->alu.logical;
9711 }
9712 }
9713 return true;
9714 }
9715 return false;
9716
9717 case MINUS:
9718 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9719 && (mode == SFmode || !TARGET_VFP_SINGLE))
9720 {
9721 if (GET_CODE (XEXP (x, 0)) == MULT
9722 || GET_CODE (XEXP (x, 1)) == MULT)
9723 {
9724 rtx mul_op0, mul_op1, sub_op;
9725
9726 if (speed_p)
9727 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9728
9729 if (GET_CODE (XEXP (x, 0)) == MULT)
9730 {
9731 mul_op0 = XEXP (XEXP (x, 0), 0);
9732 mul_op1 = XEXP (XEXP (x, 0), 1);
9733 sub_op = XEXP (x, 1);
9734 }
9735 else
9736 {
9737 mul_op0 = XEXP (XEXP (x, 1), 0);
9738 mul_op1 = XEXP (XEXP (x, 1), 1);
9739 sub_op = XEXP (x, 0);
9740 }
9741
9742 /* The first operand of the multiply may be optionally
9743 negated. */
9744 if (GET_CODE (mul_op0) == NEG)
9745 mul_op0 = XEXP (mul_op0, 0);
9746
9747 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9748 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9749 + rtx_cost (sub_op, mode, code, 0, speed_p));
9750
9751 return true;
9752 }
9753
9754 if (speed_p)
9755 *cost += extra_cost->fp[mode != SFmode].addsub;
9756 return false;
9757 }
9758
9759 if (mode == SImode)
9760 {
9761 rtx shift_by_reg = NULL;
9762 rtx shift_op;
9763 rtx non_shift_op;
9764
9765 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9766 if (shift_op == NULL)
9767 {
9768 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9769 non_shift_op = XEXP (x, 0);
9770 }
9771 else
9772 non_shift_op = XEXP (x, 1);
9773
9774 if (shift_op != NULL)
9775 {
9776 if (shift_by_reg != NULL)
9777 {
9778 if (speed_p)
9779 *cost += extra_cost->alu.arith_shift_reg;
9780 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9781 }
9782 else if (speed_p)
9783 *cost += extra_cost->alu.arith_shift;
9784
9785 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9786 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9787 return true;
9788 }
9789
9790 if (arm_arch_thumb2
9791 && GET_CODE (XEXP (x, 1)) == MULT)
9792 {
9793 /* MLS. */
9794 if (speed_p)
9795 *cost += extra_cost->mult[0].add;
9796 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9797 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9798 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9799 return true;
9800 }
9801
9802 if (CONST_INT_P (XEXP (x, 0)))
9803 {
9804 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9805 INTVAL (XEXP (x, 0)), NULL_RTX,
9806 NULL_RTX, 1, 0);
9807 *cost = COSTS_N_INSNS (insns);
9808 if (speed_p)
9809 *cost += insns * extra_cost->alu.arith;
9810 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9811 return true;
9812 }
9813 else if (speed_p)
9814 *cost += extra_cost->alu.arith;
9815
9816 return false;
9817 }
9818
9819 if (GET_MODE_CLASS (mode) == MODE_INT
9820 && GET_MODE_SIZE (mode) < 4)
9821 {
9822 rtx shift_op, shift_reg;
9823 shift_reg = NULL;
9824
9825 /* We check both sides of the MINUS for shifter operands since,
9826 unlike PLUS, it's not commutative. */
9827
9828 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9829 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9830
9831 /* Slightly disparage, as we might need to widen the result. */
9832 *cost += 1;
9833 if (speed_p)
9834 *cost += extra_cost->alu.arith;
9835
9836 if (CONST_INT_P (XEXP (x, 0)))
9837 {
9838 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9839 return true;
9840 }
9841
9842 return false;
9843 }
9844
9845 if (mode == DImode)
9846 {
9847 *cost += COSTS_N_INSNS (1);
9848
9849 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9850 {
9851 rtx op1 = XEXP (x, 1);
9852
9853 if (speed_p)
9854 *cost += 2 * extra_cost->alu.arith;
9855
9856 if (GET_CODE (op1) == ZERO_EXTEND)
9857 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9858 0, speed_p);
9859 else
9860 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9861 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9862 0, speed_p);
9863 return true;
9864 }
9865 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9866 {
9867 if (speed_p)
9868 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9869 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9870 0, speed_p)
9871 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9872 return true;
9873 }
9874 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9875 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9876 {
9877 if (speed_p)
9878 *cost += (extra_cost->alu.arith
9879 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9880 ? extra_cost->alu.arith
9881 : extra_cost->alu.arith_shift));
9882 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9883 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9884 GET_CODE (XEXP (x, 1)), 0, speed_p));
9885 return true;
9886 }
9887
9888 if (speed_p)
9889 *cost += 2 * extra_cost->alu.arith;
9890 return false;
9891 }
9892
9893 /* Vector mode? */
9894
9895 *cost = LIBCALL_COST (2);
9896 return false;
9897
9898 case PLUS:
9899 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9900 && (mode == SFmode || !TARGET_VFP_SINGLE))
9901 {
9902 if (GET_CODE (XEXP (x, 0)) == MULT)
9903 {
9904 rtx mul_op0, mul_op1, add_op;
9905
9906 if (speed_p)
9907 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9908
9909 mul_op0 = XEXP (XEXP (x, 0), 0);
9910 mul_op1 = XEXP (XEXP (x, 0), 1);
9911 add_op = XEXP (x, 1);
9912
9913 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9914 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9915 + rtx_cost (add_op, mode, code, 0, speed_p));
9916
9917 return true;
9918 }
9919
9920 if (speed_p)
9921 *cost += extra_cost->fp[mode != SFmode].addsub;
9922 return false;
9923 }
9924 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9925 {
9926 *cost = LIBCALL_COST (2);
9927 return false;
9928 }
9929
9930 /* Narrow modes can be synthesized in SImode, but the range
9931 of useful sub-operations is limited. Check for shift operations
9932 on one of the operands. Only left shifts can be used in the
9933 narrow modes. */
9934 if (GET_MODE_CLASS (mode) == MODE_INT
9935 && GET_MODE_SIZE (mode) < 4)
9936 {
9937 rtx shift_op, shift_reg;
9938 shift_reg = NULL;
9939
9940 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9941
9942 if (CONST_INT_P (XEXP (x, 1)))
9943 {
9944 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9945 INTVAL (XEXP (x, 1)), NULL_RTX,
9946 NULL_RTX, 1, 0);
9947 *cost = COSTS_N_INSNS (insns);
9948 if (speed_p)
9949 *cost += insns * extra_cost->alu.arith;
9950 /* Slightly penalize a narrow operation as the result may
9951 need widening. */
9952 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9953 return true;
9954 }
9955
9956 /* Slightly penalize a narrow operation as the result may
9957 need widening. */
9958 *cost += 1;
9959 if (speed_p)
9960 *cost += extra_cost->alu.arith;
9961
9962 return false;
9963 }
9964
9965 if (mode == SImode)
9966 {
9967 rtx shift_op, shift_reg;
9968
9969 if (TARGET_INT_SIMD
9970 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9971 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9972 {
9973 /* UXTA[BH] or SXTA[BH]. */
9974 if (speed_p)
9975 *cost += extra_cost->alu.extend_arith;
9976 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9977 0, speed_p)
9978 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9979 return true;
9980 }
9981
9982 shift_reg = NULL;
9983 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9984 if (shift_op != NULL)
9985 {
9986 if (shift_reg)
9987 {
9988 if (speed_p)
9989 *cost += extra_cost->alu.arith_shift_reg;
9990 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9991 }
9992 else if (speed_p)
9993 *cost += extra_cost->alu.arith_shift;
9994
9995 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9996 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9997 return true;
9998 }
9999 if (GET_CODE (XEXP (x, 0)) == MULT)
10000 {
10001 rtx mul_op = XEXP (x, 0);
10002
10003 if (TARGET_DSP_MULTIPLY
10004 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10005 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10006 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10007 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10008 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10009 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10010 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10011 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10012 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10013 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10014 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10015 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10016 == 16))))))
10017 {
10018 /* SMLA[BT][BT]. */
10019 if (speed_p)
10020 *cost += extra_cost->mult[0].extend_add;
10021 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10022 SIGN_EXTEND, 0, speed_p)
10023 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10024 SIGN_EXTEND, 0, speed_p)
10025 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10026 return true;
10027 }
10028
10029 if (speed_p)
10030 *cost += extra_cost->mult[0].add;
10031 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10032 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10033 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10034 return true;
10035 }
10036 if (CONST_INT_P (XEXP (x, 1)))
10037 {
10038 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10039 INTVAL (XEXP (x, 1)), NULL_RTX,
10040 NULL_RTX, 1, 0);
10041 *cost = COSTS_N_INSNS (insns);
10042 if (speed_p)
10043 *cost += insns * extra_cost->alu.arith;
10044 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10045 return true;
10046 }
10047 else if (speed_p)
10048 *cost += extra_cost->alu.arith;
10049
10050 return false;
10051 }
10052
10053 if (mode == DImode)
10054 {
10055 if (GET_CODE (XEXP (x, 0)) == MULT
10056 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10058 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10059 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10060 {
10061 if (speed_p)
10062 *cost += extra_cost->mult[1].extend_add;
10063 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10064 ZERO_EXTEND, 0, speed_p)
10065 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10066 ZERO_EXTEND, 0, speed_p)
10067 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10068 return true;
10069 }
10070
10071 *cost += COSTS_N_INSNS (1);
10072
10073 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10074 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10075 {
10076 if (speed_p)
10077 *cost += (extra_cost->alu.arith
10078 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10079 ? extra_cost->alu.arith
10080 : extra_cost->alu.arith_shift));
10081
10082 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10083 0, speed_p)
10084 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10085 return true;
10086 }
10087
10088 if (speed_p)
10089 *cost += 2 * extra_cost->alu.arith;
10090 return false;
10091 }
10092
10093 /* Vector mode? */
10094 *cost = LIBCALL_COST (2);
10095 return false;
10096 case IOR:
10097 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10098 {
10099 if (speed_p)
10100 *cost += extra_cost->alu.rev;
10101
10102 return true;
10103 }
10104 /* Fall through. */
10105 case AND: case XOR:
10106 if (mode == SImode)
10107 {
10108 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10109 rtx op0 = XEXP (x, 0);
10110 rtx shift_op, shift_reg;
10111
10112 if (subcode == NOT
10113 && (code == AND
10114 || (code == IOR && TARGET_THUMB2)))
10115 op0 = XEXP (op0, 0);
10116
10117 shift_reg = NULL;
10118 shift_op = shifter_op_p (op0, &shift_reg);
10119 if (shift_op != NULL)
10120 {
10121 if (shift_reg)
10122 {
10123 if (speed_p)
10124 *cost += extra_cost->alu.log_shift_reg;
10125 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10126 }
10127 else if (speed_p)
10128 *cost += extra_cost->alu.log_shift;
10129
10130 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10131 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10132 return true;
10133 }
10134
10135 if (CONST_INT_P (XEXP (x, 1)))
10136 {
10137 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10138 INTVAL (XEXP (x, 1)), NULL_RTX,
10139 NULL_RTX, 1, 0);
10140
10141 *cost = COSTS_N_INSNS (insns);
10142 if (speed_p)
10143 *cost += insns * extra_cost->alu.logical;
10144 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10145 return true;
10146 }
10147
10148 if (speed_p)
10149 *cost += extra_cost->alu.logical;
10150 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10151 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10152 return true;
10153 }
10154
10155 if (mode == DImode)
10156 {
10157 rtx op0 = XEXP (x, 0);
10158 enum rtx_code subcode = GET_CODE (op0);
10159
10160 *cost += COSTS_N_INSNS (1);
10161
10162 if (subcode == NOT
10163 && (code == AND
10164 || (code == IOR && TARGET_THUMB2)))
10165 op0 = XEXP (op0, 0);
10166
10167 if (GET_CODE (op0) == ZERO_EXTEND)
10168 {
10169 if (speed_p)
10170 *cost += 2 * extra_cost->alu.logical;
10171
10172 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10173 0, speed_p)
10174 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10175 return true;
10176 }
10177 else if (GET_CODE (op0) == SIGN_EXTEND)
10178 {
10179 if (speed_p)
10180 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10181
10182 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10183 0, speed_p)
10184 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10185 return true;
10186 }
10187
10188 if (speed_p)
10189 *cost += 2 * extra_cost->alu.logical;
10190
10191 return true;
10192 }
10193 /* Vector mode? */
10194
10195 *cost = LIBCALL_COST (2);
10196 return false;
10197
10198 case MULT:
10199 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10200 && (mode == SFmode || !TARGET_VFP_SINGLE))
10201 {
10202 rtx op0 = XEXP (x, 0);
10203
10204 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10205 op0 = XEXP (op0, 0);
10206
10207 if (speed_p)
10208 *cost += extra_cost->fp[mode != SFmode].mult;
10209
10210 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10211 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10212 return true;
10213 }
10214 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10215 {
10216 *cost = LIBCALL_COST (2);
10217 return false;
10218 }
10219
10220 if (mode == SImode)
10221 {
10222 if (TARGET_DSP_MULTIPLY
10223 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10224 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10225 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10226 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10227 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10228 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10229 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10230 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10231 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10232 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10233 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10234 && (INTVAL (XEXP (XEXP (x, 1), 1))
10235 == 16))))))
10236 {
10237 /* SMUL[TB][TB]. */
10238 if (speed_p)
10239 *cost += extra_cost->mult[0].extend;
10240 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10241 SIGN_EXTEND, 0, speed_p);
10242 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10243 SIGN_EXTEND, 1, speed_p);
10244 return true;
10245 }
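/* Illustratively, the DSP case above corresponds to source such as
   (int) (short) a * (int) (short) b (SMULBB) or
   (int) (short) a * (b >> 16) (SMULBT); the exact instruction choice
   is made by the matching patterns in arm.md.  */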
10246 if (speed_p)
10247 *cost += extra_cost->mult[0].simple;
10248 return false;
10249 }
10250
10251 if (mode == DImode)
10252 {
10253 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10254 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10255 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10256 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10257 {
10258 if (speed_p)
10259 *cost += extra_cost->mult[1].extend;
10260 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10261 ZERO_EXTEND, 0, speed_p)
10262 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10263 ZERO_EXTEND, 0, speed_p));
10264 return true;
10265 }
10266
10267 *cost = LIBCALL_COST (2);
10268 return false;
10269 }
10270
10271 /* Vector mode? */
10272 *cost = LIBCALL_COST (2);
10273 return false;
10274
10275 case NEG:
10276 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10277 && (mode == SFmode || !TARGET_VFP_SINGLE))
10278 {
10279 if (GET_CODE (XEXP (x, 0)) == MULT)
10280 {
10281 /* VNMUL. */
10282 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10283 return true;
10284 }
10285
10286 if (speed_p)
10287 *cost += extra_cost->fp[mode != SFmode].neg;
10288
10289 return false;
10290 }
10291 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10292 {
10293 *cost = LIBCALL_COST (1);
10294 return false;
10295 }
10296
10297 if (mode == SImode)
10298 {
10299 if (GET_CODE (XEXP (x, 0)) == ABS)
10300 {
10301 *cost += COSTS_N_INSNS (1);
10302 /* Assume the non-flag-changing variant. */
10303 if (speed_p)
10304 *cost += (extra_cost->alu.log_shift
10305 + extra_cost->alu.arith_shift);
10306 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10307 return true;
10308 }
10309
10310 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10311 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10312 {
10313 *cost += COSTS_N_INSNS (1);
10314 /* No extra cost for MOV imm and MVN imm. */
10315 /* If the comparison op is using the flags, there's no further
10316 cost; otherwise we need to add the cost of the comparison. */
10317 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10318 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10319 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10320 {
10321 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10322 *cost += (COSTS_N_INSNS (1)
10323 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10324 0, speed_p)
10325 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10326 1, speed_p));
10327 if (speed_p)
10328 *cost += extra_cost->alu.arith;
10329 }
10330 return true;
10331 }
10332
10333 if (speed_p)
10334 *cost += extra_cost->alu.arith;
10335 return false;
10336 }
10337
10338 if (GET_MODE_CLASS (mode) == MODE_INT
10339 && GET_MODE_SIZE (mode) < 4)
10340 {
10341 /* Slightly disparage, as we might need an extend operation. */
10342 *cost += 1;
10343 if (speed_p)
10344 *cost += extra_cost->alu.arith;
10345 return false;
10346 }
10347
10348 if (mode == DImode)
10349 {
10350 *cost += COSTS_N_INSNS (1);
10351 if (speed_p)
10352 *cost += 2 * extra_cost->alu.arith;
10353 return false;
10354 }
10355
10356 /* Vector mode? */
10357 *cost = LIBCALL_COST (1);
10358 return false;
10359
10360 case NOT:
10361 if (mode == SImode)
10362 {
10363 rtx shift_op;
10364 rtx shift_reg = NULL;
10365
10366 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10367
10368 if (shift_op)
10369 {
10370 if (shift_reg != NULL)
10371 {
10372 if (speed_p)
10373 *cost += extra_cost->alu.log_shift_reg;
10374 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10375 }
10376 else if (speed_p)
10377 *cost += extra_cost->alu.log_shift;
10378 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10379 return true;
10380 }
10381
10382 if (speed_p)
10383 *cost += extra_cost->alu.logical;
10384 return false;
10385 }
10386 if (mode == DImode)
10387 {
10388 *cost += COSTS_N_INSNS (1);
10389 return false;
10390 }
10391
10392 /* Vector mode? */
10393
10394 *cost += LIBCALL_COST (1);
10395 return false;
10396
10397 case IF_THEN_ELSE:
10398 {
10399 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10400 {
10401 *cost += COSTS_N_INSNS (3);
10402 return true;
10403 }
10404 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10405 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10406
10407 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10408 /* Assume that if one arm of the if_then_else is a register,
10409 it will be tied with the result, eliminating the
10410 conditional insn. */
10411 if (REG_P (XEXP (x, 1)))
10412 *cost += op2cost;
10413 else if (REG_P (XEXP (x, 2)))
10414 *cost += op1cost;
10415 else
10416 {
10417 if (speed_p)
10418 {
10419 if (extra_cost->alu.non_exec_costs_exec)
10420 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10421 else
10422 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10423 }
10424 else
10425 *cost += op1cost + op2cost;
10426 }
10427 }
10428 return true;
10429
10430 case COMPARE:
10431 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10432 *cost = 0;
10433 else
10434 {
10435 machine_mode op0mode;
10436 /* We'll mostly assume that the cost of a compare is the cost of the
10437 LHS. However, there are some notable exceptions. */
10438
10439 /* Floating point compares are never done as side-effects. */
10440 op0mode = GET_MODE (XEXP (x, 0));
10441 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10442 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10443 {
10444 if (speed_p)
10445 *cost += extra_cost->fp[op0mode != SFmode].compare;
10446
10447 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10448 {
10449 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10450 return true;
10451 }
10452
10453 return false;
10454 }
10455 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10456 {
10457 *cost = LIBCALL_COST (2);
10458 return false;
10459 }
10460
10461 /* DImode compares normally take two insns. */
10462 if (op0mode == DImode)
10463 {
10464 *cost += COSTS_N_INSNS (1);
10465 if (speed_p)
10466 *cost += 2 * extra_cost->alu.arith;
10467 return false;
10468 }
10469
10470 if (op0mode == SImode)
10471 {
10472 rtx shift_op;
10473 rtx shift_reg;
10474
10475 if (XEXP (x, 1) == const0_rtx
10476 && !(REG_P (XEXP (x, 0))
10477 || (GET_CODE (XEXP (x, 0)) == SUBREG
10478 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10479 {
10480 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10481
10482 /* Multiply operations that set the flags are often
10483 significantly more expensive. */
10484 if (speed_p
10485 && GET_CODE (XEXP (x, 0)) == MULT
10486 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10487 *cost += extra_cost->mult[0].flag_setting;
10488
10489 if (speed_p
10490 && GET_CODE (XEXP (x, 0)) == PLUS
10491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10492 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10493 0), 1), mode))
10494 *cost += extra_cost->mult[0].flag_setting;
10495 return true;
10496 }
10497
10498 shift_reg = NULL;
10499 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10500 if (shift_op != NULL)
10501 {
10502 if (shift_reg != NULL)
10503 {
10504 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10505 1, speed_p);
10506 if (speed_p)
10507 *cost += extra_cost->alu.arith_shift_reg;
10508 }
10509 else if (speed_p)
10510 *cost += extra_cost->alu.arith_shift;
10511 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10512 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10513 return true;
10514 }
10515
10516 if (speed_p)
10517 *cost += extra_cost->alu.arith;
10518 if (CONST_INT_P (XEXP (x, 1))
10519 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10520 {
10521 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10522 return true;
10523 }
10524 return false;
10525 }
10526
10527 /* Vector mode? */
10528
10529 *cost = LIBCALL_COST (2);
10530 return false;
10531 }
10532 return true;
10533
10534 case EQ:
10535 case NE:
10536 case LT:
10537 case LE:
10538 case GT:
10539 case GE:
10540 case LTU:
10541 case LEU:
10542 case GEU:
10543 case GTU:
10544 case ORDERED:
10545 case UNORDERED:
10546 case UNEQ:
10547 case UNLE:
10548 case UNLT:
10549 case UNGE:
10550 case UNGT:
10551 case LTGT:
10552 if (outer_code == SET)
10553 {
10554 /* Is it a store-flag operation? */
10555 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10556 && XEXP (x, 1) == const0_rtx)
10557 {
10558 /* Thumb also needs an IT insn. */
10559 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10560 return true;
10561 }
10562 if (XEXP (x, 1) == const0_rtx)
10563 {
10564 switch (code)
10565 {
10566 case LT:
10567 /* LSR Rd, Rn, #31. */
10568 if (speed_p)
10569 *cost += extra_cost->alu.shift;
10570 break;
10571
10572 case EQ:
10573 /* RSBS T1, Rn, #0
10574 ADC Rd, Rn, T1. */
10575
10576 case NE:
10577 /* SUBS T1, Rn, #1
10578 SBC Rd, Rn, T1. */
10579 *cost += COSTS_N_INSNS (1);
10580 break;
10581
10582 case LE:
10583 /* RSBS T1, Rn, Rn, LSR #31
10584 ADC Rd, Rn, T1. */
10585 *cost += COSTS_N_INSNS (1);
10586 if (speed_p)
10587 *cost += extra_cost->alu.arith_shift;
10588 break;
10589
10590 case GT:
10591 /* RSB Rd, Rn, Rn, ASR #1
10592 LSR Rd, Rd, #31. */
10593 *cost += COSTS_N_INSNS (1);
10594 if (speed_p)
10595 *cost += (extra_cost->alu.arith_shift
10596 + extra_cost->alu.shift);
10597 break;
10598
10599 case GE:
10600 /* ASR Rd, Rn, #31
10601 ADD Rd, Rn, #1. */
10602 *cost += COSTS_N_INSNS (1);
10603 if (speed_p)
10604 *cost += extra_cost->alu.shift;
10605 break;
10606
10607 default:
10608 /* Remaining cases are either meaningless or would take
10609 three insns anyway. */
10610 *cost = COSTS_N_INSNS (3);
10611 break;
10612 }
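/* For example (illustrative): for "r = (x < 0)" the LT case above is a
   single logical shift right by 31, whereas GE needs the shift plus an
   add, hence the extra insn.  */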
10613 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10614 return true;
10615 }
10616 else
10617 {
10618 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10619 if (CONST_INT_P (XEXP (x, 1))
10620 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10621 {
10622 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10623 return true;
10624 }
10625
10626 return false;
10627 }
10628 }
10629 /* Not directly inside a set. If it involves the condition code
10630 register, it must be the condition for a branch, cond_exec or
10631 I_T_E operation. Since the comparison is performed elsewhere,
10632 this is just the control part, which has no additional
10633 cost. */
10634 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10635 && XEXP (x, 1) == const0_rtx)
10636 {
10637 *cost = 0;
10638 return true;
10639 }
10640 return false;
10641
10642 case ABS:
10643 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10644 && (mode == SFmode || !TARGET_VFP_SINGLE))
10645 {
10646 if (speed_p)
10647 *cost += extra_cost->fp[mode != SFmode].neg;
10648
10649 return false;
10650 }
10651 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10652 {
10653 *cost = LIBCALL_COST (1);
10654 return false;
10655 }
10656
10657 if (mode == SImode)
10658 {
10659 if (speed_p)
10660 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10661 return false;
10662 }
10663 /* Vector mode? */
10664 *cost = LIBCALL_COST (1);
10665 return false;
10666
10667 case SIGN_EXTEND:
10668 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10669 && MEM_P (XEXP (x, 0)))
10670 {
10671 if (mode == DImode)
10672 *cost += COSTS_N_INSNS (1);
10673
10674 if (!speed_p)
10675 return true;
10676
10677 if (GET_MODE (XEXP (x, 0)) == SImode)
10678 *cost += extra_cost->ldst.load;
10679 else
10680 *cost += extra_cost->ldst.load_sign_extend;
10681
10682 if (mode == DImode)
10683 *cost += extra_cost->alu.shift;
10684
10685 return true;
10686 }
10687
10688 /* Widening from less than 32 bits requires an extend operation. */
10689 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10690 {
10691 /* We have SXTB/SXTH. */
10692 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10693 if (speed_p)
10694 *cost += extra_cost->alu.extend;
10695 }
10696 else if (GET_MODE (XEXP (x, 0)) != SImode)
10697 {
10698 /* Needs two shifts. */
10699 *cost += COSTS_N_INSNS (1);
10700 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10701 if (speed_p)
10702 *cost += 2 * extra_cost->alu.shift;
10703 }
10704
10705 /* Widening beyond 32 bits requires one more insn. */
10706 if (mode == DImode)
10707 {
10708 *cost += COSTS_N_INSNS (1);
10709 if (speed_p)
10710 *cost += extra_cost->alu.shift;
10711 }
10712
10713 return true;
10714
10715 case ZERO_EXTEND:
10716 if ((arm_arch4
10717 || GET_MODE (XEXP (x, 0)) == SImode
10718 || GET_MODE (XEXP (x, 0)) == QImode)
10719 && MEM_P (XEXP (x, 0)))
10720 {
10721 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10722
10723 if (mode == DImode)
10724 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10725
10726 return true;
10727 }
10728
10729 /* Widening from less than 32 bits requires an extend operation. */
10730 if (GET_MODE (XEXP (x, 0)) == QImode)
10731 {
10732 /* UXTB can be a shorter instruction in Thumb2, but it might
10733 be slower than the AND Rd, Rn, #255 alternative. When
10734 optimizing for speed it should never be slower to use
10735 AND, and we don't really model 16-bit vs 32-bit insns
10736 here. */
10737 if (speed_p)
10738 *cost += extra_cost->alu.logical;
10739 }
10740 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10741 {
10742 /* We have UXTB/UXTH. */
10743 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10744 if (speed_p)
10745 *cost += extra_cost->alu.extend;
10746 }
10747 else if (GET_MODE (XEXP (x, 0)) != SImode)
10748 {
10749 /* Needs two shifts. It's marginally preferable to use
10750 shifts rather than two BIC instructions as the second
10751 shift may merge with a subsequent insn as a shifter
10752 op. */
10753 *cost = COSTS_N_INSNS (2);
10754 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10755 if (speed_p)
10756 *cost += 2 * extra_cost->alu.shift;
10757 }
10758
10759 /* Widening beyond 32 bits requires one more insn. */
10760 if (mode == DImode)
10761 {
10762 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10763 }
10764
10765 return true;
10766
10767 case CONST_INT:
10768 *cost = 0;
10769 /* CONST_INT has no mode, so we cannot tell for sure how many
10770 insns are really going to be needed. The best we can do is
10771 look at the value passed. If it fits in SImode, then assume
10772 that's the mode it will be used for. Otherwise assume it
10773 will be used in DImode. */
10774 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10775 mode = SImode;
10776 else
10777 mode = DImode;
10778
10779 /* Avoid blowing up in arm_gen_constant (). */
10780 if (!(outer_code == PLUS
10781 || outer_code == AND
10782 || outer_code == IOR
10783 || outer_code == XOR
10784 || outer_code == MINUS))
10785 outer_code = SET;
10786
10787 const_int_cost:
10788 if (mode == SImode)
10789 {
10790 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10791 INTVAL (x), NULL, NULL,
10792 0, 0));
10793 /* Extra costs? */
10794 }
10795 else
10796 {
10797 *cost += COSTS_N_INSNS (arm_gen_constant
10798 (outer_code, SImode, NULL,
10799 trunc_int_for_mode (INTVAL (x), SImode),
10800 NULL, NULL, 0, 0)
10801 + arm_gen_constant (outer_code, SImode, NULL,
10802 INTVAL (x) >> 32, NULL,
10803 NULL, 0, 0));
10804 /* Extra costs? */
10805 }
10806
10807 return true;
10808
10809 case CONST:
10810 case LABEL_REF:
10811 case SYMBOL_REF:
10812 if (speed_p)
10813 {
10814 if (arm_arch_thumb2 && !flag_pic)
10815 *cost += COSTS_N_INSNS (1);
10816 else
10817 *cost += extra_cost->ldst.load;
10818 }
10819 else
10820 *cost += COSTS_N_INSNS (1);
10821
10822 if (flag_pic)
10823 {
10824 *cost += COSTS_N_INSNS (1);
10825 if (speed_p)
10826 *cost += extra_cost->alu.arith;
10827 }
10828
10829 return true;
10830
10831 case CONST_FIXED:
10832 *cost = COSTS_N_INSNS (4);
10833 /* Fixme. */
10834 return true;
10835
10836 case CONST_DOUBLE:
10837 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10838 && (mode == SFmode || !TARGET_VFP_SINGLE))
10839 {
10840 if (vfp3_const_double_rtx (x))
10841 {
10842 if (speed_p)
10843 *cost += extra_cost->fp[mode == DFmode].fpconst;
10844 return true;
10845 }
10846
10847 if (speed_p)
10848 {
10849 if (mode == DFmode)
10850 *cost += extra_cost->ldst.loadd;
10851 else
10852 *cost += extra_cost->ldst.loadf;
10853 }
10854 else
10855 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10856
10857 return true;
10858 }
10859 *cost = COSTS_N_INSNS (4);
10860 return true;
10861
10862 case CONST_VECTOR:
10863 /* Fixme. */
10864 if (TARGET_NEON
10865 && TARGET_HARD_FLOAT
10866 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10867 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10868 *cost = COSTS_N_INSNS (1);
10869 else
10870 *cost = COSTS_N_INSNS (4);
10871 return true;
10872
10873 case HIGH:
10874 case LO_SUM:
10875 /* When optimizing for size, we prefer constant pool entries to
10876 MOVW/MOVT pairs, so bump the cost of these slightly. */
10877 if (!speed_p)
10878 *cost += 1;
10879 return true;
10880
10881 case CLZ:
10882 if (speed_p)
10883 *cost += extra_cost->alu.clz;
10884 return false;
10885
10886 case SMIN:
10887 if (XEXP (x, 1) == const0_rtx)
10888 {
10889 if (speed_p)
10890 *cost += extra_cost->alu.log_shift;
10891 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10892 return true;
10893 }
10894 /* Fall through. */
10895 case SMAX:
10896 case UMIN:
10897 case UMAX:
10898 *cost += COSTS_N_INSNS (1);
10899 return false;
10900
10901 case TRUNCATE:
10902 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10903 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10904 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10905 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10906 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10907 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10908 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10909 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10910 == ZERO_EXTEND))))
10911 {
10912 if (speed_p)
10913 *cost += extra_cost->mult[1].extend;
10914 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10915 ZERO_EXTEND, 0, speed_p)
10916 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10917 ZERO_EXTEND, 0, speed_p));
10918 return true;
10919 }
10920 *cost = LIBCALL_COST (1);
10921 return false;
10922
10923 case UNSPEC_VOLATILE:
10924 case UNSPEC:
10925 return arm_unspec_cost (x, outer_code, speed_p, cost);
10926
10927 case PC:
10928 /* Reading the PC is like reading any other register. Writing it
10929 is more expensive, but we take that into account elsewhere. */
10930 *cost = 0;
10931 return true;
10932
10933 case ZERO_EXTRACT:
10934 /* TODO: Simple zero_extract of bottom bits using AND. */
10935 /* Fall through. */
10936 case SIGN_EXTRACT:
10937 if (arm_arch6
10938 && mode == SImode
10939 && CONST_INT_P (XEXP (x, 1))
10940 && CONST_INT_P (XEXP (x, 2)))
10941 {
10942 if (speed_p)
10943 *cost += extra_cost->alu.bfx;
10944 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10945 return true;
10946 }
10947 /* Without UBFX/SBFX, need to resort to shift operations. */
10948 *cost += COSTS_N_INSNS (1);
10949 if (speed_p)
10950 *cost += 2 * extra_cost->alu.shift;
10951 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10952 return true;
10953
10954 case FLOAT_EXTEND:
10955 if (TARGET_HARD_FLOAT)
10956 {
10957 if (speed_p)
10958 *cost += extra_cost->fp[mode == DFmode].widen;
10959 if (!TARGET_VFP5
10960 && GET_MODE (XEXP (x, 0)) == HFmode)
10961 {
10962 /* Pre v8, widening HF->DF is a two-step process, first
10963 widening to SFmode. */
10964 *cost += COSTS_N_INSNS (1);
10965 if (speed_p)
10966 *cost += extra_cost->fp[0].widen;
10967 }
10968 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10969 return true;
10970 }
10971
10972 *cost = LIBCALL_COST (1);
10973 return false;
10974
10975 case FLOAT_TRUNCATE:
10976 if (TARGET_HARD_FLOAT)
10977 {
10978 if (speed_p)
10979 *cost += extra_cost->fp[mode == DFmode].narrow;
10980 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10981 return true;
10982 /* Vector modes? */
10983 }
10984 *cost = LIBCALL_COST (1);
10985 return false;
10986
10987 case FMA:
10988 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10989 {
10990 rtx op0 = XEXP (x, 0);
10991 rtx op1 = XEXP (x, 1);
10992 rtx op2 = XEXP (x, 2);
10993
10994
10995 /* vfms or vfnma. */
10996 if (GET_CODE (op0) == NEG)
10997 op0 = XEXP (op0, 0);
10998
10999 /* vfnms or vfnma. */
11000 if (GET_CODE (op2) == NEG)
11001 op2 = XEXP (op2, 0);
11002
11003 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11004 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11005 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11006
11007 if (speed_p)
11008 *cost += extra_cost->fp[mode == DFmode].fma;
11009
11010 return true;
11011 }
11012
11013 *cost = LIBCALL_COST (3);
11014 return false;
11015
11016 case FIX:
11017 case UNSIGNED_FIX:
11018 if (TARGET_HARD_FLOAT)
11019 {
11020 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11021 a vcvt fixed-point conversion. */
11022 if (code == FIX && mode == SImode
11023 && GET_CODE (XEXP (x, 0)) == FIX
11024 && GET_MODE (XEXP (x, 0)) == SFmode
11025 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11026 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11027 > 0)
11028 {
11029 if (speed_p)
11030 *cost += extra_cost->fp[0].toint;
11031
11032 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11033 code, 0, speed_p);
11034 return true;
11035 }
11036
11037 if (GET_MODE_CLASS (mode) == MODE_INT)
11038 {
11039 mode = GET_MODE (XEXP (x, 0));
11040 if (speed_p)
11041 *cost += extra_cost->fp[mode == DFmode].toint;
11042 /* Strip off the 'cost' of rounding towards zero. */
11043 if (GET_CODE (XEXP (x, 0)) == FIX)
11044 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11045 0, speed_p);
11046 else
11047 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11048 /* ??? Increase the cost to deal with transferring from
11049 FP -> CORE registers? */
11050 return true;
11051 }
11052 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11053 && TARGET_VFP5)
11054 {
11055 if (speed_p)
11056 *cost += extra_cost->fp[mode == DFmode].roundint;
11057 return false;
11058 }
11059 /* Vector costs? */
11060 }
11061 *cost = LIBCALL_COST (1);
11062 return false;
11063
11064 case FLOAT:
11065 case UNSIGNED_FLOAT:
11066 if (TARGET_HARD_FLOAT)
11067 {
11068 /* ??? Increase the cost to deal with transferring from CORE
11069 -> FP registers? */
11070 if (speed_p)
11071 *cost += extra_cost->fp[mode == DFmode].fromint;
11072 return false;
11073 }
11074 *cost = LIBCALL_COST (1);
11075 return false;
11076
11077 case CALL:
11078 return true;
11079
11080 case ASM_OPERANDS:
11081 {
11082 /* Just a guess. Guess number of instructions in the asm
11083 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11084 though (see PR60663). */
11085 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11086 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11087
11088 *cost = COSTS_N_INSNS (asm_length + num_operands);
11089 return true;
11090 }
11091 default:
11092 if (mode != VOIDmode)
11093 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11094 else
11095 *cost = COSTS_N_INSNS (4); /* Who knows? */
11096 return false;
11097 }
11098 }
11099
11100 #undef HANDLE_NARROW_SHIFT_ARITH
11101
11102 /* RTX costs entry point. */
11103
11104 static bool
11105 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11106 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11107 {
11108 bool result;
11109 int code = GET_CODE (x);
11110 gcc_assert (current_tune->insn_extra_cost);
11111
11112 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11113 (enum rtx_code) outer_code,
11114 current_tune->insn_extra_cost,
11115 total, speed);
11116
11117 if (dump_file && arm_verbose_cost)
11118 {
11119 print_rtl_single (dump_file, x);
11120 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11121 *total, result ? "final" : "partial");
11122 }
11123 return result;
11124 }
11125
11126 /* All address computations that can be done are free, but rtx cost returns
11127 the same for practically all of them. So we weight the different types
11128 of address here in the order (most pref first):
11129 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11130 static inline int
11131 arm_arm_address_cost (rtx x)
11132 {
11133 enum rtx_code c = GET_CODE (x);
11134
11135 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11136 return 0;
11137 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11138 return 10;
11139
11140 if (c == PLUS)
11141 {
11142 if (CONST_INT_P (XEXP (x, 1)))
11143 return 2;
11144
11145 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11146 return 3;
11147
11148 return 4;
11149 }
11150
11151 return 6;
11152 }
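/* Illustrative worked example (editorial addition, not part of the original
   source): with the weighting above, some typical address RTXs score as
   follows, lower being more preferred:
     (post_inc (reg))                        -> 0
     (plus (reg) (const_int 8))              -> 2
     (plus (reg) (mult (reg) (const_int 4))) -> 3
     (plus (reg) (reg))                      -> 4
     (reg)                                   -> 6
     (symbol_ref "x")                        -> 10  */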
11153
11154 static inline int
11155 arm_thumb_address_cost (rtx x)
11156 {
11157 enum rtx_code c = GET_CODE (x);
11158
11159 if (c == REG)
11160 return 1;
11161 if (c == PLUS
11162 && REG_P (XEXP (x, 0))
11163 && CONST_INT_P (XEXP (x, 1)))
11164 return 1;
11165
11166 return 2;
11167 }
11168
11169 static int
11170 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11171 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11172 {
11173 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11174 }
11175
11176 /* Adjust cost hook for XScale. */
11177 static bool
11178 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11179 int * cost)
11180 {
11181 /* Some true dependencies can have a higher cost depending
11182 on precisely how certain input operands are used. */
11183 if (dep_type == 0
11184 && recog_memoized (insn) >= 0
11185 && recog_memoized (dep) >= 0)
11186 {
11187 int shift_opnum = get_attr_shift (insn);
11188 enum attr_type attr_type = get_attr_type (dep);
11189
11190 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11191 operand for INSN. If we have a shifted input operand and the
11192 instruction we depend on is another ALU instruction, then we may
11193 have to account for an additional stall. */
11194 if (shift_opnum != 0
11195 && (attr_type == TYPE_ALU_SHIFT_IMM
11196 || attr_type == TYPE_ALUS_SHIFT_IMM
11197 || attr_type == TYPE_LOGIC_SHIFT_IMM
11198 || attr_type == TYPE_LOGICS_SHIFT_IMM
11199 || attr_type == TYPE_ALU_SHIFT_REG
11200 || attr_type == TYPE_ALUS_SHIFT_REG
11201 || attr_type == TYPE_LOGIC_SHIFT_REG
11202 || attr_type == TYPE_LOGICS_SHIFT_REG
11203 || attr_type == TYPE_MOV_SHIFT
11204 || attr_type == TYPE_MVN_SHIFT
11205 || attr_type == TYPE_MOV_SHIFT_REG
11206 || attr_type == TYPE_MVN_SHIFT_REG))
11207 {
11208 rtx shifted_operand;
11209 int opno;
11210
11211 /* Get the shifted operand. */
11212 extract_insn (insn);
11213 shifted_operand = recog_data.operand[shift_opnum];
11214
11215 /* Iterate over all the operands in DEP. If we write an operand
11216 that overlaps with SHIFTED_OPERAND, then we have to increase the
11217 cost of this dependency. */
11218 extract_insn (dep);
11219 preprocess_constraints (dep);
11220 for (opno = 0; opno < recog_data.n_operands; opno++)
11221 {
11222 /* We can ignore strict inputs. */
11223 if (recog_data.operand_type[opno] == OP_IN)
11224 continue;
11225
11226 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11227 shifted_operand))
11228 {
11229 *cost = 2;
11230 return false;
11231 }
11232 }
11233 }
11234 }
11235 return true;
11236 }
11237
11238 /* Adjust cost hook for Cortex A9. */
11239 static bool
11240 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11241 int * cost)
11242 {
11243 switch (dep_type)
11244 {
11245 case REG_DEP_ANTI:
11246 *cost = 0;
11247 return false;
11248
11249 case REG_DEP_TRUE:
11250 case REG_DEP_OUTPUT:
11251 if (recog_memoized (insn) >= 0
11252 && recog_memoized (dep) >= 0)
11253 {
11254 if (GET_CODE (PATTERN (insn)) == SET)
11255 {
11256 if (GET_MODE_CLASS
11257 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11258 || GET_MODE_CLASS
11259 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11260 {
11261 enum attr_type attr_type_insn = get_attr_type (insn);
11262 enum attr_type attr_type_dep = get_attr_type (dep);
11263
11264 /* By default all dependencies of the form
11265 s0 = s0 <op> s1
11266 s0 = s0 <op> s2
11267 have an extra latency of 1 cycle because
11268 of the input and output dependency in this
11269 case. However this gets modeled as a true
11270 dependency and hence all these checks. */
11271 if (REG_P (SET_DEST (PATTERN (insn)))
11272 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11273 {
11274 /* FMACS is a special case where the dependent
11275 instruction can be issued 3 cycles before
11276 the normal latency in case of an output
11277 dependency. */
11278 if ((attr_type_insn == TYPE_FMACS
11279 || attr_type_insn == TYPE_FMACD)
11280 && (attr_type_dep == TYPE_FMACS
11281 || attr_type_dep == TYPE_FMACD))
11282 {
11283 if (dep_type == REG_DEP_OUTPUT)
11284 *cost = insn_default_latency (dep) - 3;
11285 else
11286 *cost = insn_default_latency (dep);
11287 return false;
11288 }
11289 else
11290 {
11291 if (dep_type == REG_DEP_OUTPUT)
11292 *cost = insn_default_latency (dep) + 1;
11293 else
11294 *cost = insn_default_latency (dep);
11295 }
11296 return false;
11297 }
11298 }
11299 }
11300 }
11301 break;
11302
11303 default:
11304 gcc_unreachable ();
11305 }
11306
11307 return true;
11308 }
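/* Illustrative example (editorial addition, not part of the original source):
   with the hook above, a VFP multiply-accumulate (TYPE_FMACS/TYPE_FMACD) that
   has an output dependency on another multiply-accumulate costs the
   dependence's default latency minus 3; any other floating-point output
   dependency costs the default latency plus 1, and anti-dependencies are
   free (cost 0).  */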
11309
11310 /* Adjust cost hook for FA726TE. */
11311 static bool
11312 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11313 int * cost)
11314 {
11315 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11316 has a penalty of 3. */
11317 if (dep_type == REG_DEP_TRUE
11318 && recog_memoized (insn) >= 0
11319 && recog_memoized (dep) >= 0
11320 && get_attr_conds (dep) == CONDS_SET)
11321 {
11322 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11323 if (get_attr_conds (insn) == CONDS_USE
11324 && get_attr_type (insn) != TYPE_BRANCH)
11325 {
11326 *cost = 3;
11327 return false;
11328 }
11329
11330 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11331 || get_attr_conds (insn) == CONDS_USE)
11332 {
11333 *cost = 0;
11334 return false;
11335 }
11336 }
11337
11338 return true;
11339 }
11340
11341 /* Implement TARGET_REGISTER_MOVE_COST.
11342
11343 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11344 such a move is typically more expensive than a single memory access. We set
11345 the cost to less than two memory accesses so that floating
11346 point to integer conversion does not go through memory. */
11347
11348 int
11349 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11350 reg_class_t from, reg_class_t to)
11351 {
11352 if (TARGET_32BIT)
11353 {
11354 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11355 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11356 return 15;
11357 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11358 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11359 return 4;
11360 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11361 return 20;
11362 else
11363 return 2;
11364 }
11365 else
11366 {
11367 if (from == HI_REGS || to == HI_REGS)
11368 return 4;
11369 else
11370 return 2;
11371 }
11372 }
11373
11374 /* Implement TARGET_MEMORY_MOVE_COST. */
11375
11376 int
11377 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11378 bool in ATTRIBUTE_UNUSED)
11379 {
11380 if (TARGET_32BIT)
11381 return 10;
11382 else
11383 {
11384 if (GET_MODE_SIZE (mode) < 4)
11385 return 8;
11386 else
11387 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11388 }
11389 }
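/* Illustrative worked example (editorial addition, not part of the original
   source): on a 32-bit target every memory move costs 10.  For Thumb-1 the
   formula above gives, e.g., 8 for QImode (size < 4), 2 * 4 * 1 = 8 for an
   SImode value in LO_REGS, and 2 * 4 * 2 = 16 for an SImode value in any
   other register class.  */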
11390
11391 /* Vectorizer cost model implementation. */
11392
11393 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11394 static int
11395 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11396 tree vectype,
11397 int misalign ATTRIBUTE_UNUSED)
11398 {
11399 unsigned elements;
11400
11401 switch (type_of_cost)
11402 {
11403 case scalar_stmt:
11404 return current_tune->vec_costs->scalar_stmt_cost;
11405
11406 case scalar_load:
11407 return current_tune->vec_costs->scalar_load_cost;
11408
11409 case scalar_store:
11410 return current_tune->vec_costs->scalar_store_cost;
11411
11412 case vector_stmt:
11413 return current_tune->vec_costs->vec_stmt_cost;
11414
11415 case vector_load:
11416 return current_tune->vec_costs->vec_align_load_cost;
11417
11418 case vector_store:
11419 return current_tune->vec_costs->vec_store_cost;
11420
11421 case vec_to_scalar:
11422 return current_tune->vec_costs->vec_to_scalar_cost;
11423
11424 case scalar_to_vec:
11425 return current_tune->vec_costs->scalar_to_vec_cost;
11426
11427 case unaligned_load:
11428 case vector_gather_load:
11429 return current_tune->vec_costs->vec_unalign_load_cost;
11430
11431 case unaligned_store:
11432 case vector_scatter_store:
11433 return current_tune->vec_costs->vec_unalign_store_cost;
11434
11435 case cond_branch_taken:
11436 return current_tune->vec_costs->cond_taken_branch_cost;
11437
11438 case cond_branch_not_taken:
11439 return current_tune->vec_costs->cond_not_taken_branch_cost;
11440
11441 case vec_perm:
11442 case vec_promote_demote:
11443 return current_tune->vec_costs->vec_stmt_cost;
11444
11445 case vec_construct:
11446 elements = TYPE_VECTOR_SUBPARTS (vectype);
11447 return elements / 2 + 1;
11448
11449 default:
11450 gcc_unreachable ();
11451 }
11452 }
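/* Illustrative worked example (editorial addition, not part of the original
   source): for vec_construct the cost grows with the element count, e.g. a
   four-element vector such as V4SI costs 4 / 2 + 1 = 3, while a sixteen-
   element V16QI vector costs 16 / 2 + 1 = 9.  All other entries come
   straight from the per-tune vec_costs table.  */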
11453
11454 /* Implement targetm.vectorize.add_stmt_cost. */
11455
11456 static unsigned
11457 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11458 struct _stmt_vec_info *stmt_info, int misalign,
11459 enum vect_cost_model_location where)
11460 {
11461 unsigned *cost = (unsigned *) data;
11462 unsigned retval = 0;
11463
11464 if (flag_vect_cost_model)
11465 {
11466 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11467 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11468
11469 /* Statements in an inner loop relative to the loop being
11470 vectorized are weighted more heavily. The value here is
11471 arbitrary and could potentially be improved with analysis. */
11472 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11473 count *= 50; /* FIXME. */
11474
11475 retval = (unsigned) (count * stmt_cost);
11476 cost[where] += retval;
11477 }
11478
11479 return retval;
11480 }
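/* Illustrative worked example (editorial addition, not part of the original
   source): when the vector cost model is enabled, a statement of kind
   vector_load counted once in the body of a loop nested inside the loop
   being vectorized contributes 50 * vec_align_load_cost to cost[vect_body];
   the same statement outside an inner loop contributes just
   1 * vec_align_load_cost.  */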
11481
11482 /* Return true if and only if this insn can dual-issue only as older. */
11483 static bool
11484 cortexa7_older_only (rtx_insn *insn)
11485 {
11486 if (recog_memoized (insn) < 0)
11487 return false;
11488
11489 switch (get_attr_type (insn))
11490 {
11491 case TYPE_ALU_DSP_REG:
11492 case TYPE_ALU_SREG:
11493 case TYPE_ALUS_SREG:
11494 case TYPE_LOGIC_REG:
11495 case TYPE_LOGICS_REG:
11496 case TYPE_ADC_REG:
11497 case TYPE_ADCS_REG:
11498 case TYPE_ADR:
11499 case TYPE_BFM:
11500 case TYPE_REV:
11501 case TYPE_MVN_REG:
11502 case TYPE_SHIFT_IMM:
11503 case TYPE_SHIFT_REG:
11504 case TYPE_LOAD_BYTE:
11505 case TYPE_LOAD_4:
11506 case TYPE_STORE_4:
11507 case TYPE_FFARITHS:
11508 case TYPE_FADDS:
11509 case TYPE_FFARITHD:
11510 case TYPE_FADDD:
11511 case TYPE_FMOV:
11512 case TYPE_F_CVT:
11513 case TYPE_FCMPS:
11514 case TYPE_FCMPD:
11515 case TYPE_FCONSTS:
11516 case TYPE_FCONSTD:
11517 case TYPE_FMULS:
11518 case TYPE_FMACS:
11519 case TYPE_FMULD:
11520 case TYPE_FMACD:
11521 case TYPE_FDIVS:
11522 case TYPE_FDIVD:
11523 case TYPE_F_MRC:
11524 case TYPE_F_MRRC:
11525 case TYPE_F_FLAG:
11526 case TYPE_F_LOADS:
11527 case TYPE_F_STORES:
11528 return true;
11529 default:
11530 return false;
11531 }
11532 }
11533
11534 /* Return true if and only if this insn can dual-issue as younger. */
11535 static bool
11536 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11537 {
11538 if (recog_memoized (insn) < 0)
11539 {
11540 if (verbose > 5)
11541 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11542 return false;
11543 }
11544
11545 switch (get_attr_type (insn))
11546 {
11547 case TYPE_ALU_IMM:
11548 case TYPE_ALUS_IMM:
11549 case TYPE_LOGIC_IMM:
11550 case TYPE_LOGICS_IMM:
11551 case TYPE_EXTEND:
11552 case TYPE_MVN_IMM:
11553 case TYPE_MOV_IMM:
11554 case TYPE_MOV_REG:
11555 case TYPE_MOV_SHIFT:
11556 case TYPE_MOV_SHIFT_REG:
11557 case TYPE_BRANCH:
11558 case TYPE_CALL:
11559 return true;
11560 default:
11561 return false;
11562 }
11563 }
11564
11565
11566 /* Look for an instruction that can dual issue only as an older
11567 instruction, and move it in front of any instructions that can
11568 dual-issue as younger, while preserving the relative order of all
11569 other instructions in the ready list. This is a heuristic to help
11570 dual-issue in later cycles, by postponing issue of more flexible
11571 instructions. This heuristic may affect dual issue opportunities
11572 in the current cycle. */
11573 static void
11574 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11575 int *n_readyp, int clock)
11576 {
11577 int i;
11578 int first_older_only = -1, first_younger = -1;
11579
11580 if (verbose > 5)
11581 fprintf (file,
11582 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11583 clock,
11584 *n_readyp);
11585
11586 /* Traverse the ready list from the head (the instruction to issue
11587 first), looking for the first instruction that can issue as
11588 younger and the first instruction that can dual-issue only as
11589 older. */
11590 for (i = *n_readyp - 1; i >= 0; i--)
11591 {
11592 rtx_insn *insn = ready[i];
11593 if (cortexa7_older_only (insn))
11594 {
11595 first_older_only = i;
11596 if (verbose > 5)
11597 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11598 break;
11599 }
11600 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11601 first_younger = i;
11602 }
11603
11604 /* Nothing to reorder because either no younger insn was found, or an insn
11605 that can dual-issue only as older appears before any insn that
11606 can dual-issue as younger. */
11607 if (first_younger == -1)
11608 {
11609 if (verbose > 5)
11610 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11611 return;
11612 }
11613
11614 /* Nothing to reorder because no older-only insn in the ready list. */
11615 if (first_older_only == -1)
11616 {
11617 if (verbose > 5)
11618 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11619 return;
11620 }
11621
11622 /* Move first_older_only insn before first_younger. */
11623 if (verbose > 5)
11624 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11625 INSN_UID(ready [first_older_only]),
11626 INSN_UID(ready [first_younger]));
11627 rtx_insn *first_older_only_insn = ready [first_older_only];
11628 for (i = first_older_only; i < first_younger; i++)
11629 {
11630 ready[i] = ready[i+1];
11631 }
11632
11633 ready[i] = first_older_only_insn;
11634 return;
11635 }
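/* Editorial sketch (not part of the original source), showing in isolation
   the array rotation performed at the end of cortexa7_sched_reorder: the
   element at index FROM is moved up to index TO (FROM < TO) and the entries
   in between slide down one slot, so their relative order is preserved.
   The function name and the plain int element type are illustrative only.  */
#if 0
static void
move_element_up_example (int *list, int from, int to)
{
  int saved = list[from];
  for (int i = from; i < to; i++)
    list[i] = list[i + 1];	/* Slide the intervening entries down.  */
  list[to] = saved;		/* Drop the saved element into its new slot.  */
}
#endif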
11636
11637 /* Implement TARGET_SCHED_REORDER. */
11638 static int
11639 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11640 int clock)
11641 {
11642 switch (arm_tune)
11643 {
11644 case TARGET_CPU_cortexa7:
11645 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11646 break;
11647 default:
11648 /* Do nothing for other cores. */
11649 break;
11650 }
11651
11652 return arm_issue_rate ();
11653 }
11654
11655 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11656 It corrects the value of COST based on the relationship between
11657 INSN and DEP through the dependence of type DEP_TYPE. It returns the new
11658 value. There is a per-core adjust_cost hook to adjust scheduler costs
11659 and the per-core hook can choose to completely override the generic
11660 adjust_cost function. Only put bits of code into arm_adjust_cost that
11661 are common across all cores. */
11662 static int
11663 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11664 unsigned int)
11665 {
11666 rtx i_pat, d_pat;
11667
11668 /* When generating Thumb-1 code, we want to place flag-setting operations
11669 close to a conditional branch which depends on them, so that we can
11670 omit the comparison. */
11671 if (TARGET_THUMB1
11672 && dep_type == 0
11673 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11674 && recog_memoized (dep) >= 0
11675 && get_attr_conds (dep) == CONDS_SET)
11676 return 0;
11677
11678 if (current_tune->sched_adjust_cost != NULL)
11679 {
11680 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11681 return cost;
11682 }
11683
11684 /* XXX Is this strictly true? */
11685 if (dep_type == REG_DEP_ANTI
11686 || dep_type == REG_DEP_OUTPUT)
11687 return 0;
11688
11689 /* Call insns don't incur a stall, even if they follow a load. */
11690 if (dep_type == 0
11691 && CALL_P (insn))
11692 return 1;
11693
11694 if ((i_pat = single_set (insn)) != NULL
11695 && MEM_P (SET_SRC (i_pat))
11696 && (d_pat = single_set (dep)) != NULL
11697 && MEM_P (SET_DEST (d_pat)))
11698 {
11699 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11700 /* This is a load after a store; there is no conflict if the load reads
11701 from a cached area. Assume that loads from the stack and from the
11702 constant pool are cached, and that others will miss. This is a
11703 hack. */
11704
11705 if ((GET_CODE (src_mem) == SYMBOL_REF
11706 && CONSTANT_POOL_ADDRESS_P (src_mem))
11707 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11708 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11709 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11710 return 1;
11711 }
11712
11713 return cost;
11714 }
11715
11716 int
11717 arm_max_conditional_execute (void)
11718 {
11719 return max_insns_skipped;
11720 }
11721
11722 static int
11723 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11724 {
11725 if (TARGET_32BIT)
11726 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11727 else
11728 return (optimize > 0) ? 2 : 0;
11729 }
11730
11731 static int
11732 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11733 {
11734 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11735 }
11736
11737 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11738 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11739 sequences of non-executed instructions in IT blocks probably take the same
11740 amount of time as executed instructions (and the IT instruction itself takes
11741 space in icache). This function was experimentally determined to give good
11742 results on a popular embedded benchmark. */
11743
11744 static int
11745 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11746 {
11747 return (TARGET_32BIT && speed_p) ? 1
11748 : arm_default_branch_cost (speed_p, predictable_p);
11749 }
11750
11751 static int
11752 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11753 {
11754 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11755 }
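/* Illustrative worked example (editorial addition, not part of the original
   source): on a 32-bit target the generic branch cost above is 4 when
   optimizing for speed and 1 for Thumb-2 when optimizing for size; the
   Cortex-M hook reports 1 when optimizing for speed, the Cortex-A5 and
   Cortex-M7 hooks report 0, and all of them fall back to the generic cost
   otherwise.  */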
11756
11757 static bool fp_consts_inited = false;
11758
11759 static REAL_VALUE_TYPE value_fp0;
11760
11761 static void
11762 init_fp_table (void)
11763 {
11764 REAL_VALUE_TYPE r;
11765
11766 r = REAL_VALUE_ATOF ("0", DFmode);
11767 value_fp0 = r;
11768 fp_consts_inited = true;
11769 }
11770
11771 /* Return TRUE if rtx X is a valid immediate FP constant. */
11772 int
11773 arm_const_double_rtx (rtx x)
11774 {
11775 const REAL_VALUE_TYPE *r;
11776
11777 if (!fp_consts_inited)
11778 init_fp_table ();
11779
11780 r = CONST_DOUBLE_REAL_VALUE (x);
11781 if (REAL_VALUE_MINUS_ZERO (*r))
11782 return 0;
11783
11784 if (real_equal (r, &value_fp0))
11785 return 1;
11786
11787 return 0;
11788 }
11789
11790 /* VFPv3 has a fairly wide range of representable immediates, formed from
11791 "quarter-precision" floating-point values. These can be evaluated using this
11792 formula (with ^ for exponentiation):
11793
11794 -1^s * n * 2^-r
11795
11796 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11797 16 <= n <= 31 and 0 <= r <= 7.
11798
11799 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11800
11801 - A (most-significant) is the sign bit.
11802 - BCD are the exponent (encoded as r XOR 3).
11803 - EFGH are the mantissa (encoded as n - 16).
11804 */
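/* Illustrative worked example (editorial addition, not part of the original
   source): 1.0 = 16 * 2^-4 gives s = 0, n = 16, r = 4, so the encoded byte
   is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70; likewise 0.5 = 16 * 2^-5
   encodes as 0x60 and -1.0 as 0xf0.  A minimal sketch of the packing step,
   assuming s, n and r already satisfy the constraints above (the function
   name is hypothetical):  */
#if 0
static unsigned char
vfp3_pack_quarter_precision_example (int s, int n, int r)
{
  /* Mirrors the formula described above: A = sign, BCD = r XOR 3,
     EFGH = n - 16.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif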
11805
11806 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11807 fconst[sd] instruction, or -1 if X isn't suitable. */
11808 static int
11809 vfp3_const_double_index (rtx x)
11810 {
11811 REAL_VALUE_TYPE r, m;
11812 int sign, exponent;
11813 unsigned HOST_WIDE_INT mantissa, mant_hi;
11814 unsigned HOST_WIDE_INT mask;
11815 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11816 bool fail;
11817
11818 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11819 return -1;
11820
11821 r = *CONST_DOUBLE_REAL_VALUE (x);
11822
11823 /* We can't represent these things, so detect them first. */
11824 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11825 return -1;
11826
11827 /* Extract sign, exponent and mantissa. */
11828 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11829 r = real_value_abs (&r);
11830 exponent = REAL_EXP (&r);
11831 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11832 highest (sign) bit, with a fixed binary point at bit point_pos.
11833 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11834 bits for the mantissa, this may fail (low bits would be lost). */
11835 real_ldexp (&m, &r, point_pos - exponent);
11836 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11837 mantissa = w.elt (0);
11838 mant_hi = w.elt (1);
11839
11840 /* If there are bits set in the low part of the mantissa, we can't
11841 represent this value. */
11842 if (mantissa != 0)
11843 return -1;
11844
11845 /* Now make it so that mantissa contains the most-significant bits, and move
11846 the point_pos to indicate that the least-significant bits have been
11847 discarded. */
11848 point_pos -= HOST_BITS_PER_WIDE_INT;
11849 mantissa = mant_hi;
11850
11851 /* We can permit four significant bits of mantissa only, plus a high bit
11852 which is always 1. */
11853 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11854 if ((mantissa & mask) != 0)
11855 return -1;
11856
11857 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11858 mantissa >>= point_pos - 5;
11859
11860 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11861 floating-point immediate zero with Neon using an integer-zero load, but
11862 that case is handled elsewhere.) */
11863 if (mantissa == 0)
11864 return -1;
11865
11866 gcc_assert (mantissa >= 16 && mantissa <= 31);
11867
11868 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11869 normalized significands are in the range [1, 2). (Our mantissa is shifted
11870 left 4 places at this point relative to normalized IEEE754 values). GCC
11871 internally uses [0.5, 1) (see real.c), so the exponent returned from
11872 REAL_EXP must be altered. */
11873 exponent = 5 - exponent;
11874
11875 if (exponent < 0 || exponent > 7)
11876 return -1;
11877
11878 /* Sign, mantissa and exponent are now in the correct form to plug into the
11879 formula described in the comment above. */
11880 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11881 }
11882
11883 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11884 int
11885 vfp3_const_double_rtx (rtx x)
11886 {
11887 if (!TARGET_VFP3)
11888 return 0;
11889
11890 return vfp3_const_double_index (x) != -1;
11891 }
11892
11893 /* Recognize immediates which can be used in various Neon instructions. Legal
11894 immediates are described by the following table (for VMVN variants, the
11895 bitwise inverse of the constant shown is recognized. In either case, VMOV
11896 is output and the correct instruction to use for a given constant is chosen
11897 by the assembler). The constant shown is replicated across all elements of
11898 the destination vector.
11899
11900 insn elems variant constant (binary)
11901 ---- ----- ------- -----------------
11902 vmov i32 0 00000000 00000000 00000000 abcdefgh
11903 vmov i32 1 00000000 00000000 abcdefgh 00000000
11904 vmov i32 2 00000000 abcdefgh 00000000 00000000
11905 vmov i32 3 abcdefgh 00000000 00000000 00000000
11906 vmov i16 4 00000000 abcdefgh
11907 vmov i16 5 abcdefgh 00000000
11908 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11909 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11910 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11911 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11912 vmvn i16 10 00000000 abcdefgh
11913 vmvn i16 11 abcdefgh 00000000
11914 vmov i32 12 00000000 00000000 abcdefgh 11111111
11915 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11916 vmov i32 14 00000000 abcdefgh 11111111 11111111
11917 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11918 vmov i8 16 abcdefgh
11919 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11920 eeeeeeee ffffffff gggggggg hhhhhhhh
11921 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11922 vmov f32 19 00000000 00000000 00000000 00000000
11923
11924 For case 18, B = !b. Representable values are exactly those accepted by
11925 vfp3_const_double_index, but are output as floating-point numbers rather
11926 than indices.
11927
11928 For case 19, we will change it to vmov.i32 when assembling.
11929
11930 Variants 0-5 (inclusive) may also be used as immediates for the second
11931 operand of VORR/VBIC instructions.
11932
11933 The INVERSE argument causes the bitwise inverse of the given operand to be
11934 recognized instead (used for recognizing legal immediates for the VAND/VORN
11935 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11936 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11937 output, rather than the real insns vbic/vorr).
11938
11939 INVERSE makes no difference to the recognition of float vectors.
11940
11941 The return value is the variant of immediate as shown in the above table, or
11942 -1 if the given value doesn't match any of the listed patterns.
11943 */
11944 static int
11945 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11946 rtx *modconst, int *elementwidth)
11947 {
11948 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11949 matches = 1; \
11950 for (i = 0; i < idx; i += (STRIDE)) \
11951 if (!(TEST)) \
11952 matches = 0; \
11953 if (matches) \
11954 { \
11955 immtype = (CLASS); \
11956 elsize = (ELSIZE); \
11957 break; \
11958 }
11959
11960 unsigned int i, elsize = 0, idx = 0, n_elts;
11961 unsigned int innersize;
11962 unsigned char bytes[16];
11963 int immtype = -1, matches;
11964 unsigned int invmask = inverse ? 0xff : 0;
11965 bool vector = GET_CODE (op) == CONST_VECTOR;
11966
11967 if (vector)
11968 n_elts = CONST_VECTOR_NUNITS (op);
11969 else
11970 {
11971 n_elts = 1;
11972 gcc_assert (mode != VOIDmode);
11973 }
11974
11975 innersize = GET_MODE_UNIT_SIZE (mode);
11976
11977 /* Vectors of float constants. */
11978 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11979 {
11980 rtx el0 = CONST_VECTOR_ELT (op, 0);
11981
11982 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11983 return -1;
11984
11985 /* FP16 vectors cannot be represented. */
11986 if (GET_MODE_INNER (mode) == HFmode)
11987 return -1;
11988
11989 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11990 are distinct in this context. */
11991 if (!const_vec_duplicate_p (op))
11992 return -1;
11993
11994 if (modconst)
11995 *modconst = CONST_VECTOR_ELT (op, 0);
11996
11997 if (elementwidth)
11998 *elementwidth = 0;
11999
12000 if (el0 == CONST0_RTX (GET_MODE (el0)))
12001 return 19;
12002 else
12003 return 18;
12004 }
12005
12006 /* The tricks done in the code below apply for little-endian vector layout.
12007 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12008 FIXME: Implement logic for big-endian vectors. */
12009 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12010 return -1;
12011
12012 /* Splat vector constant out into a byte vector. */
12013 for (i = 0; i < n_elts; i++)
12014 {
12015 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12016 unsigned HOST_WIDE_INT elpart;
12017
12018 gcc_assert (CONST_INT_P (el));
12019 elpart = INTVAL (el);
12020
12021 for (unsigned int byte = 0; byte < innersize; byte++)
12022 {
12023 bytes[idx++] = (elpart & 0xff) ^ invmask;
12024 elpart >>= BITS_PER_UNIT;
12025 }
12026 }
12027
12028 /* Sanity check. */
12029 gcc_assert (idx == GET_MODE_SIZE (mode));
12030
12031 do
12032 {
12033 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12034 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12035
12036 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12037 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12038
12039 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12040 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12041
12042 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12043 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12044
12045 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12046
12047 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12048
12049 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12050 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12051
12052 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12053 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12054
12055 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12056 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12057
12058 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12059 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12060
12061 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12062
12063 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12064
12065 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12066 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12067
12068 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12069 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12070
12071 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12072 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12073
12074 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12075 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12076
12077 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12078
12079 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12080 && bytes[i] == bytes[(i + 8) % idx]);
12081 }
12082 while (0);
12083
12084 if (immtype == -1)
12085 return -1;
12086
12087 if (elementwidth)
12088 *elementwidth = elsize;
12089
12090 if (modconst)
12091 {
12092 unsigned HOST_WIDE_INT imm = 0;
12093
12094 /* Un-invert bytes of recognized vector, if necessary. */
12095 if (invmask != 0)
12096 for (i = 0; i < idx; i++)
12097 bytes[i] ^= invmask;
12098
12099 if (immtype == 17)
12100 {
12101 /* FIXME: Broken on 32-bit H_W_I hosts. */
12102 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12103
12104 for (i = 0; i < 8; i++)
12105 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12106 << (i * BITS_PER_UNIT);
12107
12108 *modconst = GEN_INT (imm);
12109 }
12110 else
12111 {
12112 unsigned HOST_WIDE_INT imm = 0;
12113
12114 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12115 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12116
12117 *modconst = GEN_INT (imm);
12118 }
12119 }
12120
12121 return immtype;
12122 #undef CHECK
12123 }
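/* Illustrative worked example (editorial addition, not part of the original
   source): a V2SI constant whose two elements are both 0x4500 splats to the
   byte vector { 00, 45, 00, 00, 00, 45, 00, 00 }, which matches variant 1
   of the table above ("00000000 00000000 abcdefgh 00000000").  The function
   therefore returns 1, sets *ELEMENTWIDTH to 32, and reassembles *MODCONST
   as the 32-bit value 0x4500.  */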
12124
12125 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12126 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12127 float elements), and a modified constant (whatever should be output for a
12128 VMOV) in *MODCONST. */
12129
12130 int
12131 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12132 rtx *modconst, int *elementwidth)
12133 {
12134 rtx tmpconst;
12135 int tmpwidth;
12136 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12137
12138 if (retval == -1)
12139 return 0;
12140
12141 if (modconst)
12142 *modconst = tmpconst;
12143
12144 if (elementwidth)
12145 *elementwidth = tmpwidth;
12146
12147 return 1;
12148 }
12149
12150 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12151 the immediate is valid, write a constant suitable for using as an operand
12152 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12153 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12154
12155 int
12156 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12157 rtx *modconst, int *elementwidth)
12158 {
12159 rtx tmpconst;
12160 int tmpwidth;
12161 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12162
12163 if (retval < 0 || retval > 5)
12164 return 0;
12165
12166 if (modconst)
12167 *modconst = tmpconst;
12168
12169 if (elementwidth)
12170 *elementwidth = tmpwidth;
12171
12172 return 1;
12173 }
12174
12175 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12176 the immediate is valid, write a constant suitable for using as an operand
12177 to VSHR/VSHL to *MODCONST and the corresponding element width to
12178 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right shift,
12179 because they have different limitations. */
12180
12181 int
12182 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12183 rtx *modconst, int *elementwidth,
12184 bool isleftshift)
12185 {
12186 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12187 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12188 unsigned HOST_WIDE_INT last_elt = 0;
12189 unsigned HOST_WIDE_INT maxshift;
12190
12191 /* All elements of the vector constant must be identical; extract that value. */
12192 for (i = 0; i < n_elts; i++)
12193 {
12194 rtx el = CONST_VECTOR_ELT (op, i);
12195 unsigned HOST_WIDE_INT elpart;
12196
12197 if (CONST_INT_P (el))
12198 elpart = INTVAL (el);
12199 else if (CONST_DOUBLE_P (el))
12200 return 0;
12201 else
12202 gcc_unreachable ();
12203
12204 if (i != 0 && elpart != last_elt)
12205 return 0;
12206
12207 last_elt = elpart;
12208 }
12209
12210 /* Shift less than element size. */
12211 maxshift = innersize * 8;
12212
12213 if (isleftshift)
12214 {
12215 /* Left shift immediate value can be from 0 to <size>-1. */
12216 if (last_elt >= maxshift)
12217 return 0;
12218 }
12219 else
12220 {
12221 /* Right shift immediate value can be from 1 to <size>. */
12222 if (last_elt == 0 || last_elt > maxshift)
12223 return 0;
12224 }
12225
12226 if (elementwidth)
12227 *elementwidth = innersize * 8;
12228
12229 if (modconst)
12230 *modconst = CONST_VECTOR_ELT (op, 0);
12231
12232 return 1;
12233 }
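/* Illustrative worked example (editorial addition, not part of the original
   source): for a V4HI operand the element size is 16 bits, so every element
   must hold the same shift count; a count of 16 is accepted for VSHR (right
   shifts allow 1..16) but rejected for VSHL (left shifts allow 0..15), and
   *ELEMENTWIDTH is set to 16.  */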
12234
12235 /* Return a string suitable for output of Neon immediate logic operation
12236 MNEM. */
12237
12238 char *
12239 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12240 int inverse, int quad)
12241 {
12242 int width, is_valid;
12243 static char templ[40];
12244
12245 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12246
12247 gcc_assert (is_valid != 0);
12248
12249 if (quad)
12250 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12251 else
12252 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12253
12254 return templ;
12255 }
12256
12257 /* Return a string suitable for output of Neon immediate shift operation
12258 (VSHR or VSHL) MNEM. */
12259
12260 char *
12261 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12262 machine_mode mode, int quad,
12263 bool isleftshift)
12264 {
12265 int width, is_valid;
12266 static char templ[40];
12267
12268 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12269 gcc_assert (is_valid != 0);
12270
12271 if (quad)
12272 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12273 else
12274 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12275
12276 return templ;
12277 }
12278
12279 /* Output a sequence of pairwise operations to implement a reduction.
12280 NOTE: We do "too much work" here, because pairwise operations work on two
12281 registers' worth of operands in one go. Unfortunately I don't think we can
12282 exploit those extra calculations to do the full operation in fewer steps.
12283 Although all vector elements of the result but the first are ignored, we
12284 actually calculate the same result in each of the elements. An alternative
12285 such as initially loading a vector with zero to use as each of the second
12286 operands would use up an additional register and take an extra instruction,
12287 for no particular gain. */
12288
12289 void
12290 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12291 rtx (*reduc) (rtx, rtx, rtx))
12292 {
12293 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12294 rtx tmpsum = op1;
12295
12296 for (i = parts / 2; i >= 1; i /= 2)
12297 {
12298 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12299 emit_insn (reduc (dest, tmpsum, tmpsum));
12300 tmpsum = dest;
12301 }
12302 }
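/* Editorial sketch (not part of the original source): the loop above emits
   log2(N) pairwise operations, e.g. for an eight-element vector it emits
   reductions for i = 4, 2 and 1, the last one writing OP0.  A scalar sketch
   of the same reduction shape, assuming N is a power of two (the function
   name is hypothetical):  */
#if 0
static int
pairwise_reduce_example (int *v, int n)
{
  for (int parts = n / 2; parts >= 1; parts /= 2)
    for (int i = 0; i < parts; i++)
      v[i] = v[2 * i] + v[2 * i + 1];	/* Add adjacent pairs.  */
  return v[0];				/* Full sum ends up in element 0.  */
}
#endif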
12303
12304 /* If VALS is a vector constant that can be loaded into a register
12305 using VDUP, generate instructions to do so and return an RTX to
12306 assign to the register. Otherwise return NULL_RTX. */
12307
12308 static rtx
12309 neon_vdup_constant (rtx vals)
12310 {
12311 machine_mode mode = GET_MODE (vals);
12312 machine_mode inner_mode = GET_MODE_INNER (mode);
12313 rtx x;
12314
12315 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12316 return NULL_RTX;
12317
12318 if (!const_vec_duplicate_p (vals, &x))
12319 /* The elements are not all the same. We could handle repeating
12320 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12321 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12322 vdup.i16). */
12323 return NULL_RTX;
12324
12325 /* We can load this constant by using VDUP and a constant in a
12326 single ARM register. This will be cheaper than a vector
12327 load. */
12328
12329 x = copy_to_mode_reg (inner_mode, x);
12330 return gen_vec_duplicate (mode, x);
12331 }
12332
12333 /* Generate code to load VALS, which is a PARALLEL containing only
12334 constants (for vec_init) or CONST_VECTOR, efficiently into a
12335 register. Returns an RTX to copy into the register, or NULL_RTX
12336 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12337
12338 rtx
12339 neon_make_constant (rtx vals)
12340 {
12341 machine_mode mode = GET_MODE (vals);
12342 rtx target;
12343 rtx const_vec = NULL_RTX;
12344 int n_elts = GET_MODE_NUNITS (mode);
12345 int n_const = 0;
12346 int i;
12347
12348 if (GET_CODE (vals) == CONST_VECTOR)
12349 const_vec = vals;
12350 else if (GET_CODE (vals) == PARALLEL)
12351 {
12352 /* A CONST_VECTOR must contain only CONST_INTs and
12353 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12354 Only store valid constants in a CONST_VECTOR. */
12355 for (i = 0; i < n_elts; ++i)
12356 {
12357 rtx x = XVECEXP (vals, 0, i);
12358 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12359 n_const++;
12360 }
12361 if (n_const == n_elts)
12362 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12363 }
12364 else
12365 gcc_unreachable ();
12366
12367 if (const_vec != NULL
12368 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12369 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12370 return const_vec;
12371 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12372 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12373 pipeline cycle; creating the constant takes one or two ARM
12374 pipeline cycles. */
12375 return target;
12376 else if (const_vec != NULL_RTX)
12377 /* Load from constant pool. On Cortex-A8 this takes two cycles
12378 (for either double or quad vectors). We cannot take advantage
12379 of single-cycle VLD1 because we need a PC-relative addressing
12380 mode. */
12381 return const_vec;
12382 else
12383 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12384 We cannot construct an initializer. */
12385 return NULL_RTX;
12386 }
12387
12388 /* Initialize vector TARGET to VALS. */
12389
12390 void
12391 neon_expand_vector_init (rtx target, rtx vals)
12392 {
12393 machine_mode mode = GET_MODE (target);
12394 machine_mode inner_mode = GET_MODE_INNER (mode);
12395 int n_elts = GET_MODE_NUNITS (mode);
12396 int n_var = 0, one_var = -1;
12397 bool all_same = true;
12398 rtx x, mem;
12399 int i;
12400
12401 for (i = 0; i < n_elts; ++i)
12402 {
12403 x = XVECEXP (vals, 0, i);
12404 if (!CONSTANT_P (x))
12405 ++n_var, one_var = i;
12406
12407 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12408 all_same = false;
12409 }
12410
12411 if (n_var == 0)
12412 {
12413 rtx constant = neon_make_constant (vals);
12414 if (constant != NULL_RTX)
12415 {
12416 emit_move_insn (target, constant);
12417 return;
12418 }
12419 }
12420
12421 /* Splat a single non-constant element if we can. */
12422 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12423 {
12424 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12425 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12426 return;
12427 }
12428
12429 /* One field is non-constant. Load constant then overwrite varying
12430 field. This is more efficient than using the stack. */
12431 if (n_var == 1)
12432 {
12433 rtx copy = copy_rtx (vals);
12434 rtx merge_mask = GEN_INT (1 << one_var);
12435
12436 /* Load constant part of vector, substitute neighboring value for
12437 varying element. */
12438 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12439 neon_expand_vector_init (target, copy);
12440
12441 /* Insert variable. */
12442 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12443 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
12444 return;
12445 }
12446
12447 /* Construct the vector in memory one field at a time
12448 and load the whole vector. */
12449 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12450 for (i = 0; i < n_elts; i++)
12451 emit_move_insn (adjust_address_nv (mem, inner_mode,
12452 i * GET_MODE_SIZE (inner_mode)),
12453 XVECEXP (vals, 0, i));
12454 emit_move_insn (target, mem);
12455 }
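/* Illustrative worked example (editorial addition, not part of the original
   source): for a V4SI initializer { 1, 2, x, 4 } with a single variable
   element, the code above first loads the constant vector { 1, 2, 4, 4 }
   (the varying lane is temporarily filled with its neighbour's value) and
   then inserts x into lane 2 using a merge mask of 1 << 2.  */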
12456
12457 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12458 an error if it doesn't. EXP indicates the source location, which includes the
12459 inlining history for intrinsics. */
12460
12461 static void
12462 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12463 const_tree exp, const char *desc)
12464 {
12465 HOST_WIDE_INT lane;
12466
12467 gcc_assert (CONST_INT_P (operand));
12468
12469 lane = INTVAL (operand);
12470
12471 if (lane < low || lane >= high)
12472 {
12473 if (exp)
12474 error ("%K%s %wd out of range %wd - %wd",
12475 exp, desc, lane, low, high - 1);
12476 else
12477 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12478 }
12479 }
12480
12481 /* Bounds-check lanes. */
12482
12483 void
12484 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12485 const_tree exp)
12486 {
12487 bounds_check (operand, low, high, exp, "lane");
12488 }
12489
12490 /* Bounds-check constants. */
12491
12492 void
12493 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12494 {
12495 bounds_check (operand, low, high, NULL_TREE, "constant");
12496 }
12497
12498 HOST_WIDE_INT
12499 neon_element_bits (machine_mode mode)
12500 {
12501 return GET_MODE_UNIT_BITSIZE (mode);
12502 }
12503
12504 \f
12505 /* Predicates for `match_operand' and `match_operator'. */
12506
12507 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12508 WB is true if full writeback address modes are allowed and is false
12509 if limited writeback address modes (POST_INC and PRE_DEC) are
12510 allowed. */
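/* As a rough summary of the checks below, the accepted address forms are:
   (reg); (post_inc (reg)) and (pre_dec (reg)) unconditionally;
   (pre_inc (reg)), (post_dec (reg)) and the POST_MODIFY/PRE_MODIFY forms
   only when WB is true; and (plus (reg) (const_int N)) where
   -1024 < N < 1024 and N is a multiple of 4.  */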
12511
12512 int
12513 arm_coproc_mem_operand (rtx op, bool wb)
12514 {
12515 rtx ind;
12516
12517 /* Reject eliminable registers. */
12518 if (! (reload_in_progress || reload_completed || lra_in_progress)
12519 && ( reg_mentioned_p (frame_pointer_rtx, op)
12520 || reg_mentioned_p (arg_pointer_rtx, op)
12521 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12522 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12523 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12524 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12525 return FALSE;
12526
12527 /* Constants are converted into offsets from labels. */
12528 if (!MEM_P (op))
12529 return FALSE;
12530
12531 ind = XEXP (op, 0);
12532
12533 if (reload_completed
12534 && (GET_CODE (ind) == LABEL_REF
12535 || (GET_CODE (ind) == CONST
12536 && GET_CODE (XEXP (ind, 0)) == PLUS
12537 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12538 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12539 return TRUE;
12540
12541 /* Match: (mem (reg)). */
12542 if (REG_P (ind))
12543 return arm_address_register_rtx_p (ind, 0);
12544
12545 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12546 acceptable in any case (subject to verification by
12547 arm_address_register_rtx_p). We need WB to be true to accept
12548 PRE_INC and POST_DEC. */
12549 if (GET_CODE (ind) == POST_INC
12550 || GET_CODE (ind) == PRE_DEC
12551 || (wb
12552 && (GET_CODE (ind) == PRE_INC
12553 || GET_CODE (ind) == POST_DEC)))
12554 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12555
12556 if (wb
12557 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12558 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12559 && GET_CODE (XEXP (ind, 1)) == PLUS
12560 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12561 ind = XEXP (ind, 1);
12562
12563 /* Match:
12564 (plus (reg)
12565 (const)). */
12566 if (GET_CODE (ind) == PLUS
12567 && REG_P (XEXP (ind, 0))
12568 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12569 && CONST_INT_P (XEXP (ind, 1))
12570 && INTVAL (XEXP (ind, 1)) > -1024
12571 && INTVAL (XEXP (ind, 1)) < 1024
12572 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12573 return TRUE;
12574
12575 return FALSE;
12576 }
12577
12578 /* Return TRUE if OP is a memory operand which we can load or store a vector
12579 to/from. TYPE is one of the following values:
12580 0 - Vector load/store (vldr)
12581 1 - Core registers (ldm)
12582 2 - Element/structure loads (vld1)
12583 */
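/* Roughly, the address forms accepted below are: (reg) for every TYPE;
   (post_inc (reg)) for types 0 and 2; (pre_dec (reg)) and
   (plus (reg) (const_int N)) with a small multiple-of-4 N for type 0 only;
   and post-increment by a register (POST_MODIFY) for type 2 only.  */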
12584 int
12585 neon_vector_mem_operand (rtx op, int type, bool strict)
12586 {
12587 rtx ind;
12588
12589 /* Reject eliminable registers. */
12590 if (strict && ! (reload_in_progress || reload_completed)
12591 && (reg_mentioned_p (frame_pointer_rtx, op)
12592 || reg_mentioned_p (arg_pointer_rtx, op)
12593 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12594 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12595 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12596 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12597 return FALSE;
12598
12599 /* Constants are converted into offsets from labels. */
12600 if (!MEM_P (op))
12601 return FALSE;
12602
12603 ind = XEXP (op, 0);
12604
12605 if (reload_completed
12606 && (GET_CODE (ind) == LABEL_REF
12607 || (GET_CODE (ind) == CONST
12608 && GET_CODE (XEXP (ind, 0)) == PLUS
12609 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12610 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12611 return TRUE;
12612
12613 /* Match: (mem (reg)). */
12614 if (REG_P (ind))
12615 return arm_address_register_rtx_p (ind, 0);
12616
12617 /* Allow post-increment with Neon registers. */
12618 if ((type != 1 && GET_CODE (ind) == POST_INC)
12619 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12620 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12621
12622 /* Allow post-increment by register for VLDn.  */
12623 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12624 && GET_CODE (XEXP (ind, 1)) == PLUS
12625 && REG_P (XEXP (XEXP (ind, 1), 1)))
12626 return true;
12627
12628 /* Match:
12629 (plus (reg)
12630 (const)). */
12631 if (type == 0
12632 && GET_CODE (ind) == PLUS
12633 && REG_P (XEXP (ind, 0))
12634 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12635 && CONST_INT_P (XEXP (ind, 1))
12636 && INTVAL (XEXP (ind, 1)) > -1024
12637 /* For quad modes, we restrict the constant offset to be slightly less
12638 than what the instruction format permits. We have no such constraint
12639 on double mode offsets. (This must match arm_legitimate_index_p.) */
12640 && (INTVAL (XEXP (ind, 1))
12641 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12642 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12643 return TRUE;
12644
12645 return FALSE;
12646 }
12647
12648 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12649 type. */
12650 int
12651 neon_struct_mem_operand (rtx op)
12652 {
12653 rtx ind;
12654
12655 /* Reject eliminable registers. */
12656 if (! (reload_in_progress || reload_completed)
12657 && ( reg_mentioned_p (frame_pointer_rtx, op)
12658 || reg_mentioned_p (arg_pointer_rtx, op)
12659 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12660 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12661 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12662 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12663 return FALSE;
12664
12665 /* Constants are converted into offsets from labels. */
12666 if (!MEM_P (op))
12667 return FALSE;
12668
12669 ind = XEXP (op, 0);
12670
12671 if (reload_completed
12672 && (GET_CODE (ind) == LABEL_REF
12673 || (GET_CODE (ind) == CONST
12674 && GET_CODE (XEXP (ind, 0)) == PLUS
12675 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12676 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12677 return TRUE;
12678
12679 /* Match: (mem (reg)). */
12680 if (REG_P (ind))
12681 return arm_address_register_rtx_p (ind, 0);
12682
12683 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12684 if (GET_CODE (ind) == POST_INC
12685 || GET_CODE (ind) == PRE_DEC)
12686 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12687
12688 return FALSE;
12689 }
12690
12691 /* Prepares the operands for the VCMLA by lane instruction such that the right
12692 register number is selected.  This instruction is special in that it always
12693 requires a D register; however, there is a choice to be made between Dn[0],
12694 Dn[1], D(n+1)[0], and D(n+1)[1], depending on the mode of the registers.
12695
12696 The VCMLA by lane function always selects two values.  For instance, given
12697 D0 and a V2SF, the only valid index is 0, as the values in S0 and S1 will
12698 both be used by the instruction.  Given V4SF, however, indices 0 and 1 are
12699 both valid, selecting D0[0] or D1[0] respectively.
12700
12701 This function centralizes that information based on OPERANDS: OPERANDS[3]
12702 will be changed from a REG into a CONST_INT RTX, and OPERANDS[4] will be
12703 updated to contain the right index. */
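/* For example, assuming the usual overlap of the VFP register banks
   (q0 = d0/d1 = s0..s3): a V4SF multiplicand held in q0 with lane index 1
   is rewritten below as D register number 1 and lane 0, i.e. d1[0], while
   lane index 0 is left as d0[0].  */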
12704
12705 rtx *
12706 neon_vcmla_lane_prepare_operands (rtx *operands)
12707 {
12708 int lane = INTVAL (operands[4]);
12709 machine_mode constmode = SImode;
12710 machine_mode mode = GET_MODE (operands[3]);
12711 int regno = REGNO (operands[3]);
12712 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
12713 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
12714 {
12715 operands[3] = gen_int_mode (regno + 1, constmode);
12716 operands[4]
12717 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
12718 }
12719 else
12720 {
12721 operands[3] = gen_int_mode (regno, constmode);
12722 operands[4] = gen_int_mode (lane, constmode);
12723 }
12724 return operands;
12725 }
12726
12727
12728 /* Return true if X is a register that will be eliminated later on. */
12729 int
12730 arm_eliminable_register (rtx x)
12731 {
12732 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12733 || REGNO (x) == ARG_POINTER_REGNUM
12734 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12735 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12736 }
12737
12738 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12739 coprocessor registers.  Otherwise return NO_REGS.  */
12740
12741 enum reg_class
12742 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12743 {
12744 if (mode == HFmode)
12745 {
12746 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12747 return GENERAL_REGS;
12748 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12749 return NO_REGS;
12750 return GENERAL_REGS;
12751 }
12752
12753 /* The neon move patterns handle all legitimate vector and struct
12754 addresses. */
12755 if (TARGET_NEON
12756 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12757 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12758 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12759 || VALID_NEON_STRUCT_MODE (mode)))
12760 return NO_REGS;
12761
12762 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12763 return NO_REGS;
12764
12765 return GENERAL_REGS;
12766 }
12767
12768 /* Values which must be returned in the most-significant end of the return
12769 register. */
12770
12771 static bool
12772 arm_return_in_msb (const_tree valtype)
12773 {
12774 return (TARGET_AAPCS_BASED
12775 && BYTES_BIG_ENDIAN
12776 && (AGGREGATE_TYPE_P (valtype)
12777 || TREE_CODE (valtype) == COMPLEX_TYPE
12778 || FIXED_POINT_TYPE_P (valtype)));
12779 }
12780
12781 /* Return TRUE if X references a SYMBOL_REF. */
12782 int
12783 symbol_mentioned_p (rtx x)
12784 {
12785 const char * fmt;
12786 int i;
12787
12788 if (GET_CODE (x) == SYMBOL_REF)
12789 return 1;
12790
12791 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12792 are constant offsets, not symbols. */
12793 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12794 return 0;
12795
12796 fmt = GET_RTX_FORMAT (GET_CODE (x));
12797
12798 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12799 {
12800 if (fmt[i] == 'E')
12801 {
12802 int j;
12803
12804 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12805 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12806 return 1;
12807 }
12808 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12809 return 1;
12810 }
12811
12812 return 0;
12813 }
12814
12815 /* Return TRUE if X references a LABEL_REF. */
12816 int
12817 label_mentioned_p (rtx x)
12818 {
12819 const char * fmt;
12820 int i;
12821
12822 if (GET_CODE (x) == LABEL_REF)
12823 return 1;
12824
12825 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12826 instruction, but they are constant offsets, not symbols. */
12827 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12828 return 0;
12829
12830 fmt = GET_RTX_FORMAT (GET_CODE (x));
12831 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12832 {
12833 if (fmt[i] == 'E')
12834 {
12835 int j;
12836
12837 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12838 if (label_mentioned_p (XVECEXP (x, i, j)))
12839 return 1;
12840 }
12841 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12842 return 1;
12843 }
12844
12845 return 0;
12846 }
12847
12848 int
12849 tls_mentioned_p (rtx x)
12850 {
12851 switch (GET_CODE (x))
12852 {
12853 case CONST:
12854 return tls_mentioned_p (XEXP (x, 0));
12855
12856 case UNSPEC:
12857 if (XINT (x, 1) == UNSPEC_TLS)
12858 return 1;
12859
12860 /* Fall through. */
12861 default:
12862 return 0;
12863 }
12864 }
12865
12866 /* Must not copy any rtx that uses a pc-relative address.
12867 Also, disallow copying of load-exclusive instructions that
12868 may appear after splitting of compare-and-swap-style operations
12869 so as to prevent those loops from being transformed away from their
12870 canonical forms (see PR 69904). */
12871
12872 static bool
12873 arm_cannot_copy_insn_p (rtx_insn *insn)
12874 {
12875 /* The tls call insn cannot be copied, as it is paired with a data
12876 word. */
12877 if (recog_memoized (insn) == CODE_FOR_tlscall)
12878 return true;
12879
12880 subrtx_iterator::array_type array;
12881 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12882 {
12883 const_rtx x = *iter;
12884 if (GET_CODE (x) == UNSPEC
12885 && (XINT (x, 1) == UNSPEC_PIC_BASE
12886 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12887 return true;
12888 }
12889
12890 rtx set = single_set (insn);
12891 if (set)
12892 {
12893 rtx src = SET_SRC (set);
12894 if (GET_CODE (src) == ZERO_EXTEND)
12895 src = XEXP (src, 0);
12896
12897 /* Catch the load-exclusive and load-acquire operations. */
12898 if (GET_CODE (src) == UNSPEC_VOLATILE
12899 && (XINT (src, 1) == VUNSPEC_LL
12900 || XINT (src, 1) == VUNSPEC_LAX))
12901 return true;
12902 }
12903 return false;
12904 }
12905
12906 enum rtx_code
12907 minmax_code (rtx x)
12908 {
12909 enum rtx_code code = GET_CODE (x);
12910
12911 switch (code)
12912 {
12913 case SMAX:
12914 return GE;
12915 case SMIN:
12916 return LE;
12917 case UMIN:
12918 return LEU;
12919 case UMAX:
12920 return GEU;
12921 default:
12922 gcc_unreachable ();
12923 }
12924 }
12925
12926 /* Match pair of min/max operators that can be implemented via usat/ssat. */
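/* For example, the bounds [0, 255] match an unsigned saturation with
   *MASK set to 8 (saturation to 8 unsigned bits), and the bounds
   [-128, 127] match a signed saturation with *MASK set to 8 (saturation
   to 8 signed bits).  */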
12927
12928 bool
12929 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12930 int *mask, bool *signed_sat)
12931 {
12932 /* The high bound must be a power of two minus one. */
12933 int log = exact_log2 (INTVAL (hi_bound) + 1);
12934 if (log == -1)
12935 return false;
12936
12937 /* The low bound is either zero (for usat) or one less than the
12938 negation of the high bound (for ssat). */
12939 if (INTVAL (lo_bound) == 0)
12940 {
12941 if (mask)
12942 *mask = log;
12943 if (signed_sat)
12944 *signed_sat = false;
12945
12946 return true;
12947 }
12948
12949 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12950 {
12951 if (mask)
12952 *mask = log + 1;
12953 if (signed_sat)
12954 *signed_sat = true;
12955
12956 return true;
12957 }
12958
12959 return false;
12960 }
12961
12962 /* Return 1 if memory locations are adjacent. */
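/* For example, (mem (reg r4)) and (mem (plus (reg r4) (const_int 4)))
   use the same base register and differ by 4, so they can be treated as
   adjacent, subject to the offset and scheduling checks below.  */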
12963 int
12964 adjacent_mem_locations (rtx a, rtx b)
12965 {
12966 /* We don't guarantee to preserve the order of these memory refs. */
12967 if (volatile_refs_p (a) || volatile_refs_p (b))
12968 return 0;
12969
12970 if ((REG_P (XEXP (a, 0))
12971 || (GET_CODE (XEXP (a, 0)) == PLUS
12972 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12973 && (REG_P (XEXP (b, 0))
12974 || (GET_CODE (XEXP (b, 0)) == PLUS
12975 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12976 {
12977 HOST_WIDE_INT val0 = 0, val1 = 0;
12978 rtx reg0, reg1;
12979 int val_diff;
12980
12981 if (GET_CODE (XEXP (a, 0)) == PLUS)
12982 {
12983 reg0 = XEXP (XEXP (a, 0), 0);
12984 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12985 }
12986 else
12987 reg0 = XEXP (a, 0);
12988
12989 if (GET_CODE (XEXP (b, 0)) == PLUS)
12990 {
12991 reg1 = XEXP (XEXP (b, 0), 0);
12992 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12993 }
12994 else
12995 reg1 = XEXP (b, 0);
12996
12997 /* Don't accept any offset that will require multiple
12998 instructions to handle, since this would cause the
12999 arith_adjacentmem pattern to output an overlong sequence. */
13000 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13001 return 0;
13002
13003 /* Don't allow an eliminable register: register elimination can make
13004 the offset too large. */
13005 if (arm_eliminable_register (reg0))
13006 return 0;
13007
13008 val_diff = val1 - val0;
13009
13010 if (arm_ld_sched)
13011 {
13012 /* If the target has load delay slots, then there's no benefit
13013 to using an ldm instruction unless the offset is zero and
13014 we are optimizing for size. */
13015 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13016 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13017 && (val_diff == 4 || val_diff == -4));
13018 }
13019
13020 return ((REGNO (reg0) == REGNO (reg1))
13021 && (val_diff == 4 || val_diff == -4));
13022 }
13023
13024 return 0;
13025 }
13026
13027 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13028 for load operations, false for store operations. CONSECUTIVE is true
13029 if the register numbers in the operation must be consecutive in the register
13030 bank.  RETURN_PC is true if the value is to be loaded into PC.
13031 The pattern we are trying to match for load is:
13032 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13033 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13034 :
13035 :
13036 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13037 ]
13038 where
13039 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13040 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13041 3. If consecutive is TRUE, then for kth register being loaded,
13042 REGNO (R_dk) = REGNO (R_d0) + k.
13043 The pattern for store is similar. */
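/* As an illustration (register numbers are arbitrary), a two-register pop
   that also updates the base looks roughly like:
   [(set (reg sp) (plus (reg sp) (const_int 8)))
    (set (reg:SI r4) (mem (reg sp)))
    (set (reg:SI r5) (mem (plus (reg sp) (const_int 4))))]
   and is accepted with LOAD true and MODE SImode.  */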
13044 bool
13045 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13046 bool consecutive, bool return_pc)
13047 {
13048 HOST_WIDE_INT count = XVECLEN (op, 0);
13049 rtx reg, mem, addr;
13050 unsigned regno;
13051 unsigned first_regno;
13052 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13053 rtx elt;
13054 bool addr_reg_in_reglist = false;
13055 bool update = false;
13056 int reg_increment;
13057 int offset_adj;
13058 int regs_per_val;
13059
13060 /* If not in SImode, then registers must be consecutive
13061 (e.g., VLDM instructions for DFmode). */
13062 gcc_assert ((mode == SImode) || consecutive);
13063 /* Setting return_pc for stores is illegal. */
13064 gcc_assert (!return_pc || load);
13065
13066 /* Set up the increments and the regs per val based on the mode. */
13067 reg_increment = GET_MODE_SIZE (mode);
13068 regs_per_val = reg_increment / 4;
13069 offset_adj = return_pc ? 1 : 0;
13070
13071 if (count <= 1
13072 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13073 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13074 return false;
13075
13076 /* Check if this is a write-back. */
13077 elt = XVECEXP (op, 0, offset_adj);
13078 if (GET_CODE (SET_SRC (elt)) == PLUS)
13079 {
13080 i++;
13081 base = 1;
13082 update = true;
13083
13084 /* The offset adjustment must be the number of registers being
13085 popped times the size of a single register. */
13086 if (!REG_P (SET_DEST (elt))
13087 || !REG_P (XEXP (SET_SRC (elt), 0))
13088 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13089 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13090 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13091 ((count - 1 - offset_adj) * reg_increment))
13092 return false;
13093 }
13094
13095 i = i + offset_adj;
13096 base = base + offset_adj;
13097 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13098 success depends on the type: VLDM can do just one reg,
13099 LDM must do at least two. */
13100 if ((count <= i) && (mode == SImode))
13101 return false;
13102
13103 elt = XVECEXP (op, 0, i - 1);
13104 if (GET_CODE (elt) != SET)
13105 return false;
13106
13107 if (load)
13108 {
13109 reg = SET_DEST (elt);
13110 mem = SET_SRC (elt);
13111 }
13112 else
13113 {
13114 reg = SET_SRC (elt);
13115 mem = SET_DEST (elt);
13116 }
13117
13118 if (!REG_P (reg) || !MEM_P (mem))
13119 return false;
13120
13121 regno = REGNO (reg);
13122 first_regno = regno;
13123 addr = XEXP (mem, 0);
13124 if (GET_CODE (addr) == PLUS)
13125 {
13126 if (!CONST_INT_P (XEXP (addr, 1)))
13127 return false;
13128
13129 offset = INTVAL (XEXP (addr, 1));
13130 addr = XEXP (addr, 0);
13131 }
13132
13133 if (!REG_P (addr))
13134 return false;
13135
13136 /* Don't allow SP to be loaded unless it is also the base register. It
13137 guarantees that SP is reset correctly when an LDM instruction
13138 is interrupted. Otherwise, we might end up with a corrupt stack. */
13139 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13140 return false;
13141
13142 if (regno == REGNO (addr))
13143 addr_reg_in_reglist = true;
13144
13145 for (; i < count; i++)
13146 {
13147 elt = XVECEXP (op, 0, i);
13148 if (GET_CODE (elt) != SET)
13149 return false;
13150
13151 if (load)
13152 {
13153 reg = SET_DEST (elt);
13154 mem = SET_SRC (elt);
13155 }
13156 else
13157 {
13158 reg = SET_SRC (elt);
13159 mem = SET_DEST (elt);
13160 }
13161
13162 if (!REG_P (reg)
13163 || GET_MODE (reg) != mode
13164 || REGNO (reg) <= regno
13165 || (consecutive
13166 && (REGNO (reg) !=
13167 (unsigned int) (first_regno + regs_per_val * (i - base))))
13168 /* Don't allow SP to be loaded unless it is also the base register. It
13169 guarantees that SP is reset correctly when an LDM instruction
13170 is interrupted. Otherwise, we might end up with a corrupt stack. */
13171 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13172 || !MEM_P (mem)
13173 || GET_MODE (mem) != mode
13174 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13175 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13176 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13177 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13178 offset + (i - base) * reg_increment))
13179 && (!REG_P (XEXP (mem, 0))
13180 || offset + (i - base) * reg_increment != 0)))
13181 return false;
13182
13183 regno = REGNO (reg);
13184 if (regno == REGNO (addr))
13185 addr_reg_in_reglist = true;
13186 }
13187
13188 if (load)
13189 {
13190 if (update && addr_reg_in_reglist)
13191 return false;
13192
13193 /* For Thumb-1, the address register is always modified, either by write-back
13194 or by an explicit load.  If the pattern does not describe an update,
13195 then the address register must be in the list of loaded registers. */
13196 if (TARGET_THUMB1)
13197 return update || addr_reg_in_reglist;
13198 }
13199
13200 return true;
13201 }
13202
13203 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13204 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13205 instruction. ADD_OFFSET is nonzero if the base address register needs
13206 to be modified with an add instruction before we can use it. */
13207
13208 static bool
13209 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13210 int nops, HOST_WIDE_INT add_offset)
13211 {
13212 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13213 if the base address first needs adjusting with an add.  The reason 2 ldrs are
13214 faster is that these ARMs are able to do more than one cache access
13215 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13216 whilst the ARM8 has a double bandwidth cache. This means that
13217 these cores can do both an instruction fetch and a data fetch in
13218 a single cycle, so the trick of calculating the address into a
13219 scratch register (one of the result regs) and then doing a load
13220 multiple actually becomes slower (and no smaller in code size).
13221 That is the transformation
13222
13223 ldr rd1, [rbase + offset]
13224 ldr rd2, [rbase + offset + 4]
13225
13226 to
13227
13228 add rd1, rbase, offset
13229 ldmia rd1, {rd1, rd2}
13230
13231 produces worse code -- '3 cycles + any stalls on rd2' instead of
13232 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13233 access per cycle, the first sequence could never complete in less
13234 than 6 cycles, whereas the ldm sequence would only take 5 and
13235 would make better use of sequential accesses if not hitting the
13236 cache.
13237
13238 We cheat here and test 'arm_ld_sched' which we currently know to
13239 only be true for the ARM8, ARM9 and StrongARM. If this ever
13240 changes, then the test below needs to be reworked. */
13241 if (nops == 2 && arm_ld_sched && add_offset != 0)
13242 return false;
13243
13244 /* XScale has load-store double instructions, but they have stricter
13245 alignment requirements than load-store multiple, so we cannot
13246 use them.
13247
13248 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13249 the pipeline until completion.
13250
13251 NREGS CYCLES
13252 1 3
13253 2 4
13254 3 5
13255 4 6
13256
13257 An ldr instruction takes 1-3 cycles, but does not block the
13258 pipeline.
13259
13260 NREGS CYCLES
13261 1 1-3
13262 2 2-6
13263 3 3-9
13264 4 4-12
13265
13266 Best case ldr will always win. However, the more ldr instructions
13267 we issue, the less likely we are to be able to schedule them well.
13268 Using ldr instructions also increases code size.
13269
13270 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13271 for counts of 3 or 4 regs. */
13272 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13273 return false;
13274 return true;
13275 }
13276
13277 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13278 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13279 an array ORDER which describes the sequence to use when accessing the
13280 offsets that produces an ascending order. In this sequence, each
13281 offset must be larger by exactly 4 than the previous one. ORDER[0]
13282 must have been filled in with the lowest offset by the caller.
13283 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13284 we use to verify that ORDER produces an ascending order of registers.
13285 Return true if it was possible to construct such an order, false if
13286 not. */
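/* For example, with UNSORTED_OFFSETS = { 8, 4, 0, 12 } and ORDER[0] = 2
   (the index of the lowest offset), the loop below produces
   ORDER = { 2, 1, 0, 3 }.  Offsets such as { 0, 8 } fail, since no offset
   is exactly 4 above the previous one.  */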
13287
13288 static bool
13289 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13290 int *unsorted_regs)
13291 {
13292 int i;
13293 for (i = 1; i < nops; i++)
13294 {
13295 int j;
13296
13297 order[i] = order[i - 1];
13298 for (j = 0; j < nops; j++)
13299 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13300 {
13301 /* We must find exactly one offset that is higher than the
13302 previous one by 4. */
13303 if (order[i] != order[i - 1])
13304 return false;
13305 order[i] = j;
13306 }
13307 if (order[i] == order[i - 1])
13308 return false;
13309 /* The register numbers must be ascending. */
13310 if (unsorted_regs != NULL
13311 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13312 return false;
13313 }
13314 return true;
13315 }
13316
13317 /* Used to determine in a peephole whether a sequence of load
13318 instructions can be changed into a load-multiple instruction.
13319 NOPS is the number of separate load instructions we are examining. The
13320 first NOPS entries in OPERANDS are the destination registers, the
13321 next NOPS entries are memory operands. If this function is
13322 successful, *BASE is set to the common base register of the memory
13323 accesses; *LOAD_OFFSET is set to the first memory location's offset
13324 from that base register.
13325 REGS is an array filled in with the destination register numbers.
13326 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13327 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13328 the sequence of registers in REGS matches the loads from ascending memory
13329 locations, and the function verifies that the register numbers are
13330 themselves ascending. If CHECK_REGS is false, the register numbers
13331 are stored in the order they are found in the operands. */
13332 static int
13333 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13334 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13335 {
13336 int unsorted_regs[MAX_LDM_STM_OPS];
13337 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13338 int order[MAX_LDM_STM_OPS];
13339 int base_reg = -1;
13340 int i, ldm_case;
13341
13342 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13343 easily extended if required. */
13344 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13345
13346 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13347
13348 /* Loop over the operands and check that the memory references are
13349 suitable (i.e. immediate offsets from the same base register). At
13350 the same time, extract the target register, and the memory
13351 offsets. */
13352 for (i = 0; i < nops; i++)
13353 {
13354 rtx reg;
13355 rtx offset;
13356
13357 /* Convert a subreg of a mem into the mem itself. */
13358 if (GET_CODE (operands[nops + i]) == SUBREG)
13359 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13360
13361 gcc_assert (MEM_P (operands[nops + i]));
13362
13363 /* Don't reorder volatile memory references; it doesn't seem worth
13364 looking for the case where the order is ok anyway. */
13365 if (MEM_VOLATILE_P (operands[nops + i]))
13366 return 0;
13367
13368 offset = const0_rtx;
13369
13370 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13371 || (GET_CODE (reg) == SUBREG
13372 && REG_P (reg = SUBREG_REG (reg))))
13373 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13374 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13375 || (GET_CODE (reg) == SUBREG
13376 && REG_P (reg = SUBREG_REG (reg))))
13377 && (CONST_INT_P (offset
13378 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13379 {
13380 if (i == 0)
13381 {
13382 base_reg = REGNO (reg);
13383 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13384 return 0;
13385 }
13386 else if (base_reg != (int) REGNO (reg))
13387 /* Not addressed from the same base register. */
13388 return 0;
13389
13390 unsorted_regs[i] = (REG_P (operands[i])
13391 ? REGNO (operands[i])
13392 : REGNO (SUBREG_REG (operands[i])));
13393
13394 /* If it isn't an integer register, or if it overwrites the
13395 base register but isn't the last insn in the list, then
13396 we can't do this. */
13397 if (unsorted_regs[i] < 0
13398 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13399 || unsorted_regs[i] > 14
13400 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13401 return 0;
13402
13403 /* Don't allow SP to be loaded unless it is also the base
13404 register. It guarantees that SP is reset correctly when
13405 an LDM instruction is interrupted. Otherwise, we might
13406 end up with a corrupt stack. */
13407 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13408 return 0;
13409
13410 unsorted_offsets[i] = INTVAL (offset);
13411 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13412 order[0] = i;
13413 }
13414 else
13415 /* Not a suitable memory address. */
13416 return 0;
13417 }
13418
13419 /* All the useful information has now been extracted from the
13420 operands into unsorted_regs and unsorted_offsets; additionally,
13421 order[0] has been set to the lowest offset in the list. Sort
13422 the offsets into order, verifying that they are adjacent, and
13423 check that the register numbers are ascending. */
13424 if (!compute_offset_order (nops, unsorted_offsets, order,
13425 check_regs ? unsorted_regs : NULL))
13426 return 0;
13427
13428 if (saved_order)
13429 memcpy (saved_order, order, sizeof order);
13430
13431 if (base)
13432 {
13433 *base = base_reg;
13434
13435 for (i = 0; i < nops; i++)
13436 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13437
13438 *load_offset = unsorted_offsets[order[0]];
13439 }
13440
13441 if (unsorted_offsets[order[0]] == 0)
13442 ldm_case = 1; /* ldmia */
13443 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13444 ldm_case = 2; /* ldmib */
13445 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13446 ldm_case = 3; /* ldmda */
13447 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13448 ldm_case = 4; /* ldmdb */
13449 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13450 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13451 ldm_case = 5;
13452 else
13453 return 0;
13454
13455 if (!multiple_operation_profitable_p (false, nops,
13456 ldm_case == 5
13457 ? unsorted_offsets[order[0]] : 0))
13458 return 0;
13459
13460 return ldm_case;
13461 }
13462
13463 /* Used to determine in a peephole whether a sequence of store instructions can
13464 be changed into a store-multiple instruction.
13465 NOPS is the number of separate store instructions we are examining.
13466 NOPS_TOTAL is the total number of instructions recognized by the peephole
13467 pattern.
13468 The first NOPS entries in OPERANDS are the source registers, the next
13469 NOPS entries are memory operands. If this function is successful, *BASE is
13470 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13471 to the first memory location's offset from that base register. REGS is an
13472 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13473 likewise filled with the corresponding rtx's.
13474 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13475 numbers to an ascending order of stores.
13476 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13477 from ascending memory locations, and the function verifies that the register
13478 numbers are themselves ascending. If CHECK_REGS is false, the register
13479 numbers are stored in the order they are found in the operands. */
13480 static int
13481 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13482 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13483 HOST_WIDE_INT *load_offset, bool check_regs)
13484 {
13485 int unsorted_regs[MAX_LDM_STM_OPS];
13486 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13487 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13488 int order[MAX_LDM_STM_OPS];
13489 int base_reg = -1;
13490 rtx base_reg_rtx = NULL;
13491 int i, stm_case;
13492
13493 /* Write-back of the base register is currently only supported for Thumb-1. */
13494 int base_writeback = TARGET_THUMB1;
13495
13496 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13497 easily extended if required. */
13498 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13499
13500 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13501
13502 /* Loop over the operands and check that the memory references are
13503 suitable (i.e. immediate offsets from the same base register). At
13504 the same time, extract the target register, and the memory
13505 offsets. */
13506 for (i = 0; i < nops; i++)
13507 {
13508 rtx reg;
13509 rtx offset;
13510
13511 /* Convert a subreg of a mem into the mem itself. */
13512 if (GET_CODE (operands[nops + i]) == SUBREG)
13513 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13514
13515 gcc_assert (MEM_P (operands[nops + i]));
13516
13517 /* Don't reorder volatile memory references; it doesn't seem worth
13518 looking for the case where the order is ok anyway. */
13519 if (MEM_VOLATILE_P (operands[nops + i]))
13520 return 0;
13521
13522 offset = const0_rtx;
13523
13524 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13525 || (GET_CODE (reg) == SUBREG
13526 && REG_P (reg = SUBREG_REG (reg))))
13527 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13528 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13529 || (GET_CODE (reg) == SUBREG
13530 && REG_P (reg = SUBREG_REG (reg))))
13531 && (CONST_INT_P (offset
13532 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13533 {
13534 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13535 ? operands[i] : SUBREG_REG (operands[i]));
13536 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13537
13538 if (i == 0)
13539 {
13540 base_reg = REGNO (reg);
13541 base_reg_rtx = reg;
13542 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13543 return 0;
13544 }
13545 else if (base_reg != (int) REGNO (reg))
13546 /* Not addressed from the same base register. */
13547 return 0;
13548
13549 /* If it isn't an integer register, then we can't do this. */
13550 if (unsorted_regs[i] < 0
13551 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13552 /* The effects are unpredictable if the base register is
13553 both updated and stored. */
13554 || (base_writeback && unsorted_regs[i] == base_reg)
13555 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13556 || unsorted_regs[i] > 14)
13557 return 0;
13558
13559 unsorted_offsets[i] = INTVAL (offset);
13560 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13561 order[0] = i;
13562 }
13563 else
13564 /* Not a suitable memory address. */
13565 return 0;
13566 }
13567
13568 /* All the useful information has now been extracted from the
13569 operands into unsorted_regs and unsorted_offsets; additionally,
13570 order[0] has been set to the lowest offset in the list. Sort
13571 the offsets into order, verifying that they are adjacent, and
13572 check that the register numbers are ascending. */
13573 if (!compute_offset_order (nops, unsorted_offsets, order,
13574 check_regs ? unsorted_regs : NULL))
13575 return 0;
13576
13577 if (saved_order)
13578 memcpy (saved_order, order, sizeof order);
13579
13580 if (base)
13581 {
13582 *base = base_reg;
13583
13584 for (i = 0; i < nops; i++)
13585 {
13586 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13587 if (reg_rtxs)
13588 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13589 }
13590
13591 *load_offset = unsorted_offsets[order[0]];
13592 }
13593
13594 if (TARGET_THUMB1
13595 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13596 return 0;
13597
13598 if (unsorted_offsets[order[0]] == 0)
13599 stm_case = 1; /* stmia */
13600 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13601 stm_case = 2; /* stmib */
13602 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13603 stm_case = 3; /* stmda */
13604 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13605 stm_case = 4; /* stmdb */
13606 else
13607 return 0;
13608
13609 if (!multiple_operation_profitable_p (false, nops, 0))
13610 return 0;
13611
13612 return stm_case;
13613 }
13614 \f
13615 /* Routines for use in generating RTL. */
13616
13617 /* Generate a load-multiple instruction. COUNT is the number of loads in
13618 the instruction; REGS and MEMS are arrays containing the operands.
13619 BASEREG is the base register to be used in addressing the memory operands.
13620 WBACK_OFFSET is nonzero if the instruction should update the base
13621 register. */
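/* For instance, with COUNT = 2, REGS = { 4, 5 } and WBACK_OFFSET = 8, the
   PARALLEL built below has roughly the shape:
   (parallel [(set (reg base) (plus (reg base) (const_int 8)))
              (set (reg:SI r4) (mem ...))
              (set (reg:SI r5) (mem ...))])
   unless multiple_operation_profitable_p prefers a plain sequence, in
   which case individual moves are emitted instead.  */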
13622
13623 static rtx
13624 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13625 HOST_WIDE_INT wback_offset)
13626 {
13627 int i = 0, j;
13628 rtx result;
13629
13630 if (!multiple_operation_profitable_p (false, count, 0))
13631 {
13632 rtx seq;
13633
13634 start_sequence ();
13635
13636 for (i = 0; i < count; i++)
13637 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13638
13639 if (wback_offset != 0)
13640 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13641
13642 seq = get_insns ();
13643 end_sequence ();
13644
13645 return seq;
13646 }
13647
13648 result = gen_rtx_PARALLEL (VOIDmode,
13649 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13650 if (wback_offset != 0)
13651 {
13652 XVECEXP (result, 0, 0)
13653 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13654 i = 1;
13655 count++;
13656 }
13657
13658 for (j = 0; i < count; i++, j++)
13659 XVECEXP (result, 0, i)
13660 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13661
13662 return result;
13663 }
13664
13665 /* Generate a store-multiple instruction. COUNT is the number of stores in
13666 the instruction; REGS and MEMS are arrays containing the operands.
13667 BASEREG is the base register to be used in addressing the memory operands.
13668 WBACK_OFFSET is nonzero if the instruction should update the base
13669 register. */
13670
13671 static rtx
13672 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13673 HOST_WIDE_INT wback_offset)
13674 {
13675 int i = 0, j;
13676 rtx result;
13677
13678 if (GET_CODE (basereg) == PLUS)
13679 basereg = XEXP (basereg, 0);
13680
13681 if (!multiple_operation_profitable_p (false, count, 0))
13682 {
13683 rtx seq;
13684
13685 start_sequence ();
13686
13687 for (i = 0; i < count; i++)
13688 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13689
13690 if (wback_offset != 0)
13691 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13692
13693 seq = get_insns ();
13694 end_sequence ();
13695
13696 return seq;
13697 }
13698
13699 result = gen_rtx_PARALLEL (VOIDmode,
13700 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13701 if (wback_offset != 0)
13702 {
13703 XVECEXP (result, 0, 0)
13704 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13705 i = 1;
13706 count++;
13707 }
13708
13709 for (j = 0; i < count; i++, j++)
13710 XVECEXP (result, 0, i)
13711 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13712
13713 return result;
13714 }
13715
13716 /* Generate either a load-multiple or a store-multiple instruction. This
13717 function can be used in situations where we can start with a single MEM
13718 rtx and adjust its address upwards.
13719 COUNT is the number of operations in the instruction, not counting a
13720 possible update of the base register. REGS is an array containing the
13721 register operands.
13722 BASEREG is the base register to be used in addressing the memory operands,
13723 which are constructed from BASEMEM.
13724 WRITE_BACK specifies whether the generated instruction should include an
13725 update of the base register.
13726 OFFSETP is used to pass an offset to and from this function; this offset
13727 is not used when constructing the address (instead BASEMEM should have an
13728 appropriate offset in its address); it is used only for setting
13729 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13730
13731 static rtx
13732 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13733 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13734 {
13735 rtx mems[MAX_LDM_STM_OPS];
13736 HOST_WIDE_INT offset = *offsetp;
13737 int i;
13738
13739 gcc_assert (count <= MAX_LDM_STM_OPS);
13740
13741 if (GET_CODE (basereg) == PLUS)
13742 basereg = XEXP (basereg, 0);
13743
13744 for (i = 0; i < count; i++)
13745 {
13746 rtx addr = plus_constant (Pmode, basereg, i * 4);
13747 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13748 offset += 4;
13749 }
13750
13751 if (write_back)
13752 *offsetp = offset;
13753
13754 if (is_load)
13755 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13756 write_back ? 4 * count : 0);
13757 else
13758 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13759 write_back ? 4 * count : 0);
13760 }
13761
13762 rtx
13763 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13764 rtx basemem, HOST_WIDE_INT *offsetp)
13765 {
13766 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13767 offsetp);
13768 }
13769
13770 rtx
13771 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13772 rtx basemem, HOST_WIDE_INT *offsetp)
13773 {
13774 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13775 offsetp);
13776 }
13777
13778 /* Called from a peephole2 expander to turn a sequence of loads into an
13779 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13780 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13781 is true if we can reorder the registers because they are used commutatively
13782 subsequently.
13783 Returns true iff we could generate a new instruction. */
13784
13785 bool
13786 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13787 {
13788 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13789 rtx mems[MAX_LDM_STM_OPS];
13790 int i, j, base_reg;
13791 rtx base_reg_rtx;
13792 HOST_WIDE_INT offset;
13793 int write_back = FALSE;
13794 int ldm_case;
13795 rtx addr;
13796
13797 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13798 &base_reg, &offset, !sort_regs);
13799
13800 if (ldm_case == 0)
13801 return false;
13802
13803 if (sort_regs)
13804 for (i = 0; i < nops - 1; i++)
13805 for (j = i + 1; j < nops; j++)
13806 if (regs[i] > regs[j])
13807 {
13808 int t = regs[i];
13809 regs[i] = regs[j];
13810 regs[j] = t;
13811 }
13812 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13813
13814 if (TARGET_THUMB1)
13815 {
13816 gcc_assert (ldm_case == 1 || ldm_case == 5);
13817
13818 /* Thumb-1 ldm uses writeback except if the base is loaded. */
13819 write_back = true;
13820 for (i = 0; i < nops; i++)
13821 if (base_reg == regs[i])
13822 write_back = false;
13823
13824 /* Ensure the base is dead if it is updated. */
13825 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
13826 return false;
13827 }
13828
13829 if (ldm_case == 5)
13830 {
13831 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13832 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13833 offset = 0;
13834 base_reg_rtx = newbase;
13835 }
13836
13837 for (i = 0; i < nops; i++)
13838 {
13839 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13840 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13841 SImode, addr, 0);
13842 }
13843 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13844 write_back ? offset + i * 4 : 0));
13845 return true;
13846 }
13847
13848 /* Called from a peephole2 expander to turn a sequence of stores into an
13849 STM instruction. OPERANDS are the operands found by the peephole matcher;
13850 NOPS indicates how many separate stores we are trying to combine.
13851 Returns true iff we could generate a new instruction. */
13852
13853 bool
13854 gen_stm_seq (rtx *operands, int nops)
13855 {
13856 int i;
13857 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13858 rtx mems[MAX_LDM_STM_OPS];
13859 int base_reg;
13860 rtx base_reg_rtx;
13861 HOST_WIDE_INT offset;
13862 int write_back = FALSE;
13863 int stm_case;
13864 rtx addr;
13865 bool base_reg_dies;
13866
13867 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13868 mem_order, &base_reg, &offset, true);
13869
13870 if (stm_case == 0)
13871 return false;
13872
13873 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13874
13875 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13876 if (TARGET_THUMB1)
13877 {
13878 gcc_assert (base_reg_dies);
13879 write_back = TRUE;
13880 }
13881
13882 if (stm_case == 5)
13883 {
13884 gcc_assert (base_reg_dies);
13885 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13886 offset = 0;
13887 }
13888
13889 addr = plus_constant (Pmode, base_reg_rtx, offset);
13890
13891 for (i = 0; i < nops; i++)
13892 {
13893 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13894 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13895 SImode, addr, 0);
13896 }
13897 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13898 write_back ? offset + i * 4 : 0));
13899 return true;
13900 }
13901
13902 /* Called from a peephole2 expander to turn a sequence of stores that are
13903 preceded by constant loads into an STM instruction. OPERANDS are the
13904 operands found by the peephole matcher; NOPS indicates how many
13905 separate stores we are trying to combine; there are 2 * NOPS
13906 instructions in the peephole.
13907 Returns true iff we could generate a new instruction. */
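/* A rough sketch (registers and constants are made up): a matched sequence
   equivalent to
     mov r0, #1
     str r0, [r4]
     mov r0, #2
     str r0, [r4, #4]
   can be rewritten to load the constants into distinct registers and emit
   a single
     stmia r4, {r0, r1}
   provided suitable free registers exist and the liveness checks below
   succeed.  */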
13908
13909 bool
13910 gen_const_stm_seq (rtx *operands, int nops)
13911 {
13912 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13913 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13914 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13915 rtx mems[MAX_LDM_STM_OPS];
13916 int base_reg;
13917 rtx base_reg_rtx;
13918 HOST_WIDE_INT offset;
13919 int write_back = FALSE;
13920 int stm_case;
13921 rtx addr;
13922 bool base_reg_dies;
13923 int i, j;
13924 HARD_REG_SET allocated;
13925
13926 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13927 mem_order, &base_reg, &offset, false);
13928
13929 if (stm_case == 0)
13930 return false;
13931
13932 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13933
13934 /* If the same register is used more than once, try to find a free
13935 register. */
13936 CLEAR_HARD_REG_SET (allocated);
13937 for (i = 0; i < nops; i++)
13938 {
13939 for (j = i + 1; j < nops; j++)
13940 if (regs[i] == regs[j])
13941 {
13942 rtx t = peep2_find_free_register (0, nops * 2,
13943 TARGET_THUMB1 ? "l" : "r",
13944 SImode, &allocated);
13945 if (t == NULL_RTX)
13946 return false;
13947 reg_rtxs[i] = t;
13948 regs[i] = REGNO (t);
13949 }
13950 }
13951
13952 /* Compute an ordering that maps the register numbers to an ascending
13953 sequence. */
13954 reg_order[0] = 0;
13955 for (i = 0; i < nops; i++)
13956 if (regs[i] < regs[reg_order[0]])
13957 reg_order[0] = i;
13958
13959 for (i = 1; i < nops; i++)
13960 {
13961 int this_order = reg_order[i - 1];
13962 for (j = 0; j < nops; j++)
13963 if (regs[j] > regs[reg_order[i - 1]]
13964 && (this_order == reg_order[i - 1]
13965 || regs[j] < regs[this_order]))
13966 this_order = j;
13967 reg_order[i] = this_order;
13968 }
13969
13970 /* Ensure that registers that must be live after the instruction end
13971 up with the correct value. */
13972 for (i = 0; i < nops; i++)
13973 {
13974 int this_order = reg_order[i];
13975 if ((this_order != mem_order[i]
13976 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13977 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13978 return false;
13979 }
13980
13981 /* Load the constants. */
13982 for (i = 0; i < nops; i++)
13983 {
13984 rtx op = operands[2 * nops + mem_order[i]];
13985 sorted_regs[i] = regs[reg_order[i]];
13986 emit_move_insn (reg_rtxs[reg_order[i]], op);
13987 }
13988
13989 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13990
13991 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13992 if (TARGET_THUMB1)
13993 {
13994 gcc_assert (base_reg_dies);
13995 write_back = TRUE;
13996 }
13997
13998 if (stm_case == 5)
13999 {
14000 gcc_assert (base_reg_dies);
14001 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14002 offset = 0;
14003 }
14004
14005 addr = plus_constant (Pmode, base_reg_rtx, offset);
14006
14007 for (i = 0; i < nops; i++)
14008 {
14009 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14010 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14011 SImode, addr, 0);
14012 }
14013 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14014 write_back ? offset + i * 4 : 0));
14015 return true;
14016 }
14017
14018 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14019 unaligned copies on processors which support unaligned semantics for those
14020 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14021 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14022 An interleave factor of 1 (the minimum) will perform no interleaving.
14023 Load/store multiple are used for aligned addresses where possible. */
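/* As a sketch (register choice is illustrative), with INTERLEAVE_FACTOR = 2
   and neither side word-aligned, each 8-byte chunk is copied roughly as:
     ldr r0, [src]
     ldr r1, [src, #4]
     str r0, [dst]
     str r1, [dst, #4]
   i.e. both loads are issued before the stores so that load latency can be
   hidden.  */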
14024
14025 static void
14026 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14027 HOST_WIDE_INT length,
14028 unsigned int interleave_factor)
14029 {
14030 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14031 int *regnos = XALLOCAVEC (int, interleave_factor);
14032 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14033 HOST_WIDE_INT i, j;
14034 HOST_WIDE_INT remaining = length, words;
14035 rtx halfword_tmp = NULL, byte_tmp = NULL;
14036 rtx dst, src;
14037 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14038 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14039 HOST_WIDE_INT srcoffset, dstoffset;
14040 HOST_WIDE_INT src_autoinc, dst_autoinc;
14041 rtx mem, addr;
14042
14043 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14044
14045 /* Use hard registers if we have aligned source or destination so we can use
14046 load/store multiple with contiguous registers. */
14047 if (dst_aligned || src_aligned)
14048 for (i = 0; i < interleave_factor; i++)
14049 regs[i] = gen_rtx_REG (SImode, i);
14050 else
14051 for (i = 0; i < interleave_factor; i++)
14052 regs[i] = gen_reg_rtx (SImode);
14053
14054 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14055 src = copy_addr_to_reg (XEXP (srcbase, 0));
14056
14057 srcoffset = dstoffset = 0;
14058
14059 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14060 For copying the last bytes we want to subtract this offset again. */
14061 src_autoinc = dst_autoinc = 0;
14062
14063 for (i = 0; i < interleave_factor; i++)
14064 regnos[i] = i;
14065
14066 /* Copy BLOCK_SIZE_BYTES chunks. */
14067
14068 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14069 {
14070 /* Load words. */
14071 if (src_aligned && interleave_factor > 1)
14072 {
14073 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14074 TRUE, srcbase, &srcoffset));
14075 src_autoinc += UNITS_PER_WORD * interleave_factor;
14076 }
14077 else
14078 {
14079 for (j = 0; j < interleave_factor; j++)
14080 {
14081 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14082 - src_autoinc));
14083 mem = adjust_automodify_address (srcbase, SImode, addr,
14084 srcoffset + j * UNITS_PER_WORD);
14085 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14086 }
14087 srcoffset += block_size_bytes;
14088 }
14089
14090 /* Store words. */
14091 if (dst_aligned && interleave_factor > 1)
14092 {
14093 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14094 TRUE, dstbase, &dstoffset));
14095 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14096 }
14097 else
14098 {
14099 for (j = 0; j < interleave_factor; j++)
14100 {
14101 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14102 - dst_autoinc));
14103 mem = adjust_automodify_address (dstbase, SImode, addr,
14104 dstoffset + j * UNITS_PER_WORD);
14105 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14106 }
14107 dstoffset += block_size_bytes;
14108 }
14109
14110 remaining -= block_size_bytes;
14111 }
14112
14113 /* Copy any whole words left (note these aren't interleaved with any
14114 subsequent halfword/byte load/stores in the interests of simplicity). */
14115
14116 words = remaining / UNITS_PER_WORD;
14117
14118 gcc_assert (words < interleave_factor);
14119
14120 if (src_aligned && words > 1)
14121 {
14122 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14123 &srcoffset));
14124 src_autoinc += UNITS_PER_WORD * words;
14125 }
14126 else
14127 {
14128 for (j = 0; j < words; j++)
14129 {
14130 addr = plus_constant (Pmode, src,
14131 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14132 mem = adjust_automodify_address (srcbase, SImode, addr,
14133 srcoffset + j * UNITS_PER_WORD);
14134 if (src_aligned)
14135 emit_move_insn (regs[j], mem);
14136 else
14137 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14138 }
14139 srcoffset += words * UNITS_PER_WORD;
14140 }
14141
14142 if (dst_aligned && words > 1)
14143 {
14144 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14145 &dstoffset));
14146 dst_autoinc += words * UNITS_PER_WORD;
14147 }
14148 else
14149 {
14150 for (j = 0; j < words; j++)
14151 {
14152 addr = plus_constant (Pmode, dst,
14153 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14154 mem = adjust_automodify_address (dstbase, SImode, addr,
14155 dstoffset + j * UNITS_PER_WORD);
14156 if (dst_aligned)
14157 emit_move_insn (mem, regs[j]);
14158 else
14159 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14160 }
14161 dstoffset += words * UNITS_PER_WORD;
14162 }
14163
14164 remaining -= words * UNITS_PER_WORD;
14165
14166 gcc_assert (remaining < 4);
14167
14168 /* Copy a halfword if necessary. */
14169
14170 if (remaining >= 2)
14171 {
14172 halfword_tmp = gen_reg_rtx (SImode);
14173
14174 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14175 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14176 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14177
14178 /* Either write out immediately, or delay until we've loaded the last
14179 byte, depending on interleave factor. */
14180 if (interleave_factor == 1)
14181 {
14182 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14183 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14184 emit_insn (gen_unaligned_storehi (mem,
14185 gen_lowpart (HImode, halfword_tmp)));
14186 halfword_tmp = NULL;
14187 dstoffset += 2;
14188 }
14189
14190 remaining -= 2;
14191 srcoffset += 2;
14192 }
14193
14194 gcc_assert (remaining < 2);
14195
14196 /* Copy last byte. */
14197
14198 if ((remaining & 1) != 0)
14199 {
14200 byte_tmp = gen_reg_rtx (SImode);
14201
14202 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14203 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14204 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14205
14206 if (interleave_factor == 1)
14207 {
14208 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14209 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14210 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14211 byte_tmp = NULL;
14212 dstoffset++;
14213 }
14214
14215 remaining--;
14216 srcoffset++;
14217 }
14218
14219 /* Store last halfword if we haven't done so already. */
14220
14221 if (halfword_tmp)
14222 {
14223 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14224 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14225 emit_insn (gen_unaligned_storehi (mem,
14226 gen_lowpart (HImode, halfword_tmp)));
14227 dstoffset += 2;
14228 }
14229
14230 /* Likewise for last byte. */
14231
14232 if (byte_tmp)
14233 {
14234 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14235 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14236 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14237 dstoffset++;
14238 }
14239
14240 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14241 }
14242
14243 /* From mips_adjust_block_mem:
14244
14245 Helper function for doing a loop-based block operation on memory
14246 reference MEM. Each iteration of the loop will operate on LENGTH
14247 bytes of MEM.
14248
14249 Create a new base register for use within the loop and point it to
14250 the start of MEM. Create a new memory reference that uses this
14251 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
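/* Editorial note, for illustration only: because the alignment of the new
   reference is capped at MIN (MEM_ALIGN (mem), LENGTH * BITS_PER_UNIT), a
   loop that processes 16 bytes per iteration over a 32-byte-aligned buffer
   records only 16-byte (128-bit) alignment on *LOOP_MEM.  */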
14252
14253 static void
14254 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14255 rtx *loop_mem)
14256 {
14257 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14258
14259 /* Although the new mem does not refer to a known location,
14260 it does keep up to LENGTH bytes of alignment. */
14261 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14262 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14263 }
14264
14265 /* From mips_block_move_loop:
14266
14267 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14268 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14269 the memory regions do not overlap. */
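/* Editorial sketch, for illustration only and not the exact code GCC emits:
   with INTERLEAVE_FACTOR == 4 and BYTES_PER_ITER == 16 on aligned buffers,
   the emitted loop corresponds roughly to (register names are placeholders)

	1:	ldmia	rS, {r0-r3}
		stmia	rD, {r0-r3}
		add	rS, rS, #16
		add	rD, rD, #16
		cmp	rS, rFINAL
		bne	1b

   followed by a straight-line copy of the LENGTH % BYTES_PER_ITER leftover
   bytes.  */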
14270
14271 static void
14272 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14273 unsigned int interleave_factor,
14274 HOST_WIDE_INT bytes_per_iter)
14275 {
14276 rtx src_reg, dest_reg, final_src, test;
14277 HOST_WIDE_INT leftover;
14278
14279 leftover = length % bytes_per_iter;
14280 length -= leftover;
14281
14282 /* Create registers and memory references for use within the loop. */
14283 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14284 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14285
14286 /* Calculate the value that SRC_REG should have after the last iteration of
14287 the loop. */
14288 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14289 0, 0, OPTAB_WIDEN);
14290
14291 /* Emit the start of the loop. */
14292 rtx_code_label *label = gen_label_rtx ();
14293 emit_label (label);
14294
14295 /* Emit the loop body. */
14296 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14297 interleave_factor);
14298
14299 /* Move on to the next block. */
14300 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14301 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14302
14303 /* Emit the loop condition. */
14304 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14305 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14306
14307 /* Mop up any left-over bytes. */
14308 if (leftover)
14309 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14310 }
14311
14312 /* Emit a block move when either the source or destination is unaligned (not
14313 aligned to a four-byte boundary). This may need further tuning depending on
14314 core type, optimize_size setting, etc. */
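/* Editorial note, for illustration only: with the heuristics below, a
   10-byte unaligned copy at -Os is expanded straight-line, while anything
   above 12 bytes at -Os (or above 32 bytes otherwise) goes through
   arm_block_move_unaligned_loop instead.  */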
14315
14316 static int
14317 arm_cpymemqi_unaligned (rtx *operands)
14318 {
14319 HOST_WIDE_INT length = INTVAL (operands[2]);
14320
14321 if (optimize_size)
14322 {
14323 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14324 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14325 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14326 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14327 or dst_aligned though: allow more interleaving in those cases since the
14328 resulting code can be smaller. */
14329 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14330 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14331
14332 if (length > 12)
14333 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14334 interleave_factor, bytes_per_iter);
14335 else
14336 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14337 interleave_factor);
14338 }
14339 else
14340 {
14341 /* Note that the loop created by arm_block_move_unaligned_loop may be
14342 subject to loop unrolling, which makes tuning this condition a little
14343 redundant. */
14344 if (length > 32)
14345 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14346 else
14347 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14348 }
14349
14350 return 1;
14351 }
14352
14353 int
14354 arm_gen_cpymemqi (rtx *operands)
14355 {
14356 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14357 HOST_WIDE_INT srcoffset, dstoffset;
14358 rtx src, dst, srcbase, dstbase;
14359 rtx part_bytes_reg = NULL;
14360 rtx mem;
14361
14362 if (!CONST_INT_P (operands[2])
14363 || !CONST_INT_P (operands[3])
14364 || INTVAL (operands[2]) > 64)
14365 return 0;
14366
14367 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14368 return arm_cpymemqi_unaligned (operands);
14369
14370 if (INTVAL (operands[3]) & 3)
14371 return 0;
14372
14373 dstbase = operands[0];
14374 srcbase = operands[1];
14375
14376 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14377 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14378
14379 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14380 out_words_to_go = INTVAL (operands[2]) / 4;
14381 last_bytes = INTVAL (operands[2]) & 3;
14382 dstoffset = srcoffset = 0;
14383
14384 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14385 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14386
14387 while (in_words_to_go >= 2)
14388 {
14389 if (in_words_to_go > 4)
14390 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14391 TRUE, srcbase, &srcoffset));
14392 else
14393 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14394 src, FALSE, srcbase,
14395 &srcoffset));
14396
14397 if (out_words_to_go)
14398 {
14399 if (out_words_to_go > 4)
14400 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14401 TRUE, dstbase, &dstoffset));
14402 else if (out_words_to_go != 1)
14403 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14404 out_words_to_go, dst,
14405 (last_bytes == 0
14406 ? FALSE : TRUE),
14407 dstbase, &dstoffset));
14408 else
14409 {
14410 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14411 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14412 if (last_bytes != 0)
14413 {
14414 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14415 dstoffset += 4;
14416 }
14417 }
14418 }
14419
14420 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14421 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14422 }
14423
14424 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14425 if (out_words_to_go)
14426 {
14427 rtx sreg;
14428
14429 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14430 sreg = copy_to_reg (mem);
14431
14432 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14433 emit_move_insn (mem, sreg);
14434 in_words_to_go--;
14435
14436 gcc_assert (!in_words_to_go); /* Sanity check */
14437 }
14438
14439 if (in_words_to_go)
14440 {
14441 gcc_assert (in_words_to_go > 0);
14442
14443 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14444 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14445 }
14446
14447 gcc_assert (!last_bytes || part_bytes_reg);
14448
14449 if (BYTES_BIG_ENDIAN && last_bytes)
14450 {
14451 rtx tmp = gen_reg_rtx (SImode);
14452
14453 /* The bytes we want are in the top end of the word. */
14454 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14455 GEN_INT (8 * (4 - last_bytes))));
14456 part_bytes_reg = tmp;
14457
14458 while (last_bytes)
14459 {
14460 mem = adjust_automodify_address (dstbase, QImode,
14461 plus_constant (Pmode, dst,
14462 last_bytes - 1),
14463 dstoffset + last_bytes - 1);
14464 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14465
14466 if (--last_bytes)
14467 {
14468 tmp = gen_reg_rtx (SImode);
14469 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14470 part_bytes_reg = tmp;
14471 }
14472 }
14473
14474 }
14475 else
14476 {
14477 if (last_bytes > 1)
14478 {
14479 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14480 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14481 last_bytes -= 2;
14482 if (last_bytes)
14483 {
14484 rtx tmp = gen_reg_rtx (SImode);
14485 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14486 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14487 part_bytes_reg = tmp;
14488 dstoffset += 2;
14489 }
14490 }
14491
14492 if (last_bytes)
14493 {
14494 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14495 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14496 }
14497 }
14498
14499 return 1;
14500 }
14501
14502 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
14503 by mode size. */
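/* (Editorial example: applied to an SImode MEM at address R, this returns an
   SImode MEM at R + 4; for a DImode MEM the step is 8 bytes.)  */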
14504 inline static rtx
14505 next_consecutive_mem (rtx mem)
14506 {
14507 machine_mode mode = GET_MODE (mem);
14508 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14509 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14510
14511 return adjust_automodify_address (mem, mode, addr, offset);
14512 }
14513
14514 /* Copy using LDRD/STRD instructions whenever possible.
14515 Returns true upon success. */
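/* Editorial sketch, not the exact expansion: for a 16-byte copy between
   doubleword-aligned buffers the loop below emits roughly (placeholder
   register names)

	ldrd	r0, r1, [rSRC]
	strd	r0, r1, [rDST]
	ldrd	r0, r1, [rSRC, #8]
	strd	r0, r1, [rDST, #8]

   whereas a buffer with unknown (sub-word) alignment is accessed as pairs
   of unaligned LDR/STR instead.  */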
14516 bool
14517 gen_cpymem_ldrd_strd (rtx *operands)
14518 {
14519 unsigned HOST_WIDE_INT len;
14520 HOST_WIDE_INT align;
14521 rtx src, dst, base;
14522 rtx reg0;
14523 bool src_aligned, dst_aligned;
14524 bool src_volatile, dst_volatile;
14525
14526 gcc_assert (CONST_INT_P (operands[2]));
14527 gcc_assert (CONST_INT_P (operands[3]));
14528
14529 len = UINTVAL (operands[2]);
14530 if (len > 64)
14531 return false;
14532
14533 /* Maximum alignment we can assume for both src and dst buffers. */
14534 align = INTVAL (operands[3]);
14535
14536 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14537 return false;
14538
14539 /* Place src and dst addresses in registers
14540 and update the corresponding mem rtx. */
14541 dst = operands[0];
14542 dst_volatile = MEM_VOLATILE_P (dst);
14543 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14544 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14545 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14546
14547 src = operands[1];
14548 src_volatile = MEM_VOLATILE_P (src);
14549 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14550 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14551 src = adjust_automodify_address (src, VOIDmode, base, 0);
14552
14553 if (!unaligned_access && !(src_aligned && dst_aligned))
14554 return false;
14555
14556 if (src_volatile || dst_volatile)
14557 return false;
14558
14559 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14560 if (!(dst_aligned || src_aligned))
14561 return arm_gen_cpymemqi (operands);
14562
14563   /* If either src or dst is unaligned we'll be accessing it as pairs
14564 of unaligned SImode accesses. Otherwise we can generate DImode
14565 ldrd/strd instructions. */
14566 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14567 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14568
14569 while (len >= 8)
14570 {
14571 len -= 8;
14572 reg0 = gen_reg_rtx (DImode);
14573 rtx low_reg = NULL_RTX;
14574 rtx hi_reg = NULL_RTX;
14575
14576 if (!src_aligned || !dst_aligned)
14577 {
14578 low_reg = gen_lowpart (SImode, reg0);
14579 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14580 }
14581 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
14582 emit_move_insn (reg0, src);
14583 else if (src_aligned)
14584 emit_insn (gen_unaligned_loaddi (reg0, src));
14585 else
14586 {
14587 emit_insn (gen_unaligned_loadsi (low_reg, src));
14588 src = next_consecutive_mem (src);
14589 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14590 }
14591
14592 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
14593 emit_move_insn (dst, reg0);
14594 else if (dst_aligned)
14595 emit_insn (gen_unaligned_storedi (dst, reg0));
14596 else
14597 {
14598 emit_insn (gen_unaligned_storesi (dst, low_reg));
14599 dst = next_consecutive_mem (dst);
14600 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14601 }
14602
14603 src = next_consecutive_mem (src);
14604 dst = next_consecutive_mem (dst);
14605 }
14606
14607 gcc_assert (len < 8);
14608 if (len >= 4)
14609 {
14610 /* More than a word but less than a double-word to copy. Copy a word. */
14611 reg0 = gen_reg_rtx (SImode);
14612 src = adjust_address (src, SImode, 0);
14613 dst = adjust_address (dst, SImode, 0);
14614 if (src_aligned)
14615 emit_move_insn (reg0, src);
14616 else
14617 emit_insn (gen_unaligned_loadsi (reg0, src));
14618
14619 if (dst_aligned)
14620 emit_move_insn (dst, reg0);
14621 else
14622 emit_insn (gen_unaligned_storesi (dst, reg0));
14623
14624 src = next_consecutive_mem (src);
14625 dst = next_consecutive_mem (dst);
14626 len -= 4;
14627 }
14628
14629 if (len == 0)
14630 return true;
14631
14632 /* Copy the remaining bytes. */
14633 if (len >= 2)
14634 {
14635 dst = adjust_address (dst, HImode, 0);
14636 src = adjust_address (src, HImode, 0);
14637 reg0 = gen_reg_rtx (SImode);
14638 if (src_aligned)
14639 emit_insn (gen_zero_extendhisi2 (reg0, src));
14640 else
14641 emit_insn (gen_unaligned_loadhiu (reg0, src));
14642
14643 if (dst_aligned)
14644 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14645 else
14646 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14647
14648 src = next_consecutive_mem (src);
14649 dst = next_consecutive_mem (dst);
14650 if (len == 2)
14651 return true;
14652 }
14653
14654 dst = adjust_address (dst, QImode, 0);
14655 src = adjust_address (src, QImode, 0);
14656 reg0 = gen_reg_rtx (QImode);
14657 emit_move_insn (reg0, src);
14658 emit_move_insn (dst, reg0);
14659 return true;
14660 }
14661
14662 /* Select a dominance comparison mode if possible for a test of the general
14663 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14664 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14665 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14666 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14667 In all cases OP will be either EQ or NE, but we don't need to know which
14668 here. If we are unable to support a dominance comparison we return
14669 CC mode. This will then fail to match for the RTL expressions that
14670 generate this call. */
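/* Editorial example, added for clarity: for a condition such as
   (a >= 0 && b >= 0) both component comparisons are GE, so with
   COND_OR == DOM_CC_X_AND_Y this function returns CC_DGEmode and the
   backend can emit a conditional-compare sequence along the lines of

	cmp	ra, #0
	cmpge	rb, #0
	bge	...

   where the final GE condition holds only if both tests passed.  Register
   names are placeholders.  */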
14671 machine_mode
14672 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14673 {
14674 enum rtx_code cond1, cond2;
14675 int swapped = 0;
14676
14677 /* Currently we will probably get the wrong result if the individual
14678 comparisons are not simple. This also ensures that it is safe to
14679 reverse a comparison if necessary. */
14680 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14681 != CCmode)
14682 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14683 != CCmode))
14684 return CCmode;
14685
14686 /* The if_then_else variant of this tests the second condition if the
14687 first passes, but is true if the first fails. Reverse the first
14688 condition to get a true "inclusive-or" expression. */
14689 if (cond_or == DOM_CC_NX_OR_Y)
14690 cond1 = reverse_condition (cond1);
14691
14692 /* If the comparisons are not equal, and one doesn't dominate the other,
14693 then we can't do this. */
14694 if (cond1 != cond2
14695 && !comparison_dominates_p (cond1, cond2)
14696 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14697 return CCmode;
14698
14699 if (swapped)
14700 std::swap (cond1, cond2);
14701
14702 switch (cond1)
14703 {
14704 case EQ:
14705 if (cond_or == DOM_CC_X_AND_Y)
14706 return CC_DEQmode;
14707
14708 switch (cond2)
14709 {
14710 case EQ: return CC_DEQmode;
14711 case LE: return CC_DLEmode;
14712 case LEU: return CC_DLEUmode;
14713 case GE: return CC_DGEmode;
14714 case GEU: return CC_DGEUmode;
14715 default: gcc_unreachable ();
14716 }
14717
14718 case LT:
14719 if (cond_or == DOM_CC_X_AND_Y)
14720 return CC_DLTmode;
14721
14722 switch (cond2)
14723 {
14724 case LT:
14725 return CC_DLTmode;
14726 case LE:
14727 return CC_DLEmode;
14728 case NE:
14729 return CC_DNEmode;
14730 default:
14731 gcc_unreachable ();
14732 }
14733
14734 case GT:
14735 if (cond_or == DOM_CC_X_AND_Y)
14736 return CC_DGTmode;
14737
14738 switch (cond2)
14739 {
14740 case GT:
14741 return CC_DGTmode;
14742 case GE:
14743 return CC_DGEmode;
14744 case NE:
14745 return CC_DNEmode;
14746 default:
14747 gcc_unreachable ();
14748 }
14749
14750 case LTU:
14751 if (cond_or == DOM_CC_X_AND_Y)
14752 return CC_DLTUmode;
14753
14754 switch (cond2)
14755 {
14756 case LTU:
14757 return CC_DLTUmode;
14758 case LEU:
14759 return CC_DLEUmode;
14760 case NE:
14761 return CC_DNEmode;
14762 default:
14763 gcc_unreachable ();
14764 }
14765
14766 case GTU:
14767 if (cond_or == DOM_CC_X_AND_Y)
14768 return CC_DGTUmode;
14769
14770 switch (cond2)
14771 {
14772 case GTU:
14773 return CC_DGTUmode;
14774 case GEU:
14775 return CC_DGEUmode;
14776 case NE:
14777 return CC_DNEmode;
14778 default:
14779 gcc_unreachable ();
14780 }
14781
14782 /* The remaining cases only occur when both comparisons are the
14783 same. */
14784 case NE:
14785 gcc_assert (cond1 == cond2);
14786 return CC_DNEmode;
14787
14788 case LE:
14789 gcc_assert (cond1 == cond2);
14790 return CC_DLEmode;
14791
14792 case GE:
14793 gcc_assert (cond1 == cond2);
14794 return CC_DGEmode;
14795
14796 case LEU:
14797 gcc_assert (cond1 == cond2);
14798 return CC_DLEUmode;
14799
14800 case GEU:
14801 gcc_assert (cond1 == cond2);
14802 return CC_DGEUmode;
14803
14804 default:
14805 gcc_unreachable ();
14806 }
14807 }
14808
14809 machine_mode
14810 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14811 {
14812 /* All floating point compares return CCFP if it is an equality
14813 comparison, and CCFPE otherwise. */
14814 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14815 {
14816 switch (op)
14817 {
14818 case EQ:
14819 case NE:
14820 case UNORDERED:
14821 case ORDERED:
14822 case UNLT:
14823 case UNLE:
14824 case UNGT:
14825 case UNGE:
14826 case UNEQ:
14827 case LTGT:
14828 return CCFPmode;
14829
14830 case LT:
14831 case LE:
14832 case GT:
14833 case GE:
14834 return CCFPEmode;
14835
14836 default:
14837 gcc_unreachable ();
14838 }
14839 }
14840
14841 /* A compare with a shifted operand. Because of canonicalization, the
14842 comparison will have to be swapped when we emit the assembler. */
14843 if (GET_MODE (y) == SImode
14844 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14845 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14846 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14847 || GET_CODE (x) == ROTATERT))
14848 return CC_SWPmode;
14849
14850 /* This operation is performed swapped, but since we only rely on the Z
14851 flag we don't need an additional mode. */
14852 if (GET_MODE (y) == SImode
14853 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14854 && GET_CODE (x) == NEG
14855 && (op == EQ || op == NE))
14856 return CC_Zmode;
14857
14858 /* This is a special case that is used by combine to allow a
14859 comparison of a shifted byte load to be split into a zero-extend
14860 followed by a comparison of the shifted integer (only valid for
14861 equalities and unsigned inequalities). */
14862 if (GET_MODE (x) == SImode
14863 && GET_CODE (x) == ASHIFT
14864 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14865 && GET_CODE (XEXP (x, 0)) == SUBREG
14866 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14867 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14868 && (op == EQ || op == NE
14869 || op == GEU || op == GTU || op == LTU || op == LEU)
14870 && CONST_INT_P (y))
14871 return CC_Zmode;
14872
14873 /* A construct for a conditional compare, if the false arm contains
14874 0, then both conditions must be true, otherwise either condition
14875 must be true. Not all conditions are possible, so CCmode is
14876 returned if it can't be done. */
14877 if (GET_CODE (x) == IF_THEN_ELSE
14878 && (XEXP (x, 2) == const0_rtx
14879 || XEXP (x, 2) == const1_rtx)
14880 && COMPARISON_P (XEXP (x, 0))
14881 && COMPARISON_P (XEXP (x, 1)))
14882 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14883 INTVAL (XEXP (x, 2)));
14884
14885 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14886 if (GET_CODE (x) == AND
14887 && (op == EQ || op == NE)
14888 && COMPARISON_P (XEXP (x, 0))
14889 && COMPARISON_P (XEXP (x, 1)))
14890 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14891 DOM_CC_X_AND_Y);
14892
14893 if (GET_CODE (x) == IOR
14894 && (op == EQ || op == NE)
14895 && COMPARISON_P (XEXP (x, 0))
14896 && COMPARISON_P (XEXP (x, 1)))
14897 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14898 DOM_CC_X_OR_Y);
14899
14900 /* An operation (on Thumb) where we want to test for a single bit.
14901 This is done by shifting that bit up into the top bit of a
14902 scratch register; we can then branch on the sign bit. */
14903 if (TARGET_THUMB1
14904 && GET_MODE (x) == SImode
14905 && (op == EQ || op == NE)
14906 && GET_CODE (x) == ZERO_EXTRACT
14907 && XEXP (x, 1) == const1_rtx)
14908 return CC_Nmode;
14909
14910   /* For an operation that sets the condition codes as a side-effect, the
14911      V flag is not set correctly, so we can only use comparisons where
14912 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14913 instead.) */
14914 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14915 if (GET_MODE (x) == SImode
14916 && y == const0_rtx
14917 && (op == EQ || op == NE || op == LT || op == GE)
14918 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14919 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14920 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14921 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14922 || GET_CODE (x) == LSHIFTRT
14923 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14924 || GET_CODE (x) == ROTATERT
14925 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14926 return CC_NOOVmode;
14927
14928 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14929 return CC_Zmode;
14930
14931 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14932 && GET_CODE (x) == PLUS
14933 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14934 return CC_Cmode;
14935
14936 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14937 {
14938 switch (op)
14939 {
14940 case EQ:
14941 case NE:
14942 /* A DImode comparison against zero can be implemented by
14943 or'ing the two halves together. */
14944 if (y == const0_rtx)
14945 return CC_Zmode;
14946
14947 /* We can do an equality test in three Thumb instructions. */
14948 if (!TARGET_32BIT)
14949 return CC_Zmode;
14950
14951 /* FALLTHROUGH */
14952
14953 case LTU:
14954 case LEU:
14955 case GTU:
14956 case GEU:
14957 /* DImode unsigned comparisons can be implemented by cmp +
14958 cmpeq without a scratch register. Not worth doing in
14959 Thumb-2. */
14960 if (TARGET_32BIT)
14961 return CC_CZmode;
14962
14963 /* FALLTHROUGH */
14964
14965 case LT:
14966 case LE:
14967 case GT:
14968 case GE:
14969 /* DImode signed and unsigned comparisons can be implemented
14970 by cmp + sbcs with a scratch register, but that does not
14971 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14972 gcc_assert (op != EQ && op != NE);
14973 return CC_NCVmode;
14974
14975 default:
14976 gcc_unreachable ();
14977 }
14978 }
14979
14980 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14981 return GET_MODE (x);
14982
14983 return CCmode;
14984 }
14985
14986 /* X and Y are two things to compare using CODE. Emit the compare insn and
14987    return the rtx for the CC register in the proper mode.  SCRATCH is an
14988    SImode scratch register that is only required for certain DImode
14989    comparisons after reload.  */
14989 rtx
14990 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14991 {
14992 machine_mode mode;
14993 rtx cc_reg;
14994 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14995
14996 /* We might have X as a constant, Y as a register because of the predicates
14997 used for cmpdi. If so, force X to a register here. */
14998 if (dimode_comparison && !REG_P (x))
14999 x = force_reg (DImode, x);
15000
15001 mode = SELECT_CC_MODE (code, x, y);
15002 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15003
15004 if (dimode_comparison
15005 && mode != CC_CZmode)
15006 {
15007 rtx clobber, set;
15008
15009 /* To compare two non-zero values for equality, XOR them and
15010 then compare against zero. Not used for ARM mode; there
15011 CC_CZmode is cheaper. */
15012 if (mode == CC_Zmode && y != const0_rtx)
15013 {
15014 gcc_assert (!reload_completed);
15015 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15016 y = const0_rtx;
15017 }
15018
15019 /* A scratch register is required. */
15020 if (reload_completed)
15021 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15022 else
15023 scratch = gen_rtx_SCRATCH (SImode);
15024
15025 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15026 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15027 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15028 }
15029 else
15030 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15031
15032 return cc_reg;
15033 }
15034
15035 /* Generate a sequence of insns that will generate the correct return
15036 address mask depending on the physical architecture that the program
15037 is running on. */
15038 rtx
15039 arm_gen_return_addr_mask (void)
15040 {
15041 rtx reg = gen_reg_rtx (Pmode);
15042
15043 emit_insn (gen_return_addr_mask (reg));
15044 return reg;
15045 }
15046
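/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads.  (Summary comment added editorially; the little-endian
   case corresponds roughly to

	ldrb	rSCRATCH, [rBASE, #off]
	ldrb	rDEST, [rBASE, #off + 1]
	orr	rDEST, rSCRATCH, rDEST, lsl #8

   with register names used here only as placeholders.)  */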
15047 void
15048 arm_reload_in_hi (rtx *operands)
15049 {
15050 rtx ref = operands[1];
15051 rtx base, scratch;
15052 HOST_WIDE_INT offset = 0;
15053
15054 if (GET_CODE (ref) == SUBREG)
15055 {
15056 offset = SUBREG_BYTE (ref);
15057 ref = SUBREG_REG (ref);
15058 }
15059
15060 if (REG_P (ref))
15061 {
15062 /* We have a pseudo which has been spilt onto the stack; there
15063 are two cases here: the first where there is a simple
15064 stack-slot replacement and a second where the stack-slot is
15065 out of range, or is used as a subreg. */
15066 if (reg_equiv_mem (REGNO (ref)))
15067 {
15068 ref = reg_equiv_mem (REGNO (ref));
15069 base = find_replacement (&XEXP (ref, 0));
15070 }
15071 else
15072 /* The slot is out of range, or was dressed up in a SUBREG. */
15073 base = reg_equiv_address (REGNO (ref));
15074
15075 /* PR 62554: If there is no equivalent memory location then just move
15076 the value as an SImode register move. This happens when the target
15077 architecture variant does not have an HImode register move. */
15078 if (base == NULL)
15079 {
15080 gcc_assert (REG_P (operands[0]));
15081 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15082 gen_rtx_SUBREG (SImode, ref, 0)));
15083 return;
15084 }
15085 }
15086 else
15087 base = find_replacement (&XEXP (ref, 0));
15088
15089 /* Handle the case where the address is too complex to be offset by 1. */
15090 if (GET_CODE (base) == MINUS
15091 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15092 {
15093 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15094
15095 emit_set_insn (base_plus, base);
15096 base = base_plus;
15097 }
15098 else if (GET_CODE (base) == PLUS)
15099 {
15100 /* The addend must be CONST_INT, or we would have dealt with it above. */
15101 HOST_WIDE_INT hi, lo;
15102
15103 offset += INTVAL (XEXP (base, 1));
15104 base = XEXP (base, 0);
15105
15106 /* Rework the address into a legal sequence of insns. */
15107 /* Valid range for lo is -4095 -> 4095 */
15108 lo = (offset >= 0
15109 ? (offset & 0xfff)
15110 : -((-offset) & 0xfff));
15111
15112      /* Corner case: if lo is the max offset then we would be out of range
15113 once we have added the additional 1 below, so bump the msb into the
15114 pre-loading insn(s). */
15115 if (lo == 4095)
15116 lo &= 0x7ff;
15117
15118 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15119 ^ (HOST_WIDE_INT) 0x80000000)
15120 - (HOST_WIDE_INT) 0x80000000);
15121
15122 gcc_assert (hi + lo == offset);
15123
15124 if (hi != 0)
15125 {
15126 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15127
15128 /* Get the base address; addsi3 knows how to handle constants
15129 that require more than one insn. */
15130 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15131 base = base_plus;
15132 offset = lo;
15133 }
15134 }
15135
15136 /* Operands[2] may overlap operands[0] (though it won't overlap
15137      operands[1]); that's why we asked for a DImode reg -- so we can
15138      use the half that does not overlap.  */
15139 if (REGNO (operands[2]) == REGNO (operands[0]))
15140 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15141 else
15142 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15143
15144 emit_insn (gen_zero_extendqisi2 (scratch,
15145 gen_rtx_MEM (QImode,
15146 plus_constant (Pmode, base,
15147 offset))));
15148 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15149 gen_rtx_MEM (QImode,
15150 plus_constant (Pmode, base,
15151 offset + 1))));
15152 if (!BYTES_BIG_ENDIAN)
15153 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15154 gen_rtx_IOR (SImode,
15155 gen_rtx_ASHIFT
15156 (SImode,
15157 gen_rtx_SUBREG (SImode, operands[0], 0),
15158 GEN_INT (8)),
15159 scratch));
15160 else
15161 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15162 gen_rtx_IOR (SImode,
15163 gen_rtx_ASHIFT (SImode, scratch,
15164 GEN_INT (8)),
15165 gen_rtx_SUBREG (SImode, operands[0], 0)));
15166 }
15167
15168 /* Handle storing a half-word to memory during reload by synthesizing as two
15169 byte stores. Take care not to clobber the input values until after we
15170 have moved them somewhere safe. This code assumes that if the DImode
15171 scratch in operands[2] overlaps either the input value or output address
15172 in some way, then that value must die in this insn (we absolutely need
15173 two scratch registers for some corner cases). */
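/* Editorial sketch of the little-endian case, for illustration only
   (placeholder register names): storing the half-word in rVAL to
   [rBASE, #off] without an HImode store becomes roughly

	strb	rVAL, [rBASE, #off]
	lsr	rSCRATCH, rVAL, #8
	strb	rSCRATCH, [rBASE, #off + 1]

   as emitted at the end of the function below.  */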
15174 void
15175 arm_reload_out_hi (rtx *operands)
15176 {
15177 rtx ref = operands[0];
15178 rtx outval = operands[1];
15179 rtx base, scratch;
15180 HOST_WIDE_INT offset = 0;
15181
15182 if (GET_CODE (ref) == SUBREG)
15183 {
15184 offset = SUBREG_BYTE (ref);
15185 ref = SUBREG_REG (ref);
15186 }
15187
15188 if (REG_P (ref))
15189 {
15190 /* We have a pseudo which has been spilt onto the stack; there
15191 are two cases here: the first where there is a simple
15192 stack-slot replacement and a second where the stack-slot is
15193 out of range, or is used as a subreg. */
15194 if (reg_equiv_mem (REGNO (ref)))
15195 {
15196 ref = reg_equiv_mem (REGNO (ref));
15197 base = find_replacement (&XEXP (ref, 0));
15198 }
15199 else
15200 /* The slot is out of range, or was dressed up in a SUBREG. */
15201 base = reg_equiv_address (REGNO (ref));
15202
15203 /* PR 62254: If there is no equivalent memory location then just move
15204 the value as an SImode register move. This happens when the target
15205 architecture variant does not have an HImode register move. */
15206 if (base == NULL)
15207 {
15208 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15209
15210 if (REG_P (outval))
15211 {
15212 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15213 gen_rtx_SUBREG (SImode, outval, 0)));
15214 }
15215 else /* SUBREG_P (outval) */
15216 {
15217 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15218 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15219 SUBREG_REG (outval)));
15220 else
15221 /* FIXME: Handle other cases ? */
15222 gcc_unreachable ();
15223 }
15224 return;
15225 }
15226 }
15227 else
15228 base = find_replacement (&XEXP (ref, 0));
15229
15230 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15231
15232 /* Handle the case where the address is too complex to be offset by 1. */
15233 if (GET_CODE (base) == MINUS
15234 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15235 {
15236 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15237
15238 /* Be careful not to destroy OUTVAL. */
15239 if (reg_overlap_mentioned_p (base_plus, outval))
15240 {
15241 /* Updating base_plus might destroy outval, see if we can
15242 swap the scratch and base_plus. */
15243 if (!reg_overlap_mentioned_p (scratch, outval))
15244 std::swap (scratch, base_plus);
15245 else
15246 {
15247 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15248
15249 /* Be conservative and copy OUTVAL into the scratch now,
15250 this should only be necessary if outval is a subreg
15251 of something larger than a word. */
15252 /* XXX Might this clobber base? I can't see how it can,
15253 since scratch is known to overlap with OUTVAL, and
15254 must be wider than a word. */
15255 emit_insn (gen_movhi (scratch_hi, outval));
15256 outval = scratch_hi;
15257 }
15258 }
15259
15260 emit_set_insn (base_plus, base);
15261 base = base_plus;
15262 }
15263 else if (GET_CODE (base) == PLUS)
15264 {
15265 /* The addend must be CONST_INT, or we would have dealt with it above. */
15266 HOST_WIDE_INT hi, lo;
15267
15268 offset += INTVAL (XEXP (base, 1));
15269 base = XEXP (base, 0);
15270
15271 /* Rework the address into a legal sequence of insns. */
15272 /* Valid range for lo is -4095 -> 4095 */
15273 lo = (offset >= 0
15274 ? (offset & 0xfff)
15275 : -((-offset) & 0xfff));
15276
15277      /* Corner case: if lo is the max offset then we would be out of range
15278 once we have added the additional 1 below, so bump the msb into the
15279 pre-loading insn(s). */
15280 if (lo == 4095)
15281 lo &= 0x7ff;
15282
15283 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15284 ^ (HOST_WIDE_INT) 0x80000000)
15285 - (HOST_WIDE_INT) 0x80000000);
15286
15287 gcc_assert (hi + lo == offset);
15288
15289 if (hi != 0)
15290 {
15291 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15292
15293 /* Be careful not to destroy OUTVAL. */
15294 if (reg_overlap_mentioned_p (base_plus, outval))
15295 {
15296 /* Updating base_plus might destroy outval, see if we
15297 can swap the scratch and base_plus. */
15298 if (!reg_overlap_mentioned_p (scratch, outval))
15299 std::swap (scratch, base_plus);
15300 else
15301 {
15302 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15303
15304 /* Be conservative and copy outval into scratch now,
15305 this should only be necessary if outval is a
15306 subreg of something larger than a word. */
15307 /* XXX Might this clobber base? I can't see how it
15308 can, since scratch is known to overlap with
15309 outval. */
15310 emit_insn (gen_movhi (scratch_hi, outval));
15311 outval = scratch_hi;
15312 }
15313 }
15314
15315 /* Get the base address; addsi3 knows how to handle constants
15316 that require more than one insn. */
15317 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15318 base = base_plus;
15319 offset = lo;
15320 }
15321 }
15322
15323 if (BYTES_BIG_ENDIAN)
15324 {
15325 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15326 plus_constant (Pmode, base,
15327 offset + 1)),
15328 gen_lowpart (QImode, outval)));
15329 emit_insn (gen_lshrsi3 (scratch,
15330 gen_rtx_SUBREG (SImode, outval, 0),
15331 GEN_INT (8)));
15332 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15333 offset)),
15334 gen_lowpart (QImode, scratch)));
15335 }
15336 else
15337 {
15338 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15339 offset)),
15340 gen_lowpart (QImode, outval)));
15341 emit_insn (gen_lshrsi3 (scratch,
15342 gen_rtx_SUBREG (SImode, outval, 0),
15343 GEN_INT (8)));
15344 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15345 plus_constant (Pmode, base,
15346 offset + 1)),
15347 gen_lowpart (QImode, scratch)));
15348 }
15349 }
15350
15351 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15352 (padded to the size of a word) should be passed in a register. */
15353
15354 static bool
15355 arm_must_pass_in_stack (const function_arg_info &arg)
15356 {
15357 if (TARGET_AAPCS_BASED)
15358 return must_pass_in_stack_var_size (arg);
15359 else
15360 return must_pass_in_stack_var_size_or_pad (arg);
15361 }
15362
15363
15364 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15365 byte of a stack argument has useful data. For legacy APCS ABIs we use
15366 the default. For AAPCS based ABIs small aggregate types are placed
15367 in the lowest memory address. */
15368
15369 static pad_direction
15370 arm_function_arg_padding (machine_mode mode, const_tree type)
15371 {
15372 if (!TARGET_AAPCS_BASED)
15373 return default_function_arg_padding (mode, type);
15374
15375 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15376 return PAD_DOWNWARD;
15377
15378 return PAD_UPWARD;
15379 }
15380
15381
15382 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15383 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15384 register has useful data, and return the opposite if the most
15385 significant byte does. */
15386
15387 bool
15388 arm_pad_reg_upward (machine_mode mode,
15389 tree type, int first ATTRIBUTE_UNUSED)
15390 {
15391 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15392 {
15393 /* For AAPCS, small aggregates, small fixed-point types,
15394 and small complex types are always padded upwards. */
15395 if (type)
15396 {
15397 if ((AGGREGATE_TYPE_P (type)
15398 || TREE_CODE (type) == COMPLEX_TYPE
15399 || FIXED_POINT_TYPE_P (type))
15400 && int_size_in_bytes (type) <= 4)
15401 return true;
15402 }
15403 else
15404 {
15405 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15406 && GET_MODE_SIZE (mode) <= 4)
15407 return true;
15408 }
15409 }
15410
15411 /* Otherwise, use default padding. */
15412 return !BYTES_BIG_ENDIAN;
15413 }
15414
15415 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15416 assuming that the address in the base register is word aligned. */
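/* Editorial note: per the checks below, ARM state accepts any offset in the
   range [-255, 255] (e.g. ldrd r0, r1, [r2, #248]), while Thumb-2 requires a
   multiple of 4 in the range [-1020, 1020].  */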
15417 bool
15418 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15419 {
15420 HOST_WIDE_INT max_offset;
15421
15422 /* Offset must be a multiple of 4 in Thumb mode. */
15423 if (TARGET_THUMB2 && ((offset & 3) != 0))
15424 return false;
15425
15426 if (TARGET_THUMB2)
15427 max_offset = 1020;
15428 else if (TARGET_ARM)
15429 max_offset = 255;
15430 else
15431 return false;
15432
15433 return ((offset <= max_offset) && (offset >= -max_offset));
15434 }
15435
15436 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15437 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15438 Assumes that the address in the base register RN is word aligned. Pattern
15439 guarantees that both memory accesses use the same base register,
15440 the offsets are constants within the range, and the gap between the offsets is 4.
15441      If reload is complete then check that the registers are legal.  WBACK indicates
15442      whether the address is updated.  LOAD indicates whether the memory access is a load or a store.  */
15443 bool
15444 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15445 bool wback, bool load)
15446 {
15447 unsigned int t, t2, n;
15448
15449 if (!reload_completed)
15450 return true;
15451
15452 if (!offset_ok_for_ldrd_strd (offset))
15453 return false;
15454
15455 t = REGNO (rt);
15456 t2 = REGNO (rt2);
15457 n = REGNO (rn);
15458
15459 if ((TARGET_THUMB2)
15460 && ((wback && (n == t || n == t2))
15461 || (t == SP_REGNUM)
15462 || (t == PC_REGNUM)
15463 || (t2 == SP_REGNUM)
15464 || (t2 == PC_REGNUM)
15465 || (!load && (n == PC_REGNUM))
15466 || (load && (t == t2))
15467 /* Triggers Cortex-M3 LDRD errata. */
15468 || (!wback && load && fix_cm3_ldrd && (n == t))))
15469 return false;
15470
15471 if ((TARGET_ARM)
15472 && ((wback && (n == t || n == t2))
15473 || (t2 == PC_REGNUM)
15474 || (t % 2 != 0) /* First destination register is not even. */
15475 || (t2 != t + 1)
15476      /* PC can be used as a base register (for offset addressing only),
15477      but it is deprecated.  */
15478 || (n == PC_REGNUM)))
15479 return false;
15480
15481 return true;
15482 }
15483
15484 /* Return true if a 64-bit access with alignment ALIGN and with a
15485 constant offset OFFSET from the base pointer is permitted on this
15486 architecture. */
15487 static bool
15488 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15489 {
15490 return (unaligned_access
15491 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15492 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15493 }
15494
15495 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15496 operand MEM's address contains an immediate offset from the base
15497 register and has no side effects, in which case it sets BASE,
15498 OFFSET and ALIGN accordingly. */
15499 static bool
15500 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15501 {
15502 rtx addr;
15503
15504 gcc_assert (base != NULL && offset != NULL);
15505
15506 /* TODO: Handle more general memory operand patterns, such as
15507 PRE_DEC and PRE_INC. */
15508
15509 if (side_effects_p (mem))
15510 return false;
15511
15512 /* Can't deal with subregs. */
15513 if (GET_CODE (mem) == SUBREG)
15514 return false;
15515
15516 gcc_assert (MEM_P (mem));
15517
15518 *offset = const0_rtx;
15519 *align = MEM_ALIGN (mem);
15520
15521 addr = XEXP (mem, 0);
15522
15523 /* If addr isn't valid for DImode, then we can't handle it. */
15524 if (!arm_legitimate_address_p (DImode, addr,
15525 reload_in_progress || reload_completed))
15526 return false;
15527
15528 if (REG_P (addr))
15529 {
15530 *base = addr;
15531 return true;
15532 }
15533 else if (GET_CODE (addr) == PLUS)
15534 {
15535 *base = XEXP (addr, 0);
15536 *offset = XEXP (addr, 1);
15537 return (REG_P (*base) && CONST_INT_P (*offset));
15538 }
15539
15540 return false;
15541 }
15542
15543 /* Called from a peephole2 to replace two word-size accesses with a
15544 single LDRD/STRD instruction. Returns true iff we can generate a
15545 new instruction sequence. That is, both accesses use the same base
15546 register and the gap between constant offsets is 4. This function
15547 may reorder its operands to match ldrd/strd RTL templates.
15548 OPERANDS are the operands found by the peephole matcher;
15549 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15550 corresponding memory operands. LOAD indicaates whether the access
15551 is load or store. CONST_STORE indicates a store of constant
15552 integer values held in OPERANDS[4,5] and assumes that the pattern
15553 is of length 4 insn, for the purpose of checking dead registers.
15554 COMMUTE indicates that register operands may be reordered. */
15555 bool
15556 gen_operands_ldrd_strd (rtx *operands, bool load,
15557 bool const_store, bool commute)
15558 {
15559 int nops = 2;
15560 HOST_WIDE_INT offsets[2], offset, align[2];
15561 rtx base = NULL_RTX;
15562 rtx cur_base, cur_offset, tmp;
15563 int i, gap;
15564 HARD_REG_SET regset;
15565
15566 gcc_assert (!const_store || !load);
15567 /* Check that the memory references are immediate offsets from the
15568 same base register. Extract the base register, the destination
15569 registers, and the corresponding memory offsets. */
15570 for (i = 0; i < nops; i++)
15571 {
15572 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15573 &align[i]))
15574 return false;
15575
15576 if (i == 0)
15577 base = cur_base;
15578 else if (REGNO (base) != REGNO (cur_base))
15579 return false;
15580
15581 offsets[i] = INTVAL (cur_offset);
15582 if (GET_CODE (operands[i]) == SUBREG)
15583 {
15584 tmp = SUBREG_REG (operands[i]);
15585 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15586 operands[i] = tmp;
15587 }
15588 }
15589
15590 /* Make sure there is no dependency between the individual loads. */
15591 if (load && REGNO (operands[0]) == REGNO (base))
15592 return false; /* RAW */
15593
15594 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15595 return false; /* WAW */
15596
15597 /* If the same input register is used in both stores
15598 when storing different constants, try to find a free register.
15599 For example, the code
15600 mov r0, 0
15601 str r0, [r2]
15602 mov r0, 1
15603 str r0, [r2, #4]
15604 can be transformed into
15605 mov r1, 0
15606 mov r0, 1
15607 strd r1, r0, [r2]
15608 in Thumb mode assuming that r1 is free.
15609 For ARM mode do the same but only if the starting register
15610 can be made to be even. */
15611 if (const_store
15612 && REGNO (operands[0]) == REGNO (operands[1])
15613 && INTVAL (operands[4]) != INTVAL (operands[5]))
15614 {
15615 if (TARGET_THUMB2)
15616 {
15617 CLEAR_HARD_REG_SET (regset);
15618 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15619 if (tmp == NULL_RTX)
15620 return false;
15621
15622 /* Use the new register in the first load to ensure that
15623 if the original input register is not dead after peephole,
15624 then it will have the correct constant value. */
15625 operands[0] = tmp;
15626 }
15627 else if (TARGET_ARM)
15628 {
15629 int regno = REGNO (operands[0]);
15630 if (!peep2_reg_dead_p (4, operands[0]))
15631 {
15632 /* When the input register is even and is not dead after the
15633 pattern, it has to hold the second constant but we cannot
15634 form a legal STRD in ARM mode with this register as the second
15635 register. */
15636 if (regno % 2 == 0)
15637 return false;
15638
15639 /* Is regno-1 free? */
15640 SET_HARD_REG_SET (regset);
15641 CLEAR_HARD_REG_BIT(regset, regno - 1);
15642 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15643 if (tmp == NULL_RTX)
15644 return false;
15645
15646 operands[0] = tmp;
15647 }
15648 else
15649 {
15650 /* Find a DImode register. */
15651 CLEAR_HARD_REG_SET (regset);
15652 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15653 if (tmp != NULL_RTX)
15654 {
15655 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15656 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15657 }
15658 else
15659 {
15660 /* Can we use the input register to form a DI register? */
15661 SET_HARD_REG_SET (regset);
15662 CLEAR_HARD_REG_BIT(regset,
15663 regno % 2 == 0 ? regno + 1 : regno - 1);
15664 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15665 if (tmp == NULL_RTX)
15666 return false;
15667 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15668 }
15669 }
15670
15671 gcc_assert (operands[0] != NULL_RTX);
15672 gcc_assert (operands[1] != NULL_RTX);
15673 gcc_assert (REGNO (operands[0]) % 2 == 0);
15674 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15675 }
15676 }
15677
15678 /* Make sure the instructions are ordered with lower memory access first. */
15679 if (offsets[0] > offsets[1])
15680 {
15681 gap = offsets[0] - offsets[1];
15682 offset = offsets[1];
15683
15684 /* Swap the instructions such that lower memory is accessed first. */
15685 std::swap (operands[0], operands[1]);
15686 std::swap (operands[2], operands[3]);
15687 std::swap (align[0], align[1]);
15688 if (const_store)
15689 std::swap (operands[4], operands[5]);
15690 }
15691 else
15692 {
15693 gap = offsets[1] - offsets[0];
15694 offset = offsets[0];
15695 }
15696
15697 /* Make sure accesses are to consecutive memory locations. */
15698 if (gap != GET_MODE_SIZE (SImode))
15699 return false;
15700
15701 if (!align_ok_ldrd_strd (align[0], offset))
15702 return false;
15703
15704 /* Make sure we generate legal instructions. */
15705 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15706 false, load))
15707 return true;
15708
15709 /* In Thumb state, where registers are almost unconstrained, there
15710 is little hope to fix it. */
15711 if (TARGET_THUMB2)
15712 return false;
15713
15714 if (load && commute)
15715 {
15716 /* Try reordering registers. */
15717 std::swap (operands[0], operands[1]);
15718 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15719 false, load))
15720 return true;
15721 }
15722
15723 if (const_store)
15724 {
15725 /* If input registers are dead after this pattern, they can be
15726 reordered or replaced by other registers that are free in the
15727 current pattern. */
15728 if (!peep2_reg_dead_p (4, operands[0])
15729 || !peep2_reg_dead_p (4, operands[1]))
15730 return false;
15731
15732 /* Try to reorder the input registers. */
15733 /* For example, the code
15734 mov r0, 0
15735 mov r1, 1
15736 str r1, [r2]
15737 str r0, [r2, #4]
15738 can be transformed into
15739 mov r1, 0
15740 mov r0, 1
15741 strd r0, [r2]
15742 */
15743 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15744 false, false))
15745 {
15746 std::swap (operands[0], operands[1]);
15747 return true;
15748 }
15749
15750 /* Try to find a free DI register. */
15751 CLEAR_HARD_REG_SET (regset);
15752 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15753 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15754 while (true)
15755 {
15756 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15757 if (tmp == NULL_RTX)
15758 return false;
15759
15760 /* DREG must be an even-numbered register in DImode.
15761 Split it into SI registers. */
15762 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15763 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15764 gcc_assert (operands[0] != NULL_RTX);
15765 gcc_assert (operands[1] != NULL_RTX);
15766 gcc_assert (REGNO (operands[0]) % 2 == 0);
15767 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15768
15769 return (operands_ok_ldrd_strd (operands[0], operands[1],
15770 base, offset,
15771 false, load));
15772 }
15773 }
15774
15775 return false;
15776 }
15777
15778
15779 /* Return true if parallel execution of the two word-size accesses provided
15780 could be satisfied with a single LDRD/STRD instruction. Two word-size
15781 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
15782 register operands and OPERANDS[2,3] are the corresponding memory operands.
15783 */
15784 bool
15785 valid_operands_ldrd_strd (rtx *operands, bool load)
15786 {
15787 int nops = 2;
15788 HOST_WIDE_INT offsets[2], offset, align[2];
15789 rtx base = NULL_RTX;
15790 rtx cur_base, cur_offset;
15791 int i, gap;
15792
15793 /* Check that the memory references are immediate offsets from the
15794 same base register. Extract the base register, the destination
15795 registers, and the corresponding memory offsets. */
15796 for (i = 0; i < nops; i++)
15797 {
15798 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15799 &align[i]))
15800 return false;
15801
15802 if (i == 0)
15803 base = cur_base;
15804 else if (REGNO (base) != REGNO (cur_base))
15805 return false;
15806
15807 offsets[i] = INTVAL (cur_offset);
15808 if (GET_CODE (operands[i]) == SUBREG)
15809 return false;
15810 }
15811
15812 if (offsets[0] > offsets[1])
15813 return false;
15814
15815 gap = offsets[1] - offsets[0];
15816 offset = offsets[0];
15817
15818 /* Make sure accesses are to consecutive memory locations. */
15819 if (gap != GET_MODE_SIZE (SImode))
15820 return false;
15821
15822 if (!align_ok_ldrd_strd (align[0], offset))
15823 return false;
15824
15825 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15826 false, load);
15827 }
15828
15829 \f
15830 /* Print a symbolic form of X to the debug file, F. */
15831 static void
15832 arm_print_value (FILE *f, rtx x)
15833 {
15834 switch (GET_CODE (x))
15835 {
15836 case CONST_INT:
15837 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15838 return;
15839
15840 case CONST_DOUBLE:
15841 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15842 return;
15843
15844 case CONST_VECTOR:
15845 {
15846 int i;
15847
15848 fprintf (f, "<");
15849 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15850 {
15851 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15852 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15853 fputc (',', f);
15854 }
15855 fprintf (f, ">");
15856 }
15857 return;
15858
15859 case CONST_STRING:
15860 fprintf (f, "\"%s\"", XSTR (x, 0));
15861 return;
15862
15863 case SYMBOL_REF:
15864 fprintf (f, "`%s'", XSTR (x, 0));
15865 return;
15866
15867 case LABEL_REF:
15868 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15869 return;
15870
15871 case CONST:
15872 arm_print_value (f, XEXP (x, 0));
15873 return;
15874
15875 case PLUS:
15876 arm_print_value (f, XEXP (x, 0));
15877 fprintf (f, "+");
15878 arm_print_value (f, XEXP (x, 1));
15879 return;
15880
15881 case PC:
15882 fprintf (f, "pc");
15883 return;
15884
15885 default:
15886 fprintf (f, "????");
15887 return;
15888 }
15889 }
15890 \f
15891 /* Routines for manipulation of the constant pool. */
15892
15893 /* Arm instructions cannot load a large constant directly into a
15894 register; they have to come from a pc relative load. The constant
15895 must therefore be placed in the addressable range of the pc
15896 relative load. Depending on the precise pc relative load
15897 instruction the range is somewhere between 256 bytes and 4k. This
15898 means that we often have to dump a constant inside a function, and
15899 generate code to branch around it.
15900
15901 It is important to minimize this, since the branches will slow
15902 things down and make the code larger.
15903
15904 Normally we can hide the table after an existing unconditional
15905 branch so that there is no interruption of the flow, but in the
15906 worst case the code looks like this:
15907
15908 ldr rn, L1
15909 ...
15910 b L2
15911 align
15912 L1: .long value
15913 L2:
15914 ...
15915
15916 ldr rn, L3
15917 ...
15918 b L4
15919 align
15920 L3: .long value
15921 L4:
15922 ...
15923
15924 We fix this by performing a scan after scheduling, which notices
15925 which instructions need to have their operands fetched from the
15926 constant table and builds the table.
15927
15928 The algorithm starts by building a table of all the constants that
15929 need fixing up and all the natural barriers in the function (places
15930 where a constant table can be dropped without breaking the flow).
15931 For each fixup we note how far the pc-relative replacement will be
15932 able to reach and the offset of the instruction into the function.
15933
15934 Having built the table we then group the fixes together to form
15935 tables that are as large as possible (subject to addressing
15936 constraints) and emit each table of constants after the last
15937 barrier that is within range of all the instructions in the group.
15938 If a group does not contain a barrier, then we forcibly create one
15939 by inserting a jump instruction into the flow. Once the table has
15940 been inserted, the insns are then modified to reference the
15941 relevant entry in the pool.
15942
15943 Possible enhancements to the algorithm (not implemented) are:
15944
15945 1) For some processors and object formats, there may be benefit in
15946 aligning the pools to the start of cache lines; this alignment
15947 would need to be taken into account when calculating addressability
15948 of a pool. */
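/* (Editorial illustration: in ARM state a pc-relative LDR can reach roughly
   4 KB in either direction, whereas a VFP VLDR is limited to about 1 KB,
   which is why the reachable range quoted above varies with the instruction
   that needs the constant.)  */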
15949
15950 /* These typedefs are located at the start of this file, so that
15951 they can be used in the prototypes there. This comment is to
15952 remind readers of that fact so that the following structures
15953 can be understood more easily.
15954
15955 typedef struct minipool_node Mnode;
15956 typedef struct minipool_fixup Mfix; */
15957
15958 struct minipool_node
15959 {
15960 /* Doubly linked chain of entries. */
15961 Mnode * next;
15962 Mnode * prev;
15963   /* The maximum offset into the code at which this entry can be placed.  While
15964 pushing fixes for forward references, all entries are sorted in order
15965 of increasing max_address. */
15966 HOST_WIDE_INT max_address;
15967 /* Similarly for an entry inserted for a backwards ref. */
15968 HOST_WIDE_INT min_address;
15969 /* The number of fixes referencing this entry. This can become zero
15970 if we "unpush" an entry. In this case we ignore the entry when we
15971 come to emit the code. */
15972 int refcount;
15973 /* The offset from the start of the minipool. */
15974 HOST_WIDE_INT offset;
15975 /* The value in table. */
15976 rtx value;
15977 /* The mode of value. */
15978 machine_mode mode;
15979 /* The size of the value. With iWMMXt enabled
15980 sizes > 4 also imply an alignment of 8 bytes. */
15981 int fix_size;
15982 };
15983
15984 struct minipool_fixup
15985 {
15986 Mfix * next;
15987 rtx_insn * insn;
15988 HOST_WIDE_INT address;
15989 rtx * loc;
15990 machine_mode mode;
15991 int fix_size;
15992 rtx value;
15993 Mnode * minipool;
15994 HOST_WIDE_INT forwards;
15995 HOST_WIDE_INT backwards;
15996 };
15997
15998 /* Fixes less than a word need padding out to a word boundary. */
15999 #define MINIPOOL_FIX_SIZE(mode) \
16000 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
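
/* Illustrative only (not part of the original sources): for the common
   modes this evaluates to

       MINIPOOL_FIX_SIZE (QImode)  == 4   (1-byte value padded to a word)
       MINIPOOL_FIX_SIZE (HImode)  == 4   (2-byte value padded to a word)
       MINIPOOL_FIX_SIZE (SImode)  == 4
       MINIPOOL_FIX_SIZE (DImode)  == 8

   so every pool entry occupies at least one full word.  */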
16001
16002 static Mnode * minipool_vector_head;
16003 static Mnode * minipool_vector_tail;
16004 static rtx_code_label *minipool_vector_label;
16005 static int minipool_pad;
16006
16007 /* The linked list of all minipool fixes required for this function. */
16008 Mfix * minipool_fix_head;
16009 Mfix * minipool_fix_tail;
16010 /* The fix entry for the current minipool, once it has been placed. */
16011 Mfix * minipool_barrier;
16012
16013 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16014 #define JUMP_TABLES_IN_TEXT_SECTION 0
16015 #endif
16016
16017 static HOST_WIDE_INT
16018 get_jump_table_size (rtx_jump_table_data *insn)
16019 {
16020 /* ADDR_VECs only take room if read-only data goes into the text
16021 section. */
16022 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16023 {
16024 rtx body = PATTERN (insn);
16025 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16026 HOST_WIDE_INT size;
16027 HOST_WIDE_INT modesize;
16028
16029 modesize = GET_MODE_SIZE (GET_MODE (body));
16030 size = modesize * XVECLEN (body, elt);
16031 switch (modesize)
16032 {
16033 case 1:
16034 /* Round up size of TBB table to a halfword boundary. */
16035 size = (size + 1) & ~HOST_WIDE_INT_1;
16036 break;
16037 case 2:
16038 /* No padding necessary for TBH. */
16039 break;
16040 case 4:
16041 /* Add two bytes for alignment on Thumb. */
16042 if (TARGET_THUMB)
16043 size += 2;
16044 break;
16045 default:
16046 gcc_unreachable ();
16047 }
16048 return size;
16049 }
16050
16051 return 0;
16052 }
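
/* Worked example (illustrative only): a Thumb-2 TBB dispatch table is an
   ADDR_DIFF_VEC in QImode, so with, say, five case labels the raw size is
   5 * 1 = 5 bytes, which the code above rounds up to 6 so that the
   following instruction stays halfword aligned.  A TBH table (HImode) of
   five entries is 10 bytes and needs no extra padding.  */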
16053
16054 /* Return the maximum amount of padding that will be inserted before
16055 label LABEL. */
16056
16057 static HOST_WIDE_INT
16058 get_label_padding (rtx label)
16059 {
16060 HOST_WIDE_INT align, min_insn_size;
16061
16062 align = 1 << label_to_alignment (label).levels[0].log;
16063 min_insn_size = TARGET_THUMB ? 2 : 4;
16064 return align > min_insn_size ? align - min_insn_size : 0;
16065 }
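
/* Worked example (illustrative only): if LABEL is aligned to 8 bytes and
   we are compiling Thumb code (minimum insn size 2), the assembler may
   have to emit up to 8 - 2 = 6 bytes of padding before it; in ARM state
   (minimum insn size 4) the worst case is 4 bytes.  Callers use this worst
   case because the real padding is not known until the pools themselves
   have been placed.  */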
16066
16067 /* Move a minipool fix MP from its current location to before MAX_MP.
16068 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16069 constraints may need updating. */
16070 static Mnode *
16071 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16072 HOST_WIDE_INT max_address)
16073 {
16074 /* The code below assumes these are different. */
16075 gcc_assert (mp != max_mp);
16076
16077 if (max_mp == NULL)
16078 {
16079 if (max_address < mp->max_address)
16080 mp->max_address = max_address;
16081 }
16082 else
16083 {
16084 if (max_address > max_mp->max_address - mp->fix_size)
16085 mp->max_address = max_mp->max_address - mp->fix_size;
16086 else
16087 mp->max_address = max_address;
16088
16089 /* Unlink MP from its current position. Since max_mp is non-null,
16090 mp->prev must be non-null. */
16091 mp->prev->next = mp->next;
16092 if (mp->next != NULL)
16093 mp->next->prev = mp->prev;
16094 else
16095 minipool_vector_tail = mp->prev;
16096
16097 /* Re-insert it before MAX_MP. */
16098 mp->next = max_mp;
16099 mp->prev = max_mp->prev;
16100 max_mp->prev = mp;
16101
16102 if (mp->prev != NULL)
16103 mp->prev->next = mp;
16104 else
16105 minipool_vector_head = mp;
16106 }
16107
16108 /* Save the new entry. */
16109 max_mp = mp;
16110
16111 /* Scan over the preceding entries and adjust their addresses as
16112 required. */
16113 while (mp->prev != NULL
16114 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16115 {
16116 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16117 mp = mp->prev;
16118 }
16119
16120 return max_mp;
16121 }
16122
16123 /* Add a constant to the minipool for a forward reference. Returns the
16124 node added or NULL if the constant will not fit in this pool. */
16125 static Mnode *
16126 add_minipool_forward_ref (Mfix *fix)
16127 {
16128 /* If set, max_mp is the first pool_entry that has a lower
16129 constraint than the one we are trying to add. */
16130 Mnode * max_mp = NULL;
16131 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16132 Mnode * mp;
16133
16134 /* If the minipool starts before the end of FIX->INSN then this FIX
16135 cannot be placed into the current pool. Furthermore, adding the
16136 new constant pool entry may cause the pool to start FIX_SIZE bytes
16137 earlier. */
16138 if (minipool_vector_head &&
16139 (fix->address + get_attr_length (fix->insn)
16140 >= minipool_vector_head->max_address - fix->fix_size))
16141 return NULL;
16142
16143 /* Scan the pool to see if a constant with the same value has
16144 already been added. While we are doing this, also note the
16145 location where we must insert the constant if it doesn't already
16146 exist. */
16147 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16148 {
16149 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16150 && fix->mode == mp->mode
16151 && (!LABEL_P (fix->value)
16152 || (CODE_LABEL_NUMBER (fix->value)
16153 == CODE_LABEL_NUMBER (mp->value)))
16154 && rtx_equal_p (fix->value, mp->value))
16155 {
16156 /* More than one fix references this entry. */
16157 mp->refcount++;
16158 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16159 }
16160
16161 /* Note the insertion point if necessary. */
16162 if (max_mp == NULL
16163 && mp->max_address > max_address)
16164 max_mp = mp;
16165
16166 /* If we are inserting an 8-byte aligned quantity and
16167 we have not already found an insertion point, then
16168 make sure that all such 8-byte aligned quantities are
16169 placed at the start of the pool. */
16170 if (ARM_DOUBLEWORD_ALIGN
16171 && max_mp == NULL
16172 && fix->fix_size >= 8
16173 && mp->fix_size < 8)
16174 {
16175 max_mp = mp;
16176 max_address = mp->max_address;
16177 }
16178 }
16179
16180 /* The value is not currently in the minipool, so we need to create
16181 a new entry for it. If MAX_MP is NULL, the entry will be put on
16182 the end of the list since the placement is less constrained than
16183 any existing entry. Otherwise, we insert the new fix before
16184 MAX_MP and, if necessary, adjust the constraints on the other
16185 entries. */
16186 mp = XNEW (Mnode);
16187 mp->fix_size = fix->fix_size;
16188 mp->mode = fix->mode;
16189 mp->value = fix->value;
16190 mp->refcount = 1;
16191 /* Not yet required for a backwards ref. */
16192 mp->min_address = -65536;
16193
16194 if (max_mp == NULL)
16195 {
16196 mp->max_address = max_address;
16197 mp->next = NULL;
16198 mp->prev = minipool_vector_tail;
16199
16200 if (mp->prev == NULL)
16201 {
16202 minipool_vector_head = mp;
16203 minipool_vector_label = gen_label_rtx ();
16204 }
16205 else
16206 mp->prev->next = mp;
16207
16208 minipool_vector_tail = mp;
16209 }
16210 else
16211 {
16212 if (max_address > max_mp->max_address - mp->fix_size)
16213 mp->max_address = max_mp->max_address - mp->fix_size;
16214 else
16215 mp->max_address = max_address;
16216
16217 mp->next = max_mp;
16218 mp->prev = max_mp->prev;
16219 max_mp->prev = mp;
16220 if (mp->prev != NULL)
16221 mp->prev->next = mp;
16222 else
16223 minipool_vector_head = mp;
16224 }
16225
16226 /* Save the new entry. */
16227 max_mp = mp;
16228
16229 /* Scan over the preceding entries and adjust their addresses as
16230 required. */
16231 while (mp->prev != NULL
16232 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16233 {
16234 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16235 mp = mp->prev;
16236 }
16237
16238 return max_mp;
16239 }
16240
16241 static Mnode *
16242 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16243 HOST_WIDE_INT min_address)
16244 {
16245 HOST_WIDE_INT offset;
16246
16247 /* The code below assumes these are different. */
16248 gcc_assert (mp != min_mp);
16249
16250 if (min_mp == NULL)
16251 {
16252 if (min_address > mp->min_address)
16253 mp->min_address = min_address;
16254 }
16255 else
16256 {
16257 /* We will adjust this below if it is too loose. */
16258 mp->min_address = min_address;
16259
16260 /* Unlink MP from its current position. Since min_mp is non-null,
16261 mp->next must be non-null. */
16262 mp->next->prev = mp->prev;
16263 if (mp->prev != NULL)
16264 mp->prev->next = mp->next;
16265 else
16266 minipool_vector_head = mp->next;
16267
16268 /* Reinsert it after MIN_MP. */
16269 mp->prev = min_mp;
16270 mp->next = min_mp->next;
16271 min_mp->next = mp;
16272 if (mp->next != NULL)
16273 mp->next->prev = mp;
16274 else
16275 minipool_vector_tail = mp;
16276 }
16277
16278 min_mp = mp;
16279
16280 offset = 0;
16281 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16282 {
16283 mp->offset = offset;
16284 if (mp->refcount > 0)
16285 offset += mp->fix_size;
16286
16287 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16288 mp->next->min_address = mp->min_address + mp->fix_size;
16289 }
16290
16291 return min_mp;
16292 }
16293
16294 /* Add a constant to the minipool for a backward reference. Returns the
16295 node added or NULL if the constant will not fit in this pool.
16296
16297 Note that the code for insertion for a backwards reference can be
16298 somewhat confusing because the calculated offsets for each fix do
16299 not take into account the size of the pool (which is still under
16300 construction). */
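
/* Worked example (illustrative only, hypothetical numbers): an insn at
   address 1200 whose neg_pool_range allows a backwards reach of 1020 bytes
   gives min_address = 1200 - 1020 = 180, i.e. the pool entry it uses must
   not be placed before address 180.  An insn near the start of the
   function can give a negative min_address, which simply means the
   constraint never bites.  The first test in the function below rejects
   fixes whose min_address lies at or beyond the barrier where the current
   pool will be emitted, since such a pool would be out of backward
   reach.  */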
16301 static Mnode *
16302 add_minipool_backward_ref (Mfix *fix)
16303 {
16304 /* If set, min_mp is the last pool_entry that has a lower constraint
16305 than the one we are trying to add. */
16306 Mnode *min_mp = NULL;
16307 /* This can be negative, since it is only a constraint. */
16308 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16309 Mnode *mp;
16310
16311 /* If we can't reach the current pool from this insn, or if we can't
16312 insert this entry at the end of the pool without pushing other
16313 fixes out of range, then we don't try. This ensures that we
16314 can't fail later on. */
16315 if (min_address >= minipool_barrier->address
16316 || (minipool_vector_tail->min_address + fix->fix_size
16317 >= minipool_barrier->address))
16318 return NULL;
16319
16320 /* Scan the pool to see if a constant with the same value has
16321 already been added. While we are doing this, also note the
16322 location where we must insert the constant if it doesn't already
16323 exist. */
16324 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16325 {
16326 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16327 && fix->mode == mp->mode
16328 && (!LABEL_P (fix->value)
16329 || (CODE_LABEL_NUMBER (fix->value)
16330 == CODE_LABEL_NUMBER (mp->value)))
16331 && rtx_equal_p (fix->value, mp->value)
16332 /* Check that there is enough slack to move this entry to the
16333 end of the table (this is conservative). */
16334 && (mp->max_address
16335 > (minipool_barrier->address
16336 + minipool_vector_tail->offset
16337 + minipool_vector_tail->fix_size)))
16338 {
16339 mp->refcount++;
16340 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16341 }
16342
16343 if (min_mp != NULL)
16344 mp->min_address += fix->fix_size;
16345 else
16346 {
16347 /* Note the insertion point if necessary. */
16348 if (mp->min_address < min_address)
16349 {
16350 /* For now, we do not allow the insertion of 8-byte alignment
16351 requiring nodes anywhere but at the start of the pool. */
16352 if (ARM_DOUBLEWORD_ALIGN
16353 && fix->fix_size >= 8 && mp->fix_size < 8)
16354 return NULL;
16355 else
16356 min_mp = mp;
16357 }
16358 else if (mp->max_address
16359 < minipool_barrier->address + mp->offset + fix->fix_size)
16360 {
16361 /* Inserting before this entry would push the fix beyond
16362 its maximum address (which can happen if we have
16363 re-located a forwards fix); force the new fix to come
16364 after it. */
16365 if (ARM_DOUBLEWORD_ALIGN
16366 && fix->fix_size >= 8 && mp->fix_size < 8)
16367 return NULL;
16368 else
16369 {
16370 min_mp = mp;
16371 min_address = mp->min_address + fix->fix_size;
16372 }
16373 }
16374 /* Do not insert a non-8-byte aligned quantity before 8-byte
16375 aligned quantities. */
16376 else if (ARM_DOUBLEWORD_ALIGN
16377 && fix->fix_size < 8
16378 && mp->fix_size >= 8)
16379 {
16380 min_mp = mp;
16381 min_address = mp->min_address + fix->fix_size;
16382 }
16383 }
16384 }
16385
16386 /* We need to create a new entry. */
16387 mp = XNEW (Mnode);
16388 mp->fix_size = fix->fix_size;
16389 mp->mode = fix->mode;
16390 mp->value = fix->value;
16391 mp->refcount = 1;
16392 mp->max_address = minipool_barrier->address + 65536;
16393
16394 mp->min_address = min_address;
16395
16396 if (min_mp == NULL)
16397 {
16398 mp->prev = NULL;
16399 mp->next = minipool_vector_head;
16400
16401 if (mp->next == NULL)
16402 {
16403 minipool_vector_tail = mp;
16404 minipool_vector_label = gen_label_rtx ();
16405 }
16406 else
16407 mp->next->prev = mp;
16408
16409 minipool_vector_head = mp;
16410 }
16411 else
16412 {
16413 mp->next = min_mp->next;
16414 mp->prev = min_mp;
16415 min_mp->next = mp;
16416
16417 if (mp->next != NULL)
16418 mp->next->prev = mp;
16419 else
16420 minipool_vector_tail = mp;
16421 }
16422
16423 /* Save the new entry. */
16424 min_mp = mp;
16425
16426 if (mp->prev)
16427 mp = mp->prev;
16428 else
16429 mp->offset = 0;
16430
16431 /* Scan over the following entries and adjust their offsets. */
16432 while (mp->next != NULL)
16433 {
16434 if (mp->next->min_address < mp->min_address + mp->fix_size)
16435 mp->next->min_address = mp->min_address + mp->fix_size;
16436
16437 if (mp->refcount)
16438 mp->next->offset = mp->offset + mp->fix_size;
16439 else
16440 mp->next->offset = mp->offset;
16441
16442 mp = mp->next;
16443 }
16444
16445 return min_mp;
16446 }
16447
16448 static void
16449 assign_minipool_offsets (Mfix *barrier)
16450 {
16451 HOST_WIDE_INT offset = 0;
16452 Mnode *mp;
16453
16454 minipool_barrier = barrier;
16455
16456 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16457 {
16458 mp->offset = offset;
16459
16460 if (mp->refcount > 0)
16461 offset += mp->fix_size;
16462 }
16463 }
16464
16465 /* Output the literal table */
16466 static void
16467 dump_minipool (rtx_insn *scan)
16468 {
16469 Mnode * mp;
16470 Mnode * nmp;
16471 int align64 = 0;
16472
16473 if (ARM_DOUBLEWORD_ALIGN)
16474 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16475 if (mp->refcount > 0 && mp->fix_size >= 8)
16476 {
16477 align64 = 1;
16478 break;
16479 }
16480
16481 if (dump_file)
16482 fprintf (dump_file,
16483 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16484 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16485
16486 scan = emit_label_after (gen_label_rtx (), scan);
16487 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16488 scan = emit_label_after (minipool_vector_label, scan);
16489
16490 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16491 {
16492 if (mp->refcount > 0)
16493 {
16494 if (dump_file)
16495 {
16496 fprintf (dump_file,
16497 ";; Offset %u, min %ld, max %ld ",
16498 (unsigned) mp->offset, (unsigned long) mp->min_address,
16499 (unsigned long) mp->max_address);
16500 arm_print_value (dump_file, mp->value);
16501 fputc ('\n', dump_file);
16502 }
16503
16504 rtx val = copy_rtx (mp->value);
16505
16506 switch (GET_MODE_SIZE (mp->mode))
16507 {
16508 #ifdef HAVE_consttable_1
16509 case 1:
16510 scan = emit_insn_after (gen_consttable_1 (val), scan);
16511 break;
16512
16513 #endif
16514 #ifdef HAVE_consttable_2
16515 case 2:
16516 scan = emit_insn_after (gen_consttable_2 (val), scan);
16517 break;
16518
16519 #endif
16520 #ifdef HAVE_consttable_4
16521 case 4:
16522 scan = emit_insn_after (gen_consttable_4 (val), scan);
16523 break;
16524
16525 #endif
16526 #ifdef HAVE_consttable_8
16527 case 8:
16528 scan = emit_insn_after (gen_consttable_8 (val), scan);
16529 break;
16530
16531 #endif
16532 #ifdef HAVE_consttable_16
16533 case 16:
16534 scan = emit_insn_after (gen_consttable_16 (val), scan);
16535 break;
16536
16537 #endif
16538 default:
16539 gcc_unreachable ();
16540 }
16541 }
16542
16543 nmp = mp->next;
16544 free (mp);
16545 }
16546
16547 minipool_vector_head = minipool_vector_tail = NULL;
16548 scan = emit_insn_after (gen_consttable_end (), scan);
16549 scan = emit_barrier_after (scan);
16550 }
16551
16552 /* Return the cost of forcibly inserting a barrier after INSN. */
16553 static int
16554 arm_barrier_cost (rtx_insn *insn)
16555 {
16556 /* Basing the location of the pool on the loop depth is preferable,
16557 but at the moment, the basic block information seems to be
16558 corrupt by this stage of the compilation. */
16559 int base_cost = 50;
16560 rtx_insn *next = next_nonnote_insn (insn);
16561
16562 if (next != NULL && LABEL_P (next))
16563 base_cost -= 20;
16564
16565 switch (GET_CODE (insn))
16566 {
16567 case CODE_LABEL:
16568 /* It will always be better to place the table before the label, rather
16569 than after it. */
16570 return 50;
16571
16572 case INSN:
16573 case CALL_INSN:
16574 return base_cost;
16575
16576 case JUMP_INSN:
16577 return base_cost - 10;
16578
16579 default:
16580 return base_cost + 10;
16581 }
16582 }
16583
16584 /* Find the best place in the insn stream in the range
16585 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16586 Create the barrier by inserting a jump and add a new fix entry for
16587 it. */
16588 static Mfix *
16589 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16590 {
16591 HOST_WIDE_INT count = 0;
16592 rtx_barrier *barrier;
16593 rtx_insn *from = fix->insn;
16594 /* The instruction after which we will insert the jump. */
16595 rtx_insn *selected = NULL;
16596 int selected_cost;
16597 /* The address at which the jump instruction will be placed. */
16598 HOST_WIDE_INT selected_address;
16599 Mfix * new_fix;
16600 HOST_WIDE_INT max_count = max_address - fix->address;
16601 rtx_code_label *label = gen_label_rtx ();
16602
16603 selected_cost = arm_barrier_cost (from);
16604 selected_address = fix->address;
16605
16606 while (from && count < max_count)
16607 {
16608 rtx_jump_table_data *tmp;
16609 int new_cost;
16610
16611 /* This code shouldn't have been called if there was a natural barrier
16612 within range. */
16613 gcc_assert (!BARRIER_P (from));
16614
16615 /* Count the length of this insn. This must stay in sync with the
16616 code that pushes minipool fixes. */
16617 if (LABEL_P (from))
16618 count += get_label_padding (from);
16619 else
16620 count += get_attr_length (from);
16621
16622 /* If there is a jump table, add its length. */
16623 if (tablejump_p (from, NULL, &tmp))
16624 {
16625 count += get_jump_table_size (tmp);
16626
16627 /* Jump tables aren't in a basic block, so base the cost on
16628 the dispatch insn. If we select this location, we will
16629 still put the pool after the table. */
16630 new_cost = arm_barrier_cost (from);
16631
16632 if (count < max_count
16633 && (!selected || new_cost <= selected_cost))
16634 {
16635 selected = tmp;
16636 selected_cost = new_cost;
16637 selected_address = fix->address + count;
16638 }
16639
16640 /* Continue after the dispatch table. */
16641 from = NEXT_INSN (tmp);
16642 continue;
16643 }
16644
16645 new_cost = arm_barrier_cost (from);
16646
16647 if (count < max_count
16648 && (!selected || new_cost <= selected_cost))
16649 {
16650 selected = from;
16651 selected_cost = new_cost;
16652 selected_address = fix->address + count;
16653 }
16654
16655 from = NEXT_INSN (from);
16656 }
16657
16658 /* Make sure that we found a place to insert the jump. */
16659 gcc_assert (selected);
16660
16661 /* Create a new JUMP_INSN that branches around a barrier. */
16662 from = emit_jump_insn_after (gen_jump (label), selected);
16663 JUMP_LABEL (from) = label;
16664 barrier = emit_barrier_after (from);
16665 emit_label_after (label, barrier);
16666
16667 /* Create a minipool barrier entry for the new barrier. */
16668 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16669 new_fix->insn = barrier;
16670 new_fix->address = selected_address;
16671 new_fix->next = fix->next;
16672 fix->next = new_fix;
16673
16674 return new_fix;
16675 }
16676
16677 /* Record that there is a natural barrier in the insn stream at
16678 ADDRESS. */
16679 static void
16680 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16681 {
16682 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16683
16684 fix->insn = insn;
16685 fix->address = address;
16686
16687 fix->next = NULL;
16688 if (minipool_fix_head != NULL)
16689 minipool_fix_tail->next = fix;
16690 else
16691 minipool_fix_head = fix;
16692
16693 minipool_fix_tail = fix;
16694 }
16695
16696 /* Record INSN, which will need fixing up to load a value from the
16697 minipool. ADDRESS is the offset of the insn since the start of the
16698 function; LOC is a pointer to the part of the insn which requires
16699 fixing; VALUE is the constant that must be loaded, which is of type
16700 MODE. */
16701 static void
16702 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16703 machine_mode mode, rtx value)
16704 {
16705 gcc_assert (!arm_disable_literal_pool);
16706 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16707
16708 fix->insn = insn;
16709 fix->address = address;
16710 fix->loc = loc;
16711 fix->mode = mode;
16712 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16713 fix->value = value;
16714 fix->forwards = get_attr_pool_range (insn);
16715 fix->backwards = get_attr_neg_pool_range (insn);
16716 fix->minipool = NULL;
16717
16718 /* If an insn doesn't have a range defined for it, then it isn't
16719 expecting to be reworked by this code. Better to stop now than
16720 to generate duff assembly code. */
16721 gcc_assert (fix->forwards || fix->backwards);
16722
16723 /* If an entry requires 8-byte alignment then assume all constant pools
16724 require 4 bytes of padding. Trying to do this later on a per-pool
16725 basis is awkward because existing pool entries have to be modified. */
16726 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16727 minipool_pad = 4;
16728
16729 if (dump_file)
16730 {
16731 fprintf (dump_file,
16732 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16733 GET_MODE_NAME (mode),
16734 INSN_UID (insn), (unsigned long) address,
16735 -1 * (long)fix->backwards, (long)fix->forwards);
16736 arm_print_value (dump_file, fix->value);
16737 fprintf (dump_file, "\n");
16738 }
16739
16740 /* Add it to the chain of fixes. */
16741 fix->next = NULL;
16742
16743 if (minipool_fix_head != NULL)
16744 minipool_fix_tail->next = fix;
16745 else
16746 minipool_fix_head = fix;
16747
16748 minipool_fix_tail = fix;
16749 }
16750
16751 /* Return the maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16752 Returns the number of insns allowed, or 99 if we always want to synthesize
16753 the value inline. */
16754 int
16755 arm_max_const_double_inline_cost ()
16756 {
16757 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16758 }
16759
16760 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16761 Returns the number of insns needed, or 99 if we don't know how to
16762 do it. */
16763 int
16764 arm_const_double_inline_cost (rtx val)
16765 {
16766 rtx lowpart, highpart;
16767 machine_mode mode;
16768
16769 mode = GET_MODE (val);
16770
16771 if (mode == VOIDmode)
16772 mode = DImode;
16773
16774 gcc_assert (GET_MODE_SIZE (mode) == 8);
16775
16776 lowpart = gen_lowpart (SImode, val);
16777 highpart = gen_highpart_mode (SImode, mode, val);
16778
16779 gcc_assert (CONST_INT_P (lowpart));
16780 gcc_assert (CONST_INT_P (highpart));
16781
16782 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16783 NULL_RTX, NULL_RTX, 0, 0)
16784 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16785 NULL_RTX, NULL_RTX, 0, 0));
16786 }
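
/* Worked example (illustrative only): for the DImode constant
   0x0000000100000001 both halves are 1, each of which arm_gen_constant can
   build with a single MOV, so the cost is 1 + 1 = 2.  That is within
   arm_max_const_double_inline_cost () (3 or 4), so such a constant is
   typically synthesized as two 32-bit immediates rather than being dumped
   into a literal pool.  */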
16787
16788 /* Cost of loading a SImode constant. */
16789 static inline int
16790 arm_const_inline_cost (enum rtx_code code, rtx val)
16791 {
16792 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16793 NULL_RTX, NULL_RTX, 1, 0);
16794 }
16795
16796 /* Return true if it is worthwhile to split a 64-bit constant into two
16797 32-bit operations. This is the case if optimizing for size, or
16798 if we have load delay slots, or if one 32-bit part can be done with
16799 a single data operation. */
16800 bool
16801 arm_const_double_by_parts (rtx val)
16802 {
16803 machine_mode mode = GET_MODE (val);
16804 rtx part;
16805
16806 if (optimize_size || arm_ld_sched)
16807 return true;
16808
16809 if (mode == VOIDmode)
16810 mode = DImode;
16811
16812 part = gen_highpart_mode (SImode, mode, val);
16813
16814 gcc_assert (CONST_INT_P (part));
16815
16816 if (const_ok_for_arm (INTVAL (part))
16817 || const_ok_for_arm (~INTVAL (part)))
16818 return true;
16819
16820 part = gen_lowpart (SImode, val);
16821
16822 gcc_assert (CONST_INT_P (part));
16823
16824 if (const_ok_for_arm (INTVAL (part))
16825 || const_ok_for_arm (~INTVAL (part)))
16826 return true;
16827
16828 return false;
16829 }
16830
16831 /* Return true if it is possible to inline both the high and low parts
16832 of a 64-bit constant into 32-bit data processing instructions. */
16833 bool
16834 arm_const_double_by_immediates (rtx val)
16835 {
16836 machine_mode mode = GET_MODE (val);
16837 rtx part;
16838
16839 if (mode == VOIDmode)
16840 mode = DImode;
16841
16842 part = gen_highpart_mode (SImode, mode, val);
16843
16844 gcc_assert (CONST_INT_P (part));
16845
16846 if (!const_ok_for_arm (INTVAL (part)))
16847 return false;
16848
16849 part = gen_lowpart (SImode, val);
16850
16851 gcc_assert (CONST_INT_P (part));
16852
16853 if (!const_ok_for_arm (INTVAL (part)))
16854 return false;
16855
16856 return true;
16857 }
16858
16859 /* Scan INSN and note any of its operands that need fixing.
16860 If DO_PUSHES is false we do not actually push any of the fixups
16861 needed. */
16862 static void
16863 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16864 {
16865 int opno;
16866
16867 extract_constrain_insn (insn);
16868
16869 if (recog_data.n_alternatives == 0)
16870 return;
16871
16872 /* Fill in recog_op_alt with information about the constraints of
16873 this insn. */
16874 preprocess_constraints (insn);
16875
16876 const operand_alternative *op_alt = which_op_alt ();
16877 for (opno = 0; opno < recog_data.n_operands; opno++)
16878 {
16879 /* Things we need to fix can only occur in inputs. */
16880 if (recog_data.operand_type[opno] != OP_IN)
16881 continue;
16882
16883 /* If this alternative is a memory reference, then any mention
16884 of constants in this alternative is really to fool reload
16885 into allowing us to accept one there. We need to fix them up
16886 now so that we output the right code. */
16887 if (op_alt[opno].memory_ok)
16888 {
16889 rtx op = recog_data.operand[opno];
16890
16891 if (CONSTANT_P (op))
16892 {
16893 if (do_pushes)
16894 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16895 recog_data.operand_mode[opno], op);
16896 }
16897 else if (MEM_P (op)
16898 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16899 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16900 {
16901 if (do_pushes)
16902 {
16903 rtx cop = avoid_constant_pool_reference (op);
16904
16905 /* Casting the address of something to a mode narrower
16906 than a word can cause avoid_constant_pool_reference()
16907 to return the pool reference itself. That's no good to
16908 us here. Let's just hope that we can use the
16909 constant pool value directly. */
16910 if (op == cop)
16911 cop = get_pool_constant (XEXP (op, 0));
16912
16913 push_minipool_fix (insn, address,
16914 recog_data.operand_loc[opno],
16915 recog_data.operand_mode[opno], cop);
16916 }
16917
16918 }
16919 }
16920 }
16921
16922 return;
16923 }
16924
16925 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16926 and unions in the context of ARMv8-M Security Extensions. It is used as a
16927 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16928 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16929 or four masks, depending on whether it is being computed for a
16930 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16931 respectively. The tree for the type of the argument or a field within an
16932 argument is passed in ARG_TYPE, the current register this argument or field
16933 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16934 argument or field starts at is passed in STARTING_BIT and the last used bit
16935 is kept in LAST_USED_BIT which is also updated accordingly. */
16936
16937 static unsigned HOST_WIDE_INT
16938 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16939 uint32_t * padding_bits_to_clear,
16940 unsigned starting_bit, int * last_used_bit)
16941
16942 {
16943 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16944
16945 if (TREE_CODE (arg_type) == RECORD_TYPE)
16946 {
16947 unsigned current_bit = starting_bit;
16948 tree field;
16949 long int offset, size;
16950
16951
16952 field = TYPE_FIELDS (arg_type);
16953 while (field)
16954 {
16955 /* The offset within a structure is always an offset from
16956 the start of that structure. Make sure we take that into account in
16957 the calculation of the register-based offset that we use here. */
16958 offset = starting_bit;
16959 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16960 offset %= 32;
16961
16962 /* This is the actual size of the field; for bitfields this is the
16963 bitfield width and not the container size. */
16964 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16965
16966 if (*last_used_bit != offset)
16967 {
16968 if (offset < *last_used_bit)
16969 {
16970 /* This field's offset is before the 'last_used_bit', which
16971 means this field goes in the next register. So we need to
16972 pad the rest of the current register and increase the
16973 register number. */
16974 uint32_t mask;
16975 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16976 mask++;
16977
16978 padding_bits_to_clear[*regno] |= mask;
16979 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16980 (*regno)++;
16981 }
16982 else
16983 {
16984 /* Otherwise we pad the bits between the last field's end and
16985 the start of the new field. */
16986 uint32_t mask;
16987
16988 mask = ((uint32_t)-1) >> (32 - offset);
16989 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16990 padding_bits_to_clear[*regno] |= mask;
16991 }
16992 current_bit = offset;
16993 }
16994
16995 /* Calculate further padding bits for inner structs/unions too. */
16996 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16997 {
16998 *last_used_bit = current_bit;
16999 not_to_clear_reg_mask
17000 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
17001 padding_bits_to_clear, offset,
17002 last_used_bit);
17003 }
17004 else
17005 {
17006 /* Update 'current_bit' with this field's size. If the
17007 'current_bit' lies in a subsequent register, update 'regno' and
17008 reset 'current_bit' to point to the current bit in that new
17009 register. */
17010 current_bit += size;
17011 while (current_bit >= 32)
17012 {
17013 current_bit-=32;
17014 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17015 (*regno)++;
17016 }
17017 *last_used_bit = current_bit;
17018 }
17019
17020 field = TREE_CHAIN (field);
17021 }
17022 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17023 }
17024 else if (TREE_CODE (arg_type) == UNION_TYPE)
17025 {
17026 tree field, field_t;
17027 int i, regno_t, field_size;
17028 int max_reg = -1;
17029 int max_bit = -1;
17030 uint32_t mask;
17031 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
17032 = {-1, -1, -1, -1};
17033
17034 /* To compute the padding bits in a union we only consider bits as
17035 padding bits if they are always either a padding bit or fall outside a
17036 field's size for all fields in the union. */
17037 field = TYPE_FIELDS (arg_type);
17038 while (field)
17039 {
17040 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
17041 = {0U, 0U, 0U, 0U};
17042 int last_used_bit_t = *last_used_bit;
17043 regno_t = *regno;
17044 field_t = TREE_TYPE (field);
17045
17046 /* If the field's type is either a record or a union make sure to
17047 compute their padding bits too. */
17048 if (RECORD_OR_UNION_TYPE_P (field_t))
17049 not_to_clear_reg_mask
17050 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
17051 &padding_bits_to_clear_t[0],
17052 starting_bit, &last_used_bit_t);
17053 else
17054 {
17055 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17056 regno_t = (field_size / 32) + *regno;
17057 last_used_bit_t = (starting_bit + field_size) % 32;
17058 }
17059
17060 for (i = *regno; i < regno_t; i++)
17061 {
17062 /* For all but the last register used by this field only keep the
17063 padding bits that were padding bits in this field. */
17064 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
17065 }
17066
17067 /* For the last register, keep all padding bits that were padding
17068 bits in this field and any padding bits that are still valid
17069 as padding bits but fall outside of this field's size. */
17070 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17071 padding_bits_to_clear_res[regno_t]
17072 &= padding_bits_to_clear_t[regno_t] | mask;
17073
17074 /* Update the maximum size of the fields in terms of registers used
17075 ('max_reg') and the 'last_used_bit' in said register. */
17076 if (max_reg < regno_t)
17077 {
17078 max_reg = regno_t;
17079 max_bit = last_used_bit_t;
17080 }
17081 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17082 max_bit = last_used_bit_t;
17083
17084 field = TREE_CHAIN (field);
17085 }
17086
17087 /* Update the current padding_bits_to_clear using the intersection of the
17088 padding bits of all the fields. */
17089 for (i=*regno; i < max_reg; i++)
17090 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17091
17092 /* Do not keep trailing padding bits, we do not know yet whether this
17093 is the end of the argument. */
17094 mask = ((uint32_t) 1 << max_bit) - 1;
17095 padding_bits_to_clear[max_reg]
17096 |= padding_bits_to_clear_res[max_reg] & mask;
17097
17098 *regno = max_reg;
17099 *last_used_bit = max_bit;
17100 }
17101 else
17102 /* This function should only be used for structs and unions. */
17103 gcc_unreachable ();
17104
17105 return not_to_clear_reg_mask;
17106 }
17107
17108 /* In the context of ARMv8-M Security Extensions, this function is used for both
17109 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17110 registers are used when returning or passing arguments, which is then
17111 returned as a mask. It will also compute a mask to indicate padding/unused
17112 bits for each of these registers, and passes this through the
17113 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17114 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17115 the starting register used to pass this argument or return value is passed
17116 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17117 for struct and union types. */
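
/* Worked example (illustrative only): for a 'cmse_nonsecure_call' argument
   of type  struct { char a; short b; }  passed in r0, field 'a' occupies
   bits 0-7 and field 'b' bits 16-31, so the returned mask has only the bit
   for r0 set while padding_bits_to_clear[0] becomes 0x0000ff00: the eight
   padding bits between the two fields must be scrubbed before the call
   even though r0 itself carries the argument.  */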
17118
17119 static unsigned HOST_WIDE_INT
17120 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17121 uint32_t * padding_bits_to_clear)
17122
17123 {
17124 int last_used_bit = 0;
17125 unsigned HOST_WIDE_INT not_to_clear_mask;
17126
17127 if (RECORD_OR_UNION_TYPE_P (arg_type))
17128 {
17129 not_to_clear_mask
17130 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17131 padding_bits_to_clear, 0,
17132 &last_used_bit);
17133
17134
17135 /* If the 'last_used_bit' is not zero, that means we are still using a
17136 part of the last 'regno'. In such cases we must clear the trailing
17137 bits. Otherwise we are not using regno and we should mark it to be
17138 cleared. */
17139 if (last_used_bit != 0)
17140 padding_bits_to_clear[regno]
17141 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17142 else
17143 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17144 }
17145 else
17146 {
17147 not_to_clear_mask = 0;
17148 /* We are not dealing with structs or unions. So these arguments may be
17149 passed in floating point registers too. In some cases a BLKmode is
17150 used when returning or passing arguments in multiple VFP registers. */
17151 if (GET_MODE (arg_rtx) == BLKmode)
17152 {
17153 int i, arg_regs;
17154 rtx reg;
17155
17156 /* This should really only occur when dealing with the hard-float
17157 ABI. */
17158 gcc_assert (TARGET_HARD_FLOAT_ABI);
17159
17160 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17161 {
17162 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17163 gcc_assert (REG_P (reg));
17164
17165 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17166
17167 /* If we are dealing with DF mode, make sure we don't
17168 clear either of the registers it addresses. */
17169 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17170 if (arg_regs > 1)
17171 {
17172 unsigned HOST_WIDE_INT mask;
17173 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17174 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17175 not_to_clear_mask |= mask;
17176 }
17177 }
17178 }
17179 else
17180 {
17181 /* Otherwise we can rely on the MODE to determine how many registers
17182 are being used by this argument. */
17183 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17184 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17185 if (arg_regs > 1)
17186 {
17187 unsigned HOST_WIDE_INT
17188 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17189 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17190 not_to_clear_mask |= mask;
17191 }
17192 }
17193 }
17194
17195 return not_to_clear_mask;
17196 }
17197
17198 /* Clear any secret values from registers before doing a cmse_nonsecure_call
17199 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17200 which registers are to be fully cleared, using the value in register
17201 CLEARING_REG if more efficient. The PADDING_BITS_LEN entries of the array
17202 PADDING_BITS_TO_CLEAR give the bits that need to be cleared in caller-saved
17203 core registers, with SCRATCH_REG used as a scratch register for that clearing.
17204
17205 NOTE: one of the three following assertions must hold:
17206 - SCRATCH_REG is a low register
17207 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
17208 in TO_CLEAR_BITMAP)
17209 - CLEARING_REG is a low register. */
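
/* Continuing the struct example above (illustrative only; the exact
   instructions depend on the target), clearing the padding bits of r0 with
   padding_bits_to_clear[0] == 0x0000ff00 and IP as the scratch register
   comes out roughly as

       mov     ip, #0x00ff     @ low half of ~0x0000ff00
       movt    ip, #0xffff     @ high half, via the ZERO_EXTRACT below
       and     r0, r0, ip      @ wipe the padding bits, keep the fields

   while fully-cleared core registers are simply overwritten with
   CLEARING_REG and fully-cleared VFP registers with the constant 1.0.  */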
17210
17211 static void
17212 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17213 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17214 {
17215 bool saved_clearing = false;
17216 rtx saved_clearing_reg = NULL_RTX;
17217 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17218
17219 gcc_assert (arm_arch_cmse);
17220
17221 if (!bitmap_empty_p (to_clear_bitmap))
17222 {
17223 minregno = bitmap_first_set_bit (to_clear_bitmap);
17224 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17225 }
17226 clearing_regno = REGNO (clearing_reg);
17227
17228 /* Clear padding bits. */
17229 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17230 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17231 {
17232 uint64_t mask;
17233 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17234
17235 if (padding_bits_to_clear[i] == 0)
17236 continue;
17237
17238 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17239 CLEARING_REG as scratch. */
17240 if (TARGET_THUMB1
17241 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17242 {
17243 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17244 such that we can use clearing_reg to clear the unused bits in the
17245 arguments. */
17246 if ((clearing_regno > maxregno
17247 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17248 && !saved_clearing)
17249 {
17250 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17251 emit_move_insn (scratch_reg, clearing_reg);
17252 saved_clearing = true;
17253 saved_clearing_reg = scratch_reg;
17254 }
17255 scratch_reg = clearing_reg;
17256 }
17257
17258 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17259 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17260 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17261
17262 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17263 mask = (~padding_bits_to_clear[i]) >> 16;
17264 rtx16 = gen_int_mode (16, SImode);
17265 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17266 if (mask)
17267 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17268
17269 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17270 }
17271 if (saved_clearing)
17272 emit_move_insn (clearing_reg, saved_clearing_reg);
17273
17274
17275 /* Clear full registers. */
17276
17277 /* If not marked for clearing, clearing_reg already does not contain
17278 any secret. */
17279 if (clearing_regno <= maxregno
17280 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17281 {
17282 emit_move_insn (clearing_reg, const0_rtx);
17283 emit_use (clearing_reg);
17284 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17285 }
17286
17287 for (regno = minregno; regno <= maxregno; regno++)
17288 {
17289 if (!bitmap_bit_p (to_clear_bitmap, regno))
17290 continue;
17291
17292 if (IS_VFP_REGNUM (regno))
17293 {
17294 /* If regno is an even vfp register and its successor is also to
17295 be cleared, use vmov. */
17296 if (TARGET_VFP_DOUBLE
17297 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17298 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17299 {
17300 emit_move_insn (gen_rtx_REG (DFmode, regno),
17301 CONST1_RTX (DFmode));
17302 emit_use (gen_rtx_REG (DFmode, regno));
17303 regno++;
17304 }
17305 else
17306 {
17307 emit_move_insn (gen_rtx_REG (SFmode, regno),
17308 CONST1_RTX (SFmode));
17309 emit_use (gen_rtx_REG (SFmode, regno));
17310 }
17311 }
17312 else
17313 {
17314 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17315 emit_use (gen_rtx_REG (SImode, regno));
17316 }
17317 }
17318 }
17319
17320 /* Clears caller saved registers not used to pass arguments before a
17321 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
17322 registers is done in __gnu_cmse_nonsecure_call libcall.
17323 See libgcc/config/arm/cmse_nonsecure_call.S. */
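
/* Illustrative sketch only (register numbers are hypothetical): for a call
   through a function pointer held in r4 that takes a single int argument
   in r0, the sequence inserted before the call is roughly

       lsrs    r4, r4, #1      @ shift the target address right and back
       lsls    r4, r4, #1      @ left, clearing its LSB without a BIC
       mov     r1, r4          @ unused argument registers are overwritten
       mov     r2, r4          @ with the (non-secret) address register
       mov     r3, r4

   after which the call itself goes through the __gnu_cmse_nonsecure_call
   helper mentioned above.  With the hard-float ABI the caller-saved VFP
   argument registers are cleared as well (see cmse_clear_registers).  */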
17324
17325 static void
17326 cmse_nonsecure_call_clear_caller_saved (void)
17327 {
17328 basic_block bb;
17329
17330 FOR_EACH_BB_FN (bb, cfun)
17331 {
17332 rtx_insn *insn;
17333
17334 FOR_BB_INSNS (bb, insn)
17335 {
17336 unsigned address_regnum, regno, maxregno =
17337 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17338 auto_sbitmap to_clear_bitmap (maxregno + 1);
17339 rtx_insn *seq;
17340 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17341 rtx address;
17342 CUMULATIVE_ARGS args_so_far_v;
17343 cumulative_args_t args_so_far;
17344 tree arg_type, fntype;
17345 bool first_param = true;
17346 function_args_iterator args_iter;
17347 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17348
17349 if (!NONDEBUG_INSN_P (insn))
17350 continue;
17351
17352 if (!CALL_P (insn))
17353 continue;
17354
17355 pat = PATTERN (insn);
17356 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17357 call = XVECEXP (pat, 0, 0);
17358
17359 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17360 if (GET_CODE (call) == SET)
17361 call = SET_SRC (call);
17362
17363 /* Check if it is a cmse_nonsecure_call. */
17364 unspec = XEXP (call, 0);
17365 if (GET_CODE (unspec) != UNSPEC
17366 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17367 continue;
17368
17369 /* Determine the caller-saved registers we need to clear. */
17370 bitmap_clear (to_clear_bitmap);
17371 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17372
17373 /* Only look at the caller-saved floating point registers in case of
17374 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17375 lazy store and loads which clear both caller- and callee-saved
17376 registers. */
17377 if (TARGET_HARD_FLOAT_ABI)
17378 {
17379 auto_sbitmap float_bitmap (maxregno + 1);
17380
17381 bitmap_clear (float_bitmap);
17382 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17383 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17384 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17385 }
17386
17387 /* Make sure the register used to hold the function address is not
17388 cleared. */
17389 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17390 gcc_assert (MEM_P (address));
17391 gcc_assert (REG_P (XEXP (address, 0)));
17392 address_regnum = REGNO (XEXP (address, 0));
17393 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17394 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17395
17396 /* Set basic block of call insn so that df rescan is performed on
17397 insns inserted here. */
17398 set_block_for_insn (insn, bb);
17399 df_set_flags (DF_DEFER_INSN_RESCAN);
17400 start_sequence ();
17401
17402 /* Make sure the scheduler doesn't schedule other insns beyond
17403 here. */
17404 emit_insn (gen_blockage ());
17405
17406 /* Walk through all arguments and clear registers appropriately.
17407 */
17408 fntype = TREE_TYPE (MEM_EXPR (address));
17409 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17410 NULL_TREE);
17411 args_so_far = pack_cumulative_args (&args_so_far_v);
17412 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17413 {
17414 rtx arg_rtx;
17415 uint64_t to_clear_args_mask;
17416
17417 if (VOID_TYPE_P (arg_type))
17418 continue;
17419
17420 function_arg_info arg (arg_type, /*named=*/true);
17421 if (!first_param)
17422 /* ??? We should advance after processing the argument and pass
17423 the argument we're advancing past. */
17424 arm_function_arg_advance (args_so_far, arg);
17425
17426 arg_rtx = arm_function_arg (args_so_far, arg);
17427 gcc_assert (REG_P (arg_rtx));
17428 to_clear_args_mask
17429 = compute_not_to_clear_mask (arg_type, arg_rtx,
17430 REGNO (arg_rtx),
17431 &padding_bits_to_clear[0]);
17432 if (to_clear_args_mask)
17433 {
17434 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17435 {
17436 if (to_clear_args_mask & (1ULL << regno))
17437 bitmap_clear_bit (to_clear_bitmap, regno);
17438 }
17439 }
17440
17441 first_param = false;
17442 }
17443
17444 /* We use right shift and left shift to clear the LSB of the address
17445 we jump to instead of using bic, to avoid having to use an extra
17446 register on Thumb-1. */
17447 clearing_reg = XEXP (address, 0);
17448 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17449 emit_insn (gen_rtx_SET (clearing_reg, shift));
17450 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17451 emit_insn (gen_rtx_SET (clearing_reg, shift));
17452
17453 /* Clear caller-saved registers that leak before doing a non-secure
17454 call. */
17455 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17456 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17457 NUM_ARG_REGS, ip_reg, clearing_reg);
17458
17459 seq = get_insns ();
17460 end_sequence ();
17461 emit_insn_before (seq, insn);
17462 }
17463 }
17464 }
17465
17466 /* Rewrite move insn into subtract of 0 if the condition codes will
17467 be useful in next conditional jump insn. */
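
/* Illustrative sketch only (hypothetical registers and label): a block
   ending in

       movs    r1, r0
       ...
       cmp     r1, #0
       beq     .L2

   has its move rewritten as the equivalent flag-setting subtract

       subs    r1, r0, #0

   and the branch is made to compare the destination of that subtract, the
   intent being that the condition codes set by the SUBS make the explicit
   compare against zero unnecessary.  */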
17468
17469 static void
17470 thumb1_reorg (void)
17471 {
17472 basic_block bb;
17473
17474 FOR_EACH_BB_FN (bb, cfun)
17475 {
17476 rtx dest, src;
17477 rtx cmp, op0, op1, set = NULL;
17478 rtx_insn *prev, *insn = BB_END (bb);
17479 bool insn_clobbered = false;
17480
17481 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17482 insn = PREV_INSN (insn);
17483
17484 /* Find the last cbranchsi4_insn in basic block BB. */
17485 if (insn == BB_HEAD (bb)
17486 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17487 continue;
17488
17489 /* Get the register with which we are comparing. */
17490 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17491 op0 = XEXP (cmp, 0);
17492 op1 = XEXP (cmp, 1);
17493
17494 /* Check that comparison is against ZERO. */
17495 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17496 continue;
17497
17498 /* Find the first flag setting insn before INSN in basic block BB. */
17499 gcc_assert (insn != BB_HEAD (bb));
17500 for (prev = PREV_INSN (insn);
17501 (!insn_clobbered
17502 && prev != BB_HEAD (bb)
17503 && (NOTE_P (prev)
17504 || DEBUG_INSN_P (prev)
17505 || ((set = single_set (prev)) != NULL
17506 && get_attr_conds (prev) == CONDS_NOCOND)));
17507 prev = PREV_INSN (prev))
17508 {
17509 if (reg_set_p (op0, prev))
17510 insn_clobbered = true;
17511 }
17512
17513 /* Skip if op0 is clobbered by insn other than prev. */
17514 if (insn_clobbered)
17515 continue;
17516
17517 if (!set)
17518 continue;
17519
17520 dest = SET_DEST (set);
17521 src = SET_SRC (set);
17522 if (!low_register_operand (dest, SImode)
17523 || !low_register_operand (src, SImode))
17524 continue;
17525
17526 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17527 in INSN. Both src and dest of the move insn are checked. */
17528 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17529 {
17530 dest = copy_rtx (dest);
17531 src = copy_rtx (src);
17532 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17533 PATTERN (prev) = gen_rtx_SET (dest, src);
17534 INSN_CODE (prev) = -1;
17535 /* Set test register in INSN to dest. */
17536 XEXP (cmp, 0) = copy_rtx (dest);
17537 INSN_CODE (insn) = -1;
17538 }
17539 }
17540 }
17541
17542 /* Convert instructions to their cc-clobbering variant if possible, since
17543 that allows us to use smaller encodings. */
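
/* Illustrative sketch only: in Thumb-2,

       add     r2, r0, r1      @ flag-preserving form, 32-bit encoding

   can be replaced, whenever the condition codes are dead at that point, by

       adds    r2, r0, r1      @ flag-setting form, 16-bit encoding

   which the loop below arranges at the RTL level by adding a clobber of
   the condition-code register to the SET.  */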
17544
17545 static void
17546 thumb2_reorg (void)
17547 {
17548 basic_block bb;
17549 regset_head live;
17550
17551 INIT_REG_SET (&live);
17552
17553 /* We are freeing block_for_insn in the toplev to keep compatibility
17554 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17555 compute_bb_for_insn ();
17556 df_analyze ();
17557
17558 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17559
17560 FOR_EACH_BB_FN (bb, cfun)
17561 {
17562 if ((current_tune->disparage_flag_setting_t16_encodings
17563 == tune_params::DISPARAGE_FLAGS_ALL)
17564 && optimize_bb_for_speed_p (bb))
17565 continue;
17566
17567 rtx_insn *insn;
17568 Convert_Action action = SKIP;
17569 Convert_Action action_for_partial_flag_setting
17570 = ((current_tune->disparage_flag_setting_t16_encodings
17571 != tune_params::DISPARAGE_FLAGS_NEITHER)
17572 && optimize_bb_for_speed_p (bb))
17573 ? SKIP : CONV;
17574
17575 COPY_REG_SET (&live, DF_LR_OUT (bb));
17576 df_simulate_initialize_backwards (bb, &live);
17577 FOR_BB_INSNS_REVERSE (bb, insn)
17578 {
17579 if (NONJUMP_INSN_P (insn)
17580 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17581 && GET_CODE (PATTERN (insn)) == SET)
17582 {
17583 action = SKIP;
17584 rtx pat = PATTERN (insn);
17585 rtx dst = XEXP (pat, 0);
17586 rtx src = XEXP (pat, 1);
17587 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17588
17589 if (UNARY_P (src) || BINARY_P (src))
17590 op0 = XEXP (src, 0);
17591
17592 if (BINARY_P (src))
17593 op1 = XEXP (src, 1);
17594
17595 if (low_register_operand (dst, SImode))
17596 {
17597 switch (GET_CODE (src))
17598 {
17599 case PLUS:
17600 /* Adding two registers and storing the result
17601 in the first source is already a 16-bit
17602 operation. */
17603 if (rtx_equal_p (dst, op0)
17604 && register_operand (op1, SImode))
17605 break;
17606
17607 if (low_register_operand (op0, SImode))
17608 {
17609 /* ADDS <Rd>,<Rn>,<Rm> */
17610 if (low_register_operand (op1, SImode))
17611 action = CONV;
17612 /* ADDS <Rdn>,#<imm8> */
17613 /* SUBS <Rdn>,#<imm8> */
17614 else if (rtx_equal_p (dst, op0)
17615 && CONST_INT_P (op1)
17616 && IN_RANGE (INTVAL (op1), -255, 255))
17617 action = CONV;
17618 /* ADDS <Rd>,<Rn>,#<imm3> */
17619 /* SUBS <Rd>,<Rn>,#<imm3> */
17620 else if (CONST_INT_P (op1)
17621 && IN_RANGE (INTVAL (op1), -7, 7))
17622 action = CONV;
17623 }
17624 /* ADCS <Rd>, <Rn> */
17625 else if (GET_CODE (XEXP (src, 0)) == PLUS
17626 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17627 && low_register_operand (XEXP (XEXP (src, 0), 1),
17628 SImode)
17629 && COMPARISON_P (op1)
17630 && cc_register (XEXP (op1, 0), VOIDmode)
17631 && maybe_get_arm_condition_code (op1) == ARM_CS
17632 && XEXP (op1, 1) == const0_rtx)
17633 action = CONV;
17634 break;
17635
17636 case MINUS:
17637 /* RSBS <Rd>,<Rn>,#0
17638 Not handled here: see NEG below. */
17639 /* SUBS <Rd>,<Rn>,#<imm3>
17640 SUBS <Rdn>,#<imm8>
17641 Not handled here: see PLUS above. */
17642 /* SUBS <Rd>,<Rn>,<Rm> */
17643 if (low_register_operand (op0, SImode)
17644 && low_register_operand (op1, SImode))
17645 action = CONV;
17646 break;
17647
17648 case MULT:
17649 /* MULS <Rdm>,<Rn>,<Rdm>
17650 As an exception to the rule, this is only used
17651 when optimizing for size since MULS is slow on all
17652 known implementations. We do not even want to use
17653 MULS in cold code, if optimizing for speed, so we
17654 test the global flag here. */
17655 if (!optimize_size)
17656 break;
17657 /* Fall through. */
17658 case AND:
17659 case IOR:
17660 case XOR:
17661 /* ANDS <Rdn>,<Rm> */
17662 if (rtx_equal_p (dst, op0)
17663 && low_register_operand (op1, SImode))
17664 action = action_for_partial_flag_setting;
17665 else if (rtx_equal_p (dst, op1)
17666 && low_register_operand (op0, SImode))
17667 action = action_for_partial_flag_setting == SKIP
17668 ? SKIP : SWAP_CONV;
17669 break;
17670
17671 case ASHIFTRT:
17672 case ASHIFT:
17673 case LSHIFTRT:
17674 /* ASRS <Rdn>,<Rm> */
17675 /* LSRS <Rdn>,<Rm> */
17676 /* LSLS <Rdn>,<Rm> */
17677 if (rtx_equal_p (dst, op0)
17678 && low_register_operand (op1, SImode))
17679 action = action_for_partial_flag_setting;
17680 /* ASRS <Rd>,<Rm>,#<imm5> */
17681 /* LSRS <Rd>,<Rm>,#<imm5> */
17682 /* LSLS <Rd>,<Rm>,#<imm5> */
17683 else if (low_register_operand (op0, SImode)
17684 && CONST_INT_P (op1)
17685 && IN_RANGE (INTVAL (op1), 0, 31))
17686 action = action_for_partial_flag_setting;
17687 break;
17688
17689 case ROTATERT:
17690 /* RORS <Rdn>,<Rm> */
17691 if (rtx_equal_p (dst, op0)
17692 && low_register_operand (op1, SImode))
17693 action = action_for_partial_flag_setting;
17694 break;
17695
17696 case NOT:
17697 /* MVNS <Rd>,<Rm> */
17698 if (low_register_operand (op0, SImode))
17699 action = action_for_partial_flag_setting;
17700 break;
17701
17702 case NEG:
17703 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17704 if (low_register_operand (op0, SImode))
17705 action = CONV;
17706 break;
17707
17708 case CONST_INT:
17709 /* MOVS <Rd>,#<imm8> */
17710 if (CONST_INT_P (src)
17711 && IN_RANGE (INTVAL (src), 0, 255))
17712 action = action_for_partial_flag_setting;
17713 break;
17714
17715 case REG:
17716 /* MOVS and MOV<c> with registers have different
17717 encodings, so are not relevant here. */
17718 break;
17719
17720 default:
17721 break;
17722 }
17723 }
17724
17725 if (action != SKIP)
17726 {
17727 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17728 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17729 rtvec vec;
17730
17731 if (action == SWAP_CONV)
17732 {
17733 src = copy_rtx (src);
17734 XEXP (src, 0) = op1;
17735 XEXP (src, 1) = op0;
17736 pat = gen_rtx_SET (dst, src);
17737 vec = gen_rtvec (2, pat, clobber);
17738 }
17739 else /* action == CONV */
17740 vec = gen_rtvec (2, pat, clobber);
17741
17742 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17743 INSN_CODE (insn) = -1;
17744 }
17745 }
17746
17747 if (NONDEBUG_INSN_P (insn))
17748 df_simulate_one_insn_backwards (bb, insn, &live);
17749 }
17750 }
17751
17752 CLEAR_REG_SET (&live);
17753 }
17754
17755 /* GCC puts the pool in the wrong place for ARM, since we can only
17756 load addresses a limited distance around the pc. We do some
17757 special munging to move the constant pool values to the correct
17758 point in the code. */
17759 static void
17760 arm_reorg (void)
17761 {
17762 rtx_insn *insn;
17763 HOST_WIDE_INT address = 0;
17764 Mfix * fix;
17765
17766 if (use_cmse)
17767 cmse_nonsecure_call_clear_caller_saved ();
17768
17769 /* We cannot run the Thumb passes for thunks because there is no CFG. */
17770 if (cfun->is_thunk)
17771 ;
17772 else if (TARGET_THUMB1)
17773 thumb1_reorg ();
17774 else if (TARGET_THUMB2)
17775 thumb2_reorg ();
17776
17777 /* Ensure all insns that must be split have been split at this point.
17778 Otherwise, the pool placement code below may compute incorrect
17779 insn lengths. Note that when optimizing, all insns have already
17780 been split at this point. */
17781 if (!optimize)
17782 split_all_insns_noflow ();
17783
17784   /* When literal pools are disabled, make sure we do not attempt to create
17785      one; it should no longer be necessary to create any.  */
17786 if (arm_disable_literal_pool)
17787     return;
17788
17789 minipool_fix_head = minipool_fix_tail = NULL;
17790
17791 /* The first insn must always be a note, or the code below won't
17792 scan it properly. */
17793 insn = get_insns ();
17794 gcc_assert (NOTE_P (insn));
17795 minipool_pad = 0;
17796
17797 /* Scan all the insns and record the operands that will need fixing. */
17798 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17799 {
17800 if (BARRIER_P (insn))
17801 push_minipool_barrier (insn, address);
17802 else if (INSN_P (insn))
17803 {
17804 rtx_jump_table_data *table;
17805
17806 note_invalid_constants (insn, address, true);
17807 address += get_attr_length (insn);
17808
17809 /* If the insn is a vector jump, add the size of the table
17810 and skip the table. */
17811 if (tablejump_p (insn, NULL, &table))
17812 {
17813 address += get_jump_table_size (table);
17814 insn = table;
17815 }
17816 }
17817 else if (LABEL_P (insn))
17818 /* Add the worst-case padding due to alignment. We don't add
17819 the _current_ padding because the minipool insertions
17820 themselves might change it. */
17821 address += get_label_padding (insn);
17822 }
17823
17824 fix = minipool_fix_head;
17825
17826 /* Now scan the fixups and perform the required changes. */
17827 while (fix)
17828 {
17829 Mfix * ftmp;
17830 Mfix * fdel;
17831 Mfix * last_added_fix;
17832 Mfix * last_barrier = NULL;
17833 Mfix * this_fix;
17834
17835 /* Skip any further barriers before the next fix. */
17836 while (fix && BARRIER_P (fix->insn))
17837 fix = fix->next;
17838
17839 /* No more fixes. */
17840 if (fix == NULL)
17841 break;
17842
17843 last_added_fix = NULL;
17844
17845 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17846 {
17847 if (BARRIER_P (ftmp->insn))
17848 {
17849 if (ftmp->address >= minipool_vector_head->max_address)
17850 break;
17851
17852 last_barrier = ftmp;
17853 }
17854 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17855 break;
17856
17857 last_added_fix = ftmp; /* Keep track of the last fix added. */
17858 }
17859
17860 /* If we found a barrier, drop back to that; any fixes that we
17861 could have reached but come after the barrier will now go in
17862 the next mini-pool. */
17863 if (last_barrier != NULL)
17864 {
17865 /* Reduce the refcount for those fixes that won't go into this
17866 pool after all. */
17867 for (fdel = last_barrier->next;
17868 fdel && fdel != ftmp;
17869 fdel = fdel->next)
17870 {
17871 fdel->minipool->refcount--;
17872 fdel->minipool = NULL;
17873 }
17874
17875 ftmp = last_barrier;
17876 }
17877 else
17878 {
17879 	  /* ftmp is the first fix that we can't fit into this pool, and
17880 	     there are no natural barriers that we could use.  Insert a
17881 	     new barrier in the code somewhere between the previous
17882 	     fix and this one, and arrange to jump around it.  */
17883 HOST_WIDE_INT max_address;
17884
17885 /* The last item on the list of fixes must be a barrier, so
17886 we can never run off the end of the list of fixes without
17887 last_barrier being set. */
17888 gcc_assert (ftmp);
17889
17890 max_address = minipool_vector_head->max_address;
17891 /* Check that there isn't another fix that is in range that
17892 we couldn't fit into this pool because the pool was
17893 already too large: we need to put the pool before such an
17894 instruction. The pool itself may come just after the
17895 fix because create_fix_barrier also allows space for a
17896 jump instruction. */
17897 if (ftmp->address < max_address)
17898 max_address = ftmp->address + 1;
17899
17900 last_barrier = create_fix_barrier (last_added_fix, max_address);
17901 }
17902
17903 assign_minipool_offsets (last_barrier);
17904
17905 while (ftmp)
17906 {
17907 if (!BARRIER_P (ftmp->insn)
17908 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17909 == NULL))
17910 break;
17911
17912 ftmp = ftmp->next;
17913 }
17914
17915 /* Scan over the fixes we have identified for this pool, fixing them
17916 up and adding the constants to the pool itself. */
17917 for (this_fix = fix; this_fix && ftmp != this_fix;
17918 this_fix = this_fix->next)
17919 if (!BARRIER_P (this_fix->insn))
17920 {
17921 rtx addr
17922 = plus_constant (Pmode,
17923 gen_rtx_LABEL_REF (VOIDmode,
17924 minipool_vector_label),
17925 this_fix->minipool->offset);
17926 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17927 }
17928
17929 dump_minipool (last_barrier->insn);
17930 fix = ftmp;
17931 }
17932
17933 /* From now on we must synthesize any constants that we can't handle
17934 directly. This can happen if the RTL gets split during final
17935 instruction generation. */
17936 cfun->machine->after_arm_reorg = 1;
17937
17938 /* Free the minipool memory. */
17939 obstack_free (&minipool_obstack, minipool_startobj);
17940 }
17941 \f
17942 /* Routines to output assembly language. */
17943
17944 /* Return string representation of passed in real value. */
17945 static const char *
17946 fp_const_from_val (REAL_VALUE_TYPE *r)
17947 {
17948 if (!fp_consts_inited)
17949 init_fp_table ();
17950
17951 gcc_assert (real_equal (r, &value_fp0));
17952 return "0";
17953 }
17954
17955 /* OPERANDS[0] is the entire list of insns that constitute pop,
17956 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17957 is in the list, UPDATE is true iff the list contains explicit
17958 update of base register. */
17959 void
17960 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17961 bool update)
17962 {
17963 int i;
17964 char pattern[100];
17965 int offset;
17966 const char *conditional;
17967 int num_saves = XVECLEN (operands[0], 0);
17968 unsigned int regno;
17969 unsigned int regno_base = REGNO (operands[1]);
17970 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17971
17972 offset = 0;
17973 offset += update ? 1 : 0;
17974 offset += return_pc ? 1 : 0;
17975
17976 /* Is the base register in the list? */
17977 for (i = offset; i < num_saves; i++)
17978 {
17979 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17980 /* If SP is in the list, then the base register must be SP. */
17981 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17982 /* If base register is in the list, there must be no explicit update. */
17983 if (regno == regno_base)
17984 gcc_assert (!update);
17985 }
17986
17987 conditional = reverse ? "%?%D0" : "%?%d0";
17988 /* Can't use POP if returning from an interrupt. */
17989 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17990 sprintf (pattern, "pop%s\t{", conditional);
17991 else
17992 {
17993 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17994 	 It's just a convention; their semantics are identical.  */
17995 if (regno_base == SP_REGNUM)
17996 sprintf (pattern, "ldmfd%s\t", conditional);
17997 else if (update)
17998 sprintf (pattern, "ldmia%s\t", conditional);
17999 else
18000 sprintf (pattern, "ldm%s\t", conditional);
18001
18002 strcat (pattern, reg_names[regno_base]);
18003 if (update)
18004 strcat (pattern, "!, {");
18005 else
18006 strcat (pattern, ", {");
18007 }
18008
18009 /* Output the first destination register. */
18010 strcat (pattern,
18011 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
18012
18013 /* Output the rest of the destination registers. */
18014 for (i = offset + 1; i < num_saves; i++)
18015 {
18016 strcat (pattern, ", ");
18017 strcat (pattern,
18018 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
18019 }
18020
18021 strcat (pattern, "}");
18022
18023 if (interrupt_p && return_pc)
18024 strcat (pattern, "^");
18025
18026 output_asm_insn (pattern, &cond);
18027 }
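
/* As a sketch of the strings built above (register choices illustrative):
   popping r4, r5 and the return address with an SP update gives
   "pop {r4, r5, pc}", while returning from an interrupt handler instead
   uses the load-multiple form with the CPSR-restoring suffix, e.g.
   "ldmfd sp!, {r4, r5, pc}^".  */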
18028
18029
18030 /* Output the assembly for a store multiple. */
18031
18032 const char *
18033 vfp_output_vstmd (rtx * operands)
18034 {
18035 char pattern[100];
18036 int p;
18037 int base;
18038 int i;
18039 rtx addr_reg = REG_P (XEXP (operands[0], 0))
18040 ? XEXP (operands[0], 0)
18041 : XEXP (XEXP (operands[0], 0), 0);
18042 bool push_p = REGNO (addr_reg) == SP_REGNUM;
18043
18044 if (push_p)
18045 strcpy (pattern, "vpush%?.64\t{%P1");
18046 else
18047 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
18048
18049 p = strlen (pattern);
18050
18051 gcc_assert (REG_P (operands[1]));
18052
18053 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
18054 for (i = 1; i < XVECLEN (operands[2], 0); i++)
18055 {
18056 p += sprintf (&pattern[p], ", d%d", base + i);
18057 }
18058 strcpy (&pattern[p], "}");
18059
18060 output_asm_insn (pattern, operands);
18061 return "";
18062 }
18063
18064
18065 /* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
18066 number of bytes pushed. */
18067
18068 static int
18069 vfp_emit_fstmd (int base_reg, int count)
18070 {
18071 rtx par;
18072 rtx dwarf;
18073 rtx tmp, reg;
18074 int i;
18075
18076   /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when
18077      exactly two register pairs are stored by a store multiple insn.
18078      We avoid this by pushing an extra pair.  */
18079 if (count == 2 && !arm_arch6)
18080 {
18081 if (base_reg == LAST_VFP_REGNUM - 3)
18082 base_reg -= 2;
18083 count++;
18084 }
18085
18086 /* FSTMD may not store more than 16 doubleword registers at once. Split
18087 larger stores into multiple parts (up to a maximum of two, in
18088 practice). */
18089 if (count > 16)
18090 {
18091 int saved;
18092 /* NOTE: base_reg is an internal register number, so each D register
18093 counts as 2. */
18094 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18095 saved += vfp_emit_fstmd (base_reg, 16);
18096 return saved;
18097 }
18098
18099 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18100 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18101
18102 reg = gen_rtx_REG (DFmode, base_reg);
18103 base_reg += 2;
18104
18105 XVECEXP (par, 0, 0)
18106 = gen_rtx_SET (gen_frame_mem
18107 (BLKmode,
18108 gen_rtx_PRE_MODIFY (Pmode,
18109 stack_pointer_rtx,
18110 plus_constant
18111 (Pmode, stack_pointer_rtx,
18112 - (count * 8)))
18113 ),
18114 gen_rtx_UNSPEC (BLKmode,
18115 gen_rtvec (1, reg),
18116 UNSPEC_PUSH_MULT));
18117
18118 tmp = gen_rtx_SET (stack_pointer_rtx,
18119 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18120 RTX_FRAME_RELATED_P (tmp) = 1;
18121 XVECEXP (dwarf, 0, 0) = tmp;
18122
18123 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18124 RTX_FRAME_RELATED_P (tmp) = 1;
18125 XVECEXP (dwarf, 0, 1) = tmp;
18126
18127 for (i = 1; i < count; i++)
18128 {
18129 reg = gen_rtx_REG (DFmode, base_reg);
18130 base_reg += 2;
18131 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18132
18133 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18134 plus_constant (Pmode,
18135 stack_pointer_rtx,
18136 i * 8)),
18137 reg);
18138 RTX_FRAME_RELATED_P (tmp) = 1;
18139 XVECEXP (dwarf, 0, i + 1) = tmp;
18140 }
18141
18142 par = emit_insn (par);
18143 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18144 RTX_FRAME_RELATED_P (par) = 1;
18145
18146 return count * 8;
18147 }
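
/* A rough example of the workaround above: on a core without arm_arch6,
   a request to push the two pairs {d14, d15} (count == 2) is widened to
   {d13, d14, d15}, so the RTL emitted is equivalent to "vpush {d13-d15}"
   and the function reports 24 bytes instead of 16.  Register numbers are
   illustrative.  */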
18148
18149 /* Return true if -mcmse has been passed and the function pointed to by
18150    'addr' has the cmse_nonsecure_call attribute; return false otherwise.  */
18151
18152 bool
18153 detect_cmse_nonsecure_call (tree addr)
18154 {
18155 if (!addr)
18156 return FALSE;
18157
18158 tree fntype = TREE_TYPE (addr);
18159 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18160 TYPE_ATTRIBUTES (fntype)))
18161 return TRUE;
18162 return FALSE;
18163 }
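
/* For example (an illustrative sketch only), with -mcmse a call through a
   pointer whose type carries the attribute is detected here; a declaration
   along these lines would make the predicate return true:

	typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
	ns_fn_t *callee;  */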
18164
18165
18166 /* Emit a call instruction with pattern PAT. ADDR is the address of
18167 the call target. */
18168
18169 void
18170 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18171 {
18172 rtx insn;
18173
18174 insn = emit_call_insn (pat);
18175
18176 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18177 If the call might use such an entry, add a use of the PIC register
18178 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18179 if (TARGET_VXWORKS_RTP
18180 && flag_pic
18181 && !sibcall
18182 && GET_CODE (addr) == SYMBOL_REF
18183 && (SYMBOL_REF_DECL (addr)
18184 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18185 : !SYMBOL_REF_LOCAL_P (addr)))
18186 {
18187 require_pic_register (NULL_RTX, false /*compute_now*/);
18188 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18189 }
18190
18191 if (TARGET_AAPCS_BASED)
18192 {
18193 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18194 linker. We need to add an IP clobber to allow setting
18195 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18196 is not needed since it's a fixed register. */
18197 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18198 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18199 }
18200 }
18201
18202 /* Output a 'call' insn. */
18203 const char *
18204 output_call (rtx *operands)
18205 {
18206 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18207
18208 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18209 if (REGNO (operands[0]) == LR_REGNUM)
18210 {
18211 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18212 output_asm_insn ("mov%?\t%0, %|lr", operands);
18213 }
18214
18215 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18216
18217 if (TARGET_INTERWORK || arm_arch4t)
18218 output_asm_insn ("bx%?\t%0", operands);
18219 else
18220 output_asm_insn ("mov%?\t%|pc, %0", operands);
18221
18222 return "";
18223 }
18224
18225 /* Output a move from arm registers to arm registers of a long double
18226 OPERANDS[0] is the destination.
18227 OPERANDS[1] is the source. */
18228 const char *
18229 output_mov_long_double_arm_from_arm (rtx *operands)
18230 {
18231 /* We have to be careful here because the two might overlap. */
18232 int dest_start = REGNO (operands[0]);
18233 int src_start = REGNO (operands[1]);
18234 rtx ops[2];
18235 int i;
18236
18237 if (dest_start < src_start)
18238 {
18239 for (i = 0; i < 3; i++)
18240 {
18241 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18242 ops[1] = gen_rtx_REG (SImode, src_start + i);
18243 output_asm_insn ("mov%?\t%0, %1", ops);
18244 }
18245 }
18246 else
18247 {
18248 for (i = 2; i >= 0; i--)
18249 {
18250 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18251 ops[1] = gen_rtx_REG (SImode, src_start + i);
18252 output_asm_insn ("mov%?\t%0, %1", ops);
18253 }
18254 }
18255
18256 return "";
18257 }
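
/* A sketch of the overlap handling above: copying {r1, r2, r3} into
   {r2, r3, r4} must run from the highest register downwards,

	mov	r4, r3
	mov	r3, r2
	mov	r2, r1

   whereas a copy to lower-numbered registers runs upwards.  Register
   numbers are illustrative.  */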
18258
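/* Emit RTL to load the 32-bit constant or symbolic address SRC into DEST
   as a pair of instructions (typically a movw/movt-style pair).  */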
18259 void
18260 arm_emit_movpair (rtx dest, rtx src)
18261 {
18262 /* If the src is an immediate, simplify it. */
18263 if (CONST_INT_P (src))
18264 {
18265 HOST_WIDE_INT val = INTVAL (src);
18266 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18267 if ((val >> 16) & 0x0000ffff)
18268 {
18269 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18270 GEN_INT (16)),
18271 GEN_INT ((val >> 16) & 0x0000ffff));
18272 rtx_insn *insn = get_last_insn ();
18273 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18274 }
18275 return;
18276 }
18277 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18278 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18279 rtx_insn *insn = get_last_insn ();
18280 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18281 }
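
/* Conceptually, for a constant such as 0x12345678 the two sets emitted
   above correspond to a movw/movt pair (register illustrative):

	movw	r0, #0x5678	@ set the low halfword
	movt	r0, #0x1234	@ insert the high halfword

   with a REG_EQUAL note recording the full value.  */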
18282
18283 /* Output a move between double words. It must be REG<-MEM
18284 or MEM<-REG. */
18285 const char *
18286 output_move_double (rtx *operands, bool emit, int *count)
18287 {
18288 enum rtx_code code0 = GET_CODE (operands[0]);
18289 enum rtx_code code1 = GET_CODE (operands[1]);
18290 rtx otherops[3];
18291 if (count)
18292 *count = 1;
18293
18294 /* The only case when this might happen is when
18295 you are looking at the length of a DImode instruction
18296 that has an invalid constant in it. */
18297 if (code0 == REG && code1 != MEM)
18298 {
18299 gcc_assert (!emit);
18300 *count = 2;
18301 return "";
18302 }
18303
18304 if (code0 == REG)
18305 {
18306 unsigned int reg0 = REGNO (operands[0]);
18307
18308 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18309
18310 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18311
18312 switch (GET_CODE (XEXP (operands[1], 0)))
18313 {
18314 case REG:
18315
18316 if (emit)
18317 {
18318 if (TARGET_LDRD
18319 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18320 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18321 else
18322 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18323 }
18324 break;
18325
18326 case PRE_INC:
18327 gcc_assert (TARGET_LDRD);
18328 if (emit)
18329 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18330 break;
18331
18332 case PRE_DEC:
18333 if (emit)
18334 {
18335 if (TARGET_LDRD)
18336 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18337 else
18338 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18339 }
18340 break;
18341
18342 case POST_INC:
18343 if (emit)
18344 {
18345 if (TARGET_LDRD)
18346 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18347 else
18348 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18349 }
18350 break;
18351
18352 case POST_DEC:
18353 gcc_assert (TARGET_LDRD);
18354 if (emit)
18355 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18356 break;
18357
18358 case PRE_MODIFY:
18359 case POST_MODIFY:
18360 	      /* Autoincrement addressing modes should never have overlapping
18361 base and destination registers, and overlapping index registers
18362 are already prohibited, so this doesn't need to worry about
18363 fix_cm3_ldrd. */
18364 otherops[0] = operands[0];
18365 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18366 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18367
18368 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18369 {
18370 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18371 {
18372 /* Registers overlap so split out the increment. */
18373 if (emit)
18374 {
18375 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18376 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18377 }
18378 if (count)
18379 *count = 2;
18380 }
18381 else
18382 {
18383 /* Use a single insn if we can.
18384 FIXME: IWMMXT allows offsets larger than ldrd can
18385 handle, fix these up with a pair of ldr. */
18386 if (TARGET_THUMB2
18387 || !CONST_INT_P (otherops[2])
18388 || (INTVAL (otherops[2]) > -256
18389 && INTVAL (otherops[2]) < 256))
18390 {
18391 if (emit)
18392 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18393 }
18394 else
18395 {
18396 if (emit)
18397 {
18398 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18399 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18400 }
18401 if (count)
18402 *count = 2;
18403
18404 }
18405 }
18406 }
18407 else
18408 {
18409 /* Use a single insn if we can.
18410 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18411 fix these up with a pair of ldr. */
18412 if (TARGET_THUMB2
18413 || !CONST_INT_P (otherops[2])
18414 || (INTVAL (otherops[2]) > -256
18415 && INTVAL (otherops[2]) < 256))
18416 {
18417 if (emit)
18418 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18419 }
18420 else
18421 {
18422 if (emit)
18423 {
18424 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18425 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18426 }
18427 if (count)
18428 *count = 2;
18429 }
18430 }
18431 break;
18432
18433 case LABEL_REF:
18434 case CONST:
18435 	  /* We might be able to use ldrd %0, %1 here.  However, the range is
18436 	     different from that of ldr/adr, and it is broken on some ARMv7-M
18437 	     implementations.  */
18438 /* Use the second register of the pair to avoid problematic
18439 overlap. */
18440 otherops[1] = operands[1];
18441 if (emit)
18442 output_asm_insn ("adr%?\t%0, %1", otherops);
18443 operands[1] = otherops[0];
18444 if (emit)
18445 {
18446 if (TARGET_LDRD)
18447 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18448 else
18449 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18450 }
18451
18452 if (count)
18453 *count = 2;
18454 break;
18455
18456 /* ??? This needs checking for thumb2. */
18457 default:
18458 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18459 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18460 {
18461 otherops[0] = operands[0];
18462 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18463 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18464
18465 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18466 {
18467 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18468 {
18469 switch ((int) INTVAL (otherops[2]))
18470 {
18471 case -8:
18472 if (emit)
18473 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18474 return "";
18475 case -4:
18476 if (TARGET_THUMB2)
18477 break;
18478 if (emit)
18479 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18480 return "";
18481 case 4:
18482 if (TARGET_THUMB2)
18483 break;
18484 if (emit)
18485 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18486 return "";
18487 }
18488 }
18489 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18490 operands[1] = otherops[0];
18491 if (TARGET_LDRD
18492 && (REG_P (otherops[2])
18493 || TARGET_THUMB2
18494 || (CONST_INT_P (otherops[2])
18495 && INTVAL (otherops[2]) > -256
18496 && INTVAL (otherops[2]) < 256)))
18497 {
18498 if (reg_overlap_mentioned_p (operands[0],
18499 otherops[2]))
18500 {
18501 /* Swap base and index registers over to
18502 avoid a conflict. */
18503 std::swap (otherops[1], otherops[2]);
18504 }
18505 /* If both registers conflict, it will usually
18506 have been fixed by a splitter. */
18507 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18508 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18509 {
18510 if (emit)
18511 {
18512 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18513 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18514 }
18515 if (count)
18516 *count = 2;
18517 }
18518 else
18519 {
18520 otherops[0] = operands[0];
18521 if (emit)
18522 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18523 }
18524 return "";
18525 }
18526
18527 if (CONST_INT_P (otherops[2]))
18528 {
18529 if (emit)
18530 {
18531 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18532 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18533 else
18534 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18535 }
18536 }
18537 else
18538 {
18539 if (emit)
18540 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18541 }
18542 }
18543 else
18544 {
18545 if (emit)
18546 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18547 }
18548
18549 if (count)
18550 *count = 2;
18551
18552 if (TARGET_LDRD)
18553 return "ldrd%?\t%0, [%1]";
18554
18555 return "ldmia%?\t%1, %M0";
18556 }
18557 else
18558 {
18559 otherops[1] = adjust_address (operands[1], SImode, 4);
18560 /* Take care of overlapping base/data reg. */
18561 if (reg_mentioned_p (operands[0], operands[1]))
18562 {
18563 if (emit)
18564 {
18565 output_asm_insn ("ldr%?\t%0, %1", otherops);
18566 output_asm_insn ("ldr%?\t%0, %1", operands);
18567 }
18568 if (count)
18569 *count = 2;
18570
18571 }
18572 else
18573 {
18574 if (emit)
18575 {
18576 output_asm_insn ("ldr%?\t%0, %1", operands);
18577 output_asm_insn ("ldr%?\t%0, %1", otherops);
18578 }
18579 if (count)
18580 *count = 2;
18581 }
18582 }
18583 }
18584 }
18585 else
18586 {
18587 /* Constraints should ensure this. */
18588 gcc_assert (code0 == MEM && code1 == REG);
18589 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18590 || (TARGET_ARM && TARGET_LDRD));
18591
18592 /* For TARGET_ARM the first source register of an STRD
18593 must be even. This is usually the case for double-word
18594 values but user assembly constraints can force an odd
18595 starting register. */
18596 bool allow_strd = TARGET_LDRD
18597 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18598 switch (GET_CODE (XEXP (operands[0], 0)))
18599 {
18600 case REG:
18601 if (emit)
18602 {
18603 if (allow_strd)
18604 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18605 else
18606 output_asm_insn ("stm%?\t%m0, %M1", operands);
18607 }
18608 break;
18609
18610 case PRE_INC:
18611 gcc_assert (allow_strd);
18612 if (emit)
18613 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18614 break;
18615
18616 case PRE_DEC:
18617 if (emit)
18618 {
18619 if (allow_strd)
18620 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18621 else
18622 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18623 }
18624 break;
18625
18626 case POST_INC:
18627 if (emit)
18628 {
18629 if (allow_strd)
18630 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18631 else
18632 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18633 }
18634 break;
18635
18636 case POST_DEC:
18637 gcc_assert (allow_strd);
18638 if (emit)
18639 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18640 break;
18641
18642 case PRE_MODIFY:
18643 case POST_MODIFY:
18644 otherops[0] = operands[1];
18645 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18646 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18647
18648 /* IWMMXT allows offsets larger than strd can handle,
18649 fix these up with a pair of str. */
18650 if (!TARGET_THUMB2
18651 && CONST_INT_P (otherops[2])
18652 && (INTVAL(otherops[2]) <= -256
18653 || INTVAL(otherops[2]) >= 256))
18654 {
18655 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18656 {
18657 if (emit)
18658 {
18659 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18660 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18661 }
18662 if (count)
18663 *count = 2;
18664 }
18665 else
18666 {
18667 if (emit)
18668 {
18669 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18670 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18671 }
18672 if (count)
18673 *count = 2;
18674 }
18675 }
18676 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18677 {
18678 if (emit)
18679 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18680 }
18681 else
18682 {
18683 if (emit)
18684 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18685 }
18686 break;
18687
18688 case PLUS:
18689 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18690 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18691 {
18692 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18693 {
18694 case -8:
18695 if (emit)
18696 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18697 return "";
18698
18699 case -4:
18700 if (TARGET_THUMB2)
18701 break;
18702 if (emit)
18703 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18704 return "";
18705
18706 case 4:
18707 if (TARGET_THUMB2)
18708 break;
18709 if (emit)
18710 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18711 return "";
18712 }
18713 }
18714 if (allow_strd
18715 && (REG_P (otherops[2])
18716 || TARGET_THUMB2
18717 || (CONST_INT_P (otherops[2])
18718 && INTVAL (otherops[2]) > -256
18719 && INTVAL (otherops[2]) < 256)))
18720 {
18721 otherops[0] = operands[1];
18722 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18723 if (emit)
18724 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18725 return "";
18726 }
18727 /* Fall through */
18728
18729 default:
18730 otherops[0] = adjust_address (operands[0], SImode, 4);
18731 otherops[1] = operands[1];
18732 if (emit)
18733 {
18734 output_asm_insn ("str%?\t%1, %0", operands);
18735 output_asm_insn ("str%?\t%H1, %0", otherops);
18736 }
18737 if (count)
18738 *count = 2;
18739 }
18740 }
18741
18742 return "";
18743 }
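
/* As a rough illustration of the simplest paths above (registers
   illustrative): a double-word load from a plain register address becomes
   either "ldrd r0, [r2]" (when LDRD is available and safe) or
   "ldmia r2, {r0, r1}", and the corresponding store becomes
   "strd r0, [r2]" or "stm r2, {r0, r1}".  */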
18744
18745 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18746 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18747
18748 const char *
18749 output_move_quad (rtx *operands)
18750 {
18751 if (REG_P (operands[0]))
18752 {
18753 /* Load, or reg->reg move. */
18754
18755 if (MEM_P (operands[1]))
18756 {
18757 switch (GET_CODE (XEXP (operands[1], 0)))
18758 {
18759 case REG:
18760 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18761 break;
18762
18763 case LABEL_REF:
18764 case CONST:
18765 output_asm_insn ("adr%?\t%0, %1", operands);
18766 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18767 break;
18768
18769 default:
18770 gcc_unreachable ();
18771 }
18772 }
18773 else
18774 {
18775 rtx ops[2];
18776 int dest, src, i;
18777
18778 gcc_assert (REG_P (operands[1]));
18779
18780 dest = REGNO (operands[0]);
18781 src = REGNO (operands[1]);
18782
18783 /* This seems pretty dumb, but hopefully GCC won't try to do it
18784 very often. */
18785 if (dest < src)
18786 for (i = 0; i < 4; i++)
18787 {
18788 ops[0] = gen_rtx_REG (SImode, dest + i);
18789 ops[1] = gen_rtx_REG (SImode, src + i);
18790 output_asm_insn ("mov%?\t%0, %1", ops);
18791 }
18792 else
18793 for (i = 3; i >= 0; i--)
18794 {
18795 ops[0] = gen_rtx_REG (SImode, dest + i);
18796 ops[1] = gen_rtx_REG (SImode, src + i);
18797 output_asm_insn ("mov%?\t%0, %1", ops);
18798 }
18799 }
18800 }
18801 else
18802 {
18803 gcc_assert (MEM_P (operands[0]));
18804 gcc_assert (REG_P (operands[1]));
18805 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18806
18807 switch (GET_CODE (XEXP (operands[0], 0)))
18808 {
18809 case REG:
18810 output_asm_insn ("stm%?\t%m0, %M1", operands);
18811 break;
18812
18813 default:
18814 gcc_unreachable ();
18815 }
18816 }
18817
18818 return "";
18819 }
18820
18821 /* Output a VFP load or store instruction. */
18822
18823 const char *
18824 output_move_vfp (rtx *operands)
18825 {
18826 rtx reg, mem, addr, ops[2];
18827 int load = REG_P (operands[0]);
18828 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18829 int sp = (!TARGET_VFP_FP16INST
18830 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18831 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18832 const char *templ;
18833 char buff[50];
18834 machine_mode mode;
18835
18836 reg = operands[!load];
18837 mem = operands[load];
18838
18839 mode = GET_MODE (reg);
18840
18841 gcc_assert (REG_P (reg));
18842 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18843 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18844 || mode == SFmode
18845 || mode == DFmode
18846 || mode == HImode
18847 || mode == SImode
18848 || mode == DImode
18849 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18850 gcc_assert (MEM_P (mem));
18851
18852 addr = XEXP (mem, 0);
18853
18854 switch (GET_CODE (addr))
18855 {
18856 case PRE_DEC:
18857 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18858 ops[0] = XEXP (addr, 0);
18859 ops[1] = reg;
18860 break;
18861
18862 case POST_INC:
18863 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18864 ops[0] = XEXP (addr, 0);
18865 ops[1] = reg;
18866 break;
18867
18868 default:
18869 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18870 ops[0] = reg;
18871 ops[1] = mem;
18872 break;
18873 }
18874
18875 sprintf (buff, templ,
18876 load ? "ld" : "st",
18877 dp ? "64" : sp ? "32" : "16",
18878 dp ? "P" : "",
18879 integer_p ? "\t%@ int" : "");
18880 output_asm_insn (buff, ops);
18881
18882 return "";
18883 }
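
/* A sketch of the strings produced above (operands illustrative): a
   double-precision load from a simple address prints as
   "vldr.64 d1, [r0, #8]", a single-precision store as "vstr.32 s2, [r3]",
   and the POST_INC form as e.g. "vldmia.64 r0!, {d1}".  */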
18884
18885 /* Output a Neon double-word or quad-word load or store, or a load
18886 or store for larger structure modes.
18887
18888    WARNING: The ordering of elements is weird in big-endian mode,
18889    because the EABI requires that vectors stored in memory appear
18890    as though they were stored by a VSTM.
18891 GCC RTL defines element ordering based on in-memory order.
18892 This can be different from the architectural ordering of elements
18893 within a NEON register. The intrinsics defined in arm_neon.h use the
18894 NEON register element ordering, not the GCC RTL element ordering.
18895
18896    For example, the in-memory ordering of a big-endian quadword
18897 vector with 16-bit elements when stored from register pair {d0,d1}
18898 will be (lowest address first, d0[N] is NEON register element N):
18899
18900 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18901
18902 When necessary, quadword registers (dN, dN+1) are moved to ARM
18903 registers from rN in the order:
18904
18905 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18906
18907 So that STM/LDM can be used on vectors in ARM registers, and the
18908 same memory layout will result as if VSTM/VLDM were used.
18909
18910 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18911 possible, which allows use of appropriate alignment tags.
18912 Note that the choice of "64" is independent of the actual vector
18913 element size; this size simply ensures that the behavior is
18914 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18915
18916 Due to limitations of those instructions, use of VST1.64/VLD1.64
18917 is not possible if:
18918 - the address contains PRE_DEC, or
18919 - the mode refers to more than 4 double-word registers
18920
18921 In those cases, it would be possible to replace VSTM/VLDM by a
18922    sequence of instructions; this is not currently implemented since
18923    it is not certain to actually improve performance.  */
18924
18925 const char *
18926 output_move_neon (rtx *operands)
18927 {
18928 rtx reg, mem, addr, ops[2];
18929 int regno, nregs, load = REG_P (operands[0]);
18930 const char *templ;
18931 char buff[50];
18932 machine_mode mode;
18933
18934 reg = operands[!load];
18935 mem = operands[load];
18936
18937 mode = GET_MODE (reg);
18938
18939 gcc_assert (REG_P (reg));
18940 regno = REGNO (reg);
18941 nregs = REG_NREGS (reg) / 2;
18942 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18943 || NEON_REGNO_OK_FOR_QUAD (regno));
18944 gcc_assert (VALID_NEON_DREG_MODE (mode)
18945 || VALID_NEON_QREG_MODE (mode)
18946 || VALID_NEON_STRUCT_MODE (mode));
18947 gcc_assert (MEM_P (mem));
18948
18949 addr = XEXP (mem, 0);
18950
18951 /* Strip off const from addresses like (const (plus (...))). */
18952 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18953 addr = XEXP (addr, 0);
18954
18955 switch (GET_CODE (addr))
18956 {
18957 case POST_INC:
18958 /* We have to use vldm / vstm for too-large modes. */
18959 if (nregs > 4)
18960 {
18961 templ = "v%smia%%?\t%%0!, %%h1";
18962 ops[0] = XEXP (addr, 0);
18963 }
18964 else
18965 {
18966 templ = "v%s1.64\t%%h1, %%A0";
18967 ops[0] = mem;
18968 }
18969 ops[1] = reg;
18970 break;
18971
18972 case PRE_DEC:
18973 /* We have to use vldm / vstm in this case, since there is no
18974 pre-decrement form of the vld1 / vst1 instructions. */
18975 templ = "v%smdb%%?\t%%0!, %%h1";
18976 ops[0] = XEXP (addr, 0);
18977 ops[1] = reg;
18978 break;
18979
18980 case POST_MODIFY:
18981 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18982 gcc_unreachable ();
18983
18984 case REG:
18985 /* We have to use vldm / vstm for too-large modes. */
18986 if (nregs > 1)
18987 {
18988 if (nregs > 4)
18989 templ = "v%smia%%?\t%%m0, %%h1";
18990 else
18991 templ = "v%s1.64\t%%h1, %%A0";
18992
18993 ops[0] = mem;
18994 ops[1] = reg;
18995 break;
18996 }
18997 /* Fall through. */
18998 case LABEL_REF:
18999 case PLUS:
19000 {
19001 int i;
19002 int overlap = -1;
19003 for (i = 0; i < nregs; i++)
19004 {
19005 /* We're only using DImode here because it's a convenient size. */
19006 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
19007 ops[1] = adjust_address (mem, DImode, 8 * i);
19008 if (reg_overlap_mentioned_p (ops[0], mem))
19009 {
19010 gcc_assert (overlap == -1);
19011 overlap = i;
19012 }
19013 else
19014 {
19015 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19016 output_asm_insn (buff, ops);
19017 }
19018 }
19019 if (overlap != -1)
19020 {
19021 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
19022 ops[1] = adjust_address (mem, SImode, 8 * overlap);
19023 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19024 output_asm_insn (buff, ops);
19025 }
19026
19027 return "";
19028 }
19029
19030 default:
19031 gcc_unreachable ();
19032 }
19033
19034 sprintf (buff, templ, load ? "ld" : "st");
19035 output_asm_insn (buff, ops);
19036
19037 return "";
19038 }
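
/* For illustration (register numbers and exact syntax approximate): a
   quad-word (two D-register) load from a plain register address is
   printed along the lines of "vld1.64 {d0, d1}, [r0]", while a structure
   mode needing more than four D registers falls back to something like
   "vldmia r0, {d0-d7}".  */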
19039
19040 /* Compute and return the length of neon_mov<mode>, where <mode> is
19041 one of VSTRUCT modes: EI, OI, CI or XI. */
19042 int
19043 arm_attr_length_move_neon (rtx_insn *insn)
19044 {
19045 rtx reg, mem, addr;
19046 int load;
19047 machine_mode mode;
19048
19049 extract_insn_cached (insn);
19050
19051 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
19052 {
19053 mode = GET_MODE (recog_data.operand[0]);
19054 switch (mode)
19055 {
19056 case E_EImode:
19057 case E_OImode:
19058 return 8;
19059 case E_CImode:
19060 return 12;
19061 case E_XImode:
19062 return 16;
19063 default:
19064 gcc_unreachable ();
19065 }
19066 }
19067
19068 load = REG_P (recog_data.operand[0]);
19069 reg = recog_data.operand[!load];
19070 mem = recog_data.operand[load];
19071
19072 gcc_assert (MEM_P (mem));
19073
19074 addr = XEXP (mem, 0);
19075
19076 /* Strip off const from addresses like (const (plus (...))). */
19077 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19078 addr = XEXP (addr, 0);
19079
19080 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19081 {
19082 int insns = REG_NREGS (reg) / 2;
19083 return insns * 4;
19084 }
19085 else
19086 return 4;
19087 }
19088
19089 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19090 return zero. */
19091
19092 int
19093 arm_address_offset_is_imm (rtx_insn *insn)
19094 {
19095 rtx mem, addr;
19096
19097 extract_insn_cached (insn);
19098
19099 if (REG_P (recog_data.operand[0]))
19100 return 0;
19101
19102 mem = recog_data.operand[0];
19103
19104 gcc_assert (MEM_P (mem));
19105
19106 addr = XEXP (mem, 0);
19107
19108 if (REG_P (addr)
19109 || (GET_CODE (addr) == PLUS
19110 && REG_P (XEXP (addr, 0))
19111 && CONST_INT_P (XEXP (addr, 1))))
19112 return 1;
19113 else
19114 return 0;
19115 }
19116
19117 /* Output an ADD r, s, #n where n may be too big for one instruction.
19118    If adding zero and the destination equals the source, output nothing.  */
19119 const char *
19120 output_add_immediate (rtx *operands)
19121 {
19122 HOST_WIDE_INT n = INTVAL (operands[2]);
19123
19124 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19125 {
19126 if (n < 0)
19127 output_multi_immediate (operands,
19128 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19129 -n);
19130 else
19131 output_multi_immediate (operands,
19132 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19133 n);
19134 }
19135
19136 return "";
19137 }
19138
19139 /* Output a multiple immediate operation.
19140 OPERANDS is the vector of operands referred to in the output patterns.
19141 INSTR1 is the output pattern to use for the first constant.
19142 INSTR2 is the output pattern to use for subsequent constants.
19143 IMMED_OP is the index of the constant slot in OPERANDS.
19144 N is the constant value. */
19145 static const char *
19146 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19147 int immed_op, HOST_WIDE_INT n)
19148 {
19149 #if HOST_BITS_PER_WIDE_INT > 32
19150 n &= 0xffffffff;
19151 #endif
19152
19153 if (n == 0)
19154 {
19155 /* Quick and easy output. */
19156 operands[immed_op] = const0_rtx;
19157 output_asm_insn (instr1, operands);
19158 }
19159 else
19160 {
19161 int i;
19162 const char * instr = instr1;
19163
19164 /* Note that n is never zero here (which would give no output). */
19165 for (i = 0; i < 32; i += 2)
19166 {
19167 if (n & (3 << i))
19168 {
19169 operands[immed_op] = GEN_INT (n & (255 << i));
19170 output_asm_insn (instr, operands);
19171 instr = instr2;
19172 i += 6;
19173 }
19174 }
19175 }
19176
19177 return "";
19178 }
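
/* A worked example of the splitting above (registers illustrative): for
   output_add_immediate with n == 0x10004 the constant is carved into the
   ARM-encodable chunks 0x4 and 0x10000, giving

	add	r0, r1, #4
	add	r0, r0, #65536

   Each chunk is an 8-bit value at an even rotation, as the ARM immediate
   encoding requires.  */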
19179
19180 /* Return the name of a shifter operation. */
19181 static const char *
19182 arm_shift_nmem (enum rtx_code code)
19183 {
19184 switch (code)
19185 {
19186 case ASHIFT:
19187 return ARM_LSL_NAME;
19188
19189 case ASHIFTRT:
19190 return "asr";
19191
19192 case LSHIFTRT:
19193 return "lsr";
19194
19195 case ROTATERT:
19196 return "ror";
19197
19198 default:
19199       abort ();
19200 }
19201 }
19202
19203 /* Return the appropriate ARM instruction for the operation code.
19204 The returned result should not be overwritten. OP is the rtx of the
19205 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19206 was shifted. */
19207 const char *
19208 arithmetic_instr (rtx op, int shift_first_arg)
19209 {
19210 switch (GET_CODE (op))
19211 {
19212 case PLUS:
19213 return "add";
19214
19215 case MINUS:
19216 return shift_first_arg ? "rsb" : "sub";
19217
19218 case IOR:
19219 return "orr";
19220
19221 case XOR:
19222 return "eor";
19223
19224 case AND:
19225 return "and";
19226
19227 case ASHIFT:
19228 case ASHIFTRT:
19229 case LSHIFTRT:
19230 case ROTATERT:
19231       return arm_shift_nmem (GET_CODE (op));
19232
19233 default:
19234 gcc_unreachable ();
19235 }
19236 }
19237
19238 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19239 for the operation code. The returned result should not be overwritten.
19240 OP is the rtx code of the shift.
19241    On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise it
19242    will hold the constant shift amount.  */
19243 static const char *
19244 shift_op (rtx op, HOST_WIDE_INT *amountp)
19245 {
19246 const char * mnem;
19247 enum rtx_code code = GET_CODE (op);
19248
19249 switch (code)
19250 {
19251 case ROTATE:
19252 if (!CONST_INT_P (XEXP (op, 1)))
19253 {
19254 output_operand_lossage ("invalid shift operand");
19255 return NULL;
19256 }
19257
19258 code = ROTATERT;
19259 *amountp = 32 - INTVAL (XEXP (op, 1));
19260 mnem = "ror";
19261 break;
19262
19263 case ASHIFT:
19264 case ASHIFTRT:
19265 case LSHIFTRT:
19266 case ROTATERT:
19267       mnem = arm_shift_nmem (code);
19268 if (CONST_INT_P (XEXP (op, 1)))
19269 {
19270 *amountp = INTVAL (XEXP (op, 1));
19271 }
19272 else if (REG_P (XEXP (op, 1)))
19273 {
19274 *amountp = -1;
19275 return mnem;
19276 }
19277 else
19278 {
19279 output_operand_lossage ("invalid shift operand");
19280 return NULL;
19281 }
19282 break;
19283
19284 case MULT:
19285 /* We never have to worry about the amount being other than a
19286 power of 2, since this case can never be reloaded from a reg. */
19287 if (!CONST_INT_P (XEXP (op, 1)))
19288 {
19289 output_operand_lossage ("invalid shift operand");
19290 return NULL;
19291 }
19292
19293 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19294
19295 /* Amount must be a power of two. */
19296 if (*amountp & (*amountp - 1))
19297 {
19298 output_operand_lossage ("invalid shift operand");
19299 return NULL;
19300 }
19301
19302 *amountp = exact_log2 (*amountp);
19303 gcc_assert (IN_RANGE (*amountp, 0, 31));
19304 return ARM_LSL_NAME;
19305
19306 default:
19307 output_operand_lossage ("invalid shift operand");
19308 return NULL;
19309 }
19310
19311 /* This is not 100% correct, but follows from the desire to merge
19312 multiplication by a power of 2 with the recognizer for a
19313    shift.  >=32 is not a valid shift for "lsl", so we must try to
19314 output a shift that produces the correct arithmetical result.
19315 Using lsr #32 is identical except for the fact that the carry bit
19316 is not set correctly if we set the flags; but we never use the
19317 carry bit from such an operation, so we can ignore that. */
19318 if (code == ROTATERT)
19319 /* Rotate is just modulo 32. */
19320 *amountp &= 31;
19321 else if (*amountp != (*amountp & 31))
19322 {
19323 if (code == ASHIFT)
19324 mnem = "lsr";
19325 *amountp = 32;
19326 }
19327
19328 /* Shifts of 0 are no-ops. */
19329 if (*amountp == 0)
19330 return NULL;
19331
19332 return mnem;
19333 }
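
/* Two small examples of the mapping above (operands illustrative): a MULT
   by 8 is emitted as the shift "lsl #3", and a ROTATE left by 8 is
   converted to its right-rotate equivalent "ror #24".  */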
19334
19335 /* Output a .ascii pseudo-op, keeping track of lengths, because /bin/as
19336    is horribly restrictive.  The judgement about
19337    whether each character is 'printable' (and can be output as
19338    is) or not (and must be printed with an octal escape) must be made
19339 with reference to the *host* character set -- the situation is
19340 similar to that discussed in the comments above pp_c_char in
19341 c-pretty-print.c. */
19342
19343 #define MAX_ASCII_LEN 51
19344
19345 void
19346 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19347 {
19348 int i;
19349 int len_so_far = 0;
19350
19351 fputs ("\t.ascii\t\"", stream);
19352
19353 for (i = 0; i < len; i++)
19354 {
19355 int c = p[i];
19356
19357 if (len_so_far >= MAX_ASCII_LEN)
19358 {
19359 fputs ("\"\n\t.ascii\t\"", stream);
19360 len_so_far = 0;
19361 }
19362
19363 if (ISPRINT (c))
19364 {
19365 if (c == '\\' || c == '\"')
19366 {
19367 putc ('\\', stream);
19368 len_so_far++;
19369 }
19370 putc (c, stream);
19371 len_so_far++;
19372 }
19373 else
19374 {
19375 fprintf (stream, "\\%03o", c);
19376 len_so_far += 4;
19377 }
19378 }
19379
19380 fputs ("\"\n", stream);
19381 }
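
/* For example, the four bytes 'a', '"', 'b', '\n' would be emitted
   roughly as

	.ascii	"a\"b\012"

   with the string broken across several .ascii directives once a line
   grows past MAX_ASCII_LEN characters.  */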
19382 \f
19383
19384 /* Compute the register save mask for registers 0 through 12
19385 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19386
19387 static unsigned long
19388 arm_compute_save_reg0_reg12_mask (void)
19389 {
19390 unsigned long func_type = arm_current_func_type ();
19391 unsigned long save_reg_mask = 0;
19392 unsigned int reg;
19393
19394 if (IS_INTERRUPT (func_type))
19395 {
19396 unsigned int max_reg;
19397 /* Interrupt functions must not corrupt any registers,
19398 even call clobbered ones. If this is a leaf function
19399 we can just examine the registers used by the RTL, but
19400 otherwise we have to assume that whatever function is
19401 called might clobber anything, and so we have to save
19402 all the call-clobbered registers as well. */
19403 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19404 /* FIQ handlers have registers r8 - r12 banked, so
19405 	   we only need to check r0 - r7.  Normal ISRs only
19406 	   bank r14 and r15, so we must check up to r12.
19407 	   r13 is the stack pointer, which is always preserved,
19408 	   so we do not need to consider it here.  */
19409 max_reg = 7;
19410 else
19411 max_reg = 12;
19412
19413 for (reg = 0; reg <= max_reg; reg++)
19414 if (df_regs_ever_live_p (reg)
19415 || (! crtl->is_leaf && call_used_regs[reg]))
19416 save_reg_mask |= (1 << reg);
19417
19418 /* Also save the pic base register if necessary. */
19419 if (flag_pic
19420 && !TARGET_SINGLE_PIC_BASE
19421 && arm_pic_register != INVALID_REGNUM
19422 && crtl->uses_pic_offset_table)
19423 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19424 }
19425 else if (IS_VOLATILE(func_type))
19426 {
19427 	/* For noreturn functions we historically omitted register saves
19428 	   altogether.  However, this really messes up debugging.  As a
19429 	   compromise, save just the frame pointers.  Combined with the link
19430 	   register saved elsewhere, this should be sufficient to get
19431 	   a backtrace.  */
19432 if (frame_pointer_needed)
19433 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19434 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19435 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19436 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19437 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19438 }
19439 else
19440 {
19441 /* In the normal case we only need to save those registers
19442 which are call saved and which are used by this function. */
19443 for (reg = 0; reg <= 11; reg++)
19444 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19445 save_reg_mask |= (1 << reg);
19446
19447 /* Handle the frame pointer as a special case. */
19448 if (frame_pointer_needed)
19449 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19450
19451 /* If we aren't loading the PIC register,
19452 don't stack it even though it may be live. */
19453 if (flag_pic
19454 && !TARGET_SINGLE_PIC_BASE
19455 && arm_pic_register != INVALID_REGNUM
19456 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19457 || crtl->uses_pic_offset_table))
19458 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19459
19460 /* The prologue will copy SP into R0, so save it. */
19461 if (IS_STACKALIGN (func_type))
19462 save_reg_mask |= 1;
19463 }
19464
19465 /* Save registers so the exception handler can modify them. */
19466 if (crtl->calls_eh_return)
19467 {
19468 unsigned int i;
19469
19470 for (i = 0; ; i++)
19471 {
19472 reg = EH_RETURN_DATA_REGNO (i);
19473 if (reg == INVALID_REGNUM)
19474 break;
19475 save_reg_mask |= 1 << reg;
19476 }
19477 }
19478
19479 return save_reg_mask;
19480 }
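
/* The mask returned above has bit N set when core register rN must be
   saved; for instance, a value of 0x000000d0 denotes {r4, r6, r7}.  The
   example value is illustrative only.  */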
19481
19482 /* Return true if r3 is live at the start of the function. */
19483
19484 static bool
19485 arm_r3_live_at_start_p (void)
19486 {
19487 /* Just look at cfg info, which is still close enough to correct at this
19488 point. This gives false positives for broken functions that might use
19489 uninitialized data that happens to be allocated in r3, but who cares? */
19490 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19491 }
19492
19493 /* Compute the number of bytes used to store the static chain register on the
19494 stack, above the stack frame. We need to know this accurately to get the
19495 alignment of the rest of the stack frame correct. */
19496
19497 static int
19498 arm_compute_static_chain_stack_bytes (void)
19499 {
19500 /* Once the value is updated from the init value of -1, do not
19501 re-compute. */
19502 if (cfun->machine->static_chain_stack_bytes != -1)
19503 return cfun->machine->static_chain_stack_bytes;
19504
19505 /* See the defining assertion in arm_expand_prologue. */
19506 if (IS_NESTED (arm_current_func_type ())
19507 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19508 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19509 || flag_stack_clash_protection)
19510 && !df_regs_ever_live_p (LR_REGNUM)))
19511 && arm_r3_live_at_start_p ()
19512 && crtl->args.pretend_args_size == 0)
19513 return 4;
19514
19515 return 0;
19516 }
19517
19518 /* Compute a bit mask of which core registers need to be
19519 saved on the stack for the current function.
19520 This is used by arm_compute_frame_layout, which may add extra registers. */
19521
19522 static unsigned long
19523 arm_compute_save_core_reg_mask (void)
19524 {
19525 unsigned int save_reg_mask = 0;
19526 unsigned long func_type = arm_current_func_type ();
19527 unsigned int reg;
19528
19529 if (IS_NAKED (func_type))
19530 /* This should never really happen. */
19531 return 0;
19532
19533 /* If we are creating a stack frame, then we must save the frame pointer,
19534 IP (which will hold the old stack pointer), LR and the PC. */
19535 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19536 save_reg_mask |=
19537 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19538 | (1 << IP_REGNUM)
19539 | (1 << LR_REGNUM)
19540 | (1 << PC_REGNUM);
19541
19542 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19543
19544 /* Decide if we need to save the link register.
19545 Interrupt routines have their own banked link register,
19546 so they never need to save it.
19547 Otherwise if we do not use the link register we do not need to save
19548 it. If we are pushing other registers onto the stack however, we
19549 can save an instruction in the epilogue by pushing the link register
19550 now and then popping it back into the PC. This incurs extra memory
19551 accesses though, so we only do it when optimizing for size, and only
19552 if we know that we will not need a fancy return sequence. */
19553 if (df_regs_ever_live_p (LR_REGNUM)
19554 || (save_reg_mask
19555 && optimize_size
19556 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19557 && !crtl->tail_call_emit
19558 && !crtl->calls_eh_return))
19559 save_reg_mask |= 1 << LR_REGNUM;
19560
19561 if (cfun->machine->lr_save_eliminated)
19562 save_reg_mask &= ~ (1 << LR_REGNUM);
19563
19564 if (TARGET_REALLY_IWMMXT
19565 && ((bit_count (save_reg_mask)
19566 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19567 arm_compute_static_chain_stack_bytes())
19568 ) % 2) != 0)
19569 {
19570 /* The total number of registers that are going to be pushed
19571 onto the stack is odd. We need to ensure that the stack
19572 is 64-bit aligned before we start to save iWMMXt registers,
19573 and also before we start to create locals. (A local variable
19574 might be a double or long long which we will load/store using
19575 an iWMMXt instruction). Therefore we need to push another
19576 ARM register, so that the stack will be 64-bit aligned. We
19577 	 try to avoid using the arg registers (r0 - r3), as they might be
19578 used to pass values in a tail call. */
19579 for (reg = 4; reg <= 12; reg++)
19580 if ((save_reg_mask & (1 << reg)) == 0)
19581 break;
19582
19583 if (reg <= 12)
19584 save_reg_mask |= (1 << reg);
19585 else
19586 {
19587 cfun->machine->sibcall_blocked = 1;
19588 save_reg_mask |= (1 << 3);
19589 }
19590 }
19591
19592 /* We may need to push an additional register for use initializing the
19593 PIC base register. */
19594 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19595 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19596 {
19597 reg = thumb_find_work_register (1 << 4);
19598 if (!call_used_regs[reg])
19599 save_reg_mask |= (1 << reg);
19600 }
19601
19602 return save_reg_mask;
19603 }
19604
19605 /* Compute a bit mask of which core registers need to be
19606 saved on the stack for the current function. */
19607 static unsigned long
19608 thumb1_compute_save_core_reg_mask (void)
19609 {
19610 unsigned long mask;
19611 unsigned reg;
19612
19613 mask = 0;
19614 for (reg = 0; reg < 12; reg ++)
19615 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19616 mask |= 1 << reg;
19617
19618 /* Handle the frame pointer as a special case. */
19619 if (frame_pointer_needed)
19620 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19621
19622 if (flag_pic
19623 && !TARGET_SINGLE_PIC_BASE
19624 && arm_pic_register != INVALID_REGNUM
19625 && crtl->uses_pic_offset_table)
19626 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19627
19628 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19629 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19630 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19631
19632 /* LR will also be pushed if any lo regs are pushed. */
19633 if (mask & 0xff || thumb_force_lr_save ())
19634 mask |= (1 << LR_REGNUM);
19635
19636 bool call_clobbered_scratch
19637 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
19638 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
19639
19640 /* Make sure we have a low work register if we need one. We will
19641 need one if we are going to push a high register, but we are not
19642 currently intending to push a low register. However if both the
19643 prologue and epilogue have a spare call-clobbered low register,
19644 then we won't need to find an additional work register. It does
19645 not need to be the same register in the prologue and
19646 epilogue. */
19647 if ((mask & 0xff) == 0
19648 && !call_clobbered_scratch
19649 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19650 {
19651 /* Use thumb_find_work_register to choose which register
19652 we will use. If the register is live then we will
19653 have to push it. Use LAST_LO_REGNUM as our fallback
19654 choice for the register to select. */
19655 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19656 /* Make sure the register returned by thumb_find_work_register is
19657 not part of the return value. */
19658 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19659 reg = LAST_LO_REGNUM;
19660
19661 if (callee_saved_reg_p (reg))
19662 mask |= 1 << reg;
19663 }
19664
19665 /* The 504 below is 8 bytes less than 512 because there are two possible
19666 alignment words. We can't tell here if they will be present or not so we
19667 have to play it safe and assume that they are. */
19668 if ((CALLER_INTERWORKING_SLOT_SIZE +
19669 ROUND_UP_WORD (get_frame_size ()) +
19670 crtl->outgoing_args_size) >= 504)
19671 {
19672 /* This is the same as the code in thumb1_expand_prologue() which
19673 determines which register to use for stack decrement. */
19674 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19675 if (mask & (1 << reg))
19676 break;
19677
19678 if (reg > LAST_LO_REGNUM)
19679 {
19680 /* Make sure we have a register available for stack decrement. */
19681 mask |= 1 << LAST_LO_REGNUM;
19682 }
19683 }
19684
19685 return mask;
19686 }
19687
19688
19689 /* Return the number of bytes required to save VFP registers. */
19690 static int
19691 arm_get_vfp_saved_size (void)
19692 {
19693 unsigned int regno;
19694 int count;
19695 int saved;
19696
19697 saved = 0;
19698 /* Space for saved VFP registers. */
19699 if (TARGET_HARD_FLOAT)
19700 {
19701 count = 0;
19702 for (regno = FIRST_VFP_REGNUM;
19703 regno < LAST_VFP_REGNUM;
19704 regno += 2)
19705 {
19706 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19707 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19708 {
19709 if (count > 0)
19710 {
19711 		  /* Work around the ARM10 VFPr1 bug.  */
19712 if (count == 2 && !arm_arch6)
19713 count++;
19714 saved += count * 8;
19715 }
19716 count = 0;
19717 }
19718 else
19719 count++;
19720 }
19721 if (count > 0)
19722 {
19723 if (count == 2 && !arm_arch6)
19724 count++;
19725 saved += count * 8;
19726 }
19727 }
19728 return saved;
19729 }
19730
19731
19732 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19733 everything bar the final return instruction. If simple_return is true,
19734 then do not output the epilogue, because it has already been emitted in RTL.
19735
19736 Note: do not forget to update length attribute of corresponding insn pattern
19737 when changing assembly output (eg. length attribute of
19738 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19739 register clearing sequences). */
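/* Illustrative examples (not exhaustive): with no registers to restore
   this boils down to a single "bx lr" (or "mov pc, lr" on cores without
   BX); for a normal function whose saved registers are {r4, lr} it emits
   "pop {r4, pc}", so loading the PC doubles as the return.  */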
19740 const char *
19741 output_return_instruction (rtx operand, bool really_return, bool reverse,
19742 bool simple_return)
19743 {
19744 char conditional[10];
19745 char instr[100];
19746 unsigned reg;
19747 unsigned long live_regs_mask;
19748 unsigned long func_type;
19749 arm_stack_offsets *offsets;
19750
19751 func_type = arm_current_func_type ();
19752
19753 if (IS_NAKED (func_type))
19754 return "";
19755
19756 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19757 {
19758 /* If this function was declared non-returning, and we have
19759 found a tail call, then we have to trust that the called
19760 function won't return. */
19761 if (really_return)
19762 {
19763 rtx ops[2];
19764
19765 /* Otherwise, trap an attempted return by aborting. */
19766 ops[0] = operand;
19767 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19768 : "abort");
19769 assemble_external_libcall (ops[1]);
19770 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19771 }
19772
19773 return "";
19774 }
19775
19776 gcc_assert (!cfun->calls_alloca || really_return);
19777
19778 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19779
19780 cfun->machine->return_used_this_function = 1;
19781
19782 offsets = arm_get_frame_offsets ();
19783 live_regs_mask = offsets->saved_regs_mask;
19784
19785 if (!simple_return && live_regs_mask)
19786 {
19787 const char * return_reg;
19788
19789 /* If we do not have any special requirements for function exit
19790 (e.g. interworking) then we can load the return address
19791 directly into the PC. Otherwise we must load it into LR. */
19792 if (really_return
19793 && !IS_CMSE_ENTRY (func_type)
19794 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19795 return_reg = reg_names[PC_REGNUM];
19796 else
19797 return_reg = reg_names[LR_REGNUM];
19798
19799 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19800 {
19801 /* There are three possible reasons for the IP register
19802 being saved. 1) a stack frame was created, in which case
19803 IP contains the old stack pointer, or 2) an ISR routine
19804 corrupted it, or 3) it was saved to align the stack on
19805 iWMMXt. In case 1, restore IP into SP, otherwise just
19806 restore IP. */
19807 if (frame_pointer_needed)
19808 {
19809 live_regs_mask &= ~ (1 << IP_REGNUM);
19810 live_regs_mask |= (1 << SP_REGNUM);
19811 }
19812 else
19813 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19814 }
19815
19816 /* On some ARM architectures it is faster to use LDR rather than
19817 LDM to load a single register. On other architectures, the
19818 cost is the same. In 26 bit mode, or for exception handlers,
19819 we have to use LDM to load the PC so that the CPSR is also
19820 restored. */
19821 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19822 if (live_regs_mask == (1U << reg))
19823 break;
19824
19825 if (reg <= LAST_ARM_REGNUM
19826 && (reg != LR_REGNUM
19827 || ! really_return
19828 || ! IS_INTERRUPT (func_type)))
19829 {
19830 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19831 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19832 }
19833 else
19834 {
19835 char *p;
19836 int first = 1;
19837
19838 /* Generate the load multiple instruction to restore the
19839 registers. Note we can get here, even if
19840 frame_pointer_needed is true, but only if sp already
19841 points to the base of the saved core registers. */
19842 if (live_regs_mask & (1 << SP_REGNUM))
19843 {
19844 unsigned HOST_WIDE_INT stack_adjust;
19845
19846 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19847 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19848
19849 if (stack_adjust && arm_arch5t && TARGET_ARM)
19850 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19851 else
19852 {
19853 /* If we can't use ldmib (SA110 bug),
19854 then try to pop r3 instead. */
19855 if (stack_adjust)
19856 live_regs_mask |= 1 << 3;
19857
19858 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19859 }
19860 }
19861 /* For interrupt returns we have to use an LDM rather than
19862 a POP so that we can use the exception return variant. */
19863 else if (IS_INTERRUPT (func_type))
19864 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19865 else
19866 sprintf (instr, "pop%s\t{", conditional);
19867
19868 p = instr + strlen (instr);
19869
19870 for (reg = 0; reg <= SP_REGNUM; reg++)
19871 if (live_regs_mask & (1 << reg))
19872 {
19873 int l = strlen (reg_names[reg]);
19874
19875 if (first)
19876 first = 0;
19877 else
19878 {
19879 memcpy (p, ", ", 2);
19880 p += 2;
19881 }
19882
19883 memcpy (p, "%|", 2);
19884 memcpy (p + 2, reg_names[reg], l);
19885 p += l + 2;
19886 }
19887
19888 if (live_regs_mask & (1 << LR_REGNUM))
19889 {
19890 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19891 /* If returning from an interrupt, restore the CPSR. */
19892 if (IS_INTERRUPT (func_type))
19893 strcat (p, "^");
19894 }
19895 else
19896 strcpy (p, "}");
19897 }
19898
19899 output_asm_insn (instr, & operand);
19900
19901 /* See if we need to generate an extra instruction to
19902 perform the actual function return. */
19903 if (really_return
19904 && func_type != ARM_FT_INTERWORKED
19905 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19906 {
19907 /* The return has already been handled
19908 by loading the LR into the PC. */
19909 return "";
19910 }
19911 }
19912
19913 if (really_return)
19914 {
19915 switch ((int) ARM_FUNC_TYPE (func_type))
19916 {
19917 case ARM_FT_ISR:
19918 case ARM_FT_FIQ:
19919 /* ??? This is wrong for unified assembly syntax. */
19920 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19921 break;
19922
19923 case ARM_FT_INTERWORKED:
19924 gcc_assert (arm_arch5t || arm_arch4t);
19925 sprintf (instr, "bx%s\t%%|lr", conditional);
19926 break;
19927
19928 case ARM_FT_EXCEPTION:
19929 /* ??? This is wrong for unified assembly syntax. */
19930 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19931 break;
19932
19933 default:
19934 if (IS_CMSE_ENTRY (func_type))
19935 {
19936 /* Check if we have to clear the 'GE bits', which are only used if
19937 parallel add and subtraction instructions are available. */
19938 if (TARGET_INT_SIMD)
19939 snprintf (instr, sizeof (instr),
19940 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19941 else
19942 snprintf (instr, sizeof (instr),
19943 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19944
19945 output_asm_insn (instr, & operand);
19946 if (TARGET_HARD_FLOAT)
19947 {
19948 /* Clear the cumulative exception-status bits (0-4,7) and the
19949 condition code bits (28-31) of the FPSCR. We need to
19950 remember to clear the first scratch register used (IP) and
19951 save and restore the second (r4). */
19952 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19953 output_asm_insn (instr, & operand);
19954 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19955 output_asm_insn (instr, & operand);
19956 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19957 output_asm_insn (instr, & operand);
19958 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19959 output_asm_insn (instr, & operand);
19960 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19961 output_asm_insn (instr, & operand);
19962 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19963 output_asm_insn (instr, & operand);
19964 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19965 output_asm_insn (instr, & operand);
19966 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19967 output_asm_insn (instr, & operand);
19968 }
19969 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19970 }
19971 /* Use bx if it's available. */
19972 else if (arm_arch5t || arm_arch4t)
19973 sprintf (instr, "bx%s\t%%|lr", conditional);
19974 else
19975 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19976 break;
19977 }
19978
19979 output_asm_insn (instr, & operand);
19980 }
19981
19982 return "";
19983 }
19984
19985 /* Output in FILE asm statements needed to declare the NAME of the function
19986 defined by its DECL node. */
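/* Sketch of the output for a function foo carrying the
   cmse_nonsecure_entry attribute (foo is a made-up name, and the exact
   directives depend on the target macros): roughly
       .global __acle_se_foo
       .type   __acle_se_foo, %function
       .type   foo, %function
     foo:
     __acle_se_foo:
   so the linker can create a secure gateway veneer for foo.  */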
19987
19988 void
19989 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19990 {
19991 size_t cmse_name_len;
19992 char *cmse_name = 0;
19993 char cmse_prefix[] = "__acle_se_";
19994
19995 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19996 extra function label for each function with the 'cmse_nonsecure_entry'
19997 attribute. This extra function label should be prepended with
19998 '__acle_se_', telling the linker that it needs to create secure gateway
19999 veneers for this function. */
20000 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
20001 DECL_ATTRIBUTES (decl)))
20002 {
20003 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
20004 cmse_name = XALLOCAVEC (char, cmse_name_len);
20005 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
20006 targetm.asm_out.globalize_label (file, cmse_name);
20007
20008 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
20009 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
20010 }
20011
20012 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
20013 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20014 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20015 ASM_OUTPUT_LABEL (file, name);
20016
20017 if (cmse_name)
20018 ASM_OUTPUT_LABEL (file, cmse_name);
20019
20020 ARM_OUTPUT_FN_UNWIND (file, TRUE);
20021 }
20022
20023 /* Write the function name into the code section, directly preceding
20024 the function prologue.
20025
20026 Code will be output similar to this:
20027 t0
20028 .ascii "arm_poke_function_name", 0
20029 .align
20030 t1
20031 .word 0xff000000 + (t1 - t0)
20032 arm_poke_function_name
20033 mov ip, sp
20034 stmfd sp!, {fp, ip, lr, pc}
20035 sub fp, ip, #4
20036
20037 When performing a stack backtrace, code can inspect the value
20038 of 'pc' stored at 'fp' + 0. If the trace function then looks
20039 at location pc - 12 and the top 8 bits are set, then we know
20040 that there is a function name embedded immediately preceding this
20041 location, and that its length is ((pc[-3]) & ~0xff000000).
20042
20043 We assume that pc is declared as a pointer to an unsigned long.
20044
20045 It is of no benefit to output the function name if we are assembling
20046 a leaf function. These function types will not contain a stack
20047 backtrace structure, therefore it is not possible to determine the
20048 function name. */
20049 void
20050 arm_poke_function_name (FILE *stream, const char *name)
20051 {
20052 unsigned long alignlength;
20053 unsigned long length;
20054 rtx x;
20055
20056 length = strlen (name) + 1;
20057 alignlength = ROUND_UP_WORD (length);
20058
20059 ASM_OUTPUT_ASCII (stream, name, length);
20060 ASM_OUTPUT_ALIGN (stream, 2);
20061 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20062 assemble_aligned_integer (UNITS_PER_WORD, x);
20063 }
20064
20065 /* Place some comments into the assembler stream
20066 describing the current function. */
20067 static void
20068 arm_output_function_prologue (FILE *f)
20069 {
20070 unsigned long func_type;
20071
20072 /* Sanity check. */
20073 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20074
20075 func_type = arm_current_func_type ();
20076
20077 switch ((int) ARM_FUNC_TYPE (func_type))
20078 {
20079 default:
20080 case ARM_FT_NORMAL:
20081 break;
20082 case ARM_FT_INTERWORKED:
20083 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20084 break;
20085 case ARM_FT_ISR:
20086 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20087 break;
20088 case ARM_FT_FIQ:
20089 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20090 break;
20091 case ARM_FT_EXCEPTION:
20092 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20093 break;
20094 }
20095
20096 if (IS_NAKED (func_type))
20097 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20098
20099 if (IS_VOLATILE (func_type))
20100 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20101
20102 if (IS_NESTED (func_type))
20103 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20104 if (IS_STACKALIGN (func_type))
20105 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20106 if (IS_CMSE_ENTRY (func_type))
20107 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20108
20109 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20110 (HOST_WIDE_INT) crtl->args.size,
20111 crtl->args.pretend_args_size,
20112 (HOST_WIDE_INT) get_frame_size ());
20113
20114 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20115 frame_pointer_needed,
20116 cfun->machine->uses_anonymous_args);
20117
20118 if (cfun->machine->lr_save_eliminated)
20119 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20120
20121 if (crtl->calls_eh_return)
20122 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20123
20124 }
20125
20126 static void
20127 arm_output_function_epilogue (FILE *)
20128 {
20129 arm_stack_offsets *offsets;
20130
20131 if (TARGET_THUMB1)
20132 {
20133 int regno;
20134
20135 /* Emit any call-via-reg trampolines that are needed for v4t support
20136 of call_reg and call_value_reg type insns. */
20137 for (regno = 0; regno < LR_REGNUM; regno++)
20138 {
20139 rtx label = cfun->machine->call_via[regno];
20140
20141 if (label != NULL)
20142 {
20143 switch_to_section (function_section (current_function_decl));
20144 targetm.asm_out.internal_label (asm_out_file, "L",
20145 CODE_LABEL_NUMBER (label));
20146 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20147 }
20148 }
20149
20150 /* ??? Probably not safe to set this here, since it assumes that a
20151 function will be emitted as assembly immediately after we generate
20152 RTL for it. This does not happen for inline functions. */
20153 cfun->machine->return_used_this_function = 0;
20154 }
20155 else /* TARGET_32BIT */
20156 {
20157 /* We need to take into account any stack-frame rounding. */
20158 offsets = arm_get_frame_offsets ();
20159
20160 gcc_assert (!use_return_insn (FALSE, NULL)
20161 || (cfun->machine->return_used_this_function != 0)
20162 || offsets->saved_regs == offsets->outgoing_args
20163 || frame_pointer_needed);
20164 }
20165 }
20166
20167 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20168 STR and STRD. If an even number of registers is being pushed, an
20169 STRD pattern is created for each register pair. If an
20170 odd number of registers is pushed, emit an initial STR followed by
20171 as many STRD instructions as are needed. This works best when the
20172 stack is initially 64-bit aligned (the normal case), since it
20173 ensures that each STRD is also 64-bit aligned. */
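/* Illustrative example: pushing {r4, r5, r6} (an odd count) emits
       str  r4, [sp, #-12]!
       strd r5, r6, [sp, #4]
   while pushing {r4, r5} becomes a single "strd r4, r5, [sp, #-8]!".  */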
20174 static void
20175 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20176 {
20177 int num_regs = 0;
20178 int i;
20179 int regno;
20180 rtx par = NULL_RTX;
20181 rtx dwarf = NULL_RTX;
20182 rtx tmp;
20183 bool first = true;
20184
20185 num_regs = bit_count (saved_regs_mask);
20186
20187 /* Must be at least one register to save, and can't save SP or PC. */
20188 gcc_assert (num_regs > 0 && num_regs <= 14);
20189 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20190 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20191
20192 /* Create sequence for DWARF info. All the frame-related data for
20193 debugging is held in this wrapper. */
20194 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20195
20196 /* Describe the stack adjustment. */
20197 tmp = gen_rtx_SET (stack_pointer_rtx,
20198 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20199 RTX_FRAME_RELATED_P (tmp) = 1;
20200 XVECEXP (dwarf, 0, 0) = tmp;
20201
20202 /* Find the first register. */
20203 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20204 ;
20205
20206 i = 0;
20207
20208 /* If there's an odd number of registers to push, start off by
20209 pushing a single register. This ensures that subsequent strd
20210 operations are dword aligned (assuming that SP was originally
20211 64-bit aligned). */
20212 if ((num_regs & 1) != 0)
20213 {
20214 rtx reg, mem, insn;
20215
20216 reg = gen_rtx_REG (SImode, regno);
20217 if (num_regs == 1)
20218 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20219 stack_pointer_rtx));
20220 else
20221 mem = gen_frame_mem (Pmode,
20222 gen_rtx_PRE_MODIFY
20223 (Pmode, stack_pointer_rtx,
20224 plus_constant (Pmode, stack_pointer_rtx,
20225 -4 * num_regs)));
20226
20227 tmp = gen_rtx_SET (mem, reg);
20228 RTX_FRAME_RELATED_P (tmp) = 1;
20229 insn = emit_insn (tmp);
20230 RTX_FRAME_RELATED_P (insn) = 1;
20231 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20232 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20233 RTX_FRAME_RELATED_P (tmp) = 1;
20234 i++;
20235 regno++;
20236 XVECEXP (dwarf, 0, i) = tmp;
20237 first = false;
20238 }
20239
20240 while (i < num_regs)
20241 if (saved_regs_mask & (1 << regno))
20242 {
20243 rtx reg1, reg2, mem1, mem2;
20244 rtx tmp0, tmp1, tmp2;
20245 int regno2;
20246
20247 /* Find the register to pair with this one. */
20248 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20249 regno2++)
20250 ;
20251
20252 reg1 = gen_rtx_REG (SImode, regno);
20253 reg2 = gen_rtx_REG (SImode, regno2);
20254
20255 if (first)
20256 {
20257 rtx insn;
20258
20259 first = false;
20260 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20261 stack_pointer_rtx,
20262 -4 * num_regs));
20263 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20264 stack_pointer_rtx,
20265 -4 * (num_regs - 1)));
20266 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20267 plus_constant (Pmode, stack_pointer_rtx,
20268 -4 * (num_regs)));
20269 tmp1 = gen_rtx_SET (mem1, reg1);
20270 tmp2 = gen_rtx_SET (mem2, reg2);
20271 RTX_FRAME_RELATED_P (tmp0) = 1;
20272 RTX_FRAME_RELATED_P (tmp1) = 1;
20273 RTX_FRAME_RELATED_P (tmp2) = 1;
20274 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20275 XVECEXP (par, 0, 0) = tmp0;
20276 XVECEXP (par, 0, 1) = tmp1;
20277 XVECEXP (par, 0, 2) = tmp2;
20278 insn = emit_insn (par);
20279 RTX_FRAME_RELATED_P (insn) = 1;
20280 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20281 }
20282 else
20283 {
20284 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20285 stack_pointer_rtx,
20286 4 * i));
20287 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20288 stack_pointer_rtx,
20289 4 * (i + 1)));
20290 tmp1 = gen_rtx_SET (mem1, reg1);
20291 tmp2 = gen_rtx_SET (mem2, reg2);
20292 RTX_FRAME_RELATED_P (tmp1) = 1;
20293 RTX_FRAME_RELATED_P (tmp2) = 1;
20294 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20295 XVECEXP (par, 0, 0) = tmp1;
20296 XVECEXP (par, 0, 1) = tmp2;
20297 emit_insn (par);
20298 }
20299
20300 /* Create unwind information. This is an approximation. */
20301 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20302 plus_constant (Pmode,
20303 stack_pointer_rtx,
20304 4 * i)),
20305 reg1);
20306 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20307 plus_constant (Pmode,
20308 stack_pointer_rtx,
20309 4 * (i + 1))),
20310 reg2);
20311
20312 RTX_FRAME_RELATED_P (tmp1) = 1;
20313 RTX_FRAME_RELATED_P (tmp2) = 1;
20314 XVECEXP (dwarf, 0, i + 1) = tmp1;
20315 XVECEXP (dwarf, 0, i + 2) = tmp2;
20316 i += 2;
20317 regno = regno2 + 1;
20318 }
20319 else
20320 regno++;
20321
20322 return;
20323 }
20324
20325 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20326 whenever possible, otherwise it emits single-word stores. The first store
20327 also allocates stack space for all saved registers, using writeback with
20328 post-addressing mode. All other stores use offset addressing. If no STRD
20329 can be emitted, this function emits a sequence of single-word stores,
20330 and not an STM as before, because single-word stores provide more
20331 scheduling freedom and can be turned into an STM by peephole optimizations. */
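/* Illustrative example: pushing {r4, r5, r7} in ARM mode emits
       strd r4, r5, [sp, #-12]!
       str  r7, [sp, #8]
   with the first store doing the whole stack allocation up front.  */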
20332 static void
20333 arm_emit_strd_push (unsigned long saved_regs_mask)
20334 {
20335 int num_regs = 0;
20336 int i, j, dwarf_index = 0;
20337 int offset = 0;
20338 rtx dwarf = NULL_RTX;
20339 rtx insn = NULL_RTX;
20340 rtx tmp, mem;
20341
20342 /* TODO: More efficient code can be emitted by changing the
20343 layout, e.g., first push all pairs that can use STRD to keep the
20344 stack aligned, and then push all other registers. */
20345 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20346 if (saved_regs_mask & (1 << i))
20347 num_regs++;
20348
20349 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20350 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20351 gcc_assert (num_regs > 0);
20352
20353 /* Create sequence for DWARF info. */
20354 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20355
20356 /* For dwarf info, we generate explicit stack update. */
20357 tmp = gen_rtx_SET (stack_pointer_rtx,
20358 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20359 RTX_FRAME_RELATED_P (tmp) = 1;
20360 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20361
20362 /* Save registers. */
20363 offset = - 4 * num_regs;
20364 j = 0;
20365 while (j <= LAST_ARM_REGNUM)
20366 if (saved_regs_mask & (1 << j))
20367 {
20368 if ((j % 2 == 0)
20369 && (saved_regs_mask & (1 << (j + 1))))
20370 {
20371 /* Current register and previous register form register pair for
20372 which STRD can be generated. */
20373 if (offset < 0)
20374 {
20375 /* Allocate stack space for all saved registers. */
20376 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20377 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20378 mem = gen_frame_mem (DImode, tmp);
20379 offset = 0;
20380 }
20381 else if (offset > 0)
20382 mem = gen_frame_mem (DImode,
20383 plus_constant (Pmode,
20384 stack_pointer_rtx,
20385 offset));
20386 else
20387 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20388
20389 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20390 RTX_FRAME_RELATED_P (tmp) = 1;
20391 tmp = emit_insn (tmp);
20392
20393 /* Record the first store insn. */
20394 if (dwarf_index == 1)
20395 insn = tmp;
20396
20397 /* Generate dwarf info. */
20398 mem = gen_frame_mem (SImode,
20399 plus_constant (Pmode,
20400 stack_pointer_rtx,
20401 offset));
20402 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20403 RTX_FRAME_RELATED_P (tmp) = 1;
20404 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20405
20406 mem = gen_frame_mem (SImode,
20407 plus_constant (Pmode,
20408 stack_pointer_rtx,
20409 offset + 4));
20410 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20411 RTX_FRAME_RELATED_P (tmp) = 1;
20412 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20413
20414 offset += 8;
20415 j += 2;
20416 }
20417 else
20418 {
20419 /* Emit a single word store. */
20420 if (offset < 0)
20421 {
20422 /* Allocate stack space for all saved registers. */
20423 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20424 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20425 mem = gen_frame_mem (SImode, tmp);
20426 offset = 0;
20427 }
20428 else if (offset > 0)
20429 mem = gen_frame_mem (SImode,
20430 plus_constant (Pmode,
20431 stack_pointer_rtx,
20432 offset));
20433 else
20434 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20435
20436 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20437 RTX_FRAME_RELATED_P (tmp) = 1;
20438 tmp = emit_insn (tmp);
20439
20440 /* Record the first store insn. */
20441 if (dwarf_index == 1)
20442 insn = tmp;
20443
20444 /* Generate dwarf info. */
20445 mem = gen_frame_mem (SImode,
20446 plus_constant (Pmode,
20447 stack_pointer_rtx,
20448 offset));
20449 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20450 RTX_FRAME_RELATED_P (tmp) = 1;
20451 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20452
20453 offset += 4;
20454 j += 1;
20455 }
20456 }
20457 else
20458 j++;
20459
20460 /* Attach dwarf info to the first insn we generate. */
20461 gcc_assert (insn != NULL_RTX);
20462 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20463 RTX_FRAME_RELATED_P (insn) = 1;
20464 }
20465
20466 /* Generate and emit an insn that we will recognize as a push_multi.
20467 Unfortunately, since this insn does not reflect very well the actual
20468 semantics of the operation, we need to annotate the insn for the benefit
20469 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20470 MASK for registers that should be annotated for DWARF2 frame unwind
20471 information. */
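/* Illustrative example: a MASK covering {r4, r5, lr} produces a single
   "push {r4, r5, lr}", and DWARF_REGS_MASK selects which of those stores
   get entries in the attached REG_FRAME_RELATED_EXPR note.  */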
20472 static rtx
20473 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20474 {
20475 int num_regs = 0;
20476 int num_dwarf_regs = 0;
20477 int i, j;
20478 rtx par;
20479 rtx dwarf;
20480 int dwarf_par_index;
20481 rtx tmp, reg;
20482
20483 /* We don't record the PC in the dwarf frame information. */
20484 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20485
20486 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20487 {
20488 if (mask & (1 << i))
20489 num_regs++;
20490 if (dwarf_regs_mask & (1 << i))
20491 num_dwarf_regs++;
20492 }
20493
20494 gcc_assert (num_regs && num_regs <= 16);
20495 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20496
20497 /* For the body of the insn we are going to generate an UNSPEC in
20498 parallel with several USEs. This allows the insn to be recognized
20499 by the push_multi pattern in the arm.md file.
20500
20501 The body of the insn looks something like this:
20502
20503 (parallel [
20504 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20505 (const_int:SI <num>)))
20506 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20507 (use (reg:SI XX))
20508 (use (reg:SI YY))
20509 ...
20510 ])
20511
20512 For the frame note however, we try to be more explicit and actually
20513 show each register being stored into the stack frame, plus a (single)
20514 decrement of the stack pointer. We do it this way in order to be
20515 friendly to the stack unwinding code, which only wants to see a single
20516 stack decrement per instruction. The RTL we generate for the note looks
20517 something like this:
20518
20519 (sequence [
20520 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20521 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20522 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20523 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20524 ...
20525 ])
20526
20527 FIXME: In an ideal world the PRE_MODIFY would not exist and
20528 instead we'd have a parallel expression detailing all
20529 the stores to the various memory addresses so that debug
20530 information is more up-to-date. Remember however while writing
20531 this to take care of the constraints with the push instruction.
20532
20533 Note also that this has to be taken care of for the VFP registers.
20534
20535 For more see PR43399. */
20536
20537 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20538 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20539 dwarf_par_index = 1;
20540
20541 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20542 {
20543 if (mask & (1 << i))
20544 {
20545 reg = gen_rtx_REG (SImode, i);
20546
20547 XVECEXP (par, 0, 0)
20548 = gen_rtx_SET (gen_frame_mem
20549 (BLKmode,
20550 gen_rtx_PRE_MODIFY (Pmode,
20551 stack_pointer_rtx,
20552 plus_constant
20553 (Pmode, stack_pointer_rtx,
20554 -4 * num_regs))
20555 ),
20556 gen_rtx_UNSPEC (BLKmode,
20557 gen_rtvec (1, reg),
20558 UNSPEC_PUSH_MULT));
20559
20560 if (dwarf_regs_mask & (1 << i))
20561 {
20562 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20563 reg);
20564 RTX_FRAME_RELATED_P (tmp) = 1;
20565 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20566 }
20567
20568 break;
20569 }
20570 }
20571
20572 for (j = 1, i++; j < num_regs; i++)
20573 {
20574 if (mask & (1 << i))
20575 {
20576 reg = gen_rtx_REG (SImode, i);
20577
20578 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20579
20580 if (dwarf_regs_mask & (1 << i))
20581 {
20582 tmp
20583 = gen_rtx_SET (gen_frame_mem
20584 (SImode,
20585 plus_constant (Pmode, stack_pointer_rtx,
20586 4 * j)),
20587 reg);
20588 RTX_FRAME_RELATED_P (tmp) = 1;
20589 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20590 }
20591
20592 j++;
20593 }
20594 }
20595
20596 par = emit_insn (par);
20597
20598 tmp = gen_rtx_SET (stack_pointer_rtx,
20599 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20600 RTX_FRAME_RELATED_P (tmp) = 1;
20601 XVECEXP (dwarf, 0, 0) = tmp;
20602
20603 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20604
20605 return par;
20606 }
20607
20608 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20609 SIZE is the offset to be adjusted.
20610 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20611 static void
20612 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20613 {
20614 rtx dwarf;
20615
20616 RTX_FRAME_RELATED_P (insn) = 1;
20617 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20618 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20619 }
20620
20621 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20622 SAVED_REGS_MASK shows which registers need to be restored.
20623
20624 Unfortunately, since this insn does not reflect very well the actual
20625 semantics of the operation, we need to annotate the insn for the benefit
20626 of DWARF2 frame unwind information. */
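/* Illustrative example: a SAVED_REGS_MASK covering {r4, r5, pc} emits
   "pop {r4, r5, pc}" as the function return, with REG_CFA_RESTORE notes
   for r4 and r5 but not for PC.  */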
20627 static void
20628 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20629 {
20630 int num_regs = 0;
20631 int i, j;
20632 rtx par;
20633 rtx dwarf = NULL_RTX;
20634 rtx tmp, reg;
20635 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20636 int offset_adj;
20637 int emit_update;
20638
20639 offset_adj = return_in_pc ? 1 : 0;
20640 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20641 if (saved_regs_mask & (1 << i))
20642 num_regs++;
20643
20644 gcc_assert (num_regs && num_regs <= 16);
20645
20646 /* If SP is in the reglist, then we don't emit an SP update insn. */
20647 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20648
20649 /* The parallel needs to hold num_regs SETs
20650 and one SET for the stack update. */
20651 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20652
20653 if (return_in_pc)
20654 XVECEXP (par, 0, 0) = ret_rtx;
20655
20656 if (emit_update)
20657 {
20658 /* Increment the stack pointer, based on there being
20659 num_regs 4-byte registers to restore. */
20660 tmp = gen_rtx_SET (stack_pointer_rtx,
20661 plus_constant (Pmode,
20662 stack_pointer_rtx,
20663 4 * num_regs));
20664 RTX_FRAME_RELATED_P (tmp) = 1;
20665 XVECEXP (par, 0, offset_adj) = tmp;
20666 }
20667
20668 /* Now restore every reg, which may include PC. */
20669 for (j = 0, i = 0; j < num_regs; i++)
20670 if (saved_regs_mask & (1 << i))
20671 {
20672 reg = gen_rtx_REG (SImode, i);
20673 if ((num_regs == 1) && emit_update && !return_in_pc)
20674 {
20675 /* Emit single load with writeback. */
20676 tmp = gen_frame_mem (SImode,
20677 gen_rtx_POST_INC (Pmode,
20678 stack_pointer_rtx));
20679 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20680 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20681 return;
20682 }
20683
20684 tmp = gen_rtx_SET (reg,
20685 gen_frame_mem
20686 (SImode,
20687 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20688 RTX_FRAME_RELATED_P (tmp) = 1;
20689 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20690
20691 /* We need to maintain a sequence for DWARF info too. As dwarf info
20692 should not have PC, skip PC. */
20693 if (i != PC_REGNUM)
20694 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20695
20696 j++;
20697 }
20698
20699 if (return_in_pc)
20700 par = emit_jump_insn (par);
20701 else
20702 par = emit_insn (par);
20703
20704 REG_NOTES (par) = dwarf;
20705 if (!return_in_pc)
20706 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20707 stack_pointer_rtx, stack_pointer_rtx);
20708 }
20709
20710 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20711 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20712
20713 Unfortunately, since this insn does not reflect very well the actual
20714 semantics of the operation, we need to annotate the insn for the benefit
20715 of DWARF2 frame unwind information. */
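/* Illustrative example: popping four D registers starting at d8 with
   BASE_REG being the stack pointer corresponds roughly to
   "vldm sp!, {d8-d11}"; requests for more than 16 D registers are split
   into two such instructions below.  */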
20716 static void
20717 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20718 {
20719 int i, j;
20720 rtx par;
20721 rtx dwarf = NULL_RTX;
20722 rtx tmp, reg;
20723
20724 gcc_assert (num_regs && num_regs <= 32);
20725
20726 /* Workaround ARM10 VFPr1 bug. */
20727 if (num_regs == 2 && !arm_arch6)
20728 {
20729 if (first_reg == 15)
20730 first_reg--;
20731
20732 num_regs++;
20733 }
20734
20735 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20736 there could be up to 32 D-registers to restore.
20737 If there are more than 16 D-registers, make two recursive calls,
20738 each of which emits one pop_multi instruction. */
20739 if (num_regs > 16)
20740 {
20741 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20742 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20743 return;
20744 }
20745
20746 /* The parallel needs to hold num_regs SETs
20747 and one SET for the stack update. */
20748 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20749
20750 /* Increment the stack pointer, based on there being
20751 num_regs 8-byte registers to restore. */
20752 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20753 RTX_FRAME_RELATED_P (tmp) = 1;
20754 XVECEXP (par, 0, 0) = tmp;
20755
20756 /* Now show every reg that will be restored, using a SET for each. */
20757 for (j = 0, i = first_reg; j < num_regs; i += 2)
20758 {
20759 reg = gen_rtx_REG (DFmode, i);
20760
20761 tmp = gen_rtx_SET (reg,
20762 gen_frame_mem
20763 (DFmode,
20764 plus_constant (Pmode, base_reg, 8 * j)));
20765 RTX_FRAME_RELATED_P (tmp) = 1;
20766 XVECEXP (par, 0, j + 1) = tmp;
20767
20768 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20769
20770 j++;
20771 }
20772
20773 par = emit_insn (par);
20774 REG_NOTES (par) = dwarf;
20775
20776 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20777 if (REGNO (base_reg) == IP_REGNUM)
20778 {
20779 RTX_FRAME_RELATED_P (par) = 1;
20780 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20781 }
20782 else
20783 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20784 base_reg, base_reg);
20785 }
20786
20787 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20788 even number of registers is being popped, LDRD patterns are created for
20789 all register pairs. If an odd number of registers is popped, the last register
20790 is loaded using an LDR pattern. */
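/* Illustrative example: popping {r4, r5, r6, pc} emits roughly
       ldrd r4, r5, [sp]
       add  sp, sp, #8
       pop  {r6, pc}
   since LDRD cannot load the PC.  */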
20791 static void
20792 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20793 {
20794 int num_regs = 0;
20795 int i, j;
20796 rtx par = NULL_RTX;
20797 rtx dwarf = NULL_RTX;
20798 rtx tmp, reg, tmp1;
20799 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20800
20801 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20802 if (saved_regs_mask & (1 << i))
20803 num_regs++;
20804
20805 gcc_assert (num_regs && num_regs <= 16);
20806
20807 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20808 to be popped. So, if num_regs is even, now it will become odd,
20809 and we can generate pop with PC. If num_regs is odd, it will be
20810 even now, and ldr with return can be generated for PC. */
20811 if (return_in_pc)
20812 num_regs--;
20813
20814 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20815
20816 /* Var j iterates over all the registers in saved_regs_mask, while var i
20817 gives the index of each saved register in the stack frame.
20818 A PARALLEL RTX for a register pair is created here, so that the pattern
20819 for LDRD can be matched. As PC is always the last register to be popped,
20820 and we have already decremented num_regs if PC is present, we don't have to worry
20821 about PC in this loop. */
20822 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20823 if (saved_regs_mask & (1 << j))
20824 {
20825 /* Create RTX for memory load. */
20826 reg = gen_rtx_REG (SImode, j);
20827 tmp = gen_rtx_SET (reg,
20828 gen_frame_mem (SImode,
20829 plus_constant (Pmode,
20830 stack_pointer_rtx, 4 * i)));
20831 RTX_FRAME_RELATED_P (tmp) = 1;
20832
20833 if (i % 2 == 0)
20834 {
20835 /* When saved-register index (i) is even, the RTX to be emitted is
20836 yet to be created. Hence create it first. The LDRD pattern we
20837 are generating is :
20838 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20839 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20840 where target registers need not be consecutive. */
20841 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20842 dwarf = NULL_RTX;
20843 }
20844
20845 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20846 added as 0th element and if i is odd, reg_i is added as 1st element
20847 of LDRD pattern shown above. */
20848 XVECEXP (par, 0, (i % 2)) = tmp;
20849 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20850
20851 if ((i % 2) == 1)
20852 {
20853 /* When saved-register index (i) is odd, RTXs for both the registers
20854 to be loaded are generated in above given LDRD pattern, and the
20855 pattern can be emitted now. */
20856 par = emit_insn (par);
20857 REG_NOTES (par) = dwarf;
20858 RTX_FRAME_RELATED_P (par) = 1;
20859 }
20860
20861 i++;
20862 }
20863
20864 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20865 number of registers is even AND return_in_pc is true, the last register is
20866 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20867 then LDR with post increment. */
20868
20869 /* Increment the stack pointer, based on there being
20870 num_regs 4-byte registers to restore. */
20871 tmp = gen_rtx_SET (stack_pointer_rtx,
20872 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20873 RTX_FRAME_RELATED_P (tmp) = 1;
20874 tmp = emit_insn (tmp);
20875 if (!return_in_pc)
20876 {
20877 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20878 stack_pointer_rtx, stack_pointer_rtx);
20879 }
20880
20881 dwarf = NULL_RTX;
20882
20883 if (((num_regs % 2) == 1 && !return_in_pc)
20884 || ((num_regs % 2) == 0 && return_in_pc))
20885 {
20886 /* Scan for the single register to be popped. Skip until the saved
20887 register is found. */
20888 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20889
20890 /* Gen LDR with post increment here. */
20891 tmp1 = gen_rtx_MEM (SImode,
20892 gen_rtx_POST_INC (SImode,
20893 stack_pointer_rtx));
20894 set_mem_alias_set (tmp1, get_frame_alias_set ());
20895
20896 reg = gen_rtx_REG (SImode, j);
20897 tmp = gen_rtx_SET (reg, tmp1);
20898 RTX_FRAME_RELATED_P (tmp) = 1;
20899 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20900
20901 if (return_in_pc)
20902 {
20903 /* If return_in_pc, j must be PC_REGNUM. */
20904 gcc_assert (j == PC_REGNUM);
20905 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20906 XVECEXP (par, 0, 0) = ret_rtx;
20907 XVECEXP (par, 0, 1) = tmp;
20908 par = emit_jump_insn (par);
20909 }
20910 else
20911 {
20912 par = emit_insn (tmp);
20913 REG_NOTES (par) = dwarf;
20914 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20915 stack_pointer_rtx, stack_pointer_rtx);
20916 }
20917
20918 }
20919 else if ((num_regs % 2) == 1 && return_in_pc)
20920 {
20921 /* There are 2 registers to be popped. So, generate the pattern
20922 pop_multiple_with_stack_update_and_return to pop in PC. */
20923 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20924 }
20925
20926 return;
20927 }
20928
20929 /* LDRD in ARM mode needs consecutive registers as operands. This function
20930 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20931 offset addressing and then generates one separate stack update. This provides
20932 more scheduling freedom, compared to writeback on every load. However,
20933 if the function returns using load into PC directly
20934 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20935 before the last load. TODO: Add a peephole optimization to recognize
20936 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20937 peephole optimization to merge the load at stack-offset zero
20938 with the stack update instruction using load with writeback
20939 in post-index addressing mode. */
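/* Illustrative example: popping {r4, r5, r6} emits
       ldrd r4, r5, [sp]
       ldr  r6, [sp, #8]
       add  sp, sp, #12
   and if PC were also in SAVED_REGS_MASK a final "ldr pc, [sp], #4"
   return would be emitted after the stack update.  */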
20940 static void
20941 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20942 {
20943 int j = 0;
20944 int offset = 0;
20945 rtx par = NULL_RTX;
20946 rtx dwarf = NULL_RTX;
20947 rtx tmp, mem;
20948
20949 /* Restore saved registers. */
20950 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20951 j = 0;
20952 while (j <= LAST_ARM_REGNUM)
20953 if (saved_regs_mask & (1 << j))
20954 {
20955 if ((j % 2) == 0
20956 && (saved_regs_mask & (1 << (j + 1)))
20957 && (j + 1) != PC_REGNUM)
20958 {
20959 /* Current register and next register form register pair for which
20960 LDRD can be generated. PC is always the last register popped, and
20961 we handle it separately. */
20962 if (offset > 0)
20963 mem = gen_frame_mem (DImode,
20964 plus_constant (Pmode,
20965 stack_pointer_rtx,
20966 offset));
20967 else
20968 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20969
20970 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20971 tmp = emit_insn (tmp);
20972 RTX_FRAME_RELATED_P (tmp) = 1;
20973
20974 /* Generate dwarf info. */
20975
20976 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20977 gen_rtx_REG (SImode, j),
20978 NULL_RTX);
20979 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20980 gen_rtx_REG (SImode, j + 1),
20981 dwarf);
20982
20983 REG_NOTES (tmp) = dwarf;
20984
20985 offset += 8;
20986 j += 2;
20987 }
20988 else if (j != PC_REGNUM)
20989 {
20990 /* Emit a single word load. */
20991 if (offset > 0)
20992 mem = gen_frame_mem (SImode,
20993 plus_constant (Pmode,
20994 stack_pointer_rtx,
20995 offset));
20996 else
20997 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20998
20999 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
21000 tmp = emit_insn (tmp);
21001 RTX_FRAME_RELATED_P (tmp) = 1;
21002
21003 /* Generate dwarf info. */
21004 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
21005 gen_rtx_REG (SImode, j),
21006 NULL_RTX);
21007
21008 offset += 4;
21009 j += 1;
21010 }
21011 else /* j == PC_REGNUM */
21012 j++;
21013 }
21014 else
21015 j++;
21016
21017 /* Update the stack. */
21018 if (offset > 0)
21019 {
21020 tmp = gen_rtx_SET (stack_pointer_rtx,
21021 plus_constant (Pmode,
21022 stack_pointer_rtx,
21023 offset));
21024 tmp = emit_insn (tmp);
21025 arm_add_cfa_adjust_cfa_note (tmp, offset,
21026 stack_pointer_rtx, stack_pointer_rtx);
21027 offset = 0;
21028 }
21029
21030 if (saved_regs_mask & (1 << PC_REGNUM))
21031 {
21032 /* Only PC is to be popped. */
21033 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21034 XVECEXP (par, 0, 0) = ret_rtx;
21035 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
21036 gen_frame_mem (SImode,
21037 gen_rtx_POST_INC (SImode,
21038 stack_pointer_rtx)));
21039 RTX_FRAME_RELATED_P (tmp) = 1;
21040 XVECEXP (par, 0, 1) = tmp;
21041 par = emit_jump_insn (par);
21042
21043 /* Generate dwarf info. */
21044 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21045 gen_rtx_REG (SImode, PC_REGNUM),
21046 NULL_RTX);
21047 REG_NOTES (par) = dwarf;
21048 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21049 stack_pointer_rtx, stack_pointer_rtx);
21050 }
21051 }
21052
21053 /* Calculate the size of the return value that is passed in registers. */
21054 static unsigned
21055 arm_size_return_regs (void)
21056 {
21057 machine_mode mode;
21058
21059 if (crtl->return_rtx != 0)
21060 mode = GET_MODE (crtl->return_rtx);
21061 else
21062 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21063
21064 return GET_MODE_SIZE (mode);
21065 }
21066
21067 /* Return true if the current function needs to save/restore LR. */
21068 static bool
21069 thumb_force_lr_save (void)
21070 {
21071 return !cfun->machine->lr_save_eliminated
21072 && (!crtl->is_leaf
21073 || thumb_far_jump_used_p ()
21074 || df_regs_ever_live_p (LR_REGNUM));
21075 }
21076
21077 /* We do not know whether r3 will be available, because
21078 there is an indirect tail call happening in this
21079 particular case. */
21080 static bool
21081 is_indirect_tailcall_p (rtx call)
21082 {
21083 rtx pat = PATTERN (call);
21084
21085 /* Indirect tail call. */
21086 pat = XVECEXP (pat, 0, 0);
21087 if (GET_CODE (pat) == SET)
21088 pat = SET_SRC (pat);
21089
21090 pat = XEXP (XEXP (pat, 0), 0);
21091 return REG_P (pat);
21092 }
21093
21094 /* Return true if r3 is used by any of the tail call insns in the
21095 current function. */
21096 static bool
21097 any_sibcall_could_use_r3 (void)
21098 {
21099 edge_iterator ei;
21100 edge e;
21101
21102 if (!crtl->tail_call_emit)
21103 return false;
21104 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21105 if (e->flags & EDGE_SIBCALL)
21106 {
21107 rtx_insn *call = BB_END (e->src);
21108 if (!CALL_P (call))
21109 call = prev_nonnote_nondebug_insn (call);
21110 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21111 if (find_regno_fusage (call, USE, 3)
21112 || is_indirect_tailcall_p (call))
21113 return true;
21114 }
21115 return false;
21116 }
21117
21118
21119 /* Compute the distance from register FROM to register TO.
21120 These can be the arg pointer (26), the soft frame pointer (25),
21121 the stack pointer (13) or the hard frame pointer (11).
21122 In thumb mode r7 is used as the soft frame pointer, if needed.
21123 Typical stack layout looks like this:
21124
21125         old stack pointer -> |    |
21126                               ----
21127                              |    | \
21128                              |    |   saved arguments for
21129                              |    |   vararg functions
21130                              |    | /
21131                                --
21132     hard FP & arg pointer -> |    | \
21133                              |    |   stack
21134                              |    |   frame
21135                              |    | /
21136                                --
21137                              |    | \
21138                              |    |   call saved
21139                              |    |   registers
21140        soft frame pointer -> |    | /
21141                                --
21142                              |    | \
21143                              |    |   local
21144                              |    |   variables
21145       locals base pointer -> |    | /
21146                                --
21147                              |    | \
21148                              |    |   outgoing
21149                              |    |   arguments
21150     current stack pointer -> |    | /
21151                                --
21152
21153 For a given function some or all of these stack components
21154 may not be needed, giving rise to the possibility of
21155 eliminating some of the registers.
21156
21157 The values returned by this function must reflect the behavior
21158 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21159
21160 The sign of the number returned reflects the direction of stack
21161 growth, so the values are positive for all eliminations except
21162 from the soft frame pointer to the hard frame pointer.
21163
21164 SFP may point just inside the local variables block to ensure correct
21165 alignment. */
21166
21167
21168 /* Return cached stack offsets. */
21169
21170 static arm_stack_offsets *
21171 arm_get_frame_offsets (void)
21172 {
21173 struct arm_stack_offsets *offsets;
21174
21175 offsets = &cfun->machine->stack_offsets;
21176
21177 return offsets;
21178 }
21179
21180
21181 /* Calculate stack offsets. These are used to calculate register elimination
21182 offsets and in prologue/epilogue code. Also calculates which registers
21183 should be saved. */
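/* Worked example (illustrative; assumes no static chain, no caller
   interworking slot and no outgoing arguments): a 32-bit function that
   saves {r4, r5, lr} and has 16 bytes of locals ends up with
   saved_args = 0, saved_regs = 12, soft_frame = 12 rounded up to 16 for
   doubleword alignment, locals_base = 32 and outgoing_args = 32.  */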
21184
21185 static void
21186 arm_compute_frame_layout (void)
21187 {
21188 struct arm_stack_offsets *offsets;
21189 unsigned long func_type;
21190 int saved;
21191 int core_saved;
21192 HOST_WIDE_INT frame_size;
21193 int i;
21194
21195 offsets = &cfun->machine->stack_offsets;
21196
21197 /* Initially this is the size of the local variables. It will be translated
21198 into an offset once we have determined the size of preceding data. */
21199 frame_size = ROUND_UP_WORD (get_frame_size ());
21200
21201 /* Space for variadic functions. */
21202 offsets->saved_args = crtl->args.pretend_args_size;
21203
21204 /* In Thumb mode this is incorrect, but never used. */
21205 offsets->frame
21206 = (offsets->saved_args
21207 + arm_compute_static_chain_stack_bytes ()
21208 + (frame_pointer_needed ? 4 : 0));
21209
21210 if (TARGET_32BIT)
21211 {
21212 unsigned int regno;
21213
21214 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21215 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21216 saved = core_saved;
21217
21218 /* We know that SP will be doubleword aligned on entry, and we must
21219 preserve that condition at any subroutine call. We also require the
21220 soft frame pointer to be doubleword aligned. */
21221
21222 if (TARGET_REALLY_IWMMXT)
21223 {
21224 /* Check for the call-saved iWMMXt registers. */
21225 for (regno = FIRST_IWMMXT_REGNUM;
21226 regno <= LAST_IWMMXT_REGNUM;
21227 regno++)
21228 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21229 saved += 8;
21230 }
21231
21232 func_type = arm_current_func_type ();
21233 /* Space for saved VFP registers. */
21234 if (! IS_VOLATILE (func_type)
21235 && TARGET_HARD_FLOAT)
21236 saved += arm_get_vfp_saved_size ();
21237 }
21238 else /* TARGET_THUMB1 */
21239 {
21240 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21241 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21242 saved = core_saved;
21243 if (TARGET_BACKTRACE)
21244 saved += 16;
21245 }
21246
21247 /* Saved registers include the stack frame. */
21248 offsets->saved_regs
21249 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21250 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21251
21252 /* A leaf function does not need any stack alignment if it has nothing
21253 on the stack. */
21254 if (crtl->is_leaf && frame_size == 0
21255 /* However if it calls alloca(), we have a dynamically allocated
21256 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21257 && ! cfun->calls_alloca)
21258 {
21259 offsets->outgoing_args = offsets->soft_frame;
21260 offsets->locals_base = offsets->soft_frame;
21261 return;
21262 }
21263
21264 /* Ensure SFP has the correct alignment. */
21265 if (ARM_DOUBLEWORD_ALIGN
21266 && (offsets->soft_frame & 7))
21267 {
21268 offsets->soft_frame += 4;
21269 /* Try to align stack by pushing an extra reg. Don't bother doing this
21270 when there is a stack frame as the alignment will be rolled into
21271 the normal stack adjustment. */
21272 if (frame_size + crtl->outgoing_args_size == 0)
21273 {
21274 int reg = -1;
21275
21276 /* Register r3 is caller-saved. Normally it does not need to be
21277 saved on entry by the prologue. However if we choose to save
21278 it for padding then we may confuse the compiler into thinking
21279 a prologue sequence is required when in fact it is not. This
21280 will occur when shrink-wrapping if r3 is used as a scratch
21281 register and there are no other callee-saved writes.
21282
21283 This situation can be avoided when other callee-saved registers
21284 are available and r3 is not mandatory if we choose a callee-saved
21285 register for padding. */
21286 bool prefer_callee_reg_p = false;
21287
21288 /* If it is safe to use r3, then do so. This sometimes
21289 generates better code on Thumb-2 by avoiding the need to
21290 use 32-bit push/pop instructions. */
21291 if (! any_sibcall_could_use_r3 ()
21292 && arm_size_return_regs () <= 12
21293 && (offsets->saved_regs_mask & (1 << 3)) == 0
21294 && (TARGET_THUMB2
21295 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21296 {
21297 reg = 3;
21298 if (!TARGET_THUMB2)
21299 prefer_callee_reg_p = true;
21300 }
21301 if (reg == -1
21302 || prefer_callee_reg_p)
21303 {
21304 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21305 {
21306 /* Avoid fixed registers; they may be changed at
21307 arbitrary times so it's unsafe to restore them
21308 during the epilogue. */
21309 if (!fixed_regs[i]
21310 && (offsets->saved_regs_mask & (1 << i)) == 0)
21311 {
21312 reg = i;
21313 break;
21314 }
21315 }
21316 }
21317
21318 if (reg != -1)
21319 {
21320 offsets->saved_regs += 4;
21321 offsets->saved_regs_mask |= (1 << reg);
21322 }
21323 }
21324 }
21325
21326 offsets->locals_base = offsets->soft_frame + frame_size;
21327 offsets->outgoing_args = (offsets->locals_base
21328 + crtl->outgoing_args_size);
21329
21330 if (ARM_DOUBLEWORD_ALIGN)
21331 {
21332 /* Ensure SP remains doubleword aligned. */
21333 if (offsets->outgoing_args & 7)
21334 offsets->outgoing_args += 4;
21335 gcc_assert (!(offsets->outgoing_args & 7));
21336 }
21337 }
21338
21339
21340 /* Calculate the relative offsets for the different stack pointers. Positive
21341 offsets are in the direction of stack growth. */
21342
21343 HOST_WIDE_INT
21344 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21345 {
21346 arm_stack_offsets *offsets;
21347
21348 offsets = arm_get_frame_offsets ();
21349
21350 /* OK, now we have enough information to compute the distances.
21351 There must be an entry in these switch tables for each pair
21352 of registers in ELIMINABLE_REGS, even if some of the entries
21353 seem to be redundant or useless. */
21354 switch (from)
21355 {
21356 case ARG_POINTER_REGNUM:
21357 switch (to)
21358 {
21359 case THUMB_HARD_FRAME_POINTER_REGNUM:
21360 return 0;
21361
21362 case FRAME_POINTER_REGNUM:
21363 /* This is the reverse of the soft frame pointer
21364 to hard frame pointer elimination below. */
21365 return offsets->soft_frame - offsets->saved_args;
21366
21367 case ARM_HARD_FRAME_POINTER_REGNUM:
21368 /* This is only non-zero in the case where the static chain register
21369 is stored above the frame. */
21370 return offsets->frame - offsets->saved_args - 4;
21371
21372 case STACK_POINTER_REGNUM:
21373 /* If nothing has been pushed on the stack at all
21374 then this will return -4. This *is* correct! */
21375 return offsets->outgoing_args - (offsets->saved_args + 4);
21376
21377 default:
21378 gcc_unreachable ();
21379 }
21380 gcc_unreachable ();
21381
21382 case FRAME_POINTER_REGNUM:
21383 switch (to)
21384 {
21385 case THUMB_HARD_FRAME_POINTER_REGNUM:
21386 return 0;
21387
21388 case ARM_HARD_FRAME_POINTER_REGNUM:
21389 /* The hard frame pointer points to the top entry in the
21390 stack frame. The soft frame pointer to the bottom entry
21391 in the stack frame. If there is no stack frame at all,
21392 then they are identical. */
21393
21394 return offsets->frame - offsets->soft_frame;
21395
21396 case STACK_POINTER_REGNUM:
21397 return offsets->outgoing_args - offsets->soft_frame;
21398
21399 default:
21400 gcc_unreachable ();
21401 }
21402 gcc_unreachable ();
21403
21404 default:
21405 /* You cannot eliminate from the stack pointer.
21406 In theory you could eliminate from the hard frame
21407 pointer to the stack pointer, but this will never
21408 happen, since if a stack frame is not needed the
21409 hard frame pointer will never be used. */
21410 gcc_unreachable ();
21411 }
21412 }
21413
21414 /* Given FROM and TO register numbers, say whether this elimination is
21415 allowed. Frame pointer elimination is automatically handled.
21416
21417 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21418 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21419 pointer, we must eliminate FRAME_POINTER_REGNUM into
21420 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21421 ARG_POINTER_REGNUM. */
21422
21423 bool
21424 arm_can_eliminate (const int from, const int to)
21425 {
21426 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21427 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21428 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21429 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21430 true);
21431 }
21432
21433 /* Emit RTL to save coprocessor registers on function entry. Returns the
21434 number of bytes pushed. */
21435
21436 static int
21437 arm_save_coproc_regs(void)
21438 {
21439 int saved_size = 0;
21440 unsigned reg;
21441 unsigned start_reg;
21442 rtx insn;
21443
21444 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21445 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21446 {
21447 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21448 insn = gen_rtx_MEM (V2SImode, insn);
21449 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21450 RTX_FRAME_RELATED_P (insn) = 1;
21451 saved_size += 8;
21452 }
21453
21454 if (TARGET_HARD_FLOAT)
21455 {
21456 start_reg = FIRST_VFP_REGNUM;
21457
21458 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21459 {
21460 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21461 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21462 {
21463 if (start_reg != reg)
21464 saved_size += vfp_emit_fstmd (start_reg,
21465 (reg - start_reg) / 2);
21466 start_reg = reg + 2;
21467 }
21468 }
21469 if (start_reg != reg)
21470 saved_size += vfp_emit_fstmd (start_reg,
21471 (reg - start_reg) / 2);
21472 }
21473 return saved_size;
21474 }
21475
21476
21477 /* Set the Thumb frame pointer from the stack pointer. */
21478
21479 static void
21480 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21481 {
21482 HOST_WIDE_INT amount;
21483 rtx insn, dwarf;
21484
21485 amount = offsets->outgoing_args - offsets->locals_base;
21486 if (amount < 1024)
21487 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21488 stack_pointer_rtx, GEN_INT (amount)));
21489 else
21490 {
21491 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21492 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21493 expects the first two operands to be the same. */
21494 if (TARGET_THUMB2)
21495 {
21496 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21497 stack_pointer_rtx,
21498 hard_frame_pointer_rtx));
21499 }
21500 else
21501 {
21502 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21503 hard_frame_pointer_rtx,
21504 stack_pointer_rtx));
21505 }
21506 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21507 plus_constant (Pmode, stack_pointer_rtx, amount));
21508 RTX_FRAME_RELATED_P (dwarf) = 1;
21509 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21510 }
21511
21512 RTX_FRAME_RELATED_P (insn) = 1;
21513 }
21514
21515 struct scratch_reg {
21516 rtx reg;
21517 bool saved;
21518 };
21519
21520 /* Return a short-lived scratch register for use as a 2nd scratch register on
21521 function entry after the registers are saved in the prologue. This register
21522 must be released by means of release_scratch_register_on_entry. IP is not
21523 considered since it is always used as the 1st scratch register if available.
21524
21525 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21526 mask of live registers. */
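/* A sketch of the selection policy implemented below (no additional
   behaviour): if LR is in LIVE_REGS and is not REGNO1, it is reused for
   free; otherwise the first suitable register in r4-r10 is taken; as a
   last resort r2 or r3 is used and, if it is live on entry, spilled around
   the use with a push (roughly "str rN, [sp, #-4]!") that
   release_scratch_register_on_entry later undoes with "ldr rN, [sp], #4".  */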
21527
21528 static void
21529 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21530 unsigned long live_regs)
21531 {
21532 int regno = -1;
21533
21534 sr->saved = false;
21535
21536 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21537 regno = LR_REGNUM;
21538 else
21539 {
21540 unsigned int i;
21541
21542 for (i = 4; i < 11; i++)
21543 if (regno1 != i && (live_regs & (1 << i)) != 0)
21544 {
21545 regno = i;
21546 break;
21547 }
21548
21549 if (regno < 0)
21550 {
21551 /* If IP is used as the 1st scratch register for a nested function,
21552 then either r3 wasn't available or is used to preserve IP. */
21553 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21554 regno1 = 3;
21555 regno = (regno1 == 3 ? 2 : 3);
21556 sr->saved
21557 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21558 regno);
21559 }
21560 }
21561
21562 sr->reg = gen_rtx_REG (SImode, regno);
21563 if (sr->saved)
21564 {
21565 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21566 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21567 rtx x = gen_rtx_SET (stack_pointer_rtx,
21568 plus_constant (Pmode, stack_pointer_rtx, -4));
21569 RTX_FRAME_RELATED_P (insn) = 1;
21570 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21571 }
21572 }
21573
21574 /* Release a scratch register obtained from the preceding function. */
21575
21576 static void
21577 release_scratch_register_on_entry (struct scratch_reg *sr)
21578 {
21579 if (sr->saved)
21580 {
21581 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21582 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21583 rtx x = gen_rtx_SET (stack_pointer_rtx,
21584 plus_constant (Pmode, stack_pointer_rtx, 4));
21585 RTX_FRAME_RELATED_P (insn) = 1;
21586 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21587 }
21588 }
21589
21590 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21591
21592 #if PROBE_INTERVAL > 4096
21593 #error Cannot use indexed addressing mode for stack probing
21594 #endif
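/* For reference: with the usual default of STACK_CHECK_PROBE_INTERVAL_EXP
   (12, unless the target or command line overrides it), PROBE_INTERVAL is
   4096 bytes, i.e. one probe per page.  The illustrative comments below
   assume that value.  */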
21595
21596 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21597 inclusive. These are offsets from the current stack pointer. REGNO1
21598 is the index number of the 1st scratch register and LIVE_REGS is the
21599 mask of live registers. */
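/* A worked sketch of the three strategies used below, assuming
   PROBE_INTERVAL == 4096 (see above); all addresses are below the
   incoming SP:

     SIZE <= 4096        one probe, at SP - FIRST - SIZE;
     SIZE <= 5 * 4096    an unrolled sequence probing SP - FIRST - 4096,
                         SP - FIRST - 8192, ... and finally
                         SP - FIRST - SIZE;
     larger SIZE         a run-time loop (see output_probe_stack_range)
                         walking down one interval at a time, plus a
                         residual probe if SIZE is not a multiple of the
                         interval.  */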
21600
21601 static void
21602 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21603 unsigned int regno1, unsigned long live_regs)
21604 {
21605 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21606
21607 /* See if we have a constant small number of probes to generate. If so,
21608 that's the easy case. */
21609 if (size <= PROBE_INTERVAL)
21610 {
21611 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21612 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21613 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21614 }
21615
21616 /* The run-time loop is made up of 10 insns in the generic case while the
21617 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21618 else if (size <= 5 * PROBE_INTERVAL)
21619 {
21620 HOST_WIDE_INT i, rem;
21621
21622 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21623 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21624 emit_stack_probe (reg1);
21625
21626 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21627 it exceeds SIZE. If only two probes are needed, this will not
21628 generate any code. Then probe at FIRST + SIZE. */
21629 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21630 {
21631 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21632 emit_stack_probe (reg1);
21633 }
21634
21635 rem = size - (i - PROBE_INTERVAL);
21636 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21637 {
21638 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21639 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21640 }
21641 else
21642 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21643 }
21644
21645 /* Otherwise, do the same as above, but in a loop. Note that we must be
21646 extra careful with variables wrapping around because we might be at
21647 the very top (or the very bottom) of the address space and we have
21648 to be able to handle this case properly; in particular, we use an
21649 equality test for the loop condition. */
21650 else
21651 {
21652 HOST_WIDE_INT rounded_size;
21653 struct scratch_reg sr;
21654
21655 get_scratch_register_on_entry (&sr, regno1, live_regs);
21656
21657 emit_move_insn (reg1, GEN_INT (first));
21658
21659
21660 /* Step 1: round SIZE to the previous multiple of the interval. */
21661
21662 rounded_size = size & -PROBE_INTERVAL;
21663 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21664
21665
21666 /* Step 2: compute initial and final value of the loop counter. */
21667
21668 /* TEST_ADDR = SP + FIRST. */
21669 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21670
21671 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21672 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21673
21674
21675 /* Step 3: the loop
21676
21677 do
21678 {
21679 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21680 probe at TEST_ADDR
21681 }
21682 while (TEST_ADDR != LAST_ADDR)
21683
21684 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21685 until it is equal to ROUNDED_SIZE. */
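/* Concrete illustration, assuming PROBE_INTERVAL == 4096: for FIRST == 4096
   and SIZE == 30000, ROUNDED_SIZE is 28672, so the loop probes SP - 8192,
   SP - 12288, ..., SP - 32768, and step 4 below adds a final probe at
   SP - 34096 (= SP - FIRST - SIZE).  All offsets are below the incoming
   SP.  */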
21686
21687 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21688
21689
21690 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21691 that SIZE is equal to ROUNDED_SIZE. */
21692
21693 if (size != rounded_size)
21694 {
21695 HOST_WIDE_INT rem = size - rounded_size;
21696
21697 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21698 {
21699 emit_set_insn (sr.reg,
21700 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21701 emit_stack_probe (plus_constant (Pmode, sr.reg,
21702 PROBE_INTERVAL - rem));
21703 }
21704 else
21705 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21706 }
21707
21708 release_scratch_register_on_entry (&sr);
21709 }
21710
21711 /* Make sure nothing is scheduled before we are done. */
21712 emit_insn (gen_blockage ());
21713 }
21714
21715 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21716 absolute addresses. */
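/* For illustration, the loop emitted below looks roughly like this (the
   label and register names are placeholders; 4096 assumes the default
   PROBE_INTERVAL):

       .LPSRL0:
               sub     r4, r4, #4096   @ TEST_ADDR -= PROBE_INTERVAL
               str     r0, [r4, #0]    @ probe
               cmp     r4, r5          @ reached LAST_ADDR yet?
               bne     .LPSRL0  */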
21717
21718 const char *
21719 output_probe_stack_range (rtx reg1, rtx reg2)
21720 {
21721 static int labelno = 0;
21722 char loop_lab[32];
21723 rtx xops[2];
21724
21725 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21726
21727 /* Loop. */
21728 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21729
21730 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21731 xops[0] = reg1;
21732 xops[1] = GEN_INT (PROBE_INTERVAL);
21733 output_asm_insn ("sub\t%0, %0, %1", xops);
21734
21735 /* Probe at TEST_ADDR. */
21736 output_asm_insn ("str\tr0, [%0, #0]", xops);
21737
21738 /* Test if TEST_ADDR == LAST_ADDR. */
21739 xops[1] = reg2;
21740 output_asm_insn ("cmp\t%0, %1", xops);
21741
21742 /* Branch. */
21743 fputs ("\tbne\t", asm_out_file);
21744 assemble_name_raw (asm_out_file, loop_lab);
21745 fputc ('\n', asm_out_file);
21746
21747 return "";
21748 }
21749
21750 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21751 function. */
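/* A rough illustration only (the exact sequence depends on the frame layout,
   ISA and options): a simple non-nested ARM-mode function that needs r4, r5
   and lr saved plus 16 bytes of locals/outgoing arguments might get

       push    {r4, r5, lr}            @ emit_multi_reg_push
       sub     sp, sp, #16             @ allocate the rest of the frame

   with the frame pointer setup, coprocessor saves, stack probing and PIC
   register load inserted between those steps when required.  */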
21752 void
21753 arm_expand_prologue (void)
21754 {
21755 rtx amount;
21756 rtx insn;
21757 rtx ip_rtx;
21758 unsigned long live_regs_mask;
21759 unsigned long func_type;
21760 int fp_offset = 0;
21761 int saved_pretend_args = 0;
21762 int saved_regs = 0;
21763 unsigned HOST_WIDE_INT args_to_push;
21764 HOST_WIDE_INT size;
21765 arm_stack_offsets *offsets;
21766 bool clobber_ip;
21767
21768 func_type = arm_current_func_type ();
21769
21770 /* Naked functions don't have prologues. */
21771 if (IS_NAKED (func_type))
21772 {
21773 if (flag_stack_usage_info)
21774 current_function_static_stack_size = 0;
21775 return;
21776 }
21777
21778 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21779 args_to_push = crtl->args.pretend_args_size;
21780
21781 /* Compute which registers we will have to save onto the stack. */
21782 offsets = arm_get_frame_offsets ();
21783 live_regs_mask = offsets->saved_regs_mask;
21784
21785 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21786
21787 if (IS_STACKALIGN (func_type))
21788 {
21789 rtx r0, r1;
21790
21791 /* Handle a word-aligned stack pointer. We generate the following:
21792
21793 mov r0, sp
21794 bic r1, r0, #7
21795 mov sp, r1
21796 <save and restore r0 in normal prologue/epilogue>
21797 mov sp, r0
21798 bx lr
21799
21800 The unwinder doesn't need to know about the stack realignment.
21801 Just tell it we saved SP in r0. */
21802 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21803
21804 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21805 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21806
21807 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21808 RTX_FRAME_RELATED_P (insn) = 1;
21809 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21810
21811 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21812
21813 /* ??? The CFA changes here, which may cause GDB to conclude that it
21814 has entered a different function. That said, the unwind info is
21815 correct, individually, before and after this instruction because
21816 we've described the save of SP, which will override the default
21817 handling of SP as restoring from the CFA. */
21818 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21819 }
21820
21821 /* Let's compute the static_chain_stack_bytes required and store it. Right
21822 now the value must be -1 as stored by arm_init_machine_status (). */
21823 cfun->machine->static_chain_stack_bytes
21824 = arm_compute_static_chain_stack_bytes ();
21825
21826 /* The static chain register is the same as the IP register. If it is
21827 clobbered when creating the frame, we need to save and restore it. */
21828 clobber_ip = IS_NESTED (func_type)
21829 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21830 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21831 || flag_stack_clash_protection)
21832 && !df_regs_ever_live_p (LR_REGNUM)
21833 && arm_r3_live_at_start_p ()));
21834
21835 /* Find somewhere to store IP whilst the frame is being created.
21836 We try the following places in order:
21837
21838 1. The last argument register r3 if it is available.
21839 2. A slot on the stack above the frame if there are no
21840 arguments to push onto the stack.
21841 3. Register r3 again, after pushing the argument registers
21842 onto the stack, if this is a varargs function.
21843 4. The last slot on the stack created for the arguments to
21844 push, if this isn't a varargs function.
21845
21846 Note - we only need to tell the dwarf2 backend about the SP
21847 adjustment in the second variant; the static chain register
21848 doesn't need to be unwound, as it doesn't contain a value
21849 inherited from the caller. */
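/* For example, in case 2 above the store emitted below is a simple
   pre-decrement push of IP, roughly "str ip, [sp, #-4]!", and the only
   unwinding information recorded for it is the 4-byte SP adjustment.  */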
21850 if (clobber_ip)
21851 {
21852 if (!arm_r3_live_at_start_p ())
21853 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21854 else if (args_to_push == 0)
21855 {
21856 rtx addr, dwarf;
21857
21858 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21859 saved_regs += 4;
21860
21861 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21862 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21863 fp_offset = 4;
21864
21865 /* Just tell the dwarf backend that we adjusted SP. */
21866 dwarf = gen_rtx_SET (stack_pointer_rtx,
21867 plus_constant (Pmode, stack_pointer_rtx,
21868 -fp_offset));
21869 RTX_FRAME_RELATED_P (insn) = 1;
21870 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21871 }
21872 else
21873 {
21874 /* Store the args on the stack. */
21875 if (cfun->machine->uses_anonymous_args)
21876 {
21877 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21878 (0xf0 >> (args_to_push / 4)) & 0xf);
21879 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21880 saved_pretend_args = 1;
21881 }
21882 else
21883 {
21884 rtx addr, dwarf;
21885
21886 if (args_to_push == 4)
21887 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21888 else
21889 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21890 plus_constant (Pmode,
21891 stack_pointer_rtx,
21892 -args_to_push));
21893
21894 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21895
21896 /* Just tell the dwarf backend that we adjusted SP. */
21897 dwarf = gen_rtx_SET (stack_pointer_rtx,
21898 plus_constant (Pmode, stack_pointer_rtx,
21899 -args_to_push));
21900 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21901 }
21902
21903 RTX_FRAME_RELATED_P (insn) = 1;
21904 fp_offset = args_to_push;
21905 args_to_push = 0;
21906 }
21907 }
21908
21909 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21910 {
21911 if (IS_INTERRUPT (func_type))
21912 {
21913 /* Interrupt functions must not corrupt any registers.
21914 Creating a frame pointer, however, corrupts the IP
21915 register, so we must push it first. */
21916 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21917
21918 /* Do not set RTX_FRAME_RELATED_P on this insn.
21919 The dwarf stack unwinding code only wants to see one
21920 stack decrement per function, and this is not it. If
21921 this instruction is labeled as being part of the frame
21922 creation sequence then dwarf2out_frame_debug_expr will
21923 die when it encounters the assignment of IP to FP
21924 later on, since the use of SP here establishes SP as
21925 the CFA register and not IP.
21926
21927 Anyway this instruction is not really part of the stack
21928 frame creation although it is part of the prologue. */
21929 }
21930
21931 insn = emit_set_insn (ip_rtx,
21932 plus_constant (Pmode, stack_pointer_rtx,
21933 fp_offset));
21934 RTX_FRAME_RELATED_P (insn) = 1;
21935 }
21936
21937 if (args_to_push)
21938 {
21939 /* Push the argument registers, or reserve space for them. */
21940 if (cfun->machine->uses_anonymous_args)
21941 insn = emit_multi_reg_push
21942 ((0xf0 >> (args_to_push / 4)) & 0xf,
21943 (0xf0 >> (args_to_push / 4)) & 0xf);
21944 else
21945 insn = emit_insn
21946 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21947 GEN_INT (- args_to_push)));
21948 RTX_FRAME_RELATED_P (insn) = 1;
21949 }
21950
21951 /* If this is an interrupt service routine, and the link register
21952 is going to be pushed, and we're not generating the extra push
21953 of IP (needed when a frame pointer is required and the frame
21954 layout is APCS), then subtracting four from LR now will mean that
21955 the function return can be done with a single instruction. */
21956 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21957 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21958 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21959 && TARGET_ARM)
21960 {
21961 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21962
21963 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21964 }
21965
21966 if (live_regs_mask)
21967 {
21968 unsigned long dwarf_regs_mask = live_regs_mask;
21969
21970 saved_regs += bit_count (live_regs_mask) * 4;
21971 if (optimize_size && !frame_pointer_needed
21972 && saved_regs == offsets->saved_regs - offsets->saved_args)
21973 {
21974 /* If no coprocessor registers are being pushed and we don't have
21975 to worry about a frame pointer then push extra registers to
21976 create the stack frame. This is done in a way that does not
21977 alter the frame layout, so is independent of the epilogue. */
21978 int n;
21979 int frame;
21980 n = 0;
21981 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21982 n++;
21983 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21984 if (frame && n * 4 >= frame)
21985 {
21986 n = frame / 4;
21987 live_regs_mask |= (1 << n) - 1;
21988 saved_regs += frame;
21989 }
21990 }
21991
21992 if (TARGET_LDRD
21993 && current_tune->prefer_ldrd_strd
21994 && !optimize_function_for_size_p (cfun))
21995 {
21996 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21997 if (TARGET_THUMB2)
21998 thumb2_emit_strd_push (live_regs_mask);
21999 else if (TARGET_ARM
22000 && !TARGET_APCS_FRAME
22001 && !IS_INTERRUPT (func_type))
22002 arm_emit_strd_push (live_regs_mask);
22003 else
22004 {
22005 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
22006 RTX_FRAME_RELATED_P (insn) = 1;
22007 }
22008 }
22009 else
22010 {
22011 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
22012 RTX_FRAME_RELATED_P (insn) = 1;
22013 }
22014 }
22015
22016 if (! IS_VOLATILE (func_type))
22017 saved_regs += arm_save_coproc_regs ();
22018
22019 if (frame_pointer_needed && TARGET_ARM)
22020 {
22021 /* Create the new frame pointer. */
22022 if (TARGET_APCS_FRAME)
22023 {
22024 insn = GEN_INT (-(4 + args_to_push + fp_offset));
22025 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
22026 RTX_FRAME_RELATED_P (insn) = 1;
22027 }
22028 else
22029 {
22030 insn = GEN_INT (saved_regs - (4 + fp_offset));
22031 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22032 stack_pointer_rtx, insn));
22033 RTX_FRAME_RELATED_P (insn) = 1;
22034 }
22035 }
22036
22037 size = offsets->outgoing_args - offsets->saved_args;
22038 if (flag_stack_usage_info)
22039 current_function_static_stack_size = size;
22040
22041 /* If this isn't an interrupt service routine and we have a frame, then do
22042 stack checking. We use IP as the first scratch register, except for the
22043 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
22044 if (!IS_INTERRUPT (func_type)
22045 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22046 || flag_stack_clash_protection))
22047 {
22048 unsigned int regno;
22049
22050 if (!IS_NESTED (func_type) || clobber_ip)
22051 regno = IP_REGNUM;
22052 else if (df_regs_ever_live_p (LR_REGNUM))
22053 regno = LR_REGNUM;
22054 else
22055 regno = 3;
22056
22057 if (crtl->is_leaf && !cfun->calls_alloca)
22058 {
22059 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22060 arm_emit_probe_stack_range (get_stack_check_protect (),
22061 size - get_stack_check_protect (),
22062 regno, live_regs_mask);
22063 }
22064 else if (size > 0)
22065 arm_emit_probe_stack_range (get_stack_check_protect (), size,
22066 regno, live_regs_mask);
22067 }
22068
22069 /* Recover the static chain register. */
22070 if (clobber_ip)
22071 {
22072 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22073 insn = gen_rtx_REG (SImode, 3);
22074 else
22075 {
22076 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22077 insn = gen_frame_mem (SImode, insn);
22078 }
22079 emit_set_insn (ip_rtx, insn);
22080 emit_insn (gen_force_register_use (ip_rtx));
22081 }
22082
22083 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22084 {
22085 /* This add can produce multiple insns for a large constant, so we
22086 need to get tricky. */
22087 rtx_insn *last = get_last_insn ();
22088
22089 amount = GEN_INT (offsets->saved_args + saved_regs
22090 - offsets->outgoing_args);
22091
22092 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22093 amount));
22094 do
22095 {
22096 last = last ? NEXT_INSN (last) : get_insns ();
22097 RTX_FRAME_RELATED_P (last) = 1;
22098 }
22099 while (last != insn);
22100
22101 /* If the frame pointer is needed, emit a special barrier that
22102 will prevent the scheduler from moving stores to the frame
22103 before the stack adjustment. */
22104 if (frame_pointer_needed)
22105 emit_insn (gen_stack_tie (stack_pointer_rtx,
22106 hard_frame_pointer_rtx));
22107 }
22108
22109
22110 if (frame_pointer_needed && TARGET_THUMB2)
22111 thumb_set_frame_pointer (offsets);
22112
22113 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22114 {
22115 unsigned long mask;
22116
22117 mask = live_regs_mask;
22118 mask &= THUMB2_WORK_REGS;
22119 if (!IS_NESTED (func_type))
22120 mask |= (1 << IP_REGNUM);
22121 arm_load_pic_register (mask, NULL_RTX);
22122 }
22123
22124 /* If we are profiling, make sure no instructions are scheduled before
22125 the call to mcount. Similarly if the user has requested no
22126 scheduling in the prolog. Similarly if we want non-call exceptions
22127 using the EABI unwinder, to prevent faulting instructions from being
22128 swapped with a stack adjustment. */
22129 if (crtl->profile || !TARGET_SCHED_PROLOG
22130 || (arm_except_unwind_info (&global_options) == UI_TARGET
22131 && cfun->can_throw_non_call_exceptions))
22132 emit_insn (gen_blockage ());
22133
22134 /* If the link register is being kept alive, with the return address in it,
22135 then make sure that it does not get reused by the ce2 pass. */
22136 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22137 cfun->machine->lr_save_eliminated = 1;
22138 }
22139 \f
22140 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22141 static void
22142 arm_print_condition (FILE *stream)
22143 {
22144 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22145 {
22146 /* Branch conversion is not implemented for Thumb-2. */
22147 if (TARGET_THUMB)
22148 {
22149 output_operand_lossage ("predicated Thumb instruction");
22150 return;
22151 }
22152 if (current_insn_predicate != NULL)
22153 {
22154 output_operand_lossage
22155 ("predicated instruction in conditional sequence");
22156 return;
22157 }
22158
22159 fputs (arm_condition_codes[arm_current_cc], stream);
22160 }
22161 else if (current_insn_predicate)
22162 {
22163 enum arm_cond_code code;
22164
22165 if (TARGET_THUMB1)
22166 {
22167 output_operand_lossage ("predicated Thumb instruction");
22168 return;
22169 }
22170
22171 code = get_arm_condition_code (current_insn_predicate);
22172 fputs (arm_condition_codes[code], stream);
22173 }
22174 }
22175
22176
22177 /* Globally reserved letters: acln
22178 Punctuation letters currently used: @_|?().!#
22179 Lower case letters currently used: bcdefhimpqtvwxyz
22180 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22181 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22182
22183 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22184
22185 If CODE is 'd', then X is a condition operand and the instruction
22186 should only be executed if the condition is true.
22187 If CODE is 'D', then X is a condition operand and the instruction
22188 should only be executed if the condition is false: however, if the mode
22189 of the comparison is CCFPEmode, then always execute the instruction -- we
22190 do this because in these circumstances !GE does not necessarily imply LT;
22191 in these cases the instruction pattern will take care to make sure that
22192 an instruction containing %d will follow, thereby undoing the effects of
22193 doing this instruction unconditionally.
22194 If CODE is 'N' then X is a floating point operand that must be negated
22195 before output.
22196 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22197 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
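/* Two illustrative uses of these codes in an insn template (hypothetical
   operands, shown only to make the conventions concrete): with operand 0
   equal to (const_int 5), "%B0" prints -6 (the bitwise inverse); with
   operand 0 a DImode value in r4, "%M0" prints "{r4-r5}".  */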
22198 static void
22199 arm_print_operand (FILE *stream, rtx x, int code)
22200 {
22201 switch (code)
22202 {
22203 case '@':
22204 fputs (ASM_COMMENT_START, stream);
22205 return;
22206
22207 case '_':
22208 fputs (user_label_prefix, stream);
22209 return;
22210
22211 case '|':
22212 fputs (REGISTER_PREFIX, stream);
22213 return;
22214
22215 case '?':
22216 arm_print_condition (stream);
22217 return;
22218
22219 case '.':
22220 /* The current condition code for a condition code setting instruction.
22221 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22222 fputc('s', stream);
22223 arm_print_condition (stream);
22224 return;
22225
22226 case '!':
22227 /* If the instruction is conditionally executed then print
22228 the current condition code, otherwise print 's'. */
22229 gcc_assert (TARGET_THUMB2);
22230 if (current_insn_predicate)
22231 arm_print_condition (stream);
22232 else
22233 fputc('s', stream);
22234 break;
22235
22236 /* %# is a "break" sequence. It doesn't output anything, but is used to
22237 separate e.g. operand numbers from following text, if that text consists
22238 of further digits which we don't want to be part of the operand
22239 number. */
22240 case '#':
22241 return;
22242
22243 case 'N':
22244 {
22245 REAL_VALUE_TYPE r;
22246 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22247 fprintf (stream, "%s", fp_const_from_val (&r));
22248 }
22249 return;
22250
22251 /* An integer or symbol address without a preceding # sign. */
22252 case 'c':
22253 switch (GET_CODE (x))
22254 {
22255 case CONST_INT:
22256 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22257 break;
22258
22259 case SYMBOL_REF:
22260 output_addr_const (stream, x);
22261 break;
22262
22263 case CONST:
22264 if (GET_CODE (XEXP (x, 0)) == PLUS
22265 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22266 {
22267 output_addr_const (stream, x);
22268 break;
22269 }
22270 /* Fall through. */
22271
22272 default:
22273 output_operand_lossage ("Unsupported operand for code '%c'", code);
22274 }
22275 return;
22276
22277 /* An integer that we want to print in HEX. */
22278 case 'x':
22279 switch (GET_CODE (x))
22280 {
22281 case CONST_INT:
22282 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22283 break;
22284
22285 default:
22286 output_operand_lossage ("Unsupported operand for code '%c'", code);
22287 }
22288 return;
22289
22290 case 'B':
22291 if (CONST_INT_P (x))
22292 {
22293 HOST_WIDE_INT val;
22294 val = ARM_SIGN_EXTEND (~INTVAL (x));
22295 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22296 }
22297 else
22298 {
22299 putc ('~', stream);
22300 output_addr_const (stream, x);
22301 }
22302 return;
22303
22304 case 'b':
22305 /* Print the log2 of a CONST_INT. */
22306 {
22307 HOST_WIDE_INT val;
22308
22309 if (!CONST_INT_P (x)
22310 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22311 output_operand_lossage ("Unsupported operand for code '%c'", code);
22312 else
22313 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22314 }
22315 return;
22316
22317 case 'L':
22318 /* The low 16 bits of an immediate constant. */
22319 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22320 return;
22321
22322 case 'i':
22323 fprintf (stream, "%s", arithmetic_instr (x, 1));
22324 return;
22325
22326 case 'I':
22327 fprintf (stream, "%s", arithmetic_instr (x, 0));
22328 return;
22329
22330 case 'S':
22331 {
22332 HOST_WIDE_INT val;
22333 const char *shift;
22334
22335 shift = shift_op (x, &val);
22336
22337 if (shift)
22338 {
22339 fprintf (stream, ", %s ", shift);
22340 if (val == -1)
22341 arm_print_operand (stream, XEXP (x, 1), 0);
22342 else
22343 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22344 }
22345 }
22346 return;
22347
22348 /* An explanation of the 'Q', 'R' and 'H' register operands:
22349
22350 In a pair of registers containing a DI or DF value the 'Q'
22351 operand returns the register number of the register containing
22352 the least significant part of the value. The 'R' operand returns
22353 the register number of the register containing the most
22354 significant part of the value.
22355
22356 The 'H' operand returns the higher of the two register numbers.
22357 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22358 same as the 'Q' operand, since the most significant part of the
22359 value is held in the lower number register. The reverse is true
22360 on systems where WORDS_BIG_ENDIAN is false.
22361
22362 The purpose of these operands is to distinguish between cases
22363 where the endian-ness of the values is important (for example
22364 when they are added together), and cases where the endian-ness
22365 is irrelevant, but the order of register operations is important.
22366 For example when loading a value from memory into a register
22367 pair, the endian-ness does not matter. Provided that the value
22368 from the lower memory address is put into the lower numbered
22369 register, and the value from the higher address is put into the
22370 higher numbered register, the load will work regardless of whether
22371 the value being loaded is big-wordian or little-wordian. The
22372 order of the two register loads can matter however, if the address
22373 of the memory location is actually held in one of the registers
22374 being overwritten by the load.
22375
22376 The 'Q' and 'R' constraints are also available for 64-bit
22377 constants. */
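/* Worked example (illustrative): for a DImode value held in r0/r1, '%Q'
   prints r0 and '%R' prints r1 when WORDS_BIG_ENDIAN is false, and the
   other way round when it is true; '%H' prints r1 in both cases, since
   it always names the higher-numbered register.  */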
22378 case 'Q':
22379 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22380 {
22381 rtx part = gen_lowpart (SImode, x);
22382 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22383 return;
22384 }
22385
22386 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22387 {
22388 output_operand_lossage ("invalid operand for code '%c'", code);
22389 return;
22390 }
22391
22392 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22393 return;
22394
22395 case 'R':
22396 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22397 {
22398 machine_mode mode = GET_MODE (x);
22399 rtx part;
22400
22401 if (mode == VOIDmode)
22402 mode = DImode;
22403 part = gen_highpart_mode (SImode, mode, x);
22404 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22405 return;
22406 }
22407
22408 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22409 {
22410 output_operand_lossage ("invalid operand for code '%c'", code);
22411 return;
22412 }
22413
22414 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22415 return;
22416
22417 case 'H':
22418 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22419 {
22420 output_operand_lossage ("invalid operand for code '%c'", code);
22421 return;
22422 }
22423
22424 asm_fprintf (stream, "%r", REGNO (x) + 1);
22425 return;
22426
22427 case 'J':
22428 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22429 {
22430 output_operand_lossage ("invalid operand for code '%c'", code);
22431 return;
22432 }
22433
22434 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22435 return;
22436
22437 case 'K':
22438 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22439 {
22440 output_operand_lossage ("invalid operand for code '%c'", code);
22441 return;
22442 }
22443
22444 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22445 return;
22446
22447 case 'm':
22448 asm_fprintf (stream, "%r",
22449 REG_P (XEXP (x, 0))
22450 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22451 return;
22452
22453 case 'M':
22454 asm_fprintf (stream, "{%r-%r}",
22455 REGNO (x),
22456 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22457 return;
22458
22459 /* Like 'M', but writing doubleword vector registers, for use by Neon
22460 insns. */
22461 case 'h':
22462 {
22463 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22464 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22465 if (numregs == 1)
22466 asm_fprintf (stream, "{d%d}", regno);
22467 else
22468 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22469 }
22470 return;
22471
22472 case 'd':
22473 /* CONST_TRUE_RTX means always -- that's the default. */
22474 if (x == const_true_rtx)
22475 return;
22476
22477 if (!COMPARISON_P (x))
22478 {
22479 output_operand_lossage ("invalid operand for code '%c'", code);
22480 return;
22481 }
22482
22483 fputs (arm_condition_codes[get_arm_condition_code (x)],
22484 stream);
22485 return;
22486
22487 case 'D':
22488 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22489 want to do that. */
22490 if (x == const_true_rtx)
22491 {
22492 output_operand_lossage ("instruction never executed");
22493 return;
22494 }
22495 if (!COMPARISON_P (x))
22496 {
22497 output_operand_lossage ("invalid operand for code '%c'", code);
22498 return;
22499 }
22500
22501 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22502 (get_arm_condition_code (x))],
22503 stream);
22504 return;
22505
22506 case 's':
22507 case 'V':
22508 case 'W':
22509 case 'X':
22510 case 'Y':
22511 case 'Z':
22512 /* Former Maverick support, removed after GCC-4.7. */
22513 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22514 return;
22515
22516 case 'U':
22517 if (!REG_P (x)
22518 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22519 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22520 /* Bad value for wCG register number. */
22521 {
22522 output_operand_lossage ("invalid operand for code '%c'", code);
22523 return;
22524 }
22525
22526 else
22527 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22528 return;
22529
22530 /* Print an iWMMXt control register name. */
22531 case 'w':
22532 if (!CONST_INT_P (x)
22533 || INTVAL (x) < 0
22534 || INTVAL (x) >= 16)
22535 /* Bad value for wC register number. */
22536 {
22537 output_operand_lossage ("invalid operand for code '%c'", code);
22538 return;
22539 }
22540
22541 else
22542 {
22543 static const char * wc_reg_names [16] =
22544 {
22545 "wCID", "wCon", "wCSSF", "wCASF",
22546 "wC4", "wC5", "wC6", "wC7",
22547 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22548 "wC12", "wC13", "wC14", "wC15"
22549 };
22550
22551 fputs (wc_reg_names [INTVAL (x)], stream);
22552 }
22553 return;
22554
22555 /* Print the high single-precision register of a VFP double-precision
22556 register. */
22557 case 'p':
22558 {
22559 machine_mode mode = GET_MODE (x);
22560 int regno;
22561
22562 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22563 {
22564 output_operand_lossage ("invalid operand for code '%c'", code);
22565 return;
22566 }
22567
22568 regno = REGNO (x);
22569 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22570 {
22571 output_operand_lossage ("invalid operand for code '%c'", code);
22572 return;
22573 }
22574
22575 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22576 }
22577 return;
22578
22579 /* Print a VFP/Neon double precision or quad precision register name. */
22580 case 'P':
22581 case 'q':
22582 {
22583 machine_mode mode = GET_MODE (x);
22584 int is_quad = (code == 'q');
22585 int regno;
22586
22587 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22588 {
22589 output_operand_lossage ("invalid operand for code '%c'", code);
22590 return;
22591 }
22592
22593 if (!REG_P (x)
22594 || !IS_VFP_REGNUM (REGNO (x)))
22595 {
22596 output_operand_lossage ("invalid operand for code '%c'", code);
22597 return;
22598 }
22599
22600 regno = REGNO (x);
22601 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22602 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22603 {
22604 output_operand_lossage ("invalid operand for code '%c'", code);
22605 return;
22606 }
22607
22608 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22609 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22610 }
22611 return;
22612
22613 /* These two codes print the low/high doubleword register of a Neon quad
22614 register, respectively. For pair-structure types, they can also print
22615 low/high quadword registers. */
22616 case 'e':
22617 case 'f':
22618 {
22619 machine_mode mode = GET_MODE (x);
22620 int regno;
22621
22622 if ((GET_MODE_SIZE (mode) != 16
22623 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22624 {
22625 output_operand_lossage ("invalid operand for code '%c'", code);
22626 return;
22627 }
22628
22629 regno = REGNO (x);
22630 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22631 {
22632 output_operand_lossage ("invalid operand for code '%c'", code);
22633 return;
22634 }
22635
22636 if (GET_MODE_SIZE (mode) == 16)
22637 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22638 + (code == 'f' ? 1 : 0));
22639 else
22640 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22641 + (code == 'f' ? 1 : 0));
22642 }
22643 return;
22644
22645 /* Print a VFPv3 floating-point constant, represented as an integer
22646 index. */
22647 case 'G':
22648 {
22649 int index = vfp3_const_double_index (x);
22650 gcc_assert (index != -1);
22651 fprintf (stream, "%d", index);
22652 }
22653 return;
22654
22655 /* Print bits representing opcode features for Neon.
22656
22657 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22658 and polynomials as unsigned.
22659
22660 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22661
22662 Bit 2 is 1 for rounding functions, 0 otherwise. */
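/* Worked example (following the encoding described above): a bits value of
   3 (signed + float) makes 'T', 'F' and 't' all print 'f'; a value of 2
   (polynomial) prints 'p' for 'T'/'F' but 'u' for 't'; and setting bit 2
   as well makes 'O' print "r" for the rounding variant.  */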
22663
22664 /* Identify the type as 's', 'u', 'p' or 'f'. */
22665 case 'T':
22666 {
22667 HOST_WIDE_INT bits = INTVAL (x);
22668 fputc ("uspf"[bits & 3], stream);
22669 }
22670 return;
22671
22672 /* Likewise, but signed and unsigned integers are both 'i'. */
22673 case 'F':
22674 {
22675 HOST_WIDE_INT bits = INTVAL (x);
22676 fputc ("iipf"[bits & 3], stream);
22677 }
22678 return;
22679
22680 /* As for 'T', but emit 'u' instead of 'p'. */
22681 case 't':
22682 {
22683 HOST_WIDE_INT bits = INTVAL (x);
22684 fputc ("usuf"[bits & 3], stream);
22685 }
22686 return;
22687
22688 /* Bit 2: rounding (vs none). */
22689 case 'O':
22690 {
22691 HOST_WIDE_INT bits = INTVAL (x);
22692 fputs ((bits & 4) != 0 ? "r" : "", stream);
22693 }
22694 return;
22695
22696 /* Memory operand for vld1/vst1 instruction. */
22697 case 'A':
22698 {
22699 rtx addr;
22700 bool postinc = FALSE;
22701 rtx postinc_reg = NULL;
22702 unsigned align, memsize, align_bits;
22703
22704 gcc_assert (MEM_P (x));
22705 addr = XEXP (x, 0);
22706 if (GET_CODE (addr) == POST_INC)
22707 {
22708 postinc = 1;
22709 addr = XEXP (addr, 0);
22710 }
22711 if (GET_CODE (addr) == POST_MODIFY)
22712 {
22713 postinc_reg = XEXP( XEXP (addr, 1), 1);
22714 addr = XEXP (addr, 0);
22715 }
22716 asm_fprintf (stream, "[%r", REGNO (addr));
22717
22718 /* We know the alignment of this access, so we can emit a hint in the
22719 instruction (for some alignments) as an aid to the memory subsystem
22720 of the target. */
22721 align = MEM_ALIGN (x) >> 3;
22722 memsize = MEM_SIZE (x);
22723
22724 /* Only certain alignment specifiers are supported by the hardware. */
22725 if (memsize == 32 && (align % 32) == 0)
22726 align_bits = 256;
22727 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22728 align_bits = 128;
22729 else if (memsize >= 8 && (align % 8) == 0)
22730 align_bits = 64;
22731 else
22732 align_bits = 0;
22733
22734 if (align_bits != 0)
22735 asm_fprintf (stream, ":%d", align_bits);
22736
22737 asm_fprintf (stream, "]");
22738
22739 if (postinc)
22740 fputs("!", stream);
22741 if (postinc_reg)
22742 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22743 }
22744 return;
22745
22746 case 'C':
22747 {
22748 rtx addr;
22749
22750 gcc_assert (MEM_P (x));
22751 addr = XEXP (x, 0);
22752 gcc_assert (REG_P (addr));
22753 asm_fprintf (stream, "[%r]", REGNO (addr));
22754 }
22755 return;
22756
22757 /* Translate an S register number into a D register number and element index. */
22758 case 'y':
22759 {
22760 machine_mode mode = GET_MODE (x);
22761 int regno;
22762
22763 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22764 {
22765 output_operand_lossage ("invalid operand for code '%c'", code);
22766 return;
22767 }
22768
22769 regno = REGNO (x);
22770 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22771 {
22772 output_operand_lossage ("invalid operand for code '%c'", code);
22773 return;
22774 }
22775
22776 regno = regno - FIRST_VFP_REGNUM;
22777 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22778 }
22779 return;
22780
22781 case 'v':
22782 gcc_assert (CONST_DOUBLE_P (x));
22783 int result;
22784 result = vfp3_const_double_for_fract_bits (x);
22785 if (result == 0)
22786 result = vfp3_const_double_for_bits (x);
22787 fprintf (stream, "#%d", result);
22788 return;
22789
22790 /* Register specifier for vld1.16/vst1.16. Translate the S register
22791 number into a D register number and element index. */
22792 case 'z':
22793 {
22794 machine_mode mode = GET_MODE (x);
22795 int regno;
22796
22797 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22798 {
22799 output_operand_lossage ("invalid operand for code '%c'", code);
22800 return;
22801 }
22802
22803 regno = REGNO (x);
22804 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22805 {
22806 output_operand_lossage ("invalid operand for code '%c'", code);
22807 return;
22808 }
22809
22810 regno = regno - FIRST_VFP_REGNUM;
22811 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22812 }
22813 return;
22814
22815 default:
22816 if (x == 0)
22817 {
22818 output_operand_lossage ("missing operand");
22819 return;
22820 }
22821
22822 switch (GET_CODE (x))
22823 {
22824 case REG:
22825 asm_fprintf (stream, "%r", REGNO (x));
22826 break;
22827
22828 case MEM:
22829 output_address (GET_MODE (x), XEXP (x, 0));
22830 break;
22831
22832 case CONST_DOUBLE:
22833 {
22834 char fpstr[20];
22835 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22836 sizeof (fpstr), 0, 1);
22837 fprintf (stream, "#%s", fpstr);
22838 }
22839 break;
22840
22841 default:
22842 gcc_assert (GET_CODE (x) != NEG);
22843 fputc ('#', stream);
22844 if (GET_CODE (x) == HIGH)
22845 {
22846 fputs (":lower16:", stream);
22847 x = XEXP (x, 0);
22848 }
22849
22850 output_addr_const (stream, x);
22851 break;
22852 }
22853 }
22854 }
22855 \f
22856 /* Target hook for printing a memory address. */
22857 static void
22858 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22859 {
22860 if (TARGET_32BIT)
22861 {
22862 int is_minus = GET_CODE (x) == MINUS;
22863
22864 if (REG_P (x))
22865 asm_fprintf (stream, "[%r]", REGNO (x));
22866 else if (GET_CODE (x) == PLUS || is_minus)
22867 {
22868 rtx base = XEXP (x, 0);
22869 rtx index = XEXP (x, 1);
22870 HOST_WIDE_INT offset = 0;
22871 if (!REG_P (base)
22872 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22873 {
22874 /* Ensure that BASE is a register. */
22875 /* (One of them must be.) */
22876 /* Also ensure the SP is not used as an index register. */
22877 std::swap (base, index);
22878 }
22879 switch (GET_CODE (index))
22880 {
22881 case CONST_INT:
22882 offset = INTVAL (index);
22883 if (is_minus)
22884 offset = -offset;
22885 asm_fprintf (stream, "[%r, #%wd]",
22886 REGNO (base), offset);
22887 break;
22888
22889 case REG:
22890 asm_fprintf (stream, "[%r, %s%r]",
22891 REGNO (base), is_minus ? "-" : "",
22892 REGNO (index));
22893 break;
22894
22895 case MULT:
22896 case ASHIFTRT:
22897 case LSHIFTRT:
22898 case ASHIFT:
22899 case ROTATERT:
22900 {
22901 asm_fprintf (stream, "[%r, %s%r",
22902 REGNO (base), is_minus ? "-" : "",
22903 REGNO (XEXP (index, 0)));
22904 arm_print_operand (stream, index, 'S');
22905 fputs ("]", stream);
22906 break;
22907 }
22908
22909 default:
22910 gcc_unreachable ();
22911 }
22912 }
22913 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22914 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22915 {
22916 gcc_assert (REG_P (XEXP (x, 0)));
22917
22918 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22919 asm_fprintf (stream, "[%r, #%s%d]!",
22920 REGNO (XEXP (x, 0)),
22921 GET_CODE (x) == PRE_DEC ? "-" : "",
22922 GET_MODE_SIZE (mode));
22923 else
22924 asm_fprintf (stream, "[%r], #%s%d",
22925 REGNO (XEXP (x, 0)),
22926 GET_CODE (x) == POST_DEC ? "-" : "",
22927 GET_MODE_SIZE (mode));
22928 }
22929 else if (GET_CODE (x) == PRE_MODIFY)
22930 {
22931 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22932 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22933 asm_fprintf (stream, "#%wd]!",
22934 INTVAL (XEXP (XEXP (x, 1), 1)));
22935 else
22936 asm_fprintf (stream, "%r]!",
22937 REGNO (XEXP (XEXP (x, 1), 1)));
22938 }
22939 else if (GET_CODE (x) == POST_MODIFY)
22940 {
22941 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22942 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22943 asm_fprintf (stream, "#%wd",
22944 INTVAL (XEXP (XEXP (x, 1), 1)));
22945 else
22946 asm_fprintf (stream, "%r",
22947 REGNO (XEXP (XEXP (x, 1), 1)));
22948 }
22949 else output_addr_const (stream, x);
22950 }
22951 else
22952 {
22953 if (REG_P (x))
22954 asm_fprintf (stream, "[%r]", REGNO (x));
22955 else if (GET_CODE (x) == POST_INC)
22956 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22957 else if (GET_CODE (x) == PLUS)
22958 {
22959 gcc_assert (REG_P (XEXP (x, 0)));
22960 if (CONST_INT_P (XEXP (x, 1)))
22961 asm_fprintf (stream, "[%r, #%wd]",
22962 REGNO (XEXP (x, 0)),
22963 INTVAL (XEXP (x, 1)));
22964 else
22965 asm_fprintf (stream, "[%r, %r]",
22966 REGNO (XEXP (x, 0)),
22967 REGNO (XEXP (x, 1)));
22968 }
22969 else
22970 output_addr_const (stream, x);
22971 }
22972 }
22973 \f
22974 /* Target hook for indicating whether a punctuation character for
22975 TARGET_PRINT_OPERAND is valid. */
22976 static bool
22977 arm_print_operand_punct_valid_p (unsigned char code)
22978 {
22979 return (code == '@' || code == '|' || code == '.'
22980 || code == '(' || code == ')' || code == '#'
22981 || (TARGET_32BIT && (code == '?'))
22982 || (TARGET_THUMB2 && (code == '!'))
22983 || (TARGET_THUMB && (code == '_')));
22984 }
22985 \f
22986 /* Target hook for assembling integer objects. The ARM version needs to
22987 handle word-sized values specially. */
22988 static bool
22989 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22990 {
22991 machine_mode mode;
22992
22993 if (size == UNITS_PER_WORD && aligned_p)
22994 {
22995 fputs ("\t.word\t", asm_out_file);
22996 output_addr_const (asm_out_file, x);
22997
22998 /* Mark symbols as position independent. We only do this in the
22999 .text segment, not in the .data segment. */
23000 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23001 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23002 {
23003 /* See legitimize_pic_address for an explanation of the
23004 TARGET_VXWORKS_RTP check. */
23005 /* References to weak symbols cannot be resolved locally:
23006 they may be overridden by a non-weak definition at link
23007 time. */
23008 if (!arm_pic_data_is_text_relative
23009 || (GET_CODE (x) == SYMBOL_REF
23010 && (!SYMBOL_REF_LOCAL_P (x)
23011 || (SYMBOL_REF_DECL (x)
23012 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
23013 fputs ("(GOT)", asm_out_file);
23014 else
23015 fputs ("(GOTOFF)", asm_out_file);
23016 }
23017 fputc ('\n', asm_out_file);
23018 return true;
23019 }
23020
23021 mode = GET_MODE (x);
23022
23023 if (arm_vector_mode_supported_p (mode))
23024 {
23025 int i, units;
23026
23027 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23028
23029 units = CONST_VECTOR_NUNITS (x);
23030 size = GET_MODE_UNIT_SIZE (mode);
23031
23032 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23033 for (i = 0; i < units; i++)
23034 {
23035 rtx elt = CONST_VECTOR_ELT (x, i);
23036 assemble_integer
23037 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23038 }
23039 else
23040 for (i = 0; i < units; i++)
23041 {
23042 rtx elt = CONST_VECTOR_ELT (x, i);
23043 assemble_real
23044 (*CONST_DOUBLE_REAL_VALUE (elt),
23045 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23046 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23047 }
23048
23049 return true;
23050 }
23051
23052 return default_assemble_integer (x, size, aligned_p);
23053 }
23054
23055 static void
23056 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23057 {
23058 section *s;
23059
23060 if (!TARGET_AAPCS_BASED)
23061 {
23062 (is_ctor ?
23063 default_named_section_asm_out_constructor
23064 : default_named_section_asm_out_destructor) (symbol, priority);
23065 return;
23066 }
23067
23068 /* Put these in the .init_array section, using a special relocation. */
23069 if (priority != DEFAULT_INIT_PRIORITY)
23070 {
23071 char buf[18];
23072 sprintf (buf, "%s.%.5u",
23073 is_ctor ? ".init_array" : ".fini_array",
23074 priority);
23075 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23076 }
23077 else if (is_ctor)
23078 s = ctors_section;
23079 else
23080 s = dtors_section;
23081
23082 switch_to_section (s);
23083 assemble_align (POINTER_SIZE);
23084 fputs ("\t.word\t", asm_out_file);
23085 output_addr_const (asm_out_file, symbol);
23086 fputs ("(target1)\n", asm_out_file);
23087 }
23088
23089 /* Add a function to the list of static constructors. */
23090
23091 static void
23092 arm_elf_asm_constructor (rtx symbol, int priority)
23093 {
23094 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23095 }
23096
23097 /* Add a function to the list of static destructors. */
23098
23099 static void
23100 arm_elf_asm_destructor (rtx symbol, int priority)
23101 {
23102 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23103 }
23104 \f
23105 /* A finite state machine takes care of noticing whether or not instructions
23106 can be conditionally executed, thus decreasing execution time and code
23107 size by deleting branch instructions. The fsm is controlled by
23108 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23109
23110 /* The states of the fsm controlling condition codes are:
23111 0: normal, do nothing special
23112 1: make ASM_OUTPUT_OPCODE not output this instruction
23113 2: make ASM_OUTPUT_OPCODE not output this instruction
23114 3: make instructions conditional
23115 4: make instructions conditional
23116
23117 State transitions (state->state by whom under condition):
23118 0 -> 1 final_prescan_insn if the `target' is a label
23119 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23120 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23121 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23122 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23123 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23124 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23125 (the target insn is arm_target_insn).
23126
23127 If the jump clobbers the conditions then we use states 2 and 4.
23128
23129 A similar thing can be done with conditional return insns.
23130
23131 XXX In case the `target' is an unconditional branch, this conditionalising
23132 of the instructions always reduces code size, but not always execution
23133 time. But then, I want to reduce the code size to somewhere near what
23134 /bin/cc produces. */
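/* An illustrative example of what the fsm achieves (ARM mode): a
   branch-over sequence such as

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:

   is emitted instead as

       cmp     r0, #0
       addne   r1, r1, #1

   i.e. the branch is deleted and the skipped instruction is
   conditionalised on the inverse condition.  */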
23135
23136 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23137 instructions. When a COND_EXEC instruction is seen the subsequent
23138 instructions are scanned so that multiple conditional instructions can be
23139 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23140 specify the length and true/false mask for the IT block. These will be
23141 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
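/* For example (illustrative Thumb-2 output), two COND_EXEC insns with
   opposite conditions can share one IT block:

       cmp     r0, r1
       ite     eq
       moveq   r2, #1
       movne   r2, #0

   Here arm_condexec_mask would record the then/else pattern and
   arm_condexec_count the number of insns still to be output.  */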
23142
23143 /* Returns the index of the ARM condition code string in
23144 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23145 COMPARISON should be an rtx like `(eq (...) (...))'. */
23146
23147 enum arm_cond_code
23148 maybe_get_arm_condition_code (rtx comparison)
23149 {
23150 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23151 enum arm_cond_code code;
23152 enum rtx_code comp_code = GET_CODE (comparison);
23153
23154 if (GET_MODE_CLASS (mode) != MODE_CC)
23155 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23156 XEXP (comparison, 1));
23157
23158 switch (mode)
23159 {
23160 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23161 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23162 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23163 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23164 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23165 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23166 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23167 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23168 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23169 case E_CC_DLTUmode: code = ARM_CC;
23170
23171 dominance:
23172 if (comp_code == EQ)
23173 return ARM_INVERSE_CONDITION_CODE (code);
23174 if (comp_code == NE)
23175 return code;
23176 return ARM_NV;
23177
23178 case E_CC_NOOVmode:
23179 switch (comp_code)
23180 {
23181 case NE: return ARM_NE;
23182 case EQ: return ARM_EQ;
23183 case GE: return ARM_PL;
23184 case LT: return ARM_MI;
23185 default: return ARM_NV;
23186 }
23187
23188 case E_CC_Zmode:
23189 switch (comp_code)
23190 {
23191 case NE: return ARM_NE;
23192 case EQ: return ARM_EQ;
23193 default: return ARM_NV;
23194 }
23195
23196 case E_CC_Nmode:
23197 switch (comp_code)
23198 {
23199 case NE: return ARM_MI;
23200 case EQ: return ARM_PL;
23201 default: return ARM_NV;
23202 }
23203
23204 case E_CCFPEmode:
23205 case E_CCFPmode:
23206 /* We can handle all cases except UNEQ and LTGT. */
23207 switch (comp_code)
23208 {
23209 case GE: return ARM_GE;
23210 case GT: return ARM_GT;
23211 case LE: return ARM_LS;
23212 case LT: return ARM_MI;
23213 case NE: return ARM_NE;
23214 case EQ: return ARM_EQ;
23215 case ORDERED: return ARM_VC;
23216 case UNORDERED: return ARM_VS;
23217 case UNLT: return ARM_LT;
23218 case UNLE: return ARM_LE;
23219 case UNGT: return ARM_HI;
23220 case UNGE: return ARM_PL;
23221 /* UNEQ and LTGT do not have a representation. */
23222 case UNEQ: /* Fall through. */
23223 case LTGT: /* Fall through. */
23224 default: return ARM_NV;
23225 }
23226
23227 case E_CC_SWPmode:
23228 switch (comp_code)
23229 {
23230 case NE: return ARM_NE;
23231 case EQ: return ARM_EQ;
23232 case GE: return ARM_LE;
23233 case GT: return ARM_LT;
23234 case LE: return ARM_GE;
23235 case LT: return ARM_GT;
23236 case GEU: return ARM_LS;
23237 case GTU: return ARM_CC;
23238 case LEU: return ARM_CS;
23239 case LTU: return ARM_HI;
23240 default: return ARM_NV;
23241 }
23242
23243 case E_CC_Cmode:
23244 switch (comp_code)
23245 {
23246 case LTU: return ARM_CS;
23247 case GEU: return ARM_CC;
23248 case NE: return ARM_CS;
23249 case EQ: return ARM_CC;
23250 default: return ARM_NV;
23251 }
23252
23253 case E_CC_CZmode:
23254 switch (comp_code)
23255 {
23256 case NE: return ARM_NE;
23257 case EQ: return ARM_EQ;
23258 case GEU: return ARM_CS;
23259 case GTU: return ARM_HI;
23260 case LEU: return ARM_LS;
23261 case LTU: return ARM_CC;
23262 default: return ARM_NV;
23263 }
23264
23265 case E_CC_NCVmode:
23266 switch (comp_code)
23267 {
23268 case GE: return ARM_GE;
23269 case LT: return ARM_LT;
23270 case GEU: return ARM_CS;
23271 case LTU: return ARM_CC;
23272 default: return ARM_NV;
23273 }
23274
23275 case E_CC_Vmode:
23276 switch (comp_code)
23277 {
23278 case NE: return ARM_VS;
23279 case EQ: return ARM_VC;
23280 default: return ARM_NV;
23281 }
23282
23283 case E_CCmode:
23284 switch (comp_code)
23285 {
23286 case NE: return ARM_NE;
23287 case EQ: return ARM_EQ;
23288 case GE: return ARM_GE;
23289 case GT: return ARM_GT;
23290 case LE: return ARM_LE;
23291 case LT: return ARM_LT;
23292 case GEU: return ARM_CS;
23293 case GTU: return ARM_HI;
23294 case LEU: return ARM_LS;
23295 case LTU: return ARM_CC;
23296 default: return ARM_NV;
23297 }
23298
23299 default: gcc_unreachable ();
23300 }
23301 }
23302
23303 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23304 static enum arm_cond_code
23305 get_arm_condition_code (rtx comparison)
23306 {
23307 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23308 gcc_assert (code != ARM_NV);
23309 return code;
23310 }
23311
23312 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23313 code registers when not targeting Thumb-1. The VFP condition register
23314 only exists when generating hard-float code. */
23315 static bool
23316 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23317 {
23318 if (!TARGET_32BIT)
23319 return false;
23320
23321 *p1 = CC_REGNUM;
23322 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23323 return true;
23324 }
23325
23326 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23327 instructions. */
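/* A worked illustration, assuming each COND_EXEC insn has a ce_count of 1:
   three consecutive insns predicated on EQ, NE and EQ leave arm_current_cc
   set to ARM_EQ, arm_condexec_masklen set to 3 and arm_condexec_mask set to
   0b101 (a bit is set where the insn uses the block's condition rather than
   its inverse).  thumb2_asm_output_opcode below then prefixes the first
   insn of the block with "itet\teq".  */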
23328 void
23329 thumb2_final_prescan_insn (rtx_insn *insn)
23330 {
23331 rtx_insn *first_insn = insn;
23332 rtx body = PATTERN (insn);
23333 rtx predicate;
23334 enum arm_cond_code code;
23335 int n;
23336 int mask;
23337 int max;
23338
23339 /* max_insns_skipped in the tune was already taken into account in the
23340 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
23341 just emit the IT blocks as best we can. It does not make sense to split
23342 the IT blocks. */
23343 max = MAX_INSN_PER_IT_BLOCK;
23344
23345 /* Remove the previous insn from the count of insns to be output. */
23346 if (arm_condexec_count)
23347 arm_condexec_count--;
23348
23349 /* Nothing to do if we are already inside a conditional block. */
23350 if (arm_condexec_count)
23351 return;
23352
23353 if (GET_CODE (body) != COND_EXEC)
23354 return;
23355
23356 /* Conditional jumps are implemented directly. */
23357 if (JUMP_P (insn))
23358 return;
23359
23360 predicate = COND_EXEC_TEST (body);
23361 arm_current_cc = get_arm_condition_code (predicate);
23362
23363 n = get_attr_ce_count (insn);
23364 arm_condexec_count = 1;
23365 arm_condexec_mask = (1 << n) - 1;
23366 arm_condexec_masklen = n;
23367 /* See if subsequent instructions can be combined into the same block. */
23368 for (;;)
23369 {
23370 insn = next_nonnote_insn (insn);
23371
23372 /* Jumping into the middle of an IT block is illegal, so a label or
23373 barrier terminates the block. */
23374 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23375 break;
23376
23377 body = PATTERN (insn);
23378 /* USE and CLOBBER aren't really insns, so just skip them. */
23379 if (GET_CODE (body) == USE
23380 || GET_CODE (body) == CLOBBER)
23381 continue;
23382
23383 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23384 if (GET_CODE (body) != COND_EXEC)
23385 break;
23386 /* Maximum number of conditionally executed instructions in a block. */
23387 n = get_attr_ce_count (insn);
23388 if (arm_condexec_masklen + n > max)
23389 break;
23390
23391 predicate = COND_EXEC_TEST (body);
23392 code = get_arm_condition_code (predicate);
23393 mask = (1 << n) - 1;
23394 if (arm_current_cc == code)
23395 arm_condexec_mask |= (mask << arm_condexec_masklen);
23396 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23397 break;
23398
23399 arm_condexec_count++;
23400 arm_condexec_masklen += n;
23401
23402 /* A jump must be the last instruction in a conditional block. */
23403 if (JUMP_P (insn))
23404 break;
23405 }
23406 /* Restore recog_data (getting the attributes of other insns can
23407 destroy this array, but final.c assumes that it remains intact
23408 across this call). */
23409 extract_constrain_insn_cached (first_insn);
23410 }
23411
23412 void
23413 arm_final_prescan_insn (rtx_insn *insn)
23414 {
23415 /* BODY will hold the body of INSN. */
23416 rtx body = PATTERN (insn);
23417
23418 /* This will be 1 if trying to repeat the trick, and things need to be
23419 reversed if it appears to fail. */
23420 int reverse = 0;
23421
23422 /* If we start with a return insn, we only succeed if we find another one. */
23423 int seeking_return = 0;
23424 enum rtx_code return_code = UNKNOWN;
23425
23426 /* START_INSN will hold the insn from where we start looking. This is the
23427 first insn after the following code_label if REVERSE is true. */
23428 rtx_insn *start_insn = insn;
23429
23430 /* If in state 4, check if the target branch is reached, in order to
23431 change back to state 0. */
23432 if (arm_ccfsm_state == 4)
23433 {
23434 if (insn == arm_target_insn)
23435 {
23436 arm_target_insn = NULL;
23437 arm_ccfsm_state = 0;
23438 }
23439 return;
23440 }
23441
23442 /* If in state 3, it is possible to repeat the trick, if this insn is an
23443 unconditional branch to a label, and immediately following this branch
23444 is the previous target label which is only used once, and the label this
23445 branch jumps to is not too far off. */
23446 if (arm_ccfsm_state == 3)
23447 {
23448 if (simplejump_p (insn))
23449 {
23450 start_insn = next_nonnote_insn (start_insn);
23451 if (BARRIER_P (start_insn))
23452 {
23453 /* XXX Isn't this always a barrier? */
23454 start_insn = next_nonnote_insn (start_insn);
23455 }
23456 if (LABEL_P (start_insn)
23457 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23458 && LABEL_NUSES (start_insn) == 1)
23459 reverse = TRUE;
23460 else
23461 return;
23462 }
23463 else if (ANY_RETURN_P (body))
23464 {
23465 start_insn = next_nonnote_insn (start_insn);
23466 if (BARRIER_P (start_insn))
23467 start_insn = next_nonnote_insn (start_insn);
23468 if (LABEL_P (start_insn)
23469 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23470 && LABEL_NUSES (start_insn) == 1)
23471 {
23472 reverse = TRUE;
23473 seeking_return = 1;
23474 return_code = GET_CODE (body);
23475 }
23476 else
23477 return;
23478 }
23479 else
23480 return;
23481 }
23482
23483 gcc_assert (!arm_ccfsm_state || reverse);
23484 if (!JUMP_P (insn))
23485 return;
23486
23487 /* This jump might be paralleled with a clobber of the condition codes;
23488 the jump should always come first. */
23489 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23490 body = XVECEXP (body, 0, 0);
23491
23492 if (reverse
23493 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23494 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23495 {
23496 int insns_skipped;
23497 int fail = FALSE, succeed = FALSE;
23498 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23499 int then_not_else = TRUE;
23500 rtx_insn *this_insn = start_insn;
23501 rtx label = 0;
23502
23503 /* Register the insn jumped to. */
23504 if (reverse)
23505 {
23506 if (!seeking_return)
23507 label = XEXP (SET_SRC (body), 0);
23508 }
23509 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23510 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23511 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23512 {
23513 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23514 then_not_else = FALSE;
23515 }
23516 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23517 {
23518 seeking_return = 1;
23519 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23520 }
23521 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23522 {
23523 seeking_return = 1;
23524 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23525 then_not_else = FALSE;
23526 }
23527 else
23528 gcc_unreachable ();
23529
23530 /* See how many insns this branch skips, and what kind of insns. If all
23531 insns are okay, and the label or unconditional branch to the same
23532 label is not too far away, succeed. */
23533 for (insns_skipped = 0;
23534 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23535 {
23536 rtx scanbody;
23537
23538 this_insn = next_nonnote_insn (this_insn);
23539 if (!this_insn)
23540 break;
23541
23542 switch (GET_CODE (this_insn))
23543 {
23544 case CODE_LABEL:
23545 /* Succeed if it is the target label, otherwise fail since
23546 control falls in from somewhere else. */
23547 if (this_insn == label)
23548 {
23549 arm_ccfsm_state = 1;
23550 succeed = TRUE;
23551 }
23552 else
23553 fail = TRUE;
23554 break;
23555
23556 case BARRIER:
23557 /* Succeed if the following insn is the target label.
23558 Otherwise fail.
23559 If return insns are used then the last insn in a function
23560 will be a barrier. */
23561 this_insn = next_nonnote_insn (this_insn);
23562 if (this_insn && this_insn == label)
23563 {
23564 arm_ccfsm_state = 1;
23565 succeed = TRUE;
23566 }
23567 else
23568 fail = TRUE;
23569 break;
23570
23571 case CALL_INSN:
23572 /* The AAPCS says that conditional calls should not be
23573 used since they make interworking inefficient (the
23574 linker can't transform BL<cond> into BLX). That's
23575 only a problem if the machine has BLX. */
23576 if (arm_arch5t)
23577 {
23578 fail = TRUE;
23579 break;
23580 }
23581
23582 /* Succeed if the following insn is the target label, or
23583 if the following two insns are a barrier and the
23584 target label. */
23585 this_insn = next_nonnote_insn (this_insn);
23586 if (this_insn && BARRIER_P (this_insn))
23587 this_insn = next_nonnote_insn (this_insn);
23588
23589 if (this_insn && this_insn == label
23590 && insns_skipped < max_insns_skipped)
23591 {
23592 arm_ccfsm_state = 1;
23593 succeed = TRUE;
23594 }
23595 else
23596 fail = TRUE;
23597 break;
23598
23599 case JUMP_INSN:
23600 /* If this is an unconditional branch to the same label, succeed.
23601 If it is to another label, do nothing. If it is conditional,
23602 fail. */
23603 /* XXX Probably, the tests for SET and the PC are
23604 unnecessary. */
23605
23606 scanbody = PATTERN (this_insn);
23607 if (GET_CODE (scanbody) == SET
23608 && GET_CODE (SET_DEST (scanbody)) == PC)
23609 {
23610 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23611 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23612 {
23613 arm_ccfsm_state = 2;
23614 succeed = TRUE;
23615 }
23616 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23617 fail = TRUE;
23618 }
23619 /* Fail if a conditional return is undesirable (e.g. on a
23620 StrongARM), but still allow this if optimizing for size. */
23621 else if (GET_CODE (scanbody) == return_code
23622 && !use_return_insn (TRUE, NULL)
23623 && !optimize_size)
23624 fail = TRUE;
23625 else if (GET_CODE (scanbody) == return_code)
23626 {
23627 arm_ccfsm_state = 2;
23628 succeed = TRUE;
23629 }
23630 else if (GET_CODE (scanbody) == PARALLEL)
23631 {
23632 switch (get_attr_conds (this_insn))
23633 {
23634 case CONDS_NOCOND:
23635 break;
23636 default:
23637 fail = TRUE;
23638 break;
23639 }
23640 }
23641 else
23642 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23643
23644 break;
23645
23646 case INSN:
23647 /* Instructions using or affecting the condition codes make it
23648 fail. */
23649 scanbody = PATTERN (this_insn);
23650 if (!(GET_CODE (scanbody) == SET
23651 || GET_CODE (scanbody) == PARALLEL)
23652 || get_attr_conds (this_insn) != CONDS_NOCOND)
23653 fail = TRUE;
23654 break;
23655
23656 default:
23657 break;
23658 }
23659 }
23660 if (succeed)
23661 {
23662 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23663 arm_target_label = CODE_LABEL_NUMBER (label);
23664 else
23665 {
23666 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23667
23668 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23669 {
23670 this_insn = next_nonnote_insn (this_insn);
23671 gcc_assert (!this_insn
23672 || (!BARRIER_P (this_insn)
23673 && !LABEL_P (this_insn)));
23674 }
23675 if (!this_insn)
23676 {
23677 /* Oh dear! We ran off the end; give up. */
23678 extract_constrain_insn_cached (insn);
23679 arm_ccfsm_state = 0;
23680 arm_target_insn = NULL;
23681 return;
23682 }
23683 arm_target_insn = this_insn;
23684 }
23685
23686 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23687 what it was. */
23688 if (!reverse)
23689 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23690
23691 if (reverse || then_not_else)
23692 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23693 }
23694
23695 /* Restore recog_data (getting the attributes of other insns can
23696 destroy this array, but final.c assumes that it remains intact
23697 across this call). */
23698 extract_constrain_insn_cached (insn);
23699 }
23700 }
23701
23702 /* Output IT instructions. */
23703 void
23704 thumb2_asm_output_opcode (FILE * stream)
23705 {
23706 char buff[5];
23707 int n;
23708
23709 if (arm_condexec_mask)
23710 {
23711 for (n = 0; n < arm_condexec_masklen; n++)
23712 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23713 buff[n] = 0;
23714 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23715 arm_condition_codes[arm_current_cc]);
23716 arm_condexec_mask = 0;
23717 }
23718 }
23719
23720 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23721 UNITS_PER_WORD bytes wide. */
23722 static unsigned int
23723 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23724 {
23725 if (TARGET_32BIT
23726 && regno > PC_REGNUM
23727 && regno != FRAME_POINTER_REGNUM
23728 && regno != ARG_POINTER_REGNUM
23729 && !IS_VFP_REGNUM (regno))
23730 return 1;
23731
23732 return ARM_NUM_REGS (mode);
23733 }
23734
23735 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23736 static bool
23737 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23738 {
23739 if (GET_MODE_CLASS (mode) == MODE_CC)
23740 return (regno == CC_REGNUM
23741 || (TARGET_HARD_FLOAT
23742 && regno == VFPCC_REGNUM));
23743
23744 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23745 return false;
23746
23747 if (TARGET_THUMB1)
23748 /* For the Thumb we only allow values bigger than SImode in
23749 registers 0 - 6, so that there is always a second low
23750 register available to hold the upper part of the value.
23751 We probably ought to ensure that the register is the
23752 start of an even-numbered register pair. */
23753 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23754
23755 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23756 {
23757 if (mode == SFmode || mode == SImode)
23758 return VFP_REGNO_OK_FOR_SINGLE (regno);
23759
23760 if (mode == DFmode)
23761 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23762
23763 if (mode == HFmode)
23764 return VFP_REGNO_OK_FOR_SINGLE (regno);
23765
23766 /* VFP registers can hold HImode values. */
23767 if (mode == HImode)
23768 return VFP_REGNO_OK_FOR_SINGLE (regno);
23769
23770 if (TARGET_NEON)
23771 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23772 || (VALID_NEON_QREG_MODE (mode)
23773 && NEON_REGNO_OK_FOR_QUAD (regno))
23774 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23775 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23776 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23777 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23778 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23779
23780 return false;
23781 }
23782
23783 if (TARGET_REALLY_IWMMXT)
23784 {
23785 if (IS_IWMMXT_GR_REGNUM (regno))
23786 return mode == SImode;
23787
23788 if (IS_IWMMXT_REGNUM (regno))
23789 return VALID_IWMMXT_REG_MODE (mode);
23790 }
23791
23792 /* We allow almost any value to be stored in the general registers.
23793 Restrict doubleword quantities to even register pairs in ARM state
23794 so that we can use ldrd. Do not allow very large Neon structure
23795 opaque modes in general registers; they would use too many. */
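/* For example, with TARGET_LDRD in ARM state, DImode is accepted in the
   even-numbered r0 or r2 but rejected in r1, so that a doubleword value can
   always be accessed with a single ldrd/strd.  */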
23796 if (regno <= LAST_ARM_REGNUM)
23797 {
23798 if (ARM_NUM_REGS (mode) > 4)
23799 return false;
23800
23801 if (TARGET_THUMB2)
23802 return true;
23803
23804 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23805 }
23806
23807 if (regno == FRAME_POINTER_REGNUM
23808 || regno == ARG_POINTER_REGNUM)
23809 /* We only allow integers in the fake hard registers. */
23810 return GET_MODE_CLASS (mode) == MODE_INT;
23811
23812 return false;
23813 }
23814
23815 /* Implement TARGET_MODES_TIEABLE_P. */
23816
23817 static bool
23818 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23819 {
23820 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23821 return true;
23822
23823 /* We specifically want to allow elements of "structure" modes to
23824 be tieable to the structure. This more general condition allows
23825 other rarer situations too. */
23826 if (TARGET_NEON
23827 && (VALID_NEON_DREG_MODE (mode1)
23828 || VALID_NEON_QREG_MODE (mode1)
23829 || VALID_NEON_STRUCT_MODE (mode1))
23830 && (VALID_NEON_DREG_MODE (mode2)
23831 || VALID_NEON_QREG_MODE (mode2)
23832 || VALID_NEON_STRUCT_MODE (mode2)))
23833 return true;
23834
23835 return false;
23836 }
23837
23838 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23839 not used in arm mode. */
23840
23841 enum reg_class
23842 arm_regno_class (int regno)
23843 {
23844 if (regno == PC_REGNUM)
23845 return NO_REGS;
23846
23847 if (TARGET_THUMB1)
23848 {
23849 if (regno == STACK_POINTER_REGNUM)
23850 return STACK_REG;
23851 if (regno == CC_REGNUM)
23852 return CC_REG;
23853 if (regno < 8)
23854 return LO_REGS;
23855 return HI_REGS;
23856 }
23857
23858 if (TARGET_THUMB2 && regno < 8)
23859 return LO_REGS;
23860
23861 if ( regno <= LAST_ARM_REGNUM
23862 || regno == FRAME_POINTER_REGNUM
23863 || regno == ARG_POINTER_REGNUM)
23864 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23865
23866 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23867 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23868
23869 if (IS_VFP_REGNUM (regno))
23870 {
23871 if (regno <= D7_VFP_REGNUM)
23872 return VFP_D0_D7_REGS;
23873 else if (regno <= LAST_LO_VFP_REGNUM)
23874 return VFP_LO_REGS;
23875 else
23876 return VFP_HI_REGS;
23877 }
23878
23879 if (IS_IWMMXT_REGNUM (regno))
23880 return IWMMXT_REGS;
23881
23882 if (IS_IWMMXT_GR_REGNUM (regno))
23883 return IWMMXT_GR_REGS;
23884
23885 return NO_REGS;
23886 }
23887
23888 /* Handle a special case when computing the offset
23889 of an argument from the frame pointer. */
23890 int
23891 arm_debugger_arg_offset (int value, rtx addr)
23892 {
23893 rtx_insn *insn;
23894
23895 /* We are only interested if dbxout_parms() failed to compute the offset. */
23896 if (value != 0)
23897 return 0;
23898
23899 /* We can only cope with the case where the address is held in a register. */
23900 if (!REG_P (addr))
23901 return 0;
23902
23903 /* If we are using the frame pointer to point at the argument, then
23904 an offset of 0 is correct. */
23905 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23906 return 0;
23907
23908 /* If we are using the stack pointer to point at the
23909 argument, then an offset of 0 is correct. */
23910 /* ??? Check this is consistent with thumb2 frame layout. */
23911 if ((TARGET_THUMB || !frame_pointer_needed)
23912 && REGNO (addr) == SP_REGNUM)
23913 return 0;
23914
23915 /* Oh dear. The argument is pointed to by a register rather
23916 than being held in a register, or being stored at a known
23917 offset from the frame pointer. Since GDB only understands
23918 those two kinds of argument we must translate the address
23919 held in the register into an offset from the frame pointer.
23920 We do this by searching through the insns for the function
23921 looking to see where this register gets its value. If the
23922 register is initialized from the frame pointer plus an offset
23923 then we are in luck and we can continue, otherwise we give up.
23924
23925 This code is exercised by producing debugging information
23926 for a function with arguments like this:
23927
23928 double func (double a, double b, int c, double d) {return d;}
23929
23930 Without this code the stab for parameter 'd' will be set to
23931 an offset of 0 from the frame pointer, rather than 8. */
23932
23933 /* The if() statement says:
23934
23935 If the insn is a normal instruction
23936 and if the insn is setting the value in a register
23937 and if the register being set is the register holding the address of the argument
23938 and if the address is computed by an addition
23939 that involves adding to a register
23940 which is the frame pointer
23941 a constant integer
23942
23943 then... */
23944
23945 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23946 {
23947 if ( NONJUMP_INSN_P (insn)
23948 && GET_CODE (PATTERN (insn)) == SET
23949 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23950 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23951 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23952 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23953 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23954 )
23955 {
23956 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23957
23958 break;
23959 }
23960 }
23961
23962 if (value == 0)
23963 {
23964 debug_rtx (addr);
23965 warning (0, "unable to compute real location of stacked parameter");
23966 value = 8; /* XXX magic hack */
23967 }
23968
23969 return value;
23970 }
23971 \f
23972 /* Implement TARGET_PROMOTED_TYPE. */
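/* This promotes the ACLE __fp16 type (arm_fp16_type_node) to float for
   arithmetic, matching __fp16's storage-only semantics; _Float16 uses a
   distinct type node and so is not promoted by this hook.  */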
23973
23974 static tree
23975 arm_promoted_type (const_tree t)
23976 {
23977 if (SCALAR_FLOAT_TYPE_P (t)
23978 && TYPE_PRECISION (t) == 16
23979 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23980 return float_type_node;
23981 return NULL_TREE;
23982 }
23983
23984 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23985 This simply adds HFmode as a supported mode; even though we don't
23986 implement arithmetic on this type directly, it's supported by
23987 optabs conversions, much the way the double-word arithmetic is
23988 special-cased in the default hook. */
23989
23990 static bool
23991 arm_scalar_mode_supported_p (scalar_mode mode)
23992 {
23993 if (mode == HFmode)
23994 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23995 else if (ALL_FIXED_POINT_MODE_P (mode))
23996 return true;
23997 else
23998 return default_scalar_mode_supported_p (mode);
23999 }
24000
24001 /* Set the value of FLT_EVAL_METHOD.
24002 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24003
24004 0: evaluate all operations and constants, whose semantic type has at
24005 most the range and precision of type float, to the range and
24006 precision of float; evaluate all other operations and constants to
24007 the range and precision of the semantic type;
24008
24009 N, where _FloatN is a supported interchange floating type
24010 evaluate all operations and constants, whose semantic type has at
24011 most the range and precision of _FloatN type, to the range and
24012 precision of the _FloatN type; evaluate all other operations and
24013 constants to the range and precision of the semantic type;
24014
24015 If we have the ARMv8.2-A extensions then we support _Float16 in native
24016 precision, so we should set this to 16. Otherwise, we support the type,
24017 but want to evaluate expressions in float precision, so set this to
24018 0. */
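/* For example, with the ARMv8.2-A FP16 instructions available
   (TARGET_VFP_FP16INST), a _Float16 addition a + b is evaluated directly
   in HFmode and FLT_EVAL_METHOD is 16; without them the operands are
   promoted, the addition is carried out in SFmode and the result is
   narrowed back, and FLT_EVAL_METHOD is 0.  */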
24019
24020 static enum flt_eval_method
24021 arm_excess_precision (enum excess_precision_type type)
24022 {
24023 switch (type)
24024 {
24025 case EXCESS_PRECISION_TYPE_FAST:
24026 case EXCESS_PRECISION_TYPE_STANDARD:
24027 /* We can calculate either in 16-bit range and precision or
24028 32-bit range and precision. Make that decision based on whether
24029 we have native support for the ARMv8.2-A 16-bit floating-point
24030 instructions or not. */
24031 return (TARGET_VFP_FP16INST
24032 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24033 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24034 case EXCESS_PRECISION_TYPE_IMPLICIT:
24035 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24036 default:
24037 gcc_unreachable ();
24038 }
24039 return FLT_EVAL_METHOD_UNPREDICTABLE;
24040 }
24041
24042
24043 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24044 _Float16 if we are using anything other than ieee format for 16-bit
24045 floating point. Otherwise, punt to the default implementation. */
24046 static opt_scalar_float_mode
24047 arm_floatn_mode (int n, bool extended)
24048 {
24049 if (!extended && n == 16)
24050 {
24051 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24052 return HFmode;
24053 return opt_scalar_float_mode ();
24054 }
24055
24056 return default_floatn_mode (n, extended);
24057 }
24058
24059
24060 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24061 not to early-clobber SRC registers in the process.
24062
24063 We assume that the operands described by SRC and DEST represent a
24064 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24065 number of components into which the copy has been decomposed. */
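/* For instance, for a two-element copy with DEST = {d1, d2} and
   SRC = {d0, d1}: copying d0->d1 first would clobber the still-needed
   source d1, so the overlap check below selects the reversed order,
   emitting d1->d2 before d0->d1.  */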
24066 void
24067 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24068 {
24069 unsigned int i;
24070
24071 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24072 || REGNO (operands[0]) < REGNO (operands[1]))
24073 {
24074 for (i = 0; i < count; i++)
24075 {
24076 operands[2 * i] = dest[i];
24077 operands[2 * i + 1] = src[i];
24078 }
24079 }
24080 else
24081 {
24082 for (i = 0; i < count; i++)
24083 {
24084 operands[2 * i] = dest[count - i - 1];
24085 operands[2 * i + 1] = src[count - i - 1];
24086 }
24087 }
24088 }
24089
24090 /* Split operands into moves from op[1] + op[2] into op[0]. */
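/* E.g. for q0 = vcombine (d1, d0) the halves are already in the destination
   but swapped, so the code below emits the two sets as one PARALLEL, which
   matches the Neon VSWP pattern; when there is no overlap it simply emits
   one or two ordinary moves, skipping any half already in place.  */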
24091
24092 void
24093 neon_split_vcombine (rtx operands[3])
24094 {
24095 unsigned int dest = REGNO (operands[0]);
24096 unsigned int src1 = REGNO (operands[1]);
24097 unsigned int src2 = REGNO (operands[2]);
24098 machine_mode halfmode = GET_MODE (operands[1]);
24099 unsigned int halfregs = REG_NREGS (operands[1]);
24100 rtx destlo, desthi;
24101
24102 if (src1 == dest && src2 == dest + halfregs)
24103 {
24104 /* No-op move. Can't split to nothing; emit something. */
24105 emit_note (NOTE_INSN_DELETED);
24106 return;
24107 }
24108
24109 /* Preserve register attributes for variable tracking. */
24110 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24111 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24112 GET_MODE_SIZE (halfmode));
24113
24114 /* Special case of reversed high/low parts. Use VSWP. */
24115 if (src2 == dest && src1 == dest + halfregs)
24116 {
24117 rtx x = gen_rtx_SET (destlo, operands[1]);
24118 rtx y = gen_rtx_SET (desthi, operands[2]);
24119 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24120 return;
24121 }
24122
24123 if (!reg_overlap_mentioned_p (operands[2], destlo))
24124 {
24125 /* Try to avoid unnecessary moves if part of the result
24126 is in the right place already. */
24127 if (src1 != dest)
24128 emit_move_insn (destlo, operands[1]);
24129 if (src2 != dest + halfregs)
24130 emit_move_insn (desthi, operands[2]);
24131 }
24132 else
24133 {
24134 if (src2 != dest + halfregs)
24135 emit_move_insn (desthi, operands[2]);
24136 if (src1 != dest)
24137 emit_move_insn (destlo, operands[1]);
24138 }
24139 }
24140 \f
24141 /* Return the number (counting from 0) of
24142 the least significant set bit in MASK. */
24143
24144 inline static int
24145 number_of_first_bit_set (unsigned mask)
24146 {
24147 return ctz_hwi (mask);
24148 }
24149
24150 /* Like emit_multi_reg_push, but allowing for a different set of
24151 registers to be described as saved. MASK is the set of registers
24152 to be saved; REAL_REGS is the set of registers to be described as
24153 saved. If REAL_REGS is 0, only describe the stack adjustment. */
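/* For example, MASK = {r4, r5, lr} emits a single push of r4, r5 and lr
   (as a pre-modify store-multiple), and when REAL_REGS is the same mask the
   attached REG_FRAME_RELATED_EXPR note describes SP = SP - 12 plus the
   three individual stores for the unwinder.  */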
24154
24155 static rtx_insn *
24156 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24157 {
24158 unsigned long regno;
24159 rtx par[10], tmp, reg;
24160 rtx_insn *insn;
24161 int i, j;
24162
24163 /* Build the parallel of the registers actually being stored. */
24164 for (i = 0; mask; ++i, mask &= mask - 1)
24165 {
24166 regno = ctz_hwi (mask);
24167 reg = gen_rtx_REG (SImode, regno);
24168
24169 if (i == 0)
24170 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24171 else
24172 tmp = gen_rtx_USE (VOIDmode, reg);
24173
24174 par[i] = tmp;
24175 }
24176
24177 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24178 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24179 tmp = gen_frame_mem (BLKmode, tmp);
24180 tmp = gen_rtx_SET (tmp, par[0]);
24181 par[0] = tmp;
24182
24183 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24184 insn = emit_insn (tmp);
24185
24186 /* Always build the stack adjustment note for unwind info. */
24187 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24188 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24189 par[0] = tmp;
24190
24191 /* Build the parallel of the registers recorded as saved for unwind. */
24192 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24193 {
24194 regno = ctz_hwi (real_regs);
24195 reg = gen_rtx_REG (SImode, regno);
24196
24197 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24198 tmp = gen_frame_mem (SImode, tmp);
24199 tmp = gen_rtx_SET (tmp, reg);
24200 RTX_FRAME_RELATED_P (tmp) = 1;
24201 par[j + 1] = tmp;
24202 }
24203
24204 if (j == 0)
24205 tmp = par[0];
24206 else
24207 {
24208 RTX_FRAME_RELATED_P (par[0]) = 1;
24209 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24210 }
24211
24212 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24213
24214 return insn;
24215 }
24216
24217 /* Emit code to pop registers from the stack. F is the
24218 assembly file. MASK is the registers to pop. */
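/* For example, thumb_pop (f, 0x90) prints "pop\t{r4, r7}".  A mask
   containing the PC bit either appends ", pc" or defers to thumb_exit ()
   when the PC cannot be popped directly (interworking, backtrace,
   __builtin_eh_return or a CMSE entry function).  */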
24219 static void
24220 thumb_pop (FILE *f, unsigned long mask)
24221 {
24222 int regno;
24223 int lo_mask = mask & 0xFF;
24224
24225 gcc_assert (mask);
24226
24227 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24228 {
24229 /* Special case. Do not generate a POP PC statement here; do it in
24230 thumb_exit (). */
24231 thumb_exit (f, -1);
24232 return;
24233 }
24234
24235 fprintf (f, "\tpop\t{");
24236
24237 /* Look at the low registers first. */
24238 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24239 {
24240 if (lo_mask & 1)
24241 {
24242 asm_fprintf (f, "%r", regno);
24243
24244 if ((lo_mask & ~1) != 0)
24245 fprintf (f, ", ");
24246 }
24247 }
24248
24249 if (mask & (1 << PC_REGNUM))
24250 {
24251 /* Catch popping the PC. */
24252 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24253 || IS_CMSE_ENTRY (arm_current_func_type ()))
24254 {
24255 /* The PC is never popped directly; instead
24256 it is popped into r3 and then BX is used. */
24257 fprintf (f, "}\n");
24258
24259 thumb_exit (f, -1);
24260
24261 return;
24262 }
24263 else
24264 {
24265 if (mask & 0xFF)
24266 fprintf (f, ", ");
24267
24268 asm_fprintf (f, "%r", PC_REGNUM);
24269 }
24270 }
24271
24272 fprintf (f, "}\n");
24273 }
24274
24275 /* Generate code to return from a thumb function.
24276 If 'reg_containing_return_addr' is -1, then the return address is
24277 actually on the stack, at the stack pointer.
24278
24279 Note: do not forget to update length attribute of corresponding insn pattern
24280 when changing assembly output (eg. length attribute of epilogue_insns when
24281 updating Armv8-M Baseline Security Extensions register clearing
24282 sequences). */
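/* In the simplest case (return address on the stack, no interworking or
   backtrace structure, not entered in ARM mode, no __builtin_eh_return and
   not a CMSE entry) this reduces to a single "pop {pc}"; the code below
   handles the harder cases, where the return address and the saved FP/SP
   have to be shuffled through whichever argument registers the return
   value leaves free.  */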
24283 static void
24284 thumb_exit (FILE *f, int reg_containing_return_addr)
24285 {
24286 unsigned regs_available_for_popping;
24287 unsigned regs_to_pop;
24288 int pops_needed;
24289 unsigned available;
24290 unsigned required;
24291 machine_mode mode;
24292 int size;
24293 int restore_a4 = FALSE;
24294
24295 /* Compute the registers we need to pop. */
24296 regs_to_pop = 0;
24297 pops_needed = 0;
24298
24299 if (reg_containing_return_addr == -1)
24300 {
24301 regs_to_pop |= 1 << LR_REGNUM;
24302 ++pops_needed;
24303 }
24304
24305 if (TARGET_BACKTRACE)
24306 {
24307 /* Restore the (ARM) frame pointer and stack pointer. */
24308 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24309 pops_needed += 2;
24310 }
24311
24312 /* If there is nothing to pop then just emit the BX instruction and
24313 return. */
24314 if (pops_needed == 0)
24315 {
24316 if (crtl->calls_eh_return)
24317 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24318
24319 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24320 {
24321 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24322 reg_containing_return_addr);
24323 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24324 }
24325 else
24326 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24327 return;
24328 }
24329 /* Otherwise if we are not supporting interworking and we have not created
24330 a backtrace structure and the function was not entered in ARM mode then
24331 just pop the return address straight into the PC. */
24332 else if (!TARGET_INTERWORK
24333 && !TARGET_BACKTRACE
24334 && !is_called_in_ARM_mode (current_function_decl)
24335 && !crtl->calls_eh_return
24336 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24337 {
24338 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24339 return;
24340 }
24341
24342 /* Find out how many of the (return) argument registers we can corrupt. */
24343 regs_available_for_popping = 0;
24344
24345 /* If returning via __builtin_eh_return, the bottom three registers
24346 all contain information needed for the return. */
24347 if (crtl->calls_eh_return)
24348 size = 12;
24349 else
24350 {
24351 /* Deduce the registers used from the function's
24352 return value. This is more reliable than examining
24353 df_regs_ever_live_p () because that will be set if the register is
24354 ever used in the function, not just if the register is used
24355 to hold a return value. */
24356
24357 if (crtl->return_rtx != 0)
24358 mode = GET_MODE (crtl->return_rtx);
24359 else
24360 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24361
24362 size = GET_MODE_SIZE (mode);
24363
24364 if (size == 0)
24365 {
24366 /* In a void function we can use any argument register.
24367 In a function that returns a structure on the stack
24368 we can use the second and third argument registers. */
24369 if (mode == VOIDmode)
24370 regs_available_for_popping =
24371 (1 << ARG_REGISTER (1))
24372 | (1 << ARG_REGISTER (2))
24373 | (1 << ARG_REGISTER (3));
24374 else
24375 regs_available_for_popping =
24376 (1 << ARG_REGISTER (2))
24377 | (1 << ARG_REGISTER (3));
24378 }
24379 else if (size <= 4)
24380 regs_available_for_popping =
24381 (1 << ARG_REGISTER (2))
24382 | (1 << ARG_REGISTER (3));
24383 else if (size <= 8)
24384 regs_available_for_popping =
24385 (1 << ARG_REGISTER (3));
24386 }
24387
24388 /* Match registers to be popped with registers into which we pop them. */
24389 for (available = regs_available_for_popping,
24390 required = regs_to_pop;
24391 required != 0 && available != 0;
24392 available &= ~(available & - available),
24393 required &= ~(required & - required))
24394 -- pops_needed;
24395
24396 /* If we have any popping registers left over, remove them. */
24397 if (available > 0)
24398 regs_available_for_popping &= ~available;
24399
24400 /* Otherwise if we need another popping register we can use
24401 the fourth argument register. */
24402 else if (pops_needed)
24403 {
24404 /* If we have not found any free argument registers and
24405 reg a4 contains the return address, we must move it. */
24406 if (regs_available_for_popping == 0
24407 && reg_containing_return_addr == LAST_ARG_REGNUM)
24408 {
24409 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24410 reg_containing_return_addr = LR_REGNUM;
24411 }
24412 else if (size > 12)
24413 {
24414 /* Register a4 is being used to hold part of the return value,
24415 but we have dire need of a free, low register. */
24416 restore_a4 = TRUE;
24417
24418 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24419 }
24420
24421 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24422 {
24423 /* The fourth argument register is available. */
24424 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24425
24426 --pops_needed;
24427 }
24428 }
24429
24430 /* Pop as many registers as we can. */
24431 thumb_pop (f, regs_available_for_popping);
24432
24433 /* Process the registers we popped. */
24434 if (reg_containing_return_addr == -1)
24435 {
24436 /* The return address was popped into the lowest numbered register. */
24437 regs_to_pop &= ~(1 << LR_REGNUM);
24438
24439 reg_containing_return_addr =
24440 number_of_first_bit_set (regs_available_for_popping);
24441
24442 /* Remove this register from the mask of available registers, so that
24443 the return address will not be corrupted by further pops. */
24444 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24445 }
24446
24447 /* If we popped other registers then handle them here. */
24448 if (regs_available_for_popping)
24449 {
24450 int frame_pointer;
24451
24452 /* Work out which register currently contains the frame pointer. */
24453 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24454
24455 /* Move it into the correct place. */
24456 asm_fprintf (f, "\tmov\t%r, %r\n",
24457 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24458
24459 /* (Temporarily) remove it from the mask of popped registers. */
24460 regs_available_for_popping &= ~(1 << frame_pointer);
24461 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24462
24463 if (regs_available_for_popping)
24464 {
24465 int stack_pointer;
24466
24467 /* We popped the stack pointer as well;
24468 find the register that contains it. */
24469 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24470
24471 /* Move it into the stack register. */
24472 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24473
24474 /* At this point we have popped all necessary registers, so
24475 do not worry about restoring regs_available_for_popping
24476 to its correct value:
24477
24478 assert (pops_needed == 0)
24479 assert (regs_available_for_popping == (1 << frame_pointer))
24480 assert (regs_to_pop == (1 << STACK_POINTER)) */
24481 }
24482 else
24483 {
24484 /* Since we have just moved the popped value into the frame
24485 pointer, the popping register is available for reuse, and
24486 we know that we still have the stack pointer left to pop. */
24487 regs_available_for_popping |= (1 << frame_pointer);
24488 }
24489 }
24490
24491 /* If we still have registers left on the stack, but we no longer have
24492 any registers into which we can pop them, then we must move the return
24493 address into the link register and make available the register that
24494 contained it. */
24495 if (regs_available_for_popping == 0 && pops_needed > 0)
24496 {
24497 regs_available_for_popping |= 1 << reg_containing_return_addr;
24498
24499 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24500 reg_containing_return_addr);
24501
24502 reg_containing_return_addr = LR_REGNUM;
24503 }
24504
24505 /* If we have registers left on the stack then pop some more.
24506 We know that at most we will want to pop FP and SP. */
24507 if (pops_needed > 0)
24508 {
24509 int popped_into;
24510 int move_to;
24511
24512 thumb_pop (f, regs_available_for_popping);
24513
24514 /* We have popped either FP or SP.
24515 Move whichever one it is into the correct register. */
24516 popped_into = number_of_first_bit_set (regs_available_for_popping);
24517 move_to = number_of_first_bit_set (regs_to_pop);
24518
24519 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24520 --pops_needed;
24521 }
24522
24523 /* If we still have not popped everything then we must have only
24524 had one register available to us and we are now popping the SP. */
24525 if (pops_needed > 0)
24526 {
24527 int popped_into;
24528
24529 thumb_pop (f, regs_available_for_popping);
24530
24531 popped_into = number_of_first_bit_set (regs_available_for_popping);
24532
24533 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24534 /*
24535 assert (regs_to_pop == (1 << STACK_POINTER))
24536 assert (pops_needed == 1)
24537 */
24538 }
24539
24540 /* If necessary restore the a4 register. */
24541 if (restore_a4)
24542 {
24543 if (reg_containing_return_addr != LR_REGNUM)
24544 {
24545 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24546 reg_containing_return_addr = LR_REGNUM;
24547 }
24548
24549 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24550 }
24551
24552 if (crtl->calls_eh_return)
24553 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24554
24555 /* Return to caller. */
24556 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24557 {
24558 /* This is for the cases where LR is not being used to contain the return
24559 address. It may therefore contain information that we might not want
24560 to leak, hence it must be cleared. The value in R0 will never be a
24561 secret at this point, so it is safe to use it, see the clearing code
24562 in 'cmse_nonsecure_entry_clear_before_return'. */
24563 if (reg_containing_return_addr != LR_REGNUM)
24564 asm_fprintf (f, "\tmov\tlr, r0\n");
24565
24566 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24567 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24568 }
24569 else
24570 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24571 }
24572 \f
24573 /* Scan INSN just before assembler is output for it.
24574 For Thumb-1, we track the status of the condition codes; this
24575 information is used in the cbranchsi4_insn pattern. */
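/* For example, after an "adds r0, r0, r1" (CONDS_SET) the fields below
   record that the flags reflect r0 compared against zero (CC_NOOVmode); a
   later cbranchsi4_insn testing r0 against 0 can then reuse the recorded
   comparison rather than emitting a fresh compare, until something
   modifies r0 or clobbers the flags.  */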
24576 void
24577 thumb1_final_prescan_insn (rtx_insn *insn)
24578 {
24579 if (flag_print_asm_name)
24580 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24581 INSN_ADDRESSES (INSN_UID (insn)));
24582 /* Don't overwrite the previous setter when we get to a cbranch. */
24583 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24584 {
24585 enum attr_conds conds;
24586
24587 if (cfun->machine->thumb1_cc_insn)
24588 {
24589 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24590 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24591 CC_STATUS_INIT;
24592 }
24593 conds = get_attr_conds (insn);
24594 if (conds == CONDS_SET)
24595 {
24596 rtx set = single_set (insn);
24597 cfun->machine->thumb1_cc_insn = insn;
24598 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24599 cfun->machine->thumb1_cc_op1 = const0_rtx;
24600 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24601 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24602 {
24603 rtx src1 = XEXP (SET_SRC (set), 1);
24604 if (src1 == const0_rtx)
24605 cfun->machine->thumb1_cc_mode = CCmode;
24606 }
24607 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24608 {
24609 /* Record the src register operand instead of dest because
24610 cprop_hardreg pass propagates src. */
24611 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24612 }
24613 }
24614 else if (conds != CONDS_NOCOND)
24615 cfun->machine->thumb1_cc_insn = NULL_RTX;
24616 }
24617
24618 /* Check if unexpected far jump is used. */
24619 if (cfun->machine->lr_save_eliminated
24620 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24621 internal_error("Unexpected thumb1 far jump");
24622 }
24623
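/* Return nonzero if VAL (viewed as 32 bits) is an 8-bit constant shifted
   left by between 0 and 24 bits, e.g. 0x00ff0000 (0xff << 16) qualifies
   while 0x00ff00ff does not.  */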
24624 int
24625 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24626 {
24627 unsigned HOST_WIDE_INT mask = 0xff;
24628 int i;
24629
24630 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24631 if (val == 0) /* XXX */
24632 return 0;
24633
24634 for (i = 0; i < 25; i++)
24635 if ((val & (mask << i)) == val)
24636 return 1;
24637
24638 return 0;
24639 }
24640
24641 /* Returns nonzero if the current function contains,
24642 or might contain, a far jump. */
24643 static int
24644 thumb_far_jump_used_p (void)
24645 {
24646 rtx_insn *insn;
24647 bool far_jump = false;
24648 unsigned int func_size = 0;
24649
24650 /* If we have already decided that far jumps may be used,
24651 do not bother checking again, and always return true even if
24652 it turns out that they are not being used. Once we have made
24653 the decision that far jumps are present (and hence that the link
24654 register will be pushed onto the stack) we cannot go back on it. */
24655 if (cfun->machine->far_jump_used)
24656 return 1;
24657
24658 /* If this function is not being called from the prologue/epilogue
24659 generation code then it must be being called from the
24660 INITIAL_ELIMINATION_OFFSET macro. */
24661 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24662 {
24663 /* In this case we know that we are being asked about the elimination
24664 of the arg pointer register. If that register is not being used,
24665 then there are no arguments on the stack, and we do not have to
24666 worry that a far jump might force the prologue to push the link
24667 register, changing the stack offsets. In this case we can just
24668 return false, since the presence of far jumps in the function will
24669 not affect stack offsets.
24670
24671 If the arg pointer is live (or if it was live, but has now been
24672 eliminated and so set to dead) then we do have to test to see if
24673 the function might contain a far jump. This test can lead to some
24674 false positives, since before reload is completed, the length of
24675 branch instructions is not known, so gcc defaults to returning their
24676 longest length, which in turn sets the far jump attribute to true.
24677
24678 A false positive will not result in bad code being generated, but it
24679 will result in a needless push and pop of the link register. We
24680 hope that this does not occur too often.
24681
24682 If we need doubleword stack alignment this could affect the other
24683 elimination offsets so we can't risk getting it wrong. */
24684 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24685 cfun->machine->arg_pointer_live = 1;
24686 else if (!cfun->machine->arg_pointer_live)
24687 return 0;
24688 }
24689
24690 /* We should not change far_jump_used during or after reload, as there is
24691 no chance to change stack frame layout. */
24692 if (reload_in_progress || reload_completed)
24693 return 0;
24694
24695 /* Check to see if the function contains a branch
24696 insn with the far jump attribute set. */
24697 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24698 {
24699 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24700 {
24701 far_jump = true;
24702 }
24703 func_size += get_attr_length (insn);
24704 }
24705
24706 /* The far_jump attribute will always be true for thumb1 before the
24707 shorten_branch pass, so checking the far_jump attribute before
24708 shorten_branch isn't very useful.
24709
24710 The following heuristic tries to estimate more accurately whether a far jump
24711 may finally be used. The heuristic is very conservative, as there is
24712 no chance to roll back the decision not to use a far jump.
24713
24714 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24715 2-byte insn is associated with a 4 byte constant pool. Using
24716 function size 2048/3 as the threshold is conservative enough. */
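/* For instance, a function whose insn lengths sum to 700 bytes gives
   700 * 3 = 2100 >= 2048, so we conservatively commit to the far jump (and
   hence the LR save); a 600-byte function falls below the threshold.  */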
24717 if (far_jump)
24718 {
24719 if ((func_size * 3) >= 2048)
24720 {
24721 /* Record the fact that we have decided that
24722 the function does use far jumps. */
24723 cfun->machine->far_jump_used = 1;
24724 return 1;
24725 }
24726 }
24727
24728 return 0;
24729 }
24730
24731 /* Return nonzero if FUNC must be entered in ARM mode. */
24732 static bool
24733 is_called_in_ARM_mode (tree func)
24734 {
24735 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24736
24737 /* Ignore the problem about functions whose address is taken. */
24738 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24739 return true;
24740
24741 #ifdef ARM_PE
24742 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24743 #else
24744 return false;
24745 #endif
24746 }
24747
24748 /* Given the stack offsets and register mask in OFFSETS, decide how
24749 many additional registers to push instead of subtracting a constant
24750 from SP. For epilogues the principle is the same except we use pop.
24751 FOR_PROLOGUE indicates which we're generating. */
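/* A worked example: with a 512-byte frame and one otherwise-unused low
   register, pushing that extra register leaves an adjustment of 508 bytes,
   which fits the immediate field of a single Thumb-1 "sub sp, #N" (N is a
   multiple of four up to 508), so the constant 512 never has to be loaded
   into a register; (amount - 508) / 4 == 1 extra push is returned.  */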
24752 static int
24753 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24754 {
24755 HOST_WIDE_INT amount;
24756 unsigned long live_regs_mask = offsets->saved_regs_mask;
24757 /* Extract a mask of the ones we can give to the Thumb's push/pop
24758 instruction. */
24759 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24760 /* Then count how many other high registers will need to be pushed. */
24761 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24762 int n_free, reg_base, size;
24763
24764 if (!for_prologue && frame_pointer_needed)
24765 amount = offsets->locals_base - offsets->saved_regs;
24766 else
24767 amount = offsets->outgoing_args - offsets->saved_regs;
24768
24769 /* If the stack frame size is 512 exactly, we can save one load
24770 instruction, which should make this a win even when optimizing
24771 for speed. */
24772 if (!optimize_size && amount != 512)
24773 return 0;
24774
24775 /* Can't do this if there are high registers to push. */
24776 if (high_regs_pushed != 0)
24777 return 0;
24778
24779 /* Shouldn't do it in the prologue if no registers would normally
24780 be pushed at all. In the epilogue, also allow it if we'll have
24781 a pop insn for the PC. */
24782 if (l_mask == 0
24783 && (for_prologue
24784 || TARGET_BACKTRACE
24785 || (live_regs_mask & 1 << LR_REGNUM) == 0
24786 || TARGET_INTERWORK
24787 || crtl->args.pretend_args_size != 0))
24788 return 0;
24789
24790 /* Don't do this if thumb_expand_prologue wants to emit instructions
24791 between the push and the stack frame allocation. */
24792 if (for_prologue
24793 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24794 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24795 return 0;
24796
24797 reg_base = 0;
24798 n_free = 0;
24799 if (!for_prologue)
24800 {
24801 size = arm_size_return_regs ();
24802 reg_base = ARM_NUM_INTS (size);
24803 live_regs_mask >>= reg_base;
24804 }
24805
24806 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24807 && (for_prologue || call_used_regs[reg_base + n_free]))
24808 {
24809 live_regs_mask >>= 1;
24810 n_free++;
24811 }
24812
24813 if (n_free == 0)
24814 return 0;
24815 gcc_assert (amount / 4 * 4 == amount);
24816
24817 if (amount >= 512 && (amount - n_free * 4) < 512)
24818 return (amount - 508) / 4;
24819 if (amount <= n_free * 4)
24820 return amount / 4;
24821 return 0;
24822 }
24823
24824 /* The bits which aren't usefully expanded as rtl. */
24825 const char *
24826 thumb1_unexpanded_epilogue (void)
24827 {
24828 arm_stack_offsets *offsets;
24829 int regno;
24830 unsigned long live_regs_mask = 0;
24831 int high_regs_pushed = 0;
24832 int extra_pop;
24833 int had_to_push_lr;
24834 int size;
24835
24836 if (cfun->machine->return_used_this_function != 0)
24837 return "";
24838
24839 if (IS_NAKED (arm_current_func_type ()))
24840 return "";
24841
24842 offsets = arm_get_frame_offsets ();
24843 live_regs_mask = offsets->saved_regs_mask;
24844 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24845
24846 /* Deduce the registers used from the function's return value.
24847 This is more reliable than examining df_regs_ever_live_p () because that
24848 will be set if the register is ever used in the function, not just if
24849 the register is used to hold a return value. */
24850 size = arm_size_return_regs ();
24851
24852 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24853 if (extra_pop > 0)
24854 {
24855 unsigned long extra_mask = (1 << extra_pop) - 1;
24856 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24857 }
24858
24859 /* The prolog may have pushed some high registers to use as
24860 work registers. e.g. the testsuite file:
24861 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24862 compiles to produce:
24863 push {r4, r5, r6, r7, lr}
24864 mov r7, r9
24865 mov r6, r8
24866 push {r6, r7}
24867 as part of the prolog. We have to undo that pushing here. */
24868
24869 if (high_regs_pushed)
24870 {
24871 unsigned long mask = live_regs_mask & 0xff;
24872 int next_hi_reg;
24873
24874 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
24875
24876 if (mask == 0)
24877 /* Oh dear! We have no low registers into which we can pop
24878 high registers! */
24879 internal_error
24880 ("no low registers available for popping high registers");
24881
24882 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24883 if (live_regs_mask & (1 << next_hi_reg))
24884 break;
24885
24886 while (high_regs_pushed)
24887 {
24888 /* Find lo register(s) into which the high register(s) can
24889 be popped. */
24890 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24891 {
24892 if (mask & (1 << regno))
24893 high_regs_pushed--;
24894 if (high_regs_pushed == 0)
24895 break;
24896 }
24897
24898 if (high_regs_pushed == 0 && regno >= 0)
24899 mask &= ~((1 << regno) - 1);
24900
24901 /* Pop the values into the low register(s). */
24902 thumb_pop (asm_out_file, mask);
24903
24904 /* Move the value(s) into the high registers. */
24905 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24906 {
24907 if (mask & (1 << regno))
24908 {
24909 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24910 regno);
24911
24912 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
24913 next_hi_reg--)
24914 if (live_regs_mask & (1 << next_hi_reg))
24915 break;
24916 }
24917 }
24918 }
24919 live_regs_mask &= ~0x0f00;
24920 }
24921
24922 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24923 live_regs_mask &= 0xff;
24924
24925 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24926 {
24927 /* Pop the return address into the PC. */
24928 if (had_to_push_lr)
24929 live_regs_mask |= 1 << PC_REGNUM;
24930
24931 /* Either no argument registers were pushed or a backtrace
24932 structure was created which includes an adjusted stack
24933 pointer, so just pop everything. */
24934 if (live_regs_mask)
24935 thumb_pop (asm_out_file, live_regs_mask);
24936
24937 /* We have either just popped the return address into the
24938 PC or it was kept in LR for the entire function.
24939 Note that thumb_pop has already called thumb_exit if the
24940 PC was in the list. */
24941 if (!had_to_push_lr)
24942 thumb_exit (asm_out_file, LR_REGNUM);
24943 }
24944 else
24945 {
24946 /* Pop everything but the return address. */
24947 if (live_regs_mask)
24948 thumb_pop (asm_out_file, live_regs_mask);
24949
24950 if (had_to_push_lr)
24951 {
24952 if (size > 12)
24953 {
24954 /* We have no free low regs, so save one. */
24955 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24956 LAST_ARG_REGNUM);
24957 }
24958
24959 /* Get the return address into a temporary register. */
24960 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24961
24962 if (size > 12)
24963 {
24964 /* Move the return address to lr. */
24965 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24966 LAST_ARG_REGNUM);
24967 /* Restore the low register. */
24968 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24969 IP_REGNUM);
24970 regno = LR_REGNUM;
24971 }
24972 else
24973 regno = LAST_ARG_REGNUM;
24974 }
24975 else
24976 regno = LR_REGNUM;
24977
24978 /* Remove the argument registers that were pushed onto the stack. */
24979 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24980 SP_REGNUM, SP_REGNUM,
24981 crtl->args.pretend_args_size);
24982
24983 thumb_exit (asm_out_file, regno);
24984 }
24985
24986 return "";
24987 }
24988
24989 /* Functions to save and restore machine-specific function data. */
24990 static struct machine_function *
24991 arm_init_machine_status (void)
24992 {
24993 struct machine_function *machine;
24994 machine = ggc_cleared_alloc<machine_function> ();
24995
24996 #if ARM_FT_UNKNOWN != 0
24997 machine->func_type = ARM_FT_UNKNOWN;
24998 #endif
24999 machine->static_chain_stack_bytes = -1;
25000 return machine;
25001 }
25002
25003 /* Return an RTX indicating where the return address to the
25004 calling function can be found. */
25005 rtx
25006 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25007 {
25008 if (count != 0)
25009 return NULL_RTX;
25010
25011 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25012 }
25013
25014 /* Do anything needed before RTL is emitted for each function. */
25015 void
25016 arm_init_expanders (void)
25017 {
25018 /* Arrange to initialize and mark the machine per-function status. */
25019 init_machine_status = arm_init_machine_status;
25020
25021 /* This is to stop the combine pass optimizing away the alignment
25022 adjustment of va_arg. */
25023 /* ??? It is claimed that this should not be necessary. */
25024 if (cfun)
25025 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25026 }
25027
25028 /* Return true if FUNC must be compiled in a different mode (ARM/Thumb) than the current function. */
25029
25030 bool
25031 arm_change_mode_p (tree func)
25032 {
25033 if (TREE_CODE (func) != FUNCTION_DECL)
25034 return false;
25035
25036 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
25037
25038 if (!callee_tree)
25039 callee_tree = target_option_default_node;
25040
25041 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25042 int flags = callee_opts->x_target_flags;
25043
25044 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
25045 }
25046
25047 /* Like arm_compute_initial_elimination_offset. Simpler because there
25048 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25049 to point at the base of the local variables after static stack
25050 space for a function has been allocated. */
25051
25052 HOST_WIDE_INT
25053 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25054 {
25055 arm_stack_offsets *offsets;
25056
25057 offsets = arm_get_frame_offsets ();
25058
25059 switch (from)
25060 {
25061 case ARG_POINTER_REGNUM:
25062 switch (to)
25063 {
25064 case STACK_POINTER_REGNUM:
25065 return offsets->outgoing_args - offsets->saved_args;
25066
25067 case FRAME_POINTER_REGNUM:
25068 return offsets->soft_frame - offsets->saved_args;
25069
25070 case ARM_HARD_FRAME_POINTER_REGNUM:
25071 return offsets->saved_regs - offsets->saved_args;
25072
25073 case THUMB_HARD_FRAME_POINTER_REGNUM:
25074 return offsets->locals_base - offsets->saved_args;
25075
25076 default:
25077 gcc_unreachable ();
25078 }
25079 break;
25080
25081 case FRAME_POINTER_REGNUM:
25082 switch (to)
25083 {
25084 case STACK_POINTER_REGNUM:
25085 return offsets->outgoing_args - offsets->soft_frame;
25086
25087 case ARM_HARD_FRAME_POINTER_REGNUM:
25088 return offsets->saved_regs - offsets->soft_frame;
25089
25090 case THUMB_HARD_FRAME_POINTER_REGNUM:
25091 return offsets->locals_base - offsets->soft_frame;
25092
25093 default:
25094 gcc_unreachable ();
25095 }
25096 break;
25097
25098 default:
25099 gcc_unreachable ();
25100 }
25101 }
25102
25103 /* Generate RTL for a Thumb-1 function's prologue. */
25104
25105 void
25106 thumb1_expand_prologue (void)
25107 {
25108 rtx_insn *insn;
25109
25110 HOST_WIDE_INT amount;
25111 HOST_WIDE_INT size;
25112 arm_stack_offsets *offsets;
25113 unsigned long func_type;
25114 int regno;
25115 unsigned long live_regs_mask;
25116 unsigned long l_mask;
25117 unsigned high_regs_pushed = 0;
25118 bool lr_needs_saving;
25119
25120 func_type = arm_current_func_type ();
25121
25122 /* Naked functions don't have prologues. */
25123 if (IS_NAKED (func_type))
25124 {
25125 if (flag_stack_usage_info)
25126 current_function_static_stack_size = 0;
25127 return;
25128 }
25129
25130 if (IS_INTERRUPT (func_type))
25131 {
25132 error ("interrupt Service Routines cannot be coded in Thumb mode");
25133 return;
25134 }
25135
25136 if (is_called_in_ARM_mode (current_function_decl))
25137 emit_insn (gen_prologue_thumb1_interwork ());
25138
25139 offsets = arm_get_frame_offsets ();
25140 live_regs_mask = offsets->saved_regs_mask;
25141 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25142
25143 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25144 l_mask = live_regs_mask & 0x40ff;
25145 /* Then count how many other high registers will need to be pushed. */
25146 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25147
25148 if (crtl->args.pretend_args_size)
25149 {
25150 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25151
25152 if (cfun->machine->uses_anonymous_args)
25153 {
25154 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25155 unsigned long mask;
25156
25157 mask = 1ul << (LAST_ARG_REGNUM + 1);
25158 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25159
25160 insn = thumb1_emit_multi_reg_push (mask, 0);
25161 }
25162 else
25163 {
25164 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25165 stack_pointer_rtx, x));
25166 }
25167 RTX_FRAME_RELATED_P (insn) = 1;
25168 }
25169
25170 if (TARGET_BACKTRACE)
25171 {
25172 HOST_WIDE_INT offset = 0;
25173 unsigned work_register;
25174 rtx work_reg, x, arm_hfp_rtx;
25175
25176 /* We have been asked to create a stack backtrace structure.
25177 The code looks like this:
25178
25179 0 .align 2
25180 0 func:
25181 0 sub SP, #16 Reserve space for 4 registers.
25182 2 push {R7} Push low registers.
25183 4 add R7, SP, #20 Get the stack pointer before the push.
25184 6 str R7, [SP, #8] Store the stack pointer
25185 (before reserving the space).
25186 8 mov R7, PC Get hold of the start of this code + 12.
25187 10 str R7, [SP, #16] Store it.
25188 12 mov R7, FP Get hold of the current frame pointer.
25189 14 str R7, [SP, #4] Store it.
25190 16 mov R7, LR Get hold of the current return address.
25191 18 str R7, [SP, #12] Store it.
25192 20 add R7, SP, #16 Point at the start of the
25193 backtrace structure.
25194 22 mov FP, R7 Put this value into the frame pointer. */
25195
25196 work_register = thumb_find_work_register (live_regs_mask);
25197 work_reg = gen_rtx_REG (SImode, work_register);
25198 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25199
25200 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25201 stack_pointer_rtx, GEN_INT (-16)));
25202 RTX_FRAME_RELATED_P (insn) = 1;
25203
25204 if (l_mask)
25205 {
25206 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25207 RTX_FRAME_RELATED_P (insn) = 1;
25208 lr_needs_saving = false;
25209
25210 offset = bit_count (l_mask) * UNITS_PER_WORD;
25211 }
25212
25213 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25214 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25215
25216 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25217 x = gen_frame_mem (SImode, x);
25218 emit_move_insn (x, work_reg);
25219
25220 /* Make sure that the instruction fetching the PC is in the right place
25221 to calculate "start of backtrace creation code + 12". */
25222 /* ??? The stores using the common WORK_REG ought to be enough to
25223 prevent the scheduler from doing anything weird. Failing that
25224 we could always move all of the following into an UNSPEC_VOLATILE. */
25225 if (l_mask)
25226 {
25227 x = gen_rtx_REG (SImode, PC_REGNUM);
25228 emit_move_insn (work_reg, x);
25229
25230 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25231 x = gen_frame_mem (SImode, x);
25232 emit_move_insn (x, work_reg);
25233
25234 emit_move_insn (work_reg, arm_hfp_rtx);
25235
25236 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25237 x = gen_frame_mem (SImode, x);
25238 emit_move_insn (x, work_reg);
25239 }
25240 else
25241 {
25242 emit_move_insn (work_reg, arm_hfp_rtx);
25243
25244 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25245 x = gen_frame_mem (SImode, x);
25246 emit_move_insn (x, work_reg);
25247
25248 x = gen_rtx_REG (SImode, PC_REGNUM);
25249 emit_move_insn (work_reg, x);
25250
25251 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25252 x = gen_frame_mem (SImode, x);
25253 emit_move_insn (x, work_reg);
25254 }
25255
25256 x = gen_rtx_REG (SImode, LR_REGNUM);
25257 emit_move_insn (work_reg, x);
25258
25259 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25260 x = gen_frame_mem (SImode, x);
25261 emit_move_insn (x, work_reg);
25262
25263 x = GEN_INT (offset + 12);
25264 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25265
25266 emit_move_insn (arm_hfp_rtx, work_reg);
25267 }
25268 /* Optimization: If we are not pushing any low registers but we are going
25269 to push some high registers then delay our first push. This will just
25270 be a push of LR and we can combine it with the push of the first high
25271 register. */
25272 else if ((l_mask & 0xff) != 0
25273 || (high_regs_pushed == 0 && lr_needs_saving))
25274 {
25275 unsigned long mask = l_mask;
25276 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25277 insn = thumb1_emit_multi_reg_push (mask, mask);
25278 RTX_FRAME_RELATED_P (insn) = 1;
25279 lr_needs_saving = false;
25280 }
25281
25282 if (high_regs_pushed)
25283 {
25284 unsigned pushable_regs;
25285 unsigned next_hi_reg;
25286 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25287 : crtl->args.info.nregs;
25288 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25289
25290 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25291 if (live_regs_mask & (1 << next_hi_reg))
25292 break;
25293
25294 /* Here we need to mask out registers used for passing arguments
25295 even if they can be pushed. This is to avoid using them to
25296 stash the high registers; such a stash could clobber argument
25297 values that are still live. */
25298 pushable_regs = l_mask & (~arg_regs_mask);
25299 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
25300
25301 /* Normally, LR can be used as a scratch register once it has been
25302 saved; but if the function examines its own return address then
25303 the value is still live and we need to avoid using it. */
25304 bool return_addr_live
25305 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
25306 LR_REGNUM);
25307
25308 if (lr_needs_saving || return_addr_live)
25309 pushable_regs &= ~(1 << LR_REGNUM);
25310
25311 if (pushable_regs == 0)
25312 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25313
25314 while (high_regs_pushed > 0)
25315 {
25316 unsigned long real_regs_mask = 0;
25317 unsigned long push_mask = 0;
25318
25319 for (regno = LR_REGNUM; regno >= 0; regno --)
25320 {
25321 if (pushable_regs & (1 << regno))
25322 {
25323 emit_move_insn (gen_rtx_REG (SImode, regno),
25324 gen_rtx_REG (SImode, next_hi_reg));
25325
25326 high_regs_pushed --;
25327 real_regs_mask |= (1 << next_hi_reg);
25328 push_mask |= (1 << regno);
25329
25330 if (high_regs_pushed)
25331 {
25332 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25333 next_hi_reg --)
25334 if (live_regs_mask & (1 << next_hi_reg))
25335 break;
25336 }
25337 else
25338 break;
25339 }
25340 }
25341
25342 /* If we had to find a work register and we have not yet
25343 saved the LR then add it to the list of regs to push. */
25344 if (lr_needs_saving)
25345 {
25346 push_mask |= 1 << LR_REGNUM;
25347 real_regs_mask |= 1 << LR_REGNUM;
25348 lr_needs_saving = false;
25349 /* If the return address is not live at this point, we
25350 can add LR to the list of registers that we can use
25351 for pushes. */
25352 if (!return_addr_live)
25353 pushable_regs |= 1 << LR_REGNUM;
25354 }
25355
25356 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25357 RTX_FRAME_RELATED_P (insn) = 1;
25358 }
25359 }
25360
25361 /* Load the PIC register before setting the frame pointer,
25362 so we can use r7 as a temporary work register. */
25363 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25364 arm_load_pic_register (live_regs_mask, NULL_RTX);
25365
25366 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25367 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25368 stack_pointer_rtx);
25369
25370 size = offsets->outgoing_args - offsets->saved_args;
25371 if (flag_stack_usage_info)
25372 current_function_static_stack_size = size;
25373
25374 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25375 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25376 || flag_stack_clash_protection)
25377 && size)
25378 sorry ("%<-fstack-check=specific%> for Thumb-1");
25379
25380 amount = offsets->outgoing_args - offsets->saved_regs;
25381 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25382 if (amount)
25383 {
25384 if (amount < 512)
25385 {
25386 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25387 GEN_INT (- amount)));
25388 RTX_FRAME_RELATED_P (insn) = 1;
25389 }
25390 else
25391 {
25392 rtx reg, dwarf;
25393
25394 /* The stack decrement is too big for an immediate value in a single
25395 insn. In theory we could issue multiple subtracts, but after
25396 three of them it becomes more space efficient to place the full
25397 value in the constant pool and load into a register. (Also the
25398 ARM debugger really likes to see only one stack decrement per
25399 function). So instead we look for a scratch register into which
25400 we can load the decrement, and then we subtract this from the
25401 stack pointer. Unfortunately on the thumb the only available
25402 scratch registers are the argument registers, and we cannot use
25403 these as they may hold arguments to the function. Instead we
25404 attempt to locate a call preserved register which is used by this
25405 function. If we can find one, then we know that it will have
25406 been pushed at the start of the prologue and so we can corrupt
25407 it now. */
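/* A minimal sketch of the sequence emitted below, assuming AMOUNT is 1024
   and r4 is the call-saved low register found by the loop:

       ldr   r4, =-1024
       add   sp, sp, r4

   (illustrative only; the exact constant materialisation is left to
   gen_movsi and the constant pool). */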
25408 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25409 if (live_regs_mask & (1 << regno))
25410 break;
25411
25412 gcc_assert(regno <= LAST_LO_REGNUM);
25413
25414 reg = gen_rtx_REG (SImode, regno);
25415
25416 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25417
25418 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25419 stack_pointer_rtx, reg));
25420
25421 dwarf = gen_rtx_SET (stack_pointer_rtx,
25422 plus_constant (Pmode, stack_pointer_rtx,
25423 -amount));
25424 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25425 RTX_FRAME_RELATED_P (insn) = 1;
25426 }
25427 }
25428
25429 if (frame_pointer_needed)
25430 thumb_set_frame_pointer (offsets);
25431
25432 /* If we are profiling, make sure no instructions are scheduled before
25433 the call to mcount. Similarly if the user has requested no
25434 scheduling in the prolog. Similarly if we want non-call exceptions
25435 using the EABI unwinder, to prevent faulting instructions from being
25436 swapped with a stack adjustment. */
25437 if (crtl->profile || !TARGET_SCHED_PROLOG
25438 || (arm_except_unwind_info (&global_options) == UI_TARGET
25439 && cfun->can_throw_non_call_exceptions))
25440 emit_insn (gen_blockage ());
25441
25442 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25443 if (live_regs_mask & 0xff)
25444 cfun->machine->lr_save_eliminated = 0;
25445 }
25446
25447 /* Clear caller-saved registers not used to pass return values, and clear
25448 condition flags that could leak information, before exiting a cmse_nonsecure_entry function. */
25449
25450 void
25451 cmse_nonsecure_entry_clear_before_return (void)
25452 {
25453 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25454 uint32_t padding_bits_to_clear = 0;
25455 auto_sbitmap to_clear_bitmap (maxregno + 1);
25456 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25457 tree result_type;
25458
25459 bitmap_clear (to_clear_bitmap);
25460 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25461 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25462
25463 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25464 registers. */
25465 if (TARGET_HARD_FLOAT)
25466 {
25467 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25468
25469 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25470
25471 /* Make sure we don't clear the two scratch registers used to clear the
25472 relevant FPSCR bits in output_return_instruction. */
25473 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25474 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25475 emit_use (gen_rtx_REG (SImode, 4));
25476 bitmap_clear_bit (to_clear_bitmap, 4);
25477 }
25478
25479 /* If the user has defined registers to be caller saved, these are no longer
25480 restored by the function before returning and must thus be cleared for
25481 security purposes. */
25482 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25483 {
25484 /* We do not touch registers that can be used to pass arguments as per
25485 the AAPCS, since these should never be made callee-saved by user
25486 options. */
25487 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25488 continue;
25489 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25490 continue;
25491 if (call_used_regs[regno])
25492 bitmap_set_bit (to_clear_bitmap, regno);
25493 }
25494
25495 /* Make sure we do not clear the registers used to return the result. */
25496 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25497 if (!VOID_TYPE_P (result_type))
25498 {
25499 uint64_t to_clear_return_mask;
25500 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25501
25502 /* No need to check that we return in registers, because we don't
25503 support returning on stack yet. */
25504 gcc_assert (REG_P (result_rtl));
25505 to_clear_return_mask
25506 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25507 &padding_bits_to_clear);
25508 if (to_clear_return_mask)
25509 {
25510 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25511 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25512 {
25513 if (to_clear_return_mask & (1ULL << regno))
25514 bitmap_clear_bit (to_clear_bitmap, regno);
25515 }
25516 }
25517 }
25518
25519 if (padding_bits_to_clear != 0)
25520 {
25521 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25522 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25523
25524 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25525 returning a composite type, which only uses r0. Let's make sure that
25526 r1-r3 are cleared too. */
25527 bitmap_clear (to_clear_arg_regs_bitmap);
25528 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25529 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25530 }
25531
25532 /* Clear full registers that leak before returning. */
25533 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25534 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25535 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25536 clearing_reg);
25537 }
25538
25539 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
25540 POP instruction can be generated. LR should be replaced by PC. All
25541 the checks required are already done by USE_RETURN_INSN (). Hence,
25542 all we really need to check here is whether a single register or
25543 multiple registers are to be popped. */
25544 void
25545 thumb2_expand_return (bool simple_return)
25546 {
25547 int i, num_regs;
25548 unsigned long saved_regs_mask;
25549 arm_stack_offsets *offsets;
25550
25551 offsets = arm_get_frame_offsets ();
25552 saved_regs_mask = offsets->saved_regs_mask;
25553
25554 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25555 if (saved_regs_mask & (1 << i))
25556 num_regs++;
25557
25558 if (!simple_return && saved_regs_mask)
25559 {
25560 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25561 functions or adapt code to handle according to ACLE. This path should
25562 not be reachable for cmse_nonsecure_entry functions though we prefer
25563 to assert it for now to ensure that future code changes do not silently
25564 change this behavior. */
25565 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25566 if (num_regs == 1)
25567 {
25568 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25569 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25570 rtx addr = gen_rtx_MEM (SImode,
25571 gen_rtx_POST_INC (SImode,
25572 stack_pointer_rtx));
25573 set_mem_alias_set (addr, get_frame_alias_set ());
25574 XVECEXP (par, 0, 0) = ret_rtx;
25575 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25576 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25577 emit_jump_insn (par);
25578 }
25579 else
25580 {
25581 saved_regs_mask &= ~ (1 << LR_REGNUM);
25582 saved_regs_mask |= (1 << PC_REGNUM);
25583 arm_emit_multi_reg_pop (saved_regs_mask);
25584 }
25585 }
25586 else
25587 {
25588 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25589 cmse_nonsecure_entry_clear_before_return ();
25590 emit_jump_insn (simple_return_rtx);
25591 }
25592 }
25593
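/* Generate RTL for the epilogue of a Thumb-1 function: deallocate the local
   frame (via the frame pointer when one was needed) and emit the uses and
   clobbers that keep the stack adjustment and the restored registers
   visible to later passes. */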
25594 void
25595 thumb1_expand_epilogue (void)
25596 {
25597 HOST_WIDE_INT amount;
25598 arm_stack_offsets *offsets;
25599 int regno;
25600
25601 /* Naked functions don't have epilogues. */
25602 if (IS_NAKED (arm_current_func_type ()))
25603 return;
25604
25605 offsets = arm_get_frame_offsets ();
25606 amount = offsets->outgoing_args - offsets->saved_regs;
25607
25608 if (frame_pointer_needed)
25609 {
25610 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25611 amount = offsets->locals_base - offsets->saved_regs;
25612 }
25613 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25614
25615 gcc_assert (amount >= 0);
25616 if (amount)
25617 {
25618 emit_insn (gen_blockage ());
25619
25620 if (amount < 512)
25621 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25622 GEN_INT (amount)));
25623 else
25624 {
25625 /* r3 is always free in the epilogue. */
25626 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25627
25628 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25629 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25630 }
25631 }
25632
25633 /* Emit a USE (stack_pointer_rtx), so that
25634 the stack adjustment will not be deleted. */
25635 emit_insn (gen_force_register_use (stack_pointer_rtx));
25636
25637 if (crtl->profile || !TARGET_SCHED_PROLOG)
25638 emit_insn (gen_blockage ());
25639
25640 /* Emit a clobber for each insn that will be restored in the epilogue,
25641 so that flow2 will get register lifetimes correct. */
25642 for (regno = 0; regno < 13; regno++)
25643 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25644 emit_clobber (gen_rtx_REG (SImode, regno));
25645
25646 if (! df_regs_ever_live_p (LR_REGNUM))
25647 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25648
25649 /* Clear all caller-saved regs that are not used to return. */
25650 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25651 cmse_nonsecure_entry_clear_before_return ();
25652 }
25653
25654 /* Epilogue code for APCS frame. */
25655 static void
25656 arm_expand_epilogue_apcs_frame (bool really_return)
25657 {
25658 unsigned long func_type;
25659 unsigned long saved_regs_mask;
25660 int num_regs = 0;
25661 int i;
25662 int floats_from_frame = 0;
25663 arm_stack_offsets *offsets;
25664
25665 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25666 func_type = arm_current_func_type ();
25667
25668 /* Get frame offsets for ARM. */
25669 offsets = arm_get_frame_offsets ();
25670 saved_regs_mask = offsets->saved_regs_mask;
25671
25672 /* Find the offset of the floating-point save area in the frame. */
25673 floats_from_frame
25674 = (offsets->saved_args
25675 + arm_compute_static_chain_stack_bytes ()
25676 - offsets->frame);
25677
25678 /* Compute how many core registers are saved and how far away the floats are. */
25679 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25680 if (saved_regs_mask & (1 << i))
25681 {
25682 num_regs++;
25683 floats_from_frame += 4;
25684 }
25685
25686 if (TARGET_HARD_FLOAT)
25687 {
25688 int start_reg;
25689 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25690
25691 /* The offset is from IP_REGNUM. */
25692 int saved_size = arm_get_vfp_saved_size ();
25693 if (saved_size > 0)
25694 {
25695 rtx_insn *insn;
25696 floats_from_frame += saved_size;
25697 insn = emit_insn (gen_addsi3 (ip_rtx,
25698 hard_frame_pointer_rtx,
25699 GEN_INT (-floats_from_frame)));
25700 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25701 ip_rtx, hard_frame_pointer_rtx);
25702 }
25703
25704 /* Generate VFP register multi-pop. */
25705 start_reg = FIRST_VFP_REGNUM;
25706
25707 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25708 /* Look for a case where a reg does not need restoring. */
25709 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25710 && (!df_regs_ever_live_p (i + 1)
25711 || call_used_regs[i + 1]))
25712 {
25713 if (start_reg != i)
25714 arm_emit_vfp_multi_reg_pop (start_reg,
25715 (i - start_reg) / 2,
25716 gen_rtx_REG (SImode,
25717 IP_REGNUM));
25718 start_reg = i + 2;
25719 }
25720
25721 /* Restore the remaining regs that we have discovered (or possibly
25722 even all of them, if the conditional in the for loop never
25723 fired). */
25724 if (start_reg != i)
25725 arm_emit_vfp_multi_reg_pop (start_reg,
25726 (i - start_reg) / 2,
25727 gen_rtx_REG (SImode, IP_REGNUM));
25728 }
25729
25730 if (TARGET_IWMMXT)
25731 {
25732 /* The frame pointer is guaranteed to be non-double-word aligned, as
25733 it is set to double-word-aligned old_stack_pointer - 4. */
25734 rtx_insn *insn;
25735 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25736
25737 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25738 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25739 {
25740 rtx addr = gen_frame_mem (V2SImode,
25741 plus_constant (Pmode, hard_frame_pointer_rtx,
25742 - lrm_count * 4));
25743 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25744 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25745 gen_rtx_REG (V2SImode, i),
25746 NULL_RTX);
25747 lrm_count += 2;
25748 }
25749 }
25750
25751 /* saved_regs_mask should contain IP, which holds the old stack pointer
25752 saved when the frame was created. Since SP and IP are adjacent registers,
25753 we can restore the value directly into SP. */
25754 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25755 saved_regs_mask &= ~(1 << IP_REGNUM);
25756 saved_regs_mask |= (1 << SP_REGNUM);
25757
25758 /* There are two registers left in saved_regs_mask - LR and PC. We
25759 only need to restore LR (the return address), but to
25760 save time we can load it directly into PC, unless we need a
25761 special function exit sequence, or we are not really returning. */
25762 if (really_return
25763 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25764 && !crtl->calls_eh_return)
25765 /* Delete LR from the register mask, so that LR on
25766 the stack is loaded into the PC in the register mask. */
25767 saved_regs_mask &= ~(1 << LR_REGNUM);
25768 else
25769 saved_regs_mask &= ~(1 << PC_REGNUM);
25770
25771 num_regs = bit_count (saved_regs_mask);
25772 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25773 {
25774 rtx_insn *insn;
25775 emit_insn (gen_blockage ());
25776 /* Unwind the stack to just below the saved registers. */
25777 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25778 hard_frame_pointer_rtx,
25779 GEN_INT (- 4 * num_regs)));
25780
25781 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25782 stack_pointer_rtx, hard_frame_pointer_rtx);
25783 }
25784
25785 arm_emit_multi_reg_pop (saved_regs_mask);
25786
25787 if (IS_INTERRUPT (func_type))
25788 {
25789 /* Interrupt handlers will have pushed the
25790 IP onto the stack, so restore it now. */
25791 rtx_insn *insn;
25792 rtx addr = gen_rtx_MEM (SImode,
25793 gen_rtx_POST_INC (SImode,
25794 stack_pointer_rtx));
25795 set_mem_alias_set (addr, get_frame_alias_set ());
25796 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25797 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25798 gen_rtx_REG (SImode, IP_REGNUM),
25799 NULL_RTX);
25800 }
25801
25802 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25803 return;
25804
25805 if (crtl->calls_eh_return)
25806 emit_insn (gen_addsi3 (stack_pointer_rtx,
25807 stack_pointer_rtx,
25808 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25809
25810 if (IS_STACKALIGN (func_type))
25811 /* Restore the original stack pointer. Before prologue, the stack was
25812 realigned and the original stack pointer saved in r0. For details,
25813 see comment in arm_expand_prologue. */
25814 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25815
25816 emit_jump_insn (simple_return_rtx);
25817 }
25818
25819 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25820 function is not a sibcall. */
25821 void
25822 arm_expand_epilogue (bool really_return)
25823 {
25824 unsigned long func_type;
25825 unsigned long saved_regs_mask;
25826 int num_regs = 0;
25827 int i;
25828 int amount;
25829 arm_stack_offsets *offsets;
25830
25831 func_type = arm_current_func_type ();
25832
25833 /* Naked functions don't have an epilogue. Hence, generate a return pattern, and
25834 let output_return_instruction take care of instruction emission if any. */
25835 if (IS_NAKED (func_type)
25836 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25837 {
25838 if (really_return)
25839 emit_jump_insn (simple_return_rtx);
25840 return;
25841 }
25842
25843 /* If we are throwing an exception, then we really must be doing a
25844 return, so we can't tail-call. */
25845 gcc_assert (!crtl->calls_eh_return || really_return);
25846
25847 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25848 {
25849 arm_expand_epilogue_apcs_frame (really_return);
25850 return;
25851 }
25852
25853 /* Get frame offsets for ARM. */
25854 offsets = arm_get_frame_offsets ();
25855 saved_regs_mask = offsets->saved_regs_mask;
25856 num_regs = bit_count (saved_regs_mask);
25857
25858 if (frame_pointer_needed)
25859 {
25860 rtx_insn *insn;
25861 /* Restore stack pointer if necessary. */
25862 if (TARGET_ARM)
25863 {
25864 /* In ARM mode, frame pointer points to first saved register.
25865 Restore stack pointer to last saved register. */
25866 amount = offsets->frame - offsets->saved_regs;
25867
25868 /* Force out any pending memory operations that reference stacked data
25869 before stack de-allocation occurs. */
25870 emit_insn (gen_blockage ());
25871 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25872 hard_frame_pointer_rtx,
25873 GEN_INT (amount)));
25874 arm_add_cfa_adjust_cfa_note (insn, amount,
25875 stack_pointer_rtx,
25876 hard_frame_pointer_rtx);
25877
25878 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25879 deleted. */
25880 emit_insn (gen_force_register_use (stack_pointer_rtx));
25881 }
25882 else
25883 {
25884 /* In Thumb-2 mode, the frame pointer points to the last saved
25885 register. */
25886 amount = offsets->locals_base - offsets->saved_regs;
25887 if (amount)
25888 {
25889 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25890 hard_frame_pointer_rtx,
25891 GEN_INT (amount)));
25892 arm_add_cfa_adjust_cfa_note (insn, amount,
25893 hard_frame_pointer_rtx,
25894 hard_frame_pointer_rtx);
25895 }
25896
25897 /* Force out any pending memory operations that reference stacked data
25898 before stack de-allocation occurs. */
25899 emit_insn (gen_blockage ());
25900 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25901 hard_frame_pointer_rtx));
25902 arm_add_cfa_adjust_cfa_note (insn, 0,
25903 stack_pointer_rtx,
25904 hard_frame_pointer_rtx);
25905 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25906 deleted. */
25907 emit_insn (gen_force_register_use (stack_pointer_rtx));
25908 }
25909 }
25910 else
25911 {
25912 /* Pop off outgoing args and local frame to adjust stack pointer to
25913 last saved register. */
25914 amount = offsets->outgoing_args - offsets->saved_regs;
25915 if (amount)
25916 {
25917 rtx_insn *tmp;
25918 /* Force out any pending memory operations that reference stacked data
25919 before stack de-allocation occurs. */
25920 emit_insn (gen_blockage ());
25921 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25922 stack_pointer_rtx,
25923 GEN_INT (amount)));
25924 arm_add_cfa_adjust_cfa_note (tmp, amount,
25925 stack_pointer_rtx, stack_pointer_rtx);
25926 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25927 not deleted. */
25928 emit_insn (gen_force_register_use (stack_pointer_rtx));
25929 }
25930 }
25931
25932 if (TARGET_HARD_FLOAT)
25933 {
25934 /* Generate VFP register multi-pop. */
25935 int end_reg = LAST_VFP_REGNUM + 1;
25936
25937 /* Scan the registers in reverse order. We need to match
25938 any groupings made in the prologue and generate matching
25939 vldm operations. The need to match groups is because,
25940 unlike pop, vldm can only do consecutive regs. */
25941 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25942 /* Look for a case where a reg does not need restoring. */
25943 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25944 && (!df_regs_ever_live_p (i + 1)
25945 || call_used_regs[i + 1]))
25946 {
25947 /* Restore the regs discovered so far (from reg+2 to
25948 end_reg). */
25949 if (end_reg > i + 2)
25950 arm_emit_vfp_multi_reg_pop (i + 2,
25951 (end_reg - (i + 2)) / 2,
25952 stack_pointer_rtx);
25953 end_reg = i;
25954 }
25955
25956 /* Restore the remaining regs that we have discovered (or possibly
25957 even all of them, if the conditional in the for loop never
25958 fired). */
25959 if (end_reg > i + 2)
25960 arm_emit_vfp_multi_reg_pop (i + 2,
25961 (end_reg - (i + 2)) / 2,
25962 stack_pointer_rtx);
25963 }
25964
25965 if (TARGET_IWMMXT)
25966 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25967 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25968 {
25969 rtx_insn *insn;
25970 rtx addr = gen_rtx_MEM (V2SImode,
25971 gen_rtx_POST_INC (SImode,
25972 stack_pointer_rtx));
25973 set_mem_alias_set (addr, get_frame_alias_set ());
25974 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25975 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25976 gen_rtx_REG (V2SImode, i),
25977 NULL_RTX);
25978 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25979 stack_pointer_rtx, stack_pointer_rtx);
25980 }
25981
25982 if (saved_regs_mask)
25983 {
25984 rtx insn;
25985 bool return_in_pc = false;
25986
25987 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25988 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25989 && !IS_CMSE_ENTRY (func_type)
25990 && !IS_STACKALIGN (func_type)
25991 && really_return
25992 && crtl->args.pretend_args_size == 0
25993 && saved_regs_mask & (1 << LR_REGNUM)
25994 && !crtl->calls_eh_return)
25995 {
25996 saved_regs_mask &= ~(1 << LR_REGNUM);
25997 saved_regs_mask |= (1 << PC_REGNUM);
25998 return_in_pc = true;
25999 }
26000
26001 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
26002 {
26003 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26004 if (saved_regs_mask & (1 << i))
26005 {
26006 rtx addr = gen_rtx_MEM (SImode,
26007 gen_rtx_POST_INC (SImode,
26008 stack_pointer_rtx));
26009 set_mem_alias_set (addr, get_frame_alias_set ());
26010
26011 if (i == PC_REGNUM)
26012 {
26013 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26014 XVECEXP (insn, 0, 0) = ret_rtx;
26015 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
26016 addr);
26017 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
26018 insn = emit_jump_insn (insn);
26019 }
26020 else
26021 {
26022 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26023 addr));
26024 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26025 gen_rtx_REG (SImode, i),
26026 NULL_RTX);
26027 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26028 stack_pointer_rtx,
26029 stack_pointer_rtx);
26030 }
26031 }
26032 }
26033 else
26034 {
26035 if (TARGET_LDRD
26036 && current_tune->prefer_ldrd_strd
26037 && !optimize_function_for_size_p (cfun))
26038 {
26039 if (TARGET_THUMB2)
26040 thumb2_emit_ldrd_pop (saved_regs_mask);
26041 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26042 arm_emit_ldrd_pop (saved_regs_mask);
26043 else
26044 arm_emit_multi_reg_pop (saved_regs_mask);
26045 }
26046 else
26047 arm_emit_multi_reg_pop (saved_regs_mask);
26048 }
26049
26050 if (return_in_pc)
26051 return;
26052 }
26053
26054 amount
26055 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
26056 if (amount)
26057 {
26058 int i, j;
26059 rtx dwarf = NULL_RTX;
26060 rtx_insn *tmp =
26061 emit_insn (gen_addsi3 (stack_pointer_rtx,
26062 stack_pointer_rtx,
26063 GEN_INT (amount)));
26064
26065 RTX_FRAME_RELATED_P (tmp) = 1;
26066
26067 if (cfun->machine->uses_anonymous_args)
26068 {
26069 /* Restore pretend args. See arm_expand_prologue for how the
26070 pretend args are saved on the stack. */
26071 int num_regs = crtl->args.pretend_args_size / 4;
26072 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
26073 for (j = 0, i = 0; j < num_regs; i++)
26074 if (saved_regs_mask & (1 << i))
26075 {
26076 rtx reg = gen_rtx_REG (SImode, i);
26077 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26078 j++;
26079 }
26080 REG_NOTES (tmp) = dwarf;
26081 }
26082 arm_add_cfa_adjust_cfa_note (tmp, amount,
26083 stack_pointer_rtx, stack_pointer_rtx);
26084 }
26085
26086 /* Clear all caller-saved regs that are not used to return. */
26087 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26088 {
26089 /* CMSE_ENTRY always returns. */
26090 gcc_assert (really_return);
26091 cmse_nonsecure_entry_clear_before_return ();
26092 }
26093
26094 if (!really_return)
26095 return;
26096
26097 if (crtl->calls_eh_return)
26098 emit_insn (gen_addsi3 (stack_pointer_rtx,
26099 stack_pointer_rtx,
26100 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26101
26102 if (IS_STACKALIGN (func_type))
26103 /* Restore the original stack pointer. Before prologue, the stack was
26104 realigned and the original stack pointer saved in r0. For details,
26105 see comment in arm_expand_prologue. */
26106 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26107
26108 emit_jump_insn (simple_return_rtx);
26109 }
26110
26111 /* Implementation of insn prologue_thumb1_interwork. This is the first
26112 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26113
26114 const char *
26115 thumb1_output_interwork (void)
26116 {
26117 const char * name;
26118 FILE *f = asm_out_file;
26119
26120 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26121 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26122 == SYMBOL_REF);
26123 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26124
26125 /* Generate code sequence to switch us into Thumb mode. */
26126 /* The .code 32 directive has already been emitted by
26127 ASM_DECLARE_FUNCTION_NAME. */
26128 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26129 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26130
26131 /* Generate a label, so that the debugger will notice the
26132 change in instruction sets. This label is also used by
26133 the assembler to bypass the ARM code when this function
26134 is called from a Thumb encoded function elsewhere in the
26135 same file. Hence the definition of STUB_NAME here must
26136 agree with the definition in gas/config/tc-arm.c. */
26137
26138 #define STUB_NAME ".real_start_of"
26139
26140 fprintf (f, "\t.code\t16\n");
26141 #ifdef ARM_PE
26142 if (arm_dllexport_name_p (name))
26143 name = arm_strip_name_encoding (name);
26144 #endif
26145 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26146 fprintf (f, "\t.thumb_func\n");
26147 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26148
26149 return "";
26150 }
26151
26152 /* Handle the case of a double word load into a low register from
26153 a computed memory address. The computed address may involve a
26154 register which is overwritten by the load. */
26155 const char *
26156 thumb_load_double_from_address (rtx *operands)
26157 {
26158 rtx addr;
26159 rtx base;
26160 rtx offset;
26161 rtx arg1;
26162 rtx arg2;
26163
26164 gcc_assert (REG_P (operands[0]));
26165 gcc_assert (MEM_P (operands[1]));
26166
26167 /* Get the memory address. */
26168 addr = XEXP (operands[1], 0);
26169
26170 /* Work out how the memory address is computed. */
26171 switch (GET_CODE (addr))
26172 {
26173 case REG:
26174 operands[2] = adjust_address (operands[1], SImode, 4);
26175
26176 if (REGNO (operands[0]) == REGNO (addr))
26177 {
26178 output_asm_insn ("ldr\t%H0, %2", operands);
26179 output_asm_insn ("ldr\t%0, %1", operands);
26180 }
26181 else
26182 {
26183 output_asm_insn ("ldr\t%0, %1", operands);
26184 output_asm_insn ("ldr\t%H0, %2", operands);
26185 }
26186 break;
26187
26188 case CONST:
26189 /* Compute <address> + 4 for the high order load. */
26190 operands[2] = adjust_address (operands[1], SImode, 4);
26191
26192 output_asm_insn ("ldr\t%0, %1", operands);
26193 output_asm_insn ("ldr\t%H0, %2", operands);
26194 break;
26195
26196 case PLUS:
26197 arg1 = XEXP (addr, 0);
26198 arg2 = XEXP (addr, 1);
26199
26200 if (CONSTANT_P (arg1))
26201 base = arg2, offset = arg1;
26202 else
26203 base = arg1, offset = arg2;
26204
26205 gcc_assert (REG_P (base));
26206
26207 /* Catch the case of <address> = <reg> + <reg> */
26208 if (REG_P (offset))
26209 {
26210 int reg_offset = REGNO (offset);
26211 int reg_base = REGNO (base);
26212 int reg_dest = REGNO (operands[0]);
26213
26214 /* Add the base and offset registers together into the
26215 higher destination register. */
26216 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26217 reg_dest + 1, reg_base, reg_offset);
26218
26219 /* Load the lower destination register from the address in
26220 the higher destination register. */
26221 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26222 reg_dest, reg_dest + 1);
26223
26224 /* Load the higher destination register from its own address
26225 plus 4. */
26226 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26227 reg_dest + 1, reg_dest + 1);
26228 }
26229 else
26230 {
26231 /* Compute <address> + 4 for the high order load. */
26232 operands[2] = adjust_address (operands[1], SImode, 4);
26233
26234 /* If the computed address is held in the low order register
26235 then load the high order register first, otherwise always
26236 load the low order register first. */
26237 if (REGNO (operands[0]) == REGNO (base))
26238 {
26239 output_asm_insn ("ldr\t%H0, %2", operands);
26240 output_asm_insn ("ldr\t%0, %1", operands);
26241 }
26242 else
26243 {
26244 output_asm_insn ("ldr\t%0, %1", operands);
26245 output_asm_insn ("ldr\t%H0, %2", operands);
26246 }
26247 }
26248 break;
26249
26250 case LABEL_REF:
26251 /* With no registers to worry about we can just load the value
26252 directly. */
26253 operands[2] = adjust_address (operands[1], SImode, 4);
26254
26255 output_asm_insn ("ldr\t%H0, %2", operands);
26256 output_asm_insn ("ldr\t%0, %1", operands);
26257 break;
26258
26259 default:
26260 gcc_unreachable ();
26261 }
26262
26263 return "";
26264 }
26265
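/* Output a load-multiple/store-multiple pair copying N (2 or 3) words from
   the address in operands[1] to the address in operands[0], using
   operands[4]..operands[6] as scratch registers. The scratch registers are
   sorted into ascending order first, as ldmia/stmia register lists
   require. */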
26266 const char *
26267 thumb_output_move_mem_multiple (int n, rtx *operands)
26268 {
26269 switch (n)
26270 {
26271 case 2:
26272 if (REGNO (operands[4]) > REGNO (operands[5]))
26273 std::swap (operands[4], operands[5]);
26274
26275 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26276 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26277 break;
26278
26279 case 3:
26280 if (REGNO (operands[4]) > REGNO (operands[5]))
26281 std::swap (operands[4], operands[5]);
26282 if (REGNO (operands[5]) > REGNO (operands[6]))
26283 std::swap (operands[5], operands[6]);
26284 if (REGNO (operands[4]) > REGNO (operands[5]))
26285 std::swap (operands[4], operands[5]);
26286
26287 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26288 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26289 break;
26290
26291 default:
26292 gcc_unreachable ();
26293 }
26294
26295 return "";
26296 }
26297
26298 /* Output a call-via instruction for thumb state. */
26299 const char *
26300 thumb_call_via_reg (rtx reg)
26301 {
26302 int regno = REGNO (reg);
26303 rtx *labelp;
26304
26305 gcc_assert (regno < LR_REGNUM);
26306
26307 /* If we are in the normal text section we can use a single instance
26308 per compilation unit. If we are doing function sections, then we need
26309 an entry per section, since we can't rely on reachability. */
26310 if (in_section == text_section)
26311 {
26312 thumb_call_reg_needed = 1;
26313
26314 if (thumb_call_via_label[regno] == NULL)
26315 thumb_call_via_label[regno] = gen_label_rtx ();
26316 labelp = thumb_call_via_label + regno;
26317 }
26318 else
26319 {
26320 if (cfun->machine->call_via[regno] == NULL)
26321 cfun->machine->call_via[regno] = gen_label_rtx ();
26322 labelp = cfun->machine->call_via + regno;
26323 }
26324
26325 output_asm_insn ("bl\t%a0", labelp);
26326 return "";
26327 }
26328
26329 /* Routines for generating rtl. */
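/* Expand a cpymemqi block copy for Thumb-1. The length in operands[2] is
   consumed greedily: 12-byte and 8-byte ldmia/stmia groups first, then a
   word, a halfword and finally a byte move for any remainder; a 15-byte
   copy, for example, is emitted as 12 + 2 + 1 bytes. */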
26330 void
26331 thumb_expand_cpymemqi (rtx *operands)
26332 {
26333 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26334 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26335 HOST_WIDE_INT len = INTVAL (operands[2]);
26336 HOST_WIDE_INT offset = 0;
26337
26338 while (len >= 12)
26339 {
26340 emit_insn (gen_cpymem12b (out, in, out, in));
26341 len -= 12;
26342 }
26343
26344 if (len >= 8)
26345 {
26346 emit_insn (gen_cpymem8b (out, in, out, in));
26347 len -= 8;
26348 }
26349
26350 if (len >= 4)
26351 {
26352 rtx reg = gen_reg_rtx (SImode);
26353 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26354 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26355 len -= 4;
26356 offset += 4;
26357 }
26358
26359 if (len >= 2)
26360 {
26361 rtx reg = gen_reg_rtx (HImode);
26362 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26363 plus_constant (Pmode, in,
26364 offset))));
26365 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26366 offset)),
26367 reg));
26368 len -= 2;
26369 offset += 2;
26370 }
26371
26372 if (len)
26373 {
26374 rtx reg = gen_reg_rtx (QImode);
26375 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26376 plus_constant (Pmode, in,
26377 offset))));
26378 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26379 offset)),
26380 reg));
26381 }
26382 }
26383
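/* Reload helper: store the HImode value in operands[1] to the memory
   location operands[0], clobbering the scratch register operands[2], via
   the thumb_movhi_clobber pattern. */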
26384 void
26385 thumb_reload_out_hi (rtx *operands)
26386 {
26387 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26388 }
26389
26390 /* Return the length of a function name prefix
26391 that starts with the character 'c'. */
26392 static int
26393 arm_get_strip_length (int c)
26394 {
26395 switch (c)
26396 {
26397 ARM_NAME_ENCODING_LENGTHS
26398 default: return 0;
26399 }
26400 }
26401
26402 /* Return a pointer to a function's name with any
26403 and all prefix encodings stripped from it. */
26404 const char *
26405 arm_strip_name_encoding (const char *name)
26406 {
26407 int skip;
26408
26409 while ((skip = arm_get_strip_length (* name)))
26410 name += skip;
26411
26412 return name;
26413 }
26414
26415 /* If there is a '*' anywhere in the name's prefix, then
26416 emit the stripped name verbatim, otherwise prepend an
26417 underscore if leading underscores are being used. */
26418 void
26419 arm_asm_output_labelref (FILE *stream, const char *name)
26420 {
26421 int skip;
26422 int verbatim = 0;
26423
26424 while ((skip = arm_get_strip_length (* name)))
26425 {
26426 verbatim |= (*name == '*');
26427 name += skip;
26428 }
26429
26430 if (verbatim)
26431 fputs (name, stream);
26432 else
26433 asm_fprintf (stream, "%U%s", name);
26434 }
26435
26436 /* This function is used to emit an EABI tag and its associated value.
26437 We emit the numerical value of the tag in case the assembler does not
26438 support textual tags (e.g. gas prior to 2.20). If requested we include
26439 the tag name in a comment so that anyone reading the assembler output
26440 will know which tag is being set.
26441
26442 This function is not static because arm-c.c needs it too. */
26443
26444 void
26445 arm_emit_eabi_attribute (const char *name, int num, int val)
26446 {
26447 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26448 if (flag_verbose_asm || flag_debug_asm)
26449 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26450 asm_fprintf (asm_out_file, "\n");
26451 }
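/* For example, the call arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
   made later in this file prints ".eabi_attribute 19, 1", followed under
   -fverbose-asm by an assembler comment naming the tag. */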
26452
26453 /* This function is used to print CPU tuning information as a comment
26454 in the assembler file. Pointers are not printed for now. */
26455
26456 void
26457 arm_print_tune_info (void)
26458 {
26459 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26460 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26461 current_tune->constant_limit);
26462 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26463 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26464 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26465 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26466 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26467 "prefetch.l1_cache_size:\t%d\n",
26468 current_tune->prefetch.l1_cache_size);
26469 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26470 "prefetch.l1_cache_line_size:\t%d\n",
26471 current_tune->prefetch.l1_cache_line_size);
26472 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26473 "prefer_constant_pool:\t%d\n",
26474 (int) current_tune->prefer_constant_pool);
26475 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26476 "branch_cost:\t(s:speed, p:predictable)\n");
26477 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26478 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26479 current_tune->branch_cost (false, false));
26480 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26481 current_tune->branch_cost (false, true));
26482 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26483 current_tune->branch_cost (true, false));
26484 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26485 current_tune->branch_cost (true, true));
26486 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26487 "prefer_ldrd_strd:\t%d\n",
26488 (int) current_tune->prefer_ldrd_strd);
26489 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26490 "logical_op_non_short_circuit:\t[%d,%d]\n",
26491 (int) current_tune->logical_op_non_short_circuit_thumb,
26492 (int) current_tune->logical_op_non_short_circuit_arm);
26493 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26494 "disparage_flag_setting_t16_encodings:\t%d\n",
26495 (int) current_tune->disparage_flag_setting_t16_encodings);
26496 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26497 "string_ops_prefer_neon:\t%d\n",
26498 (int) current_tune->string_ops_prefer_neon);
26499 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26500 "max_insns_inline_memset:\t%d\n",
26501 current_tune->max_insns_inline_memset);
26502 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26503 current_tune->fusible_ops);
26504 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26505 (int) current_tune->sched_autopref);
26506 }
26507
26508 /* Print .arch and .arch_extension directives corresponding to the
26509 current architecture configuration. */
26510 static void
26511 arm_print_asm_arch_directives ()
26512 {
26513 const arch_option *arch
26514 = arm_parse_arch_option_name (all_architectures, "-march",
26515 arm_active_target.arch_name);
26516 auto_sbitmap opt_bits (isa_num_bits);
26517
26518 gcc_assert (arch);
26519
26520 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26521 arm_last_printed_arch_string = arm_active_target.arch_name;
26522 if (!arch->common.extensions)
26523 return;
26524
26525 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26526 opt->name != NULL;
26527 opt++)
26528 {
26529 if (!opt->remove)
26530 {
26531 arm_initialize_isa (opt_bits, opt->isa_bits);
26532
26533 /* If every feature bit of this option is set in the target
26534 ISA specification, print out the option name. However,
26535 don't print anything if all the bits are part of the
26536 FPU specification. */
26537 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26538 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26539 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26540 }
26541 }
26542 }
26543
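/* Emit the start-of-file assembly: the .arch/.cpu directives describing the
   target, optionally the tuning-parameter comment block, and the EABI build
   attributes recording the floating-point model, enum size, alignment and
   optimization goals for this translation unit. */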
26544 static void
26545 arm_file_start (void)
26546 {
26547 int val;
26548
26549 if (TARGET_BPABI)
26550 {
26551 /* We don't have a specified CPU. Use the architecture to
26552 generate the tags.
26553
26554 Note: it might be better to do this unconditionally, then the
26555 assembler would not need to know about all new CPU names as
26556 they are added. */
26557 if (!arm_active_target.core_name)
26558 {
26559 /* armv7ve doesn't support any extensions. */
26560 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26561 {
26562 /* Keep backward compatibility for assemblers
26563 which don't support armv7ve. */
26564 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26565 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26566 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26567 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26568 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26569 arm_last_printed_arch_string = "armv7ve";
26570 }
26571 else
26572 arm_print_asm_arch_directives ();
26573 }
26574 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26575 {
26576 asm_fprintf (asm_out_file, "\t.arch %s\n",
26577 arm_active_target.core_name + 8);
26578 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26579 }
26580 else
26581 {
26582 const char* truncated_name
26583 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26584 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26585 }
26586
26587 if (print_tune_info)
26588 arm_print_tune_info ();
26589
26590 if (! TARGET_SOFT_FLOAT)
26591 {
26592 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26593 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26594
26595 if (TARGET_HARD_FLOAT_ABI)
26596 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26597 }
26598
26599 /* Some of these attributes only apply when the corresponding features
26600 are used. However we don't have any easy way of figuring this out.
26601 Conservatively record the setting that would have been used. */
26602
26603 if (flag_rounding_math)
26604 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26605
26606 if (!flag_unsafe_math_optimizations)
26607 {
26608 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26609 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26610 }
26611 if (flag_signaling_nans)
26612 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26613
26614 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26615 flag_finite_math_only ? 1 : 3);
26616
26617 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26618 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26619 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26620 flag_short_enums ? 1 : 2);
26621
26622 /* Tag_ABI_optimization_goals. */
26623 if (optimize_size)
26624 val = 4;
26625 else if (optimize >= 2)
26626 val = 2;
26627 else if (optimize)
26628 val = 1;
26629 else
26630 val = 6;
26631 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26632
26633 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26634 unaligned_access);
26635
26636 if (arm_fp16_format)
26637 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26638 (int) arm_fp16_format);
26639
26640 if (arm_lang_output_object_attributes_hook)
26641 arm_lang_output_object_attributes_hook();
26642 }
26643
26644 default_file_start ();
26645 }
26646
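/* Emit the end-of-file assembly: the .note.GNU-stack marker when required,
   and the per-register "bx" stubs in the text section that
   thumb_call_via_reg may have referenced. */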
26647 static void
26648 arm_file_end (void)
26649 {
26650 int regno;
26651
26652 if (NEED_INDICATE_EXEC_STACK)
26653 /* Add .note.GNU-stack. */
26654 file_end_indicate_exec_stack ();
26655
26656 if (! thumb_call_reg_needed)
26657 return;
26658
26659 switch_to_section (text_section);
26660 asm_fprintf (asm_out_file, "\t.code 16\n");
26661 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26662
26663 for (regno = 0; regno < LR_REGNUM; regno++)
26664 {
26665 rtx label = thumb_call_via_label[regno];
26666
26667 if (label != 0)
26668 {
26669 targetm.asm_out.internal_label (asm_out_file, "L",
26670 CODE_LABEL_NUMBER (label));
26671 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26672 }
26673 }
26674 }
26675
26676 #ifndef ARM_PE
26677 /* Symbols in the text segment can be accessed without indirecting via the
26678 constant pool; it may take an extra binary operation, but this is still
26679 faster than indirecting via memory. Don't do this when not optimizing,
26680 since we won't be calculating al of the offsets necessary to do this
26681 simplification. */
26682
26683 static void
26684 arm_encode_section_info (tree decl, rtx rtl, int first)
26685 {
26686 if (optimize > 0 && TREE_CONSTANT (decl))
26687 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26688
26689 default_encode_section_info (decl, rtl, first);
26690 }
26691 #endif /* !ARM_PE */
26692
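/* Output an internal label, first resetting the conditional-execution state
   machine if this is the label that the ccfsm optimization was waiting
   for. */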
26693 static void
26694 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26695 {
26696 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26697 && !strcmp (prefix, "L"))
26698 {
26699 arm_ccfsm_state = 0;
26700 arm_target_insn = NULL;
26701 }
26702 default_internal_label (stream, prefix, labelno);
26703 }
26704
26705 /* Output code to add DELTA to the first argument, and then jump
26706 to FUNCTION. Used for C++ multiple inheritance. */
26707
26708 static void
26709 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26710 HOST_WIDE_INT, tree function)
26711 {
26712 static int thunk_label = 0;
26713 char label[256];
26714 char labelpc[256];
26715 int mi_delta = delta;
26716 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26717 int shift = 0;
26718 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26719 ? 1 : 0);
26720 if (mi_delta < 0)
26721 mi_delta = - mi_delta;
26722
26723 final_start_function (emit_barrier (), file, 1);
26724
26725 if (TARGET_THUMB1)
26726 {
26727 int labelno = thunk_label++;
26728 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26729 /* Thunks are entered in ARM mode when available. */
26730 if (TARGET_THUMB1_ONLY)
26731 {
26732 /* push r3 so we can use it as a temporary. */
26733 /* TODO: Omit this save if r3 is not used. */
26734 fputs ("\tpush {r3}\n", file);
26735 fputs ("\tldr\tr3, ", file);
26736 }
26737 else
26738 {
26739 fputs ("\tldr\tr12, ", file);
26740 }
26741 assemble_name (file, label);
26742 fputc ('\n', file);
26743 if (flag_pic)
26744 {
26745 /* If we are generating PIC, the ldr instruction below loads
26746 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26747 the address of the add + 8, so we have:
26748
26749 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26750 = target + 1.
26751
26752 Note that we have "+ 1" because some versions of GNU ld
26753 don't set the low bit of the result for R_ARM_REL32
26754 relocations against thumb function symbols.
26755 On ARMv6M this is +4, not +8. */
26756 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26757 assemble_name (file, labelpc);
26758 fputs (":\n", file);
26759 if (TARGET_THUMB1_ONLY)
26760 {
26761 /* This is 2 insns after the start of the thunk, so we know it
26762 is 4-byte aligned. */
26763 fputs ("\tadd\tr3, pc, r3\n", file);
26764 fputs ("\tmov r12, r3\n", file);
26765 }
26766 else
26767 fputs ("\tadd\tr12, pc, r12\n", file);
26768 }
26769 else if (TARGET_THUMB1_ONLY)
26770 fputs ("\tmov r12, r3\n", file);
26771 }
26772 if (TARGET_THUMB1_ONLY)
26773 {
26774 if (mi_delta > 255)
26775 {
26776 fputs ("\tldr\tr3, ", file);
26777 assemble_name (file, label);
26778 fputs ("+4\n", file);
26779 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26780 mi_op, this_regno, this_regno);
26781 }
26782 else if (mi_delta != 0)
26783 {
26784 /* Thumb-1 unified syntax requires the 's' suffix in the instruction name
26785 when one of the operands is an immediate. */
26786 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26787 mi_op, this_regno, this_regno,
26788 mi_delta);
26789 }
26790 }
26791 else
26792 {
26793 /* TODO: Use movw/movt for large constants when available. */
26794 while (mi_delta != 0)
26795 {
26796 if ((mi_delta & (3 << shift)) == 0)
26797 shift += 2;
26798 else
26799 {
26800 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26801 mi_op, this_regno, this_regno,
26802 mi_delta & (0xff << shift));
26803 mi_delta &= ~(0xff << shift);
26804 shift += 8;
26805 }
26806 }
26807 }
26808 if (TARGET_THUMB1)
26809 {
26810 if (TARGET_THUMB1_ONLY)
26811 fputs ("\tpop\t{r3}\n", file);
26812
26813 fprintf (file, "\tbx\tr12\n");
26814 ASM_OUTPUT_ALIGN (file, 2);
26815 assemble_name (file, label);
26816 fputs (":\n", file);
26817 if (flag_pic)
26818 {
26819 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26820 rtx tem = XEXP (DECL_RTL (function), 0);
26821 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26822 pipeline offset is four rather than eight. Adjust the offset
26823 accordingly. */
26824 tem = plus_constant (GET_MODE (tem), tem,
26825 TARGET_THUMB1_ONLY ? -3 : -7);
26826 tem = gen_rtx_MINUS (GET_MODE (tem),
26827 tem,
26828 gen_rtx_SYMBOL_REF (Pmode,
26829 ggc_strdup (labelpc)));
26830 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26831 }
26832 else
26833 /* Output ".word .LTHUNKn". */
26834 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26835
26836 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26837 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26838 }
26839 else
26840 {
26841 fputs ("\tb\t", file);
26842 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26843 if (NEED_PLT_RELOC)
26844 fputs ("(PLT)", file);
26845 fputc ('\n', file);
26846 }
26847
26848 final_end_function ();
26849 }
26850
26851 /* MI thunk handling for TARGET_32BIT. */
26852
26853 static void
26854 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26855 HOST_WIDE_INT vcall_offset, tree function)
26856 {
26857 const bool long_call_p = arm_is_long_call_p (function);
26858
26859 /* On ARM, this_regno is R0 or R1 depending on
26860 whether the function returns an aggregate or not.
26861 */
26862 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26863 function)
26864 ? R1_REGNUM : R0_REGNUM);
26865
26866 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26867 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26868 reload_completed = 1;
26869 emit_note (NOTE_INSN_PROLOGUE_END);
26870
26871 /* Add DELTA to THIS_RTX. */
26872 if (delta != 0)
26873 arm_split_constant (PLUS, Pmode, NULL_RTX,
26874 delta, this_rtx, this_rtx, false);
26875
26876 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26877 if (vcall_offset != 0)
26878 {
26879 /* Load *THIS_RTX. */
26880 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26881 /* Compute *THIS_RTX + VCALL_OFFSET. */
26882 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26883 false);
26884 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26885 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26886 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26887 }
26888
26889 /* Generate a tail call to the target function. */
26890 if (!TREE_USED (function))
26891 {
26892 assemble_external (function);
26893 TREE_USED (function) = 1;
26894 }
26895 rtx funexp = XEXP (DECL_RTL (function), 0);
26896 if (long_call_p)
26897 {
26898 emit_move_insn (temp, funexp);
26899 funexp = temp;
26900 }
26901 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26902 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26903 SIBLING_CALL_P (insn) = 1;
26904 emit_barrier ();
26905
26906 /* Indirect calls require a bit of fixup in PIC mode. */
26907 if (long_call_p)
26908 {
26909 split_all_insns_noflow ();
26910 arm_reorg ();
26911 }
26912
26913 insn = get_insns ();
26914 shorten_branches (insn);
26915 final_start_function (insn, file, 1);
26916 final (insn, file, 1);
26917 final_end_function ();
26918
26919 /* Stop pretending this is a post-reload pass. */
26920 reload_completed = 0;
26921 }
26922
26923 /* Output code to add DELTA to the first argument, and then jump
26924 to FUNCTION. Used for C++ multiple inheritance. */
26925
26926 static void
26927 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26928 HOST_WIDE_INT vcall_offset, tree function)
26929 {
26930 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
26931
26932 assemble_start_function (thunk, fnname);
26933 if (TARGET_32BIT)
26934 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26935 else
26936 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26937 assemble_end_function (thunk, fnname);
26938 }
26939
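/* Output to FILE the CONST_VECTOR X as a single 64-bit hexadecimal literal,
printing the elements from the highest-numbered lane downwards.  Only the
doubleword vector modes handled below are expected.  Returns 1.  */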
26940 int
26941 arm_emit_vector_const (FILE *file, rtx x)
26942 {
26943 int i;
26944 const char * pattern;
26945
26946 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26947
26948 switch (GET_MODE (x))
26949 {
26950 case E_V2SImode: pattern = "%08x"; break;
26951 case E_V4HImode: pattern = "%04x"; break;
26952 case E_V8QImode: pattern = "%02x"; break;
26953 default: gcc_unreachable ();
26954 }
26955
26956 fprintf (file, "0x");
26957 for (i = CONST_VECTOR_NUNITS (x); i--;)
26958 {
26959 rtx element;
26960
26961 element = CONST_VECTOR_ELT (x, i);
26962 fprintf (file, pattern, INTVAL (element));
26963 }
26964
26965 return 1;
26966 }
26967
26968 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26969 HFmode constant pool entries are actually loaded with ldr. */
26970 void
26971 arm_emit_fp16_const (rtx c)
26972 {
26973 long bits;
26974
26975 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26976 if (WORDS_BIG_ENDIAN)
26977 assemble_zeros (2);
26978 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26979 if (!WORDS_BIG_ENDIAN)
26980 assemble_zeros (2);
26981 }
26982
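/* Output assembly to load IWMMXT general-purpose control register operand 0
from memory operand 1.  If the address offset is in range, a single wldrw
is used; otherwise a core register is pushed, the value is loaded into it
with ldr, transferred with tmcr, and the core register is popped again.  */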
26983 const char *
26984 arm_output_load_gr (rtx *operands)
26985 {
26986 rtx reg;
26987 rtx offset;
26988 rtx wcgr;
26989 rtx sum;
26990
26991 if (!MEM_P (operands [1])
26992 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26993 || !REG_P (reg = XEXP (sum, 0))
26994 || !CONST_INT_P (offset = XEXP (sum, 1))
26995 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26996 return "wldrw%?\t%0, %1";
26997
26998 /* Fix up an out-of-range load of a GR register. */
26999 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27000 wcgr = operands[0];
27001 operands[0] = reg;
27002 output_asm_insn ("ldr%?\t%0, %1", operands);
27003
27004 operands[0] = wcgr;
27005 operands[1] = reg;
27006 output_asm_insn ("tmcr%?\t%0, %1", operands);
27007 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27008
27009 return "";
27010 }
27011
27012 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27013
27014 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27015 named arg and all anonymous args onto the stack.
27016 XXX I know the prologue shouldn't be pushing registers, but it is faster
27017 that way. */
27018
27019 static void
27020 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27021 const function_arg_info &arg,
27022 int *pretend_size,
27023 int second_time ATTRIBUTE_UNUSED)
27024 {
27025 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27026 int nregs;
27027
27028 cfun->machine->uses_anonymous_args = 1;
27029 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27030 {
27031 nregs = pcum->aapcs_ncrn;
27032 if (nregs & 1)
27033 {
27034 int res = arm_needs_doubleword_align (arg.mode, arg.type);
27035 if (res < 0 && warn_psabi)
27036 inform (input_location, "parameter passing for argument of "
27037 "type %qT changed in GCC 7.1", arg.type);
27038 else if (res > 0)
27039 {
27040 nregs++;
27041 if (res > 1 && warn_psabi)
27042 inform (input_location,
27043 "parameter passing for argument of type "
27044 "%qT changed in GCC 9.1", arg.type);
27045 }
27046 }
27047 }
27048 else
27049 nregs = pcum->nregs;
27050
27051 if (nregs < NUM_ARG_REGS)
27052 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27053 }
27054
27055 /* We can't rely on the caller doing the proper promotion when
27056 using APCS or ATPCS. */
27057
27058 static bool
27059 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27060 {
27061 return !TARGET_AAPCS_BASED;
27062 }
27063
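/* Implement TARGET_PROMOTE_FUNCTION_MODE.  Integer arguments and return
values narrower than a word are promoted to SImode.  */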
27064 static machine_mode
27065 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27066 machine_mode mode,
27067 int *punsignedp ATTRIBUTE_UNUSED,
27068 const_tree fntype ATTRIBUTE_UNUSED,
27069 int for_return ATTRIBUTE_UNUSED)
27070 {
27071 if (GET_MODE_CLASS (mode) == MODE_INT
27072 && GET_MODE_SIZE (mode) < 4)
27073 return SImode;
27074
27075 return mode;
27076 }
27077
27078
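/* Whether enums default to the smallest integer type that will hold their
values; see ARM_DEFAULT_SHORT_ENUMS.  */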
27079 static bool
27080 arm_default_short_enums (void)
27081 {
27082 return ARM_DEFAULT_SHORT_ENUMS;
27083 }
27084
27085
27086 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27087
27088 static bool
27089 arm_align_anon_bitfield (void)
27090 {
27091 return TARGET_AAPCS_BASED;
27092 }
27093
27094
27095 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27096
27097 static tree
27098 arm_cxx_guard_type (void)
27099 {
27100 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27101 }
27102
27103
27104 /* The EABI says test the least significant bit of a guard variable. */
27105
27106 static bool
27107 arm_cxx_guard_mask_bit (void)
27108 {
27109 return TARGET_AAPCS_BASED;
27110 }
27111
27112
27113 /* The EABI specifies that all array cookies are 8 bytes long. */
27114
27115 static tree
27116 arm_get_cookie_size (tree type)
27117 {
27118 tree size;
27119
27120 if (!TARGET_AAPCS_BASED)
27121 return default_cxx_get_cookie_size (type);
27122
27123 size = build_int_cst (sizetype, 8);
27124 return size;
27125 }
27126
27127
27128 /* The EABI says that array cookies should also contain the element size. */
27129
27130 static bool
27131 arm_cookie_has_size (void)
27132 {
27133 return TARGET_AAPCS_BASED;
27134 }
27135
27136
27137 /* The EABI says constructors and destructors should return a pointer to
27138 the object constructed/destroyed. */
27139
27140 static bool
27141 arm_cxx_cdtor_returns_this (void)
27142 {
27143 return TARGET_AAPCS_BASED;
27144 }
27145
27146 /* The EABI says that an inline function may never be the key
27147 method. */
27148
27149 static bool
27150 arm_cxx_key_method_may_be_inline (void)
27151 {
27152 return !TARGET_AAPCS_BASED;
27153 }
27154
27155 static void
27156 arm_cxx_determine_class_data_visibility (tree decl)
27157 {
27158 if (!TARGET_AAPCS_BASED
27159 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27160 return;
27161
27162 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27163 is exported. However, on systems without dynamic vague linkage,
27164 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27165 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27166 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27167 else
27168 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27169 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27170 }
27171
27172 static bool
27173 arm_cxx_class_data_always_comdat (void)
27174 {
27175 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27176 vague linkage if the class has no key function. */
27177 return !TARGET_AAPCS_BASED;
27178 }
27179
27180
27181 /* The EABI says __aeabi_atexit should be used to register static
27182 destructors. */
27183
27184 static bool
27185 arm_cxx_use_aeabi_atexit (void)
27186 {
27187 return TARGET_AAPCS_BASED;
27188 }
27189
27190
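/* Set the return address of the current function to SOURCE.  If LR was
saved on the stack, store SOURCE into its save slot, using SCRATCH as a
temporary when the offset from the stack pointer is out of range;
otherwise simply copy SOURCE into LR.  */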
27191 void
27192 arm_set_return_address (rtx source, rtx scratch)
27193 {
27194 arm_stack_offsets *offsets;
27195 HOST_WIDE_INT delta;
27196 rtx addr, mem;
27197 unsigned long saved_regs;
27198
27199 offsets = arm_get_frame_offsets ();
27200 saved_regs = offsets->saved_regs_mask;
27201
27202 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27203 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27204 else
27205 {
27206 if (frame_pointer_needed)
27207 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27208 else
27209 {
27210 /* LR will be the first saved register. */
27211 delta = offsets->outgoing_args - (offsets->frame + 4);
27212
27213
27214 if (delta >= 4096)
27215 {
27216 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27217 GEN_INT (delta & ~4095)));
27218 addr = scratch;
27219 delta &= 4095;
27220 }
27221 else
27222 addr = stack_pointer_rtx;
27223
27224 addr = plus_constant (Pmode, addr, delta);
27225 }
27226
27227 /* The store needs to be marked to prevent DSE from deleting
27228 it as dead if it is based on fp. */
27229 mem = gen_frame_mem (Pmode, addr);
27230 MEM_VOLATILE_P (mem) = true;
27231 emit_move_insn (mem, source);
27232 }
27233 }
27234
27235
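/* Thumb counterpart of arm_set_return_address: store SOURCE into the stack
slot that holds the saved LR (or directly into LR if it was not saved),
using SCRATCH when the offset is too large for an immediate.  */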
27236 void
27237 thumb_set_return_address (rtx source, rtx scratch)
27238 {
27239 arm_stack_offsets *offsets;
27240 HOST_WIDE_INT delta;
27241 HOST_WIDE_INT limit;
27242 int reg;
27243 rtx addr, mem;
27244 unsigned long mask;
27245
27246 emit_use (source);
27247
27248 offsets = arm_get_frame_offsets ();
27249 mask = offsets->saved_regs_mask;
27250 if (mask & (1 << LR_REGNUM))
27251 {
27252 limit = 1024;
27253 /* Find the saved regs. */
27254 if (frame_pointer_needed)
27255 {
27256 delta = offsets->soft_frame - offsets->saved_args;
27257 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27258 if (TARGET_THUMB1)
27259 limit = 128;
27260 }
27261 else
27262 {
27263 delta = offsets->outgoing_args - offsets->saved_args;
27264 reg = SP_REGNUM;
27265 }
27266 /* Allow for the stack frame. */
27267 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27268 delta -= 16;
27269 /* The link register is always the first saved register. */
27270 delta -= 4;
27271
27272 /* Construct the address. */
27273 addr = gen_rtx_REG (SImode, reg);
27274 if (delta > limit)
27275 {
27276 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27277 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27278 addr = scratch;
27279 }
27280 else
27281 addr = plus_constant (Pmode, addr, delta);
27282
27283 /* The store needs to be marked to prevent DSE from deleting
27284 it as dead if it is based on fp. */
27285 mem = gen_frame_mem (Pmode, addr);
27286 MEM_VOLATILE_P (mem) = true;
27287 emit_move_insn (mem, source);
27288 }
27289 else
27290 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27291 }
27292
27293 /* Implements target hook vector_mode_supported_p. */
27294 bool
27295 arm_vector_mode_supported_p (machine_mode mode)
27296 {
27297 /* Neon also supports V2SImode, etc. listed in the clause below. */
27298 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27299 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27300 || mode == V2DImode || mode == V8HFmode))
27301 return true;
27302
27303 if ((TARGET_NEON || TARGET_IWMMXT)
27304 && ((mode == V2SImode)
27305 || (mode == V4HImode)
27306 || (mode == V8QImode)))
27307 return true;
27308
27309 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27310 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27311 || mode == V2HAmode))
27312 return true;
27313
27314 return false;
27315 }
27316
27317 /* Implements target hook array_mode_supported_p. */
27318
27319 static bool
27320 arm_array_mode_supported_p (machine_mode mode,
27321 unsigned HOST_WIDE_INT nelems)
27322 {
27323 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27324 for now, as the lane-swapping logic needs to be extended in the expanders.
27325 See PR target/82518. */
27326 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27327 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27328 && (nelems >= 2 && nelems <= 4))
27329 return true;
27330
27331 return false;
27332 }
27333
27334 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27335 registers when autovectorizing for Neon, at least until multiple vector
27336 widths are supported properly by the middle-end. */
27337
27338 static machine_mode
27339 arm_preferred_simd_mode (scalar_mode mode)
27340 {
27341 if (TARGET_NEON)
27342 switch (mode)
27343 {
27344 case E_SFmode:
27345 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27346 case E_SImode:
27347 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27348 case E_HImode:
27349 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27350 case E_QImode:
27351 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27352 case E_DImode:
27353 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27354 return V2DImode;
27355 break;
27356
27357 default:;
27358 }
27359
27360 if (TARGET_REALLY_IWMMXT)
27361 switch (mode)
27362 {
27363 case E_SImode:
27364 return V2SImode;
27365 case E_HImode:
27366 return V4HImode;
27367 case E_QImode:
27368 return V8QImode;
27369
27370 default:;
27371 }
27372
27373 return word_mode;
27374 }
27375
27376 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27377
27378 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27379 using r0-r4 for function arguments, r7 for the stack frame and don't have
27380 enough left over to do doubleword arithmetic. For Thumb-2 all the
27381 potentially problematic instructions accept high registers so this is not
27382 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27383 that require many low registers. */
27384 static bool
27385 arm_class_likely_spilled_p (reg_class_t rclass)
27386 {
27387 if ((TARGET_THUMB1 && rclass == LO_REGS)
27388 || rclass == CC_REG)
27389 return true;
27390
27391 return false;
27392 }
27393
27394 /* Implements target hook small_register_classes_for_mode_p. */
27395 bool
27396 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27397 {
27398 return TARGET_THUMB1;
27399 }
27400
27401 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27402 ARM insns and therefore guarantee that the shift count is modulo 256.
27403 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27404 guarantee no particular behavior for out-of-range counts. */
27405
27406 static unsigned HOST_WIDE_INT
27407 arm_shift_truncation_mask (machine_mode mode)
27408 {
27409 return mode == SImode ? 255 : 0;
27410 }
27411
27412
27413 /* Map internal gcc register numbers to DWARF2 register numbers. */
27414
27415 unsigned int
27416 arm_dbx_register_number (unsigned int regno)
27417 {
27418 if (regno < 16)
27419 return regno;
27420
27421 if (IS_VFP_REGNUM (regno))
27422 {
27423 /* See comment in arm_dwarf_register_span. */
27424 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27425 return 64 + regno - FIRST_VFP_REGNUM;
27426 else
27427 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27428 }
27429
27430 if (IS_IWMMXT_GR_REGNUM (regno))
27431 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27432
27433 if (IS_IWMMXT_REGNUM (regno))
27434 return 112 + regno - FIRST_IWMMXT_REGNUM;
27435
27436 return DWARF_FRAME_REGISTERS;
27437 }
27438
27439 /* DWARF models VFPv3 registers as 32 64-bit registers.
27440 GCC models them as 64 32-bit registers, so we need to describe this to
27441 the DWARF generation code. Other registers can use the default. */
27442 static rtx
27443 arm_dwarf_register_span (rtx rtl)
27444 {
27445 machine_mode mode;
27446 unsigned regno;
27447 rtx parts[16];
27448 int nregs;
27449 int i;
27450
27451 regno = REGNO (rtl);
27452 if (!IS_VFP_REGNUM (regno))
27453 return NULL_RTX;
27454
27455 /* XXX FIXME: The EABI defines two VFP register ranges:
27456 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27457 256-287: D0-D31
27458 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27459 corresponding D register. Until GDB supports this, we shall use the
27460 legacy encodings. We also use these encodings for D0-D15 for
27461 compatibility with older debuggers. */
27462 mode = GET_MODE (rtl);
27463 if (GET_MODE_SIZE (mode) < 8)
27464 return NULL_RTX;
27465
27466 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27467 {
27468 nregs = GET_MODE_SIZE (mode) / 4;
27469 for (i = 0; i < nregs; i += 2)
27470 if (TARGET_BIG_END)
27471 {
27472 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27473 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27474 }
27475 else
27476 {
27477 parts[i] = gen_rtx_REG (SImode, regno + i);
27478 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27479 }
27480 }
27481 else
27482 {
27483 nregs = GET_MODE_SIZE (mode) / 8;
27484 for (i = 0; i < nregs; i++)
27485 parts[i] = gen_rtx_REG (DImode, regno + i);
27486 }
27487
27488 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27489 }
27490
27491 #if ARM_UNWIND_INFO
27492 /* Emit unwind directives for a store-multiple instruction or stack pointer
27493 push during alignment.
27494 These should only ever be generated by the function prologue code, so
27495 expect them to have a particular form.
27496 The store-multiple instruction sometimes pushes pc as the last register,
27497 although it should not be tracked in the unwind information, or for -Os
27498 sometimes pushes some dummy registers before the first register that needs
27499 to be tracked in the unwind information; such dummy registers are there just
27500 to avoid separate stack adjustment, and will not be restored in the
27501 epilogue. */
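/* For example, a prologue "push {r4, r5, lr}" is annotated below as
".save {r4, r5, lr}", and a dummy push of pc is turned into ".pad #4".  */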
27502
27503 static void
27504 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27505 {
27506 int i;
27507 HOST_WIDE_INT offset;
27508 HOST_WIDE_INT nregs;
27509 int reg_size;
27510 unsigned reg;
27511 unsigned lastreg;
27512 unsigned padfirst = 0, padlast = 0;
27513 rtx e;
27514
27515 e = XVECEXP (p, 0, 0);
27516 gcc_assert (GET_CODE (e) == SET);
27517
27518 /* First insn will adjust the stack pointer. */
27519 gcc_assert (GET_CODE (e) == SET
27520 && REG_P (SET_DEST (e))
27521 && REGNO (SET_DEST (e)) == SP_REGNUM
27522 && GET_CODE (SET_SRC (e)) == PLUS);
27523
27524 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27525 nregs = XVECLEN (p, 0) - 1;
27526 gcc_assert (nregs);
27527
27528 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27529 if (reg < 16)
27530 {
27531 /* For -Os dummy registers can be pushed at the beginning to
27532 avoid separate stack pointer adjustment. */
27533 e = XVECEXP (p, 0, 1);
27534 e = XEXP (SET_DEST (e), 0);
27535 if (GET_CODE (e) == PLUS)
27536 padfirst = INTVAL (XEXP (e, 1));
27537 gcc_assert (padfirst == 0 || optimize_size);
27538 /* The function prologue may also push pc, but does not annotate it, as it
27539 is never restored. We turn this into a stack pointer adjustment. */
27540 e = XVECEXP (p, 0, nregs);
27541 e = XEXP (SET_DEST (e), 0);
27542 if (GET_CODE (e) == PLUS)
27543 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27544 else
27545 padlast = offset - 4;
27546 gcc_assert (padlast == 0 || padlast == 4);
27547 if (padlast == 4)
27548 fprintf (asm_out_file, "\t.pad #4\n");
27549 reg_size = 4;
27550 fprintf (asm_out_file, "\t.save {");
27551 }
27552 else if (IS_VFP_REGNUM (reg))
27553 {
27554 reg_size = 8;
27555 fprintf (asm_out_file, "\t.vsave {");
27556 }
27557 else
27558 /* Unknown register type. */
27559 gcc_unreachable ();
27560
27561 /* If the stack increment doesn't match the size of the saved registers,
27562 something has gone horribly wrong. */
27563 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27564
27565 offset = padfirst;
27566 lastreg = 0;
27567 /* The remaining insns will describe the stores. */
27568 for (i = 1; i <= nregs; i++)
27569 {
27570 /* Expect (set (mem <addr>) (reg)).
27571 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27572 e = XVECEXP (p, 0, i);
27573 gcc_assert (GET_CODE (e) == SET
27574 && MEM_P (SET_DEST (e))
27575 && REG_P (SET_SRC (e)));
27576
27577 reg = REGNO (SET_SRC (e));
27578 gcc_assert (reg >= lastreg);
27579
27580 if (i != 1)
27581 fprintf (asm_out_file, ", ");
27582 /* We can't use %r for vfp because we need to use the
27583 double precision register names. */
27584 if (IS_VFP_REGNUM (reg))
27585 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27586 else
27587 asm_fprintf (asm_out_file, "%r", reg);
27588
27589 if (flag_checking)
27590 {
27591 /* Check that the addresses are consecutive. */
27592 e = XEXP (SET_DEST (e), 0);
27593 if (GET_CODE (e) == PLUS)
27594 gcc_assert (REG_P (XEXP (e, 0))
27595 && REGNO (XEXP (e, 0)) == SP_REGNUM
27596 && CONST_INT_P (XEXP (e, 1))
27597 && offset == INTVAL (XEXP (e, 1)));
27598 else
27599 gcc_assert (i == 1
27600 && REG_P (e)
27601 && REGNO (e) == SP_REGNUM);
27602 offset += reg_size;
27603 }
27604 }
27605 fprintf (asm_out_file, "}\n");
27606 if (padfirst)
27607 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27608 }
27609
27610 /* Emit unwind directives for a SET. */
27611
27612 static void
27613 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27614 {
27615 rtx e0;
27616 rtx e1;
27617 unsigned reg;
27618
27619 e0 = XEXP (p, 0);
27620 e1 = XEXP (p, 1);
27621 switch (GET_CODE (e0))
27622 {
27623 case MEM:
27624 /* Pushing a single register. */
27625 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27626 || !REG_P (XEXP (XEXP (e0, 0), 0))
27627 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27628 abort ();
27629
27630 asm_fprintf (asm_out_file, "\t.save ");
27631 if (IS_VFP_REGNUM (REGNO (e1)))
27632 asm_fprintf(asm_out_file, "{d%d}\n",
27633 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27634 else
27635 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27636 break;
27637
27638 case REG:
27639 if (REGNO (e0) == SP_REGNUM)
27640 {
27641 /* A stack increment. */
27642 if (GET_CODE (e1) != PLUS
27643 || !REG_P (XEXP (e1, 0))
27644 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27645 || !CONST_INT_P (XEXP (e1, 1)))
27646 abort ();
27647
27648 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27649 -INTVAL (XEXP (e1, 1)));
27650 }
27651 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27652 {
27653 HOST_WIDE_INT offset;
27654
27655 if (GET_CODE (e1) == PLUS)
27656 {
27657 if (!REG_P (XEXP (e1, 0))
27658 || !CONST_INT_P (XEXP (e1, 1)))
27659 abort ();
27660 reg = REGNO (XEXP (e1, 0));
27661 offset = INTVAL (XEXP (e1, 1));
27662 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27663 HARD_FRAME_POINTER_REGNUM, reg,
27664 offset);
27665 }
27666 else if (REG_P (e1))
27667 {
27668 reg = REGNO (e1);
27669 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27670 HARD_FRAME_POINTER_REGNUM, reg);
27671 }
27672 else
27673 abort ();
27674 }
27675 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27676 {
27677 /* Move from sp to reg. */
27678 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27679 }
27680 else if (GET_CODE (e1) == PLUS
27681 && REG_P (XEXP (e1, 0))
27682 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27683 && CONST_INT_P (XEXP (e1, 1)))
27684 {
27685 /* Set reg to offset from sp. */
27686 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27687 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27688 }
27689 else
27690 abort ();
27691 break;
27692
27693 default:
27694 abort ();
27695 }
27696 }
27697
27698
27699 /* Emit unwind directives for the given insn. */
27700
27701 static void
27702 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27703 {
27704 rtx note, pat;
27705 bool handled_one = false;
27706
27707 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27708 return;
27709
27710 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27711 && (TREE_NOTHROW (current_function_decl)
27712 || crtl->all_throwers_are_sibcalls))
27713 return;
27714
27715 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27716 return;
27717
27718 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27719 {
27720 switch (REG_NOTE_KIND (note))
27721 {
27722 case REG_FRAME_RELATED_EXPR:
27723 pat = XEXP (note, 0);
27724 goto found;
27725
27726 case REG_CFA_REGISTER:
27727 pat = XEXP (note, 0);
27728 if (pat == NULL)
27729 {
27730 pat = PATTERN (insn);
27731 if (GET_CODE (pat) == PARALLEL)
27732 pat = XVECEXP (pat, 0, 0);
27733 }
27734
27735 /* Only emitted for IS_STACKALIGN re-alignment. */
27736 {
27737 rtx dest, src;
27738 unsigned reg;
27739
27740 src = SET_SRC (pat);
27741 dest = SET_DEST (pat);
27742
27743 gcc_assert (src == stack_pointer_rtx);
27744 reg = REGNO (dest);
27745 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27746 reg + 0x90, reg);
27747 }
27748 handled_one = true;
27749 break;
27750
27751 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27752 to get correct DWARF information for shrink-wrapping. We should not
27753 emit unwind information for it because these notes are used either for
27754 pretend arguments or to adjust sp and restore registers from the
27755 stack. */
27756 case REG_CFA_DEF_CFA:
27757 case REG_CFA_ADJUST_CFA:
27758 case REG_CFA_RESTORE:
27759 return;
27760
27761 case REG_CFA_EXPRESSION:
27762 case REG_CFA_OFFSET:
27763 /* ??? Only handling here what we actually emit. */
27764 gcc_unreachable ();
27765
27766 default:
27767 break;
27768 }
27769 }
27770 if (handled_one)
27771 return;
27772 pat = PATTERN (insn);
27773 found:
27774
27775 switch (GET_CODE (pat))
27776 {
27777 case SET:
27778 arm_unwind_emit_set (asm_out_file, pat);
27779 break;
27780
27781 case SEQUENCE:
27782 /* Store multiple. */
27783 arm_unwind_emit_sequence (asm_out_file, pat);
27784 break;
27785
27786 default:
27787 abort();
27788 }
27789 }
27790
27791
27792 /* Output a reference from a function exception table to the type_info
27793 object X. The EABI specifies that the symbol should be relocated by
27794 an R_ARM_TARGET2 relocation. */
27795
27796 static bool
27797 arm_output_ttype (rtx x)
27798 {
27799 fputs ("\t.word\t", asm_out_file);
27800 output_addr_const (asm_out_file, x);
27801 /* Use special relocations for symbol references. */
27802 if (!CONST_INT_P (x))
27803 fputs ("(TARGET2)", asm_out_file);
27804 fputc ('\n', asm_out_file);
27805
27806 return TRUE;
27807 }
27808
27809 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27810
27811 static void
27812 arm_asm_emit_except_personality (rtx personality)
27813 {
27814 fputs ("\t.personality\t", asm_out_file);
27815 output_addr_const (asm_out_file, personality);
27816 fputc ('\n', asm_out_file);
27817 }
27818 #endif /* ARM_UNWIND_INFO */
27819
27820 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27821
27822 static void
27823 arm_asm_init_sections (void)
27824 {
27825 #if ARM_UNWIND_INFO
27826 exception_section = get_unnamed_section (0, output_section_asm_op,
27827 "\t.handlerdata");
27828 #endif /* ARM_UNWIND_INFO */
27829
27830 #ifdef OBJECT_FORMAT_ELF
27831 if (target_pure_code)
27832 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27833 #endif
27834 }
27835
27836 /* Output unwind directives for the start/end of a function. */
27837
27838 void
27839 arm_output_fn_unwind (FILE * f, bool prologue)
27840 {
27841 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27842 return;
27843
27844 if (prologue)
27845 fputs ("\t.fnstart\n", f);
27846 else
27847 {
27848 /* If this function will never be unwound, then mark it as such.
27849 The same condition is used in arm_unwind_emit to suppress
27850 the frame annotations. */
27851 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27852 && (TREE_NOTHROW (current_function_decl)
27853 || crtl->all_throwers_are_sibcalls))
27854 fputs("\t.cantunwind\n", f);
27855
27856 fputs ("\t.fnend\n", f);
27857 }
27858 }
27859
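/* Output to FP the TLS-annotated constant X, appending the relocation
operator selected by its second operand ((tlsgd), (tlsldm), (tlsldo),
(gottpoff), (tpoff) or (tlsdesc)) and, for the sequence-relative
relocations, the PC-relative correction term.  Returns TRUE.  */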
27860 static bool
27861 arm_emit_tls_decoration (FILE *fp, rtx x)
27862 {
27863 enum tls_reloc reloc;
27864 rtx val;
27865
27866 val = XVECEXP (x, 0, 0);
27867 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27868
27869 output_addr_const (fp, val);
27870
27871 switch (reloc)
27872 {
27873 case TLS_GD32:
27874 fputs ("(tlsgd)", fp);
27875 break;
27876 case TLS_LDM32:
27877 fputs ("(tlsldm)", fp);
27878 break;
27879 case TLS_LDO32:
27880 fputs ("(tlsldo)", fp);
27881 break;
27882 case TLS_IE32:
27883 fputs ("(gottpoff)", fp);
27884 break;
27885 case TLS_LE32:
27886 fputs ("(tpoff)", fp);
27887 break;
27888 case TLS_DESCSEQ:
27889 fputs ("(tlsdesc)", fp);
27890 break;
27891 default:
27892 gcc_unreachable ();
27893 }
27894
27895 switch (reloc)
27896 {
27897 case TLS_GD32:
27898 case TLS_LDM32:
27899 case TLS_IE32:
27900 case TLS_DESCSEQ:
27901 fputs (" + (. - ", fp);
27902 output_addr_const (fp, XVECEXP (x, 0, 2));
27903 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27904 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27905 output_addr_const (fp, XVECEXP (x, 0, 3));
27906 fputc (')', fp);
27907 break;
27908 default:
27909 break;
27910 }
27911
27912 return TRUE;
27913 }
27914
27915 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27916
27917 static void
27918 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27919 {
27920 gcc_assert (size == 4);
27921 fputs ("\t.word\t", file);
27922 output_addr_const (file, x);
27923 fputs ("(tlsldo)", file);
27924 }
27925
27926 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27927
27928 static bool
27929 arm_output_addr_const_extra (FILE *fp, rtx x)
27930 {
27931 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27932 return arm_emit_tls_decoration (fp, x);
27933 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27934 {
27935 char label[256];
27936 int labelno = INTVAL (XVECEXP (x, 0, 0));
27937
27938 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27939 assemble_name_raw (fp, label);
27940
27941 return TRUE;
27942 }
27943 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27944 {
27945 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27946 if (GOT_PCREL)
27947 fputs ("+.", fp);
27948 fputs ("-(", fp);
27949 output_addr_const (fp, XVECEXP (x, 0, 0));
27950 fputc (')', fp);
27951 return TRUE;
27952 }
27953 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27954 {
27955 output_addr_const (fp, XVECEXP (x, 0, 0));
27956 if (GOT_PCREL)
27957 fputs ("+.", fp);
27958 fputs ("-(", fp);
27959 output_addr_const (fp, XVECEXP (x, 0, 1));
27960 fputc (')', fp);
27961 return TRUE;
27962 }
27963 else if (GET_CODE (x) == CONST_VECTOR)
27964 return arm_emit_vector_const (fp, x);
27965
27966 return FALSE;
27967 }
27968
27969 /* Output assembly for a shift instruction.
27970 SET_FLAGS determines how the instruction modifies the condition codes.
27971 0 - Do not set condition codes.
27972 1 - Set condition codes.
27973 2 - Use smallest instruction. */
27974 const char *
27975 arm_output_shift(rtx * operands, int set_flags)
27976 {
27977 char pattern[100];
27978 static const char flag_chars[3] = {'?', '.', '!'};
27979 const char *shift;
27980 HOST_WIDE_INT val;
27981 char c;
27982
27983 c = flag_chars[set_flags];
27984 shift = shift_op(operands[3], &val);
27985 if (shift)
27986 {
27987 if (val != -1)
27988 operands[2] = GEN_INT(val);
27989 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27990 }
27991 else
27992 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27993
27994 output_asm_insn (pattern, operands);
27995 return "";
27996 }
27997
27998 /* Output assembly for a WMMX immediate shift instruction. */
27999 const char *
28000 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28001 {
28002 int shift = INTVAL (operands[2]);
28003 char templ[50];
28004 machine_mode opmode = GET_MODE (operands[0]);
28005
28006 gcc_assert (shift >= 0);
28007
28008 /* Handle a shift value that exceeds the maximum for the element size:
28009 > 63 (for the D qualifier), > 31 (for W) or > 15 (for H). */
28010 if (((opmode == V4HImode) && (shift > 15))
28011 || ((opmode == V2SImode) && (shift > 31))
28012 || ((opmode == DImode) && (shift > 63)))
28013 {
28014 if (wror_or_wsra)
28015 {
28016 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28017 output_asm_insn (templ, operands);
28018 if (opmode == DImode)
28019 {
28020 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28021 output_asm_insn (templ, operands);
28022 }
28023 }
28024 else
28025 {
28026 /* The destination register will contain all zeros. */
28027 sprintf (templ, "wzero\t%%0");
28028 output_asm_insn (templ, operands);
28029 }
28030 return "";
28031 }
28032
28033 if ((opmode == DImode) && (shift > 32))
28034 {
28035 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28036 output_asm_insn (templ, operands);
28037 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28038 output_asm_insn (templ, operands);
28039 }
28040 else
28041 {
28042 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28043 output_asm_insn (templ, operands);
28044 }
28045 return "";
28046 }
28047
28048 /* Output assembly for a WMMX tinsr instruction. */
28049 const char *
28050 arm_output_iwmmxt_tinsr (rtx *operands)
28051 {
28052 int mask = INTVAL (operands[3]);
28053 int i;
28054 char templ[50];
28055 int units = mode_nunits[GET_MODE (operands[0])];
28056 gcc_assert ((mask & (mask - 1)) == 0);
28057 for (i = 0; i < units; ++i)
28058 {
28059 if ((mask & 0x01) == 1)
28060 {
28061 break;
28062 }
28063 mask >>= 1;
28064 }
28065 gcc_assert (i < units);
28066 {
28067 switch (GET_MODE (operands[0]))
28068 {
28069 case E_V8QImode:
28070 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28071 break;
28072 case E_V4HImode:
28073 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28074 break;
28075 case E_V2SImode:
28076 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28077 break;
28078 default:
28079 gcc_unreachable ();
28080 break;
28081 }
28082 output_asm_insn (templ, operands);
28083 }
28084 return "";
28085 }
28086
28087 /* Output a Thumb-1 casesi dispatch sequence. */
28088 const char *
28089 thumb1_output_casesi (rtx *operands)
28090 {
28091 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28092
28093 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28094
28095 switch (GET_MODE(diff_vec))
28096 {
28097 case E_QImode:
28098 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28099 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28100 case E_HImode:
28101 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28102 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28103 case E_SImode:
28104 return "bl\t%___gnu_thumb1_case_si";
28105 default:
28106 gcc_unreachable ();
28107 }
28108 }
28109
28110 /* Output a Thumb-2 casesi instruction. */
28111 const char *
28112 thumb2_output_casesi (rtx *operands)
28113 {
28114 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28115
28116 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28117
28118 output_asm_insn ("cmp\t%0, %1", operands);
28119 output_asm_insn ("bhi\t%l3", operands);
28120 switch (GET_MODE(diff_vec))
28121 {
28122 case E_QImode:
28123 return "tbb\t[%|pc, %0]";
28124 case E_HImode:
28125 return "tbh\t[%|pc, %0, lsl #1]";
28126 case E_SImode:
28127 if (flag_pic)
28128 {
28129 output_asm_insn ("adr\t%4, %l2", operands);
28130 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28131 output_asm_insn ("add\t%4, %4, %5", operands);
28132 return "bx\t%4";
28133 }
28134 else
28135 {
28136 output_asm_insn ("adr\t%4, %l2", operands);
28137 return "ldr\t%|pc, [%4, %0, lsl #2]";
28138 }
28139 default:
28140 gcc_unreachable ();
28141 }
28142 }
28143
28144 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
28145 per-core tuning structs. */
28146 static int
28147 arm_issue_rate (void)
28148 {
28149 return current_tune->issue_rate;
28150 }
28151
28152 /* Return how many instructions the scheduler should look ahead to choose
28153 the best one. */
28154 static int
28155 arm_first_cycle_multipass_dfa_lookahead (void)
28156 {
28157 int issue_rate = arm_issue_rate ();
28158
28159 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28160 }
28161
28162 /* Enable modeling of L2 auto-prefetcher. */
28163 static int
28164 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28165 {
28166 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28167 }
28168
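/* Implement TARGET_MANGLE_TYPE.  */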
28169 const char *
28170 arm_mangle_type (const_tree type)
28171 {
28172 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28173 has to be mangled as if it is in the "std" namespace. */
28174 if (TARGET_AAPCS_BASED
28175 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28176 return "St9__va_list";
28177
28178 /* Half-precision float. */
28179 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28180 return "Dh";
28181
28182 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28183 builtin type. */
28184 if (TYPE_NAME (type) != NULL)
28185 return arm_mangle_builtin_type (type);
28186
28187 /* Use the default mangling. */
28188 return NULL;
28189 }
28190
28191 /* Order of allocation of core registers for Thumb: this allocation is
28192 written over the corresponding initial entries of the array
28193 initialized with REG_ALLOC_ORDER. We allocate all low registers
28194 first. Saving and restoring a low register is usually cheaper than
28195 using a call-clobbered high register. */
28196
28197 static const int thumb_core_reg_alloc_order[] =
28198 {
28199 3, 2, 1, 0, 4, 5, 6, 7,
28200 12, 14, 8, 9, 10, 11
28201 };
28202
28203 /* Adjust register allocation order when compiling for Thumb. */
28204
28205 void
28206 arm_order_regs_for_local_alloc (void)
28207 {
28208 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28209 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28210 if (TARGET_THUMB)
28211 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28212 sizeof (thumb_core_reg_alloc_order));
28213 }
28214
28215 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28216
28217 bool
28218 arm_frame_pointer_required (void)
28219 {
28220 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28221 return true;
28222
28223 /* If the function receives nonlocal gotos, it needs to save the frame
28224 pointer in the nonlocal_goto_save_area object. */
28225 if (cfun->has_nonlocal_label)
28226 return true;
28227
28228 /* The frame pointer is required for non-leaf APCS frames. */
28229 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28230 return true;
28231
28232 /* If we are probing the stack in the prologue, we will have a faulting
28233 instruction prior to the stack adjustment and this requires a frame
28234 pointer if we want to catch the exception using the EABI unwinder. */
28235 if (!IS_INTERRUPT (arm_current_func_type ())
28236 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28237 || flag_stack_clash_protection)
28238 && arm_except_unwind_info (&global_options) == UI_TARGET
28239 && cfun->can_throw_non_call_exceptions)
28240 {
28241 HOST_WIDE_INT size = get_frame_size ();
28242
28243 /* That's irrelevant if there is no stack adjustment. */
28244 if (size <= 0)
28245 return false;
28246
28247 /* That's relevant only if there is a stack probe. */
28248 if (crtl->is_leaf && !cfun->calls_alloca)
28249 {
28250 /* We don't have the final size of the frame so adjust. */
28251 size += 32 * UNITS_PER_WORD;
28252 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28253 return true;
28254 }
28255 else
28256 return true;
28257 }
28258
28259 return false;
28260 }
28261
28262 /* Only Thumb-1 lacks conditional execution, so return true if the
28263 target is not Thumb-1. */
28264 static bool
28265 arm_have_conditional_execution (void)
28266 {
28267 return !TARGET_THUMB1;
28268 }
28269
28270 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28271 static HOST_WIDE_INT
28272 arm_vector_alignment (const_tree type)
28273 {
28274 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28275
28276 if (TARGET_AAPCS_BASED)
28277 align = MIN (align, 64);
28278
28279 return align;
28280 }
28281
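/* Report the vector sizes the auto-vectorizer should try: unless
-mvectorize-with-neon-double is in effect, prefer 128-bit (quadword)
vectors and fall back to 64-bit (doubleword) ones.  */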
28282 static void
28283 arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
28284 {
28285 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28286 {
28287 sizes->safe_push (16);
28288 sizes->safe_push (8);
28289 }
28290 }
28291
28292 static bool
28293 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28294 {
28295 /* Vectors which aren't in packed structures will not be less aligned than
28296 the natural alignment of their element type, so this is safe. */
28297 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28298 return !is_packed;
28299
28300 return default_builtin_vector_alignment_reachable (type, is_packed);
28301 }
28302
28303 static bool
28304 arm_builtin_support_vector_misalignment (machine_mode mode,
28305 const_tree type, int misalignment,
28306 bool is_packed)
28307 {
28308 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28309 {
28310 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28311
28312 if (is_packed)
28313 return align == 1;
28314
28315 /* If the misalignment is unknown, we should be able to handle the access
28316 so long as it is not to a member of a packed data structure. */
28317 if (misalignment == -1)
28318 return true;
28319
28320 /* Return true if the misalignment is a multiple of the natural alignment
28321 of the vector's element type. This is probably always going to be
28322 true in practice, since we've already established that this isn't a
28323 packed access. */
28324 return ((misalignment % align) == 0);
28325 }
28326
28327 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28328 is_packed);
28329 }
28330
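/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  Adjust the fixed and
call-used register sets for the selected target: avoid the high registers
when optimizing Thumb-1 for size, make the VFP and iWMMXT banks available
when present, and reserve the PIC and frame-pointer registers as
required.  */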
28331 static void
28332 arm_conditional_register_usage (void)
28333 {
28334 int regno;
28335
28336 if (TARGET_THUMB1 && optimize_size)
28337 {
28338 /* When optimizing for size on Thumb-1, it's better not
28339 to use the HI regs, because of the overhead of
28340 stacking them. */
28341 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28342 fixed_regs[regno] = call_used_regs[regno] = 1;
28343 }
28344
28345 /* The link register can be clobbered by any branch insn,
28346 but we have no way to track that at present, so mark
28347 it as unavailable. */
28348 if (TARGET_THUMB1)
28349 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28350
28351 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28352 {
28353 /* VFPv3 registers are disabled when earlier VFP
28354 versions are selected due to the definition of
28355 LAST_VFP_REGNUM. */
28356 for (regno = FIRST_VFP_REGNUM;
28357 regno <= LAST_VFP_REGNUM; ++ regno)
28358 {
28359 fixed_regs[regno] = 0;
28360 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28361 || regno >= FIRST_VFP_REGNUM + 32;
28362 }
28363 }
28364
28365 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
28366 {
28367 regno = FIRST_IWMMXT_GR_REGNUM;
28368 /* The 2002/10/09 revision of the XScale ABI has wCG0
28369 and wCG1 as call-preserved registers. The 2002/11/21
28370 revision changed this so that all wCG registers are
28371 scratch registers. */
28372 for (regno = FIRST_IWMMXT_GR_REGNUM;
28373 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28374 fixed_regs[regno] = 0;
28375 /* The XScale ABI has wR0 - wR9 as scratch registers,
28376 the rest as call-preserved registers. */
28377 for (regno = FIRST_IWMMXT_REGNUM;
28378 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28379 {
28380 fixed_regs[regno] = 0;
28381 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28382 }
28383 }
28384
28385 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28386 {
28387 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28388 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28389 }
28390 else if (TARGET_APCS_STACK)
28391 {
28392 fixed_regs[10] = 1;
28393 call_used_regs[10] = 1;
28394 }
28395 /* -mcaller-super-interworking reserves r11 for calls to
28396 _interwork_r11_call_via_rN(). Making the register global
28397 is an easy way of ensuring that it remains valid for all
28398 calls. */
28399 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28400 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28401 {
28402 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28403 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28404 if (TARGET_CALLER_INTERWORKING)
28405 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28406 }
28407 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28408 }
28409
28410 static reg_class_t
28411 arm_preferred_rename_class (reg_class_t rclass)
28412 {
28413 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28414 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
28415 so that code size can be reduced. */
28416 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28417 return LO_REGS;
28418 else
28419 return NO_REGS;
28420 }
28421
28422 /* Compute the attribute "length" of insn "*push_multi".
28423 So this function MUST be kept in sync with that insn pattern. */
28424 int
28425 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28426 {
28427 int i, regno, hi_reg;
28428 int num_saves = XVECLEN (parallel_op, 0);
28429
28430 /* ARM mode. */
28431 if (TARGET_ARM)
28432 return 4;
28433 /* Thumb1 mode. */
28434 if (TARGET_THUMB1)
28435 return 2;
28436
28437 /* Thumb2 mode. */
28438 regno = REGNO (first_op);
28439 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
28440 list fits in 8 bits. Normally this means all registers in the list must be
28441 LO_REGS, that is (R0-R7). If any HI_REGS register is used, then we must use
28442 32-bit encodings. There is one exception for PUSH: LR, although in HI_REGS,
28443 can be used with a 16-bit encoding. */
28444 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28445 for (i = 1; i < num_saves && !hi_reg; i++)
28446 {
28447 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28448 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28449 }
28450
28451 if (!hi_reg)
28452 return 2;
28453 return 4;
28454 }
28455
28456 /* Compute the attribute "length" of an insn. Currently, this function is used
28457 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28458 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28459 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
28460 true if OPERANDS contains an insn which explicitly updates the base register. */
28461
28462 int
28463 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28464 {
28465 /* ARM mode. */
28466 if (TARGET_ARM)
28467 return 4;
28468 /* Thumb1 mode. */
28469 if (TARGET_THUMB1)
28470 return 2;
28471
28472 rtx parallel_op = operands[0];
28473 /* Start from the last element of the PARALLEL. */
28474 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28475 /* The base register number. */
28476 unsigned regno = REGNO (operands[1]);
28477 /* Skip the return and write-back patterns; we only need the register
28478 pop patterns for the analysis below. */
28479 unsigned first_indx = 0;
28480 first_indx += return_pc ? 1 : 0;
28481 first_indx += write_back_p ? 1 : 0;
28482
28483 /* A pop operation can be done through LDM or POP. If the base register is SP
28484 and write back is used, then an LDM is equivalent to a POP. */
28485 bool pop_p = (regno == SP_REGNUM && write_back_p);
28486 bool ldm_p = !pop_p;
28487
28488 /* Check base register for LDM. */
28489 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28490 return 4;
28491
28492 /* Check each register in the list. */
28493 for (; indx >= first_indx; indx--)
28494 {
28495 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28496 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28497 comment in arm_attr_length_push_multi. */
28498 if (REGNO_REG_CLASS (regno) == HI_REGS
28499 && (regno != PC_REGNUM || ldm_p))
28500 return 4;
28501 }
28502
28503 return 2;
28504 }
28505
28506 /* Compute the number of instructions emitted by output_move_double. */
28507 int
28508 arm_count_output_move_double_insns (rtx *operands)
28509 {
28510 int count;
28511 rtx ops[2];
28512 /* output_move_double may modify the operands array, so call it
28513 here on a copy of the array. */
28514 ops[0] = operands[0];
28515 ops[1] = operands[1];
28516 output_move_double (ops, false, &count);
28517 return count;
28518 }
28519
28520 /* Same as above, but operands are a register/memory pair in SImode.
28521 Assumes operands has the base register in position 0 and memory in position
28522 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
28523 int
28524 arm_count_ldrdstrd_insns (rtx *operands, bool load)
28525 {
28526 int count;
28527 rtx ops[2];
28528 int regnum, memnum;
28529 if (load)
28530 regnum = 0, memnum = 1;
28531 else
28532 regnum = 1, memnum = 0;
28533 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
28534 ops[memnum] = adjust_address (operands[2], DImode, 0);
28535 output_move_double (ops, false, &count);
28536 return count;
28537 }
28538
28539
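/* If OPERAND is a CONST_DOUBLE whose reciprocal is an exact power of two,
return the log2 of that reciprocal (asserted to be in [0, 31]); otherwise
return 0.  The fixed-point conversion patterns encode the number of
fraction bits this way (compare vfp3_const_double_for_bits below).  */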
28540 int
28541 vfp3_const_double_for_fract_bits (rtx operand)
28542 {
28543 REAL_VALUE_TYPE r0;
28544
28545 if (!CONST_DOUBLE_P (operand))
28546 return 0;
28547
28548 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28549 if (exact_real_inverse (DFmode, &r0)
28550 && !REAL_VALUE_NEGATIVE (r0))
28551 {
28552 if (exact_real_truncate (DFmode, &r0))
28553 {
28554 HOST_WIDE_INT value = real_to_integer (&r0);
28555 value = value & 0xffffffff;
28556 if ((value != 0) && ( (value & (value - 1)) == 0))
28557 {
28558 int ret = exact_log2 (value);
28559 gcc_assert (IN_RANGE (ret, 0, 31));
28560 return ret;
28561 }
28562 }
28563 }
28564 return 0;
28565 }
28566
28567 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28568 log2 is in [1, 32], return that log2. Otherwise return -1.
28569 This is used in the patterns for vcvt.s32.f32 floating-point to
28570 fixed-point conversions. */
28571
28572 int
28573 vfp3_const_double_for_bits (rtx x)
28574 {
28575 const REAL_VALUE_TYPE *r;
28576
28577 if (!CONST_DOUBLE_P (x))
28578 return -1;
28579
28580 r = CONST_DOUBLE_REAL_VALUE (x);
28581
28582 if (REAL_VALUE_NEGATIVE (*r)
28583 || REAL_VALUE_ISNAN (*r)
28584 || REAL_VALUE_ISINF (*r)
28585 || !real_isinteger (r, SFmode))
28586 return -1;
28587
28588 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28589
28590 /* The exact_log2 above will have returned -1 if this is
28591 not an exact log2. */
28592 if (!IN_RANGE (hwint, 1, 32))
28593 return -1;
28594
28595 return hwint;
28596 }
28597
28598 \f
28599 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28600
28601 static void
28602 arm_pre_atomic_barrier (enum memmodel model)
28603 {
28604 if (need_atomic_barrier_p (model, true))
28605 emit_insn (gen_memory_barrier ());
28606 }
28607
28608 static void
28609 arm_post_atomic_barrier (enum memmodel model)
28610 {
28611 if (need_atomic_barrier_p (model, false))
28612 emit_insn (gen_memory_barrier ());
28613 }
28614
28615 /* Emit the load-exclusive and store-exclusive instructions.
28616 Use acquire and release versions if necessary. */
28617
28618 static void
28619 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28620 {
28621 rtx (*gen) (rtx, rtx);
28622
28623 if (acq)
28624 {
28625 switch (mode)
28626 {
28627 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28628 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28629 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28630 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28631 default:
28632 gcc_unreachable ();
28633 }
28634 }
28635 else
28636 {
28637 switch (mode)
28638 {
28639 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28640 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28641 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28642 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28643 default:
28644 gcc_unreachable ();
28645 }
28646 }
28647
28648 emit_insn (gen (rval, mem));
28649 }
28650
28651 static void
28652 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28653 rtx mem, bool rel)
28654 {
28655 rtx (*gen) (rtx, rtx, rtx);
28656
28657 if (rel)
28658 {
28659 switch (mode)
28660 {
28661 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28662 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28663 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28664 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28665 default:
28666 gcc_unreachable ();
28667 }
28668 }
28669 else
28670 {
28671 switch (mode)
28672 {
28673 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28674 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28675 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28676 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28677 default:
28678 gcc_unreachable ();
28679 }
28680 }
28681
28682 emit_insn (gen (bval, rval, mem));
28683 }
28684
28685 /* Mark the previous jump instruction as unlikely. */
28686
28687 static void
28688 emit_unlikely_jump (rtx insn)
28689 {
28690 rtx_insn *jump = emit_jump_insn (insn);
28691 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28692 }
28693
28694 /* Expand a compare and swap pattern. */
28695
28696 void
28697 arm_expand_compare_and_swap (rtx operands[])
28698 {
28699 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28700 machine_mode mode, cmp_mode;
28701
28702 bval = operands[0];
28703 rval = operands[1];
28704 mem = operands[2];
28705 oldval = operands[3];
28706 newval = operands[4];
28707 is_weak = operands[5];
28708 mod_s = operands[6];
28709 mod_f = operands[7];
28710 mode = GET_MODE (mem);
28711
28712 /* Normally the succ memory model must be stronger than fail, but in the
28713 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28714 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28715
28716 if (TARGET_HAVE_LDACQ
28717 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28718 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28719 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28720
28721 switch (mode)
28722 {
28723 case E_QImode:
28724 case E_HImode:
28725 /* For narrow modes, we're going to perform the comparison in SImode,
28726 so do the zero-extension now. */
28727 rval = gen_reg_rtx (SImode);
28728 oldval = convert_modes (SImode, mode, oldval, true);
28729 /* FALLTHRU */
28730
28731 case E_SImode:
28732 /* Force the value into a register if needed. We waited until after
28733 the zero-extension above to do this properly. */
28734 if (!arm_add_operand (oldval, SImode))
28735 oldval = force_reg (SImode, oldval);
28736 break;
28737
28738 case E_DImode:
28739 if (!cmpdi_operand (oldval, mode))
28740 oldval = force_reg (mode, oldval);
28741 break;
28742
28743 default:
28744 gcc_unreachable ();
28745 }
28746
28747 if (TARGET_THUMB1)
28748 cmp_mode = E_SImode;
28749 else
28750 cmp_mode = CC_Zmode;
28751
28752 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28753 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
28754 oldval, newval, is_weak, mod_s, mod_f));
28755
28756 if (mode == QImode || mode == HImode)
28757 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28758
  /* In all cases, we arrange for success to be signaled by Z being set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because the
     Thumb-1 backend lacks dependency tracking for the CC flag: flag-setting
     is not represented at the RTL level.  */
28765 if (TARGET_THUMB1)
28766 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28767 else
28768 {
28769 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28770 emit_insn (gen_rtx_SET (bval, x));
28771 }
28772 }
28773
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag being set for 32-bit targets and by neg_bval being
   zero for Thumb-1 targets (i.e. the negation of the boolean value returned
   by the atomic_compare_and_swap<mode> standard pattern in operand 0).  */
28782
28783 void
28784 arm_split_compare_and_swap (rtx operands[])
28785 {
28786 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
28787 machine_mode mode;
28788 enum memmodel mod_s, mod_f;
28789 bool is_weak;
28790 rtx_code_label *label1, *label2;
28791 rtx x, cond;
28792
28793 rval = operands[1];
28794 mem = operands[2];
28795 oldval = operands[3];
28796 newval = operands[4];
28797 is_weak = (operands[5] != const0_rtx);
28798 mod_s_rtx = operands[6];
28799 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
28800 mod_f = memmodel_from_int (INTVAL (operands[7]));
28801 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28802 mode = GET_MODE (mem);
28803
28804 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28805
28806 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
28807 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
28808
28809 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28810 a full barrier is emitted after the store-release. */
28811 if (is_armv8_sync)
28812 use_acquire = false;
28813
28814 /* Checks whether a barrier is needed and emits one accordingly. */
28815 if (!(use_acquire || use_release))
28816 arm_pre_atomic_barrier (mod_s);
28817
28818 label1 = NULL;
28819 if (!is_weak)
28820 {
28821 label1 = gen_label_rtx ();
28822 emit_label (label1);
28823 }
28824 label2 = gen_label_rtx ();
28825
28826 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28827
  /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1)
     if oldval != rval, as required to communicate with
     arm_expand_compare_and_swap.  */
28830 if (TARGET_32BIT)
28831 {
28832 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28833 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28834 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28835 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28836 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28837 }
28838 else
28839 {
28840 emit_move_insn (neg_bval, const1_rtx);
28841 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28842 if (thumb1_cmpneg_operand (oldval, SImode))
28843 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28844 label2, cond));
28845 else
28846 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28847 }
28848
28849 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28850
28851 /* Weak or strong, we want EQ to be true for success, so that we
28852 match the flags that we got from the compare above. */
28853 if (TARGET_32BIT)
28854 {
28855 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28856 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28857 emit_insn (gen_rtx_SET (cond, x));
28858 }
28859
28860 if (!is_weak)
28861 {
28862 /* Z is set to boolean value of !neg_bval, as required to communicate
28863 with arm_expand_compare_and_swap. */
28864 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28865 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28866 }
28867
28868 if (!is_mm_relaxed (mod_f))
28869 emit_label (label2);
28870
28871 /* Checks whether a barrier is needed and emits one accordingly. */
28872 if (is_armv8_sync
28873 || !(use_acquire || use_release))
28874 arm_post_atomic_barrier (mod_s);
28875
28876 if (is_mm_relaxed (mod_f))
28877 emit_label (label2);
28878 }
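/* Illustrative sketch (not from the original sources): for a strong SImode
   compare-and-swap with MEMMODEL_SEQ_CST on a target without LDA/STL
   (e.g. ARMv7-A), the split above yields roughly:

        dmb     ish
    1:  ldrex   r0, [r1]        @ rval = *mem
        cmp     r0, r2          @ does rval match oldval?
        bne     2f
        strex   r3, r4, [r1]    @ try to store newval
        cmp     r3, #0
        bne     1b              @ retry if the exclusive monitor was lost
    2:  dmb     ish

   Register choices are arbitrary here; the exact sequence depends on the
   memory models and on whether the CAS is weak.  */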
28879
28880 /* Split an atomic operation pattern. Operation is given by CODE and is one
28881 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28882 operation). Operation is performed on the content at MEM and on VALUE
28883 following the memory model MODEL_RTX. The content at MEM before and after
28884 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28885 success of the operation is returned in COND. Using a scratch register or
28886 an operand register for these determines what result is returned for that
28887 pattern. */
28888
28889 void
28890 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28891 rtx value, rtx model_rtx, rtx cond)
28892 {
28893 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28894 machine_mode mode = GET_MODE (mem);
28895 machine_mode wmode = (mode == DImode ? DImode : SImode);
28896 rtx_code_label *label;
28897 bool all_low_regs, bind_old_new;
28898 rtx x;
28899
28900 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28901
28902 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
28903 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
28904
28905 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28906 a full barrier is emitted after the store-release. */
28907 if (is_armv8_sync)
28908 use_acquire = false;
28909
28910 /* Checks whether a barrier is needed and emits one accordingly. */
28911 if (!(use_acquire || use_release))
28912 arm_pre_atomic_barrier (model);
28913
28914 label = gen_label_rtx ();
28915 emit_label (label);
28916
28917 if (new_out)
28918 new_out = gen_lowpart (wmode, new_out);
28919 if (old_out)
28920 old_out = gen_lowpart (wmode, old_out);
28921 else
28922 old_out = new_out;
28923 value = simplify_gen_subreg (wmode, value, mode, 0);
28924
28925 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28926
  /* Does the operation require the destination and the first operand to use
     the same register?  This is decided by the register constraints of the
     relevant insn patterns in thumb1.md.  */
28930 gcc_assert (!new_out || REG_P (new_out));
28931 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28932 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28933 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28934 bind_old_new =
28935 (TARGET_THUMB1
28936 && code != SET
28937 && code != MINUS
28938 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28939
28940 /* We want to return the old value while putting the result of the operation
28941 in the same register as the old value so copy the old value over to the
28942 destination register and use that register for the operation. */
28943 if (old_out && bind_old_new)
28944 {
28945 emit_move_insn (new_out, old_out);
28946 old_out = new_out;
28947 }
28948
28949 switch (code)
28950 {
28951 case SET:
28952 new_out = value;
28953 break;
28954
28955 case NOT:
28956 x = gen_rtx_AND (wmode, old_out, value);
28957 emit_insn (gen_rtx_SET (new_out, x));
28958 x = gen_rtx_NOT (wmode, new_out);
28959 emit_insn (gen_rtx_SET (new_out, x));
28960 break;
28961
28962 case MINUS:
28963 if (CONST_INT_P (value))
28964 {
28965 value = GEN_INT (-INTVAL (value));
28966 code = PLUS;
28967 }
28968 /* FALLTHRU */
28969
28970 case PLUS:
28971 if (mode == DImode)
28972 {
28973 /* DImode plus/minus need to clobber flags. */
28974 /* The adddi3 and subdi3 patterns are incorrectly written so that
28975 they require matching operands, even when we could easily support
28976 three operands. Thankfully, this can be fixed up post-splitting,
28977 as the individual add+adc patterns do accept three operands and
28978 post-reload cprop can make these moves go away. */
28979 emit_move_insn (new_out, old_out);
28980 if (code == PLUS)
28981 x = gen_adddi3 (new_out, new_out, value);
28982 else
28983 x = gen_subdi3 (new_out, new_out, value);
28984 emit_insn (x);
28985 break;
28986 }
28987 /* FALLTHRU */
28988
28989 default:
28990 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28991 emit_insn (gen_rtx_SET (new_out, x));
28992 break;
28993 }
28994
28995 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28996 use_release);
28997
28998 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28999 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29000
29001 /* Checks whether a barrier is needed and emits one accordingly. */
29002 if (is_armv8_sync
29003 || !(use_acquire || use_release))
29004 arm_post_atomic_barrier (model);
29005 }
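/* Illustrative sketch (not from the original sources): for an SImode
   atomic fetch-and-add with a relaxed memory model, the loop built above
   corresponds roughly to:

    1:  ldrex   r0, [r3]        @ old_out = *mem
        add     r1, r0, r2      @ new_out = old_out + value
        strex   ip, r1, [r3]    @ cond != 0 if the exclusive store failed
        cmp     ip, #0
        bne     1b

   Register choices are arbitrary; barriers or acquire/release forms are
   added around this core according to MODEL_RTX, as in the
   compare-and-swap splitter above.  */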
29006 \f
29007 #define MAX_VECT_LEN 16
29008
29009 struct expand_vec_perm_d
29010 {
29011 rtx target, op0, op1;
29012 vec_perm_indices perm;
29013 machine_mode vmode;
29014 bool one_vector_p;
29015 bool testing_p;
29016 };
29017
29018 /* Generate a variable permutation. */
29019
29020 static void
29021 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29022 {
29023 machine_mode vmode = GET_MODE (target);
29024 bool one_vector_p = rtx_equal_p (op0, op1);
29025
29026 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29027 gcc_checking_assert (GET_MODE (op0) == vmode);
29028 gcc_checking_assert (GET_MODE (op1) == vmode);
29029 gcc_checking_assert (GET_MODE (sel) == vmode);
29030 gcc_checking_assert (TARGET_NEON);
29031
29032 if (one_vector_p)
29033 {
29034 if (vmode == V8QImode)
29035 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29036 else
29037 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29038 }
29039 else
29040 {
29041 rtx pair;
29042
29043 if (vmode == V8QImode)
29044 {
29045 pair = gen_reg_rtx (V16QImode);
29046 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29047 pair = gen_lowpart (TImode, pair);
29048 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29049 }
29050 else
29051 {
29052 pair = gen_reg_rtx (OImode);
29053 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29054 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29055 }
29056 }
29057 }
29058
29059 void
29060 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29061 {
29062 machine_mode vmode = GET_MODE (target);
29063 unsigned int nelt = GET_MODE_NUNITS (vmode);
29064 bool one_vector_p = rtx_equal_p (op0, op1);
29065 rtx mask;
29066
29067 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29068 numbering of elements for big-endian, we must reverse the order. */
29069 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29070
29071 /* The VTBL instruction does not use a modulo index, so we must take care
29072 of that ourselves. */
29073 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29074 mask = gen_const_vec_duplicate (vmode, mask);
29075 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29076
29077 arm_expand_vec_perm_1 (target, op0, op1, sel);
29078 }
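/* Hypothetical example of the masking above: a single-vector V8QImode
   permute with selector {9, 2, 11, 4, 13, 6, 15, 0} is ANDed with the
   replicated constant 7, giving indices {1, 2, 3, 4, 5, 6, 7, 0} before
   the VTBL is emitted, which matches VEC_PERM_EXPR's modulo semantics.  */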
29079
29080 /* Map lane ordering between architectural lane order, and GCC lane order,
29081 taking into account ABI. See comment above output_move_neon for details. */
29082
29083 static int
29084 neon_endian_lane_map (machine_mode mode, int lane)
29085 {
29086 if (BYTES_BIG_ENDIAN)
29087 {
29088 int nelems = GET_MODE_NUNITS (mode);
29089 /* Reverse lane order. */
29090 lane = (nelems - 1 - lane);
29091 /* Reverse D register order, to match ABI. */
29092 if (GET_MODE_SIZE (mode) == 16)
29093 lane = lane ^ (nelems / 2);
29094 }
29095 return lane;
29096 }
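/* Worked example (for illustration): on a big-endian target the map for
   V4SImode (a 16-byte vector) is 0->1, 1->0, 2->3, 3->2; lanes are first
   reversed and then XORing with nelems / 2 swaps the two D registers.
   On little-endian targets the map is the identity.  */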
29097
/* Some permutations index into pairs of vectors; this is a helper function
   to map indexes into those pairs of vectors.  */
29100
29101 static int
29102 neon_pair_endian_lane_map (machine_mode mode, int lane)
29103 {
29104 int nelem = GET_MODE_NUNITS (mode);
29105 if (BYTES_BIG_ENDIAN)
29106 lane =
29107 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29108 return lane;
29109 }
29110
29111 /* Generate or test for an insn that supports a constant permutation. */
29112
29113 /* Recognize patterns for the VUZP insns. */
29114
29115 static bool
29116 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29117 {
29118 unsigned int i, odd, mask, nelt = d->perm.length ();
29119 rtx out0, out1, in0, in1;
29120 int first_elem;
29121 int swap_nelt;
29122
29123 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29124 return false;
29125
  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big-endian pattern on 64-bit vectors, so we correct for that.  */
29128 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29129 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29130
29131 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29132
29133 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29134 odd = 0;
29135 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29136 odd = 1;
29137 else
29138 return false;
29139 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29140
29141 for (i = 0; i < nelt; i++)
29142 {
29143 unsigned elt =
29144 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29145 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29146 return false;
29147 }
29148
29149 /* Success! */
29150 if (d->testing_p)
29151 return true;
29152
29153 in0 = d->op0;
29154 in1 = d->op1;
29155 if (swap_nelt != 0)
29156 std::swap (in0, in1);
29157
29158 out0 = d->target;
29159 out1 = gen_reg_rtx (d->vmode);
29160 if (odd)
29161 std::swap (out0, out1);
29162
29163 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
29164 return true;
29165 }
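/* For illustration: on a little-endian target, a two-operand V8QImode
   permutation with indices {0, 2, 4, 6, 8, 10, 12, 14} (the even elements
   of the concatenated inputs) is matched here with odd == 0 and is
   implemented by a single VUZP, whose first output is d->target.  */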
29166
29167 /* Recognize patterns for the VZIP insns. */
29168
29169 static bool
29170 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29171 {
29172 unsigned int i, high, mask, nelt = d->perm.length ();
29173 rtx out0, out1, in0, in1;
29174 int first_elem;
29175 bool is_swapped;
29176
29177 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29178 return false;
29179
29180 is_swapped = BYTES_BIG_ENDIAN;
29181
29182 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29183
29184 high = nelt / 2;
29185 if (first_elem == neon_endian_lane_map (d->vmode, high))
29186 ;
29187 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29188 high = 0;
29189 else
29190 return false;
29191 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29192
29193 for (i = 0; i < nelt / 2; i++)
29194 {
29195 unsigned elt =
29196 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29197 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29198 != elt)
29199 return false;
29200 elt =
29201 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29202 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29203 != elt)
29204 return false;
29205 }
29206
29207 /* Success! */
29208 if (d->testing_p)
29209 return true;
29210
29211 in0 = d->op0;
29212 in1 = d->op1;
29213 if (is_swapped)
29214 std::swap (in0, in1);
29215
29216 out0 = d->target;
29217 out1 = gen_reg_rtx (d->vmode);
29218 if (high)
29219 std::swap (out0, out1);
29220
29221 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
29222 return true;
29223 }
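/* For illustration: on a little-endian target, a two-operand V8QImode
   permutation with indices {0, 8, 1, 9, 2, 10, 3, 11} (interleaving the
   low halves of the two inputs) is matched here with high == 0 and is
   implemented by a single VZIP.  */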
29224
29225 /* Recognize patterns for the VREV insns. */
29226 static bool
29227 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29228 {
29229 unsigned int i, j, diff, nelt = d->perm.length ();
29230 rtx (*gen) (machine_mode, rtx, rtx);
29231
29232 if (!d->one_vector_p)
29233 return false;
29234
29235 diff = d->perm[0];
29236 switch (diff)
29237 {
29238 case 7:
29239 switch (d->vmode)
29240 {
29241 case E_V16QImode:
29242 case E_V8QImode:
29243 gen = gen_neon_vrev64;
29244 break;
29245 default:
29246 return false;
29247 }
29248 break;
29249 case 3:
29250 switch (d->vmode)
29251 {
29252 case E_V16QImode:
29253 case E_V8QImode:
29254 gen = gen_neon_vrev32;
29255 break;
29256 case E_V8HImode:
29257 case E_V4HImode:
29258 case E_V8HFmode:
29259 case E_V4HFmode:
29260 gen = gen_neon_vrev64;
29261 break;
29262 default:
29263 return false;
29264 }
29265 break;
29266 case 1:
29267 switch (d->vmode)
29268 {
29269 case E_V16QImode:
29270 case E_V8QImode:
29271 gen = gen_neon_vrev16;
29272 break;
29273 case E_V8HImode:
29274 case E_V4HImode:
29275 gen = gen_neon_vrev32;
29276 break;
29277 case E_V4SImode:
29278 case E_V2SImode:
29279 case E_V4SFmode:
29280 case E_V2SFmode:
29281 gen = gen_neon_vrev64;
29282 break;
29283 default:
29284 return false;
29285 }
29286 break;
29287 default:
29288 return false;
29289 }
29290
29291 for (i = 0; i < nelt ; i += diff + 1)
29292 for (j = 0; j <= diff; j += 1)
29293 {
        /* This is guaranteed to be true, as the value of diff
           is 7, 3 or 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
29299 gcc_assert (i + j < nelt);
29300 if (d->perm[i + j] != i + diff - j)
29301 return false;
29302 }
29303
29304 /* Success! */
29305 if (d->testing_p)
29306 return true;
29307
29308 emit_insn (gen (d->vmode, d->target, d->op0));
29309 return true;
29310 }
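/* For illustration: a single-operand V8HImode permutation with indices
   {1, 0, 3, 2, 5, 4, 7, 6} has diff == 1, swapping adjacent 16-bit
   elements, and so is implemented with VREV32; indices
   {3, 2, 1, 0, 7, 6, 5, 4} would give diff == 3 and use VREV64.  */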
29311
29312 /* Recognize patterns for the VTRN insns. */
29313
29314 static bool
29315 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29316 {
29317 unsigned int i, odd, mask, nelt = d->perm.length ();
29318 rtx out0, out1, in0, in1;
29319
29320 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29321 return false;
29322
29323 /* Note that these are little-endian tests. Adjust for big-endian later. */
29324 if (d->perm[0] == 0)
29325 odd = 0;
29326 else if (d->perm[0] == 1)
29327 odd = 1;
29328 else
29329 return false;
29330 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29331
29332 for (i = 0; i < nelt; i += 2)
29333 {
29334 if (d->perm[i] != i + odd)
29335 return false;
29336 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29337 return false;
29338 }
29339
29340 /* Success! */
29341 if (d->testing_p)
29342 return true;
29343
29344 in0 = d->op0;
29345 in1 = d->op1;
29346 if (BYTES_BIG_ENDIAN)
29347 {
29348 std::swap (in0, in1);
29349 odd = !odd;
29350 }
29351
29352 out0 = d->target;
29353 out1 = gen_reg_rtx (d->vmode);
29354 if (odd)
29355 std::swap (out0, out1);
29356
29357 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
29358 return true;
29359 }
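/* For illustration: on a little-endian target, a two-operand V8QImode
   permutation with indices {0, 8, 2, 10, 4, 12, 6, 14} (the even lanes of
   both inputs, interleaved) is matched here with odd == 0 and becomes a
   single VTRN, whose first output is d->target.  */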
29360
29361 /* Recognize patterns for the VEXT insns. */
29362
29363 static bool
29364 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29365 {
29366 unsigned int i, nelt = d->perm.length ();
29367 rtx offset;
29368
29369 unsigned int location;
29370
29371 unsigned int next = d->perm[0] + 1;
29372
29373 /* TODO: Handle GCC's numbering of elements for big-endian. */
29374 if (BYTES_BIG_ENDIAN)
29375 return false;
29376
29377 /* Check if the extracted indexes are increasing by one. */
29378 for (i = 1; i < nelt; next++, i++)
29379 {
29380 /* If we hit the most significant element of the 2nd vector in
29381 the previous iteration, no need to test further. */
29382 if (next == 2 * nelt)
29383 return false;
29384
      /* If we are operating on only one vector, it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
29388 if (d->one_vector_p && (next == nelt))
29389 {
29390 if ((nelt == 2) && (d->vmode != V2DImode))
29391 return false;
29392 else
29393 next = 0;
29394 }
29395
29396 if (d->perm[i] != next)
29397 return false;
29398 }
29399
29400 location = d->perm[0];
29401
29402 /* Success! */
29403 if (d->testing_p)
29404 return true;
29405
29406 offset = GEN_INT (location);
29407
  if (d->vmode == E_DImode)
29409 return false;
29410
29411 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
29412 return true;
29413 }
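/* For illustration: a two-operand V4SImode permutation with indices
   {1, 2, 3, 4} selects a window starting one element into the
   concatenation of the two inputs, so it is matched here and emitted as
   a VEXT with an offset of 1.  */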
29414
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */
29420
29421 static bool
29422 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29423 {
29424 rtx rperm[MAX_VECT_LEN], sel;
29425 machine_mode vmode = d->vmode;
29426 unsigned int i, nelt = d->perm.length ();
29427
29428 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29429 numbering of elements for big-endian, we must reverse the order. */
29430 if (BYTES_BIG_ENDIAN)
29431 return false;
29432
29433 if (d->testing_p)
29434 return true;
29435
  /* Generic code will try constant permutation twice: once with the
     original mode and again with the elements lowered to QImode.
     So wait, and don't do the selector expansion ourselves.  */
29439 if (vmode != V8QImode && vmode != V16QImode)
29440 return false;
29441
29442 for (i = 0; i < nelt; ++i)
29443 rperm[i] = GEN_INT (d->perm[i]);
29444 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29445 sel = force_reg (vmode, sel);
29446
29447 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29448 return true;
29449 }
29450
29451 static bool
29452 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29453 {
29454 /* Check if the input mask matches vext before reordering the
29455 operands. */
29456 if (TARGET_NEON)
29457 if (arm_evpc_neon_vext (d))
29458 return true;
29459
29460 /* The pattern matching functions above are written to look for a small
29461 number to begin the sequence (0, 1, N/2). If we begin with an index
29462 from the second operand, we can swap the operands. */
29463 unsigned int nelt = d->perm.length ();
29464 if (d->perm[0] >= nelt)
29465 {
29466 d->perm.rotate_inputs (1);
29467 std::swap (d->op0, d->op1);
29468 }
29469
29470 if (TARGET_NEON)
29471 {
29472 if (arm_evpc_neon_vuzp (d))
29473 return true;
29474 if (arm_evpc_neon_vzip (d))
29475 return true;
29476 if (arm_evpc_neon_vrev (d))
29477 return true;
29478 if (arm_evpc_neon_vtrn (d))
29479 return true;
29480 return arm_evpc_neon_vtbl (d);
29481 }
29482 return false;
29483 }
29484
29485 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29486
29487 static bool
29488 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29489 const vec_perm_indices &sel)
29490 {
29491 struct expand_vec_perm_d d;
29492 int i, nelt, which;
29493
29494 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29495 return false;
29496
29497 d.target = target;
29498 d.op0 = op0;
29499 d.op1 = op1;
29500
29501 d.vmode = vmode;
29502 gcc_assert (VECTOR_MODE_P (d.vmode));
29503 d.testing_p = !target;
29504
29505 nelt = GET_MODE_NUNITS (d.vmode);
29506 for (i = which = 0; i < nelt; ++i)
29507 {
29508 int ei = sel[i] & (2 * nelt - 1);
29509 which |= (ei < nelt ? 1 : 2);
29510 }
29511
29512 switch (which)
29513 {
29514 default:
      gcc_unreachable ();
29516
29517 case 3:
29518 d.one_vector_p = false;
29519 if (d.testing_p || !rtx_equal_p (op0, op1))
29520 break;
29521
29522 /* The elements of PERM do not suggest that only the first operand
29523 is used, but both operands are identical. Allow easier matching
29524 of the permutation by folding the permutation into the single
29525 input vector. */
29526 /* FALLTHRU */
29527 case 2:
29528 d.op0 = op1;
29529 d.one_vector_p = true;
29530 break;
29531
29532 case 1:
29533 d.op1 = op0;
29534 d.one_vector_p = true;
29535 break;
29536 }
29537
29538 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29539
29540 if (!d.testing_p)
29541 return arm_expand_vec_perm_const_1 (&d);
29542
29543 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29544 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29545 if (!d.one_vector_p)
29546 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29547
29548 start_sequence ();
29549 bool ret = arm_expand_vec_perm_const_1 (&d);
29550 end_sequence ();
29551
29552 return ret;
29553 }
29554
29555 bool
29556 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29557 {
  /* If we are soft float and we either have LDRD or the mode is no wider
     than a word, then all auto-increment forms are OK.  */
29560 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29561 return true;
29562
29563 switch (code)
29564 {
    /* Post-increment and pre-decrement are supported for all
       instruction forms except for vector forms.  */
29567 case ARM_POST_INC:
29568 case ARM_PRE_DEC:
29569 if (VECTOR_MODE_P (mode))
29570 {
29571 if (code != ARM_PRE_DEC)
29572 return true;
29573 else
29574 return false;
29575 }
29576
29577 return true;
29578
29579 case ARM_POST_DEC:
29580 case ARM_PRE_INC:
      /* Without LDRD, and with a mode size greater than the
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
29584 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29585 return false;
29586
29587 /* Vector and floating point modes do not support
29588 these auto increment forms. */
29589 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29590 return false;
29591
29592 return true;
29593
29594 default:
29595 return false;
29596
29597 }
29598
29599 return false;
29600 }
29601
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
29604 Additionally, the default expansion code is not available or suitable
29605 for post-reload insn splits (this can occur when the register allocator
29606 chooses not to do a shift in NEON).
29607
29608 This function is used in both initial expand and post-reload splits, and
29609 handles all kinds of 64-bit shifts.
29610
29611 Input requirements:
29612 - It is safe for the input and output to be the same register, but
29613 early-clobber rules apply for the shift amount and scratch registers.
29614 - Shift by register requires both scratch registers. In all other cases
29615 the scratch registers may be NULL.
29616 - Ashiftrt by a register also clobbers the CC register. */
29617 void
29618 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29619 rtx amount, rtx scratch1, rtx scratch2)
29620 {
29621 rtx out_high = gen_highpart (SImode, out);
29622 rtx out_low = gen_lowpart (SImode, out);
29623 rtx in_high = gen_highpart (SImode, in);
29624 rtx in_low = gen_lowpart (SImode, in);
29625
29626 /* Terminology:
29627 in = the register pair containing the input value.
29628 out = the destination register pair.
29629 up = the high- or low-part of each pair.
29630 down = the opposite part to "up".
29631 In a shift, we can consider bits to shift from "up"-stream to
29632 "down"-stream, so in a left-shift "up" is the low-part and "down"
29633 is the high-part of each register pair. */
29634
29635 rtx out_up = code == ASHIFT ? out_low : out_high;
29636 rtx out_down = code == ASHIFT ? out_high : out_low;
29637 rtx in_up = code == ASHIFT ? in_low : in_high;
29638 rtx in_down = code == ASHIFT ? in_high : in_low;
29639
29640 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29641 gcc_assert (out
29642 && (REG_P (out) || GET_CODE (out) == SUBREG)
29643 && GET_MODE (out) == DImode);
29644 gcc_assert (in
29645 && (REG_P (in) || GET_CODE (in) == SUBREG)
29646 && GET_MODE (in) == DImode);
29647 gcc_assert (amount
29648 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29649 && GET_MODE (amount) == SImode)
29650 || CONST_INT_P (amount)));
29651 gcc_assert (scratch1 == NULL
29652 || (GET_CODE (scratch1) == SCRATCH)
29653 || (GET_MODE (scratch1) == SImode
29654 && REG_P (scratch1)));
29655 gcc_assert (scratch2 == NULL
29656 || (GET_CODE (scratch2) == SCRATCH)
29657 || (GET_MODE (scratch2) == SImode
29658 && REG_P (scratch2)));
29659 gcc_assert (!REG_P (out) || !REG_P (amount)
29660 || !HARD_REGISTER_P (out)
29661 || (REGNO (out) != REGNO (amount)
29662 && REGNO (out) + 1 != REGNO (amount)));
29663
29664 /* Macros to make following code more readable. */
29665 #define SUB_32(DEST,SRC) \
29666 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29667 #define RSB_32(DEST,SRC) \
29668 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29669 #define SUB_S_32(DEST,SRC) \
29670 gen_addsi3_compare0 ((DEST), (SRC), \
29671 GEN_INT (-32))
29672 #define SET(DEST,SRC) \
29673 gen_rtx_SET ((DEST), (SRC))
29674 #define SHIFT(CODE,SRC,AMOUNT) \
29675 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29676 #define LSHIFT(CODE,SRC,AMOUNT) \
29677 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29678 SImode, (SRC), (AMOUNT))
29679 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29680 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29681 SImode, (SRC), (AMOUNT))
29682 #define ORR(A,B) \
29683 gen_rtx_IOR (SImode, (A), (B))
29684 #define BRANCH(COND,LABEL) \
29685 gen_arm_cond_branch ((LABEL), \
29686 gen_rtx_ ## COND (CCmode, cc_reg, \
29687 const0_rtx), \
29688 cc_reg)
29689
29690 /* Shifts by register and shifts by constant are handled separately. */
29691 if (CONST_INT_P (amount))
29692 {
29693 /* We have a shift-by-constant. */
29694
      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result that an ARM instruction
         in a shift-by-register would give.  This helps reduce execution
         differences between optimization levels, but it won't stop other
         parts of the compiler doing different things.  This is "undefined
         behavior", in any case.  */
29701 if (INTVAL (amount) <= 0)
29702 emit_insn (gen_movdi (out, in));
29703 else if (INTVAL (amount) >= 64)
29704 {
29705 if (code == ASHIFTRT)
29706 {
29707 rtx const31_rtx = GEN_INT (31);
29708 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29709 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29710 }
29711 else
29712 emit_insn (gen_movdi (out, const0_rtx));
29713 }
29714
29715 /* Now handle valid shifts. */
29716 else if (INTVAL (amount) < 32)
29717 {
29718 /* Shifts by a constant less than 32. */
29719 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29720
29721 /* Clearing the out register in DImode first avoids lots
29722 of spilling and results in less stack usage.
29723 Later this redundant insn is completely removed.
29724 Do that only if "in" and "out" are different registers. */
29725 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29726 emit_insn (SET (out, const0_rtx));
29727 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29728 emit_insn (SET (out_down,
29729 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29730 out_down)));
29731 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29732 }
29733 else
29734 {
29735 /* Shifts by a constant greater than 31. */
29736 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29737
29738 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29739 emit_insn (SET (out, const0_rtx));
29740 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29741 if (code == ASHIFTRT)
29742 emit_insn (gen_ashrsi3 (out_up, in_up,
29743 GEN_INT (31)));
29744 else
29745 emit_insn (SET (out_up, const0_rtx));
29746 }
29747 }
29748 else
29749 {
29750 /* We have a shift-by-register. */
29751 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29752
29753 /* This alternative requires the scratch registers. */
29754 gcc_assert (scratch1 && REG_P (scratch1));
29755 gcc_assert (scratch2 && REG_P (scratch2));
29756
29757 /* We will need the values "amount-32" and "32-amount" later.
29758 Swapping them around now allows the later code to be more general. */
29759 switch (code)
29760 {
29761 case ASHIFT:
29762 emit_insn (SUB_32 (scratch1, amount));
29763 emit_insn (RSB_32 (scratch2, amount));
29764 break;
29765 case ASHIFTRT:
29766 emit_insn (RSB_32 (scratch1, amount));
29767 /* Also set CC = amount > 32. */
29768 emit_insn (SUB_S_32 (scratch2, amount));
29769 break;
29770 case LSHIFTRT:
29771 emit_insn (RSB_32 (scratch1, amount));
29772 emit_insn (SUB_32 (scratch2, amount));
29773 break;
29774 default:
29775 gcc_unreachable ();
29776 }
29777
      /* Emit code like this:

         arithmetic-left:
           out_down = in_down << amount;
           out_down = (in_up << (amount - 32)) | out_down;
           out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
           out_up = in_up << amount;

         arithmetic-right:
           out_down = in_down >> amount;
           out_down = (in_up << (32 - amount)) | out_down;
           if (amount >= 32)
             out_down = ((signed)in_up >> (amount - 32)) | out_down;
           out_up = (signed)in_up >> amount;

         logical-right:
           out_down = in_down >> amount;
           out_down = (in_up << (32 - amount)) | out_down;
           out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
           out_up = in_up >> amount;

         (Register-specified shift amounts of 32 or more give zero for
         LSL/LSR, so in the logical-right case the extra ORR term is
         harmless and needs no guard; only the arithmetic-right case needs
         the conditional branch, because ASR fills with sign bits.)

         The ARM and Thumb2 variants are the same but implemented slightly
         differently.  If this were only called during expand we could just
         use the Thumb2 case and let combine do the right thing, but this
         can also be called from post-reload splitters.  */
29804
29805 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29806
29807 if (!TARGET_THUMB2)
29808 {
29809 /* Emit code for ARM mode. */
29810 emit_insn (SET (out_down,
29811 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29812 if (code == ASHIFTRT)
29813 {
29814 rtx_code_label *done_label = gen_label_rtx ();
29815 emit_jump_insn (BRANCH (LT, done_label));
29816 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29817 out_down)));
29818 emit_label (done_label);
29819 }
29820 else
29821 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29822 out_down)));
29823 }
29824 else
29825 {
29826 /* Emit code for Thumb2 mode.
29827 Thumb2 can't do shift and or in one insn. */
29828 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29829 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29830
29831 if (code == ASHIFTRT)
29832 {
29833 rtx_code_label *done_label = gen_label_rtx ();
29834 emit_jump_insn (BRANCH (LT, done_label));
29835 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29836 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29837 emit_label (done_label);
29838 }
29839 else
29840 {
29841 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29842 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29843 }
29844 }
29845
29846 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29847 }
29848
29849 #undef SUB_32
29850 #undef RSB_32
29851 #undef SUB_S_32
29852 #undef SET
29853 #undef SHIFT
29854 #undef LSHIFT
29855 #undef REV_LSHIFT
29856 #undef ORR
29857 #undef BRANCH
29858 }
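/* Illustrative sketch (not from the original sources): for a 64-bit left
   shift by register in ARM mode, the sequence built above corresponds
   roughly to:

        sub     scratch1, amount, #32
        rsb     scratch2, amount, #32
        lsl     out_hi, in_hi, amount
        orr     out_hi, out_hi, in_lo, lsl scratch1
        orr     out_hi, out_hi, in_lo, lsr scratch2
        lsl     out_lo, in_lo, amount

   It relies on register-specified shift amounts of 32 or more producing
   zero, so at most one of the two ORR terms contributes.  */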
29859
29860 /* Returns true if the pattern is a valid symbolic address, which is either a
29861 symbol_ref or (symbol_ref + addend).
29862
29863 According to the ARM ELF ABI, the initial addend of REL-type relocations
29864 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29865 literal field of the instruction as a 16-bit signed value in the range
29866 -32768 <= A < 32768. */
29867
29868 bool
29869 arm_valid_symbolic_address_p (rtx addr)
29870 {
29871 rtx xop0, xop1 = NULL_RTX;
29872 rtx tmp = addr;
29873
29874 if (target_word_relocations)
29875 return false;
29876
29877 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29878 return true;
29879
29880 /* (const (plus: symbol_ref const_int)) */
29881 if (GET_CODE (addr) == CONST)
29882 tmp = XEXP (addr, 0);
29883
29884 if (GET_CODE (tmp) == PLUS)
29885 {
29886 xop0 = XEXP (tmp, 0);
29887 xop1 = XEXP (tmp, 1);
29888
29889 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29890 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29891 }
29892
29893 return false;
29894 }
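/* For example (illustrative only): (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 4))) are accepted, while
   (const (plus (symbol_ref "foo") (const_int 0x10000))) is rejected
   because the addend is outside the signed 16-bit range, and everything
   is rejected when word relocations have been requested.  */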
29895
/* Returns true if *COMPARISON is a valid comparison operation, and puts
   the operands into a form that is valid for it.  */
29898 bool
29899 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29900 {
29901 enum rtx_code code = GET_CODE (*comparison);
29902 int code_int;
29903 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29904 ? GET_MODE (*op2) : GET_MODE (*op1);
29905
29906 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29907
29908 if (code == UNEQ || code == LTGT)
29909 return false;
29910
29911 code_int = (int)code;
29912 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29913 PUT_CODE (*comparison, (enum rtx_code)code_int);
29914
29915 switch (mode)
29916 {
29917 case E_SImode:
29918 if (!arm_add_operand (*op1, mode))
29919 *op1 = force_reg (mode, *op1);
29920 if (!arm_add_operand (*op2, mode))
29921 *op2 = force_reg (mode, *op2);
29922 return true;
29923
29924 case E_DImode:
29925 if (!cmpdi_operand (*op1, mode))
29926 *op1 = force_reg (mode, *op1);
29927 if (!cmpdi_operand (*op2, mode))
29928 *op2 = force_reg (mode, *op2);
29929 return true;
29930
29931 case E_HFmode:
29932 if (!TARGET_VFP_FP16INST)
29933 break;
29934 /* FP16 comparisons are done in SF mode. */
29935 mode = SFmode;
29936 *op1 = convert_to_mode (mode, *op1, 1);
29937 *op2 = convert_to_mode (mode, *op2, 1);
29938 /* Fall through. */
29939 case E_SFmode:
29940 case E_DFmode:
29941 if (!vfp_compare_operand (*op1, mode))
29942 *op1 = force_reg (mode, *op1);
29943 if (!vfp_compare_operand (*op2, mode))
29944 *op2 = force_reg (mode, *op2);
29945 return true;
29946 default:
29947 break;
29948 }
29949
29950 return false;
29951
29952 }
29953
/* Maximum number of instructions to set a block of memory.  */
29955 static int
29956 arm_block_set_max_insns (void)
29957 {
29958 if (optimize_function_for_size_p (cfun))
29959 return 4;
29960 else
29961 return current_tune->max_insns_inline_memset;
29962 }
29963
/* Return TRUE if it's profitable to set a block of memory for the
   non-vectorized case.  VAL is the value to set the memory with.
   LENGTH is the number of bytes to set.  ALIGN is the alignment of
   the destination memory in bytes.  UNALIGNED_P is TRUE if we can
   only set the memory with instructions meeting alignment
   requirements.  USE_STRD_P is TRUE if we can use strd to set the
   memory.  */
29971 static bool
29972 arm_block_set_non_vect_profit_p (rtx val,
29973 unsigned HOST_WIDE_INT length,
29974 unsigned HOST_WIDE_INT align,
29975 bool unaligned_p, bool use_strd_p)
29976 {
29977 int num = 0;
29978 /* For leftovers in bytes of 0-7, we can set the memory block using
29979 strb/strh/str with minimum instruction number. */
29980 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29981
29982 if (unaligned_p)
29983 {
29984 num = arm_const_inline_cost (SET, val);
29985 num += length / align + length % align;
29986 }
29987 else if (use_strd_p)
29988 {
29989 num = arm_const_double_inline_cost (val);
29990 num += (length >> 3) + leftover[length & 7];
29991 }
29992 else
29993 {
29994 num = arm_const_inline_cost (SET, val);
29995 num += (length >> 2) + leftover[length & 3];
29996 }
29997
29998 /* We may be able to combine last pair STRH/STRB into a single STR
29999 by shifting one byte back. */
30000 if (unaligned_access && length > 3 && (length & 3) == 3)
30001 num--;
30002
30003 return (num <= arm_block_set_max_insns ());
30004 }
30005
/* Return TRUE if it's profitable to set a block of memory for the
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of the destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
30010 static bool
30011 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
30012 unsigned HOST_WIDE_INT align,
30013 machine_mode mode)
30014 {
30015 int num;
30016 bool unaligned_p = ((align & 3) != 0);
30017 unsigned int nelt = GET_MODE_NUNITS (mode);
30018
  /* Instruction loading the constant value.  */
  num = 1;
  /* Instructions storing to memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  We only need to
     adjust the address expression if it's 4-byte aligned and the bytes
     left over can only be stored by a misaligned store instruction.  */
30026 if (!unaligned_p && (length & 3) != 0)
30027 num++;
30028
30029 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30030 if (!unaligned_p && mode == V16QImode)
30031 num--;
30032
30033 return (num <= arm_block_set_max_insns ());
30034 }
30035
30036 /* Set a block of memory using vectorization instructions for the
30037 unaligned case. We fill the first LENGTH bytes of the memory
30038 area starting from DSTBASE with byte constant VALUE. ALIGN is
30039 the alignment requirement of memory. Return TRUE if succeeded. */
30040 static bool
30041 arm_block_set_unaligned_vect (rtx dstbase,
30042 unsigned HOST_WIDE_INT length,
30043 unsigned HOST_WIDE_INT value,
30044 unsigned HOST_WIDE_INT align)
30045 {
30046 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30047 rtx dst, mem;
30048 rtx val_vec, reg;
30049 rtx (*gen_func) (rtx, rtx);
30050 machine_mode mode;
30051 unsigned HOST_WIDE_INT v = value;
30052 unsigned int offset = 0;
30053 gcc_assert ((align & 0x3) != 0);
30054 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30055 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30056 if (length >= nelt_v16)
30057 {
30058 mode = V16QImode;
30059 gen_func = gen_movmisalignv16qi;
30060 }
30061 else
30062 {
30063 mode = V8QImode;
30064 gen_func = gen_movmisalignv8qi;
30065 }
30066 nelt_mode = GET_MODE_NUNITS (mode);
30067 gcc_assert (length >= nelt_mode);
30068 /* Skip if it isn't profitable. */
30069 if (!arm_block_set_vect_profit_p (length, align, mode))
30070 return false;
30071
30072 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30073 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30074
30075 v = sext_hwi (v, BITS_PER_WORD);
30076
30077 reg = gen_reg_rtx (mode);
30078 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30079 /* Emit instruction loading the constant value. */
30080 emit_move_insn (reg, val_vec);
30081
30082 /* Handle nelt_mode bytes in a vector. */
30083 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30084 {
30085 emit_insn ((*gen_func) (mem, reg));
30086 if (i + 2 * nelt_mode <= length)
30087 {
30088 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30089 offset += nelt_mode;
30090 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30091 }
30092 }
30093
  /* If at least nelt_v8 bytes are left over, we must be in
     V16QImode.  */
30096 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30097
30098 /* Handle (8, 16) bytes leftover. */
30099 if (i + nelt_v8 < length)
30100 {
30101 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30102 offset += length - i;
30103 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30104
30105 /* We are shifting bytes back, set the alignment accordingly. */
30106 if ((length & 1) != 0 && align >= 2)
30107 set_mem_align (mem, BITS_PER_UNIT);
30108
30109 emit_insn (gen_movmisalignv16qi (mem, reg));
30110 }
30111 /* Handle (0, 8] bytes leftover. */
30112 else if (i < length && i + nelt_v8 >= length)
30113 {
30114 if (mode == V16QImode)
30115 reg = gen_lowpart (V8QImode, reg);
30116
30117 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30118 + (nelt_mode - nelt_v8))));
30119 offset += (length - i) + (nelt_mode - nelt_v8);
30120 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30121
30122 /* We are shifting bytes back, set the alignment accordingly. */
30123 if ((length & 1) != 0 && align >= 2)
30124 set_mem_align (mem, BITS_PER_UNIT);
30125
30126 emit_insn (gen_movmisalignv8qi (mem, reg));
30127 }
30128
30129 return true;
30130 }
30131
30132 /* Set a block of memory using vectorization instructions for the
30133 aligned case. We fill the first LENGTH bytes of the memory area
30134 starting from DSTBASE with byte constant VALUE. ALIGN is the
30135 alignment requirement of memory. Return TRUE if succeeded. */
30136 static bool
30137 arm_block_set_aligned_vect (rtx dstbase,
30138 unsigned HOST_WIDE_INT length,
30139 unsigned HOST_WIDE_INT value,
30140 unsigned HOST_WIDE_INT align)
30141 {
30142 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30143 rtx dst, addr, mem;
30144 rtx val_vec, reg;
30145 machine_mode mode;
30146 unsigned int offset = 0;
30147
30148 gcc_assert ((align & 0x3) == 0);
30149 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30150 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30151 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30152 mode = V16QImode;
30153 else
30154 mode = V8QImode;
30155
30156 nelt_mode = GET_MODE_NUNITS (mode);
30157 gcc_assert (length >= nelt_mode);
30158 /* Skip if it isn't profitable. */
30159 if (!arm_block_set_vect_profit_p (length, align, mode))
30160 return false;
30161
30162 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30163
30164 reg = gen_reg_rtx (mode);
30165 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30166 /* Emit instruction loading the constant value. */
30167 emit_move_insn (reg, val_vec);
30168
30169 i = 0;
30170 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30171 if (mode == V16QImode)
30172 {
30173 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30174 emit_insn (gen_movmisalignv16qi (mem, reg));
30175 i += nelt_mode;
30176 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30177 if (i + nelt_v8 < length && i + nelt_v16 > length)
30178 {
30179 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30180 offset += length - nelt_mode;
30181 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30182 /* We are shifting bytes back, set the alignment accordingly. */
30183 if ((length & 0x3) == 0)
30184 set_mem_align (mem, BITS_PER_UNIT * 4);
30185 else if ((length & 0x1) == 0)
30186 set_mem_align (mem, BITS_PER_UNIT * 2);
30187 else
30188 set_mem_align (mem, BITS_PER_UNIT);
30189
30190 emit_insn (gen_movmisalignv16qi (mem, reg));
30191 return true;
30192 }
30193 /* Fall through for bytes leftover. */
30194 mode = V8QImode;
30195 nelt_mode = GET_MODE_NUNITS (mode);
30196 reg = gen_lowpart (V8QImode, reg);
30197 }
30198
30199 /* Handle 8 bytes in a vector. */
30200 for (; (i + nelt_mode <= length); i += nelt_mode)
30201 {
30202 addr = plus_constant (Pmode, dst, i);
30203 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30204 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
30205 emit_move_insn (mem, reg);
30206 else
30207 emit_insn (gen_unaligned_storev8qi (mem, reg));
30208 }
30209
30210 /* Handle single word leftover by shifting 4 bytes back. We can
30211 use aligned access for this case. */
30212 if (i + UNITS_PER_WORD == length)
30213 {
30214 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30215 offset += i - UNITS_PER_WORD;
30216 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30217 /* We are shifting 4 bytes back, set the alignment accordingly. */
30218 if (align > UNITS_PER_WORD)
30219 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30220
30221 emit_insn (gen_unaligned_storev8qi (mem, reg));
30222 }
30223 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30224 We have to use unaligned access for this case. */
30225 else if (i < length)
30226 {
30227 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30228 offset += length - nelt_mode;
30229 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30230 /* We are shifting bytes back, set the alignment accordingly. */
30231 if ((length & 1) == 0)
30232 set_mem_align (mem, BITS_PER_UNIT * 2);
30233 else
30234 set_mem_align (mem, BITS_PER_UNIT);
30235
30236 emit_insn (gen_movmisalignv8qi (mem, reg));
30237 }
30238
30239 return true;
30240 }
30241
/* Set a block of memory using plain strh/strb instructions, using only
   instructions allowed by ALIGN on the processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of the memory.  */
30247 static bool
30248 arm_block_set_unaligned_non_vect (rtx dstbase,
30249 unsigned HOST_WIDE_INT length,
30250 unsigned HOST_WIDE_INT value,
30251 unsigned HOST_WIDE_INT align)
30252 {
30253 unsigned int i;
30254 rtx dst, addr, mem;
30255 rtx val_exp, val_reg, reg;
30256 machine_mode mode;
30257 HOST_WIDE_INT v = value;
30258
30259 gcc_assert (align == 1 || align == 2);
30260
30261 if (align == 2)
30262 v |= (value << BITS_PER_UNIT);
30263
30264 v = sext_hwi (v, BITS_PER_WORD);
30265 val_exp = GEN_INT (v);
30266 /* Skip if it isn't profitable. */
30267 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30268 align, true, false))
30269 return false;
30270
30271 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30272 mode = (align == 2 ? HImode : QImode);
30273 val_reg = force_reg (SImode, val_exp);
30274 reg = gen_lowpart (mode, val_reg);
30275
30276 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30277 {
30278 addr = plus_constant (Pmode, dst, i);
30279 mem = adjust_automodify_address (dstbase, mode, addr, i);
30280 emit_move_insn (mem, reg);
30281 }
30282
30283 /* Handle single byte leftover. */
30284 if (i + 1 == length)
30285 {
30286 reg = gen_lowpart (QImode, val_reg);
30287 addr = plus_constant (Pmode, dst, i);
30288 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30289 emit_move_insn (mem, reg);
30290 i++;
30291 }
30292
30293 gcc_assert (i == length);
30294 return true;
30295 }
30296
30297 /* Set a block of memory using plain strd/str/strh/strb instructions,
30298 to permit unaligned copies on processors which support unaligned
30299 semantics for those instructions. We fill the first LENGTH bytes
30300 of the memory area starting from DSTBASE with byte constant VALUE.
30301 ALIGN is the alignment requirement of memory. */
30302 static bool
30303 arm_block_set_aligned_non_vect (rtx dstbase,
30304 unsigned HOST_WIDE_INT length,
30305 unsigned HOST_WIDE_INT value,
30306 unsigned HOST_WIDE_INT align)
30307 {
30308 unsigned int i;
30309 rtx dst, addr, mem;
30310 rtx val_exp, val_reg, reg;
30311 unsigned HOST_WIDE_INT v;
30312 bool use_strd_p;
30313
30314 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30315 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30316
30317 v = (value | (value << 8) | (value << 16) | (value << 24));
30318 if (length < UNITS_PER_WORD)
30319 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30320
30321 if (use_strd_p)
30322 v |= (v << BITS_PER_WORD);
30323 else
30324 v = sext_hwi (v, BITS_PER_WORD);
30325
30326 val_exp = GEN_INT (v);
30327 /* Skip if it isn't profitable. */
30328 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30329 align, false, use_strd_p))
30330 {
30331 if (!use_strd_p)
30332 return false;
30333
30334 /* Try without strd. */
30335 v = (v >> BITS_PER_WORD);
30336 v = sext_hwi (v, BITS_PER_WORD);
30337 val_exp = GEN_INT (v);
30338 use_strd_p = false;
30339 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30340 align, false, use_strd_p))
30341 return false;
30342 }
30343
30344 i = 0;
30345 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30346 /* Handle double words using strd if possible. */
30347 if (use_strd_p)
30348 {
30349 val_reg = force_reg (DImode, val_exp);
30350 reg = val_reg;
30351 for (; (i + 8 <= length); i += 8)
30352 {
30353 addr = plus_constant (Pmode, dst, i);
30354 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30355 emit_move_insn (mem, reg);
30356 }
30357 }
30358 else
30359 val_reg = force_reg (SImode, val_exp);
30360
30361 /* Handle words. */
30362 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30363 for (; (i + 4 <= length); i += 4)
30364 {
30365 addr = plus_constant (Pmode, dst, i);
30366 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30367 if ((align & 3) == 0)
30368 emit_move_insn (mem, reg);
30369 else
30370 emit_insn (gen_unaligned_storesi (mem, reg));
30371 }
30372
30373 /* Merge last pair of STRH and STRB into a STR if possible. */
30374 if (unaligned_access && i > 0 && (i + 3) == length)
30375 {
30376 addr = plus_constant (Pmode, dst, i - 1);
30377 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30378 /* We are shifting one byte back, set the alignment accordingly. */
30379 if ((align & 1) == 0)
30380 set_mem_align (mem, BITS_PER_UNIT);
30381
30382 /* Most likely this is an unaligned access, and we can't tell at
30383 compilation time. */
30384 emit_insn (gen_unaligned_storesi (mem, reg));
30385 return true;
30386 }
30387
30388 /* Handle half word leftover. */
30389 if (i + 2 <= length)
30390 {
30391 reg = gen_lowpart (HImode, val_reg);
30392 addr = plus_constant (Pmode, dst, i);
30393 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30394 if ((align & 1) == 0)
30395 emit_move_insn (mem, reg);
30396 else
30397 emit_insn (gen_unaligned_storehi (mem, reg));
30398
30399 i += 2;
30400 }
30401
30402 /* Handle single byte leftover. */
30403 if (i + 1 == length)
30404 {
30405 reg = gen_lowpart (QImode, val_reg);
30406 addr = plus_constant (Pmode, dst, i);
30407 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30408 emit_move_insn (mem, reg);
30409 }
30410
30411 return true;
30412 }
30413
30414 /* Set a block of memory using vectorization instructions for both
30415 aligned and unaligned cases. We fill the first LENGTH bytes of
30416 the memory area starting from DSTBASE with byte constant VALUE.
30417 ALIGN is the alignment requirement of memory. */
30418 static bool
30419 arm_block_set_vect (rtx dstbase,
30420 unsigned HOST_WIDE_INT length,
30421 unsigned HOST_WIDE_INT value,
30422 unsigned HOST_WIDE_INT align)
30423 {
30424 /* Check whether we need to use unaligned store instruction. */
30425 if (((align & 3) != 0 || (length & 3) != 0)
30426 /* Check whether unaligned store instruction is available. */
30427 && (!unaligned_access || BYTES_BIG_ENDIAN))
30428 return false;
30429
30430 if ((align & 3) == 0)
30431 return arm_block_set_aligned_vect (dstbase, length, value, align);
30432 else
30433 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30434 }
30435
/* Expand a setmem (memory block set) operation.  First we try to do it
   using vectorization instructions, then fall back to ARM unaligned
   access and double-word stores if profitable.  OPERANDS[0] is the
   destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
   value to initialize the memory with, and OPERANDS[3] is the known
   alignment of the destination.  */
30442 bool
30443 arm_gen_setmem (rtx *operands)
30444 {
30445 rtx dstbase = operands[0];
30446 unsigned HOST_WIDE_INT length;
30447 unsigned HOST_WIDE_INT value;
30448 unsigned HOST_WIDE_INT align;
30449
30450 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30451 return false;
30452
30453 length = UINTVAL (operands[1]);
30454 if (length > 64)
30455 return false;
30456
30457 value = (UINTVAL (operands[2]) & 0xFF);
30458 align = UINTVAL (operands[3]);
30459 if (TARGET_NEON && length >= 8
30460 && current_tune->string_ops_prefer_neon
30461 && arm_block_set_vect (dstbase, length, value, align))
30462 return true;
30463
30464 if (!unaligned_access && (align & 3) != 0)
30465 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30466
30467 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30468 }
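/* Illustrative sketch, not part of the implementation: a call such as

     memset (buf, 0xAB, 15);

   reaches this expander with operands[1] == 15 and operands[2] == 0xAB.
   Because 15 <= 64 an inline expansion is attempted: the NEON path first
   (only when TARGET_NEON, length >= 8 and the tuning prefers NEON string
   ops), then the unaligned non-vector path when the destination is not
   word-aligned and unaligned access is unavailable, and otherwise the
   aligned non-vector path.  Returning false makes the setmem pattern FAIL,
   so longer or non-constant cases typically end up as a library call.  */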
30469
30470
30471 static bool
30472 arm_macro_fusion_p (void)
30473 {
30474 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30475 }
30476
30477 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30478 for MOVW / MOVT macro fusion. */
30479
30480 static bool
30481 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30482 {
30483 /* We are trying to fuse
30484 movw imm / movt imm
30485 instructions as a group that gets scheduled together. */
30486
30487 rtx set_dest = SET_DEST (curr_set);
30488
30489 if (GET_MODE (set_dest) != SImode)
30490 return false;
30491
30492 /* We are trying to match:
30493 prev (movw) == (set (reg r0) (const_int imm16))
30494 curr (movt) == (set (zero_extract (reg r0)
30495 (const_int 16)
30496 (const_int 16))
30497 (const_int imm16_1))
30498 or
30499 prev (movw) == (set (reg r1)
30500 (high (symbol_ref ("SYM"))))
30501 curr (movt) == (set (reg r0)
30502 (lo_sum (reg r1)
30503 (symbol_ref ("SYM")))) */
30504
30505 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30506 {
30507 if (CONST_INT_P (SET_SRC (curr_set))
30508 && CONST_INT_P (SET_SRC (prev_set))
30509 && REG_P (XEXP (set_dest, 0))
30510 && REG_P (SET_DEST (prev_set))
30511 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30512 return true;
30513
30514 }
30515 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30516 && REG_P (SET_DEST (curr_set))
30517 && REG_P (SET_DEST (prev_set))
30518 && GET_CODE (SET_SRC (prev_set)) == HIGH
30519 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30520 return true;
30521
30522 return false;
30523 }
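/* For illustration only: the fusible pair above corresponds to an assembly
   sequence such as

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   and the scheduler keeps the two instructions adjacent so that cores which
   can fuse MOVW/MOVT are able to do so.  */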
30524
30525 static bool
30526 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30527 {
30528 rtx prev_set = single_set (prev);
30529 rtx curr_set = single_set (curr);
30530
30531 if (!prev_set
30532 || !curr_set)
30533 return false;
30534
30535 if (any_condjump_p (curr))
30536 return false;
30537
30538 if (!arm_macro_fusion_p ())
30539 return false;
30540
30541 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30542 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30543 return true;
30544
30545 return false;
30546 }
30547
30548 /* Return true iff the instruction fusion described by OP is enabled. */
30549 bool
30550 arm_fusion_enabled_p (tune_params::fuse_ops op)
30551 {
30552 return current_tune->fusible_ops & op;
30553 }
30554
30555 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30556 scheduled for speculative execution. Reject the long-running division
30557 and square-root instructions. */
30558
30559 static bool
30560 arm_sched_can_speculate_insn (rtx_insn *insn)
30561 {
30562 switch (get_attr_type (insn))
30563 {
30564 case TYPE_SDIV:
30565 case TYPE_UDIV:
30566 case TYPE_FDIVS:
30567 case TYPE_FDIVD:
30568 case TYPE_FSQRTS:
30569 case TYPE_FSQRTD:
30570 case TYPE_NEON_FP_SQRT_S:
30571 case TYPE_NEON_FP_SQRT_D:
30572 case TYPE_NEON_FP_SQRT_S_Q:
30573 case TYPE_NEON_FP_SQRT_D_Q:
30574 case TYPE_NEON_FP_DIV_S:
30575 case TYPE_NEON_FP_DIV_D:
30576 case TYPE_NEON_FP_DIV_S_Q:
30577 case TYPE_NEON_FP_DIV_D_Q:
30578 return false;
30579 default:
30580 return true;
30581 }
30582 }
30583
30584 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30585
30586 static unsigned HOST_WIDE_INT
30587 arm_asan_shadow_offset (void)
30588 {
30589 return HOST_WIDE_INT_1U << 29;
30590 }
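/* Worked example (for illustration): with this offset AddressSanitizer maps
   an address A to shadow memory at (A >> 3) + 0x20000000, i.e. the shadow
   region starts 512MB into the 32-bit address space.  The >> 3 scale is
   ASan's default shadow granularity, not something selected here.  */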
30591
30592
30593 /* This is a temporary fix for PR60655. Ideally we need
30594 to handle most of these cases in the generic part but
30595 currently we reject minus (..) (sym_ref). We try to
30596 ameliorate the case with minus (sym_ref1) (sym_ref2)
30597 where they are in the same section. */
30598
30599 static bool
30600 arm_const_not_ok_for_debug_p (rtx p)
30601 {
30602 tree decl_op0 = NULL;
30603 tree decl_op1 = NULL;
30604
30605 if (GET_CODE (p) == UNSPEC)
30606 return true;
30607 if (GET_CODE (p) == MINUS)
30608 {
30609 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30610 {
30611 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30612 if (decl_op1
30613 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30614 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30615 {
30616 if ((VAR_P (decl_op1)
30617 || TREE_CODE (decl_op1) == CONST_DECL)
30618 && (VAR_P (decl_op0)
30619 || TREE_CODE (decl_op0) == CONST_DECL))
30620 return (get_variable_section (decl_op1, false)
30621 != get_variable_section (decl_op0, false));
30622
30623 if (TREE_CODE (decl_op1) == LABEL_DECL
30624 && TREE_CODE (decl_op0) == LABEL_DECL)
30625 return (DECL_CONTEXT (decl_op1)
30626 != DECL_CONTEXT (decl_op0));
30627 }
30628
30629 return true;
30630 }
30631 }
30632
30633 return false;
30634 }
30635
30636 /* Return TRUE if X is a reference to a value in a constant pool. */
30637 extern bool
30638 arm_is_constant_pool_ref (rtx x)
30639 {
30640 return (MEM_P (x)
30641 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30642 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30643 }
30644
30645 /* Remember the last target of arm_set_current_function. */
30646 static GTY(()) tree arm_previous_fndecl;
30647
30648 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30649
30650 void
30651 save_restore_target_globals (tree new_tree)
30652 {
30653 /* If we have a previous state, use it. */
30654 if (TREE_TARGET_GLOBALS (new_tree))
30655 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30656 else if (new_tree == target_option_default_node)
30657 restore_target_globals (&default_target_globals);
30658 else
30659 {
30660 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30661 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30662 }
30663
30664 arm_option_params_internal ();
30665 }
30666
30667 /* Invalidate arm_previous_fndecl. */
30668
30669 void
30670 arm_reset_previous_fndecl (void)
30671 {
30672 arm_previous_fndecl = NULL_TREE;
30673 }
30674
30675 /* Establish appropriate back-end context for processing the function
30676 FNDECL. The argument might be NULL to indicate processing at top
30677 level, outside of any function scope. */
30678
30679 static void
30680 arm_set_current_function (tree fndecl)
30681 {
30682 if (!fndecl || fndecl == arm_previous_fndecl)
30683 return;
30684
30685 tree old_tree = (arm_previous_fndecl
30686 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30687 : NULL_TREE);
30688
30689 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30690
30691 /* If current function has no attributes but previous one did,
30692 use the default node. */
30693 if (! new_tree && old_tree)
30694 new_tree = target_option_default_node;
30695
30696 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30697 the default have been handled by save_restore_target_globals from
30698 arm_pragma_target_parse. */
30699 if (old_tree == new_tree)
30700 return;
30701
30702 arm_previous_fndecl = fndecl;
30703
30704 /* First set the target options. */
30705 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30706
30707 save_restore_target_globals (new_tree);
30708 }
30709
30710 /* Implement TARGET_OPTION_PRINT. */
30711
30712 static void
30713 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30714 {
30715 int flags = ptr->x_target_flags;
30716 const char *fpu_name;
30717
30718 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30719 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30720
30721 fprintf (file, "%*sselected isa %s\n", indent, "",
30722 TARGET_THUMB2_P (flags) ? "thumb2" :
30723 TARGET_THUMB_P (flags) ? "thumb1" :
30724 "arm");
30725
30726 if (ptr->x_arm_arch_string)
30727 fprintf (file, "%*sselected architecture %s\n", indent, "",
30728 ptr->x_arm_arch_string);
30729
30730 if (ptr->x_arm_cpu_string)
30731 fprintf (file, "%*sselected CPU %s\n", indent, "",
30732 ptr->x_arm_cpu_string);
30733
30734 if (ptr->x_arm_tune_string)
30735 fprintf (file, "%*sselected tune %s\n", indent, "",
30736 ptr->x_arm_tune_string);
30737
30738 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30739 }
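/* Sample of the output produced above, purely illustrative (the exact
   strings depend on the selected target):

     selected isa thumb2
     selected architecture armv7-a
     selected tune cortex-a9
     selected fpu vfpv3-d16  */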
30740
30741 /* Hook to determine if one function can safely inline another. */
30742
30743 static bool
30744 arm_can_inline_p (tree caller, tree callee)
30745 {
30746 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30747 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30748 bool can_inline = true;
30749
30750 struct cl_target_option *caller_opts
30751 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30752 : target_option_default_node);
30753
30754 struct cl_target_option *callee_opts
30755 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30756 : target_option_default_node);
30757
30758 if (callee_opts == caller_opts)
30759 return true;
30760
30761 /* Callee's ISA features should be a subset of the caller's. */
30762 struct arm_build_target caller_target;
30763 struct arm_build_target callee_target;
30764 caller_target.isa = sbitmap_alloc (isa_num_bits);
30765 callee_target.isa = sbitmap_alloc (isa_num_bits);
30766
30767 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30768 false);
30769 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30770 false);
30771 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30772 can_inline = false;
30773
30774 sbitmap_free (caller_target.isa);
30775 sbitmap_free (callee_target.isa);
30776
30777 /* OK to inline between different modes.
30778 Functions with mode-specific instructions, e.g. using asm,
30779 must be explicitly protected with noinline. */
30780 return can_inline;
30781 }
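/* Example of the subset rule above (illustrative): a callee declared with
   __attribute__ ((target ("arch=armv8-a+crc"))) is rejected for inlining
   into a caller built for plain armv7-a, because the callee's ISA feature
   bits are not a subset of the caller's.  */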
30782
30783 /* Hook to fix function's alignment affected by target attribute. */
30784
30785 static void
30786 arm_relayout_function (tree fndecl)
30787 {
30788 if (DECL_USER_ALIGN (fndecl))
30789 return;
30790
30791 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30792
30793 if (!callee_tree)
30794 callee_tree = target_option_default_node;
30795
30796 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30797 SET_DECL_ALIGN
30798 (fndecl,
30799 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30800 }
30801
30802 /* Inner function to process attribute((target(...))): take an argument and
30803 set the current options from that argument. If we have a list, recursively
30804 go over the list. */
30805
30806 static bool
30807 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30808 {
30809 if (TREE_CODE (args) == TREE_LIST)
30810 {
30811 bool ret = true;
30812
30813 for (; args; args = TREE_CHAIN (args))
30814 if (TREE_VALUE (args)
30815 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30816 ret = false;
30817 return ret;
30818 }
30819
30820 else if (TREE_CODE (args) != STRING_CST)
30821 {
30822 error ("attribute %<target%> argument not a string");
30823 return false;
30824 }
30825
30826 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30827 char *q;
30828
30829 while ((q = strtok (argstr, ",")) != NULL)
30830 {
30831 argstr = NULL;
30832 if (!strcmp (q, "thumb"))
30833 opts->x_target_flags |= MASK_THUMB;
30834
30835 else if (!strcmp (q, "arm"))
30836 opts->x_target_flags &= ~MASK_THUMB;
30837
30838 else if (!strcmp (q, "general-regs-only"))
30839 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
30840
30841 else if (!strncmp (q, "fpu=", 4))
30842 {
30843 int fpu_index;
30844 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
30845 &fpu_index, CL_TARGET))
30846 {
30847 error ("invalid fpu for target attribute or pragma %qs", q);
30848 return false;
30849 }
30850 if (fpu_index == TARGET_FPU_auto)
30851 {
30852 /* This doesn't really make sense until we support
30853 general dynamic selection of the architecture and all
30854 sub-features. */
30855 sorry ("auto fpu selection not currently permitted here");
30856 return false;
30857 }
30858 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30859 }
30860 else if (!strncmp (q, "arch=", 5))
30861 {
30862 char *arch = q + 5;
30863 const arch_option *arm_selected_arch
30864 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30865
30866 if (!arm_selected_arch)
30867 {
30868 error ("invalid architecture for target attribute or pragma %qs",
30869 q);
30870 return false;
30871 }
30872
30873 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30874 }
30875 else if (q[0] == '+')
30876 {
30877 opts->x_arm_arch_string
30878 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30879 }
30880 else
30881 {
30882 error ("unknown target attribute or pragma %qs", q);
30883 return false;
30884 }
30885 }
30886
30887 return true;
30888 }
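/* Illustrative examples of strings accepted by the parser above:

     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arm,fpu=vfpv3-d16")))
     __attribute__ ((target ("arch=armv7-a+simd")))

   Each comma-separated token is consumed by one iteration of the strtok
   loop; a token starting with '+' is appended to the current architecture
   string.  */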
30889
30890 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30891
30892 tree
30893 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30894 struct gcc_options *opts_set)
30895 {
30896 struct cl_target_option cl_opts;
30897
30898 if (!arm_valid_target_attribute_rec (args, opts))
30899 return NULL_TREE;
30900
30901 cl_target_option_save (&cl_opts, opts);
30902 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30903 arm_option_check_internal (opts);
30904 /* Do any overrides, such as global options arch=xxx.
30905 We do this since arm_active_target was overridden. */
30906 arm_option_reconfigure_globals ();
30907 arm_options_perform_arch_sanity_checks ();
30908 arm_option_override_internal (opts, opts_set);
30909
30910 return build_target_option_node (opts);
30911 }
30912
30913 static void
30914 add_attribute (const char * mode, tree *attributes)
30915 {
30916 size_t len = strlen (mode);
30917 tree value = build_string (len, mode);
30918
30919 TREE_TYPE (value) = build_array_type (char_type_node,
30920 build_index_type (size_int (len)));
30921
30922 *attributes = tree_cons (get_identifier ("target"),
30923 build_tree_list (NULL_TREE, value),
30924 *attributes);
30925 }
30926
30927 /* For testing. Insert thumb or arm modes alternately on functions. */
30928
30929 static void
30930 arm_insert_attributes (tree fndecl, tree * attributes)
30931 {
30932 const char *mode;
30933
30934 if (! TARGET_FLIP_THUMB)
30935 return;
30936
30937 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30938 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
30939 return;
30940
30941 /* Nested definitions must inherit mode. */
30942 if (current_function_decl)
30943 {
30944 mode = TARGET_THUMB ? "thumb" : "arm";
30945 add_attribute (mode, attributes);
30946 return;
30947 }
30948
30949 /* If there is already a setting don't change it. */
30950 if (lookup_attribute ("target", *attributes) != NULL)
30951 return;
30952
30953 mode = thumb_flipper ? "thumb" : "arm";
30954 add_attribute (mode, attributes);
30955
30956 thumb_flipper = !thumb_flipper;
30957 }
30958
30959 /* Hook to validate attribute((target("string"))). */
30960
30961 static bool
30962 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30963 tree args, int ARG_UNUSED (flags))
30964 {
30965 bool ret = true;
30966 struct gcc_options func_options;
30967 tree cur_tree, new_optimize;
30968 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30969
30970 /* Get the optimization options of the current function. */
30971 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30972
30973 /* If the function changed the optimization levels as well as setting target
30974 options, start with the optimizations specified. */
30975 if (!func_optimize)
30976 func_optimize = optimization_default_node;
30977
30978 /* Init func_options. */
30979 memset (&func_options, 0, sizeof (func_options));
30980 init_options_struct (&func_options, NULL);
30981 lang_hooks.init_options_struct (&func_options);
30982
30983 /* Initialize func_options to the defaults. */
30984 cl_optimization_restore (&func_options,
30985 TREE_OPTIMIZATION (func_optimize));
30986
30987 cl_target_option_restore (&func_options,
30988 TREE_TARGET_OPTION (target_option_default_node));
30989
30990 /* Set func_options flags with new target mode. */
30991 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30992 &global_options_set);
30993
30994 if (cur_tree == NULL_TREE)
30995 ret = false;
30996
30997 new_optimize = build_optimization_node (&func_options);
30998
30999 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
31000
31001 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31002
31003 finalize_options_struct (&func_options);
31004
31005 return ret;
31006 }
31007
31008 /* Match an ISA feature bitmap to a named FPU. We always use the
31009 first entry that exactly matches the feature set, so that we
31010 effectively canonicalize the FPU name for the assembler. */
31011 static const char*
31012 arm_identify_fpu_from_isa (sbitmap isa)
31013 {
31014 auto_sbitmap fpubits (isa_num_bits);
31015 auto_sbitmap cand_fpubits (isa_num_bits);
31016
31017 bitmap_and (fpubits, isa, isa_all_fpubits);
31018
31019 /* If there are no ISA feature bits relating to the FPU, we must be
31020 doing soft-float. */
31021 if (bitmap_empty_p (fpubits))
31022 return "softvfp";
31023
31024 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31025 {
31026 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
31027 if (bitmap_equal_p (fpubits, cand_fpubits))
31028 return all_fpus[i].name;
31029 }
31030 /* We must find an entry, or things have gone wrong. */
31031 gcc_unreachable ();
31032 }
31033
31034 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31035 by the function DECL. */
31036 void
31037 arm_declare_function_name (FILE *stream, const char *name, tree decl)
31038 {
31039 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31040
31041 struct cl_target_option *targ_options;
31042 if (target_parts)
31043 targ_options = TREE_TARGET_OPTION (target_parts);
31044 else
31045 targ_options = TREE_TARGET_OPTION (target_option_current_node);
31046 gcc_assert (targ_options);
31047
31048 /* Only update the assembler .arch string if it is distinct from the last
31049 such string we printed. arch_to_print is set conditionally in case
31050 targ_options->x_arm_arch_string is NULL which can be the case
31051 when cc1 is invoked directly without passing -march option. */
31052 std::string arch_to_print;
31053 if (targ_options->x_arm_arch_string)
31054 arch_to_print = targ_options->x_arm_arch_string;
31055
31056 if (arch_to_print != arm_last_printed_arch_string)
31057 {
31058 std::string arch_name
31059 = arch_to_print.substr (0, arch_to_print.find ("+"));
31060 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31061 const arch_option *arch
31062 = arm_parse_arch_option_name (all_architectures, "-march",
31063 targ_options->x_arm_arch_string);
31064 auto_sbitmap opt_bits (isa_num_bits);
31065
31066 gcc_assert (arch);
31067 if (arch->common.extensions)
31068 {
31069 for (const struct cpu_arch_extension *opt = arch->common.extensions;
31070 opt->name != NULL;
31071 opt++)
31072 {
31073 if (!opt->remove)
31074 {
31075 arm_initialize_isa (opt_bits, opt->isa_bits);
31076 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31077 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31078 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31079 opt->name);
31080 }
31081 }
31082 }
31083
31084 arm_last_printed_arch_string = arch_to_print;
31085 }
31086
31087 fprintf (stream, "\t.syntax unified\n");
31088
31089 if (TARGET_THUMB)
31090 {
31091 if (is_called_in_ARM_mode (decl)
31092 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31093 && cfun->is_thunk))
31094 fprintf (stream, "\t.code 32\n");
31095 else if (TARGET_THUMB1)
31096 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31097 else
31098 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31099 }
31100 else
31101 fprintf (stream, "\t.arm\n");
31102
31103 std::string fpu_to_print
31104 = TARGET_SOFT_FLOAT
31105 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31106
31107 if (fpu_to_print != arm_last_printed_fpu_string)
31108 {
31109 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31110 arm_last_printed_fpu_string = fpu_to_print;
31111 }
31112
31113 if (TARGET_POKE_FUNCTION_NAME)
31114 arm_poke_function_name (stream, (const char *) name);
31115 }
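/* For illustration only: for a Thumb-2 function targeting ARMv7-A with a
   VFPv3-D16 FPU, the directives emitted above might look like

     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv3-d16

   with the .arch and .fpu lines omitted when they repeat what was last
   printed.  */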
31116
31117 /* If MEM is in the form [base+offset], extract the two parts
31118 of the address into BASE and OFFSET; otherwise return false
31119 after clearing BASE and OFFSET. */
31120
31121 static bool
31122 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31123 {
31124 rtx addr;
31125
31126 gcc_assert (MEM_P (mem));
31127
31128 addr = XEXP (mem, 0);
31129
31130 /* Strip off const from addresses like (const (addr)). */
31131 if (GET_CODE (addr) == CONST)
31132 addr = XEXP (addr, 0);
31133
31134 if (GET_CODE (addr) == REG)
31135 {
31136 *base = addr;
31137 *offset = const0_rtx;
31138 return true;
31139 }
31140
31141 if (GET_CODE (addr) == PLUS
31142 && GET_CODE (XEXP (addr, 0)) == REG
31143 && CONST_INT_P (XEXP (addr, 1)))
31144 {
31145 *base = XEXP (addr, 0);
31146 *offset = XEXP (addr, 1);
31147 return true;
31148 }
31149
31150 *base = NULL_RTX;
31151 *offset = NULL_RTX;
31152
31153 return false;
31154 }
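/* Examples for the helper above (illustrative):

     (mem (reg r1))                         -> base r1, offset 0
     (mem (plus (reg r1) (const_int 8)))    -> base r1, offset 8
     (mem (post_inc (reg r1)))              -> false, BASE/OFFSET cleared  */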
31155
31156 /* If INSN is a load or store with an address in the form [base+offset],
31157 extract the two parts into BASE and OFFSET. IS_LOAD is set
31158 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31159 otherwise return FALSE. */
31160
31161 static bool
31162 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31163 {
31164 rtx x, dest, src;
31165
31166 gcc_assert (INSN_P (insn));
31167 x = PATTERN (insn);
31168 if (GET_CODE (x) != SET)
31169 return false;
31170
31171 src = SET_SRC (x);
31172 dest = SET_DEST (x);
31173 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31174 {
31175 *is_load = false;
31176 extract_base_offset_in_addr (dest, base, offset);
31177 }
31178 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31179 {
31180 *is_load = true;
31181 extract_base_offset_in_addr (src, base, offset);
31182 }
31183 else
31184 return false;
31185
31186 return (*base != NULL_RTX && *offset != NULL_RTX);
31187 }
31188
31189 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31190
31191 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31192 and PRI are only calculated for these instructions. For other instructions,
31193 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
31194 of instruction fusion can be supported by returning different priorities.
31195
31196 It's important that irrelevant instructions get the largest FUSION_PRI. */
31197
31198 static void
31199 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31200 int *fusion_pri, int *pri)
31201 {
31202 int tmp, off_val;
31203 bool is_load;
31204 rtx base, offset;
31205
31206 gcc_assert (INSN_P (insn));
31207
31208 tmp = max_pri - 1;
31209 if (!fusion_load_store (insn, &base, &offset, &is_load))
31210 {
31211 *pri = tmp;
31212 *fusion_pri = tmp;
31213 return;
31214 }
31215
31216 /* Load goes first. */
31217 if (is_load)
31218 *fusion_pri = tmp - 1;
31219 else
31220 *fusion_pri = tmp - 2;
31221
31222 tmp /= 2;
31223
31224 /* INSN with smaller base register goes first. */
31225 tmp -= ((REGNO (base) & 0xff) << 20);
31226
31227 /* INSN with smaller offset goes first. */
31228 off_val = (int)(INTVAL (offset));
31229 if (off_val >= 0)
31230 tmp -= (off_val & 0xfffff);
31231 else
31232 tmp += ((- off_val) & 0xfffff);
31233
31234 *pri = tmp;
31235 return;
31236 }
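/* Worked example (illustrative): for a load from [r3, #8] with MAX_PRI == M,
   the code above yields *fusion_pri == M - 2 (the load-goes-first case)
   and *pri == (M - 1) / 2 - (3 << 20) - 8, so a smaller base register
   number and a smaller offset both lower the value of PRI.  */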
31237
31238
31239 /* Construct and return a PARALLEL RTX vector with elements numbering the
31240 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31241 the vector - from the perspective of the architecture. This does not
31242 line up with GCC's perspective on lane numbers, so we end up with
31243 different masks depending on our target endian-ness. The diagram
31244 below may help. We must draw the distinction when building masks
31245 which select one half of the vector. An instruction selecting
31246 architectural low-lanes for a big-endian target, must be described using
31247 a mask selecting GCC high-lanes.
31248
31249 Big-Endian Little-Endian
31250
31251 GCC 0 1 2 3 3 2 1 0
31252 | x | x | x | x | | x | x | x | x |
31253 Architecture 3 2 1 0 3 2 1 0
31254
31255 Low Mask: { 2, 3 } { 0, 1 }
31256 High Mask: { 0, 1 } { 2, 3 }
31257 */
31258
31259 rtx
31260 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31261 {
31262 int nunits = GET_MODE_NUNITS (mode);
31263 rtvec v = rtvec_alloc (nunits / 2);
31264 int high_base = nunits / 2;
31265 int low_base = 0;
31266 int base;
31267 rtx t1;
31268 int i;
31269
31270 if (BYTES_BIG_ENDIAN)
31271 base = high ? low_base : high_base;
31272 else
31273 base = high ? high_base : low_base;
31274
31275 for (i = 0; i < nunits / 2; i++)
31276 RTVEC_ELT (v, i) = GEN_INT (base + i);
31277
31278 t1 = gen_rtx_PARALLEL (mode, v);
31279 return t1;
31280 }
31281
31282 /* Check OP for validity as a PARALLEL RTX vector with elements
31283 numbering either the high (HIGH == TRUE) or low (HIGH == FALSE)
31284 lanes, from the perspective of the architecture. See the diagram
31285 above arm_simd_vect_par_cnst_half for more details. */
31286
31287 bool
31288 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31289 bool high)
31290 {
31291 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31292 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31293 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31294 int i = 0;
31295
31296 if (!VECTOR_MODE_P (mode))
31297 return false;
31298
31299 if (count_op != count_ideal)
31300 return false;
31301
31302 for (i = 0; i < count_ideal; i++)
31303 {
31304 rtx elt_op = XVECEXP (op, 0, i);
31305 rtx elt_ideal = XVECEXP (ideal, 0, i);
31306
31307 if (!CONST_INT_P (elt_op)
31308 || INTVAL (elt_ideal) != INTVAL (elt_op))
31309 return false;
31310 }
31311 return true;
31312 }
31313
31314 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31315 in Thumb1. */
31316 static bool
31317 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31318 const_tree)
31319 {
31320 /* For now, we punt and not handle this for TARGET_THUMB1. */
31321 if (vcall_offset && TARGET_THUMB1)
31322 return false;
31323
31324 /* Otherwise ok. */
31325 return true;
31326 }
31327
31328 /* Generate RTL for a conditional branch with rtx comparison CODE in
31329 mode CC_MODE. The destination of the unlikely conditional branch
31330 is LABEL_REF. */
31331
31332 void
31333 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31334 rtx label_ref)
31335 {
31336 rtx x;
31337 x = gen_rtx_fmt_ee (code, VOIDmode,
31338 gen_rtx_REG (cc_mode, CC_REGNUM),
31339 const0_rtx);
31340
31341 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31342 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31343 pc_rtx);
31344 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31345 }
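/* The jump emitted above has the standard conditional-branch form, shown
   here for illustration:

     (set (pc) (if_then_else (<code> (reg CC_REGNUM) (const_int 0))
                             (label_ref <label>)
                             (pc)))

   and is marked as unlikely taken via emit_unlikely_jump.  */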
31346
31347 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31348
31349 For pure-code sections there is no letter code for this attribute, so
31350 output all the section flags numerically when this is needed. */
31351
31352 static bool
31353 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31354 {
31355
31356 if (flags & SECTION_ARM_PURECODE)
31357 {
31358 *num = 0x20000000;
31359
31360 if (!(flags & SECTION_DEBUG))
31361 *num |= 0x2;
31362 if (flags & SECTION_EXCLUDE)
31363 *num |= 0x80000000;
31364 if (flags & SECTION_WRITE)
31365 *num |= 0x1;
31366 if (flags & SECTION_CODE)
31367 *num |= 0x4;
31368 if (flags & SECTION_MERGE)
31369 *num |= 0x10;
31370 if (flags & SECTION_STRINGS)
31371 *num |= 0x20;
31372 if (flags & SECTION_TLS)
31373 *num |= 0x400;
31374 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31375 *num |= 0x200;
31376
31377 return true;
31378 }
31379
31380 return false;
31381 }
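/* Worked example (illustrative): a typical pure-code text section has
   SECTION_CODE set and SECTION_DEBUG clear, so the value computed above is
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE | SHF_ALLOC
   | SHF_EXECINSTR.  */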
31382
31383 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31384
31385 If pure-code is passed as an option, make sure all functions are in
31386 sections that have the SHF_ARM_PURECODE attribute. */
31387
31388 static section *
31389 arm_function_section (tree decl, enum node_frequency freq,
31390 bool startup, bool exit)
31391 {
31392 const char * section_name;
31393 section * sec;
31394
31395 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31396 return default_function_section (decl, freq, startup, exit);
31397
31398 if (!target_pure_code)
31399 return default_function_section (decl, freq, startup, exit);
31400
31401
31402 section_name = DECL_SECTION_NAME (decl);
31403
31404 /* If a function is not in a named section then it falls under the 'default'
31405 text section, also known as '.text'. We can preserve previous behavior as
31406 the default text section already has the SHF_ARM_PURECODE section
31407 attribute. */
31408 if (!section_name)
31409 {
31410 section *default_sec = default_function_section (decl, freq, startup,
31411 exit);
31412
31413 /* If default_sec is not null, then it must be a special section like for
31414 example .text.startup. We set the pure-code attribute and return the
31415 same section to preserve existing behavior. */
31416 if (default_sec)
31417 default_sec->common.flags |= SECTION_ARM_PURECODE;
31418 return default_sec;
31419 }
31420
31421 /* Otherwise look whether a section has already been created with
31422 'section_name'. */
31423 sec = get_named_section (decl, section_name, 0);
31424 if (!sec)
31425 /* If that is not the case passing NULL as the section's name to
31426 'get_named_section' will create a section with the declaration's
31427 section name. */
31428 sec = get_named_section (decl, NULL, 0);
31429
31430 /* Set the SHF_ARM_PURECODE attribute. */
31431 sec->common.flags |= SECTION_ARM_PURECODE;
31432
31433 return sec;
31434 }
31435
31436 /* Implements the TARGET_SECTION_FLAGS hook.
31437
31438 If DECL is a function declaration and pure-code is passed as an option
31439 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31440 section's name and RELOC indicates whether the declaration's initializer may
31441 contain runtime relocations. */
31442
31443 static unsigned int
31444 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31445 {
31446 unsigned int flags = default_section_type_flags (decl, name, reloc);
31447
31448 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31449 flags |= SECTION_ARM_PURECODE;
31450
31451 return flags;
31452 }
31453
31454 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31455
31456 static void
31457 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31458 rtx op0, rtx op1,
31459 rtx *quot_p, rtx *rem_p)
31460 {
31461 if (mode == SImode)
31462 gcc_assert (!TARGET_IDIV);
31463
31464 scalar_int_mode libval_mode
31465 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31466
31467 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31468 libval_mode,
31469 op0, GET_MODE (op0),
31470 op1, GET_MODE (op1));
31471
31472 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31473 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31474 GET_MODE_SIZE (mode));
31475
31476 gcc_assert (quotient);
31477 gcc_assert (remainder);
31478
31479 *quot_p = quotient;
31480 *rem_p = remainder;
31481 }
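/* Illustrative note: for SImode the libcall is __aeabi_idivmod or
   __aeabi_uidivmod, whose AEABI contract returns the quotient in r0 and
   the remainder in r1; modelling that pair as a single DImode value lets
   the two subregs above split it back into quotient and remainder.  */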
31482
31483 /* This function checks for the availability of the coprocessor builtin passed
31484 in BUILTIN for the current target. Returns true if it is available and
31485 false otherwise. If a BUILTIN is passed for which this function has not
31486 been implemented it will abort via gcc_unreachable. */
31487
31488 bool
31489 arm_coproc_builtin_available (enum unspecv builtin)
31490 {
31491 /* None of these builtins are available in Thumb mode if the target only
31492 supports Thumb-1. */
31493 if (TARGET_THUMB1)
31494 return false;
31495
31496 switch (builtin)
31497 {
31498 case VUNSPEC_CDP:
31499 case VUNSPEC_LDC:
31500 case VUNSPEC_LDCL:
31501 case VUNSPEC_STC:
31502 case VUNSPEC_STCL:
31503 case VUNSPEC_MCR:
31504 case VUNSPEC_MRC:
31505 if (arm_arch4)
31506 return true;
31507 break;
31508 case VUNSPEC_CDP2:
31509 case VUNSPEC_LDC2:
31510 case VUNSPEC_LDC2L:
31511 case VUNSPEC_STC2:
31512 case VUNSPEC_STC2L:
31513 case VUNSPEC_MCR2:
31514 case VUNSPEC_MRC2:
31515 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31516 ARMv8-{A,M}. */
31517 if (arm_arch5t)
31518 return true;
31519 break;
31520 case VUNSPEC_MCRR:
31521 case VUNSPEC_MRRC:
31522 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31523 ARMv8-{A,M}. */
31524 if (arm_arch6 || arm_arch5te)
31525 return true;
31526 break;
31527 case VUNSPEC_MCRR2:
31528 case VUNSPEC_MRRC2:
31529 if (arm_arch6)
31530 return true;
31531 break;
31532 default:
31533 gcc_unreachable ();
31534 }
31535 return false;
31536 }
31537
31538 /* This function returns true if OP is a valid memory operand for the ldc and
31539 stc coprocessor instructions and false otherwise. */
31540
31541 bool
31542 arm_coproc_ldc_stc_legitimate_address (rtx op)
31543 {
31544 HOST_WIDE_INT range;
31545 /* Has to be a memory operand. */
31546 if (!MEM_P (op))
31547 return false;
31548
31549 op = XEXP (op, 0);
31550
31551 /* We accept registers. */
31552 if (REG_P (op))
31553 return true;
31554
31555 switch (GET_CODE (op))
31556 {
31557 case PLUS:
31558 {
31559 /* Or registers with an offset. */
31560 if (!REG_P (XEXP (op, 0)))
31561 return false;
31562
31563 op = XEXP (op, 1);
31564
31565 /* The offset must be an immediate though. */
31566 if (!CONST_INT_P (op))
31567 return false;
31568
31569 range = INTVAL (op);
31570
31571 /* Within the range of [-1020,1020]. */
31572 if (!IN_RANGE (range, -1020, 1020))
31573 return false;
31574
31575 /* And a multiple of 4. */
31576 return (range % 4) == 0;
31577 }
31578 case PRE_INC:
31579 case POST_INC:
31580 case PRE_DEC:
31581 case POST_DEC:
31582 return REG_P (XEXP (op, 0));
31583 default:
31584 gcc_unreachable ();
31585 }
31586 return false;
31587 }
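/* Accepted and rejected operands for the predicate above (illustrative):

     (mem (reg r0))                            accepted
     (mem (plus (reg r0) (const_int 1020)))    accepted (in range, 4-aligned)
     (mem (plus (reg r0) (const_int 1021)))    rejected (not a multiple of 4)
     (mem (post_inc (reg r0)))                 accepted  */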
31588
31589 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31590
31591 In VFPv1, VFP registers could only be accessed in the mode they were
31592 set, so subregs would be invalid there. However, we don't support
31593 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31594
31595 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31596 VFP registers in little-endian order. We can't describe that accurately to
31597 GCC, so avoid taking subregs of such values.
31598
31599 The only exception is going from a 128-bit to a 64-bit type. In that
31600 case the data layout happens to be consistent for big-endian, so we
31601 explicitly allow that case. */
31602
31603 static bool
31604 arm_can_change_mode_class (machine_mode from, machine_mode to,
31605 reg_class_t rclass)
31606 {
31607 if (TARGET_BIG_END
31608 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31609 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31610 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31611 && reg_classes_intersect_p (VFP_REGS, rclass))
31612 return false;
31613 return true;
31614 }
31615
31616 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31617 strcpy from constants will be faster. */
31618
31619 static HOST_WIDE_INT
31620 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31621 {
31622 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31623 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31624 return MAX (align, BITS_PER_WORD * factor);
31625 return align;
31626 }
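/* Illustrative effect of the hook above: when not optimizing for size, a
   string literal with the default byte alignment is raised to 32-bit
   (word) alignment, or to 64-bit alignment when tuning for XScale in ARM
   mode; with -Os the incoming alignment is returned unchanged.  */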
31627
31628 /* Emit a speculation barrier on target architectures that do not have
31629 DSB/ISB directly. Such systems probably don't need a barrier
31630 themselves, but if the code is ever run on a later architecture, it
31631 might become a problem. */
31632 void
31633 arm_emit_speculation_barrier_function ()
31634 {
31635 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
31636 }
31637
31638 #if CHECKING_P
31639 namespace selftest {
31640
31641 /* Scan the static data tables generated by parsecpu.awk looking for
31642 potential issues with the data. We primarily check for
31643 inconsistencies in the option extensions at present (extensions
31644 that duplicate others but aren't marked as aliases). Furthermore,
31645 for correct canonicalization, later options must never be a subset
31646 of an earlier option. Any extension should also only specify other
31647 feature bits and never an architecture bit. The architecture is inferred
31648 from the declaration of the extension. */
31649 static void
31650 arm_test_cpu_arch_data (void)
31651 {
31652 const arch_option *arch;
31653 const cpu_option *cpu;
31654 auto_sbitmap target_isa (isa_num_bits);
31655 auto_sbitmap isa1 (isa_num_bits);
31656 auto_sbitmap isa2 (isa_num_bits);
31657
31658 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31659 {
31660 const cpu_arch_extension *ext1, *ext2;
31661
31662 if (arch->common.extensions == NULL)
31663 continue;
31664
31665 arm_initialize_isa (target_isa, arch->common.isa_bits);
31666
31667 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31668 {
31669 if (ext1->alias)
31670 continue;
31671
31672 arm_initialize_isa (isa1, ext1->isa_bits);
31673 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31674 {
31675 if (ext2->alias || ext1->remove != ext2->remove)
31676 continue;
31677
31678 arm_initialize_isa (isa2, ext2->isa_bits);
31679 /* If the option is a subset of the parent option, it doesn't
31680 add anything and so isn't useful. */
31681 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31682
31683 /* If the extension specifies any architectural bits then
31684 disallow it. Extensions should only specify feature bits. */
31685 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31686 }
31687 }
31688 }
31689
31690 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31691 {
31692 const cpu_arch_extension *ext1, *ext2;
31693
31694 if (cpu->common.extensions == NULL)
31695 continue;
31696
31697 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31698
31699 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31700 {
31701 if (ext1->alias)
31702 continue;
31703
31704 arm_initialize_isa (isa1, ext1->isa_bits);
31705 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31706 {
31707 if (ext2->alias || ext1->remove != ext2->remove)
31708 continue;
31709
31710 arm_initialize_isa (isa2, ext2->isa_bits);
31711 /* If the option is a subset of the parent option, it doesn't
31712 add anything and so isn't useful. */
31713 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31714
31715 /* If the extension specifies any architectural bits then
31716 disallow it. Extensions should only specify feature bits. */
31717 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31718 }
31719 }
31720 }
31721 }
31722
31723 /* Scan the static data tables generated by parsecpu.awk looking for
31724 potential issues with the data. Here we check for consistency between the
31725 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31726 a feature bit that is not defined by any FPU flag. */
31727 static void
31728 arm_test_fpu_data (void)
31729 {
31730 auto_sbitmap isa_all_fpubits (isa_num_bits);
31731 auto_sbitmap fpubits (isa_num_bits);
31732 auto_sbitmap tmpset (isa_num_bits);
31733
31734 static const enum isa_feature fpu_bitlist[]
31735 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31736 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31737
31738 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31739 {
31740 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31741 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31742 bitmap_clear (isa_all_fpubits);
31743 bitmap_copy (isa_all_fpubits, tmpset);
31744 }
31745
31746 if (!bitmap_empty_p (isa_all_fpubits))
31747 {
31748 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31749 " group that are not defined by any FPU.\n"
31750 " Check your arm-cpus.in.\n");
31751 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31752 }
31753 }
31754
31755 static void
31756 arm_run_selftests (void)
31757 {
31758 arm_test_cpu_arch_data ();
31759 arm_test_fpu_data ();
31760 }
31761 } /* Namespace selftest. */
31762
31763 #undef TARGET_RUN_TARGET_SELFTESTS
31764 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31765 #endif /* CHECKING_P */
31766
31767 struct gcc_target targetm = TARGET_INITIALIZER;
31768
31769 #include "gt-arm.h"