1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "df.h"
30 #include "alias.h"
31 #include "fold-const.h"
32 #include "stringpool.h"
33 #include "stor-layout.h"
34 #include "calls.h"
35 #include "varasm.h"
36 #include "obstack.h"
37 #include "regs.h"
38 #include "insn-config.h"
39 #include "conditions.h"
40 #include "output.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "reload.h"
44 #include "expmed.h"
45 #include "dojump.h"
46 #include "explow.h"
47 #include "emit-rtl.h"
48 #include "stmt.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "cfgrtl.h"
55 #include "cfganal.h"
56 #include "lcm.h"
57 #include "cfgbuild.h"
58 #include "cfgcleanup.h"
59 #include "cgraph.h"
60 #include "except.h"
61 #include "tm_p.h"
62 #include "target.h"
63 #include "sched-int.h"
64 #include "debug.h"
65 #include "langhooks.h"
66 #include "intl.h"
67 #include "libfuncs.h"
68 #include "params.h"
69 #include "opts.h"
70 #include "dumpfile.h"
71 #include "gimple-expr.h"
72 #include "target-globals.h"
73 #include "builtins.h"
74 #include "tm-constrs.h"
75 #include "rtl-iter.h"
76
77 /* This file should be included last. */
78 #include "target-def.h"
79
80 /* Forward definitions of types. */
81 typedef struct minipool_node Mnode;
82 typedef struct minipool_fixup Mfix;
83
84 void (*arm_lang_output_object_attributes_hook)(void);
85
86 struct four_ints
87 {
88 int i[4];
89 };
90
91 /* Forward function declarations. */
92 static bool arm_const_not_ok_for_debug_p (rtx);
93 static bool arm_needs_doubleword_align (machine_mode, const_tree);
94 static int arm_compute_static_chain_stack_bytes (void);
95 static arm_stack_offsets *arm_get_frame_offsets (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
98 HOST_WIDE_INT, rtx, rtx, int, int);
99 static unsigned bit_count (unsigned long);
100 static int arm_address_register_rtx_p (rtx, int);
101 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
102 static bool is_called_in_ARM_mode (tree);
103 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
104 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
105 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
106 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
107 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
108 inline static int thumb1_index_register_rtx_p (rtx, int);
109 static int thumb_far_jump_used_p (void);
110 static bool thumb_force_lr_save (void);
111 static unsigned arm_size_return_regs (void);
112 static bool arm_assemble_integer (rtx, unsigned int, int);
113 static void arm_print_operand (FILE *, rtx, int);
114 static void arm_print_operand_address (FILE *, rtx);
115 static bool arm_print_operand_punct_valid_p (unsigned char code);
116 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
117 static arm_cc get_arm_condition_code (rtx);
118 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
119 static const char *output_multi_immediate (rtx *, const char *, const char *,
120 int, HOST_WIDE_INT);
121 static const char *shift_op (rtx, HOST_WIDE_INT *);
122 static struct machine_function *arm_init_machine_status (void);
123 static void thumb_exit (FILE *, int);
124 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
125 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
126 static Mnode *add_minipool_forward_ref (Mfix *);
127 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
128 static Mnode *add_minipool_backward_ref (Mfix *);
129 static void assign_minipool_offsets (Mfix *);
130 static void arm_print_value (FILE *, rtx);
131 static void dump_minipool (rtx_insn *);
132 static int arm_barrier_cost (rtx_insn *);
133 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
134 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
135 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
136 machine_mode, rtx);
137 static void arm_reorg (void);
138 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
139 static unsigned long arm_compute_save_reg0_reg12_mask (void);
140 static unsigned long arm_compute_save_reg_mask (void);
141 static unsigned long arm_isr_value (tree);
142 static unsigned long arm_compute_func_type (void);
143 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
144 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
145 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
146 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
147 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
148 #endif
149 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
150 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
151 static int arm_comp_type_attributes (const_tree, const_tree);
152 static void arm_set_default_type_attributes (tree);
153 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
154 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
155 static int optimal_immediate_sequence (enum rtx_code code,
156 unsigned HOST_WIDE_INT val,
157 struct four_ints *return_sequence);
158 static int optimal_immediate_sequence_1 (enum rtx_code code,
159 unsigned HOST_WIDE_INT val,
160 struct four_ints *return_sequence,
161 int i);
162 static int arm_get_strip_length (int);
163 static bool arm_function_ok_for_sibcall (tree, tree);
164 static machine_mode arm_promote_function_mode (const_tree,
165 machine_mode, int *,
166 const_tree, int);
167 static bool arm_return_in_memory (const_tree, const_tree);
168 static rtx arm_function_value (const_tree, const_tree, bool);
169 static rtx arm_libcall_value_1 (machine_mode);
170 static rtx arm_libcall_value (machine_mode, const_rtx);
171 static bool arm_function_value_regno_p (const unsigned int);
172 static void arm_internal_label (FILE *, const char *, unsigned long);
173 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
174 tree);
175 static bool arm_have_conditional_execution (void);
176 static bool arm_cannot_force_const_mem (machine_mode, rtx);
177 static bool arm_legitimate_constant_p (machine_mode, rtx);
178 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
179 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
180 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
181 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
182 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
183 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
184 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
185 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
186 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
187 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
188 static void emit_constant_insn (rtx cond, rtx pattern);
189 static rtx_insn *emit_set_insn (rtx, rtx);
190 static rtx emit_multi_reg_push (unsigned long, unsigned long);
191 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
192 tree, bool);
193 static rtx arm_function_arg (cumulative_args_t, machine_mode,
194 const_tree, bool);
195 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
196 const_tree, bool);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 static void arm_asm_init_sections (void);
230 #endif
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_set_current_function (tree);
248 static bool arm_can_inline_p (tree, tree);
249 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
250 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static const char *arm_invalid_parameter_type (const_tree t);
261 static const char *arm_invalid_return_type (const_tree t);
262 static tree arm_promoted_type (const_tree t);
263 static tree arm_convert_to_type (tree type, tree expr);
264 static bool arm_scalar_mode_supported_p (machine_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (machine_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
286 static unsigned int arm_autovectorize_vector_sizes (void);
287 static int arm_default_branch_cost (bool, bool);
288 static int arm_cortex_a5_branch_cost (bool, bool);
289 static int arm_cortex_m_branch_cost (bool, bool);
290 static int arm_cortex_m7_branch_cost (bool, bool);
291
292 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
293 const unsigned char *sel);
294
295 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
296
297 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
298 tree vectype,
299 int misalign ATTRIBUTE_UNUSED);
300 static unsigned arm_add_stmt_cost (void *data, int count,
301 enum vect_cost_for_stmt kind,
302 struct _stmt_vec_info *stmt_info,
303 int misalign,
304 enum vect_cost_model_location where);
305
306 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
307 bool op0_preserve_value);
308 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
309
310 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
311 \f
312 /* Table of machine attributes. */
313 static const struct attribute_spec arm_attribute_table[] =
314 {
315 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
316 affects_type_identity } */
317 /* Function calls made to this symbol must be done indirectly, because
318 it may lie outside of the 26 bit addressing range of a normal function
319 call. */
320 { "long_call", 0, 0, false, true, true, NULL, false },
321 /* Whereas these functions are always known to reside within the 26 bit
322 addressing range. */
323 { "short_call", 0, 0, false, true, true, NULL, false },
324 /* Specify the procedure call conventions for a function. */
325 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
326 false },
327 /* Interrupt Service Routines have special prologue and epilogue requirements. */
328 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
329 false },
330 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
331 false },
332 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
333 false },
334 #ifdef ARM_PE
335 /* ARM/PE has three new attributes:
336 interfacearm - ?
337 dllexport - for exporting a function/variable that will live in a dll
338 dllimport - for importing a function/variable from a dll
339
340 Microsoft allows multiple declspecs in one __declspec, separating
341 them with spaces. We do NOT support this. Instead, use __declspec
342 multiple times.
343 */
344 { "dllimport", 0, 0, true, false, false, NULL, false },
345 { "dllexport", 0, 0, true, false, false, NULL, false },
346 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
347 false },
348 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
349 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
350 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
351 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
352 false },
353 #endif
354 { NULL, 0, 0, false, false, false, NULL, false }
355 };
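/* Illustrative sketch (not part of the port): how the attributes registered
   in the table above are spelled in user code.  The handlers named in the
   table run when such declarations are parsed; the argument strings shown for
   "pcs" and "interrupt" are the commonly documented ones and are assumptions
   here rather than values taken from this file.  */
#if 0
void far_away (void) __attribute__ ((long_call));              /* Always called indirectly.  */
void nearby (void) __attribute__ ((short_call));                /* Known to be within branch range.  */
double vfp_args (double) __attribute__ ((pcs ("aapcs-vfp")));   /* Explicit procedure-call standard.  */
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));    /* ISR prologue/epilogue handling.  */
void asm_only (void) __attribute__ ((naked));                   /* No compiler-generated frame.  */
#endif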
356 \f
357 /* Initialize the GCC target structure. */
358 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
359 #undef TARGET_MERGE_DECL_ATTRIBUTES
360 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
361 #endif
362
363 #undef TARGET_LEGITIMIZE_ADDRESS
364 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
365
366 #undef TARGET_LRA_P
367 #define TARGET_LRA_P hook_bool_void_true
368
369 #undef TARGET_ATTRIBUTE_TABLE
370 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
371
372 #undef TARGET_INSERT_ATTRIBUTES
373 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
374
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
379
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
384
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
391
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
394
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
397
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
400
401 #undef TARGET_CAN_INLINE_P
402 #define TARGET_CAN_INLINE_P arm_can_inline_p
403
404 #undef TARGET_OPTION_OVERRIDE
405 #define TARGET_OPTION_OVERRIDE arm_option_override
406
407 #undef TARGET_COMP_TYPE_ATTRIBUTES
408 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
409
410 #undef TARGET_SCHED_MACRO_FUSION_P
411 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
412
413 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
414 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
415
416 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
417 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
418
419 #undef TARGET_SCHED_ADJUST_COST
420 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
421
422 #undef TARGET_SET_CURRENT_FUNCTION
423 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
424
425 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
426 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
427
428 #undef TARGET_SCHED_REORDER
429 #define TARGET_SCHED_REORDER arm_sched_reorder
430
431 #undef TARGET_REGISTER_MOVE_COST
432 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
433
434 #undef TARGET_MEMORY_MOVE_COST
435 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
436
437 #undef TARGET_ENCODE_SECTION_INFO
438 #ifdef ARM_PE
439 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
440 #else
441 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
442 #endif
443
444 #undef TARGET_STRIP_NAME_ENCODING
445 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
446
447 #undef TARGET_ASM_INTERNAL_LABEL
448 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
449
450 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
451 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
452
453 #undef TARGET_FUNCTION_VALUE
454 #define TARGET_FUNCTION_VALUE arm_function_value
455
456 #undef TARGET_LIBCALL_VALUE
457 #define TARGET_LIBCALL_VALUE arm_libcall_value
458
459 #undef TARGET_FUNCTION_VALUE_REGNO_P
460 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
461
462 #undef TARGET_ASM_OUTPUT_MI_THUNK
463 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
464 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
465 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
466
467 #undef TARGET_RTX_COSTS
468 #define TARGET_RTX_COSTS arm_rtx_costs
469 #undef TARGET_ADDRESS_COST
470 #define TARGET_ADDRESS_COST arm_address_cost
471
472 #undef TARGET_SHIFT_TRUNCATION_MASK
473 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
474 #undef TARGET_VECTOR_MODE_SUPPORTED_P
475 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
476 #undef TARGET_ARRAY_MODE_SUPPORTED_P
477 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
478 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
479 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
480 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
481 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
482 arm_autovectorize_vector_sizes
483
484 #undef TARGET_MACHINE_DEPENDENT_REORG
485 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
486
487 #undef TARGET_INIT_BUILTINS
488 #define TARGET_INIT_BUILTINS arm_init_builtins
489 #undef TARGET_EXPAND_BUILTIN
490 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
491 #undef TARGET_BUILTIN_DECL
492 #define TARGET_BUILTIN_DECL arm_builtin_decl
493
494 #undef TARGET_INIT_LIBFUNCS
495 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
496
497 #undef TARGET_PROMOTE_FUNCTION_MODE
498 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
499 #undef TARGET_PROMOTE_PROTOTYPES
500 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
501 #undef TARGET_PASS_BY_REFERENCE
502 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
503 #undef TARGET_ARG_PARTIAL_BYTES
504 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
505 #undef TARGET_FUNCTION_ARG
506 #define TARGET_FUNCTION_ARG arm_function_arg
507 #undef TARGET_FUNCTION_ARG_ADVANCE
508 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
509 #undef TARGET_FUNCTION_ARG_BOUNDARY
510 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
511
512 #undef TARGET_SETUP_INCOMING_VARARGS
513 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
514
515 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
516 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
517
518 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
519 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
520 #undef TARGET_TRAMPOLINE_INIT
521 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
522 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
523 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
524
525 #undef TARGET_WARN_FUNC_RETURN
526 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
527
528 #undef TARGET_DEFAULT_SHORT_ENUMS
529 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
530
531 #undef TARGET_ALIGN_ANON_BITFIELD
532 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
533
534 #undef TARGET_NARROW_VOLATILE_BITFIELD
535 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
536
537 #undef TARGET_CXX_GUARD_TYPE
538 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
539
540 #undef TARGET_CXX_GUARD_MASK_BIT
541 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
542
543 #undef TARGET_CXX_GET_COOKIE_SIZE
544 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
545
546 #undef TARGET_CXX_COOKIE_HAS_SIZE
547 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
548
549 #undef TARGET_CXX_CDTOR_RETURNS_THIS
550 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
551
552 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
553 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
554
555 #undef TARGET_CXX_USE_AEABI_ATEXIT
556 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
557
558 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
559 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
560 arm_cxx_determine_class_data_visibility
561
562 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
563 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
564
565 #undef TARGET_RETURN_IN_MSB
566 #define TARGET_RETURN_IN_MSB arm_return_in_msb
567
568 #undef TARGET_RETURN_IN_MEMORY
569 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
570
571 #undef TARGET_MUST_PASS_IN_STACK
572 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
573
574 #if ARM_UNWIND_INFO
575 #undef TARGET_ASM_UNWIND_EMIT
576 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
577
578 /* EABI unwinding tables use a different format for the typeinfo tables. */
579 #undef TARGET_ASM_TTYPE
580 #define TARGET_ASM_TTYPE arm_output_ttype
581
582 #undef TARGET_ARM_EABI_UNWINDER
583 #define TARGET_ARM_EABI_UNWINDER true
584
585 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
586 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
587
588 #undef TARGET_ASM_INIT_SECTIONS
589 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
590 #endif /* ARM_UNWIND_INFO */
591
592 #undef TARGET_DWARF_REGISTER_SPAN
593 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
594
595 #undef TARGET_CANNOT_COPY_INSN_P
596 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
597
598 #ifdef HAVE_AS_TLS
599 #undef TARGET_HAVE_TLS
600 #define TARGET_HAVE_TLS true
601 #endif
602
603 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
604 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
605
606 #undef TARGET_LEGITIMATE_CONSTANT_P
607 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
608
609 #undef TARGET_CANNOT_FORCE_CONST_MEM
610 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
611
612 #undef TARGET_MAX_ANCHOR_OFFSET
613 #define TARGET_MAX_ANCHOR_OFFSET 4095
614
615 /* The minimum is set such that the total size of the block
616 for a particular anchor is 4095 - (-4088) + 1 = 8184 bytes, which is
617 divisible by eight, ensuring natural spacing of anchors. */
618 #undef TARGET_MIN_ANCHOR_OFFSET
619 #define TARGET_MIN_ANCHOR_OFFSET -4088
620
621 #undef TARGET_SCHED_ISSUE_RATE
622 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
623
624 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
625 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
626 arm_first_cycle_multipass_dfa_lookahead
627
628 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
629 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
630 arm_first_cycle_multipass_dfa_lookahead_guard
631
632 #undef TARGET_MANGLE_TYPE
633 #define TARGET_MANGLE_TYPE arm_mangle_type
634
635 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
636 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
637
638 #undef TARGET_BUILD_BUILTIN_VA_LIST
639 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
640 #undef TARGET_EXPAND_BUILTIN_VA_START
641 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
642 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
643 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
644
645 #ifdef HAVE_AS_TLS
646 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
647 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
648 #endif
649
650 #undef TARGET_LEGITIMATE_ADDRESS_P
651 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
652
653 #undef TARGET_PREFERRED_RELOAD_CLASS
654 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
655
656 #undef TARGET_INVALID_PARAMETER_TYPE
657 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
658
659 #undef TARGET_INVALID_RETURN_TYPE
660 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
661
662 #undef TARGET_PROMOTED_TYPE
663 #define TARGET_PROMOTED_TYPE arm_promoted_type
664
665 #undef TARGET_CONVERT_TO_TYPE
666 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
667
668 #undef TARGET_SCALAR_MODE_SUPPORTED_P
669 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
670
671 #undef TARGET_FRAME_POINTER_REQUIRED
672 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
673
674 #undef TARGET_CAN_ELIMINATE
675 #define TARGET_CAN_ELIMINATE arm_can_eliminate
676
677 #undef TARGET_CONDITIONAL_REGISTER_USAGE
678 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
679
680 #undef TARGET_CLASS_LIKELY_SPILLED_P
681 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
682
683 #undef TARGET_VECTORIZE_BUILTINS
684 #define TARGET_VECTORIZE_BUILTINS
685
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
688 arm_builtin_vectorized_function
689
690 #undef TARGET_VECTOR_ALIGNMENT
691 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
692
693 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
694 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
695 arm_vector_alignment_reachable
696
697 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
698 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
699 arm_builtin_support_vector_misalignment
700
701 #undef TARGET_PREFERRED_RENAME_CLASS
702 #define TARGET_PREFERRED_RENAME_CLASS \
703 arm_preferred_rename_class
704
705 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
706 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
707 arm_vectorize_vec_perm_const_ok
708
709 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
710 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
711 arm_builtin_vectorization_cost
712 #undef TARGET_VECTORIZE_ADD_STMT_COST
713 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
714
715 #undef TARGET_CANONICALIZE_COMPARISON
716 #define TARGET_CANONICALIZE_COMPARISON \
717 arm_canonicalize_comparison
718
719 #undef TARGET_ASAN_SHADOW_OFFSET
720 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
721
722 #undef MAX_INSN_PER_IT_BLOCK
723 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
724
725 #undef TARGET_CAN_USE_DOLOOP_P
726 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
727
728 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
729 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
730
731 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
732 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
733
734 #undef TARGET_SCHED_FUSION_PRIORITY
735 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
736
737 struct gcc_target targetm = TARGET_INITIALIZER;
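/* Illustrative sketch of the idiom used throughout the block above, using a
   hypothetical hook name (TARGET_EXAMPLE_HOOK and arm_example_hook do not
   exist): target-def.h supplies a default definition for every TARGET_* hook
   macro, each "#undef/#define" pair replaces one default with the ARM
   implementation, and TARGET_INITIALIZER then expands to an aggregate
   initializer built from whatever each macro is defined to at this point,
   which is how "targetm" above receives the port's hooks.  */
#if 0
#undef  TARGET_EXAMPLE_HOOK                   /* Drop the default from target-def.h.  */
#define TARGET_EXAMPLE_HOOK arm_example_hook  /* Install the port-specific implementation.  */
#endif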
738 \f
739 /* Obstack for minipool constant handling. */
740 static struct obstack minipool_obstack;
741 static char * minipool_startobj;
742
743 /* The maximum number of insns skipped which
744 will be conditionalised if possible. */
745 static int max_insns_skipped = 5;
746
747 extern FILE * asm_out_file;
748
749 /* True if we are currently building a constant table. */
750 int making_const_table;
751
752 /* The processor for which instructions should be scheduled. */
753 enum processor_type arm_tune = arm_none;
754
755 /* The current tuning set. */
756 const struct tune_params *current_tune;
757
758 /* Which floating point hardware to schedule for. */
759 int arm_fpu_attr;
760
761 /* Which floating point hardware to use. */
762 const struct arm_fpu_desc *arm_fpu_desc;
763
764 /* Used for Thumb call_via trampolines. */
765 rtx thumb_call_via_label[14];
766 static int thumb_call_reg_needed;
767
768 /* The bits in this mask specify which
769 instructions we are allowed to generate. */
770 unsigned long insn_flags = 0;
771
772 /* The bits in this mask specify which instruction scheduling options should
773 be used. */
774 unsigned long tune_flags = 0;
775
776 /* The highest ARM architecture version supported by the
777 target. */
778 enum base_architecture arm_base_arch = BASE_ARCH_0;
779
780 /* The following are used in the arm.md file as equivalents to bits
781 in the above two flag variables. */
782
783 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
784 int arm_arch3m = 0;
785
786 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
787 int arm_arch4 = 0;
788
789 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
790 int arm_arch4t = 0;
791
792 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
793 int arm_arch5 = 0;
794
795 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
796 int arm_arch5e = 0;
797
798 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
799 int arm_arch6 = 0;
800
801 /* Nonzero if this chip supports the ARM 6K extensions. */
802 int arm_arch6k = 0;
803
804 /* Nonzero if instructions present in ARMv6-M can be used. */
805 int arm_arch6m = 0;
806
807 /* Nonzero if this chip supports the ARM 7 extensions. */
808 int arm_arch7 = 0;
809
810 /* Nonzero if instructions not present in the 'M' profile can be used. */
811 int arm_arch_notm = 0;
812
813 /* Nonzero if instructions present in ARMv7E-M can be used. */
814 int arm_arch7em = 0;
815
816 /* Nonzero if instructions present in ARMv8 can be used. */
817 int arm_arch8 = 0;
818
819 /* Nonzero if this chip can benefit from load scheduling. */
820 int arm_ld_sched = 0;
821
822 /* Nonzero if this chip is a StrongARM. */
823 int arm_tune_strongarm = 0;
824
825 /* Nonzero if this chip supports Intel Wireless MMX technology. */
826 int arm_arch_iwmmxt = 0;
827
828 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
829 int arm_arch_iwmmxt2 = 0;
830
831 /* Nonzero if this chip is an XScale. */
832 int arm_arch_xscale = 0;
833
834 /* Nonzero if tuning for XScale. */
835 int arm_tune_xscale = 0;
836
837 /* Nonzero if we want to tune for stores that access the write-buffer.
838 This typically means an ARM6 or ARM7 with MMU or MPU. */
839 int arm_tune_wbuf = 0;
840
841 /* Nonzero if tuning for Cortex-A9. */
842 int arm_tune_cortex_a9 = 0;
843
844 /* Nonzero if we should define __THUMB_INTERWORK__ in the
845 preprocessor.
846 XXX This is a bit of a hack; it's intended to help work around
847 problems in GLD, which doesn't understand that armv5t code is
848 interworking clean. */
849 int arm_cpp_interwork = 0;
850
851 /* Nonzero if chip supports Thumb 2. */
852 int arm_arch_thumb2;
853
854 /* Nonzero if chip supports integer division instruction. */
855 int arm_arch_arm_hwdiv;
856 int arm_arch_thumb_hwdiv;
857
858 /* Nonzero if chip disallows volatile memory access in IT block. */
859 int arm_arch_no_volatile_ce;
860
861 /* Nonzero if we should use Neon to handle 64-bit operations rather
862 than core registers. */
863 int prefer_neon_for_64bits = 0;
864
865 /* Nonzero if we shouldn't use literal pools. */
866 bool arm_disable_literal_pool = false;
867
868 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
869 we must report the mode of the memory reference from
870 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
871 machine_mode output_memory_reference_mode;
872
873 /* The register number to be used for the PIC offset register. */
874 unsigned arm_pic_register = INVALID_REGNUM;
875
876 enum arm_pcs arm_pcs_default;
877
878 /* For an explanation of these variables, see final_prescan_insn below. */
879 int arm_ccfsm_state;
880 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
881 enum arm_cond_code arm_current_cc;
882
883 rtx arm_target_insn;
884 int arm_target_label;
885 /* The number of conditionally executed insns, including the current insn. */
886 int arm_condexec_count = 0;
887 /* A bitmask specifying the patterns for the IT block.
888 Zero means do not output an IT block before this insn. */
889 int arm_condexec_mask = 0;
890 /* The number of bits used in arm_condexec_mask. */
891 int arm_condexec_masklen = 0;
892
893 /* Nonzero if chip supports the ARMv8 CRC instructions. */
894 int arm_arch_crc = 0;
895
896 /* Nonzero if the core has a very small, high-latency multiply unit. */
897 int arm_m_profile_small_mul = 0;
898
899 /* The condition codes of the ARM, and the inverse function. */
900 static const char * const arm_condition_codes[] =
901 {
902 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
903 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
904 };
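/* Illustrative sketch, based only on the table order above: the codes are
   listed in complementary pairs (eq/ne, cs/cc, mi/pl, vs/vc, hi/ls, ge/lt,
   gt/le), so the inverse of a condition is found by flipping the low bit of
   its index, which is presumably the "inverse function" the comment above
   refers to.  */
#if 0
static const char *
inverse_condition_name (int index)
{
  /* Flipping bit 0 maps each code to its complement, e.g. "eq" (0) -> "ne" (1)
     and "ge" (10) -> "lt" (11).  "al"/"nv" are not a usable complementary pair.  */
  return arm_condition_codes[index ^ 1];
}
#endif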
905
906 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
907 int arm_regs_in_sequence[] =
908 {
909 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
910 };
911
912 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
913 #define streq(string1, string2) (strcmp (string1, string2) == 0)
914
915 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
916 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
917 | (1 << PIC_OFFSET_TABLE_REGNUM)))
918 \f
919 /* Initialization code. */
920
921 struct processors
922 {
923 const char *const name;
924 enum processor_type core;
925 const char *arch;
926 enum base_architecture base_arch;
927 const unsigned long flags;
928 const struct tune_params *const tune;
929 };
930
931
932 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
933 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
934 { \
935 num_slots, \
936 l1_size, \
937 l1_line_size \
938 }
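/* Illustrative sketch of how these helpers are meant to appear inside a
   tune_params initializer; the numbers below are made up for the example and
   do not describe any particular core.  */
#if 0
  ARM_PREFETCH_NOT_BENEFICIAL            /* Expands to { 0, -1, -1 }: no prefetch slots,
                                            L1 size and line size unknown.  */
  ARM_PREFETCH_BENEFICIAL (4, 32, 32)    /* num_slots = 4, l1_size = 32, l1_line_size = 32
                                            (hypothetical values).  */
#endif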
939
940 /* arm generic vectorizer costs. */
941 static const
942 struct cpu_vec_costs arm_default_vec_cost = {
943 1, /* scalar_stmt_cost. */
944 1, /* scalar_load_cost. */
945 1, /* scalar_store_cost. */
946 1, /* vec_stmt_cost. */
947 1, /* vec_to_scalar_cost. */
948 1, /* scalar_to_vec_cost. */
949 1, /* vec_align_load_cost. */
950 1, /* vec_unalign_load_cost. */
951 1, /* vec_unalign_store_cost. */
952 1, /* vec_store_cost. */
953 3, /* cond_taken_branch_cost. */
954 1, /* cond_not_taken_branch_cost. */
955 };
956
957 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
958 #include "aarch-cost-tables.h"
959
960
961
962 const struct cpu_cost_table cortexa9_extra_costs =
963 {
964 /* ALU */
965 {
966 0, /* arith. */
967 0, /* logical. */
968 0, /* shift. */
969 COSTS_N_INSNS (1), /* shift_reg. */
970 COSTS_N_INSNS (1), /* arith_shift. */
971 COSTS_N_INSNS (2), /* arith_shift_reg. */
972 0, /* log_shift. */
973 COSTS_N_INSNS (1), /* log_shift_reg. */
974 COSTS_N_INSNS (1), /* extend. */
975 COSTS_N_INSNS (2), /* extend_arith. */
976 COSTS_N_INSNS (1), /* bfi. */
977 COSTS_N_INSNS (1), /* bfx. */
978 0, /* clz. */
979 0, /* rev. */
980 0, /* non_exec. */
981 true /* non_exec_costs_exec. */
982 },
983 {
984 /* MULT SImode */
985 {
986 COSTS_N_INSNS (3), /* simple. */
987 COSTS_N_INSNS (3), /* flag_setting. */
988 COSTS_N_INSNS (2), /* extend. */
989 COSTS_N_INSNS (3), /* add. */
990 COSTS_N_INSNS (2), /* extend_add. */
991 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
992 },
993 /* MULT DImode */
994 {
995 0, /* simple (N/A). */
996 0, /* flag_setting (N/A). */
997 COSTS_N_INSNS (4), /* extend. */
998 0, /* add (N/A). */
999 COSTS_N_INSNS (4), /* extend_add. */
1000 0 /* idiv (N/A). */
1001 }
1002 },
1003 /* LD/ST */
1004 {
1005 COSTS_N_INSNS (2), /* load. */
1006 COSTS_N_INSNS (2), /* load_sign_extend. */
1007 COSTS_N_INSNS (2), /* ldrd. */
1008 COSTS_N_INSNS (2), /* ldm_1st. */
1009 1, /* ldm_regs_per_insn_1st. */
1010 2, /* ldm_regs_per_insn_subsequent. */
1011 COSTS_N_INSNS (5), /* loadf. */
1012 COSTS_N_INSNS (5), /* loadd. */
1013 COSTS_N_INSNS (1), /* load_unaligned. */
1014 COSTS_N_INSNS (2), /* store. */
1015 COSTS_N_INSNS (2), /* strd. */
1016 COSTS_N_INSNS (2), /* stm_1st. */
1017 1, /* stm_regs_per_insn_1st. */
1018 2, /* stm_regs_per_insn_subsequent. */
1019 COSTS_N_INSNS (1), /* storef. */
1020 COSTS_N_INSNS (1), /* stored. */
1021 COSTS_N_INSNS (1), /* store_unaligned. */
1022 COSTS_N_INSNS (1), /* loadv. */
1023 COSTS_N_INSNS (1) /* storev. */
1024 },
1025 {
1026 /* FP SFmode */
1027 {
1028 COSTS_N_INSNS (14), /* div. */
1029 COSTS_N_INSNS (4), /* mult. */
1030 COSTS_N_INSNS (7), /* mult_addsub. */
1031 COSTS_N_INSNS (30), /* fma. */
1032 COSTS_N_INSNS (3), /* addsub. */
1033 COSTS_N_INSNS (1), /* fpconst. */
1034 COSTS_N_INSNS (1), /* neg. */
1035 COSTS_N_INSNS (3), /* compare. */
1036 COSTS_N_INSNS (3), /* widen. */
1037 COSTS_N_INSNS (3), /* narrow. */
1038 COSTS_N_INSNS (3), /* toint. */
1039 COSTS_N_INSNS (3), /* fromint. */
1040 COSTS_N_INSNS (3) /* roundint. */
1041 },
1042 /* FP DFmode */
1043 {
1044 COSTS_N_INSNS (24), /* div. */
1045 COSTS_N_INSNS (5), /* mult. */
1046 COSTS_N_INSNS (8), /* mult_addsub. */
1047 COSTS_N_INSNS (30), /* fma. */
1048 COSTS_N_INSNS (3), /* addsub. */
1049 COSTS_N_INSNS (1), /* fpconst. */
1050 COSTS_N_INSNS (1), /* neg. */
1051 COSTS_N_INSNS (3), /* compare. */
1052 COSTS_N_INSNS (3), /* widen. */
1053 COSTS_N_INSNS (3), /* narrow. */
1054 COSTS_N_INSNS (3), /* toint. */
1055 COSTS_N_INSNS (3), /* fromint. */
1056 COSTS_N_INSNS (3) /* roundint. */
1057 }
1058 },
1059 /* Vector */
1060 {
1061 COSTS_N_INSNS (1) /* alu. */
1062 }
1063 };
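/* Illustrative note, assuming the usual GCC convention for these per-CPU
   "extra cost" tables: entries are expressed with COSTS_N_INSNS, so
   COSTS_N_INSNS (n) records roughly n instructions of cost on top of the
   baseline single-instruction cost, and a literal 0 means no extra cost
   beyond that baseline.  A minimal sketch of the scaling property, which
   holds independently of the macro's actual scale factor:  */
#if 0
/* Compile-time check idiom: COSTS_N_INSNS is linear, so only the ratios
   between table entries matter, not their absolute values.  */
extern char costs_n_insns_is_linear
  [COSTS_N_INSNS (3) == 3 * COSTS_N_INSNS (1) ? 1 : -1];
#endif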
1064
1065 const struct cpu_cost_table cortexa8_extra_costs =
1066 {
1067 /* ALU */
1068 {
1069 0, /* arith. */
1070 0, /* logical. */
1071 COSTS_N_INSNS (1), /* shift. */
1072 0, /* shift_reg. */
1073 COSTS_N_INSNS (1), /* arith_shift. */
1074 0, /* arith_shift_reg. */
1075 COSTS_N_INSNS (1), /* log_shift. */
1076 0, /* log_shift_reg. */
1077 0, /* extend. */
1078 0, /* extend_arith. */
1079 0, /* bfi. */
1080 0, /* bfx. */
1081 0, /* clz. */
1082 0, /* rev. */
1083 0, /* non_exec. */
1084 true /* non_exec_costs_exec. */
1085 },
1086 {
1087 /* MULT SImode */
1088 {
1089 COSTS_N_INSNS (1), /* simple. */
1090 COSTS_N_INSNS (1), /* flag_setting. */
1091 COSTS_N_INSNS (1), /* extend. */
1092 COSTS_N_INSNS (1), /* add. */
1093 COSTS_N_INSNS (1), /* extend_add. */
1094 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1095 },
1096 /* MULT DImode */
1097 {
1098 0, /* simple (N/A). */
1099 0, /* flag_setting (N/A). */
1100 COSTS_N_INSNS (2), /* extend. */
1101 0, /* add (N/A). */
1102 COSTS_N_INSNS (2), /* extend_add. */
1103 0 /* idiv (N/A). */
1104 }
1105 },
1106 /* LD/ST */
1107 {
1108 COSTS_N_INSNS (1), /* load. */
1109 COSTS_N_INSNS (1), /* load_sign_extend. */
1110 COSTS_N_INSNS (1), /* ldrd. */
1111 COSTS_N_INSNS (1), /* ldm_1st. */
1112 1, /* ldm_regs_per_insn_1st. */
1113 2, /* ldm_regs_per_insn_subsequent. */
1114 COSTS_N_INSNS (1), /* loadf. */
1115 COSTS_N_INSNS (1), /* loadd. */
1116 COSTS_N_INSNS (1), /* load_unaligned. */
1117 COSTS_N_INSNS (1), /* store. */
1118 COSTS_N_INSNS (1), /* strd. */
1119 COSTS_N_INSNS (1), /* stm_1st. */
1120 1, /* stm_regs_per_insn_1st. */
1121 2, /* stm_regs_per_insn_subsequent. */
1122 COSTS_N_INSNS (1), /* storef. */
1123 COSTS_N_INSNS (1), /* stored. */
1124 COSTS_N_INSNS (1), /* store_unaligned. */
1125 COSTS_N_INSNS (1), /* loadv. */
1126 COSTS_N_INSNS (1) /* storev. */
1127 },
1128 {
1129 /* FP SFmode */
1130 {
1131 COSTS_N_INSNS (36), /* div. */
1132 COSTS_N_INSNS (11), /* mult. */
1133 COSTS_N_INSNS (20), /* mult_addsub. */
1134 COSTS_N_INSNS (30), /* fma. */
1135 COSTS_N_INSNS (9), /* addsub. */
1136 COSTS_N_INSNS (3), /* fpconst. */
1137 COSTS_N_INSNS (3), /* neg. */
1138 COSTS_N_INSNS (6), /* compare. */
1139 COSTS_N_INSNS (4), /* widen. */
1140 COSTS_N_INSNS (4), /* narrow. */
1141 COSTS_N_INSNS (8), /* toint. */
1142 COSTS_N_INSNS (8), /* fromint. */
1143 COSTS_N_INSNS (8) /* roundint. */
1144 },
1145 /* FP DFmode */
1146 {
1147 COSTS_N_INSNS (64), /* div. */
1148 COSTS_N_INSNS (16), /* mult. */
1149 COSTS_N_INSNS (25), /* mult_addsub. */
1150 COSTS_N_INSNS (30), /* fma. */
1151 COSTS_N_INSNS (9), /* addsub. */
1152 COSTS_N_INSNS (3), /* fpconst. */
1153 COSTS_N_INSNS (3), /* neg. */
1154 COSTS_N_INSNS (6), /* compare. */
1155 COSTS_N_INSNS (6), /* widen. */
1156 COSTS_N_INSNS (6), /* narrow. */
1157 COSTS_N_INSNS (8), /* toint. */
1158 COSTS_N_INSNS (8), /* fromint. */
1159 COSTS_N_INSNS (8) /* roundint. */
1160 }
1161 },
1162 /* Vector */
1163 {
1164 COSTS_N_INSNS (1) /* alu. */
1165 }
1166 };
1167
1168 const struct cpu_cost_table cortexa5_extra_costs =
1169 {
1170 /* ALU */
1171 {
1172 0, /* arith. */
1173 0, /* logical. */
1174 COSTS_N_INSNS (1), /* shift. */
1175 COSTS_N_INSNS (1), /* shift_reg. */
1176 COSTS_N_INSNS (1), /* arith_shift. */
1177 COSTS_N_INSNS (1), /* arith_shift_reg. */
1178 COSTS_N_INSNS (1), /* log_shift. */
1179 COSTS_N_INSNS (1), /* log_shift_reg. */
1180 COSTS_N_INSNS (1), /* extend. */
1181 COSTS_N_INSNS (1), /* extend_arith. */
1182 COSTS_N_INSNS (1), /* bfi. */
1183 COSTS_N_INSNS (1), /* bfx. */
1184 COSTS_N_INSNS (1), /* clz. */
1185 COSTS_N_INSNS (1), /* rev. */
1186 0, /* non_exec. */
1187 true /* non_exec_costs_exec. */
1188 },
1189
1190 {
1191 /* MULT SImode */
1192 {
1193 0, /* simple. */
1194 COSTS_N_INSNS (1), /* flag_setting. */
1195 COSTS_N_INSNS (1), /* extend. */
1196 COSTS_N_INSNS (1), /* add. */
1197 COSTS_N_INSNS (1), /* extend_add. */
1198 COSTS_N_INSNS (7) /* idiv. */
1199 },
1200 /* MULT DImode */
1201 {
1202 0, /* simple (N/A). */
1203 0, /* flag_setting (N/A). */
1204 COSTS_N_INSNS (1), /* extend. */
1205 0, /* add. */
1206 COSTS_N_INSNS (2), /* extend_add. */
1207 0 /* idiv (N/A). */
1208 }
1209 },
1210 /* LD/ST */
1211 {
1212 COSTS_N_INSNS (1), /* load. */
1213 COSTS_N_INSNS (1), /* load_sign_extend. */
1214 COSTS_N_INSNS (6), /* ldrd. */
1215 COSTS_N_INSNS (1), /* ldm_1st. */
1216 1, /* ldm_regs_per_insn_1st. */
1217 2, /* ldm_regs_per_insn_subsequent. */
1218 COSTS_N_INSNS (2), /* loadf. */
1219 COSTS_N_INSNS (4), /* loadd. */
1220 COSTS_N_INSNS (1), /* load_unaligned. */
1221 COSTS_N_INSNS (1), /* store. */
1222 COSTS_N_INSNS (3), /* strd. */
1223 COSTS_N_INSNS (1), /* stm_1st. */
1224 1, /* stm_regs_per_insn_1st. */
1225 2, /* stm_regs_per_insn_subsequent. */
1226 COSTS_N_INSNS (2), /* storef. */
1227 COSTS_N_INSNS (2), /* stored. */
1228 COSTS_N_INSNS (1), /* store_unaligned. */
1229 COSTS_N_INSNS (1), /* loadv. */
1230 COSTS_N_INSNS (1) /* storev. */
1231 },
1232 {
1233 /* FP SFmode */
1234 {
1235 COSTS_N_INSNS (15), /* div. */
1236 COSTS_N_INSNS (3), /* mult. */
1237 COSTS_N_INSNS (7), /* mult_addsub. */
1238 COSTS_N_INSNS (7), /* fma. */
1239 COSTS_N_INSNS (3), /* addsub. */
1240 COSTS_N_INSNS (3), /* fpconst. */
1241 COSTS_N_INSNS (3), /* neg. */
1242 COSTS_N_INSNS (3), /* compare. */
1243 COSTS_N_INSNS (3), /* widen. */
1244 COSTS_N_INSNS (3), /* narrow. */
1245 COSTS_N_INSNS (3), /* toint. */
1246 COSTS_N_INSNS (3), /* fromint. */
1247 COSTS_N_INSNS (3) /* roundint. */
1248 },
1249 /* FP DFmode */
1250 {
1251 COSTS_N_INSNS (30), /* div. */
1252 COSTS_N_INSNS (6), /* mult. */
1253 COSTS_N_INSNS (10), /* mult_addsub. */
1254 COSTS_N_INSNS (7), /* fma. */
1255 COSTS_N_INSNS (3), /* addsub. */
1256 COSTS_N_INSNS (3), /* fpconst. */
1257 COSTS_N_INSNS (3), /* neg. */
1258 COSTS_N_INSNS (3), /* compare. */
1259 COSTS_N_INSNS (3), /* widen. */
1260 COSTS_N_INSNS (3), /* narrow. */
1261 COSTS_N_INSNS (3), /* toint. */
1262 COSTS_N_INSNS (3), /* fromint. */
1263 COSTS_N_INSNS (3) /* roundint. */
1264 }
1265 },
1266 /* Vector */
1267 {
1268 COSTS_N_INSNS (1) /* alu. */
1269 }
1270 };
1271
1272
1273 const struct cpu_cost_table cortexa7_extra_costs =
1274 {
1275 /* ALU */
1276 {
1277 0, /* arith. */
1278 0, /* logical. */
1279 COSTS_N_INSNS (1), /* shift. */
1280 COSTS_N_INSNS (1), /* shift_reg. */
1281 COSTS_N_INSNS (1), /* arith_shift. */
1282 COSTS_N_INSNS (1), /* arith_shift_reg. */
1283 COSTS_N_INSNS (1), /* log_shift. */
1284 COSTS_N_INSNS (1), /* log_shift_reg. */
1285 COSTS_N_INSNS (1), /* extend. */
1286 COSTS_N_INSNS (1), /* extend_arith. */
1287 COSTS_N_INSNS (1), /* bfi. */
1288 COSTS_N_INSNS (1), /* bfx. */
1289 COSTS_N_INSNS (1), /* clz. */
1290 COSTS_N_INSNS (1), /* rev. */
1291 0, /* non_exec. */
1292 true /* non_exec_costs_exec. */
1293 },
1294
1295 {
1296 /* MULT SImode */
1297 {
1298 0, /* simple. */
1299 COSTS_N_INSNS (1), /* flag_setting. */
1300 COSTS_N_INSNS (1), /* extend. */
1301 COSTS_N_INSNS (1), /* add. */
1302 COSTS_N_INSNS (1), /* extend_add. */
1303 COSTS_N_INSNS (7) /* idiv. */
1304 },
1305 /* MULT DImode */
1306 {
1307 0, /* simple (N/A). */
1308 0, /* flag_setting (N/A). */
1309 COSTS_N_INSNS (1), /* extend. */
1310 0, /* add. */
1311 COSTS_N_INSNS (2), /* extend_add. */
1312 0 /* idiv (N/A). */
1313 }
1314 },
1315 /* LD/ST */
1316 {
1317 COSTS_N_INSNS (1), /* load. */
1318 COSTS_N_INSNS (1), /* load_sign_extend. */
1319 COSTS_N_INSNS (3), /* ldrd. */
1320 COSTS_N_INSNS (1), /* ldm_1st. */
1321 1, /* ldm_regs_per_insn_1st. */
1322 2, /* ldm_regs_per_insn_subsequent. */
1323 COSTS_N_INSNS (2), /* loadf. */
1324 COSTS_N_INSNS (2), /* loadd. */
1325 COSTS_N_INSNS (1), /* load_unaligned. */
1326 COSTS_N_INSNS (1), /* store. */
1327 COSTS_N_INSNS (3), /* strd. */
1328 COSTS_N_INSNS (1), /* stm_1st. */
1329 1, /* stm_regs_per_insn_1st. */
1330 2, /* stm_regs_per_insn_subsequent. */
1331 COSTS_N_INSNS (2), /* storef. */
1332 COSTS_N_INSNS (2), /* stored. */
1333 COSTS_N_INSNS (1), /* store_unaligned. */
1334 COSTS_N_INSNS (1), /* loadv. */
1335 COSTS_N_INSNS (1) /* storev. */
1336 },
1337 {
1338 /* FP SFmode */
1339 {
1340 COSTS_N_INSNS (15), /* div. */
1341 COSTS_N_INSNS (3), /* mult. */
1342 COSTS_N_INSNS (7), /* mult_addsub. */
1343 COSTS_N_INSNS (7), /* fma. */
1344 COSTS_N_INSNS (3), /* addsub. */
1345 COSTS_N_INSNS (3), /* fpconst. */
1346 COSTS_N_INSNS (3), /* neg. */
1347 COSTS_N_INSNS (3), /* compare. */
1348 COSTS_N_INSNS (3), /* widen. */
1349 COSTS_N_INSNS (3), /* narrow. */
1350 COSTS_N_INSNS (3), /* toint. */
1351 COSTS_N_INSNS (3), /* fromint. */
1352 COSTS_N_INSNS (3) /* roundint. */
1353 },
1354 /* FP DFmode */
1355 {
1356 COSTS_N_INSNS (30), /* div. */
1357 COSTS_N_INSNS (6), /* mult. */
1358 COSTS_N_INSNS (10), /* mult_addsub. */
1359 COSTS_N_INSNS (7), /* fma. */
1360 COSTS_N_INSNS (3), /* addsub. */
1361 COSTS_N_INSNS (3), /* fpconst. */
1362 COSTS_N_INSNS (3), /* neg. */
1363 COSTS_N_INSNS (3), /* compare. */
1364 COSTS_N_INSNS (3), /* widen. */
1365 COSTS_N_INSNS (3), /* narrow. */
1366 COSTS_N_INSNS (3), /* toint. */
1367 COSTS_N_INSNS (3), /* fromint. */
1368 COSTS_N_INSNS (3) /* roundint. */
1369 }
1370 },
1371 /* Vector */
1372 {
1373 COSTS_N_INSNS (1) /* alu. */
1374 }
1375 };
1376
1377 const struct cpu_cost_table cortexa12_extra_costs =
1378 {
1379 /* ALU */
1380 {
1381 0, /* arith. */
1382 0, /* logical. */
1383 0, /* shift. */
1384 COSTS_N_INSNS (1), /* shift_reg. */
1385 COSTS_N_INSNS (1), /* arith_shift. */
1386 COSTS_N_INSNS (1), /* arith_shift_reg. */
1387 COSTS_N_INSNS (1), /* log_shift. */
1388 COSTS_N_INSNS (1), /* log_shift_reg. */
1389 0, /* extend. */
1390 COSTS_N_INSNS (1), /* extend_arith. */
1391 0, /* bfi. */
1392 COSTS_N_INSNS (1), /* bfx. */
1393 COSTS_N_INSNS (1), /* clz. */
1394 COSTS_N_INSNS (1), /* rev. */
1395 0, /* non_exec. */
1396 true /* non_exec_costs_exec. */
1397 },
1398 /* MULT SImode */
1399 {
1400 {
1401 COSTS_N_INSNS (2), /* simple. */
1402 COSTS_N_INSNS (3), /* flag_setting. */
1403 COSTS_N_INSNS (2), /* extend. */
1404 COSTS_N_INSNS (3), /* add. */
1405 COSTS_N_INSNS (2), /* extend_add. */
1406 COSTS_N_INSNS (18) /* idiv. */
1407 },
1408 /* MULT DImode */
1409 {
1410 0, /* simple (N/A). */
1411 0, /* flag_setting (N/A). */
1412 COSTS_N_INSNS (3), /* extend. */
1413 0, /* add (N/A). */
1414 COSTS_N_INSNS (3), /* extend_add. */
1415 0 /* idiv (N/A). */
1416 }
1417 },
1418 /* LD/ST */
1419 {
1420 COSTS_N_INSNS (3), /* load. */
1421 COSTS_N_INSNS (3), /* load_sign_extend. */
1422 COSTS_N_INSNS (3), /* ldrd. */
1423 COSTS_N_INSNS (3), /* ldm_1st. */
1424 1, /* ldm_regs_per_insn_1st. */
1425 2, /* ldm_regs_per_insn_subsequent. */
1426 COSTS_N_INSNS (3), /* loadf. */
1427 COSTS_N_INSNS (3), /* loadd. */
1428 0, /* load_unaligned. */
1429 0, /* store. */
1430 0, /* strd. */
1431 0, /* stm_1st. */
1432 1, /* stm_regs_per_insn_1st. */
1433 2, /* stm_regs_per_insn_subsequent. */
1434 COSTS_N_INSNS (2), /* storef. */
1435 COSTS_N_INSNS (2), /* stored. */
1436 0, /* store_unaligned. */
1437 COSTS_N_INSNS (1), /* loadv. */
1438 COSTS_N_INSNS (1) /* storev. */
1439 },
1440 {
1441 /* FP SFmode */
1442 {
1443 COSTS_N_INSNS (17), /* div. */
1444 COSTS_N_INSNS (4), /* mult. */
1445 COSTS_N_INSNS (8), /* mult_addsub. */
1446 COSTS_N_INSNS (8), /* fma. */
1447 COSTS_N_INSNS (4), /* addsub. */
1448 COSTS_N_INSNS (2), /* fpconst. */
1449 COSTS_N_INSNS (2), /* neg. */
1450 COSTS_N_INSNS (2), /* compare. */
1451 COSTS_N_INSNS (4), /* widen. */
1452 COSTS_N_INSNS (4), /* narrow. */
1453 COSTS_N_INSNS (4), /* toint. */
1454 COSTS_N_INSNS (4), /* fromint. */
1455 COSTS_N_INSNS (4) /* roundint. */
1456 },
1457 /* FP DFmode */
1458 {
1459 COSTS_N_INSNS (31), /* div. */
1460 COSTS_N_INSNS (4), /* mult. */
1461 COSTS_N_INSNS (8), /* mult_addsub. */
1462 COSTS_N_INSNS (8), /* fma. */
1463 COSTS_N_INSNS (4), /* addsub. */
1464 COSTS_N_INSNS (2), /* fpconst. */
1465 COSTS_N_INSNS (2), /* neg. */
1466 COSTS_N_INSNS (2), /* compare. */
1467 COSTS_N_INSNS (4), /* widen. */
1468 COSTS_N_INSNS (4), /* narrow. */
1469 COSTS_N_INSNS (4), /* toint. */
1470 COSTS_N_INSNS (4), /* fromint. */
1471 COSTS_N_INSNS (4) /* roundint. */
1472 }
1473 },
1474 /* Vector */
1475 {
1476 COSTS_N_INSNS (1) /* alu. */
1477 }
1478 };
1479
1480 const struct cpu_cost_table cortexa15_extra_costs =
1481 {
1482 /* ALU */
1483 {
1484 0, /* arith. */
1485 0, /* logical. */
1486 0, /* shift. */
1487 0, /* shift_reg. */
1488 COSTS_N_INSNS (1), /* arith_shift. */
1489 COSTS_N_INSNS (1), /* arith_shift_reg. */
1490 COSTS_N_INSNS (1), /* log_shift. */
1491 COSTS_N_INSNS (1), /* log_shift_reg. */
1492 0, /* extend. */
1493 COSTS_N_INSNS (1), /* extend_arith. */
1494 COSTS_N_INSNS (1), /* bfi. */
1495 0, /* bfx. */
1496 0, /* clz. */
1497 0, /* rev. */
1498 0, /* non_exec. */
1499 true /* non_exec_costs_exec. */
1500 },
1501 /* MULT SImode */
1502 {
1503 {
1504 COSTS_N_INSNS (2), /* simple. */
1505 COSTS_N_INSNS (3), /* flag_setting. */
1506 COSTS_N_INSNS (2), /* extend. */
1507 COSTS_N_INSNS (2), /* add. */
1508 COSTS_N_INSNS (2), /* extend_add. */
1509 COSTS_N_INSNS (18) /* idiv. */
1510 },
1511 /* MULT DImode */
1512 {
1513 0, /* simple (N/A). */
1514 0, /* flag_setting (N/A). */
1515 COSTS_N_INSNS (3), /* extend. */
1516 0, /* add (N/A). */
1517 COSTS_N_INSNS (3), /* extend_add. */
1518 0 /* idiv (N/A). */
1519 }
1520 },
1521 /* LD/ST */
1522 {
1523 COSTS_N_INSNS (3), /* load. */
1524 COSTS_N_INSNS (3), /* load_sign_extend. */
1525 COSTS_N_INSNS (3), /* ldrd. */
1526 COSTS_N_INSNS (4), /* ldm_1st. */
1527 1, /* ldm_regs_per_insn_1st. */
1528 2, /* ldm_regs_per_insn_subsequent. */
1529 COSTS_N_INSNS (4), /* loadf. */
1530 COSTS_N_INSNS (4), /* loadd. */
1531 0, /* load_unaligned. */
1532 0, /* store. */
1533 0, /* strd. */
1534 COSTS_N_INSNS (1), /* stm_1st. */
1535 1, /* stm_regs_per_insn_1st. */
1536 2, /* stm_regs_per_insn_subsequent. */
1537 0, /* storef. */
1538 0, /* stored. */
1539 0, /* store_unaligned. */
1540 COSTS_N_INSNS (1), /* loadv. */
1541 COSTS_N_INSNS (1) /* storev. */
1542 },
1543 {
1544 /* FP SFmode */
1545 {
1546 COSTS_N_INSNS (17), /* div. */
1547 COSTS_N_INSNS (4), /* mult. */
1548 COSTS_N_INSNS (8), /* mult_addsub. */
1549 COSTS_N_INSNS (8), /* fma. */
1550 COSTS_N_INSNS (4), /* addsub. */
1551 COSTS_N_INSNS (2), /* fpconst. */
1552 COSTS_N_INSNS (2), /* neg. */
1553 COSTS_N_INSNS (5), /* compare. */
1554 COSTS_N_INSNS (4), /* widen. */
1555 COSTS_N_INSNS (4), /* narrow. */
1556 COSTS_N_INSNS (4), /* toint. */
1557 COSTS_N_INSNS (4), /* fromint. */
1558 COSTS_N_INSNS (4) /* roundint. */
1559 },
1560 /* FP DFmode */
1561 {
1562 COSTS_N_INSNS (31), /* div. */
1563 COSTS_N_INSNS (4), /* mult. */
1564 COSTS_N_INSNS (8), /* mult_addsub. */
1565 COSTS_N_INSNS (8), /* fma. */
1566 COSTS_N_INSNS (4), /* addsub. */
1567 COSTS_N_INSNS (2), /* fpconst. */
1568 COSTS_N_INSNS (2), /* neg. */
1569 COSTS_N_INSNS (2), /* compare. */
1570 COSTS_N_INSNS (4), /* widen. */
1571 COSTS_N_INSNS (4), /* narrow. */
1572 COSTS_N_INSNS (4), /* toint. */
1573 COSTS_N_INSNS (4), /* fromint. */
1574 COSTS_N_INSNS (4) /* roundint. */
1575 }
1576 },
1577 /* Vector */
1578 {
1579 COSTS_N_INSNS (1) /* alu. */
1580 }
1581 };
1582
1583 const struct cpu_cost_table v7m_extra_costs =
1584 {
1585 /* ALU */
1586 {
1587 0, /* arith. */
1588 0, /* logical. */
1589 0, /* shift. */
1590 0, /* shift_reg. */
1591 0, /* arith_shift. */
1592 COSTS_N_INSNS (1), /* arith_shift_reg. */
1593 0, /* log_shift. */
1594 COSTS_N_INSNS (1), /* log_shift_reg. */
1595 0, /* extend. */
1596 COSTS_N_INSNS (1), /* extend_arith. */
1597 0, /* bfi. */
1598 0, /* bfx. */
1599 0, /* clz. */
1600 0, /* rev. */
1601 COSTS_N_INSNS (1), /* non_exec. */
1602 false /* non_exec_costs_exec. */
1603 },
1604 {
1605 /* MULT SImode */
1606 {
1607 COSTS_N_INSNS (1), /* simple. */
1608 COSTS_N_INSNS (1), /* flag_setting. */
1609 COSTS_N_INSNS (2), /* extend. */
1610 COSTS_N_INSNS (1), /* add. */
1611 COSTS_N_INSNS (3), /* extend_add. */
1612 COSTS_N_INSNS (8) /* idiv. */
1613 },
1614 /* MULT DImode */
1615 {
1616 0, /* simple (N/A). */
1617 0, /* flag_setting (N/A). */
1618 COSTS_N_INSNS (2), /* extend. */
1619 0, /* add (N/A). */
1620 COSTS_N_INSNS (3), /* extend_add. */
1621 0 /* idiv (N/A). */
1622 }
1623 },
1624 /* LD/ST */
1625 {
1626 COSTS_N_INSNS (2), /* load. */
1627 0, /* load_sign_extend. */
1628 COSTS_N_INSNS (3), /* ldrd. */
1629 COSTS_N_INSNS (2), /* ldm_1st. */
1630 1, /* ldm_regs_per_insn_1st. */
1631 1, /* ldm_regs_per_insn_subsequent. */
1632 COSTS_N_INSNS (2), /* loadf. */
1633 COSTS_N_INSNS (3), /* loadd. */
1634 COSTS_N_INSNS (1), /* load_unaligned. */
1635 COSTS_N_INSNS (2), /* store. */
1636 COSTS_N_INSNS (3), /* strd. */
1637 COSTS_N_INSNS (2), /* stm_1st. */
1638 1, /* stm_regs_per_insn_1st. */
1639 1, /* stm_regs_per_insn_subsequent. */
1640 COSTS_N_INSNS (2), /* storef. */
1641 COSTS_N_INSNS (3), /* stored. */
1642 COSTS_N_INSNS (1), /* store_unaligned. */
1643 COSTS_N_INSNS (1), /* loadv. */
1644 COSTS_N_INSNS (1) /* storev. */
1645 },
1646 {
1647 /* FP SFmode */
1648 {
1649 COSTS_N_INSNS (7), /* div. */
1650 COSTS_N_INSNS (2), /* mult. */
1651 COSTS_N_INSNS (5), /* mult_addsub. */
1652 COSTS_N_INSNS (3), /* fma. */
1653 COSTS_N_INSNS (1), /* addsub. */
1654 0, /* fpconst. */
1655 0, /* neg. */
1656 0, /* compare. */
1657 0, /* widen. */
1658 0, /* narrow. */
1659 0, /* toint. */
1660 0, /* fromint. */
1661 0 /* roundint. */
1662 },
1663 /* FP DFmode */
1664 {
1665 COSTS_N_INSNS (15), /* div. */
1666 COSTS_N_INSNS (5), /* mult. */
1667 COSTS_N_INSNS (7), /* mult_addsub. */
1668 COSTS_N_INSNS (7), /* fma. */
1669 COSTS_N_INSNS (3), /* addsub. */
1670 0, /* fpconst. */
1671 0, /* neg. */
1672 0, /* compare. */
1673 0, /* widen. */
1674 0, /* narrow. */
1675 0, /* toint. */
1676 0, /* fromint. */
1677 0 /* roundint. */
1678 }
1679 },
1680 /* Vector */
1681 {
1682 COSTS_N_INSNS (1) /* alu. */
1683 }
1684 };
1685
1686 const struct tune_params arm_slowmul_tune =
1687 {
1688 arm_slowmul_rtx_costs,
1689 NULL, /* Insn extra costs. */
1690 NULL, /* Sched adj cost. */
1691 arm_default_branch_cost,
1692 &arm_default_vec_cost,
1693 3, /* Constant limit. */
1694 5, /* Max cond insns. */
1695 8, /* Memset max inline. */
1696 1, /* Issue rate. */
1697 ARM_PREFETCH_NOT_BENEFICIAL,
1698 tune_params::PREF_CONST_POOL_TRUE,
1699 tune_params::PREF_LDRD_FALSE,
1700 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1701 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1702 tune_params::DISPARAGE_FLAGS_NEITHER,
1703 tune_params::PREF_NEON_64_FALSE,
1704 tune_params::PREF_NEON_STRINGOPS_FALSE,
1705 tune_params::FUSE_NOTHING,
1706 tune_params::SCHED_AUTOPREF_OFF
1707 };
1708
1709 const struct tune_params arm_fastmul_tune =
1710 {
1711 arm_fastmul_rtx_costs,
1712 NULL, /* Insn extra costs. */
1713 NULL, /* Sched adj cost. */
1714 arm_default_branch_cost,
1715 &arm_default_vec_cost,
1716 1, /* Constant limit. */
1717 5, /* Max cond insns. */
1718 8, /* Memset max inline. */
1719 1, /* Issue rate. */
1720 ARM_PREFETCH_NOT_BENEFICIAL,
1721 tune_params::PREF_CONST_POOL_TRUE,
1722 tune_params::PREF_LDRD_FALSE,
1723 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1724 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1725 tune_params::DISPARAGE_FLAGS_NEITHER,
1726 tune_params::PREF_NEON_64_FALSE,
1727 tune_params::PREF_NEON_STRINGOPS_FALSE,
1728 tune_params::FUSE_NOTHING,
1729 tune_params::SCHED_AUTOPREF_OFF
1730 };
1731
1732 /* StrongARM has early execution of branches, so a sequence that is worth
1733 skipping is shorter. Set max_insns_skipped to a lower value. */
1734
1735 const struct tune_params arm_strongarm_tune =
1736 {
1737 arm_fastmul_rtx_costs,
1738 NULL, /* Insn extra costs. */
1739 NULL, /* Sched adj cost. */
1740 arm_default_branch_cost,
1741 &arm_default_vec_cost,
1742 1, /* Constant limit. */
1743 3, /* Max cond insns. */
1744 8, /* Memset max inline. */
1745 1, /* Issue rate. */
1746 ARM_PREFETCH_NOT_BENEFICIAL,
1747 tune_params::PREF_CONST_POOL_TRUE,
1748 tune_params::PREF_LDRD_FALSE,
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1750 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1751 tune_params::DISPARAGE_FLAGS_NEITHER,
1752 tune_params::PREF_NEON_64_FALSE,
1753 tune_params::PREF_NEON_STRINGOPS_FALSE,
1754 tune_params::FUSE_NOTHING,
1755 tune_params::SCHED_AUTOPREF_OFF
1756 };
1757
1758 const struct tune_params arm_xscale_tune =
1759 {
1760 arm_xscale_rtx_costs,
1761 NULL, /* Insn extra costs. */
1762 xscale_sched_adjust_cost,
1763 arm_default_branch_cost,
1764 &arm_default_vec_cost,
1765 2, /* Constant limit. */
1766 3, /* Max cond insns. */
1767 8, /* Memset max inline. */
1768 1, /* Issue rate. */
1769 ARM_PREFETCH_NOT_BENEFICIAL,
1770 tune_params::PREF_CONST_POOL_TRUE,
1771 tune_params::PREF_LDRD_FALSE,
1772 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1773 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1774 tune_params::DISPARAGE_FLAGS_NEITHER,
1775 tune_params::PREF_NEON_64_FALSE,
1776 tune_params::PREF_NEON_STRINGOPS_FALSE,
1777 tune_params::FUSE_NOTHING,
1778 tune_params::SCHED_AUTOPREF_OFF
1779 };
1780
1781 const struct tune_params arm_9e_tune =
1782 {
1783 arm_9e_rtx_costs,
1784 NULL, /* Insn extra costs. */
1785 NULL, /* Sched adj cost. */
1786 arm_default_branch_cost,
1787 &arm_default_vec_cost,
1788 1, /* Constant limit. */
1789 5, /* Max cond insns. */
1790 8, /* Memset max inline. */
1791 1, /* Issue rate. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 tune_params::PREF_CONST_POOL_TRUE,
1794 tune_params::PREF_LDRD_FALSE,
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1797 tune_params::DISPARAGE_FLAGS_NEITHER,
1798 tune_params::PREF_NEON_64_FALSE,
1799 tune_params::PREF_NEON_STRINGOPS_FALSE,
1800 tune_params::FUSE_NOTHING,
1801 tune_params::SCHED_AUTOPREF_OFF
1802 };
1803
1804 const struct tune_params arm_marvell_pj4_tune =
1805 {
1806 arm_9e_rtx_costs,
1807 NULL, /* Insn extra costs. */
1808 NULL, /* Sched adj cost. */
1809 arm_default_branch_cost,
1810 &arm_default_vec_cost,
1811 1, /* Constant limit. */
1812 5, /* Max cond insns. */
1813 8, /* Memset max inline. */
1814 2, /* Issue rate. */
1815 ARM_PREFETCH_NOT_BENEFICIAL,
1816 tune_params::PREF_CONST_POOL_TRUE,
1817 tune_params::PREF_LDRD_FALSE,
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1819 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1820 tune_params::DISPARAGE_FLAGS_NEITHER,
1821 tune_params::PREF_NEON_64_FALSE,
1822 tune_params::PREF_NEON_STRINGOPS_FALSE,
1823 tune_params::FUSE_NOTHING,
1824 tune_params::SCHED_AUTOPREF_OFF
1825 };
1826
1827 const struct tune_params arm_v6t2_tune =
1828 {
1829 arm_9e_rtx_costs,
1830 NULL, /* Insn extra costs. */
1831 NULL, /* Sched adj cost. */
1832 arm_default_branch_cost,
1833 &arm_default_vec_cost,
1834 1, /* Constant limit. */
1835 5, /* Max cond insns. */
1836 8, /* Memset max inline. */
1837 1, /* Issue rate. */
1838 ARM_PREFETCH_NOT_BENEFICIAL,
1839 tune_params::PREF_CONST_POOL_FALSE,
1840 tune_params::PREF_LDRD_FALSE,
1841 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1842 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1843 tune_params::DISPARAGE_FLAGS_NEITHER,
1844 tune_params::PREF_NEON_64_FALSE,
1845 tune_params::PREF_NEON_STRINGOPS_FALSE,
1846 tune_params::FUSE_NOTHING,
1847 tune_params::SCHED_AUTOPREF_OFF
1848 };
1849
1850
1851 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1852 const struct tune_params arm_cortex_tune =
1853 {
1854 arm_9e_rtx_costs,
1855 &generic_extra_costs,
1856 NULL, /* Sched adj cost. */
1857 arm_default_branch_cost,
1858 &arm_default_vec_cost,
1859 1, /* Constant limit. */
1860 5, /* Max cond insns. */
1861 8, /* Memset max inline. */
1862 2, /* Issue rate. */
1863 ARM_PREFETCH_NOT_BENEFICIAL,
1864 tune_params::PREF_CONST_POOL_FALSE,
1865 tune_params::PREF_LDRD_FALSE,
1866 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1868 tune_params::DISPARAGE_FLAGS_NEITHER,
1869 tune_params::PREF_NEON_64_FALSE,
1870 tune_params::PREF_NEON_STRINGOPS_FALSE,
1871 tune_params::FUSE_NOTHING,
1872 tune_params::SCHED_AUTOPREF_OFF
1873 };
1874
1875 const struct tune_params arm_cortex_a8_tune =
1876 {
1877 arm_9e_rtx_costs,
1878 &cortexa8_extra_costs,
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 1, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 2, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_FALSE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_64_FALSE,
1893 tune_params::PREF_NEON_STRINGOPS_TRUE,
1894 tune_params::FUSE_NOTHING,
1895 tune_params::SCHED_AUTOPREF_OFF
1896 };
1897
1898 const struct tune_params arm_cortex_a7_tune =
1899 {
1900 arm_9e_rtx_costs,
1901 &cortexa7_extra_costs,
1902 NULL, /* Sched adj cost. */
1903 arm_default_branch_cost,
1904 &arm_default_vec_cost,
1905 1, /* Constant limit. */
1906 5, /* Max cond insns. */
1907 8, /* Memset max inline. */
1908 2, /* Issue rate. */
1909 ARM_PREFETCH_NOT_BENEFICIAL,
1910 tune_params::PREF_CONST_POOL_FALSE,
1911 tune_params::PREF_LDRD_FALSE,
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1913 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1914 tune_params::DISPARAGE_FLAGS_NEITHER,
1915 tune_params::PREF_NEON_64_FALSE,
1916 tune_params::PREF_NEON_STRINGOPS_TRUE,
1917 tune_params::FUSE_NOTHING,
1918 tune_params::SCHED_AUTOPREF_OFF
1919 };
1920
1921 const struct tune_params arm_cortex_a15_tune =
1922 {
1923 arm_9e_rtx_costs,
1924 &cortexa15_extra_costs,
1925 NULL, /* Sched adj cost. */
1926 arm_default_branch_cost,
1927 &arm_default_vec_cost,
1928 1, /* Constant limit. */
1929 2, /* Max cond insns. */
1930 8, /* Memset max inline. */
1931 3, /* Issue rate. */
1932 ARM_PREFETCH_NOT_BENEFICIAL,
1933 tune_params::PREF_CONST_POOL_FALSE,
1934 tune_params::PREF_LDRD_TRUE,
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1937 tune_params::DISPARAGE_FLAGS_ALL,
1938 tune_params::PREF_NEON_64_FALSE,
1939 tune_params::PREF_NEON_STRINGOPS_TRUE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_FULL
1942 };
1943
1944 const struct tune_params arm_cortex_a53_tune =
1945 {
1946 arm_9e_rtx_costs,
1947 &cortexa53_extra_costs,
1948 NULL, /* Sched adj cost. */
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 1, /* Constant limit. */
1952 5, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 2, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_FALSE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_64_FALSE,
1962 tune_params::PREF_NEON_STRINGOPS_TRUE,
1963 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1964 tune_params::SCHED_AUTOPREF_OFF
1965 };
1966
1967 const struct tune_params arm_cortex_a57_tune =
1968 {
1969 arm_9e_rtx_costs,
1970 &cortexa57_extra_costs,
1971 NULL, /* Sched adj cost. */
1972 arm_default_branch_cost,
1973 &arm_default_vec_cost,
1974 1, /* Constant limit. */
1975 2, /* Max cond insns. */
1976 8, /* Memset max inline. */
1977 3, /* Issue rate. */
1978 ARM_PREFETCH_NOT_BENEFICIAL,
1979 tune_params::PREF_CONST_POOL_FALSE,
1980 tune_params::PREF_LDRD_TRUE,
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1983 tune_params::DISPARAGE_FLAGS_ALL,
1984 tune_params::PREF_NEON_64_FALSE,
1985 tune_params::PREF_NEON_STRINGOPS_TRUE,
1986 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1987 tune_params::SCHED_AUTOPREF_FULL
1988 };
1989
1990 const struct tune_params arm_xgene1_tune =
1991 {
1992 arm_9e_rtx_costs,
1993 &xgene1_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 32, /* Memset max inline. */
2000 4, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_TRUE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_FALSE,
2009 tune_params::FUSE_NOTHING,
2010 tune_params::SCHED_AUTOPREF_OFF
2011 };
2012
2013 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2014 less appealing. Set max_insns_skipped to a low value. */
2015
2016 const struct tune_params arm_cortex_a5_tune =
2017 {
2018 arm_9e_rtx_costs,
2019 &cortexa5_extra_costs,
2020 NULL, /* Sched adj cost. */
2021 arm_cortex_a5_branch_cost,
2022 &arm_default_vec_cost,
2023 1, /* Constant limit. */
2024 1, /* Max cond insns. */
2025 8, /* Memset max inline. */
2026 2, /* Issue rate. */
2027 ARM_PREFETCH_NOT_BENEFICIAL,
2028 tune_params::PREF_CONST_POOL_FALSE,
2029 tune_params::PREF_LDRD_FALSE,
2030 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2031 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2032 tune_params::DISPARAGE_FLAGS_NEITHER,
2033 tune_params::PREF_NEON_64_FALSE,
2034 tune_params::PREF_NEON_STRINGOPS_TRUE,
2035 tune_params::FUSE_NOTHING,
2036 tune_params::SCHED_AUTOPREF_OFF
2037 };
2038
2039 const struct tune_params arm_cortex_a9_tune =
2040 {
2041 arm_9e_rtx_costs,
2042 &cortexa9_extra_costs,
2043 cortex_a9_sched_adjust_cost,
2044 arm_default_branch_cost,
2045 &arm_default_vec_cost,
2046 1, /* Constant limit. */
2047 5, /* Max cond insns. */
2048 8, /* Memset max inline. */
2049 2, /* Issue rate. */
2050 ARM_PREFETCH_BENEFICIAL(4,32,32),
2051 tune_params::PREF_CONST_POOL_FALSE,
2052 tune_params::PREF_LDRD_FALSE,
2053 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2054 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2055 tune_params::DISPARAGE_FLAGS_NEITHER,
2056 tune_params::PREF_NEON_64_FALSE,
2057 tune_params::PREF_NEON_STRINGOPS_FALSE,
2058 tune_params::FUSE_NOTHING,
2059 tune_params::SCHED_AUTOPREF_OFF
2060 };
2061
2062 const struct tune_params arm_cortex_a12_tune =
2063 {
2064 arm_9e_rtx_costs,
2065 &cortexa12_extra_costs,
2066 NULL, /* Sched adj cost. */
2067 arm_default_branch_cost,
2068 &arm_default_vec_cost, /* Vectorizer costs. */
2069 1, /* Constant limit. */
2070 2, /* Max cond insns. */
2071 8, /* Memset max inline. */
2072 2, /* Issue rate. */
2073 ARM_PREFETCH_NOT_BENEFICIAL,
2074 tune_params::PREF_CONST_POOL_FALSE,
2075 tune_params::PREF_LDRD_TRUE,
2076 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2077 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2078 tune_params::DISPARAGE_FLAGS_ALL,
2079 tune_params::PREF_NEON_64_FALSE,
2080 tune_params::PREF_NEON_STRINGOPS_TRUE,
2081 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2082 tune_params::SCHED_AUTOPREF_OFF
2083 };
2084
2085 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2086 single cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from
2087 the constant pool likewise takes two cycles to execute, but mildly increases
2088 pipelining opportunity (consecutive loads/stores can be pipelined together,
2089 saving one cycle), and may also improve icache utilisation. Hence we prefer
2090 the constant pool for such processors. */
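/* As an illustration of the trade-off above (hypothetical constant and
   register, not tied to any particular core): the value 0x12345678 can be
   materialised either with a pair of move-immediate instructions,

       movw  r0, #0x5678
       movt  r0, #0x1234

   or with a single PC-relative load from the literal pool,

       ldr   r0, =0x12345678

   The tuning below therefore sets PREF_CONST_POOL_TRUE so that the
   literal-pool form is preferred on these cores.  */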
2091
2092 const struct tune_params arm_v7m_tune =
2093 {
2094 arm_9e_rtx_costs,
2095 &v7m_extra_costs,
2096 NULL, /* Sched adj cost. */
2097 arm_cortex_m_branch_cost,
2098 &arm_default_vec_cost,
2099 1, /* Constant limit. */
2100 2, /* Max cond insns. */
2101 8, /* Memset max inline. */
2102 1, /* Issue rate. */
2103 ARM_PREFETCH_NOT_BENEFICIAL,
2104 tune_params::PREF_CONST_POOL_TRUE,
2105 tune_params::PREF_LDRD_FALSE,
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2108 tune_params::DISPARAGE_FLAGS_NEITHER,
2109 tune_params::PREF_NEON_64_FALSE,
2110 tune_params::PREF_NEON_STRINGOPS_FALSE,
2111 tune_params::FUSE_NOTHING,
2112 tune_params::SCHED_AUTOPREF_OFF
2113 };
2114
2115 /* Cortex-M7 tuning. */
2116
2117 const struct tune_params arm_cortex_m7_tune =
2118 {
2119 arm_9e_rtx_costs,
2120 &v7m_extra_costs,
2121 NULL, /* Sched adj cost. */
2122 arm_cortex_m7_branch_cost,
2123 &arm_default_vec_cost,
2124 0, /* Constant limit. */
2125 1, /* Max cond insns. */
2126 8, /* Memset max inline. */
2127 2, /* Issue rate. */
2128 ARM_PREFETCH_NOT_BENEFICIAL,
2129 tune_params::PREF_CONST_POOL_TRUE,
2130 tune_params::PREF_LDRD_FALSE,
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2133 tune_params::DISPARAGE_FLAGS_NEITHER,
2134 tune_params::PREF_NEON_64_FALSE,
2135 tune_params::PREF_NEON_STRINGOPS_FALSE,
2136 tune_params::FUSE_NOTHING,
2137 tune_params::SCHED_AUTOPREF_OFF
2138 };
2139
2140 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2141 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2142 const struct tune_params arm_v6m_tune =
2143 {
2144 arm_9e_rtx_costs,
2145 NULL, /* Insn extra costs. */
2146 NULL, /* Sched adj cost. */
2147 arm_default_branch_cost,
2148 &arm_default_vec_cost, /* Vectorizer costs. */
2149 1, /* Constant limit. */
2150 5, /* Max cond insns. */
2151 8, /* Memset max inline. */
2152 1, /* Issue rate. */
2153 ARM_PREFETCH_NOT_BENEFICIAL,
2154 tune_params::PREF_CONST_POOL_FALSE,
2155 tune_params::PREF_LDRD_FALSE,
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2158 tune_params::DISPARAGE_FLAGS_NEITHER,
2159 tune_params::PREF_NEON_64_FALSE,
2160 tune_params::PREF_NEON_STRINGOPS_FALSE,
2161 tune_params::FUSE_NOTHING,
2162 tune_params::SCHED_AUTOPREF_OFF
2163 };
2164
2165 const struct tune_params arm_fa726te_tune =
2166 {
2167 arm_9e_rtx_costs,
2168 NULL, /* Insn extra costs. */
2169 fa726te_sched_adjust_cost,
2170 arm_default_branch_cost,
2171 &arm_default_vec_cost,
2172 1, /* Constant limit. */
2173 5, /* Max cond insns. */
2174 8, /* Memset max inline. */
2175 2, /* Issue rate. */
2176 ARM_PREFETCH_NOT_BENEFICIAL,
2177 tune_params::PREF_CONST_POOL_TRUE,
2178 tune_params::PREF_LDRD_FALSE,
2179 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2181 tune_params::DISPARAGE_FLAGS_NEITHER,
2182 tune_params::PREF_NEON_64_FALSE,
2183 tune_params::PREF_NEON_STRINGOPS_FALSE,
2184 tune_params::FUSE_NOTHING,
2185 tune_params::SCHED_AUTOPREF_OFF
2186 };
2187
2188
2189 /* Not all of these give usefully different compilation alternatives,
2190 but there is no simple way of generalizing them. */
2191 static const struct processors all_cores[] =
2192 {
2193 /* ARM Cores */
2194 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2195 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2196 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2197 #include "arm-cores.def"
2198 #undef ARM_CORE
2199 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2200 };
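/* For reference, a hypothetical arm-cores.def entry such as

     ARM_CORE ("foo", foo, foo, 7A, FL_LDSCHED, cortex_a8)

   would expand through the ARM_CORE macro above to the initialiser

     {"foo", foo, "7A", BASE_ARCH_7A, FL_LDSCHED | FL_FOR_ARCH7A,
      &arm_cortex_a8_tune},

   so each row of all_cores carries the core's architecture flags and a
   pointer to its tuning parameters.  */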
2201
2202 static const struct processors all_architectures[] =
2203 {
2204 /* ARM Architectures */
2205 /* We don't specify tuning costs here as it will be figured out
2206 from the core. */
2207
2208 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2209 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2210 #include "arm-arches.def"
2211 #undef ARM_ARCH
2212 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2213 };
2214
2215
2216 /* These are populated as command line arguments are processed, or NULL
2217 if not specified. */
2218 static const struct processors *arm_selected_arch;
2219 static const struct processors *arm_selected_cpu;
2220 static const struct processors *arm_selected_tune;
2221
2222 /* The name of the preprocessor macro to define for this architecture. */
2223
2224 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2225
2226 /* Available values for -mfpu=. */
2227
2228 static const struct arm_fpu_desc all_fpus[] =
2229 {
2230 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2231 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2232 #include "arm-fpus.def"
2233 #undef ARM_FPU
2234 };
2235
2236
2237 /* Supported TLS relocations. */
2238
2239 enum tls_reloc {
2240 TLS_GD32,
2241 TLS_LDM32,
2242 TLS_LDO32,
2243 TLS_IE32,
2244 TLS_LE32,
2245 TLS_DESCSEQ /* GNU scheme */
2246 };
2247
2248 /* The maximum number of insns to be used when loading a constant. */
2249 inline static int
2250 arm_constant_limit (bool size_p)
2251 {
2252 return size_p ? 1 : current_tune->constant_limit;
2253 }
2254
2255 /* Emit an insn that's a simple single-set. Both the operands must be known
2256 to be valid. */
2257 inline static rtx_insn *
2258 emit_set_insn (rtx x, rtx y)
2259 {
2260 return emit_insn (gen_rtx_SET (x, y));
2261 }
2262
2263 /* Return the number of bits set in VALUE. */
2264 static unsigned
2265 bit_count (unsigned long value)
2266 {
2267 unsigned long count = 0;
2268
2269 while (value)
2270 {
2271 count++;
2272 value &= value - 1; /* Clear the least-significant set bit. */
2273 }
2274
2275 return count;
2276 }
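/* For example, with VALUE == 0x29 (binary 101001) the loop above runs three
   times: 101001 -> 101000 -> 100000 -> 0, so bit_count returns 3.  Each
   "value &= value - 1" step clears exactly one set bit.  */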
2277
2278 typedef struct
2279 {
2280 machine_mode mode;
2281 const char *name;
2282 } arm_fixed_mode_set;
2283
2284 /* A small helper for setting fixed-point libfuncs. */
2285
2286 static void
2287 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2288 const char *funcname, const char *modename,
2289 int num_suffix)
2290 {
2291 char buffer[50];
2292
2293 if (num_suffix == 0)
2294 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2295 else
2296 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2297
2298 set_optab_libfunc (optable, mode, buffer);
2299 }
2300
2301 static void
2302 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2303 machine_mode from, const char *funcname,
2304 const char *toname, const char *fromname)
2305 {
2306 char buffer[50];
2307 const char *maybe_suffix_2 = "";
2308
2309 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2310 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2311 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2312 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2313 maybe_suffix_2 = "2";
2314
2315 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2316 maybe_suffix_2);
2317
2318 set_conv_libfunc (optable, to, from, buffer);
2319 }
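/* For illustration, given the sprintf formats above, the fixed-point loops
   in arm_init_libfuncs below end up registering names such as
   "__gnu_addsq3" (add_optab on SQmode), "__gnu_fractsqdq2" (the SQmode ->
   DQmode fract conversion; the "2" suffix applies because both modes are
   signed fractional modes) and "__gnu_fractsqsf" (the SQmode -> SFmode
   conversion; no suffix, since SFmode is not a fixed-point mode).  */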
2320
2321 /* Set up library functions unique to ARM. */
2322
2323 static void
2324 arm_init_libfuncs (void)
2325 {
2326 /* For Linux, we have access to kernel support for atomic operations. */
2327 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2328 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2329
2330 /* There are no special library functions unless we are using the
2331 ARM BPABI. */
2332 if (!TARGET_BPABI)
2333 return;
2334
2335 /* The functions below are described in Section 4 of the "Run-Time
2336 ABI for the ARM architecture", Version 1.0. */
2337
2338 /* Double-precision floating-point arithmetic. Table 2. */
2339 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2340 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2341 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2342 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2343 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
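/* With the registrations above, a soft-float DFmode addition such as
   "x + y" becomes a call to __aeabi_dadd, with the operands passed in
   {r0, r1} and {r2, r3} and the result returned in {r0, r1}, as the base
   AAPCS specifies for 64-bit values.  */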
2344
2345 /* Double-precision comparisons. Table 3. */
2346 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2347 set_optab_libfunc (ne_optab, DFmode, NULL);
2348 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2349 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2350 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2351 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2352 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2353
2354 /* Single-precision floating-point arithmetic. Table 4. */
2355 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2356 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2357 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2358 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2359 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2360
2361 /* Single-precision comparisons. Table 5. */
2362 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2363 set_optab_libfunc (ne_optab, SFmode, NULL);
2364 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2365 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2366 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2367 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2368 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2369
2370 /* Floating-point to integer conversions. Table 6. */
2371 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2372 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2373 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2374 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2375 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2376 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2377 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2378 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2379
2380 /* Conversions between floating types. Table 7. */
2381 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2382 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2383
2384 /* Integer to floating-point conversions. Table 8. */
2385 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2386 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2387 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2388 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2389 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2390 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2391 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2392 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2393
2394 /* Long long. Table 9. */
2395 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2396 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2397 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2398 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2399 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2400 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2401 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2402 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2403
2404 /* Integer (32/32->32) division. \S 4.3.1. */
2405 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2406 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2407
2408 /* The divmod functions are designed so that they can be used for
2409 plain division, even though they return both the quotient and the
2410 remainder. The quotient is returned in the usual location (i.e.,
2411 r0 for SImode, {r0, r1} for DImode), just as would be expected
2412 for an ordinary division routine. Because the AAPCS calling
2413 conventions specify that all of { r0, r1, r2, r3 } are
2414 call-clobbered registers, there is no need to tell the compiler
2415 explicitly that those registers are clobbered by these
2416 routines. */
2417 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2418 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
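/* For example, a plain DImode division "q = a / b" is emitted as a call to
   __aeabi_ldivmod: the quotient comes back in {r0, r1} and is used directly,
   while the remainder (which the routine leaves in {r2, r3}) is simply
   ignored.  */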
2419
2420 /* For SImode division the ABI provides div-without-mod routines,
2421 which are faster. */
2422 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2423 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2424
2425 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2426 divmod libcalls instead. */
2427 set_optab_libfunc (smod_optab, DImode, NULL);
2428 set_optab_libfunc (umod_optab, DImode, NULL);
2429 set_optab_libfunc (smod_optab, SImode, NULL);
2430 set_optab_libfunc (umod_optab, SImode, NULL);
2431
2432 /* Half-precision float operations. The compiler handles all operations
2433 with NULL libfuncs by converting to SFmode. */
2434 switch (arm_fp16_format)
2435 {
2436 case ARM_FP16_FORMAT_IEEE:
2437 case ARM_FP16_FORMAT_ALTERNATIVE:
2438
2439 /* Conversions. */
2440 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2441 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2442 ? "__gnu_f2h_ieee"
2443 : "__gnu_f2h_alternative"));
2444 set_conv_libfunc (sext_optab, SFmode, HFmode,
2445 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2446 ? "__gnu_h2f_ieee"
2447 : "__gnu_h2f_alternative"));
2448
2449 /* Arithmetic. */
2450 set_optab_libfunc (add_optab, HFmode, NULL);
2451 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2452 set_optab_libfunc (smul_optab, HFmode, NULL);
2453 set_optab_libfunc (neg_optab, HFmode, NULL);
2454 set_optab_libfunc (sub_optab, HFmode, NULL);
2455
2456 /* Comparisons. */
2457 set_optab_libfunc (eq_optab, HFmode, NULL);
2458 set_optab_libfunc (ne_optab, HFmode, NULL);
2459 set_optab_libfunc (lt_optab, HFmode, NULL);
2460 set_optab_libfunc (le_optab, HFmode, NULL);
2461 set_optab_libfunc (ge_optab, HFmode, NULL);
2462 set_optab_libfunc (gt_optab, HFmode, NULL);
2463 set_optab_libfunc (unord_optab, HFmode, NULL);
2464 break;
2465
2466 default:
2467 break;
2468 }
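/* With the conversions registered above and the arithmetic and comparison
   libfuncs cleared, an expression such as "c = a + b" on __fp16 operands is
   compiled as a widen of each operand to SFmode (via the __gnu_h2f_* helper),
   a single-precision addition, and a truncation of the result back to HFmode
   (via __gnu_f2h_*).  */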
2469
2470 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2471 {
2472 const arm_fixed_mode_set fixed_arith_modes[] =
2473 {
2474 { QQmode, "qq" },
2475 { UQQmode, "uqq" },
2476 { HQmode, "hq" },
2477 { UHQmode, "uhq" },
2478 { SQmode, "sq" },
2479 { USQmode, "usq" },
2480 { DQmode, "dq" },
2481 { UDQmode, "udq" },
2482 { TQmode, "tq" },
2483 { UTQmode, "utq" },
2484 { HAmode, "ha" },
2485 { UHAmode, "uha" },
2486 { SAmode, "sa" },
2487 { USAmode, "usa" },
2488 { DAmode, "da" },
2489 { UDAmode, "uda" },
2490 { TAmode, "ta" },
2491 { UTAmode, "uta" }
2492 };
2493 const arm_fixed_mode_set fixed_conv_modes[] =
2494 {
2495 { QQmode, "qq" },
2496 { UQQmode, "uqq" },
2497 { HQmode, "hq" },
2498 { UHQmode, "uhq" },
2499 { SQmode, "sq" },
2500 { USQmode, "usq" },
2501 { DQmode, "dq" },
2502 { UDQmode, "udq" },
2503 { TQmode, "tq" },
2504 { UTQmode, "utq" },
2505 { HAmode, "ha" },
2506 { UHAmode, "uha" },
2507 { SAmode, "sa" },
2508 { USAmode, "usa" },
2509 { DAmode, "da" },
2510 { UDAmode, "uda" },
2511 { TAmode, "ta" },
2512 { UTAmode, "uta" },
2513 { QImode, "qi" },
2514 { HImode, "hi" },
2515 { SImode, "si" },
2516 { DImode, "di" },
2517 { TImode, "ti" },
2518 { SFmode, "sf" },
2519 { DFmode, "df" }
2520 };
2521 unsigned int i, j;
2522
2523 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2524 {
2525 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2526 "add", fixed_arith_modes[i].name, 3);
2527 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2528 "ssadd", fixed_arith_modes[i].name, 3);
2529 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2530 "usadd", fixed_arith_modes[i].name, 3);
2531 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2532 "sub", fixed_arith_modes[i].name, 3);
2533 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2534 "sssub", fixed_arith_modes[i].name, 3);
2535 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2536 "ussub", fixed_arith_modes[i].name, 3);
2537 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2538 "mul", fixed_arith_modes[i].name, 3);
2539 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2540 "ssmul", fixed_arith_modes[i].name, 3);
2541 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2542 "usmul", fixed_arith_modes[i].name, 3);
2543 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2544 "div", fixed_arith_modes[i].name, 3);
2545 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2546 "udiv", fixed_arith_modes[i].name, 3);
2547 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2548 "ssdiv", fixed_arith_modes[i].name, 3);
2549 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2550 "usdiv", fixed_arith_modes[i].name, 3);
2551 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2552 "neg", fixed_arith_modes[i].name, 2);
2553 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2554 "ssneg", fixed_arith_modes[i].name, 2);
2555 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2556 "usneg", fixed_arith_modes[i].name, 2);
2557 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2558 "ashl", fixed_arith_modes[i].name, 3);
2559 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2560 "ashr", fixed_arith_modes[i].name, 3);
2561 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2562 "lshr", fixed_arith_modes[i].name, 3);
2563 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2564 "ssashl", fixed_arith_modes[i].name, 3);
2565 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2566 "usashl", fixed_arith_modes[i].name, 3);
2567 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2568 "cmp", fixed_arith_modes[i].name, 2);
2569 }
2570
2571 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2572 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2573 {
2574 if (i == j
2575 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2576 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2577 continue;
2578
2579 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2580 fixed_conv_modes[j].mode, "fract",
2581 fixed_conv_modes[i].name,
2582 fixed_conv_modes[j].name);
2583 arm_set_fixed_conv_libfunc (satfract_optab,
2584 fixed_conv_modes[i].mode,
2585 fixed_conv_modes[j].mode, "satfract",
2586 fixed_conv_modes[i].name,
2587 fixed_conv_modes[j].name);
2588 arm_set_fixed_conv_libfunc (fractuns_optab,
2589 fixed_conv_modes[i].mode,
2590 fixed_conv_modes[j].mode, "fractuns",
2591 fixed_conv_modes[i].name,
2592 fixed_conv_modes[j].name);
2593 arm_set_fixed_conv_libfunc (satfractuns_optab,
2594 fixed_conv_modes[i].mode,
2595 fixed_conv_modes[j].mode, "satfractuns",
2596 fixed_conv_modes[i].name,
2597 fixed_conv_modes[j].name);
2598 }
2599 }
2600
2601 if (TARGET_AAPCS_BASED)
2602 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2603 }
2604
2605 /* On AAPCS systems, this is the "struct __va_list". */
2606 static GTY(()) tree va_list_type;
2607
2608 /* Return the type to use as __builtin_va_list. */
2609 static tree
2610 arm_build_builtin_va_list (void)
2611 {
2612 tree va_list_name;
2613 tree ap_field;
2614
2615 if (!TARGET_AAPCS_BASED)
2616 return std_build_builtin_va_list ();
2617
2618 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2619 defined as:
2620
2621 struct __va_list
2622 {
2623 void *__ap;
2624 };
2625
2626 The C Library ABI further reinforces this definition in \S
2627 4.1.
2628
2629 We must follow this definition exactly. The structure tag
2630 name is visible in C++ mangled names, and thus forms a part
2631 of the ABI. The field name may be used by people who
2632 #include <stdarg.h>. */
2633 /* Create the type. */
2634 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2635 /* Give it the required name. */
2636 va_list_name = build_decl (BUILTINS_LOCATION,
2637 TYPE_DECL,
2638 get_identifier ("__va_list"),
2639 va_list_type);
2640 DECL_ARTIFICIAL (va_list_name) = 1;
2641 TYPE_NAME (va_list_type) = va_list_name;
2642 TYPE_STUB_DECL (va_list_type) = va_list_name;
2643 /* Create the __ap field. */
2644 ap_field = build_decl (BUILTINS_LOCATION,
2645 FIELD_DECL,
2646 get_identifier ("__ap"),
2647 ptr_type_node);
2648 DECL_ARTIFICIAL (ap_field) = 1;
2649 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2650 TYPE_FIELDS (va_list_type) = ap_field;
2651 /* Compute its layout. */
2652 layout_type (va_list_type);
2653
2654 return va_list_type;
2655 }
2656
2657 /* Return an expression of type "void *" pointing to the next
2658 available argument in a variable-argument list. VALIST is the
2659 user-level va_list object, of type __builtin_va_list. */
2660 static tree
2661 arm_extract_valist_ptr (tree valist)
2662 {
2663 if (TREE_TYPE (valist) == error_mark_node)
2664 return error_mark_node;
2665
2666 /* On an AAPCS target, the pointer is stored within "struct
2667 va_list". */
2668 if (TARGET_AAPCS_BASED)
2669 {
2670 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2671 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2672 valist, ap_field, NULL_TREE);
2673 }
2674
2675 return valist;
2676 }
2677
2678 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2679 static void
2680 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2681 {
2682 valist = arm_extract_valist_ptr (valist);
2683 std_expand_builtin_va_start (valist, nextarg);
2684 }
2685
2686 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2687 static tree
2688 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2689 gimple_seq *post_p)
2690 {
2691 valist = arm_extract_valist_ptr (valist);
2692 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2693 }
2694
2695 /* Check any incompatible options that the user has specified. */
2696 static void
2697 arm_option_check_internal (struct gcc_options *opts)
2698 {
2699 int flags = opts->x_target_flags;
2700
2701 /* Make sure that the processor choice does not conflict with any of the
2702 other command line choices. */
2703 if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM))
2704 error ("target CPU does not support ARM mode");
2705
2706 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2707 from here where no function is being compiled currently. */
2708 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2709 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2710
2711 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2712 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2713
2714 /* If this target is normally configured to use APCS frames, warn if they
2715 are turned off and debugging is turned on. */
2716 if (TARGET_ARM_P (flags)
2717 && write_symbols != NO_DEBUG
2718 && !TARGET_APCS_FRAME
2719 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2720 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2721
2722 /* iWMMXt unsupported under Thumb mode. */
2723 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2724 error ("iWMMXt unsupported under Thumb mode");
2725
2726 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2727 error ("can not use -mtp=cp15 with 16-bit Thumb");
2728
2729 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2730 {
2731 error ("RTP PIC is incompatible with Thumb");
2732 flag_pic = 0;
2733 }
2734
2735 /* We only support -mslow-flash-data on armv7-m targets. */
2736 if (target_slow_flash_data
2737 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2738 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2739 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2740 }
2741
2742 /* Set params depending on attributes and optimization options. */
2743 static void
2744 arm_option_params_internal (struct gcc_options *opts)
2745 {
2746 int flags = opts->x_target_flags;
2747
2748 /* If we are not using the default (ARM mode) section anchor offset
2749 ranges, then set the correct ranges now. */
2750 if (TARGET_THUMB1_P (flags))
2751 {
2752 /* Thumb-1 LDR instructions cannot have negative offsets.
2753 Permissible positive offset ranges are 5-bit (for byte loads),
2754 6-bit (for halfword loads), or 7-bit (for word loads).
2755 Empirical results suggest a 7-bit anchor range gives the best
2756 overall code size. */
2757 targetm.min_anchor_offset = 0;
2758 targetm.max_anchor_offset = 127;
2759 }
2760 else if (TARGET_THUMB2_P (flags))
2761 {
2762 /* The minimum is set such that the total size of the block
2763 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2764 divisible by eight (4344 = 8 * 543), ensuring natural spacing of anchors. */
2765 targetm.min_anchor_offset = -248;
2766 targetm.max_anchor_offset = 4095;
2767 }
2768 else
2769 {
2770 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2771 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2772 }
2773
2774 if (optimize_size)
2775 {
2776 /* If optimizing for size, bump the number of instructions that we
2777 are prepared to conditionally execute (even on a StrongARM). */
2778 max_insns_skipped = 6;
2779
2780 /* For THUMB2, we limit the conditional sequence to one IT block. */
2781 if (TARGET_THUMB2_P (flags))
2782 max_insns_skipped = opts->x_arm_restrict_it ? 1 : 4;
2783 }
2784 else
2785 /* When -mrestrict-it is in use, tone down the if-conversion. */
2786 max_insns_skipped
2787 = (TARGET_THUMB2_P (opts->x_target_flags) && opts->x_arm_restrict_it)
2788 ? 1 : current_tune->max_insns_skipped;
2789 }
2790
2791 /* True if -mflip-thumb should next add an attribute for the default
2792 mode, false if it should next add an attribute for the opposite mode. */
2793 static GTY(()) bool thumb_flipper;
2794
2795 /* Options after initial target override. */
2796 static GTY(()) tree init_optimize;
2797
2798 /* Reset options between modes that the user has specified. */
2799 static void
2800 arm_option_override_internal (struct gcc_options *opts,
2801 struct gcc_options *opts_set)
2802 {
2803 if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB))
2804 {
2805 warning (0, "target CPU does not support THUMB instructions");
2806 opts->x_target_flags &= ~MASK_THUMB;
2807 }
2808
2809 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2810 {
2811 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2812 opts->x_target_flags &= ~MASK_APCS_FRAME;
2813 }
2814
2815 /* Callee super interworking implies thumb interworking. Adding
2816 this to the flags here simplifies the logic elsewhere. */
2817 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2818 opts->x_target_flags |= MASK_INTERWORK;
2819
2820 /* Need to remember initial values so combinations of options like
2821 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2822 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2823
2824 if (! opts_set->x_arm_restrict_it)
2825 opts->x_arm_restrict_it = arm_arch8;
2826
2827 if (!TARGET_THUMB2_P (opts->x_target_flags))
2828 opts->x_arm_restrict_it = 0;
2829
2830 /* Don't warn since it's on by default in -O2. */
2831 if (TARGET_THUMB1_P (opts->x_target_flags))
2832 opts->x_flag_schedule_insns = 0;
2833 else
2834 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2835
2836 /* Disable shrink-wrap when optimizing function for size, since it tends to
2837 generate additional returns. */
2838 if (optimize_function_for_size_p (cfun)
2839 && TARGET_THUMB2_P (opts->x_target_flags))
2840 opts->x_flag_shrink_wrap = false;
2841 else
2842 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2843
2844 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2845 - epilogue_insns - does not accurately model the corresponding insns
2846 emitted in the asm file. In particular, see the comment in thumb_exit
2847 'Find out how many of the (return) argument registers we can corrupt'.
2848 As a consequence, the epilogue may clobber registers without fipa-ra
2849 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2850 TODO: Accurately model clobbers for epilogue_insns and reenable
2851 fipa-ra. */
2852 if (TARGET_THUMB1_P (opts->x_target_flags))
2853 opts->x_flag_ipa_ra = 0;
2854 else
2855 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
2856
2857 /* Thumb2 inline assembly code should always use unified syntax.
2858 This will apply to ARM and Thumb1 eventually. */
2859 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
2860 }
2861
2862 /* Fix up any incompatible options that the user has specified. */
2863 static void
2864 arm_option_override (void)
2865 {
2866 arm_selected_arch = NULL;
2867 arm_selected_cpu = NULL;
2868 arm_selected_tune = NULL;
2869
2870 if (global_options_set.x_arm_arch_option)
2871 arm_selected_arch = &all_architectures[arm_arch_option];
2872
2873 if (global_options_set.x_arm_cpu_option)
2874 {
2875 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2876 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2877 }
2878
2879 if (global_options_set.x_arm_tune_option)
2880 arm_selected_tune = &all_cores[(int) arm_tune_option];
2881
2882 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2883 SUBTARGET_OVERRIDE_OPTIONS;
2884 #endif
2885
2886 if (arm_selected_arch)
2887 {
2888 if (arm_selected_cpu)
2889 {
2890 /* Check for conflict between mcpu and march. */
2891 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2892 {
2893 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2894 arm_selected_cpu->name, arm_selected_arch->name);
2895 /* -march wins for code generation.
2896 -mcpu wins for default tuning. */
2897 if (!arm_selected_tune)
2898 arm_selected_tune = arm_selected_cpu;
2899
2900 arm_selected_cpu = arm_selected_arch;
2901 }
2902 else
2903 /* -mcpu wins. */
2904 arm_selected_arch = NULL;
2905 }
2906 else
2907 /* Pick a CPU based on the architecture. */
2908 arm_selected_cpu = arm_selected_arch;
2909 }
2910
2911 /* If the user did not specify a processor, choose one for them. */
2912 if (!arm_selected_cpu)
2913 {
2914 const struct processors * sel;
2915 unsigned int sought;
2916
2917 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2918 if (!arm_selected_cpu->name)
2919 {
2920 #ifdef SUBTARGET_CPU_DEFAULT
2921 /* Use the subtarget default CPU if none was specified by
2922 configure. */
2923 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2924 #endif
2925 /* Default to ARM6. */
2926 if (!arm_selected_cpu->name)
2927 arm_selected_cpu = &all_cores[arm6];
2928 }
2929
2930 sel = arm_selected_cpu;
2931 insn_flags = sel->flags;
2932
2933 /* Now check to see if the user has specified some command line
2934 switches that require certain abilities from the cpu. */
2935 sought = 0;
2936
2937 if (TARGET_INTERWORK || TARGET_THUMB)
2938 {
2939 sought |= (FL_THUMB | FL_MODE32);
2940
2941 /* There are no ARM processors that support both APCS-26 and
2942 interworking. Therefore we force FL_MODE26 to be removed
2943 from insn_flags here (if it was set), so that the search
2944 below will always be able to find a compatible processor. */
2945 insn_flags &= ~FL_MODE26;
2946 }
2947
2948 if (sought != 0 && ((sought & insn_flags) != sought))
2949 {
2950 /* Try to locate a CPU type that supports all of the abilities
2951 of the default CPU, plus the extra abilities requested by
2952 the user. */
2953 for (sel = all_cores; sel->name != NULL; sel++)
2954 if ((sel->flags & sought) == (sought | insn_flags))
2955 break;
2956
2957 if (sel->name == NULL)
2958 {
2959 unsigned current_bit_count = 0;
2960 const struct processors * best_fit = NULL;
2961
2962 /* Ideally we would like to issue an error message here
2963 saying that it was not possible to find a CPU compatible
2964 with the default CPU, but which also supports the command
2965 line options specified by the programmer, and so they
2966 ought to use the -mcpu=<name> command line option to
2967 override the default CPU type.
2968
2969 If we cannot find a cpu that has both the
2970 characteristics of the default cpu and the given
2971 command line options we scan the array again looking
2972 for a best match. */
2973 for (sel = all_cores; sel->name != NULL; sel++)
2974 if ((sel->flags & sought) == sought)
2975 {
2976 unsigned count;
2977
2978 count = bit_count (sel->flags & insn_flags);
2979
2980 if (count >= current_bit_count)
2981 {
2982 best_fit = sel;
2983 current_bit_count = count;
2984 }
2985 }
2986
2987 gcc_assert (best_fit);
2988 sel = best_fit;
2989 }
2990
2991 arm_selected_cpu = sel;
2992 }
2993 }
2994
2995 gcc_assert (arm_selected_cpu);
2996 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2997 if (!arm_selected_tune)
2998 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2999
3000 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3001 insn_flags = arm_selected_cpu->flags;
3002 arm_base_arch = arm_selected_cpu->base_arch;
3003
3004 arm_tune = arm_selected_tune->core;
3005 tune_flags = arm_selected_tune->flags;
3006 current_tune = arm_selected_tune->tune;
3007
3008 /* TBD: Dwarf info for apcs frame is not handled yet. */
3009 if (TARGET_APCS_FRAME)
3010 flag_shrink_wrap = false;
3011
3012 /* BPABI targets use linker tricks to allow interworking on cores
3013 without thumb support. */
3014 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
3015 {
3016 warning (0, "target CPU does not support interworking" );
3017 target_flags &= ~MASK_INTERWORK;
3018 }
3019
3020 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3021 {
3022 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3023 target_flags |= MASK_APCS_FRAME;
3024 }
3025
3026 if (TARGET_POKE_FUNCTION_NAME)
3027 target_flags |= MASK_APCS_FRAME;
3028
3029 if (TARGET_APCS_REENT && flag_pic)
3030 error ("-fpic and -mapcs-reent are incompatible");
3031
3032 if (TARGET_APCS_REENT)
3033 warning (0, "APCS reentrant code not supported. Ignored");
3034
3035 if (TARGET_APCS_FLOAT)
3036 warning (0, "passing floating point arguments in fp regs not yet supported");
3037
3038 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3039 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
3040 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
3041 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
3042 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
3043 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
3044 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
3045 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
3046 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
3047 arm_arch6m = arm_arch6 && !arm_arch_notm;
3048 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
3049 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
3050 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
3051 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
3052 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
3053
3054 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
3055 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
3056 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
3057 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
3058 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
3059 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
3060 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
3061 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
3062 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
3063 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3064 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
3065 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
3066
3067 /* V5 code we generate is completely interworking capable, so we turn off
3068 TARGET_INTERWORK here to avoid many tests later on. */
3069
3070 /* XXX However, we must pass the right pre-processor defines to CPP
3071 or GLD can get confused. This is a hack. */
3072 if (TARGET_INTERWORK)
3073 arm_cpp_interwork = 1;
3074
3075 if (arm_arch5)
3076 target_flags &= ~MASK_INTERWORK;
3077
3078 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3079 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3080
3081 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3082 error ("iwmmxt abi requires an iwmmxt capable cpu");
3083
3084 if (!global_options_set.x_arm_fpu_index)
3085 {
3086 const char *target_fpu_name;
3087 bool ok;
3088
3089 #ifdef FPUTYPE_DEFAULT
3090 target_fpu_name = FPUTYPE_DEFAULT;
3091 #else
3092 target_fpu_name = "vfp";
3093 #endif
3094
3095 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3096 CL_TARGET);
3097 gcc_assert (ok);
3098 }
3099
3100 arm_fpu_desc = &all_fpus[arm_fpu_index];
3101
3102 switch (arm_fpu_desc->model)
3103 {
3104 case ARM_FP_MODEL_VFP:
3105 arm_fpu_attr = FPU_VFP;
3106 break;
3107
3108 default:
3109 gcc_unreachable();
3110 }
3111
3112 if (TARGET_AAPCS_BASED)
3113 {
3114 if (TARGET_CALLER_INTERWORKING)
3115 error ("AAPCS does not support -mcaller-super-interworking");
3116 else
3117 if (TARGET_CALLEE_INTERWORKING)
3118 error ("AAPCS does not support -mcallee-super-interworking");
3119 }
3120
3121 /* iWMMXt and NEON are incompatible. */
3122 if (TARGET_IWMMXT && TARGET_NEON)
3123 error ("iWMMXt and NEON are incompatible");
3124
3125 /* __fp16 support currently assumes the core has ldrh. */
3126 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3127 sorry ("__fp16 and no ldrh");
3128
3129 /* If soft-float is specified then don't use FPU. */
3130 if (TARGET_SOFT_FLOAT)
3131 arm_fpu_attr = FPU_NONE;
3132
3133 if (TARGET_AAPCS_BASED)
3134 {
3135 if (arm_abi == ARM_ABI_IWMMXT)
3136 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3137 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3138 && TARGET_HARD_FLOAT
3139 && TARGET_VFP)
3140 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3141 else
3142 arm_pcs_default = ARM_PCS_AAPCS;
3143 }
3144 else
3145 {
3146 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3147 sorry ("-mfloat-abi=hard and VFP");
3148
3149 if (arm_abi == ARM_ABI_APCS)
3150 arm_pcs_default = ARM_PCS_APCS;
3151 else
3152 arm_pcs_default = ARM_PCS_ATPCS;
3153 }
3154
3155 /* For arm2/3 there is no need to do any scheduling if we are doing
3156 software floating-point. */
3157 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
3158 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3159
3160 /* Use the cp15 method if it is available. */
3161 if (target_thread_pointer == TP_AUTO)
3162 {
3163 if (arm_arch6k && !TARGET_THUMB1)
3164 target_thread_pointer = TP_CP15;
3165 else
3166 target_thread_pointer = TP_SOFT;
3167 }
3168
3169 /* Override the default structure alignment for AAPCS ABI. */
3170 if (!global_options_set.x_arm_structure_size_boundary)
3171 {
3172 if (TARGET_AAPCS_BASED)
3173 arm_structure_size_boundary = 8;
3174 }
3175 else
3176 {
3177 if (arm_structure_size_boundary != 8
3178 && arm_structure_size_boundary != 32
3179 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3180 {
3181 if (ARM_DOUBLEWORD_ALIGN)
3182 warning (0,
3183 "structure size boundary can only be set to 8, 32 or 64");
3184 else
3185 warning (0, "structure size boundary can only be set to 8 or 32");
3186 arm_structure_size_boundary
3187 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3188 }
3189 }
3190
3191 /* If stack checking is disabled, we can use r10 as the PIC register,
3192 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3193 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3194 {
3195 if (TARGET_VXWORKS_RTP)
3196 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3197 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3198 }
3199
3200 if (flag_pic && TARGET_VXWORKS_RTP)
3201 arm_pic_register = 9;
3202
3203 if (arm_pic_register_string != NULL)
3204 {
3205 int pic_register = decode_reg_name (arm_pic_register_string);
3206
3207 if (!flag_pic)
3208 warning (0, "-mpic-register= is useless without -fpic");
3209
3210 /* Prevent the user from choosing an obviously stupid PIC register. */
3211 else if (pic_register < 0 || call_used_regs[pic_register]
3212 || pic_register == HARD_FRAME_POINTER_REGNUM
3213 || pic_register == STACK_POINTER_REGNUM
3214 || pic_register >= PC_REGNUM
3215 || (TARGET_VXWORKS_RTP
3216 && (unsigned int) pic_register != arm_pic_register))
3217 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3218 else
3219 arm_pic_register = pic_register;
3220 }
3221
3222 if (TARGET_VXWORKS_RTP
3223 && !global_options_set.x_arm_pic_data_is_text_relative)
3224 arm_pic_data_is_text_relative = 0;
3225
3226 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3227 if (fix_cm3_ldrd == 2)
3228 {
3229 if (arm_selected_cpu->core == cortexm3)
3230 fix_cm3_ldrd = 1;
3231 else
3232 fix_cm3_ldrd = 0;
3233 }
3234
3235 /* Enable -munaligned-access by default for
3236 - all ARMv6 architecture-based processors
3237 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3238 - ARMv8 architecture-based processors.
3239
3240 Disable -munaligned-access by default for
3241 - all pre-ARMv6 architecture-based processors
3242 - ARMv6-M architecture-based processors. */
3243
3244 if (unaligned_access == 2)
3245 {
3246 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3247 unaligned_access = 1;
3248 else
3249 unaligned_access = 0;
3250 }
3251 else if (unaligned_access == 1
3252 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3253 {
3254 warning (0, "target CPU does not support unaligned accesses");
3255 unaligned_access = 0;
3256 }
3257
3258 /* Hot/Cold partitioning is not currently supported, since we can't
3259 handle literal pool placement in that case. */
3260 if (flag_reorder_blocks_and_partition)
3261 {
3262 inform (input_location,
3263 "-freorder-blocks-and-partition not supported on this architecture");
3264 flag_reorder_blocks_and_partition = 0;
3265 flag_reorder_blocks = 1;
3266 }
3267
3268 if (flag_pic)
3269 /* Hoisting PIC address calculations more aggressively provides a small,
3270 but measurable, size reduction for PIC code. Therefore, we decrease
3271 the bar for unrestricted expression hoisting to the cost of PIC address
3272 calculation, which is 2 instructions. */
3273 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3274 global_options.x_param_values,
3275 global_options_set.x_param_values);
3276
3277 /* ARM EABI defaults to strict volatile bitfields. */
3278 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3279 && abi_version_at_least(2))
3280 flag_strict_volatile_bitfields = 1;
3281
3282 /* Enable software prefetching at -O3 for CPUs that have a prefetch
3283 instruction and for which we have deemed it beneficial (signified by
3284 setting prefetch.num_slots to 1 or more). */
3285 if (flag_prefetch_loop_arrays < 0
3286 && HAVE_prefetch
3287 && optimize >= 3
3288 && current_tune->prefetch.num_slots > 0)
3289 flag_prefetch_loop_arrays = 1;
3290
3291 /* Set up the parameters used by the prefetching algorithm. Do not
3292 override the defaults unless we are tuning for a core we have
3293 researched values for. */
3294 if (current_tune->prefetch.num_slots > 0)
3295 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3296 current_tune->prefetch.num_slots,
3297 global_options.x_param_values,
3298 global_options_set.x_param_values);
3299 if (current_tune->prefetch.l1_cache_line_size >= 0)
3300 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3301 current_tune->prefetch.l1_cache_line_size,
3302 global_options.x_param_values,
3303 global_options_set.x_param_values);
3304 if (current_tune->prefetch.l1_cache_size >= 0)
3305 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3306 current_tune->prefetch.l1_cache_size,
3307 global_options.x_param_values,
3308 global_options_set.x_param_values);
3309
3310 /* Use Neon rather than core registers to perform 64-bit
3311 operations. */
3312 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3313 if (use_neon_for_64bits == 1)
3314 prefer_neon_for_64bits = true;
3315
3316 /* Use the alternative scheduling-pressure algorithm by default. */
3317 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3318 global_options.x_param_values,
3319 global_options_set.x_param_values);
3320
3321 /* Look through the ready list and all of the queue for instructions
3322 relevant to the L2 auto-prefetcher. */
3323 int param_sched_autopref_queue_depth;
3324
3325 switch (current_tune->sched_autopref)
3326 {
3327 case tune_params::SCHED_AUTOPREF_OFF:
3328 param_sched_autopref_queue_depth = -1;
3329 break;
3330
3331 case tune_params::SCHED_AUTOPREF_RANK:
3332 param_sched_autopref_queue_depth = 0;
3333 break;
3334
3335 case tune_params::SCHED_AUTOPREF_FULL:
3336 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3337 break;
3338
3339 default:
3340 gcc_unreachable ();
3341 }
3342
3343 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3344 param_sched_autopref_queue_depth,
3345 global_options.x_param_values,
3346 global_options_set.x_param_values);
3347
3348 /* Currently, for slow flash data, we just disable literal pools. */
3349 if (target_slow_flash_data)
3350 arm_disable_literal_pool = true;
3351
3352 /* Disable scheduling fusion by default unless the target is an ARMv7
3353 processor that prefers ldrd/strd. */
3354 if (flag_schedule_fusion == 2
3355 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3356 flag_schedule_fusion = 0;
3357
3358 /* Need to remember initial options before they are overridden. */
3359 init_optimize = build_optimization_node (&global_options);
3360
3361 arm_option_override_internal (&global_options, &global_options_set);
3362 arm_option_check_internal (&global_options);
3363 arm_option_params_internal (&global_options);
3364
3365 /* Register global variables with the garbage collector. */
3366 arm_add_gc_roots ();
3367
3368 /* Save the initial options in case the user does function specific
3369 options. */
3370 target_option_default_node = target_option_current_node
3371 = build_target_option_node (&global_options);
3372
3373 /* Record the initial mode for later testing. */
3374 thumb_flipper = TARGET_THUMB;
3375 }
3376
3377 static void
3378 arm_add_gc_roots (void)
3379 {
3380 gcc_obstack_init(&minipool_obstack);
3381 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3382 }
3383 \f
3384 /* A table of known ARM exception types.
3385 For use with the interrupt function attribute. */
3386
3387 typedef struct
3388 {
3389 const char *const arg;
3390 const unsigned long return_value;
3391 }
3392 isr_attribute_arg;
3393
3394 static const isr_attribute_arg isr_attribute_args [] =
3395 {
3396 { "IRQ", ARM_FT_ISR },
3397 { "irq", ARM_FT_ISR },
3398 { "FIQ", ARM_FT_FIQ },
3399 { "fiq", ARM_FT_FIQ },
3400 { "ABORT", ARM_FT_ISR },
3401 { "abort", ARM_FT_ISR },
3402 { "ABORT", ARM_FT_ISR },
3403 { "abort", ARM_FT_ISR },
3404 { "UNDEF", ARM_FT_EXCEPTION },
3405 { "undef", ARM_FT_EXCEPTION },
3406 { "SWI", ARM_FT_EXCEPTION },
3407 { "swi", ARM_FT_EXCEPTION },
3408 { NULL, ARM_FT_NORMAL }
3409 };
3410
3411 /* Returns the (interrupt) function type of the current
3412 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3413
3414 static unsigned long
3415 arm_isr_value (tree argument)
3416 {
3417 const isr_attribute_arg * ptr;
3418 const char * arg;
3419
3420 if (!arm_arch_notm)
3421 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3422
3423 /* No argument - default to IRQ. */
3424 if (argument == NULL_TREE)
3425 return ARM_FT_ISR;
3426
3427 /* Get the value of the argument. */
3428 if (TREE_VALUE (argument) == NULL_TREE
3429 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3430 return ARM_FT_UNKNOWN;
3431
3432 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3433
3434 /* Check it against the list of known arguments. */
3435 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3436 if (streq (arg, ptr->arg))
3437 return ptr->return_value;
3438
3439 /* An unrecognized interrupt type. */
3440 return ARM_FT_UNKNOWN;
3441 }
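
/* Editorial note (illustrative, not part of the original source): the table
   above services the user-visible attribute syntax, e.g.

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   which reaches arm_isr_value with the string "FIQ" and is classified as
   ARM_FT_FIQ; with no argument, __attribute__ ((interrupt)) defaults to
   ARM_FT_ISR, as handled above.  */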
3442
3443 /* Computes the type of the current function. */
3444
3445 static unsigned long
3446 arm_compute_func_type (void)
3447 {
3448 unsigned long type = ARM_FT_UNKNOWN;
3449 tree a;
3450 tree attr;
3451
3452 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3453
3454 /* Decide if the current function is volatile. Such functions
3455 never return, and many memory cycles can be saved by not storing
3456 register values that will never be needed again. This optimization
3457 was added to speed up context switching in a kernel application. */
3458 if (optimize > 0
3459 && (TREE_NOTHROW (current_function_decl)
3460 || !(flag_unwind_tables
3461 || (flag_exceptions
3462 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3463 && TREE_THIS_VOLATILE (current_function_decl))
3464 type |= ARM_FT_VOLATILE;
3465
3466 if (cfun->static_chain_decl != NULL)
3467 type |= ARM_FT_NESTED;
3468
3469 attr = DECL_ATTRIBUTES (current_function_decl);
3470
3471 a = lookup_attribute ("naked", attr);
3472 if (a != NULL_TREE)
3473 type |= ARM_FT_NAKED;
3474
3475 a = lookup_attribute ("isr", attr);
3476 if (a == NULL_TREE)
3477 a = lookup_attribute ("interrupt", attr);
3478
3479 if (a == NULL_TREE)
3480 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3481 else
3482 type |= arm_isr_value (TREE_VALUE (a));
3483
3484 return type;
3485 }
3486
3487 /* Returns the type of the current function. */
3488
3489 unsigned long
3490 arm_current_func_type (void)
3491 {
3492 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3493 cfun->machine->func_type = arm_compute_func_type ();
3494
3495 return cfun->machine->func_type;
3496 }
3497
3498 bool
3499 arm_allocate_stack_slots_for_args (void)
3500 {
3501 /* Naked functions should not allocate stack slots for arguments. */
3502 return !IS_NAKED (arm_current_func_type ());
3503 }
3504
3505 static bool
3506 arm_warn_func_return (tree decl)
3507 {
3508 /* Naked functions are implemented entirely in assembly, including the
3509 return sequence, so suppress warnings about this. */
3510 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3511 }
3512
3513 \f
3514 /* Output assembler code for a block containing the constant parts
3515 of a trampoline, leaving space for the variable parts.
3516
3517 On the ARM, (if r8 is the static chain regnum, and remembering that
3518 referencing pc adds an offset of 8) the trampoline looks like:
3519 ldr r8, [pc, #0]
3520 ldr pc, [pc]
3521 .word static chain value
3522 .word function's address
3523 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3524
3525 static void
3526 arm_asm_trampoline_template (FILE *f)
3527 {
3528 if (TARGET_UNIFIED_ASM)
3529 fprintf (f, "\t.syntax unified\n");
3530 else
3531 fprintf (f, "\t.syntax divided\n");
3532
3533 if (TARGET_ARM)
3534 {
3535 fprintf (f, "\t.arm\n");
3536 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3537 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3538 }
3539 else if (TARGET_THUMB2)
3540 {
3541 fprintf (f, "\t.thumb\n");
3542 /* The Thumb-2 trampoline is similar to the arm implementation.
3543 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3544 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3545 STATIC_CHAIN_REGNUM, PC_REGNUM);
3546 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3547 }
3548 else
3549 {
3550 ASM_OUTPUT_ALIGN (f, 2);
3551 fprintf (f, "\t.code\t16\n");
3552 fprintf (f, ".Ltrampoline_start:\n");
3553 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3554 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3555 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3556 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3557 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3558 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3559 }
3560 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3561 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3562 }
3563
3564 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3565
3566 static void
3567 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3568 {
3569 rtx fnaddr, mem, a_tramp;
3570
3571 emit_block_move (m_tramp, assemble_trampoline_template (),
3572 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3573
3574 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3575 emit_move_insn (mem, chain_value);
3576
3577 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3578 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3579 emit_move_insn (mem, fnaddr);
3580
3581 a_tramp = XEXP (m_tramp, 0);
3582 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3583 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3584 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3585 }
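
/* Editorial sketch (derived from the template and the offsets used above;
   not normative): on a 32-bit (ARM/Thumb-2) target the initialized
   trampoline is laid out as

     offset  0:  first ldr of the template (loads the static chain register)
     offset  4:  second ldr of the template (loads pc)
     offset  8:  static chain value, written by arm_trampoline_init
     offset 12:  target function address, written by arm_trampoline_init

   and arm_trampoline_init finishes with a __clear_cache library call over
   the whole block so the freshly written code is visible to instruction
   fetch.  */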
3586
3587 /* Thumb trampolines should be entered in thumb mode, so set
3588 the bottom bit of the address. */
3589
3590 static rtx
3591 arm_trampoline_adjust_address (rtx addr)
3592 {
3593 if (TARGET_THUMB)
3594 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3595 NULL, 0, OPTAB_LIB_WIDEN);
3596 return addr;
3597 }
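
/* Editorial example (illustrative): if the trampoline sits at 0x8000 on a
   Thumb target, callers are handed 0x8001; the set low bit makes an
   interworking branch enter the code in Thumb state.  */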
3598 \f
3599 /* Return 1 if it is possible to return using a single instruction.
3600 If SIBLING is non-null, this is a test for a return before a sibling
3601 call. SIBLING is the call insn, so we can examine its register usage. */
3602
3603 int
3604 use_return_insn (int iscond, rtx sibling)
3605 {
3606 int regno;
3607 unsigned int func_type;
3608 unsigned long saved_int_regs;
3609 unsigned HOST_WIDE_INT stack_adjust;
3610 arm_stack_offsets *offsets;
3611
3612 /* Never use a return instruction before reload has run. */
3613 if (!reload_completed)
3614 return 0;
3615
3616 func_type = arm_current_func_type ();
3617
3618 /* Naked, volatile and stack alignment functions need special
3619 consideration. */
3620 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3621 return 0;
3622
3623 /* So do interrupt functions that use the frame pointer and Thumb
3624 interrupt functions. */
3625 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3626 return 0;
3627
3628 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3629 && !optimize_function_for_size_p (cfun))
3630 return 0;
3631
3632 offsets = arm_get_frame_offsets ();
3633 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3634
3635 /* As do variadic functions. */
3636 if (crtl->args.pretend_args_size
3637 || cfun->machine->uses_anonymous_args
3638 /* Or if the function calls __builtin_eh_return () */
3639 || crtl->calls_eh_return
3640 /* Or if the function calls alloca */
3641 || cfun->calls_alloca
3642 /* Or if there is a stack adjustment. However, if the stack pointer
3643 is saved on the stack, we can use a pre-incrementing stack load. */
3644 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3645 && stack_adjust == 4)))
3646 return 0;
3647
3648 saved_int_regs = offsets->saved_regs_mask;
3649
3650 /* Unfortunately, the insn
3651
3652 ldmib sp, {..., sp, ...}
3653
3654 triggers a bug on most SA-110 based devices, such that the stack
3655 pointer won't be correctly restored if the instruction takes a
3656 page fault. We work around this problem by popping r3 along with
3657 the other registers, since that is never slower than executing
3658 another instruction.
3659
3660 We test for !arm_arch5 here, because code for any architecture
3661 less than this could potentially be run on one of the buggy
3662 chips. */
3663 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3664 {
3665 /* Validate that r3 is a call-clobbered register (always true in
3666 the default abi) ... */
3667 if (!call_used_regs[3])
3668 return 0;
3669
3670 /* ... that it isn't being used for a return value ... */
3671 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3672 return 0;
3673
3674 /* ... or for a tail-call argument ... */
3675 if (sibling)
3676 {
3677 gcc_assert (CALL_P (sibling));
3678
3679 if (find_regno_fusage (sibling, USE, 3))
3680 return 0;
3681 }
3682
3683 /* ... and that there are no call-saved registers in r0-r2
3684 (always true in the default ABI). */
3685 if (saved_int_regs & 0x7)
3686 return 0;
3687 }
3688
3689 /* Can't be done if interworking with Thumb, and any registers have been
3690 stacked. */
3691 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3692 return 0;
3693
3694 /* On StrongARM, conditional returns are expensive if they aren't
3695 taken and multiple registers have been stacked. */
3696 if (iscond && arm_tune_strongarm)
3697 {
3698 /* Conditional return when just the LR is stored is a simple
3699 conditional-load instruction, that's not expensive. */
3700 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3701 return 0;
3702
3703 if (flag_pic
3704 && arm_pic_register != INVALID_REGNUM
3705 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3706 return 0;
3707 }
3708
3709 /* If there are saved registers but the LR isn't saved, then we need
3710 two instructions for the return. */
3711 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3712 return 0;
3713
3714 /* Can't be done if any of the VFP regs are pushed,
3715 since this also requires an insn. */
3716 if (TARGET_HARD_FLOAT && TARGET_VFP)
3717 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3718 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3719 return 0;
3720
3721 if (TARGET_REALLY_IWMMXT)
3722 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3723 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3724 return 0;
3725
3726 return 1;
3727 }
3728
3729 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3730 shrink-wrapping if possible. This is the case if we need to emit a
3731 prologue, which we can test by looking at the offsets. */
3732 bool
3733 use_simple_return_p (void)
3734 {
3735 arm_stack_offsets *offsets;
3736
3737 offsets = arm_get_frame_offsets ();
3738 return offsets->outgoing_args != 0;
3739 }
3740
3741 /* Return TRUE if int I is a valid immediate ARM constant. */
3742
3743 int
3744 const_ok_for_arm (HOST_WIDE_INT i)
3745 {
3746 int lowbit;
3747
3748 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3749 be all zero, or all one. */
3750 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3751 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3752 != ((~(unsigned HOST_WIDE_INT) 0)
3753 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3754 return FALSE;
3755
3756 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3757
3758 /* Fast return for 0 and small values. We must do this for zero, since
3759 the code below can't handle that one case. */
3760 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3761 return TRUE;
3762
3763 /* Get the number of trailing zeros. */
3764 lowbit = ffs((int) i) - 1;
3765
3766 /* Only even shifts are allowed in ARM mode so round down to the
3767 nearest even number. */
3768 if (TARGET_ARM)
3769 lowbit &= ~1;
3770
3771 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3772 return TRUE;
3773
3774 if (TARGET_ARM)
3775 {
3776 /* Allow rotated constants in ARM mode. */
3777 if (lowbit <= 4
3778 && ((i & ~0xc000003f) == 0
3779 || (i & ~0xf000000f) == 0
3780 || (i & ~0xfc000003) == 0))
3781 return TRUE;
3782 }
3783 else
3784 {
3785 HOST_WIDE_INT v;
3786
3787 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3788 v = i & 0xff;
3789 v |= v << 16;
3790 if (i == v || i == (v | (v << 8)))
3791 return TRUE;
3792
3793 /* Allow repeated pattern 0xXY00XY00. */
3794 v = i & 0xff00;
3795 v |= v << 16;
3796 if (i == v)
3797 return TRUE;
3798 }
3799
3800 return FALSE;
3801 }
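
/* Editorial sketch, guarded out of the build and not used by the compiler:
   a stand-alone restatement of the ARM-mode rule above -- a value is
   encodable as a data-processing immediate iff it is an 8-bit value rotated
   right by an even amount.  The helper name is purely illustrative.

   Worked examples: 0x000000ff, 0x0000ff00 and 0xf000000f (0xff rotated
   right by 4) are ARM-mode immediates; 0x000001fe (which needs an odd
   rotation) and 0x00010001 are not, although Thumb-2's replicated forms
   accept the latter, as the else-branch above shows.  */
#if 0
static int
example_arm_immediate_p (unsigned int x)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the result fits in 8 bits, then X is that
	 8-bit value rotated right by ROT.  */
      unsigned int r = (x << rot) | (x >> ((32 - rot) & 31));
      if ((r & ~0xffU) == 0)
	return 1;
    }
  return 0;
}
#endif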
3802
3803 /* Return true if I is a valid constant for the operation CODE. */
3804 int
3805 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3806 {
3807 if (const_ok_for_arm (i))
3808 return 1;
3809
3810 switch (code)
3811 {
3812 case SET:
3813 /* See if we can use movw. */
3814 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3815 return 1;
3816 else
3817 /* Otherwise, try mvn. */
3818 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3819
3820 case PLUS:
3821 /* See if we can use addw or subw. */
3822 if (TARGET_THUMB2
3823 && ((i & 0xfffff000) == 0
3824 || ((-i) & 0xfffff000) == 0))
3825 return 1;
3826 /* else fall through. */
3827
3828 case COMPARE:
3829 case EQ:
3830 case NE:
3831 case GT:
3832 case LE:
3833 case LT:
3834 case GE:
3835 case GEU:
3836 case LTU:
3837 case GTU:
3838 case LEU:
3839 case UNORDERED:
3840 case ORDERED:
3841 case UNEQ:
3842 case UNGE:
3843 case UNLT:
3844 case UNGT:
3845 case UNLE:
3846 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3847
3848 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3849 case XOR:
3850 return 0;
3851
3852 case IOR:
3853 if (TARGET_THUMB2)
3854 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3855 return 0;
3856
3857 case AND:
3858 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3859
3860 default:
3861 gcc_unreachable ();
3862 }
3863 }
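
/* Editorial worked examples for the fall-backs above (illustrative only):
   - SET of 0xffffff00: not itself encodable, but ~0xffffff00 == 0xff is, so
     a single MVN can load it.
   - PLUS of -255: 255 is encodable, so the addition can be emitted as a
     subtraction of 255.
   - AND with 0xffffff00: ~constant == 0xff is encodable, so the operation
     can be done with BIC.  */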
3864
3865 /* Return true if I is a valid DImode constant for the operation CODE. */
3866 int
3867 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3868 {
3869 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3870 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3871 rtx hi = GEN_INT (hi_val);
3872 rtx lo = GEN_INT (lo_val);
3873
3874 if (TARGET_THUMB1)
3875 return 0;
3876
3877 switch (code)
3878 {
3879 case AND:
3880 case IOR:
3881 case XOR:
3882 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3883 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3884 case PLUS:
3885 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3886
3887 default:
3888 return 0;
3889 }
3890 }
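
/* Editorial example (illustrative): a DImode AND with 0xffffffff000000ff is
   accepted above because the high word is 0xffffffff (that half is left
   unchanged by the AND) and the low word, 0xff, is itself a valid AND
   immediate.  */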
3891
3892 /* Emit a sequence of insns to handle a large constant.
3893 CODE is the code of the operation required; it can be any of SET, PLUS,
3894 IOR, AND, XOR, MINUS;
3895 MODE is the mode in which the operation is being performed;
3896 VAL is the integer to operate on;
3897 SOURCE is the other operand (a register, or a null-pointer for SET);
3898 SUBTARGETS means it is safe to create scratch registers if that will
3899 either produce a simpler sequence, or we will want to cse the values.
3900 Return value is the number of insns emitted. */
3901
3902 /* ??? Tweak this for thumb2. */
3903 int
3904 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3905 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3906 {
3907 rtx cond;
3908
3909 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3910 cond = COND_EXEC_TEST (PATTERN (insn));
3911 else
3912 cond = NULL_RTX;
3913
3914 if (subtargets || code == SET
3915 || (REG_P (target) && REG_P (source)
3916 && REGNO (target) != REGNO (source)))
3917 {
3918 /* After arm_reorg has been called, we can't fix up expensive
3919 constants by pushing them into memory so we must synthesize
3920 them in-line, regardless of the cost. This is only likely to
3921 be more costly on chips that have load delay slots when we are
3922 compiling without running the scheduler (so no splitting
3923 occurred before the final instruction emission).
3924
3925 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3926 */
3927 if (!cfun->machine->after_arm_reorg
3928 && !cond
3929 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3930 1, 0)
3931 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3932 + (code != SET))))
3933 {
3934 if (code == SET)
3935 {
3936 /* Currently SET is the only monadic value for CODE; all
3937 the rest are dyadic. */
3938 if (TARGET_USE_MOVT)
3939 arm_emit_movpair (target, GEN_INT (val));
3940 else
3941 emit_set_insn (target, GEN_INT (val));
3942
3943 return 1;
3944 }
3945 else
3946 {
3947 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3948
3949 if (TARGET_USE_MOVT)
3950 arm_emit_movpair (temp, GEN_INT (val));
3951 else
3952 emit_set_insn (temp, GEN_INT (val));
3953
3954 /* For MINUS, the value is subtracted from, since we never
3955 have subtraction of a constant. */
3956 if (code == MINUS)
3957 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3958 else
3959 emit_set_insn (target,
3960 gen_rtx_fmt_ee (code, mode, source, temp));
3961 return 2;
3962 }
3963 }
3964 }
3965
3966 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3967 1);
3968 }
3969
3970 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3971 ARM/THUMB2 immediates and add up to VAL.
3972 The function return value gives the number of insns required. */
3973 static int
3974 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3975 struct four_ints *return_sequence)
3976 {
3977 int best_consecutive_zeros = 0;
3978 int i;
3979 int best_start = 0;
3980 int insns1, insns2;
3981 struct four_ints tmp_sequence;
3982
3983 /* If we aren't targeting ARM, the best place to start is always at
3984 the bottom, otherwise look more closely. */
3985 if (TARGET_ARM)
3986 {
3987 for (i = 0; i < 32; i += 2)
3988 {
3989 int consecutive_zeros = 0;
3990
3991 if (!(val & (3 << i)))
3992 {
3993 while ((i < 32) && !(val & (3 << i)))
3994 {
3995 consecutive_zeros += 2;
3996 i += 2;
3997 }
3998 if (consecutive_zeros > best_consecutive_zeros)
3999 {
4000 best_consecutive_zeros = consecutive_zeros;
4001 best_start = i - consecutive_zeros;
4002 }
4003 i -= 2;
4004 }
4005 }
4006 }
4007
4008 /* So long as it won't require any more insns to do so, it's
4009 desirable to emit a small constant (in bits 0...9) in the last
4010 insn. This way there is more chance that it can be combined with
4011 a later addressing insn to form a pre-indexed load or store
4012 operation. Consider:
4013
4014 *((volatile int *)0xe0000100) = 1;
4015 *((volatile int *)0xe0000110) = 2;
4016
4017 We want this to wind up as:
4018
4019 mov rA, #0xe0000000
4020 mov rB, #1
4021 str rB, [rA, #0x100]
4022 mov rB, #2
4023 str rB, [rA, #0x110]
4024
4025 rather than having to synthesize both large constants from scratch.
4026
4027 Therefore, we calculate how many insns would be required to emit
4028 the constant starting from `best_start', and also starting from
4029 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4030 yield a shorter sequence, we may as well use zero. */
4031 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4032 if (best_start != 0
4033 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
4034 {
4035 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4036 if (insns2 <= insns1)
4037 {
4038 *return_sequence = tmp_sequence;
4039 insns1 = insns2;
4040 }
4041 }
4042
4043 return insns1;
4044 }
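
/* Editorial example (illustrative): in ARM mode 0x0000ffff cannot be
   covered by one rotated 8-bit immediate, so the routine above returns the
   two-part sequence { 0x0000ff00, 0x000000ff }; for a SET this becomes a
   MOV of one part followed by an ADD (equivalently an ORR, since the parts
   are disjoint) of the other.  */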
4045
4046 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4047 static int
4048 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4049 struct four_ints *return_sequence, int i)
4050 {
4051 int remainder = val & 0xffffffff;
4052 int insns = 0;
4053
4054 /* Try and find a way of doing the job in either two or three
4055 instructions.
4056
4057 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4058 location. We start at position I. This may be the MSB, or
4059 optimal_immediate_sequence may have positioned it at the largest block
4060 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4061 wrapping around to the top of the word when we drop off the bottom.
4062 In the worst case this code should produce no more than four insns.
4063
4064 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4065 constants, shifted to any arbitrary location. We should always start
4066 at the MSB. */
4067 do
4068 {
4069 int end;
4070 unsigned int b1, b2, b3, b4;
4071 unsigned HOST_WIDE_INT result;
4072 int loc;
4073
4074 gcc_assert (insns < 4);
4075
4076 if (i <= 0)
4077 i += 32;
4078
4079 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4080 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4081 {
4082 loc = i;
4083 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4084 /* We can use addw/subw for the last 12 bits. */
4085 result = remainder;
4086 else
4087 {
4088 /* Use an 8-bit shifted/rotated immediate. */
4089 end = i - 8;
4090 if (end < 0)
4091 end += 32;
4092 result = remainder & ((0x0ff << end)
4093 | ((i < end) ? (0xff >> (32 - end))
4094 : 0));
4095 i -= 8;
4096 }
4097 }
4098 else
4099 {
4100 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4101 arbitrary shifts. */
4102 i -= TARGET_ARM ? 2 : 1;
4103 continue;
4104 }
4105
4106 /* Next, see if we can do a better job with a thumb2 replicated
4107 constant.
4108
4109 We do it this way around to catch the cases like 0x01F001E0 where
4110 two 8-bit immediates would work, but a replicated constant would
4111 make it worse.
4112
4113 TODO: 16-bit constants that don't clear all the bits, but still win.
4114 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4115 if (TARGET_THUMB2)
4116 {
4117 b1 = (remainder & 0xff000000) >> 24;
4118 b2 = (remainder & 0x00ff0000) >> 16;
4119 b3 = (remainder & 0x0000ff00) >> 8;
4120 b4 = remainder & 0xff;
4121
4122 if (loc > 24)
4123 {
4124 /* The 8-bit immediate already found clears b1 (and maybe b2),
4125 but must leave b3 and b4 alone. */
4126
4127 /* First try to find a 32-bit replicated constant that clears
4128 almost everything. We can assume that we can't do it in one,
4129 or else we wouldn't be here. */
4130 unsigned int tmp = b1 & b2 & b3 & b4;
4131 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4132 + (tmp << 24);
4133 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4134 + (tmp == b3) + (tmp == b4);
4135 if (tmp
4136 && (matching_bytes >= 3
4137 || (matching_bytes == 2
4138 && const_ok_for_op (remainder & ~tmp2, code))))
4139 {
4140 /* At least 3 of the bytes match, and the fourth has at
4141 least as many bits set, or two of the bytes match
4142 and it will only require one more insn to finish. */
4143 result = tmp2;
4144 i = tmp != b1 ? 32
4145 : tmp != b2 ? 24
4146 : tmp != b3 ? 16
4147 : 8;
4148 }
4149
4150 /* Second, try to find a 16-bit replicated constant that can
4151 leave three of the bytes clear. If b2 or b4 is already
4152 zero, then we can. If the 8-bit from above would not
4153 clear b2 anyway, then we still win. */
4154 else if (b1 == b3 && (!b2 || !b4
4155 || (remainder & 0x00ff0000 & ~result)))
4156 {
4157 result = remainder & 0xff00ff00;
4158 i = 24;
4159 }
4160 }
4161 else if (loc > 16)
4162 {
4163 /* The 8-bit immediate already found clears b2 (and maybe b3)
4164 and we don't get here unless b1 is already clear, but it will
4165 leave b4 unchanged. */
4166
4167 /* If we can clear b2 and b4 at once, then we win, since the
4168 8-bits couldn't possibly reach that far. */
4169 if (b2 == b4)
4170 {
4171 result = remainder & 0x00ff00ff;
4172 i = 16;
4173 }
4174 }
4175 }
4176
4177 return_sequence->i[insns++] = result;
4178 remainder &= ~result;
4179
4180 if (code == SET || code == MINUS)
4181 code = PLUS;
4182 }
4183 while (remainder);
4184
4185 return insns;
4186 }
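
/* Editorial trace (illustrative): for 0x12340000 in ARM mode the scan above
   picks the split { 0x12000000, 0x00340000 }, i.e. two even-rotation 8-bit
   immediates, since no single 8-bit window can reach from bit 18 up to
   bit 28.  */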
4187
4188 /* Emit an instruction with the indicated PATTERN. If COND is
4189 non-NULL, conditionalize the execution of the instruction on COND
4190 being true. */
4191
4192 static void
4193 emit_constant_insn (rtx cond, rtx pattern)
4194 {
4195 if (cond)
4196 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4197 emit_insn (pattern);
4198 }
4199
4200 /* As above, but extra parameter GENERATE which, if clear, suppresses
4201 RTL generation. */
4202
4203 static int
4204 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4205 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4206 int generate)
4207 {
4208 int can_invert = 0;
4209 int can_negate = 0;
4210 int final_invert = 0;
4211 int i;
4212 int set_sign_bit_copies = 0;
4213 int clear_sign_bit_copies = 0;
4214 int clear_zero_bit_copies = 0;
4215 int set_zero_bit_copies = 0;
4216 int insns = 0, neg_insns, inv_insns;
4217 unsigned HOST_WIDE_INT temp1, temp2;
4218 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4219 struct four_ints *immediates;
4220 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4221
4222 /* Find out which operations are safe for a given CODE. Also do a quick
4223 check for degenerate cases; these can occur when DImode operations
4224 are split. */
4225 switch (code)
4226 {
4227 case SET:
4228 can_invert = 1;
4229 break;
4230
4231 case PLUS:
4232 can_negate = 1;
4233 break;
4234
4235 case IOR:
4236 if (remainder == 0xffffffff)
4237 {
4238 if (generate)
4239 emit_constant_insn (cond,
4240 gen_rtx_SET (target,
4241 GEN_INT (ARM_SIGN_EXTEND (val))));
4242 return 1;
4243 }
4244
4245 if (remainder == 0)
4246 {
4247 if (reload_completed && rtx_equal_p (target, source))
4248 return 0;
4249
4250 if (generate)
4251 emit_constant_insn (cond, gen_rtx_SET (target, source));
4252 return 1;
4253 }
4254 break;
4255
4256 case AND:
4257 if (remainder == 0)
4258 {
4259 if (generate)
4260 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4261 return 1;
4262 }
4263 if (remainder == 0xffffffff)
4264 {
4265 if (reload_completed && rtx_equal_p (target, source))
4266 return 0;
4267 if (generate)
4268 emit_constant_insn (cond, gen_rtx_SET (target, source));
4269 return 1;
4270 }
4271 can_invert = 1;
4272 break;
4273
4274 case XOR:
4275 if (remainder == 0)
4276 {
4277 if (reload_completed && rtx_equal_p (target, source))
4278 return 0;
4279 if (generate)
4280 emit_constant_insn (cond, gen_rtx_SET (target, source));
4281 return 1;
4282 }
4283
4284 if (remainder == 0xffffffff)
4285 {
4286 if (generate)
4287 emit_constant_insn (cond,
4288 gen_rtx_SET (target,
4289 gen_rtx_NOT (mode, source)));
4290 return 1;
4291 }
4292 final_invert = 1;
4293 break;
4294
4295 case MINUS:
4296 /* We treat MINUS as (val - source), since (source - val) is always
4297 passed as (source + (-val)). */
4298 if (remainder == 0)
4299 {
4300 if (generate)
4301 emit_constant_insn (cond,
4302 gen_rtx_SET (target,
4303 gen_rtx_NEG (mode, source)));
4304 return 1;
4305 }
4306 if (const_ok_for_arm (val))
4307 {
4308 if (generate)
4309 emit_constant_insn (cond,
4310 gen_rtx_SET (target,
4311 gen_rtx_MINUS (mode, GEN_INT (val),
4312 source)));
4313 return 1;
4314 }
4315
4316 break;
4317
4318 default:
4319 gcc_unreachable ();
4320 }
4321
4322 /* If we can do it in one insn get out quickly. */
4323 if (const_ok_for_op (val, code))
4324 {
4325 if (generate)
4326 emit_constant_insn (cond,
4327 gen_rtx_SET (target,
4328 (source
4329 ? gen_rtx_fmt_ee (code, mode, source,
4330 GEN_INT (val))
4331 : GEN_INT (val))));
4332 return 1;
4333 }
4334
4335 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4336 insn. */
4337 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4338 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4339 {
4340 if (generate)
4341 {
4342 if (mode == SImode && i == 16)
4343 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4344 smaller insn. */
4345 emit_constant_insn (cond,
4346 gen_zero_extendhisi2
4347 (target, gen_lowpart (HImode, source)));
4348 else
4349 /* Extz only supports SImode, but we can coerce the operands
4350 into that mode. */
4351 emit_constant_insn (cond,
4352 gen_extzv_t2 (gen_lowpart (SImode, target),
4353 gen_lowpart (SImode, source),
4354 GEN_INT (i), const0_rtx));
4355 }
4356
4357 return 1;
4358 }
4359
4360 /* Calculate a few attributes that may be useful for specific
4361 optimizations. */
4362 /* Count number of leading zeros. */
4363 for (i = 31; i >= 0; i--)
4364 {
4365 if ((remainder & (1 << i)) == 0)
4366 clear_sign_bit_copies++;
4367 else
4368 break;
4369 }
4370
4371 /* Count number of leading 1's. */
4372 for (i = 31; i >= 0; i--)
4373 {
4374 if ((remainder & (1 << i)) != 0)
4375 set_sign_bit_copies++;
4376 else
4377 break;
4378 }
4379
4380 /* Count number of trailing zero's. */
4381 for (i = 0; i <= 31; i++)
4382 {
4383 if ((remainder & (1 << i)) == 0)
4384 clear_zero_bit_copies++;
4385 else
4386 break;
4387 }
4388
4389 /* Count number of trailing 1's. */
4390 for (i = 0; i <= 31; i++)
4391 {
4392 if ((remainder & (1 << i)) != 0)
4393 set_zero_bit_copies++;
4394 else
4395 break;
4396 }
4397
4398 switch (code)
4399 {
4400 case SET:
4401 /* See if we can do this by sign_extending a constant that is known
4402 to be negative. This is a good way of doing it, since the shift
4403 may well merge into a subsequent insn. */
4404 if (set_sign_bit_copies > 1)
4405 {
4406 if (const_ok_for_arm
4407 (temp1 = ARM_SIGN_EXTEND (remainder
4408 << (set_sign_bit_copies - 1))))
4409 {
4410 if (generate)
4411 {
4412 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4413 emit_constant_insn (cond,
4414 gen_rtx_SET (new_src, GEN_INT (temp1)));
4415 emit_constant_insn (cond,
4416 gen_ashrsi3 (target, new_src,
4417 GEN_INT (set_sign_bit_copies - 1)));
4418 }
4419 return 2;
4420 }
4421 /* For an inverted constant, we will need to set the low bits,
4422 these will be shifted out of harm's way. */
4423 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4424 if (const_ok_for_arm (~temp1))
4425 {
4426 if (generate)
4427 {
4428 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4429 emit_constant_insn (cond,
4430 gen_rtx_SET (new_src, GEN_INT (temp1)));
4431 emit_constant_insn (cond,
4432 gen_ashrsi3 (target, new_src,
4433 GEN_INT (set_sign_bit_copies - 1)));
4434 }
4435 return 2;
4436 }
4437 }
4438
4439 /* See if we can calculate the value as the difference between two
4440 valid immediates. */
4441 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4442 {
4443 int topshift = clear_sign_bit_copies & ~1;
4444
4445 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4446 & (0xff000000 >> topshift));
4447
4448 /* If temp1 is zero, then that means the 9 most significant
4449 bits of remainder were 1 and we've caused it to overflow.
4450 When topshift is 0 we don't need to do anything since we
4451 can borrow from 'bit 32'. */
4452 if (temp1 == 0 && topshift != 0)
4453 temp1 = 0x80000000 >> (topshift - 1);
4454
4455 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4456
4457 if (const_ok_for_arm (temp2))
4458 {
4459 if (generate)
4460 {
4461 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4462 emit_constant_insn (cond,
4463 gen_rtx_SET (new_src, GEN_INT (temp1)));
4464 emit_constant_insn (cond,
4465 gen_addsi3 (target, new_src,
4466 GEN_INT (-temp2)));
4467 }
4468
4469 return 2;
4470 }
4471 }
4472
4473 /* See if we can generate this by setting the bottom (or the top)
4474 16 bits, and then shifting these into the other half of the
4475 word. We only look for the simplest cases, to do more would cost
4476 too much. Be careful, however, not to generate this when the
4477 alternative would take fewer insns. */
4478 if (val & 0xffff0000)
4479 {
4480 temp1 = remainder & 0xffff0000;
4481 temp2 = remainder & 0x0000ffff;
4482
4483 /* Overlaps outside this range are best done using other methods. */
4484 for (i = 9; i < 24; i++)
4485 {
4486 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4487 && !const_ok_for_arm (temp2))
4488 {
4489 rtx new_src = (subtargets
4490 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4491 : target);
4492 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4493 source, subtargets, generate);
4494 source = new_src;
4495 if (generate)
4496 emit_constant_insn
4497 (cond,
4498 gen_rtx_SET
4499 (target,
4500 gen_rtx_IOR (mode,
4501 gen_rtx_ASHIFT (mode, source,
4502 GEN_INT (i)),
4503 source)));
4504 return insns + 1;
4505 }
4506 }
4507
4508 /* Don't duplicate cases already considered. */
4509 for (i = 17; i < 24; i++)
4510 {
4511 if (((temp1 | (temp1 >> i)) == remainder)
4512 && !const_ok_for_arm (temp1))
4513 {
4514 rtx new_src = (subtargets
4515 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4516 : target);
4517 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4518 source, subtargets, generate);
4519 source = new_src;
4520 if (generate)
4521 emit_constant_insn
4522 (cond,
4523 gen_rtx_SET (target,
4524 gen_rtx_IOR
4525 (mode,
4526 gen_rtx_LSHIFTRT (mode, source,
4527 GEN_INT (i)),
4528 source)));
4529 return insns + 1;
4530 }
4531 }
4532 }
4533 break;
4534
4535 case IOR:
4536 case XOR:
4537 /* If we have IOR or XOR, and the constant can be loaded in a
4538 single instruction, and we can find a temporary to put it in,
4539 then this can be done in two instructions instead of 3-4. */
4540 if (subtargets
4541 /* TARGET can't be NULL if SUBTARGETS is 0 */
4542 || (reload_completed && !reg_mentioned_p (target, source)))
4543 {
4544 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4545 {
4546 if (generate)
4547 {
4548 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4549
4550 emit_constant_insn (cond,
4551 gen_rtx_SET (sub, GEN_INT (val)));
4552 emit_constant_insn (cond,
4553 gen_rtx_SET (target,
4554 gen_rtx_fmt_ee (code, mode,
4555 source, sub)));
4556 }
4557 return 2;
4558 }
4559 }
4560
4561 if (code == XOR)
4562 break;
4563
4564 /* Convert
4565 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4566 followed by 0s, e.g. 0xfff00000)
4567 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4568
4569 This can be done in 2 instructions by using shifts with mov or mvn.
4570 e.g. for
4571 x = x | 0xfff00000;
4572 we generate.
4573 mvn r0, r0, asl #12
4574 mvn r0, r0, lsr #12 */
4575 if (set_sign_bit_copies > 8
4576 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4577 {
4578 if (generate)
4579 {
4580 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4581 rtx shift = GEN_INT (set_sign_bit_copies);
4582
4583 emit_constant_insn
4584 (cond,
4585 gen_rtx_SET (sub,
4586 gen_rtx_NOT (mode,
4587 gen_rtx_ASHIFT (mode,
4588 source,
4589 shift))));
4590 emit_constant_insn
4591 (cond,
4592 gen_rtx_SET (target,
4593 gen_rtx_NOT (mode,
4594 gen_rtx_LSHIFTRT (mode, sub,
4595 shift))));
4596 }
4597 return 2;
4598 }
4599
4600 /* Convert
4601 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4602 to
4603 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4604
4605 E.g. for r0 = r0 | 0xfff
4606 mvn r0, r0, lsr #12
4607 mvn r0, r0, asl #12
4608
4609 */
4610 if (set_zero_bit_copies > 8
4611 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4612 {
4613 if (generate)
4614 {
4615 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4616 rtx shift = GEN_INT (set_zero_bit_copies);
4617
4618 emit_constant_insn
4619 (cond,
4620 gen_rtx_SET (sub,
4621 gen_rtx_NOT (mode,
4622 gen_rtx_LSHIFTRT (mode,
4623 source,
4624 shift))));
4625 emit_constant_insn
4626 (cond,
4627 gen_rtx_SET (target,
4628 gen_rtx_NOT (mode,
4629 gen_rtx_ASHIFT (mode, sub,
4630 shift))));
4631 }
4632 return 2;
4633 }
4634
4635 /* This will never be reached for Thumb2 because orn is a valid
4636 instruction. This is for Thumb1 and the ARM 32 bit cases.
4637
4638 x = y | constant (such that ~constant is a valid constant)
4639 Transform this to
4640 x = ~(~y & ~constant).
4641 */
4642 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4643 {
4644 if (generate)
4645 {
4646 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4647 emit_constant_insn (cond,
4648 gen_rtx_SET (sub,
4649 gen_rtx_NOT (mode, source)));
4650 source = sub;
4651 if (subtargets)
4652 sub = gen_reg_rtx (mode);
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (sub,
4655 gen_rtx_AND (mode, source,
4656 GEN_INT (temp1))));
4657 emit_constant_insn (cond,
4658 gen_rtx_SET (target,
4659 gen_rtx_NOT (mode, sub)));
4660 }
4661 return 3;
4662 }
4663 break;
4664
4665 case AND:
4666 /* See if two shifts will do 2 or more insn's worth of work. */
4667 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4668 {
4669 HOST_WIDE_INT shift_mask = ((0xffffffff
4670 << (32 - clear_sign_bit_copies))
4671 & 0xffffffff);
4672
4673 if ((remainder | shift_mask) != 0xffffffff)
4674 {
4675 HOST_WIDE_INT new_val
4676 = ARM_SIGN_EXTEND (remainder | shift_mask);
4677
4678 if (generate)
4679 {
4680 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4681 insns = arm_gen_constant (AND, SImode, cond, new_val,
4682 new_src, source, subtargets, 1);
4683 source = new_src;
4684 }
4685 else
4686 {
4687 rtx targ = subtargets ? NULL_RTX : target;
4688 insns = arm_gen_constant (AND, mode, cond, new_val,
4689 targ, source, subtargets, 0);
4690 }
4691 }
4692
4693 if (generate)
4694 {
4695 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4696 rtx shift = GEN_INT (clear_sign_bit_copies);
4697
4698 emit_insn (gen_ashlsi3 (new_src, source, shift));
4699 emit_insn (gen_lshrsi3 (target, new_src, shift));
4700 }
4701
4702 return insns + 2;
4703 }
4704
4705 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4706 {
4707 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4708
4709 if ((remainder | shift_mask) != 0xffffffff)
4710 {
4711 HOST_WIDE_INT new_val
4712 = ARM_SIGN_EXTEND (remainder | shift_mask);
4713 if (generate)
4714 {
4715 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4716
4717 insns = arm_gen_constant (AND, mode, cond, new_val,
4718 new_src, source, subtargets, 1);
4719 source = new_src;
4720 }
4721 else
4722 {
4723 rtx targ = subtargets ? NULL_RTX : target;
4724
4725 insns = arm_gen_constant (AND, mode, cond, new_val,
4726 targ, source, subtargets, 0);
4727 }
4728 }
4729
4730 if (generate)
4731 {
4732 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4733 rtx shift = GEN_INT (clear_zero_bit_copies);
4734
4735 emit_insn (gen_lshrsi3 (new_src, source, shift));
4736 emit_insn (gen_ashlsi3 (target, new_src, shift));
4737 }
4738
4739 return insns + 2;
4740 }
4741
4742 break;
4743
4744 default:
4745 break;
4746 }
4747
4748 /* Calculate what the instruction sequences would be if we generated it
4749 normally, negated, or inverted. */
4750 if (code == AND)
4751 /* AND cannot be split into multiple insns, so invert and use BIC. */
4752 insns = 99;
4753 else
4754 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4755
4756 if (can_negate)
4757 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4758 &neg_immediates);
4759 else
4760 neg_insns = 99;
4761
4762 if (can_invert || final_invert)
4763 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4764 &inv_immediates);
4765 else
4766 inv_insns = 99;
4767
4768 immediates = &pos_immediates;
4769
4770 /* Is the negated immediate sequence more efficient? */
4771 if (neg_insns < insns && neg_insns <= inv_insns)
4772 {
4773 insns = neg_insns;
4774 immediates = &neg_immediates;
4775 }
4776 else
4777 can_negate = 0;
4778
4779 /* Is the inverted immediate sequence more efficient?
4780 We must allow for an extra NOT instruction for XOR operations, although
4781 there is some chance that the final 'mvn' will get optimized later. */
4782 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4783 {
4784 insns = inv_insns;
4785 immediates = &inv_immediates;
4786 }
4787 else
4788 {
4789 can_invert = 0;
4790 final_invert = 0;
4791 }
4792
4793 /* Now output the chosen sequence as instructions. */
4794 if (generate)
4795 {
4796 for (i = 0; i < insns; i++)
4797 {
4798 rtx new_src, temp1_rtx;
4799
4800 temp1 = immediates->i[i];
4801
4802 if (code == SET || code == MINUS)
4803 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4804 else if ((final_invert || i < (insns - 1)) && subtargets)
4805 new_src = gen_reg_rtx (mode);
4806 else
4807 new_src = target;
4808
4809 if (can_invert)
4810 temp1 = ~temp1;
4811 else if (can_negate)
4812 temp1 = -temp1;
4813
4814 temp1 = trunc_int_for_mode (temp1, mode);
4815 temp1_rtx = GEN_INT (temp1);
4816
4817 if (code == SET)
4818 ;
4819 else if (code == MINUS)
4820 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4821 else
4822 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4823
4824 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4825 source = new_src;
4826
4827 if (code == SET)
4828 {
4829 can_negate = can_invert;
4830 can_invert = 0;
4831 code = PLUS;
4832 }
4833 else if (code == MINUS)
4834 code = PLUS;
4835 }
4836 }
4837
4838 if (final_invert)
4839 {
4840 if (generate)
4841 emit_constant_insn (cond, gen_rtx_SET (target,
4842 gen_rtx_NOT (mode, source)));
4843 insns++;
4844 }
4845
4846 return insns;
4847 }
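
/* Editorial example (illustrative): for an AND with 0x00ffff00 the positive
   form is never split (insns is forced to 99 above), but the inverted value
   0xff0000ff splits into the two immediates 0xff000000 and 0x000000ff, so
   the operation is emitted as two BIC-style (AND-with-inverted-constant)
   instructions that clear the top and bottom bytes.  */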
4848
4849 /* Canonicalize a comparison so that we are more likely to recognize it.
4850 This can be done for a few constant compares, where we can make the
4851 immediate value easier to load. */
4852
4853 static void
4854 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4855 bool op0_preserve_value)
4856 {
4857 machine_mode mode;
4858 unsigned HOST_WIDE_INT i, maxval;
4859
4860 mode = GET_MODE (*op0);
4861 if (mode == VOIDmode)
4862 mode = GET_MODE (*op1);
4863
4864 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4865
4866 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4867 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4868 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4869 for GTU/LEU in Thumb mode. */
4870 if (mode == DImode)
4871 {
4872
4873 if (*code == GT || *code == LE
4874 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4875 {
4876 /* Missing comparison. First try to use an available
4877 comparison. */
4878 if (CONST_INT_P (*op1))
4879 {
4880 i = INTVAL (*op1);
4881 switch (*code)
4882 {
4883 case GT:
4884 case LE:
4885 if (i != maxval
4886 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4887 {
4888 *op1 = GEN_INT (i + 1);
4889 *code = *code == GT ? GE : LT;
4890 return;
4891 }
4892 break;
4893 case GTU:
4894 case LEU:
4895 if (i != ~((unsigned HOST_WIDE_INT) 0)
4896 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4897 {
4898 *op1 = GEN_INT (i + 1);
4899 *code = *code == GTU ? GEU : LTU;
4900 return;
4901 }
4902 break;
4903 default:
4904 gcc_unreachable ();
4905 }
4906 }
4907
4908 /* If that did not work, reverse the condition. */
4909 if (!op0_preserve_value)
4910 {
4911 std::swap (*op0, *op1);
4912 *code = (int)swap_condition ((enum rtx_code)*code);
4913 }
4914 }
4915 return;
4916 }
4917
4918 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4919 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4920 to facilitate possible combining with a cmp into 'ands'. */
4921 if (mode == SImode
4922 && GET_CODE (*op0) == ZERO_EXTEND
4923 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4924 && GET_MODE (XEXP (*op0, 0)) == QImode
4925 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4926 && subreg_lowpart_p (XEXP (*op0, 0))
4927 && *op1 == const0_rtx)
4928 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4929 GEN_INT (255));
4930
4931 /* Comparisons smaller than DImode. Only adjust comparisons against
4932 an out-of-range constant. */
4933 if (!CONST_INT_P (*op1)
4934 || const_ok_for_arm (INTVAL (*op1))
4935 || const_ok_for_arm (- INTVAL (*op1)))
4936 return;
4937
4938 i = INTVAL (*op1);
4939
4940 switch (*code)
4941 {
4942 case EQ:
4943 case NE:
4944 return;
4945
4946 case GT:
4947 case LE:
4948 if (i != maxval
4949 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4950 {
4951 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4952 *code = *code == GT ? GE : LT;
4953 return;
4954 }
4955 break;
4956
4957 case GE:
4958 case LT:
4959 if (i != ~maxval
4960 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4961 {
4962 *op1 = GEN_INT (i - 1);
4963 *code = *code == GE ? GT : LE;
4964 return;
4965 }
4966 break;
4967
4968 case GTU:
4969 case LEU:
4970 if (i != ~((unsigned HOST_WIDE_INT) 0)
4971 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4972 {
4973 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4974 *code = *code == GTU ? GEU : LTU;
4975 return;
4976 }
4977 break;
4978
4979 case GEU:
4980 case LTU:
4981 if (i != 0
4982 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4983 {
4984 *op1 = GEN_INT (i - 1);
4985 *code = *code == GEU ? GTU : LEU;
4986 return;
4987 }
4988 break;
4989
4990 default:
4991 gcc_unreachable ();
4992 }
4993 }
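
/* Editorial example (illustrative): 257 (0x101) is not a valid immediate
   but 256 (0x100) is, so a comparison such as (x >= 257) is canonicalized
   above to (x > 256), letting the compare use an encodable constant.  */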
4994
4995
4996 /* Define how to find the value returned by a function. */
4997
4998 static rtx
4999 arm_function_value(const_tree type, const_tree func,
5000 bool outgoing ATTRIBUTE_UNUSED)
5001 {
5002 machine_mode mode;
5003 int unsignedp ATTRIBUTE_UNUSED;
5004 rtx r ATTRIBUTE_UNUSED;
5005
5006 mode = TYPE_MODE (type);
5007
5008 if (TARGET_AAPCS_BASED)
5009 return aapcs_allocate_return_reg (mode, type, func);
5010
5011 /* Promote integer types. */
5012 if (INTEGRAL_TYPE_P (type))
5013 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5014
5015 /* Promote small structs returned in a register to full-word size
5016 for big-endian AAPCS. */
5017 if (arm_return_in_msb (type))
5018 {
5019 HOST_WIDE_INT size = int_size_in_bytes (type);
5020 if (size % UNITS_PER_WORD != 0)
5021 {
5022 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5023 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5024 }
5025 }
5026
5027 return arm_libcall_value_1 (mode);
5028 }
5029
5030 /* libcall hashtable helpers. */
5031
5032 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5033 {
5034 static inline hashval_t hash (const rtx_def *);
5035 static inline bool equal (const rtx_def *, const rtx_def *);
5036 static inline void remove (rtx_def *);
5037 };
5038
5039 inline bool
5040 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5041 {
5042 return rtx_equal_p (p1, p2);
5043 }
5044
5045 inline hashval_t
5046 libcall_hasher::hash (const rtx_def *p1)
5047 {
5048 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5049 }
5050
5051 typedef hash_table<libcall_hasher> libcall_table_type;
5052
5053 static void
5054 add_libcall (libcall_table_type *htab, rtx libcall)
5055 {
5056 *htab->find_slot (libcall, INSERT) = libcall;
5057 }
5058
5059 static bool
5060 arm_libcall_uses_aapcs_base (const_rtx libcall)
5061 {
5062 static bool init_done = false;
5063 static libcall_table_type *libcall_htab = NULL;
5064
5065 if (!init_done)
5066 {
5067 init_done = true;
5068
5069 libcall_htab = new libcall_table_type (31);
5070 add_libcall (libcall_htab,
5071 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5072 add_libcall (libcall_htab,
5073 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5074 add_libcall (libcall_htab,
5075 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5076 add_libcall (libcall_htab,
5077 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5078
5079 add_libcall (libcall_htab,
5080 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5081 add_libcall (libcall_htab,
5082 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5083 add_libcall (libcall_htab,
5084 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5085 add_libcall (libcall_htab,
5086 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5087
5088 add_libcall (libcall_htab,
5089 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5090 add_libcall (libcall_htab,
5091 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5092 add_libcall (libcall_htab,
5093 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5094 add_libcall (libcall_htab,
5095 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5096 add_libcall (libcall_htab,
5097 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5098 add_libcall (libcall_htab,
5099 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5100 add_libcall (libcall_htab,
5101 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5102 add_libcall (libcall_htab,
5103 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5104
5105 /* Values from double-precision helper functions are returned in core
5106 registers if the selected core only supports single-precision
5107 arithmetic, even if we are using the hard-float ABI. The same is
5108 true for single-precision helpers, but we will never be using the
5109 hard-float ABI on a CPU which doesn't support single-precision
5110 operations in hardware. */
5111 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5112 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5113 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5114 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5115 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5116 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5117 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5118 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5119 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5120 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5121 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5122 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5123 SFmode));
5124 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5125 DFmode));
5126 }
5127
5128 return libcall && libcall_htab->find (libcall) != NULL;
5129 }
5130
5131 static rtx
5132 arm_libcall_value_1 (machine_mode mode)
5133 {
5134 if (TARGET_AAPCS_BASED)
5135 return aapcs_libcall_value (mode);
5136 else if (TARGET_IWMMXT_ABI
5137 && arm_vector_mode_supported_p (mode))
5138 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5139 else
5140 return gen_rtx_REG (mode, ARG_REGISTER (1));
5141 }
5142
5143 /* Define how to find the value returned by a library function
5144 assuming the value has mode MODE. */
5145
5146 static rtx
5147 arm_libcall_value (machine_mode mode, const_rtx libcall)
5148 {
5149 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5150 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5151 {
5152 /* The following libcalls return their result in integer registers,
5153 even though they return a floating point value. */
5154 if (arm_libcall_uses_aapcs_base (libcall))
5155 return gen_rtx_REG (mode, ARG_REGISTER(1));
5156
5157 }
5158
5159 return arm_libcall_value_1 (mode);
5160 }
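
/* Editorial example (illustrative): on a hard-float AAPCS target whose core
   lacks double-precision hardware, a DFmode addition goes through the RTABI
   helper (__aeabi_dadd); that helper is in the table above, so its result
   is taken from r0/r1 rather than a VFP register, matching the base
   (soft-float) procedure-call standard those helpers follow.  */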
5161
5162 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5163
5164 static bool
5165 arm_function_value_regno_p (const unsigned int regno)
5166 {
5167 if (regno == ARG_REGISTER (1)
5168 || (TARGET_32BIT
5169 && TARGET_AAPCS_BASED
5170 && TARGET_VFP
5171 && TARGET_HARD_FLOAT
5172 && regno == FIRST_VFP_REGNUM)
5173 || (TARGET_IWMMXT_ABI
5174 && regno == FIRST_IWMMXT_REGNUM))
5175 return true;
5176
5177 return false;
5178 }
5179
5180 /* Determine the amount of memory needed to store the possible return
5181 registers of an untyped call. */
5182 int
5183 arm_apply_result_size (void)
5184 {
5185 int size = 16;
5186
5187 if (TARGET_32BIT)
5188 {
5189 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5190 size += 32;
5191 if (TARGET_IWMMXT_ABI)
5192 size += 8;
5193 }
5194
5195 return size;
5196 }
5197
5198 /* Decide whether TYPE should be returned in memory (true)
5199 or in a register (false). FNTYPE is the type of the function making
5200 the call. */
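/* A few illustrative cases (editor's sketch, not an exhaustive statement
   of either ABI):

     AAPCS: int, long long, float      -- returned in registers
            struct { int a, b; }       -- returned in memory (a composite
                                          larger than one word that no
                                          co-processor claims below)

     APCS:  struct { short s; }        -- returned in r0 ("integer like")
            struct { float f; }        -- returned in memory  */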
5201 static bool
5202 arm_return_in_memory (const_tree type, const_tree fntype)
5203 {
5204 HOST_WIDE_INT size;
5205
5206 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5207
5208 if (TARGET_AAPCS_BASED)
5209 {
5210 /* Simple, non-aggregate types (i.e. not including vectors and
5211 complex) are always returned in a register (or registers).
5212 We don't care about which register here, so we can short-cut
5213 some of the detail. */
5214 if (!AGGREGATE_TYPE_P (type)
5215 && TREE_CODE (type) != VECTOR_TYPE
5216 && TREE_CODE (type) != COMPLEX_TYPE)
5217 return false;
5218
5219 /* Any return value that is no larger than one word can be
5220 returned in r0. */
5221 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5222 return false;
5223
5224 /* Check any available co-processors to see if they accept the
5225 type as a register candidate (VFP, for example, can return
5226 some aggregates in consecutive registers). These aren't
5227 available if the call is variadic. */
5228 if (aapcs_select_return_coproc (type, fntype) >= 0)
5229 return false;
5230
5231 /* Vector values should be returned using ARM registers, not
5232 memory (unless they're over 16 bytes, which will break since
5233 we only have four call-clobbered registers to play with). */
5234 if (TREE_CODE (type) == VECTOR_TYPE)
5235 return (size < 0 || size > (4 * UNITS_PER_WORD));
5236
5237 /* The rest go in memory. */
5238 return true;
5239 }
5240
5241 if (TREE_CODE (type) == VECTOR_TYPE)
5242 return (size < 0 || size > (4 * UNITS_PER_WORD));
5243
5244 if (!AGGREGATE_TYPE_P (type)
5245 && (TREE_CODE (type) != VECTOR_TYPE))
5246 /* All simple types are returned in registers. */
5247 return false;
5248
5249 if (arm_abi != ARM_ABI_APCS)
5250 {
5251 /* ATPCS and later return aggregate types in memory only if they are
5252 larger than a word (or are variable size). */
5253 return (size < 0 || size > UNITS_PER_WORD);
5254 }
5255
5256 /* For the arm-wince targets we choose to be compatible with Microsoft's
5257 ARM and Thumb compilers, which always return aggregates in memory. */
5258 #ifndef ARM_WINCE
5259 /* All structures/unions bigger than one word are returned in memory.
5260 Also catch the case where int_size_in_bytes returns -1. In this case
5261 the aggregate is either huge or of variable size, and in either case
5262 we will want to return it via memory and not in a register. */
5263 if (size < 0 || size > UNITS_PER_WORD)
5264 return true;
5265
5266 if (TREE_CODE (type) == RECORD_TYPE)
5267 {
5268 tree field;
5269
5270 /* For a struct the APCS says that we only return in a register
5271 if the type is 'integer like' and every addressable element
5272 has an offset of zero. For practical purposes this means
5273 that the structure can have at most one non bit-field element
5274 and that this element must be the first one in the structure. */
5275
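/* Illustrative examples of the rule just described (editor's sketch):

     struct { short s; }                 -- integer like, returned in r0
     struct { int a : 8; int b : 24; }   -- integer like (only bit-fields
                                            follow the first field)
     struct { char a; char b; }          -- returned in memory: the second
                                            field is addressable at a
                                            non-zero offset  */
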
5276 /* Find the first field, ignoring non FIELD_DECL things which will
5277 have been created by C++. */
5278 for (field = TYPE_FIELDS (type);
5279 field && TREE_CODE (field) != FIELD_DECL;
5280 field = DECL_CHAIN (field))
5281 continue;
5282
5283 if (field == NULL)
5284 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5285
5286 /* Check that the first field is valid for returning in a register. */
5287
5288 /* ... Floats are not allowed */
5289 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5290 return true;
5291
5292 /* ... Aggregates that are not themselves valid for returning in
5293 a register are not allowed. */
5294 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5295 return true;
5296
5297 /* Now check the remaining fields, if any. Only bitfields are allowed,
5298 since they are not addressable. */
5299 for (field = DECL_CHAIN (field);
5300 field;
5301 field = DECL_CHAIN (field))
5302 {
5303 if (TREE_CODE (field) != FIELD_DECL)
5304 continue;
5305
5306 if (!DECL_BIT_FIELD_TYPE (field))
5307 return true;
5308 }
5309
5310 return false;
5311 }
5312
5313 if (TREE_CODE (type) == UNION_TYPE)
5314 {
5315 tree field;
5316
5317 /* Unions can be returned in registers if every element is
5318 integral, or can be returned in an integer register. */
5319 for (field = TYPE_FIELDS (type);
5320 field;
5321 field = DECL_CHAIN (field))
5322 {
5323 if (TREE_CODE (field) != FIELD_DECL)
5324 continue;
5325
5326 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5327 return true;
5328
5329 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5330 return true;
5331 }
5332
5333 return false;
5334 }
5335 #endif /* not ARM_WINCE */
5336
5337 /* Return all other types in memory. */
5338 return true;
5339 }
5340
5341 const struct pcs_attribute_arg
5342 {
5343 const char *arg;
5344 enum arm_pcs value;
5345 } pcs_attribute_args[] =
5346 {
5347 {"aapcs", ARM_PCS_AAPCS},
5348 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5349 #if 0
5350 /* We could recognize these, but changes would be needed elsewhere
5351 * to implement them. */
5352 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5353 {"atpcs", ARM_PCS_ATPCS},
5354 {"apcs", ARM_PCS_APCS},
5355 #endif
5356 {NULL, ARM_PCS_UNKNOWN}
5357 };
5358
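/* The "pcs" attribute selects one of the names above on a function type.
   For example (illustrative):

     double f (double) __attribute__ ((pcs ("aapcs")));

   forces F to use the base AAPCS (values in core registers) even when
   the translation unit defaults to "aapcs-vfp".  */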
5359 static enum arm_pcs
5360 arm_pcs_from_attribute (tree attr)
5361 {
5362 const struct pcs_attribute_arg *ptr;
5363 const char *arg;
5364
5365 /* Get the value of the argument. */
5366 if (TREE_VALUE (attr) == NULL_TREE
5367 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5368 return ARM_PCS_UNKNOWN;
5369
5370 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5371
5372 /* Check it against the list of known arguments. */
5373 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5374 if (streq (arg, ptr->arg))
5375 return ptr->value;
5376
5377 /* An unrecognized PCS variant name. */
5378 return ARM_PCS_UNKNOWN;
5379 }
5380
5381 /* Get the PCS variant to use for this call. TYPE is the function's type
5382 specification, DECL is the specific declaration. DECL may be null if
5383 the call could be indirect or if this is a library call. */
5384 static enum arm_pcs
5385 arm_get_pcs_model (const_tree type, const_tree decl)
5386 {
5387 bool user_convention = false;
5388 enum arm_pcs user_pcs = arm_pcs_default;
5389 tree attr;
5390
5391 gcc_assert (type);
5392
5393 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5394 if (attr)
5395 {
5396 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5397 user_convention = true;
5398 }
5399
5400 if (TARGET_AAPCS_BASED)
5401 {
5402 /* Detect varargs functions. These always use the base rules
5403 (no argument is ever a candidate for a co-processor
5404 register). */
5405 bool base_rules = stdarg_p (type);
5406
5407 if (user_convention)
5408 {
5409 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5410 sorry ("non-AAPCS derived PCS variant");
5411 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5412 error ("variadic functions must use the base AAPCS variant");
5413 }
5414
5415 if (base_rules)
5416 return ARM_PCS_AAPCS;
5417 else if (user_convention)
5418 return user_pcs;
5419 else if (decl && flag_unit_at_a_time)
5420 {
5421 /* Local functions never leak outside this compilation unit,
5422 so we are free to use whatever conventions are
5423 appropriate. */
5424 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5425 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5426 if (i && i->local)
5427 return ARM_PCS_AAPCS_LOCAL;
5428 }
5429 }
5430 else if (user_convention && user_pcs != arm_pcs_default)
5431 sorry ("PCS variant");
5432
5433 /* For everything else we use the target's default. */
5434 return arm_pcs_default;
5435 }
5436
5437
5438 static void
5439 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5440 const_tree fntype ATTRIBUTE_UNUSED,
5441 rtx libcall ATTRIBUTE_UNUSED,
5442 const_tree fndecl ATTRIBUTE_UNUSED)
5443 {
5444 /* Record the unallocated VFP registers. */
5445 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5446 pcum->aapcs_vfp_reg_alloc = 0;
5447 }
5448
5449 /* Walk down the type tree of TYPE counting consecutive base elements.
5450 If *MODEP is VOIDmode, then set it to the first valid floating point
5451 type. If a non-floating point type is found, or if a floating point
5452 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5453 otherwise return the count in the sub-tree. */
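/* Illustrative results (editor's examples, assuming default type layout):

     float                          -> count 1, *modep == SFmode
     _Complex double                -> count 2, *modep == DFmode
     float[3]                       -> count 3, *modep == SFmode
     struct { double x, y, z; }     -> count 3, *modep == DFmode
     struct { double d; float f; }  -> -1 (element modes differ)  */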
5454 static int
5455 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5456 {
5457 machine_mode mode;
5458 HOST_WIDE_INT size;
5459
5460 switch (TREE_CODE (type))
5461 {
5462 case REAL_TYPE:
5463 mode = TYPE_MODE (type);
5464 if (mode != DFmode && mode != SFmode)
5465 return -1;
5466
5467 if (*modep == VOIDmode)
5468 *modep = mode;
5469
5470 if (*modep == mode)
5471 return 1;
5472
5473 break;
5474
5475 case COMPLEX_TYPE:
5476 mode = TYPE_MODE (TREE_TYPE (type));
5477 if (mode != DFmode && mode != SFmode)
5478 return -1;
5479
5480 if (*modep == VOIDmode)
5481 *modep = mode;
5482
5483 if (*modep == mode)
5484 return 2;
5485
5486 break;
5487
5488 case VECTOR_TYPE:
5489 /* Use V2SImode and V4SImode as representatives of all 64-bit
5490 and 128-bit vector types, whether or not those modes are
5491 supported with the present options. */
5492 size = int_size_in_bytes (type);
5493 switch (size)
5494 {
5495 case 8:
5496 mode = V2SImode;
5497 break;
5498 case 16:
5499 mode = V4SImode;
5500 break;
5501 default:
5502 return -1;
5503 }
5504
5505 if (*modep == VOIDmode)
5506 *modep = mode;
5507
5508 /* Vector modes are considered to be opaque: two vectors are
5509 equivalent for the purposes of being homogeneous aggregates
5510 if they are the same size. */
5511 if (*modep == mode)
5512 return 1;
5513
5514 break;
5515
5516 case ARRAY_TYPE:
5517 {
5518 int count;
5519 tree index = TYPE_DOMAIN (type);
5520
5521 /* Can't handle incomplete types or sizes that are not
5522 fixed. */
5523 if (!COMPLETE_TYPE_P (type)
5524 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5525 return -1;
5526
5527 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5528 if (count == -1
5529 || !index
5530 || !TYPE_MAX_VALUE (index)
5531 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5532 || !TYPE_MIN_VALUE (index)
5533 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5534 || count < 0)
5535 return -1;
5536
5537 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5538 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5539
5540 /* There must be no padding. */
5541 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5542 return -1;
5543
5544 return count;
5545 }
5546
5547 case RECORD_TYPE:
5548 {
5549 int count = 0;
5550 int sub_count;
5551 tree field;
5552
5553 /* Can't handle incomplete types or sizes that are not
5554 fixed. */
5555 if (!COMPLETE_TYPE_P (type)
5556 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5557 return -1;
5558
5559 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5560 {
5561 if (TREE_CODE (field) != FIELD_DECL)
5562 continue;
5563
5564 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5565 if (sub_count < 0)
5566 return -1;
5567 count += sub_count;
5568 }
5569
5570 /* There must be no padding. */
5571 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5572 return -1;
5573
5574 return count;
5575 }
5576
5577 case UNION_TYPE:
5578 case QUAL_UNION_TYPE:
5579 {
5580 /* These aren't very interesting except in a degenerate case. */
5581 int count = 0;
5582 int sub_count;
5583 tree field;
5584
5585 /* Can't handle incomplete types or sizes that are not
5586 fixed. */
5587 if (!COMPLETE_TYPE_P (type)
5588 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5589 return -1;
5590
5591 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5592 {
5593 if (TREE_CODE (field) != FIELD_DECL)
5594 continue;
5595
5596 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5597 if (sub_count < 0)
5598 return -1;
5599 count = count > sub_count ? count : sub_count;
5600 }
5601
5602 /* There must be no padding. */
5603 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5604 return -1;
5605
5606 return count;
5607 }
5608
5609 default:
5610 break;
5611 }
5612
5613 return -1;
5614 }
5615
5616 /* Return true if PCS_VARIANT should use VFP registers. */
5617 static bool
5618 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5619 {
5620 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5621 {
5622 static bool seen_thumb1_vfp = false;
5623
5624 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5625 {
5626 sorry ("Thumb-1 hard-float VFP ABI");
5627 /* sorry() is not immediately fatal, so only display this once. */
5628 seen_thumb1_vfp = true;
5629 }
5630
5631 return true;
5632 }
5633
5634 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5635 return false;
5636
5637 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
5638 && (TARGET_VFP_DOUBLE || !is_double));
5639 }
5640
5641 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5642 suitable for passing or returning in VFP registers for the PCS
5643 variant selected. If it is, then *BASE_MODE is updated to contain
5644 a machine mode describing each element of the argument's type and
5645 *COUNT to hold the number of such elements. */
5646 static bool
5647 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5648 machine_mode mode, const_tree type,
5649 machine_mode *base_mode, int *count)
5650 {
5651 machine_mode new_mode = VOIDmode;
5652
5653 /* If we have the type information, prefer that to working things
5654 out from the mode. */
5655 if (type)
5656 {
5657 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5658
5659 if (ag_count > 0 && ag_count <= 4)
5660 *count = ag_count;
5661 else
5662 return false;
5663 }
5664 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5665 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5666 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5667 {
5668 *count = 1;
5669 new_mode = mode;
5670 }
5671 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5672 {
5673 *count = 2;
5674 new_mode = (mode == DCmode ? DFmode : SFmode);
5675 }
5676 else
5677 return false;
5678
5679
5680 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5681 return false;
5682
5683 *base_mode = new_mode;
5684 return true;
5685 }
5686
5687 static bool
5688 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5689 machine_mode mode, const_tree type)
5690 {
5691 int count ATTRIBUTE_UNUSED;
5692 machine_mode ag_mode ATTRIBUTE_UNUSED;
5693
5694 if (!use_vfp_abi (pcs_variant, false))
5695 return false;
5696 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5697 &ag_mode, &count);
5698 }
5699
5700 static bool
5701 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5702 const_tree type)
5703 {
5704 if (!use_vfp_abi (pcum->pcs_variant, false))
5705 return false;
5706
5707 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5708 &pcum->aapcs_vfp_rmode,
5709 &pcum->aapcs_vfp_rcount);
5710 }
5711
5712 static bool
5713 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5714 const_tree type ATTRIBUTE_UNUSED)
5715 {
5716 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5717 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5718 int regno;
5719
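/* Scan for a block of free registers large enough for the argument.
   For example (editor's illustration): a homogeneous aggregate of three
   doubles has shift == 2 and mask == 0x3f, so we need six consecutive
   free single-precision registers starting on a D-register boundary
   (s0-s5, i.e. d0-d2).  */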
5720 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5721 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5722 {
5723 pcum->aapcs_vfp_reg_alloc = mask << regno;
5724 if (mode == BLKmode
5725 || (mode == TImode && ! TARGET_NEON)
5726 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5727 {
5728 int i;
5729 int rcount = pcum->aapcs_vfp_rcount;
5730 int rshift = shift;
5731 machine_mode rmode = pcum->aapcs_vfp_rmode;
5732 rtx par;
5733 if (!TARGET_NEON)
5734 {
5735 /* Avoid using unsupported vector modes. */
5736 if (rmode == V2SImode)
5737 rmode = DImode;
5738 else if (rmode == V4SImode)
5739 {
5740 rmode = DImode;
5741 rcount *= 2;
5742 rshift /= 2;
5743 }
5744 }
5745 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5746 for (i = 0; i < rcount; i++)
5747 {
5748 rtx tmp = gen_rtx_REG (rmode,
5749 FIRST_VFP_REGNUM + regno + i * rshift);
5750 tmp = gen_rtx_EXPR_LIST
5751 (VOIDmode, tmp,
5752 GEN_INT (i * GET_MODE_SIZE (rmode)));
5753 XVECEXP (par, 0, i) = tmp;
5754 }
5755
5756 pcum->aapcs_reg = par;
5757 }
5758 else
5759 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5760 return true;
5761 }
5762 return false;
5763 }
5764
5765 static rtx
5766 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
5767 machine_mode mode,
5768 const_tree type)
5769 {
5770 if (!use_vfp_abi (pcs_variant, false))
5771 return NULL;
5772
5773 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5774 {
5775 int count;
5776 machine_mode ag_mode;
5777 int i;
5778 rtx par;
5779 int shift;
5780
5781 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5782 &ag_mode, &count);
5783
5784 if (!TARGET_NEON)
5785 {
5786 if (ag_mode == V2SImode)
5787 ag_mode = DImode;
5788 else if (ag_mode == V4SImode)
5789 {
5790 ag_mode = DImode;
5791 count *= 2;
5792 }
5793 }
5794 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
5795 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5796 for (i = 0; i < count; i++)
5797 {
5798 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5799 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5800 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5801 XVECEXP (par, 0, i) = tmp;
5802 }
5803
5804 return par;
5805 }
5806
5807 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5808 }
5809
5810 static void
5811 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
5812 machine_mode mode ATTRIBUTE_UNUSED,
5813 const_tree type ATTRIBUTE_UNUSED)
5814 {
5815 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5816 pcum->aapcs_vfp_reg_alloc = 0;
5817 return;
5818 }
5819
5820 #define AAPCS_CP(X) \
5821 { \
5822 aapcs_ ## X ## _cum_init, \
5823 aapcs_ ## X ## _is_call_candidate, \
5824 aapcs_ ## X ## _allocate, \
5825 aapcs_ ## X ## _is_return_candidate, \
5826 aapcs_ ## X ## _allocate_return_reg, \
5827 aapcs_ ## X ## _advance \
5828 }
5829
5830 /* Table of co-processors that can be used to pass arguments in
5831 registers. Ideally no argument should be a candidate for more than
5832 one co-processor table entry, but the table is processed in order
5833 and stops after the first match. If that entry then fails to put
5834 the argument into a co-processor register, the argument will go on
5835 the stack. */
5836 static struct
5837 {
5838 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5839 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5840
5841 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5842 BLKmode) is a candidate for this co-processor's registers; this
5843 function should ignore any position-dependent state in
5844 CUMULATIVE_ARGS and only use call-type dependent information. */
5845 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5846
5847 /* Return true if the argument does get a co-processor register; it
5848 should set aapcs_reg to an RTX of the register allocated as is
5849 required for a return from FUNCTION_ARG. */
5850 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5851
5852 /* Return true if a result of mode MODE (or type TYPE if MODE is
5853 BLKmode) can be returned in this co-processor's registers. */
5854 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5855
5856 /* Allocate and return an RTX element to hold the return type of a
5857 call; this routine must not fail and will only be called if
5858 is_return_candidate returned true with the same parameters. */
5859 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5860
5861 /* Finish processing this argument and prepare to start processing
5862 the next one. */
5863 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5864 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5865 {
5866 AAPCS_CP(vfp)
5867 };
5868
5869 #undef AAPCS_CP
5870
5871 static int
5872 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5873 const_tree type)
5874 {
5875 int i;
5876
5877 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5878 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5879 return i;
5880
5881 return -1;
5882 }
5883
5884 static int
5885 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5886 {
5887 /* We aren't passed a decl, so we can't check that a call is local.
5888 However, it isn't clear that that would be a win anyway, since it
5889 might limit some tail-calling opportunities. */
5890 enum arm_pcs pcs_variant;
5891
5892 if (fntype)
5893 {
5894 const_tree fndecl = NULL_TREE;
5895
5896 if (TREE_CODE (fntype) == FUNCTION_DECL)
5897 {
5898 fndecl = fntype;
5899 fntype = TREE_TYPE (fntype);
5900 }
5901
5902 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5903 }
5904 else
5905 pcs_variant = arm_pcs_default;
5906
5907 if (pcs_variant != ARM_PCS_AAPCS)
5908 {
5909 int i;
5910
5911 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5912 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5913 TYPE_MODE (type),
5914 type))
5915 return i;
5916 }
5917 return -1;
5918 }
5919
5920 static rtx
5921 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5922 const_tree fntype)
5923 {
5924 /* We aren't passed a decl, so we can't check that a call is local.
5925 However, it isn't clear that that would be a win anyway, since it
5926 might limit some tail-calling opportunities. */
5927 enum arm_pcs pcs_variant;
5928 int unsignedp ATTRIBUTE_UNUSED;
5929
5930 if (fntype)
5931 {
5932 const_tree fndecl = NULL_TREE;
5933
5934 if (TREE_CODE (fntype) == FUNCTION_DECL)
5935 {
5936 fndecl = fntype;
5937 fntype = TREE_TYPE (fntype);
5938 }
5939
5940 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5941 }
5942 else
5943 pcs_variant = arm_pcs_default;
5944
5945 /* Promote integer types. */
5946 if (type && INTEGRAL_TYPE_P (type))
5947 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5948
5949 if (pcs_variant != ARM_PCS_AAPCS)
5950 {
5951 int i;
5952
5953 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5954 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5955 type))
5956 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5957 mode, type);
5958 }
5959
5960 /* Promote small structs returned in a register to full-word size
5961 for big-endian AAPCS. */
5962 if (type && arm_return_in_msb (type))
5963 {
5964 HOST_WIDE_INT size = int_size_in_bytes (type);
5965 if (size % UNITS_PER_WORD != 0)
5966 {
5967 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5968 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5969 }
5970 }
5971
5972 return gen_rtx_REG (mode, R0_REGNUM);
5973 }
5974
5975 static rtx
5976 aapcs_libcall_value (machine_mode mode)
5977 {
5978 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5979 && GET_MODE_SIZE (mode) <= 4)
5980 mode = SImode;
5981
5982 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5983 }
5984
5985 /* Lay out a function argument using the AAPCS rules. The rule
5986 numbers referred to here are those in the AAPCS. */
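/* A worked example of the rules below (editor's sketch): passing a
   struct { int a, b; } by value when three argument words are already in
   use gives ncrn == 3 and nregs == 2.  Rule C4 fails (3 + 2 > 4); rule
   C5 applies provided nothing has yet been placed on the stack, so r3
   receives the first word and aapcs_partial records 4 bytes that go on
   the stack.  A doubleword-aligned argument such as "long long" would
   instead be rounded up to ncrn == 4 by rule C3 and passed entirely on
   the stack.  */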
5987 static void
5988 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5989 const_tree type, bool named)
5990 {
5991 int nregs, nregs2;
5992 int ncrn;
5993
5994 /* We only need to do this once per argument. */
5995 if (pcum->aapcs_arg_processed)
5996 return;
5997
5998 pcum->aapcs_arg_processed = true;
5999
6000 /* Special case: if named is false then we are handling an incoming
6001 anonymous argument which is on the stack. */
6002 if (!named)
6003 return;
6004
6005 /* Is this a potential co-processor register candidate? */
6006 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6007 {
6008 int slot = aapcs_select_call_coproc (pcum, mode, type);
6009 pcum->aapcs_cprc_slot = slot;
6010
6011 /* We don't have to apply any of the rules from part B of the
6012 preparation phase, these are handled elsewhere in the
6013 compiler. */
6014
6015 if (slot >= 0)
6016 {
6017 /* A Co-processor register candidate goes either in its own
6018 class of registers or on the stack. */
6019 if (!pcum->aapcs_cprc_failed[slot])
6020 {
6021 /* C1.cp - Try to allocate the argument to co-processor
6022 registers. */
6023 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6024 return;
6025
6026 /* C2.cp - Put the argument on the stack and note that we
6027 can't assign any more candidates in this slot. We also
6028 need to note that we have allocated stack space, so that
6029 we won't later try to split a non-cprc candidate between
6030 core registers and the stack. */
6031 pcum->aapcs_cprc_failed[slot] = true;
6032 pcum->can_split = false;
6033 }
6034
6035 /* We didn't get a register, so this argument goes on the
6036 stack. */
6037 gcc_assert (pcum->can_split == false);
6038 return;
6039 }
6040 }
6041
6042 /* C3 - For double-word aligned arguments, round the NCRN up to the
6043 next even number. */
6044 ncrn = pcum->aapcs_ncrn;
6045 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6046 ncrn++;
6047
6048 nregs = ARM_NUM_REGS2 (mode, type);
6049
6050 /* Sigh, this test should really assert that nregs > 0, but a GCC
6051 extension allows empty structs and then gives them empty size; it
6052 then allows such a structure to be passed by value. For some of
6053 the code below we have to pretend that such an argument has
6054 non-zero size so that we 'locate' it correctly either in
6055 registers or on the stack. */
6056 gcc_assert (nregs >= 0);
6057
6058 nregs2 = nregs ? nregs : 1;
6059
6060 /* C4 - Argument fits entirely in core registers. */
6061 if (ncrn + nregs2 <= NUM_ARG_REGS)
6062 {
6063 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6064 pcum->aapcs_next_ncrn = ncrn + nregs;
6065 return;
6066 }
6067
6068 /* C5 - Some core registers left and there are no arguments already
6069 on the stack: split this argument between the remaining core
6070 registers and the stack. */
6071 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6072 {
6073 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6074 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6075 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6076 return;
6077 }
6078
6079 /* C6 - NCRN is set to 4. */
6080 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6081
6082 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
6083 return;
6084 }
6085
6086 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6087 for a call to a function whose data type is FNTYPE.
6088 For a library call, FNTYPE is NULL. */
6089 void
6090 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6091 rtx libname,
6092 tree fndecl ATTRIBUTE_UNUSED)
6093 {
6094 /* Determine the calling convention (PCS variant) for this call. */
6095 if (fntype)
6096 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6097 else
6098 pcum->pcs_variant = arm_pcs_default;
6099
6100 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6101 {
6102 if (arm_libcall_uses_aapcs_base (libname))
6103 pcum->pcs_variant = ARM_PCS_AAPCS;
6104
6105 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6106 pcum->aapcs_reg = NULL_RTX;
6107 pcum->aapcs_partial = 0;
6108 pcum->aapcs_arg_processed = false;
6109 pcum->aapcs_cprc_slot = -1;
6110 pcum->can_split = true;
6111
6112 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6113 {
6114 int i;
6115
6116 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6117 {
6118 pcum->aapcs_cprc_failed[i] = false;
6119 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6120 }
6121 }
6122 return;
6123 }
6124
6125 /* Legacy ABIs */
6126
6127 /* On the ARM, the offset starts at 0. */
6128 pcum->nregs = 0;
6129 pcum->iwmmxt_nregs = 0;
6130 pcum->can_split = true;
6131
6132 /* Varargs vectors are treated the same as long long.
6133 named_count avoids having to change the way arm handles 'named' */
6134 pcum->named_count = 0;
6135 pcum->nargs = 0;
6136
6137 if (TARGET_REALLY_IWMMXT && fntype)
6138 {
6139 tree fn_arg;
6140
6141 for (fn_arg = TYPE_ARG_TYPES (fntype);
6142 fn_arg;
6143 fn_arg = TREE_CHAIN (fn_arg))
6144 pcum->named_count += 1;
6145
6146 if (! pcum->named_count)
6147 pcum->named_count = INT_MAX;
6148 }
6149 }
6150
6151 /* Return true if mode/type need doubleword alignment. */
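/* For example (editor's illustration), "long long", "double" and any
   aggregate containing a 64-bit-aligned member require doubleword
   alignment, so they start in an even-numbered core register or an
   8-byte aligned stack slot; plain "int" and "float" do not.  */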
6152 static bool
6153 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6154 {
6155 if (!type)
6156 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6157
6158 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6159 if (!AGGREGATE_TYPE_P (type))
6160 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6161
6162 /* Array types: Use member alignment of element type. */
6163 if (TREE_CODE (type) == ARRAY_TYPE)
6164 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6165
6166 /* Record/aggregate types: Use greatest member alignment of any member. */
6167 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6168 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6169 return true;
6170
6171 return false;
6172 }
6173
6174
6175 /* Determine where to put an argument to a function.
6176 Value is zero to push the argument on the stack,
6177 or a hard register in which to store the argument.
6178
6179 MODE is the argument's machine mode.
6180 TYPE is the data type of the argument (as a tree).
6181 This is null for libcalls where that information may
6182 not be available.
6183 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6184 the preceding args and about the function being called.
6185 NAMED is nonzero if this argument is a named parameter
6186 (otherwise it is an extra parameter matching an ellipsis).
6187
6188 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6189 other arguments are passed on the stack. If (NAMED == 0) (which happens
6190 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6191 defined), say it is passed on the stack (function_prologue will
6192 indeed make it pass on the stack if necessary). */
6193
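/* For instance (editor's illustration), under the AAPCS an "int" passed
   as the first argument yields (reg:SI r0), the fifth integer argument
   yields NULL_RTX (it goes on the stack), and an unnamed argument
   matching an ellipsis is also reported as NULL_RTX here because the
   incoming-varargs code handles it.  */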
6194 static rtx
6195 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6196 const_tree type, bool named)
6197 {
6198 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6199 int nregs;
6200
6201 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6202 a call insn (op3 of a call_value insn). */
6203 if (mode == VOIDmode)
6204 return const0_rtx;
6205
6206 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6207 {
6208 aapcs_layout_arg (pcum, mode, type, named);
6209 return pcum->aapcs_reg;
6210 }
6211
6212 /* Varargs vectors are treated the same as long long.
6213 named_count avoids having to change the way arm handles 'named' */
6214 if (TARGET_IWMMXT_ABI
6215 && arm_vector_mode_supported_p (mode)
6216 && pcum->named_count > pcum->nargs + 1)
6217 {
6218 if (pcum->iwmmxt_nregs <= 9)
6219 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6220 else
6221 {
6222 pcum->can_split = false;
6223 return NULL_RTX;
6224 }
6225 }
6226
6227 /* Put doubleword aligned quantities in even register pairs. */
6228 if (pcum->nregs & 1
6229 && ARM_DOUBLEWORD_ALIGN
6230 && arm_needs_doubleword_align (mode, type))
6231 pcum->nregs++;
6232
6233 /* Only allow splitting an arg between regs and memory if all preceding
6234 args were allocated to regs. For args passed by reference we only count
6235 the reference pointer. */
6236 if (pcum->can_split)
6237 nregs = 1;
6238 else
6239 nregs = ARM_NUM_REGS2 (mode, type);
6240
6241 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6242 return NULL_RTX;
6243
6244 return gen_rtx_REG (mode, pcum->nregs);
6245 }
6246
6247 static unsigned int
6248 arm_function_arg_boundary (machine_mode mode, const_tree type)
6249 {
6250 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6251 ? DOUBLEWORD_ALIGNMENT
6252 : PARM_BOUNDARY);
6253 }
6254
6255 static int
6256 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6257 tree type, bool named)
6258 {
6259 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6260 int nregs = pcum->nregs;
6261
6262 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6263 {
6264 aapcs_layout_arg (pcum, mode, type, named);
6265 return pcum->aapcs_partial;
6266 }
6267
6268 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6269 return 0;
6270
6271 if (NUM_ARG_REGS > nregs
6272 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6273 && pcum->can_split)
6274 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6275
6276 return 0;
6277 }
6278
6279 /* Update the data in PCUM to advance over an argument
6280 of mode MODE and data type TYPE.
6281 (TYPE is null for libcalls where that information may not be available.) */
6282
6283 static void
6284 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6285 const_tree type, bool named)
6286 {
6287 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6288
6289 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6290 {
6291 aapcs_layout_arg (pcum, mode, type, named);
6292
6293 if (pcum->aapcs_cprc_slot >= 0)
6294 {
6295 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6296 type);
6297 pcum->aapcs_cprc_slot = -1;
6298 }
6299
6300 /* Generic stuff. */
6301 pcum->aapcs_arg_processed = false;
6302 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6303 pcum->aapcs_reg = NULL_RTX;
6304 pcum->aapcs_partial = 0;
6305 }
6306 else
6307 {
6308 pcum->nargs += 1;
6309 if (arm_vector_mode_supported_p (mode)
6310 && pcum->named_count > pcum->nargs
6311 && TARGET_IWMMXT_ABI)
6312 pcum->iwmmxt_nregs += 1;
6313 else
6314 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6315 }
6316 }
6317
6318 /* Variable sized types are passed by reference. This is a GCC
6319 extension to the ARM ABI. */
6320
6321 static bool
6322 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6323 machine_mode mode ATTRIBUTE_UNUSED,
6324 const_tree type, bool named ATTRIBUTE_UNUSED)
6325 {
6326 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6327 }
6328 \f
6329 /* Encode the current state of the #pragma [no_]long_calls. */
6330 typedef enum
6331 {
6332 OFF, /* No #pragma [no_]long_calls is in effect. */
6333 LONG, /* #pragma long_calls is in effect. */
6334 SHORT /* #pragma no_long_calls is in effect. */
6335 } arm_pragma_enum;
6336
6337 static arm_pragma_enum arm_pragma_long_calls = OFF;
6338
6339 void
6340 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6341 {
6342 arm_pragma_long_calls = LONG;
6343 }
6344
6345 void
6346 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6347 {
6348 arm_pragma_long_calls = SHORT;
6349 }
6350
6351 void
6352 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6353 {
6354 arm_pragma_long_calls = OFF;
6355 }
6356 \f
6357 /* Handle an attribute requiring a FUNCTION_DECL;
6358 arguments as in struct attribute_spec.handler. */
6359 static tree
6360 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6361 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6362 {
6363 if (TREE_CODE (*node) != FUNCTION_DECL)
6364 {
6365 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6366 name);
6367 *no_add_attrs = true;
6368 }
6369
6370 return NULL_TREE;
6371 }
6372
6373 /* Handle an "interrupt" or "isr" attribute;
6374 arguments as in struct attribute_spec.handler. */
6375 static tree
6376 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6377 bool *no_add_attrs)
6378 {
6379 if (DECL_P (*node))
6380 {
6381 if (TREE_CODE (*node) != FUNCTION_DECL)
6382 {
6383 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6384 name);
6385 *no_add_attrs = true;
6386 }
6387 /* FIXME: the argument if any is checked for type attributes;
6388 should it be checked for decl ones? */
6389 }
6390 else
6391 {
6392 if (TREE_CODE (*node) == FUNCTION_TYPE
6393 || TREE_CODE (*node) == METHOD_TYPE)
6394 {
6395 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6396 {
6397 warning (OPT_Wattributes, "%qE attribute ignored",
6398 name);
6399 *no_add_attrs = true;
6400 }
6401 }
6402 else if (TREE_CODE (*node) == POINTER_TYPE
6403 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6404 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6405 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6406 {
6407 *node = build_variant_type_copy (*node);
6408 TREE_TYPE (*node) = build_type_attribute_variant
6409 (TREE_TYPE (*node),
6410 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6411 *no_add_attrs = true;
6412 }
6413 else
6414 {
6415 /* Possibly pass this attribute on from the type to a decl. */
6416 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6417 | (int) ATTR_FLAG_FUNCTION_NEXT
6418 | (int) ATTR_FLAG_ARRAY_NEXT))
6419 {
6420 *no_add_attrs = true;
6421 return tree_cons (name, args, NULL_TREE);
6422 }
6423 else
6424 {
6425 warning (OPT_Wattributes, "%qE attribute ignored",
6426 name);
6427 }
6428 }
6429 }
6430
6431 return NULL_TREE;
6432 }
6433
6434 /* Handle a "pcs" attribute; arguments as in struct
6435 attribute_spec.handler. */
6436 static tree
6437 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6438 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6439 {
6440 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6441 {
6442 warning (OPT_Wattributes, "%qE attribute ignored", name);
6443 *no_add_attrs = true;
6444 }
6445 return NULL_TREE;
6446 }
6447
6448 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6449 /* Handle the "notshared" attribute. This attribute is another way of
6450 requesting hidden visibility. ARM's compiler supports
6451 "__declspec(notshared)"; we support the same thing via an
6452 attribute. */
6453
6454 static tree
6455 arm_handle_notshared_attribute (tree *node,
6456 tree name ATTRIBUTE_UNUSED,
6457 tree args ATTRIBUTE_UNUSED,
6458 int flags ATTRIBUTE_UNUSED,
6459 bool *no_add_attrs)
6460 {
6461 tree decl = TYPE_NAME (*node);
6462
6463 if (decl)
6464 {
6465 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6466 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6467 *no_add_attrs = false;
6468 }
6469 return NULL_TREE;
6470 }
6471 #endif
6472
6473 /* Return 0 if the attributes for two types are incompatible, 1 if they
6474 are compatible, and 2 if they are nearly compatible (which causes a
6475 warning to be generated). */
6476 static int
6477 arm_comp_type_attributes (const_tree type1, const_tree type2)
6478 {
6479 int l1, l2, s1, s2;
6480
6481 /* Check for mismatch of non-default calling convention. */
6482 if (TREE_CODE (type1) != FUNCTION_TYPE)
6483 return 1;
6484
6485 /* Check for mismatched call attributes. */
6486 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6487 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6488 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6489 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6490
6491 /* Only bother to check if an attribute is defined. */
6492 if (l1 | l2 | s1 | s2)
6493 {
6494 /* If one type has an attribute, the other must have the same attribute. */
6495 if ((l1 != l2) || (s1 != s2))
6496 return 0;
6497
6498 /* Disallow mixed attributes. */
6499 if ((l1 & s2) || (l2 & s1))
6500 return 0;
6501 }
6502
6503 /* Check for mismatched ISR attribute. */
6504 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6505 if (! l1)
6506 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6507 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6508 if (! l2)
6509 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6510 if (l1 != l2)
6511 return 0;
6512
6513 return 1;
6514 }
6515
6516 /* Assigns default attributes to newly defined type. This is used to
6517 set short_call/long_call attributes for function types of
6518 functions defined inside corresponding #pragma scopes. */
6519 static void
6520 arm_set_default_type_attributes (tree type)
6521 {
6522 /* Add __attribute__ ((long_call)) to all functions, when
6523 inside #pragma long_calls or __attribute__ ((short_call)),
6524 when inside #pragma no_long_calls. */
6525 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6526 {
6527 tree type_attr_list, attr_name;
6528 type_attr_list = TYPE_ATTRIBUTES (type);
6529
6530 if (arm_pragma_long_calls == LONG)
6531 attr_name = get_identifier ("long_call");
6532 else if (arm_pragma_long_calls == SHORT)
6533 attr_name = get_identifier ("short_call");
6534 else
6535 return;
6536
6537 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6538 TYPE_ATTRIBUTES (type) = type_attr_list;
6539 }
6540 }
6541 \f
6542 /* Return true if DECL is known to be linked into section SECTION. */
6543
6544 static bool
6545 arm_function_in_section_p (tree decl, section *section)
6546 {
6547 /* We can only be certain about the prevailing symbol definition. */
6548 if (!decl_binds_to_current_def_p (decl))
6549 return false;
6550
6551 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6552 if (!DECL_SECTION_NAME (decl))
6553 {
6554 /* Make sure that we will not create a unique section for DECL. */
6555 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6556 return false;
6557 }
6558
6559 return function_section (decl) == section;
6560 }
6561
6562 /* Return nonzero if a 32-bit "long_call" should be generated for
6563 a call from the current function to DECL. We generate a long_call
6564 if the function:
6565
6566 a. has an __attribute__ ((long_call))
6567 or b. is within the scope of a #pragma long_calls
6568 or c. the -mlong-calls command line switch has been specified
6569
6570 However we do not generate a long call if the function:
6571
6572 d. has an __attribute__ ((short_call))
6573 or e. is inside the scope of a #pragma no_long_calls
6574 or f. is defined in the same section as the current function. */
6575
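/* For example (illustrative):

     void far_func (void) __attribute__ ((long_call));

   forces calls to far_func through a register-indirect sequence even
   without -mlong-calls, while __attribute__ ((short_call)) does the
   opposite for an individual callee.  */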
6576 bool
6577 arm_is_long_call_p (tree decl)
6578 {
6579 tree attrs;
6580
6581 if (!decl)
6582 return TARGET_LONG_CALLS;
6583
6584 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6585 if (lookup_attribute ("short_call", attrs))
6586 return false;
6587
6588 /* For "f", be conservative, and only cater for cases in which the
6589 whole of the current function is placed in the same section. */
6590 if (!flag_reorder_blocks_and_partition
6591 && TREE_CODE (decl) == FUNCTION_DECL
6592 && arm_function_in_section_p (decl, current_function_section ()))
6593 return false;
6594
6595 if (lookup_attribute ("long_call", attrs))
6596 return true;
6597
6598 return TARGET_LONG_CALLS;
6599 }
6600
6601 /* Return nonzero if it is ok to make a tail-call to DECL. */
6602 static bool
6603 arm_function_ok_for_sibcall (tree decl, tree exp)
6604 {
6605 unsigned long func_type;
6606
6607 if (cfun->machine->sibcall_blocked)
6608 return false;
6609
6610 /* Never tailcall something if we are generating code for Thumb-1. */
6611 if (TARGET_THUMB1)
6612 return false;
6613
6614 /* The PIC register is live on entry to VxWorks PLT entries, so we
6615 must make the call before restoring the PIC register. */
6616 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6617 return false;
6618
6619 /* If we are interworking and the function is not declared static
6620 then we can't tail-call it unless we know that it exists in this
6621 compilation unit (since it might be a Thumb routine). */
6622 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6623 && !TREE_ASM_WRITTEN (decl))
6624 return false;
6625
6626 func_type = arm_current_func_type ();
6627 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6628 if (IS_INTERRUPT (func_type))
6629 return false;
6630
6631 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6632 {
6633 /* Check that the return value locations are the same. For
6634 example that we aren't returning a value from the sibling in
6635 a VFP register but then need to transfer it to a core
6636 register. */
6637 rtx a, b;
6638
6639 a = arm_function_value (TREE_TYPE (exp), decl, false);
6640 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6641 cfun->decl, false);
6642 if (!rtx_equal_p (a, b))
6643 return false;
6644 }
6645
6646 /* Never tailcall if function may be called with a misaligned SP. */
6647 if (IS_STACKALIGN (func_type))
6648 return false;
6649
6650 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6651 references should become a NOP. Don't convert such calls into
6652 sibling calls. */
6653 if (TARGET_AAPCS_BASED
6654 && arm_abi == ARM_ABI_AAPCS
6655 && decl
6656 && DECL_WEAK (decl))
6657 return false;
6658
6659 /* Everything else is ok. */
6660 return true;
6661 }
6662
6663 \f
6664 /* Addressing mode support functions. */
6665
6666 /* Return nonzero if X is a legitimate immediate operand when compiling
6667 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6668 int
6669 legitimate_pic_operand_p (rtx x)
6670 {
6671 if (GET_CODE (x) == SYMBOL_REF
6672 || (GET_CODE (x) == CONST
6673 && GET_CODE (XEXP (x, 0)) == PLUS
6674 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6675 return 0;
6676
6677 return 1;
6678 }
6679
6680 /* Record that the current function needs a PIC register. Initialize
6681 cfun->machine->pic_reg if we have not already done so. */
6682
6683 static void
6684 require_pic_register (void)
6685 {
6686 /* A lot of the logic here is made obscure by the fact that this
6687 routine gets called as part of the rtx cost estimation process.
6688 We don't want those calls to affect any assumptions about the real
6689 function; and further, we can't call entry_of_function() until we
6690 start the real expansion process. */
6691 if (!crtl->uses_pic_offset_table)
6692 {
6693 gcc_assert (can_create_pseudo_p ());
6694 if (arm_pic_register != INVALID_REGNUM
6695 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6696 {
6697 if (!cfun->machine->pic_reg)
6698 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6699
6700 /* Play games to avoid marking the function as needing pic
6701 if we are being called as part of the cost-estimation
6702 process. */
6703 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6704 crtl->uses_pic_offset_table = 1;
6705 }
6706 else
6707 {
6708 rtx_insn *seq, *insn;
6709
6710 if (!cfun->machine->pic_reg)
6711 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6712
6713 /* Play games to avoid marking the function as needing pic
6714 if we are being called as part of the cost-estimation
6715 process. */
6716 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6717 {
6718 crtl->uses_pic_offset_table = 1;
6719 start_sequence ();
6720
6721 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6722 && arm_pic_register > LAST_LO_REGNUM)
6723 emit_move_insn (cfun->machine->pic_reg,
6724 gen_rtx_REG (Pmode, arm_pic_register));
6725 else
6726 arm_load_pic_register (0UL);
6727
6728 seq = get_insns ();
6729 end_sequence ();
6730
6731 for (insn = seq; insn; insn = NEXT_INSN (insn))
6732 if (INSN_P (insn))
6733 INSN_LOCATION (insn) = prologue_location;
6734
6735 /* We can be called during expansion of PHI nodes, where
6736 we can't yet emit instructions directly in the final
6737 insn stream. Queue the insns on the entry edge, they will
6738 be committed after everything else is expanded. */
6739 insert_insn_on_edge (seq,
6740 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6741 }
6742 }
6743 }
6744 }
6745
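/* Convert ORIG, a SYMBOL_REF, LABEL_REF or constant expression, into a
   form that is legitimate when generating PIC, using REG (or a fresh
   pseudo) as a scratch/result register.  As a rough illustration
   (editor's sketch): a reference to a global variable becomes a load of
   its address from the GOT via cfun->machine->pic_reg, while a suitable
   local symbol can typically be addressed PC-relatively and needs no GOT
   slot.  */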
6746 rtx
6747 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6748 {
6749 if (GET_CODE (orig) == SYMBOL_REF
6750 || GET_CODE (orig) == LABEL_REF)
6751 {
6752 rtx insn;
6753
6754 if (reg == 0)
6755 {
6756 gcc_assert (can_create_pseudo_p ());
6757 reg = gen_reg_rtx (Pmode);
6758 }
6759
6760 /* VxWorks does not impose a fixed gap between segments; the run-time
6761 gap can be different from the object-file gap. We therefore can't
6762 use GOTOFF unless we are absolutely sure that the symbol is in the
6763 same segment as the GOT. Unfortunately, the flexibility of linker
6764 scripts means that we can't be sure of that in general, so assume
6765 that GOTOFF is never valid on VxWorks. */
6766 if ((GET_CODE (orig) == LABEL_REF
6767 || (GET_CODE (orig) == SYMBOL_REF &&
6768 SYMBOL_REF_LOCAL_P (orig)))
6769 && NEED_GOT_RELOC
6770 && arm_pic_data_is_text_relative)
6771 insn = arm_pic_static_addr (orig, reg);
6772 else
6773 {
6774 rtx pat;
6775 rtx mem;
6776
6777 /* If this function doesn't have a pic register, create one now. */
6778 require_pic_register ();
6779
6780 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6781
6782 /* Make the MEM as close to a constant as possible. */
6783 mem = SET_SRC (pat);
6784 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6785 MEM_READONLY_P (mem) = 1;
6786 MEM_NOTRAP_P (mem) = 1;
6787
6788 insn = emit_insn (pat);
6789 }
6790
6791 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6792 by loop. */
6793 set_unique_reg_note (insn, REG_EQUAL, orig);
6794
6795 return reg;
6796 }
6797 else if (GET_CODE (orig) == CONST)
6798 {
6799 rtx base, offset;
6800
6801 if (GET_CODE (XEXP (orig, 0)) == PLUS
6802 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6803 return orig;
6804
6805 /* Handle the case where we have: const (UNSPEC_TLS). */
6806 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6807 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6808 return orig;
6809
6810 /* Handle the case where we have:
6811 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6812 CONST_INT. */
6813 if (GET_CODE (XEXP (orig, 0)) == PLUS
6814 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6815 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6816 {
6817 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6818 return orig;
6819 }
6820
6821 if (reg == 0)
6822 {
6823 gcc_assert (can_create_pseudo_p ());
6824 reg = gen_reg_rtx (Pmode);
6825 }
6826
6827 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6828
6829 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6830 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6831 base == reg ? 0 : reg);
6832
6833 if (CONST_INT_P (offset))
6834 {
6835 /* The base register doesn't really matter, we only want to
6836 test the index for the appropriate mode. */
6837 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6838 {
6839 gcc_assert (can_create_pseudo_p ());
6840 offset = force_reg (Pmode, offset);
6841 }
6842
6843 if (CONST_INT_P (offset))
6844 return plus_constant (Pmode, base, INTVAL (offset));
6845 }
6846
6847 if (GET_MODE_SIZE (mode) > 4
6848 && (GET_MODE_CLASS (mode) == MODE_INT
6849 || TARGET_SOFT_FLOAT))
6850 {
6851 emit_insn (gen_addsi3 (reg, base, offset));
6852 return reg;
6853 }
6854
6855 return gen_rtx_PLUS (Pmode, base, offset);
6856 }
6857
6858 return orig;
6859 }
6860
6861
6862 /* Find a spare register to use during the prolog of a function. */
6863
6864 static int
6865 thumb_find_work_register (unsigned long pushed_regs_mask)
6866 {
6867 int reg;
6868
6869 /* Check the argument registers first as these are call-used. The
6870 register allocation order means that sometimes r3 might be used
6871 but earlier argument registers might not, so check them all. */
6872 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6873 if (!df_regs_ever_live_p (reg))
6874 return reg;
6875
6876 /* Before going on to check the call-saved registers we can try a couple
6877 more ways of deducing that r3 is available. The first is when we are
6878 pushing anonymous arguments onto the stack and we have less than 4
6879 registers worth of fixed arguments(*). In this case r3 will be part of
6880 the variable argument list and so we can be sure that it will be
6881 pushed right at the start of the function. Hence it will be available
6882 for the rest of the prologue.
6883 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6884 if (cfun->machine->uses_anonymous_args
6885 && crtl->args.pretend_args_size > 0)
6886 return LAST_ARG_REGNUM;
6887
6888 /* The other case is when we have fixed arguments but less than 4 registers
6889 worth. In this case r3 might be used in the body of the function, but
6890 it is not being used to convey an argument into the function. In theory
6891 we could just check crtl->args.size to see how many bytes are
6892 being passed in argument registers, but it seems that it is unreliable.
6893 Sometimes it will have the value 0 when in fact arguments are being
6894 passed. (See testcase execute/20021111-1.c for an example). So we also
6895 check the args_info.nregs field as well. The problem with this field is
6896 that it makes no allowances for arguments that are passed to the
6897 function but which are not used. Hence we could miss an opportunity
6898 when a function has an unused argument in r3. But it is better to be
6899 safe than to be sorry. */
6900 if (! cfun->machine->uses_anonymous_args
6901 && crtl->args.size >= 0
6902 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6903 && (TARGET_AAPCS_BASED
6904 ? crtl->args.info.aapcs_ncrn < 4
6905 : crtl->args.info.nregs < 4))
6906 return LAST_ARG_REGNUM;
6907
6908 /* Otherwise look for a call-saved register that is going to be pushed. */
6909 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6910 if (pushed_regs_mask & (1 << reg))
6911 return reg;
6912
6913 if (TARGET_THUMB2)
6914 {
6915 /* Thumb-2 can use high regs. */
6916 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6917 if (pushed_regs_mask & (1 << reg))
6918 return reg;
6919 }
6920 /* Something went wrong - thumb_compute_save_reg_mask()
6921 should have arranged for a suitable register to be pushed. */
6922 gcc_unreachable ();
6923 }
6924
6925 static GTY(()) int pic_labelno;
6926
6927 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6928 low register. */
6929
6930 void
6931 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6932 {
6933 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6934
6935 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6936 return;
6937
6938 gcc_assert (flag_pic);
6939
6940 pic_reg = cfun->machine->pic_reg;
6941 if (TARGET_VXWORKS_RTP)
6942 {
6943 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6944 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6945 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6946
6947 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6948
6949 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6950 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6951 }
6952 else
6953 {
6954 /* We use an UNSPEC rather than a LABEL_REF because this label
6955 never appears in the code stream. */
6956
6957 labelno = GEN_INT (pic_labelno++);
6958 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6959 l1 = gen_rtx_CONST (VOIDmode, l1);
6960
6961 /* On the ARM the PC register contains 'dot + 8' at the time of the
6962 addition, on the Thumb it is 'dot + 4'. */
6963 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6964 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6965 UNSPEC_GOTSYM_OFF);
6966 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6967
6968 if (TARGET_32BIT)
6969 {
6970 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6971 }
6972 else /* TARGET_THUMB1 */
6973 {
6974 if (arm_pic_register != INVALID_REGNUM
6975 && REGNO (pic_reg) > LAST_LO_REGNUM)
6976 {
6977 /* We will have pushed the pic register, so we should always be
6978 able to find a work register. */
6979 pic_tmp = gen_rtx_REG (SImode,
6980 thumb_find_work_register (saved_regs));
6981 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6982 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6983 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6984 }
6985 else if (arm_pic_register != INVALID_REGNUM
6986 && arm_pic_register > LAST_LO_REGNUM
6987 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6988 {
6989 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6990 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6991 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6992 }
6993 else
6994 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6995 }
6996 }
6997
6998 /* Need to emit this whether or not we obey regdecls,
6999 since setjmp/longjmp can cause life info to screw up. */
7000 emit_use (pic_reg);
7001 }
7002
7003 /* Generate code to load the address of a static var when flag_pic is set. */
7004 static rtx
7005 arm_pic_static_addr (rtx orig, rtx reg)
7006 {
7007 rtx l1, labelno, offset_rtx, insn;
7008
7009 gcc_assert (flag_pic);
7010
7011 /* We use an UNSPEC rather than a LABEL_REF because this label
7012 never appears in the code stream. */
7013 labelno = GEN_INT (pic_labelno++);
7014 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7015 l1 = gen_rtx_CONST (VOIDmode, l1);
7016
7017 /* On the ARM the PC register contains 'dot + 8' at the time of the
7018 addition, on the Thumb it is 'dot + 4'. */
7019 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7020 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7021 UNSPEC_SYMBOL_OFFSET);
7022 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7023
7024 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7025 return insn;
7026 }
7027
7028 /* Return nonzero if X is valid as an ARM state addressing register. */
7029 static int
7030 arm_address_register_rtx_p (rtx x, int strict_p)
7031 {
7032 int regno;
7033
7034 if (!REG_P (x))
7035 return 0;
7036
7037 regno = REGNO (x);
7038
7039 if (strict_p)
7040 return ARM_REGNO_OK_FOR_BASE_P (regno);
7041
7042 return (regno <= LAST_ARM_REGNUM
7043 || regno >= FIRST_PSEUDO_REGISTER
7044 || regno == FRAME_POINTER_REGNUM
7045 || regno == ARG_POINTER_REGNUM);
7046 }
7047
7048 /* Return TRUE if this rtx is the difference of a symbol and a label,
7049 and will reduce to a PC-relative relocation in the object file.
7050 Expressions like this can be left alone when generating PIC, rather
7051 than forced through the GOT. */
7052 static int
7053 pcrel_constant_p (rtx x)
7054 {
7055 if (GET_CODE (x) == MINUS)
7056 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7057
7058 return FALSE;
7059 }
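
/* For example, an expression of the form

     (minus (symbol_ref ("foo")) (label_ref 27))

   satisfies pcrel_constant_p: "foo - .L27" reduces to a PC-relative
   relocation in the object file, so it need not be forced through the
   GOT when generating PIC.  The symbol name and label number here are
   purely illustrative.  */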
7060
7061 /* Return true if X will surely end up in an index register after next
7062 splitting pass. */
7063 static bool
7064 will_be_in_index_register (const_rtx x)
7065 {
7066 /* arm.md: calculate_pic_address will split this into a register. */
7067 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7068 }
7069
7070 /* Return nonzero if X is a valid ARM state address operand. */
7071 int
7072 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7073 int strict_p)
7074 {
7075 bool use_ldrd;
7076 enum rtx_code code = GET_CODE (x);
7077
7078 if (arm_address_register_rtx_p (x, strict_p))
7079 return 1;
7080
7081 use_ldrd = (TARGET_LDRD
7082 && (mode == DImode
7083 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7084
7085 if (code == POST_INC || code == PRE_DEC
7086 || ((code == PRE_INC || code == POST_DEC)
7087 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7088 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7089
7090 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7091 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7092 && GET_CODE (XEXP (x, 1)) == PLUS
7093 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7094 {
7095 rtx addend = XEXP (XEXP (x, 1), 1);
7096
7097 /* Don't allow ldrd post-increment by register, because it's hard
7098 to fix up invalid register choices. */
7099 if (use_ldrd
7100 && GET_CODE (x) == POST_MODIFY
7101 && REG_P (addend))
7102 return 0;
7103
7104 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7105 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7106 }
7107
7108 /* After reload constants split into minipools will have addresses
7109 from a LABEL_REF. */
7110 else if (reload_completed
7111 && (code == LABEL_REF
7112 || (code == CONST
7113 && GET_CODE (XEXP (x, 0)) == PLUS
7114 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7115 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7116 return 1;
7117
7118 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7119 return 0;
7120
7121 else if (code == PLUS)
7122 {
7123 rtx xop0 = XEXP (x, 0);
7124 rtx xop1 = XEXP (x, 1);
7125
7126 return ((arm_address_register_rtx_p (xop0, strict_p)
7127 && ((CONST_INT_P (xop1)
7128 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7129 || (!strict_p && will_be_in_index_register (xop1))))
7130 || (arm_address_register_rtx_p (xop1, strict_p)
7131 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7132 }
7133
7134 #if 0
7135 /* Reload currently can't handle MINUS, so disable this for now */
7136 else if (GET_CODE (x) == MINUS)
7137 {
7138 rtx xop0 = XEXP (x, 0);
7139 rtx xop1 = XEXP (x, 1);
7140
7141 return (arm_address_register_rtx_p (xop0, strict_p)
7142 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7143 }
7144 #endif
7145
7146 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7147 && code == SYMBOL_REF
7148 && CONSTANT_POOL_ADDRESS_P (x)
7149 && ! (flag_pic
7150 && symbol_mentioned_p (get_pool_constant (x))
7151 && ! pcrel_constant_p (get_pool_constant (x))))
7152 return 1;
7153
7154 return 0;
7155 }
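
/* A minimal usage sketch for arm_legitimate_address_outer_p; the register
   number and offsets are illustrative only:

     rtx base = gen_rtx_REG (SImode, 3);
     rtx a1 = plus_constant (Pmode, base, 2048);
     rtx a2 = plus_constant (Pmode, base, 8192);

     arm_legitimate_address_outer_p (SImode, a1, SET, false)

   returns 1, since an SImode ldr/str immediate offset may lie anywhere in
   (-4096, 4096), whereas the same call with a2 returns 0 and the address
   has to be legitimized first (see arm_legitimize_address below).  */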
7156
7157 /* Return nonzero if X is a valid Thumb-2 address operand. */
7158 static int
7159 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7160 {
7161 bool use_ldrd;
7162 enum rtx_code code = GET_CODE (x);
7163
7164 if (arm_address_register_rtx_p (x, strict_p))
7165 return 1;
7166
7167 use_ldrd = (TARGET_LDRD
7168 && (mode == DImode
7169 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7170
7171 if (code == POST_INC || code == PRE_DEC
7172 || ((code == PRE_INC || code == POST_DEC)
7173 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7174 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7175
7176 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7177 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7178 && GET_CODE (XEXP (x, 1)) == PLUS
7179 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7180 {
7181 /* Thumb-2 only has autoincrement by constant. */
7182 rtx addend = XEXP (XEXP (x, 1), 1);
7183 HOST_WIDE_INT offset;
7184
7185 if (!CONST_INT_P (addend))
7186 return 0;
7187
7188 offset = INTVAL (addend);
7189 if (GET_MODE_SIZE (mode) <= 4)
7190 return (offset > -256 && offset < 256);
7191
7192 return (use_ldrd && offset > -1024 && offset < 1024
7193 && (offset & 3) == 0);
7194 }
7195
7196 /* After reload constants split into minipools will have addresses
7197 from a LABEL_REF. */
7198 else if (reload_completed
7199 && (code == LABEL_REF
7200 || (code == CONST
7201 && GET_CODE (XEXP (x, 0)) == PLUS
7202 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7203 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7204 return 1;
7205
7206 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7207 return 0;
7208
7209 else if (code == PLUS)
7210 {
7211 rtx xop0 = XEXP (x, 0);
7212 rtx xop1 = XEXP (x, 1);
7213
7214 return ((arm_address_register_rtx_p (xop0, strict_p)
7215 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7216 || (!strict_p && will_be_in_index_register (xop1))))
7217 || (arm_address_register_rtx_p (xop1, strict_p)
7218 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7219 }
7220
7221 /* Normally we can assign constant values to target registers without
7222 the help of the constant pool.  But there are cases where we have to
7223 use the constant pool, for example:
7224 1) assigning a label to a register;
7225 2) sign-extending an 8-bit value to 32 bits and then assigning it to
7226 a register.
7227
7228 A constant pool access of the form:
7229 (set (reg r0) (mem (symbol_ref (".LC0"))))
7230 will cause the use of the literal pool (later, in arm_reorg).  So here
7231 we mark such a form as invalid, and the compiler will then adjust it
7232 into:
7233 (set (reg r0) (symbol_ref (".LC0")))
7234 (set (reg r0) (mem (reg r0))).
7235 No extra register is required, and (mem (reg r0)) won't cause the use of literal pools.  */
7236 else if (arm_disable_literal_pool && code == SYMBOL_REF
7237 && CONSTANT_POOL_ADDRESS_P (x))
7238 return 0;
7239
7240 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7241 && code == SYMBOL_REF
7242 && CONSTANT_POOL_ADDRESS_P (x)
7243 && ! (flag_pic
7244 && symbol_mentioned_p (get_pool_constant (x))
7245 && ! pcrel_constant_p (get_pool_constant (x))))
7246 return 1;
7247
7248 return 0;
7249 }
7250
7251 /* Return nonzero if INDEX is valid for an address index operand in
7252 ARM state. */
7253 static int
7254 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7255 int strict_p)
7256 {
7257 HOST_WIDE_INT range;
7258 enum rtx_code code = GET_CODE (index);
7259
7260 /* Standard coprocessor addressing modes. */
7261 if (TARGET_HARD_FLOAT
7262 && TARGET_VFP
7263 && (mode == SFmode || mode == DFmode))
7264 return (code == CONST_INT && INTVAL (index) < 1024
7265 && INTVAL (index) > -1024
7266 && (INTVAL (index) & 3) == 0);
7267
7268 /* For quad modes, we restrict the constant offset to be slightly less
7269 than what the instruction format permits. We do this because for
7270 quad mode moves, we will actually decompose them into two separate
7271 double-mode reads or writes. INDEX must therefore be a valid
7272 (double-mode) offset and so should INDEX+8. */
7273 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7274 return (code == CONST_INT
7275 && INTVAL (index) < 1016
7276 && INTVAL (index) > -1024
7277 && (INTVAL (index) & 3) == 0);
7278
7279 /* We have no such constraint on double mode offsets, so we permit the
7280 full range of the instruction format. */
7281 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7282 return (code == CONST_INT
7283 && INTVAL (index) < 1024
7284 && INTVAL (index) > -1024
7285 && (INTVAL (index) & 3) == 0);
7286
7287 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7288 return (code == CONST_INT
7289 && INTVAL (index) < 1024
7290 && INTVAL (index) > -1024
7291 && (INTVAL (index) & 3) == 0);
7292
7293 if (arm_address_register_rtx_p (index, strict_p)
7294 && (GET_MODE_SIZE (mode) <= 4))
7295 return 1;
7296
7297 if (mode == DImode || mode == DFmode)
7298 {
7299 if (code == CONST_INT)
7300 {
7301 HOST_WIDE_INT val = INTVAL (index);
7302
7303 if (TARGET_LDRD)
7304 return val > -256 && val < 256;
7305 else
7306 return val > -4096 && val < 4092;
7307 }
7308
7309 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7310 }
7311
7312 if (GET_MODE_SIZE (mode) <= 4
7313 && ! (arm_arch4
7314 && (mode == HImode
7315 || mode == HFmode
7316 || (mode == QImode && outer == SIGN_EXTEND))))
7317 {
7318 if (code == MULT)
7319 {
7320 rtx xiop0 = XEXP (index, 0);
7321 rtx xiop1 = XEXP (index, 1);
7322
7323 return ((arm_address_register_rtx_p (xiop0, strict_p)
7324 && power_of_two_operand (xiop1, SImode))
7325 || (arm_address_register_rtx_p (xiop1, strict_p)
7326 && power_of_two_operand (xiop0, SImode)));
7327 }
7328 else if (code == LSHIFTRT || code == ASHIFTRT
7329 || code == ASHIFT || code == ROTATERT)
7330 {
7331 rtx op = XEXP (index, 1);
7332
7333 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7334 && CONST_INT_P (op)
7335 && INTVAL (op) > 0
7336 && INTVAL (op) <= 31);
7337 }
7338 }
7339
7340 /* For ARM v4 we may be doing a sign-extend operation during the
7341 load. */
7342 if (arm_arch4)
7343 {
7344 if (mode == HImode
7345 || mode == HFmode
7346 || (outer == SIGN_EXTEND && mode == QImode))
7347 range = 256;
7348 else
7349 range = 4096;
7350 }
7351 else
7352 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7353
7354 return (code == CONST_INT
7355 && INTVAL (index) < range
7356 && INTVAL (index) > -range);
7357 }
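
/* For reference, the immediate ranges accepted above for an ARM-state
   (reg + #imm) address, in the order they are checked (register and
   shifted-register index forms are handled separately):

     VFP SFmode/DFmode             imm in (-1024, 1024), imm % 4 == 0
     NEON quad-reg modes           imm in (-1024, 1016), imm % 4 == 0
     NEON double-reg modes         imm in (-1024, 1024), imm % 4 == 0
     DImode/DFmode with LDRD       imm in (-256, 256)
     DImode/DFmode without LDRD    imm in (-4096, 4092)
     HImode/HFmode (ARMv4+)        imm in (-256, 256)      ldrh/strh
     SImode and plain QImode       imm in (-4096, 4096)    ldr/ldrb
     QImode sign-extension (v4+)   imm in (-256, 256)      ldrsb

   This table merely restates the checks in arm_legitimate_index_p.  */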
7358
7359 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7360 index operand, i.e. 1, 2, 4 or 8. */
7361 static bool
7362 thumb2_index_mul_operand (rtx op)
7363 {
7364 HOST_WIDE_INT val;
7365
7366 if (!CONST_INT_P (op))
7367 return false;
7368
7369 val = INTVAL (op);
7370 return (val == 1 || val == 2 || val == 4 || val == 8);
7371 }
7372
7373 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7374 static int
7375 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7376 {
7377 enum rtx_code code = GET_CODE (index);
7378
7379 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7380 /* Standard coprocessor addressing modes. */
7381 if (TARGET_HARD_FLOAT
7382 && TARGET_VFP
7383 && (mode == SFmode || mode == DFmode))
7384 return (code == CONST_INT && INTVAL (index) < 1024
7385 /* Thumb-2 allows only a > -256 index range for its core register
7386 load/stores.  Since we allow SF/DF in core registers, we have
7387 to use the intersection of -256..4096 (core) and -1024..1024
7388 (coprocessor); see the worked example after this function. */
7389 && INTVAL (index) > -256
7390 && (INTVAL (index) & 3) == 0);
7391
7392 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7393 {
7394 /* For DImode assume values will usually live in core regs
7395 and only allow LDRD addressing modes. */
7396 if (!TARGET_LDRD || mode != DImode)
7397 return (code == CONST_INT
7398 && INTVAL (index) < 1024
7399 && INTVAL (index) > -1024
7400 && (INTVAL (index) & 3) == 0);
7401 }
7402
7403 /* For quad modes, we restrict the constant offset to be slightly less
7404 than what the instruction format permits. We do this because for
7405 quad mode moves, we will actually decompose them into two separate
7406 double-mode reads or writes. INDEX must therefore be a valid
7407 (double-mode) offset and so should INDEX+8. */
7408 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7409 return (code == CONST_INT
7410 && INTVAL (index) < 1016
7411 && INTVAL (index) > -1024
7412 && (INTVAL (index) & 3) == 0);
7413
7414 /* We have no such constraint on double mode offsets, so we permit the
7415 full range of the instruction format. */
7416 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7417 return (code == CONST_INT
7418 && INTVAL (index) < 1024
7419 && INTVAL (index) > -1024
7420 && (INTVAL (index) & 3) == 0);
7421
7422 if (arm_address_register_rtx_p (index, strict_p)
7423 && (GET_MODE_SIZE (mode) <= 4))
7424 return 1;
7425
7426 if (mode == DImode || mode == DFmode)
7427 {
7428 if (code == CONST_INT)
7429 {
7430 HOST_WIDE_INT val = INTVAL (index);
7431 /* ??? Can we assume ldrd for thumb2? */
7432 /* Thumb-2 ldrd only has reg+const addressing modes. */
7433 /* ldrd supports offsets of +-1020.
7434 However the ldr fallback does not. */
7435 return val > -256 && val < 256 && (val & 3) == 0;
7436 }
7437 else
7438 return 0;
7439 }
7440
7441 if (code == MULT)
7442 {
7443 rtx xiop0 = XEXP (index, 0);
7444 rtx xiop1 = XEXP (index, 1);
7445
7446 return ((arm_address_register_rtx_p (xiop0, strict_p)
7447 && thumb2_index_mul_operand (xiop1))
7448 || (arm_address_register_rtx_p (xiop1, strict_p)
7449 && thumb2_index_mul_operand (xiop0)));
7450 }
7451 else if (code == ASHIFT)
7452 {
7453 rtx op = XEXP (index, 1);
7454
7455 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7456 && CONST_INT_P (op)
7457 && INTVAL (op) > 0
7458 && INTVAL (op) <= 3);
7459 }
7460
7461 return (code == CONST_INT
7462 && INTVAL (index) < 4096
7463 && INTVAL (index) > -256);
7464 }
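
/* Worked example for the core/coprocessor intersection noted above: with
   VFP enabled on Thumb-2, a DFmode address (reg + #512) is accepted, since
   512 lies in both the core -255..4095 range and the coprocessor
   -1020..1020 range, whereas (reg + #-512) is rejected: vldr/vstr could
   reach it, but if the value ends up in a pair of core registers the
   Thumb-2 ldr/str negative offset only extends to -255.  The offsets are
   illustrative.  */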
7465
7466 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7467 static int
7468 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7469 {
7470 int regno;
7471
7472 if (!REG_P (x))
7473 return 0;
7474
7475 regno = REGNO (x);
7476
7477 if (strict_p)
7478 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7479
7480 return (regno <= LAST_LO_REGNUM
7481 || regno > LAST_VIRTUAL_REGISTER
7482 || regno == FRAME_POINTER_REGNUM
7483 || (GET_MODE_SIZE (mode) >= 4
7484 && (regno == STACK_POINTER_REGNUM
7485 || regno >= FIRST_PSEUDO_REGISTER
7486 || x == hard_frame_pointer_rtx
7487 || x == arg_pointer_rtx)));
7488 }
7489
7490 /* Return nonzero if x is a legitimate index register. This is the case
7491 for any base register that can access a QImode object. */
7492 inline static int
7493 thumb1_index_register_rtx_p (rtx x, int strict_p)
7494 {
7495 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7496 }
7497
7498 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7499
7500 The AP may be eliminated to either the SP or the FP, so we use the
7501 least common denominator, e.g. SImode, and offsets from 0 to 64.
7502
7503 ??? Verify whether the above is the right approach.
7504
7505 ??? Also, the FP may be eliminated to the SP, so perhaps that
7506 needs special handling also.
7507
7508 ??? Look at how the mips16 port solves this problem. It probably uses
7509 better ways to solve some of these problems.
7510
7511 Although it is not incorrect, we don't accept QImode and HImode
7512 addresses based on the frame pointer or arg pointer until the
7513 reload pass starts.  This is so that eliminating such addresses
7514 into stack-based ones won't produce impossible code. */
7515 int
7516 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7517 {
7518 /* ??? Not clear if this is right. Experiment. */
7519 if (GET_MODE_SIZE (mode) < 4
7520 && !(reload_in_progress || reload_completed)
7521 && (reg_mentioned_p (frame_pointer_rtx, x)
7522 || reg_mentioned_p (arg_pointer_rtx, x)
7523 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7524 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7525 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7526 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7527 return 0;
7528
7529 /* Accept any base register. SP only in SImode or larger. */
7530 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7531 return 1;
7532
7533 /* This is PC relative data before arm_reorg runs. */
7534 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7535 && GET_CODE (x) == SYMBOL_REF
7536 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7537 return 1;
7538
7539 /* This is PC relative data after arm_reorg runs. */
7540 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7541 && reload_completed
7542 && (GET_CODE (x) == LABEL_REF
7543 || (GET_CODE (x) == CONST
7544 && GET_CODE (XEXP (x, 0)) == PLUS
7545 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7546 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7547 return 1;
7548
7549 /* Post-inc indexing only supported for SImode and larger. */
7550 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7551 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7552 return 1;
7553
7554 else if (GET_CODE (x) == PLUS)
7555 {
7556 /* REG+REG address can be any two index registers. */
7557 /* We disallow FRAME+REG addressing since we know that FRAME
7558 will be replaced with STACK, and SP relative addressing only
7559 permits SP+OFFSET. */
7560 if (GET_MODE_SIZE (mode) <= 4
7561 && XEXP (x, 0) != frame_pointer_rtx
7562 && XEXP (x, 1) != frame_pointer_rtx
7563 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7564 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7565 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7566 return 1;
7567
7568 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
7569 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7570 || XEXP (x, 0) == arg_pointer_rtx)
7571 && CONST_INT_P (XEXP (x, 1))
7572 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7573 return 1;
7574
7575 /* REG+const has a 10-bit offset for SP, but only SImode and
7576 larger are supported. */
7577 /* ??? Should probably check for DI/DFmode overflow here
7578 just like GO_IF_LEGITIMATE_OFFSET does. */
7579 else if (REG_P (XEXP (x, 0))
7580 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7581 && GET_MODE_SIZE (mode) >= 4
7582 && CONST_INT_P (XEXP (x, 1))
7583 && INTVAL (XEXP (x, 1)) >= 0
7584 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7585 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7586 return 1;
7587
7588 else if (REG_P (XEXP (x, 0))
7589 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7590 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7591 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7592 && REGNO (XEXP (x, 0))
7593 <= LAST_VIRTUAL_POINTER_REGISTER))
7594 && GET_MODE_SIZE (mode) >= 4
7595 && CONST_INT_P (XEXP (x, 1))
7596 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7597 return 1;
7598 }
7599
7600 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7601 && GET_MODE_SIZE (mode) == 4
7602 && GET_CODE (x) == SYMBOL_REF
7603 && CONSTANT_POOL_ADDRESS_P (x)
7604 && ! (flag_pic
7605 && symbol_mentioned_p (get_pool_constant (x))
7606 && ! pcrel_constant_p (get_pool_constant (x))))
7607 return 1;
7608
7609 return 0;
7610 }
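
/* A sketch of the address shapes thumb1_legitimate_address_p accepts;
   the register numbers and offsets are illustrative:

     (reg r3)                          any mode  -> 1
     (plus (reg sp) (const_int 16))    SImode    -> 1  ldr rN, [sp, #16]
     (plus (reg r2) (reg r5))          QImode    -> 1  ldrb rN, [r2, r5]
     (plus (reg r2) (const_int 100))   SImode    -> 1  100 <= 124 and word-aligned
     (plus (reg r2) (const_int 128))   SImode    -> 0  beyond the 5-bit scaled range

   The last form is what thumb_legitimize_address below rewrites into
   something reachable.  */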
7611
7612 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7613 instruction of mode MODE. */
7614 int
7615 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7616 {
7617 switch (GET_MODE_SIZE (mode))
7618 {
7619 case 1:
7620 return val >= 0 && val < 32;
7621
7622 case 2:
7623 return val >= 0 && val < 64 && (val & 1) == 0;
7624
7625 default:
7626 return (val >= 0
7627 && (val + GET_MODE_SIZE (mode)) <= 128
7628 && (val & 3) == 0);
7629 }
7630 }
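
/* Worked examples for thumb_legitimate_offset_p (values are arbitrary):

     QImode:  0..31 valid               (QImode, 31)  == 1, (QImode, 32)  == 0
     HImode:  even values 0..62         (HImode, 62)  == 1, (HImode, 63)  == 0
     SImode:  multiples of 4 in 0..124  (SImode, 124) == 1, (SImode, 128) == 0

   These ranges correspond to the 5-bit scaled immediate fields of the
   Thumb-1 ldrb/ldrh/ldr encodings.  */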
7631
7632 bool
7633 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7634 {
7635 if (TARGET_ARM)
7636 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7637 else if (TARGET_THUMB2)
7638 return thumb2_legitimate_address_p (mode, x, strict_p);
7639 else /* if (TARGET_THUMB1) */
7640 return thumb1_legitimate_address_p (mode, x, strict_p);
7641 }
7642
7643 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7644
7645 Given an rtx X being reloaded into a reg required to be
7646 in class CLASS, return the class of reg to actually use.
7647 In general this is just CLASS, but for the Thumb core registers and
7648 immediate constants we prefer a LO_REGS class or a subset. */
7649
7650 static reg_class_t
7651 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7652 {
7653 if (TARGET_32BIT)
7654 return rclass;
7655 else
7656 {
7657 if (rclass == GENERAL_REGS)
7658 return LO_REGS;
7659 else
7660 return rclass;
7661 }
7662 }
7663
7664 /* Build the SYMBOL_REF for __tls_get_addr. */
7665
7666 static GTY(()) rtx tls_get_addr_libfunc;
7667
7668 static rtx
7669 get_tls_get_addr (void)
7670 {
7671 if (!tls_get_addr_libfunc)
7672 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7673 return tls_get_addr_libfunc;
7674 }
7675
7676 rtx
7677 arm_load_tp (rtx target)
7678 {
7679 if (!target)
7680 target = gen_reg_rtx (SImode);
7681
7682 if (TARGET_HARD_TP)
7683 {
7684 /* Can return in any reg. */
7685 emit_insn (gen_load_tp_hard (target));
7686 }
7687 else
7688 {
7689 /* Always returned in r0. Immediately copy the result into a pseudo,
7690 otherwise other uses of r0 (e.g. setting up function arguments) may
7691 clobber the value. */
7692
7693 rtx tmp;
7694
7695 emit_insn (gen_load_tp_soft ());
7696
7697 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7698 emit_move_insn (target, tmp);
7699 }
7700 return target;
7701 }
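
/* Typical use, as in legitimize_tls_address below: fetch the thread
   pointer into a fresh pseudo and add an offset to it.  Here offset_reg
   is assumed to already hold the TLS offset:

     rtx tp  = arm_load_tp (NULL_RTX);
     rtx sum = gen_rtx_PLUS (Pmode, tp, offset_reg);

   With a hardware thread register (-mtp=cp15) this is a single mrc; in
   the soft case it is a call to __aeabi_read_tp whose r0 result is copied
   into the pseudo immediately, as explained above.  */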
7702
7703 static rtx
7704 load_tls_operand (rtx x, rtx reg)
7705 {
7706 rtx tmp;
7707
7708 if (reg == NULL_RTX)
7709 reg = gen_reg_rtx (SImode);
7710
7711 tmp = gen_rtx_CONST (SImode, x);
7712
7713 emit_move_insn (reg, tmp);
7714
7715 return reg;
7716 }
7717
7718 static rtx
7719 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7720 {
7721 rtx insns, label, labelno, sum;
7722
7723 gcc_assert (reloc != TLS_DESCSEQ);
7724 start_sequence ();
7725
7726 labelno = GEN_INT (pic_labelno++);
7727 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7728 label = gen_rtx_CONST (VOIDmode, label);
7729
7730 sum = gen_rtx_UNSPEC (Pmode,
7731 gen_rtvec (4, x, GEN_INT (reloc), label,
7732 GEN_INT (TARGET_ARM ? 8 : 4)),
7733 UNSPEC_TLS);
7734 reg = load_tls_operand (sum, reg);
7735
7736 if (TARGET_ARM)
7737 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7738 else
7739 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7740
7741 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7742 LCT_PURE, /* LCT_CONST? */
7743 Pmode, 1, reg, Pmode);
7744
7745 insns = get_insns ();
7746 end_sequence ();
7747
7748 return insns;
7749 }
7750
7751 static rtx
7752 arm_tls_descseq_addr (rtx x, rtx reg)
7753 {
7754 rtx labelno = GEN_INT (pic_labelno++);
7755 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7756 rtx sum = gen_rtx_UNSPEC (Pmode,
7757 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7758 gen_rtx_CONST (VOIDmode, label),
7759 GEN_INT (!TARGET_ARM)),
7760 UNSPEC_TLS);
7761 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7762
7763 emit_insn (gen_tlscall (x, labelno));
7764 if (!reg)
7765 reg = gen_reg_rtx (SImode);
7766 else
7767 gcc_assert (REGNO (reg) != R0_REGNUM);
7768
7769 emit_move_insn (reg, reg0);
7770
7771 return reg;
7772 }
7773
7774 rtx
7775 legitimize_tls_address (rtx x, rtx reg)
7776 {
7777 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7778 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7779
7780 switch (model)
7781 {
7782 case TLS_MODEL_GLOBAL_DYNAMIC:
7783 if (TARGET_GNU2_TLS)
7784 {
7785 reg = arm_tls_descseq_addr (x, reg);
7786
7787 tp = arm_load_tp (NULL_RTX);
7788
7789 dest = gen_rtx_PLUS (Pmode, tp, reg);
7790 }
7791 else
7792 {
7793 /* Original scheme */
7794 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7795 dest = gen_reg_rtx (Pmode);
7796 emit_libcall_block (insns, dest, ret, x);
7797 }
7798 return dest;
7799
7800 case TLS_MODEL_LOCAL_DYNAMIC:
7801 if (TARGET_GNU2_TLS)
7802 {
7803 reg = arm_tls_descseq_addr (x, reg);
7804
7805 tp = arm_load_tp (NULL_RTX);
7806
7807 dest = gen_rtx_PLUS (Pmode, tp, reg);
7808 }
7809 else
7810 {
7811 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7812
7813 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7814 share the LDM result with other LD model accesses. */
7815 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7816 UNSPEC_TLS);
7817 dest = gen_reg_rtx (Pmode);
7818 emit_libcall_block (insns, dest, ret, eqv);
7819
7820 /* Load the addend. */
7821 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7822 GEN_INT (TLS_LDO32)),
7823 UNSPEC_TLS);
7824 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7825 dest = gen_rtx_PLUS (Pmode, dest, addend);
7826 }
7827 return dest;
7828
7829 case TLS_MODEL_INITIAL_EXEC:
7830 labelno = GEN_INT (pic_labelno++);
7831 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7832 label = gen_rtx_CONST (VOIDmode, label);
7833 sum = gen_rtx_UNSPEC (Pmode,
7834 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7835 GEN_INT (TARGET_ARM ? 8 : 4)),
7836 UNSPEC_TLS);
7837 reg = load_tls_operand (sum, reg);
7838
7839 if (TARGET_ARM)
7840 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7841 else if (TARGET_THUMB2)
7842 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7843 else
7844 {
7845 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7846 emit_move_insn (reg, gen_const_mem (SImode, reg));
7847 }
7848
7849 tp = arm_load_tp (NULL_RTX);
7850
7851 return gen_rtx_PLUS (Pmode, tp, reg);
7852
7853 case TLS_MODEL_LOCAL_EXEC:
7854 tp = arm_load_tp (NULL_RTX);
7855
7856 reg = gen_rtx_UNSPEC (Pmode,
7857 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7858 UNSPEC_TLS);
7859 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7860
7861 return gen_rtx_PLUS (Pmode, tp, reg);
7862
7863 default:
7864 abort ();
7865 }
7866 }
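
/* Reading aid: the shape of the RTL produced above for each TLS model
   (tp denotes the value returned by arm_load_tp):

     global-dynamic / local-dynamic, traditional dialect:
	a pseudo holding the __tls_get_addr libcall result (local-dynamic
	additionally returns (plus dest <TLS_LDO32 addend>))
     global-dynamic / local-dynamic, -mtls-dialect=gnu2:
	(plus tp <result of the tlscall descriptor sequence>)
     initial-exec:
	(plus tp <register loaded from the GOT-relative IE entry>)
     local-exec:
	(plus tp <register holding (unspec [sym TLS_LE32] UNSPEC_TLS)>)

   The authoritative sequences are the cases above; this is only a
   summary.  */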
7867
7868 /* Try machine-dependent ways of modifying an illegitimate address
7869 to be legitimate. If we find one, return the new, valid address. */
7870 rtx
7871 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7872 {
7873 if (arm_tls_referenced_p (x))
7874 {
7875 rtx addend = NULL;
7876
7877 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7878 {
7879 addend = XEXP (XEXP (x, 0), 1);
7880 x = XEXP (XEXP (x, 0), 0);
7881 }
7882
7883 if (GET_CODE (x) != SYMBOL_REF)
7884 return x;
7885
7886 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7887
7888 x = legitimize_tls_address (x, NULL_RTX);
7889
7890 if (addend)
7891 {
7892 x = gen_rtx_PLUS (SImode, x, addend);
7893 orig_x = x;
7894 }
7895 else
7896 return x;
7897 }
7898
7899 if (!TARGET_ARM)
7900 {
7901 /* TODO: legitimize_address for Thumb2. */
7902 if (TARGET_THUMB2)
7903 return x;
7904 return thumb_legitimize_address (x, orig_x, mode);
7905 }
7906
7907 if (GET_CODE (x) == PLUS)
7908 {
7909 rtx xop0 = XEXP (x, 0);
7910 rtx xop1 = XEXP (x, 1);
7911
7912 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7913 xop0 = force_reg (SImode, xop0);
7914
7915 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7916 && !symbol_mentioned_p (xop1))
7917 xop1 = force_reg (SImode, xop1);
7918
7919 if (ARM_BASE_REGISTER_RTX_P (xop0)
7920 && CONST_INT_P (xop1))
7921 {
7922 HOST_WIDE_INT n, low_n;
7923 rtx base_reg, val;
7924 n = INTVAL (xop1);
7925
7926 /* VFP addressing modes actually allow greater offsets, but for
7927 now we just stick with the lowest common denominator. */
7928 if (mode == DImode
7929 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7930 {
7931 low_n = n & 0x0f;
7932 n &= ~0x0f;
7933 if (low_n > 4)
7934 {
7935 n += 16;
7936 low_n -= 16;
7937 }
7938 }
7939 else
7940 {
7941 low_n = ((mode) == TImode ? 0
7942 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7943 n -= low_n;
7944 }
7945
7946 base_reg = gen_reg_rtx (SImode);
7947 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7948 emit_move_insn (base_reg, val);
7949 x = plus_constant (Pmode, base_reg, low_n);
7950 }
7951 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7952 x = gen_rtx_PLUS (SImode, xop0, xop1);
7953 }
7954
7955 /* XXX We don't allow MINUS any more -- see comment in
7956 arm_legitimate_address_outer_p (). */
7957 else if (GET_CODE (x) == MINUS)
7958 {
7959 rtx xop0 = XEXP (x, 0);
7960 rtx xop1 = XEXP (x, 1);
7961
7962 if (CONSTANT_P (xop0))
7963 xop0 = force_reg (SImode, xop0);
7964
7965 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7966 xop1 = force_reg (SImode, xop1);
7967
7968 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7969 x = gen_rtx_MINUS (SImode, xop0, xop1);
7970 }
7971
7972 /* Make sure to take full advantage of the pre-indexed addressing mode
7973 with absolute addresses, which often allows the base register to be
7974 factorized across multiple adjacent memory references, and might
7975 even allow the minipool to be avoided entirely. */
7976 else if (CONST_INT_P (x) && optimize > 0)
7977 {
7978 unsigned int bits;
7979 HOST_WIDE_INT mask, base, index;
7980 rtx base_reg;
7981
7982 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7983 use an 8-bit index.  So let's use a 12-bit index for SImode only and
7984 hope that arm_gen_constant will enable ldrb to use more bits.  (A worked example follows this function.) */
7985 bits = (mode == SImode) ? 12 : 8;
7986 mask = (1 << bits) - 1;
7987 base = INTVAL (x) & ~mask;
7988 index = INTVAL (x) & mask;
7989 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7990 {
7991 /* It'll most probably be more efficient to generate the base
7992 with more bits set and use a negative index instead. */
7993 base |= mask;
7994 index -= mask;
7995 }
7996 base_reg = force_reg (SImode, GEN_INT (base));
7997 x = plus_constant (Pmode, base_reg, index);
7998 }
7999
8000 if (flag_pic)
8001 {
8002 /* We need to find and carefully transform any SYMBOL and LABEL
8003 references; so go back to the original address expression. */
8004 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8005
8006 if (new_x != orig_x)
8007 x = new_x;
8008 }
8009
8010 return x;
8011 }
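
/* Worked example for the constant-address splitting above (the address
   value is arbitrary): legitimizing the SImode address
   (const_int 0x12345678) uses bits = 12 and mask = 0xfff, so

     base  = 0x12345678 & ~0xfff = 0x12345000
     index = 0x12345678 &  0xfff = 0x678

   bit_count (0x12345000) is 7, which does not exceed (32 - 12) / 2 = 10,
   so the pair is kept as is; the base is then forced into a register
   (arm_gen_constant builds it in a few instructions) and the access
   becomes ldr rD, [rBASE, #0x678], letting neighbouring accesses share
   rBASE.  */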
8012
8013
8014 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8015 to be legitimate. If we find one, return the new, valid address. */
8016 rtx
8017 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8018 {
8019 if (GET_CODE (x) == PLUS
8020 && CONST_INT_P (XEXP (x, 1))
8021 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8022 || INTVAL (XEXP (x, 1)) < 0))
8023 {
8024 rtx xop0 = XEXP (x, 0);
8025 rtx xop1 = XEXP (x, 1);
8026 HOST_WIDE_INT offset = INTVAL (xop1);
8027
8028 /* Try to fold the offset into a biasing of the base register and
8029 then offset that instead.  Don't do this when optimizing for space
8030 since it can cause too many CSEs.  (A worked example follows this function.) */
8031 if (optimize_size && offset >= 0
8032 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8033 {
8034 HOST_WIDE_INT delta;
8035
8036 if (offset >= 256)
8037 delta = offset - (256 - GET_MODE_SIZE (mode));
8038 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8039 delta = 31 * GET_MODE_SIZE (mode);
8040 else
8041 delta = offset & (~31 * GET_MODE_SIZE (mode));
8042
8043 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8044 NULL_RTX);
8045 x = plus_constant (Pmode, xop0, delta);
8046 }
8047 else if (offset < 0 && offset > -256)
8048 /* Small negative offsets are best done with a subtract before the
8049 dereference; forcing these into a register normally takes two
8050 instructions. */
8051 x = force_operand (x, NULL_RTX);
8052 else
8053 {
8054 /* For the remaining cases, force the constant into a register. */
8055 xop1 = force_reg (SImode, xop1);
8056 x = gen_rtx_PLUS (SImode, xop0, xop1);
8057 }
8058 }
8059 else if (GET_CODE (x) == PLUS
8060 && s_register_operand (XEXP (x, 1), SImode)
8061 && !s_register_operand (XEXP (x, 0), SImode))
8062 {
8063 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8064
8065 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8066 }
8067
8068 if (flag_pic)
8069 {
8070 /* We need to find and carefully transform any SYMBOL and LABEL
8071 references; so go back to the original address expression. */
8072 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8073
8074 if (new_x != orig_x)
8075 x = new_x;
8076 }
8077
8078 return x;
8079 }
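
/* Worked example for the offset folding above (illustrative values):
   with -Os, an SImode access to (plus (reg r3) (const_int 300)) cannot be
   encoded directly, since the Thumb-1 ldr offset field only reaches 124.
   The code above picks delta = 300 - (256 - 4) = 48, biasing the base by
   252 (reachable with a move plus an 8-bit immediate add) and keeping #48
   as the load offset:

     (set (reg rT) (plus (reg r3) (const_int 252)))
     (mem:SI (plus (reg rT) (const_int 48)))

   The biased base rT can then be shared by neighbouring accesses.  */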
8080
8081 /* Return TRUE if X contains any TLS symbol references. */
8082
8083 bool
8084 arm_tls_referenced_p (rtx x)
8085 {
8086 if (! TARGET_HAVE_TLS)
8087 return false;
8088
8089 subrtx_iterator::array_type array;
8090 FOR_EACH_SUBRTX (iter, array, x, ALL)
8091 {
8092 const_rtx x = *iter;
8093 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8094 return true;
8095
8096 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8097 TLS offsets, not real symbol references. */
8098 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8099 iter.skip_subrtxes ();
8100 }
8101 return false;
8102 }
8103
8104 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8105
8106 On the ARM, allow any integer (invalid ones are removed later by insn
8107 patterns), nice doubles and symbol_refs which refer to the function's
8108 constant pool XXX.
8109
8110 When generating pic allow anything. */
8111
8112 static bool
8113 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8114 {
8115 return flag_pic || !label_mentioned_p (x);
8116 }
8117
8118 static bool
8119 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8120 {
8121 return (CONST_INT_P (x)
8122 || CONST_DOUBLE_P (x)
8123 || CONSTANT_ADDRESS_P (x)
8124 || flag_pic);
8125 }
8126
8127 static bool
8128 arm_legitimate_constant_p (machine_mode mode, rtx x)
8129 {
8130 return (!arm_cannot_force_const_mem (mode, x)
8131 && (TARGET_32BIT
8132 ? arm_legitimate_constant_p_1 (mode, x)
8133 : thumb_legitimate_constant_p (mode, x)));
8134 }
8135
8136 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8137
8138 static bool
8139 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8140 {
8141 rtx base, offset;
8142
8143 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8144 {
8145 split_const (x, &base, &offset);
8146 if (GET_CODE (base) == SYMBOL_REF
8147 && !offset_within_block_p (base, INTVAL (offset)))
8148 return true;
8149 }
8150 return arm_tls_referenced_p (x);
8151 }
8152 \f
8153 #define REG_OR_SUBREG_REG(X) \
8154 (REG_P (X) \
8155 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8156
8157 #define REG_OR_SUBREG_RTX(X) \
8158 (REG_P (X) ? (X) : SUBREG_REG (X))
8159
8160 static inline int
8161 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8162 {
8163 machine_mode mode = GET_MODE (x);
8164 int total, words;
8165
8166 switch (code)
8167 {
8168 case ASHIFT:
8169 case ASHIFTRT:
8170 case LSHIFTRT:
8171 case ROTATERT:
8172 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8173
8174 case PLUS:
8175 case MINUS:
8176 case COMPARE:
8177 case NEG:
8178 case NOT:
8179 return COSTS_N_INSNS (1);
8180
8181 case MULT:
8182 if (CONST_INT_P (XEXP (x, 1)))
8183 {
8184 int cycles = 0;
8185 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8186
8187 while (i)
8188 {
8189 i >>= 2;
8190 cycles++;
8191 }
8192 return COSTS_N_INSNS (2) + cycles;
8193 }
8194 return COSTS_N_INSNS (1) + 16;
8195
8196 case SET:
8197 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8198 the mode. */
8199 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8200 return (COSTS_N_INSNS (words)
8201 + 4 * ((MEM_P (SET_SRC (x)))
8202 + MEM_P (SET_DEST (x))));
8203
8204 case CONST_INT:
8205 if (outer == SET)
8206 {
8207 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8208 return 0;
8209 if (thumb_shiftable_const (INTVAL (x)))
8210 return COSTS_N_INSNS (2);
8211 return COSTS_N_INSNS (3);
8212 }
8213 else if ((outer == PLUS || outer == COMPARE)
8214 && INTVAL (x) < 256 && INTVAL (x) > -256)
8215 return 0;
8216 else if ((outer == IOR || outer == XOR || outer == AND)
8217 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8218 return COSTS_N_INSNS (1);
8219 else if (outer == AND)
8220 {
8221 int i;
8222 /* This duplicates the tests in the andsi3 expander. */
8223 for (i = 9; i <= 31; i++)
8224 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8225 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8226 return COSTS_N_INSNS (2);
8227 }
8228 else if (outer == ASHIFT || outer == ASHIFTRT
8229 || outer == LSHIFTRT)
8230 return 0;
8231 return COSTS_N_INSNS (2);
8232
8233 case CONST:
8234 case CONST_DOUBLE:
8235 case LABEL_REF:
8236 case SYMBOL_REF:
8237 return COSTS_N_INSNS (3);
8238
8239 case UDIV:
8240 case UMOD:
8241 case DIV:
8242 case MOD:
8243 return 100;
8244
8245 case TRUNCATE:
8246 return 99;
8247
8248 case AND:
8249 case XOR:
8250 case IOR:
8251 /* XXX guess. */
8252 return 8;
8253
8254 case MEM:
8255 /* XXX another guess. */
8256 /* Memory costs quite a lot for the first word, but subsequent words
8257 load at the equivalent of a single insn each. */
8258 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8259 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8260 ? 4 : 0));
8261
8262 case IF_THEN_ELSE:
8263 /* XXX a guess. */
8264 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8265 return 14;
8266 return 2;
8267
8268 case SIGN_EXTEND:
8269 case ZERO_EXTEND:
8270 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8271 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8272
8273 if (mode == SImode)
8274 return total;
8275
8276 if (arm_arch6)
8277 return total + COSTS_N_INSNS (1);
8278
8279 /* Assume a two-shift sequence. Increase the cost slightly so
8280 we prefer actual shifts over an extend operation. */
8281 return total + 1 + COSTS_N_INSNS (2);
8282
8283 default:
8284 return 99;
8285 }
8286 }
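
/* Worked example for the MULT cost above: for (mult (reg) (const_int 100))
   the loop shifts the constant right two bits per iteration
   (100 -> 25 -> 6 -> 1 -> 0), so cycles = 4 and the reported cost is
   COSTS_N_INSNS (2) + 4.  This roughly models a multiplier that retires
   two bits of the operand per cycle; the constant 100 is arbitrary.  */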
8287
8288 static inline bool
8289 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8290 {
8291 machine_mode mode = GET_MODE (x);
8292 enum rtx_code subcode;
8293 rtx operand;
8294 enum rtx_code code = GET_CODE (x);
8295 *total = 0;
8296
8297 switch (code)
8298 {
8299 case MEM:
8300 /* Memory costs quite a lot for the first word, but subsequent words
8301 load at the equivalent of a single insn each. */
8302 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8303 return true;
8304
8305 case DIV:
8306 case MOD:
8307 case UDIV:
8308 case UMOD:
8309 if (TARGET_HARD_FLOAT && mode == SFmode)
8310 *total = COSTS_N_INSNS (2);
8311 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8312 *total = COSTS_N_INSNS (4);
8313 else
8314 *total = COSTS_N_INSNS (20);
8315 return false;
8316
8317 case ROTATE:
8318 if (REG_P (XEXP (x, 1)))
8319 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8320 else if (!CONST_INT_P (XEXP (x, 1)))
8321 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8322
8323 /* Fall through */
8324 case ROTATERT:
8325 if (mode != SImode)
8326 {
8327 *total += COSTS_N_INSNS (4);
8328 return true;
8329 }
8330
8331 /* Fall through */
8332 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8333 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8334 if (mode == DImode)
8335 {
8336 *total += COSTS_N_INSNS (3);
8337 return true;
8338 }
8339
8340 *total += COSTS_N_INSNS (1);
8341 /* Increase the cost of complex shifts because they aren't any faster,
8342 and reduce dual issue opportunities. */
8343 if (arm_tune_cortex_a9
8344 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8345 ++*total;
8346
8347 return true;
8348
8349 case MINUS:
8350 if (mode == DImode)
8351 {
8352 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8353 if (CONST_INT_P (XEXP (x, 0))
8354 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8355 {
8356 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8357 return true;
8358 }
8359
8360 if (CONST_INT_P (XEXP (x, 1))
8361 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8362 {
8363 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8364 return true;
8365 }
8366
8367 return false;
8368 }
8369
8370 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8371 {
8372 if (TARGET_HARD_FLOAT
8373 && (mode == SFmode
8374 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8375 {
8376 *total = COSTS_N_INSNS (1);
8377 if (CONST_DOUBLE_P (XEXP (x, 0))
8378 && arm_const_double_rtx (XEXP (x, 0)))
8379 {
8380 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8381 return true;
8382 }
8383
8384 if (CONST_DOUBLE_P (XEXP (x, 1))
8385 && arm_const_double_rtx (XEXP (x, 1)))
8386 {
8387 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8388 return true;
8389 }
8390
8391 return false;
8392 }
8393 *total = COSTS_N_INSNS (20);
8394 return false;
8395 }
8396
8397 *total = COSTS_N_INSNS (1);
8398 if (CONST_INT_P (XEXP (x, 0))
8399 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8400 {
8401 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8402 return true;
8403 }
8404
8405 subcode = GET_CODE (XEXP (x, 1));
8406 if (subcode == ASHIFT || subcode == ASHIFTRT
8407 || subcode == LSHIFTRT
8408 || subcode == ROTATE || subcode == ROTATERT)
8409 {
8410 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8411 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8412 return true;
8413 }
8414
8415 /* A shift as a part of RSB costs no more than RSB itself. */
8416 if (GET_CODE (XEXP (x, 0)) == MULT
8417 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8418 {
8419 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8420 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8421 return true;
8422 }
8423
8424 if (subcode == MULT
8425 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8426 {
8427 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8428 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8429 return true;
8430 }
8431
8432 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8433 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8434 {
8435 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8436 if (REG_P (XEXP (XEXP (x, 1), 0))
8437 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8438 *total += COSTS_N_INSNS (1);
8439
8440 return true;
8441 }
8442
8443 /* Fall through */
8444
8445 case PLUS:
8446 if (code == PLUS && arm_arch6 && mode == SImode
8447 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8448 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8449 {
8450 *total = COSTS_N_INSNS (1);
8451 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8452 0, speed);
8453 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8454 return true;
8455 }
8456
8457 /* MLA: All arguments must be registers.  We filter out
8458 multiplication by a power of two, so that we fall through to
8459 the code below. */
8460 if (GET_CODE (XEXP (x, 0)) == MULT
8461 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8462 {
8463 /* The cost comes from the cost of the multiply. */
8464 return false;
8465 }
8466
8467 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8468 {
8469 if (TARGET_HARD_FLOAT
8470 && (mode == SFmode
8471 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8472 {
8473 *total = COSTS_N_INSNS (1);
8474 if (CONST_DOUBLE_P (XEXP (x, 1))
8475 && arm_const_double_rtx (XEXP (x, 1)))
8476 {
8477 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8478 return true;
8479 }
8480
8481 return false;
8482 }
8483
8484 *total = COSTS_N_INSNS (20);
8485 return false;
8486 }
8487
8488 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8489 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8490 {
8491 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8492 if (REG_P (XEXP (XEXP (x, 0), 0))
8493 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8494 *total += COSTS_N_INSNS (1);
8495 return true;
8496 }
8497
8498 /* Fall through */
8499
8500 case AND: case XOR: case IOR:
8501
8502 /* Normally the frame registers will be split into reg+const during
8503 reload, so it is a bad idea to combine them with other instructions,
8504 since then they might not be moved outside of loops.  As a compromise
8505 we allow integration with ops that have a constant as their second
8506 operand. */
8507 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8508 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8509 && !CONST_INT_P (XEXP (x, 1)))
8510 *total = COSTS_N_INSNS (1);
8511
8512 if (mode == DImode)
8513 {
8514 *total += COSTS_N_INSNS (2);
8515 if (CONST_INT_P (XEXP (x, 1))
8516 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8517 {
8518 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8519 return true;
8520 }
8521
8522 return false;
8523 }
8524
8525 *total += COSTS_N_INSNS (1);
8526 if (CONST_INT_P (XEXP (x, 1))
8527 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8528 {
8529 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8530 return true;
8531 }
8532 subcode = GET_CODE (XEXP (x, 0));
8533 if (subcode == ASHIFT || subcode == ASHIFTRT
8534 || subcode == LSHIFTRT
8535 || subcode == ROTATE || subcode == ROTATERT)
8536 {
8537 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8538 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8539 return true;
8540 }
8541
8542 if (subcode == MULT
8543 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8544 {
8545 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8546 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8547 return true;
8548 }
8549
8550 if (subcode == UMIN || subcode == UMAX
8551 || subcode == SMIN || subcode == SMAX)
8552 {
8553 *total = COSTS_N_INSNS (3);
8554 return true;
8555 }
8556
8557 return false;
8558
8559 case MULT:
8560 /* This should have been handled by the CPU specific routines. */
8561 gcc_unreachable ();
8562
8563 case TRUNCATE:
8564 if (arm_arch3m && mode == SImode
8565 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8566 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8567 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8568 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8569 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8570 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8571 {
8572 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8573 return true;
8574 }
8575 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8576 return false;
8577
8578 case NEG:
8579 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8580 {
8581 if (TARGET_HARD_FLOAT
8582 && (mode == SFmode
8583 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8584 {
8585 *total = COSTS_N_INSNS (1);
8586 return false;
8587 }
8588 *total = COSTS_N_INSNS (2);
8589 return false;
8590 }
8591
8592 /* Fall through */
8593 case NOT:
8594 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8595 if (mode == SImode && code == NOT)
8596 {
8597 subcode = GET_CODE (XEXP (x, 0));
8598 if (subcode == ASHIFT || subcode == ASHIFTRT
8599 || subcode == LSHIFTRT
8600 || subcode == ROTATE || subcode == ROTATERT
8601 || (subcode == MULT
8602 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8603 {
8604 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8605 /* Register shifts cost an extra cycle. */
8606 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8607 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8608 subcode, 1, speed);
8609 return true;
8610 }
8611 }
8612
8613 return false;
8614
8615 case IF_THEN_ELSE:
8616 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8617 {
8618 *total = COSTS_N_INSNS (4);
8619 return true;
8620 }
8621
8622 operand = XEXP (x, 0);
8623
8624 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8625 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8626 && REG_P (XEXP (operand, 0))
8627 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8628 *total += COSTS_N_INSNS (1);
8629 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8630 + rtx_cost (XEXP (x, 2), code, 2, speed));
8631 return true;
8632
8633 case NE:
8634 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8635 {
8636 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8637 return true;
8638 }
8639 goto scc_insn;
8640
8641 case GE:
8642 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8643 && mode == SImode && XEXP (x, 1) == const0_rtx)
8644 {
8645 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8646 return true;
8647 }
8648 goto scc_insn;
8649
8650 case LT:
8651 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8652 && mode == SImode && XEXP (x, 1) == const0_rtx)
8653 {
8654 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8655 return true;
8656 }
8657 goto scc_insn;
8658
8659 case EQ:
8660 case GT:
8661 case LE:
8662 case GEU:
8663 case LTU:
8664 case GTU:
8665 case LEU:
8666 case UNORDERED:
8667 case ORDERED:
8668 case UNEQ:
8669 case UNGE:
8670 case UNLT:
8671 case UNGT:
8672 case UNLE:
8673 scc_insn:
8674 /* SCC insns. In the case where the comparison has already been
8675 performed, then they cost 2 instructions. Otherwise they need
8676 an additional comparison before them. */
8677 *total = COSTS_N_INSNS (2);
8678 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8679 {
8680 return true;
8681 }
8682
8683 /* Fall through */
8684 case COMPARE:
8685 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8686 {
8687 *total = 0;
8688 return true;
8689 }
8690
8691 *total += COSTS_N_INSNS (1);
8692 if (CONST_INT_P (XEXP (x, 1))
8693 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8694 {
8695 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8696 return true;
8697 }
8698
8699 subcode = GET_CODE (XEXP (x, 0));
8700 if (subcode == ASHIFT || subcode == ASHIFTRT
8701 || subcode == LSHIFTRT
8702 || subcode == ROTATE || subcode == ROTATERT)
8703 {
8704 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8705 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8706 return true;
8707 }
8708
8709 if (subcode == MULT
8710 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8711 {
8712 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8713 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8714 return true;
8715 }
8716
8717 return false;
8718
8719 case UMIN:
8720 case UMAX:
8721 case SMIN:
8722 case SMAX:
8723 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8724 if (!CONST_INT_P (XEXP (x, 1))
8725 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8726 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8727 return true;
8728
8729 case ABS:
8730 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8731 {
8732 if (TARGET_HARD_FLOAT
8733 && (mode == SFmode
8734 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8735 {
8736 *total = COSTS_N_INSNS (1);
8737 return false;
8738 }
8739 *total = COSTS_N_INSNS (20);
8740 return false;
8741 }
8742 *total = COSTS_N_INSNS (1);
8743 if (mode == DImode)
8744 *total += COSTS_N_INSNS (3);
8745 return false;
8746
8747 case SIGN_EXTEND:
8748 case ZERO_EXTEND:
8749 *total = 0;
8750 if (GET_MODE_CLASS (mode) == MODE_INT)
8751 {
8752 rtx op = XEXP (x, 0);
8753 machine_mode opmode = GET_MODE (op);
8754
8755 if (mode == DImode)
8756 *total += COSTS_N_INSNS (1);
8757
8758 if (opmode != SImode)
8759 {
8760 if (MEM_P (op))
8761 {
8762 /* If !arm_arch4, we use one of the extendhisi2_mem
8763 or movhi_bytes patterns for HImode. For a QImode
8764 sign extension, we first zero-extend from memory
8765 and then perform a shift sequence. */
8766 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8767 *total += COSTS_N_INSNS (2);
8768 }
8769 else if (arm_arch6)
8770 *total += COSTS_N_INSNS (1);
8771
8772 /* We don't have the necessary insn, so we need to perform some
8773 other operation. */
8774 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8775 /* An and with constant 255. */
8776 *total += COSTS_N_INSNS (1);
8777 else
8778 /* A shift sequence. Increase costs slightly to avoid
8779 combining two shifts into an extend operation. */
8780 *total += COSTS_N_INSNS (2) + 1;
8781 }
8782
8783 return false;
8784 }
8785
8786 switch (GET_MODE (XEXP (x, 0)))
8787 {
8788 case V8QImode:
8789 case V4HImode:
8790 case V2SImode:
8791 case V4QImode:
8792 case V2HImode:
8793 *total = COSTS_N_INSNS (1);
8794 return false;
8795
8796 default:
8797 gcc_unreachable ();
8798 }
8799 gcc_unreachable ();
8800
8801 case ZERO_EXTRACT:
8802 case SIGN_EXTRACT:
8803 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8804 return true;
8805
8806 case CONST_INT:
8807 if (const_ok_for_arm (INTVAL (x))
8808 || const_ok_for_arm (~INTVAL (x)))
8809 *total = COSTS_N_INSNS (1);
8810 else
8811 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8812 INTVAL (x), NULL_RTX,
8813 NULL_RTX, 0, 0));
8814 return true;
8815
8816 case CONST:
8817 case LABEL_REF:
8818 case SYMBOL_REF:
8819 *total = COSTS_N_INSNS (3);
8820 return true;
8821
8822 case HIGH:
8823 *total = COSTS_N_INSNS (1);
8824 return true;
8825
8826 case LO_SUM:
8827 *total = COSTS_N_INSNS (1);
8828 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8829 return true;
8830
8831 case CONST_DOUBLE:
8832 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8833 && (mode == SFmode || !TARGET_VFP_SINGLE))
8834 *total = COSTS_N_INSNS (1);
8835 else
8836 *total = COSTS_N_INSNS (4);
8837 return true;
8838
8839 case SET:
8840 /* The vec_extract patterns accept memory operands that require an
8841 address reload. Account for the cost of that reload to give the
8842 auto-inc-dec pass an incentive to try to replace them. */
8843 if (TARGET_NEON && MEM_P (SET_DEST (x))
8844 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8845 {
8846 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8847 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8848 *total += COSTS_N_INSNS (1);
8849 return true;
8850 }
8851 /* Likewise for the vec_set patterns. */
8852 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8853 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8854 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8855 {
8856 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8857 *total = rtx_cost (mem, code, 0, speed);
8858 if (!neon_vector_mem_operand (mem, 2, true))
8859 *total += COSTS_N_INSNS (1);
8860 return true;
8861 }
8862 return false;
8863
8864 case UNSPEC:
8865 /* We cost this as high as our memory costs to allow this to
8866 be hoisted from loops. */
8867 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8868 {
8869 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8870 }
8871 return true;
8872
8873 case CONST_VECTOR:
8874 if (TARGET_NEON
8875 && TARGET_HARD_FLOAT
8876 && outer == SET
8877 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8878 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8879 *total = COSTS_N_INSNS (1);
8880 else
8881 *total = COSTS_N_INSNS (4);
8882 return true;
8883
8884 default:
8885 *total = COSTS_N_INSNS (4);
8886 return false;
8887 }
8888 }
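
/* Example of the SIGN_EXTEND/ZERO_EXTEND costing above, for a register
   (non-memory) source extended to SImode:

     on ARMv6 and later the single sxtb/sxth/uxtb/uxth is modelled as
     COSTS_N_INSNS (1);

     on earlier architectures a (sign_extend:SI (reg:HI)) is costed as
     COSTS_N_INSNS (2) + 1, i.e. a shift pair plus a small bias so that
     an explicit shift sequence is still preferred over the extend form.

   A DImode result adds one further COSTS_N_INSNS (1) for the high part.  */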
8889
8890 /* Estimate the size cost of Thumb-1 instructions.
8891 For now most of the code is copied from thumb1_rtx_costs.  We need
8892 finer-grained tuning when we have more related test cases. */
8893 static inline int
8894 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8895 {
8896 machine_mode mode = GET_MODE (x);
8897 int words;
8898
8899 switch (code)
8900 {
8901 case ASHIFT:
8902 case ASHIFTRT:
8903 case LSHIFTRT:
8904 case ROTATERT:
8905 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8906
8907 case PLUS:
8908 case MINUS:
8909 /* Thumb-1 needs two instructions to fulfill the shiftadd/shiftsub0/shiftsub1
8910 patterns created by RTL expansion, especially for the expansion of
8911 multiplication. */
8912 if ((GET_CODE (XEXP (x, 0)) == MULT
8913 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8914 || (GET_CODE (XEXP (x, 1)) == MULT
8915 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8916 return COSTS_N_INSNS (2);
8917 /* On purpose fall through for normal RTX. */
8918 case COMPARE:
8919 case NEG:
8920 case NOT:
8921 return COSTS_N_INSNS (1);
8922
8923 case MULT:
8924 if (CONST_INT_P (XEXP (x, 1)))
8925 {
8926 /* The Thumb-1 mul instruction can't operate on a constant; we must
8927 load it into a register first. */
8928 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8929 /* For targets that have a very small, high-latency multiply
8930 unit, we prefer to synthesize the multiplication with up to 5
8931 instructions, giving a good balance between size and performance. */
8932 if (arm_arch6m && arm_m_profile_small_mul)
8933 return COSTS_N_INSNS (5);
8934 else
8935 return COSTS_N_INSNS (1) + const_size;
8936 }
8937 return COSTS_N_INSNS (1);
8938
8939 case SET:
8940 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8941 the mode. */
8942 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8943 return COSTS_N_INSNS (words)
8944 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8945 || satisfies_constraint_K (SET_SRC (x))
8946 /* thumb1_movdi_insn. */
8947 || ((words > 1) && MEM_P (SET_SRC (x))));
8948
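/* A rough illustration of the CONST_INT costs below, assuming the
   constant is the source of a SET: moving 200 into a register costs one
   insn (a plain MOV), moving -5 costs two (the constraint "J" split),
   moving a shiftable value such as 0x3F0 costs two (the constraint "K"
   split), and an arbitrary 32-bit constant costs three.  */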
8949 case CONST_INT:
8950 if (outer == SET)
8951 {
8952 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8953 return COSTS_N_INSNS (1);
8954 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8955 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8956 return COSTS_N_INSNS (2);
8957 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8958 if (thumb_shiftable_const (INTVAL (x)))
8959 return COSTS_N_INSNS (2);
8960 return COSTS_N_INSNS (3);
8961 }
8962 else if ((outer == PLUS || outer == COMPARE)
8963 && INTVAL (x) < 256 && INTVAL (x) > -256)
8964 return 0;
8965 else if ((outer == IOR || outer == XOR || outer == AND)
8966 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8967 return COSTS_N_INSNS (1);
8968 else if (outer == AND)
8969 {
8970 int i;
8971 /* This duplicates the tests in the andsi3 expander. */
8972 for (i = 9; i <= 31; i++)
8973 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8974 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8975 return COSTS_N_INSNS (2);
8976 }
8977 else if (outer == ASHIFT || outer == ASHIFTRT
8978 || outer == LSHIFTRT)
8979 return 0;
8980 return COSTS_N_INSNS (2);
8981
8982 case CONST:
8983 case CONST_DOUBLE:
8984 case LABEL_REF:
8985 case SYMBOL_REF:
8986 return COSTS_N_INSNS (3);
8987
8988 case UDIV:
8989 case UMOD:
8990 case DIV:
8991 case MOD:
8992 return 100;
8993
8994 case TRUNCATE:
8995 return 99;
8996
8997 case AND:
8998 case XOR:
8999 case IOR:
9000 return COSTS_N_INSNS (1);
9001
9002 case MEM:
9003 return (COSTS_N_INSNS (1)
9004 + COSTS_N_INSNS (1)
9005 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9006 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9007 ? COSTS_N_INSNS (1) : 0));
9008
9009 case IF_THEN_ELSE:
9010 /* XXX a guess. */
9011 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9012 return 14;
9013 return 2;
9014
9015 case ZERO_EXTEND:
9016 /* XXX still guessing. */
9017 switch (GET_MODE (XEXP (x, 0)))
9018 {
9019 case QImode:
9020 return (1 + (mode == DImode ? 4 : 0)
9021 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9022
9023 case HImode:
9024 return (4 + (mode == DImode ? 4 : 0)
9025 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9026
9027 case SImode:
9028 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9029
9030 default:
9031 return 99;
9032 }
9033
9034 default:
9035 return 99;
9036 }
9037 }
9038
9039 /* RTX costs when optimizing for size. */
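/* In this file arm_size_rtx_costs is only reached from arm_rtx_costs,
   further down, on the old (deprecated) costing path when not optimizing
   for speed.  */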
9040 static bool
9041 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9042 int *total)
9043 {
9044 machine_mode mode = GET_MODE (x);
9045 if (TARGET_THUMB1)
9046 {
9047 *total = thumb1_size_rtx_costs (x, code, outer_code);
9048 return true;
9049 }
9050
9051 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9052 switch (code)
9053 {
9054 case MEM:
9055 /* A memory access costs 1 insn if the mode is small or the address is
9056 a single register; otherwise it costs one insn per word. */
9057 if (REG_P (XEXP (x, 0)))
9058 *total = COSTS_N_INSNS (1);
9059 else if (flag_pic
9060 && GET_CODE (XEXP (x, 0)) == PLUS
9061 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9062 /* This will be split into two instructions.
9063 See arm.md:calculate_pic_address. */
9064 *total = COSTS_N_INSNS (2);
9065 else
9066 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9067 return true;
9068
9069 case DIV:
9070 case MOD:
9071 case UDIV:
9072 case UMOD:
9073 /* Needs a libcall, so it costs about this. */
9074 *total = COSTS_N_INSNS (2);
9075 return false;
9076
9077 case ROTATE:
9078 if (mode == SImode && REG_P (XEXP (x, 1)))
9079 {
9080 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9081 return true;
9082 }
9083 /* Fall through */
9084 case ROTATERT:
9085 case ASHIFT:
9086 case LSHIFTRT:
9087 case ASHIFTRT:
9088 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9089 {
9090 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9091 return true;
9092 }
9093 else if (mode == SImode)
9094 {
9095 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9096 /* Slightly disparage register shifts, but not by much. */
9097 if (!CONST_INT_P (XEXP (x, 1)))
9098 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9099 return true;
9100 }
9101
9102 /* Needs a libcall. */
9103 *total = COSTS_N_INSNS (2);
9104 return false;
9105
9106 case MINUS:
9107 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9108 && (mode == SFmode || !TARGET_VFP_SINGLE))
9109 {
9110 *total = COSTS_N_INSNS (1);
9111 return false;
9112 }
9113
9114 if (mode == SImode)
9115 {
9116 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9117 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9118
9119 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9120 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9121 || subcode1 == ROTATE || subcode1 == ROTATERT
9122 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9123 || subcode1 == ASHIFTRT)
9124 {
9125 /* It's just the cost of the two operands. */
9126 *total = 0;
9127 return false;
9128 }
9129
9130 *total = COSTS_N_INSNS (1);
9131 return false;
9132 }
9133
9134 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9135 return false;
9136
9137 case PLUS:
9138 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9139 && (mode == SFmode || !TARGET_VFP_SINGLE))
9140 {
9141 *total = COSTS_N_INSNS (1);
9142 return false;
9143 }
9144
9145 /* A shift as a part of ADD costs nothing. */
9146 if (GET_CODE (XEXP (x, 0)) == MULT
9147 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9148 {
9149 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9150 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9151 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9152 return true;
9153 }
9154
9155 /* Fall through */
9156 case AND: case XOR: case IOR:
9157 if (mode == SImode)
9158 {
9159 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9160
9161 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9162 || subcode == LSHIFTRT || subcode == ASHIFTRT
9163 || (code == AND && subcode == NOT))
9164 {
9165 /* It's just the cost of the two operands. */
9166 *total = 0;
9167 return false;
9168 }
9169 }
9170
9171 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9172 return false;
9173
9174 case MULT:
9175 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9176 return false;
9177
9178 case NEG:
9179 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9180 && (mode == SFmode || !TARGET_VFP_SINGLE))
9181 {
9182 *total = COSTS_N_INSNS (1);
9183 return false;
9184 }
9185
9186 /* Fall through */
9187 case NOT:
9188 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9189
9190 return false;
9191
9192 case IF_THEN_ELSE:
9193 *total = 0;
9194 return false;
9195
9196 case COMPARE:
9197 if (cc_register (XEXP (x, 0), VOIDmode))
9198 *total = 0;
9199 else
9200 *total = COSTS_N_INSNS (1);
9201 return false;
9202
9203 case ABS:
9204 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9205 && (mode == SFmode || !TARGET_VFP_SINGLE))
9206 *total = COSTS_N_INSNS (1);
9207 else
9208 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9209 return false;
9210
9211 case SIGN_EXTEND:
9212 case ZERO_EXTEND:
9213 return arm_rtx_costs_1 (x, outer_code, total, 0);
9214
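/* As a sketch of the CONST_INT cases below: const_ok_for_arm accepts any
   8-bit value rotated right by an even amount, so an operand such as
   0xFF000000 costs nothing (or one insn when it is the source of a SET or
   an operand of a MULT), while a constant that fails all of the tests
   below is costed at two insns.  */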
9215 case CONST_INT:
9216 if (const_ok_for_arm (INTVAL (x)))
9217 /* A multiplication by a constant requires another instruction
9218 to load the constant to a register. */
9219 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9220 ? 1 : 0);
9221 else if (const_ok_for_arm (~INTVAL (x)))
9222 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9223 else if (const_ok_for_arm (-INTVAL (x)))
9224 {
9225 if (outer_code == COMPARE || outer_code == PLUS
9226 || outer_code == MINUS)
9227 *total = 0;
9228 else
9229 *total = COSTS_N_INSNS (1);
9230 }
9231 else
9232 *total = COSTS_N_INSNS (2);
9233 return true;
9234
9235 case CONST:
9236 case LABEL_REF:
9237 case SYMBOL_REF:
9238 *total = COSTS_N_INSNS (2);
9239 return true;
9240
9241 case CONST_DOUBLE:
9242 *total = COSTS_N_INSNS (4);
9243 return true;
9244
9245 case CONST_VECTOR:
9246 if (TARGET_NEON
9247 && TARGET_HARD_FLOAT
9248 && outer_code == SET
9249 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9250 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9251 *total = COSTS_N_INSNS (1);
9252 else
9253 *total = COSTS_N_INSNS (4);
9254 return true;
9255
9256 case HIGH:
9257 case LO_SUM:
9258 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9259 cost of these slightly. */
9260 *total = COSTS_N_INSNS (1) + 1;
9261 return true;
9262
9263 case SET:
9264 return false;
9265
9266 default:
9267 if (mode != VOIDmode)
9268 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9269 else
9270 *total = COSTS_N_INSNS (4); /* Who knows? */
9271 return false;
9272 }
9273 }
9274
9275 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9276 operand, then return the operand that is being shifted. If the shift
9277 is not by a constant, then set SHIFT_REG to point to the operand.
9278 Return NULL if OP is not a shifter operand. */
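/* For example, given (mult (reg:SI r0) (const_int 4)) this returns r0,
   since a multiply by four is a left shift by two; given
   (ashift (reg:SI r0) (reg:SI r1)) it returns r0 and sets *SHIFT_REG to
   r1; and for anything else, such as a PLUS, it returns NULL.  */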
9279 static rtx
9280 shifter_op_p (rtx op, rtx *shift_reg)
9281 {
9282 enum rtx_code code = GET_CODE (op);
9283
9284 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9285 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9286 return XEXP (op, 0);
9287 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9288 return XEXP (op, 0);
9289 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9290 || code == ASHIFTRT)
9291 {
9292 if (!CONST_INT_P (XEXP (op, 1)))
9293 *shift_reg = XEXP (op, 1);
9294 return XEXP (op, 0);
9295 }
9296
9297 return NULL;
9298 }
9299
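/* Cost an UNSPEC or UNSPEC_VOLATILE rtx X.  As a rough example of the
   scheme below, an UNSPEC_UNALIGNED_LOAD of a DImode value is costed as
   two insns plus, when optimizing for speed, two per-word load costs and
   the unaligned-load penalty from the tuning table.  */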
9300 static bool
9301 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9302 {
9303 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9304 rtx_code code = GET_CODE (x);
9305 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9306
9307 switch (XINT (x, 1))
9308 {
9309 case UNSPEC_UNALIGNED_LOAD:
9310 /* We can only do unaligned loads into the integer unit, and we can't
9311 use LDM or LDRD. */
9312 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9313 if (speed_p)
9314 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9315 + extra_cost->ldst.load_unaligned);
9316
9317 #ifdef NOT_YET
9318 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9319 ADDR_SPACE_GENERIC, speed_p);
9320 #endif
9321 return true;
9322
9323 case UNSPEC_UNALIGNED_STORE:
9324 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9325 if (speed_p)
9326 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9327 + extra_cost->ldst.store_unaligned);
9328
9329 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9330 #ifdef NOT_YET
9331 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9332 ADDR_SPACE_GENERIC, speed_p);
9333 #endif
9334 return true;
9335
9336 case UNSPEC_VRINTZ:
9337 case UNSPEC_VRINTP:
9338 case UNSPEC_VRINTM:
9339 case UNSPEC_VRINTR:
9340 case UNSPEC_VRINTX:
9341 case UNSPEC_VRINTA:
9342 *cost = COSTS_N_INSNS (1);
9343 if (speed_p)
9344 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9345
9346 return true;
9347 default:
9348 *cost = COSTS_N_INSNS (2);
9349 break;
9350 }
9351 return true;
9352 }
9353
9354 /* Cost of a libcall. We assume one insn per argument, an amount for the
9355 call (one insn for -Os) and then one for processing the result. */
9356 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
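/* For instance, with two arguments this evaluates to COSTS_N_INSNS (20)
   when optimizing for speed and to COSTS_N_INSNS (4) when optimizing for
   size (speed_p false).  */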
9357
9358 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9359 do \
9360 { \
9361 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9362 if (shift_op != NULL \
9363 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9364 { \
9365 if (shift_reg) \
9366 { \
9367 if (speed_p) \
9368 *cost += extra_cost->alu.arith_shift_reg; \
9369 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9370 } \
9371 else if (speed_p) \
9372 *cost += extra_cost->alu.arith_shift; \
9373 \
9374 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9375 + rtx_cost (XEXP (x, 1 - IDX), \
9376 OP, 1, speed_p)); \
9377 return true; \
9378 } \
9379 } \
9380 while (0);
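/* The macro above is expanded in the narrow-mode PLUS and MINUS cases
   below, e.g. HANDLE_NARROW_SHIFT_ARITH (MINUS, 0): if operand IDX of X
   is a suitable left shift, it adds the arith-shift cost plus the costs
   of both operands and returns true from the enclosing function;
   otherwise it falls through to the code that follows it.  */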
9381
9382 /* RTX costs. Make an estimate of the cost of executing the operation
9383 X, which is contained within an operation with code OUTER_CODE.
9384 SPEED_P indicates whether the cost desired is the performance cost,
9385 or the size cost. The estimate is stored in COST and the return
9386 value is TRUE if the cost calculation is final, or FALSE if the
9387 caller should recurse through the operands of X to add additional
9388 costs.
9389
9390 We currently make no attempt to model the size savings of Thumb-2
9391 16-bit instructions. At the normal points in compilation where
9392 this code is called we have no measure of whether the condition
9393 flags are live or not, and thus no realistic way to determine what
9394 the size will eventually be. */
9395 static bool
9396 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9397 const struct cpu_cost_table *extra_cost,
9398 int *cost, bool speed_p)
9399 {
9400 machine_mode mode = GET_MODE (x);
9401
9402 if (TARGET_THUMB1)
9403 {
9404 if (speed_p)
9405 *cost = thumb1_rtx_costs (x, code, outer_code);
9406 else
9407 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9408 return true;
9409 }
9410
9411 switch (code)
9412 {
9413 case SET:
9414 *cost = 0;
9415 /* SET RTXs don't have a mode so we get it from the destination. */
9416 mode = GET_MODE (SET_DEST (x));
9417
9418 if (REG_P (SET_SRC (x))
9419 && REG_P (SET_DEST (x)))
9420 {
9421 /* Assume that most copies can be done with a single insn,
9422 unless we don't have HW FP, in which case everything
9423 larger than word mode will require two insns. */
9424 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9425 && GET_MODE_SIZE (mode) > 4)
9426 || mode == DImode)
9427 ? 2 : 1);
9428 /* Conditional register moves can be encoded
9429 in 16 bits in Thumb mode. */
9430 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9431 *cost >>= 1;
9432
9433 return true;
9434 }
9435
9436 if (CONST_INT_P (SET_SRC (x)))
9437 {
9438 /* Handle CONST_INT here, since the value doesn't have a mode
9439 and we would otherwise be unable to work out the true cost. */
9440 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9441 outer_code = SET;
9442 /* Slightly lower the cost of setting a core reg to a constant.
9443 This helps break up chains and allows for better scheduling. */
9444 if (REG_P (SET_DEST (x))
9445 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9446 *cost -= 1;
9447 x = SET_SRC (x);
9448 /* Moves of an immediate in the range [0, 255] can be encoded in
9449 16 bits in Thumb mode. */
9450 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9451 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9452 *cost >>= 1;
9453 goto const_int_cost;
9454 }
9455
9456 return false;
9457
9458 case MEM:
9459 /* A memory access costs 1 insn if the mode is small or the address is
9460 a single register; otherwise it costs one insn per word. */
9461 if (REG_P (XEXP (x, 0)))
9462 *cost = COSTS_N_INSNS (1);
9463 else if (flag_pic
9464 && GET_CODE (XEXP (x, 0)) == PLUS
9465 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9466 /* This will be split into two instructions.
9467 See arm.md:calculate_pic_address. */
9468 *cost = COSTS_N_INSNS (2);
9469 else
9470 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9471
9472 /* For speed optimizations, add the costs of the address and
9473 accessing memory. */
9474 if (speed_p)
9475 #ifdef NOT_YET
9476 *cost += (extra_cost->ldst.load
9477 + arm_address_cost (XEXP (x, 0), mode,
9478 ADDR_SPACE_GENERIC, speed_p));
9479 #else
9480 *cost += extra_cost->ldst.load;
9481 #endif
9482 return true;
9483
9484 case PARALLEL:
9485 {
9486 /* Calculations of LDM costs are complex. We assume an initial cost
9487 (ldm_1st) which will load the number of registers mentioned in
9488 ldm_regs_per_insn_1st registers; then each additional
9489 ldm_regs_per_insn_subsequent registers cost one more insn. The
9490 formula for N regs is thus:
9491
9492 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9493 + ldm_regs_per_insn_subsequent - 1)
9494 / ldm_regs_per_insn_subsequent).
9495
9496 Additional costs may also be added for addressing. A similar
9497 formula is used for STM. */
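/* As a worked example (the per-core values are illustrative only): for a
   five-register LDM with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, the speed path below adds
   regs_per_insn_1st plus COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2),
   i.e. COSTS_N_INSNS (2), on top of the base COSTS_N_INSNS (1).  */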
9498
9499 bool is_ldm = load_multiple_operation (x, SImode);
9500 bool is_stm = store_multiple_operation (x, SImode);
9501
9502 *cost = COSTS_N_INSNS (1);
9503
9504 if (is_ldm || is_stm)
9505 {
9506 if (speed_p)
9507 {
9508 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9509 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9510 ? extra_cost->ldst.ldm_regs_per_insn_1st
9511 : extra_cost->ldst.stm_regs_per_insn_1st;
9512 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9513 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9514 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9515
9516 *cost += regs_per_insn_1st
9517 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9518 + regs_per_insn_sub - 1)
9519 / regs_per_insn_sub);
9520 return true;
9521 }
9522
9523 }
9524 return false;
9525 }
9526 case DIV:
9527 case UDIV:
9528 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9529 && (mode == SFmode || !TARGET_VFP_SINGLE))
9530 *cost = COSTS_N_INSNS (speed_p
9531 ? extra_cost->fp[mode != SFmode].div : 1);
9532 else if (mode == SImode && TARGET_IDIV)
9533 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9534 else
9535 *cost = LIBCALL_COST (2);
9536 return false; /* All arguments must be in registers. */
9537
9538 case MOD:
9539 case UMOD:
9540 *cost = LIBCALL_COST (2);
9541 return false; /* All arguments must be in registers. */
9542
9543 case ROTATE:
9544 if (mode == SImode && REG_P (XEXP (x, 1)))
9545 {
9546 *cost = (COSTS_N_INSNS (2)
9547 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9548 if (speed_p)
9549 *cost += extra_cost->alu.shift_reg;
9550 return true;
9551 }
9552 /* Fall through */
9553 case ROTATERT:
9554 case ASHIFT:
9555 case LSHIFTRT:
9556 case ASHIFTRT:
9557 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9558 {
9559 *cost = (COSTS_N_INSNS (3)
9560 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9561 if (speed_p)
9562 *cost += 2 * extra_cost->alu.shift;
9563 return true;
9564 }
9565 else if (mode == SImode)
9566 {
9567 *cost = (COSTS_N_INSNS (1)
9568 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9569 /* Slightly disparage register shifts at -Os, but not by much. */
9570 if (!CONST_INT_P (XEXP (x, 1)))
9571 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9572 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9573 return true;
9574 }
9575 else if (GET_MODE_CLASS (mode) == MODE_INT
9576 && GET_MODE_SIZE (mode) < 4)
9577 {
9578 if (code == ASHIFT)
9579 {
9580 *cost = (COSTS_N_INSNS (1)
9581 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9582 /* Slightly disparage register shifts at -Os, but not by
9583 much. */
9584 if (!CONST_INT_P (XEXP (x, 1)))
9585 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9586 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9587 }
9588 else if (code == LSHIFTRT || code == ASHIFTRT)
9589 {
9590 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9591 {
9592 /* Can use SBFX/UBFX. */
9593 *cost = COSTS_N_INSNS (1);
9594 if (speed_p)
9595 *cost += extra_cost->alu.bfx;
9596 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9597 }
9598 else
9599 {
9600 *cost = COSTS_N_INSNS (2);
9601 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9602 if (speed_p)
9603 {
9604 if (CONST_INT_P (XEXP (x, 1)))
9605 *cost += 2 * extra_cost->alu.shift;
9606 else
9607 *cost += (extra_cost->alu.shift
9608 + extra_cost->alu.shift_reg);
9609 }
9610 else
9611 /* Slightly disparage register shifts. */
9612 *cost += !CONST_INT_P (XEXP (x, 1));
9613 }
9614 }
9615 else /* Rotates. */
9616 {
9617 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9618 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9619 if (speed_p)
9620 {
9621 if (CONST_INT_P (XEXP (x, 1)))
9622 *cost += (2 * extra_cost->alu.shift
9623 + extra_cost->alu.log_shift);
9624 else
9625 *cost += (extra_cost->alu.shift
9626 + extra_cost->alu.shift_reg
9627 + extra_cost->alu.log_shift_reg);
9628 }
9629 }
9630 return true;
9631 }
9632
9633 *cost = LIBCALL_COST (2);
9634 return false;
9635
9636 case BSWAP:
9637 if (arm_arch6)
9638 {
9639 if (mode == SImode)
9640 {
9641 *cost = COSTS_N_INSNS (1);
9642 if (speed_p)
9643 *cost += extra_cost->alu.rev;
9644
9645 return false;
9646 }
9647 }
9648 else
9649 {
9650 /* No rev instruction available. Look at arm_legacy_rev
9651 and thumb_legacy_rev for the form of RTL used then. */
9652 if (TARGET_THUMB)
9653 {
9654 *cost = COSTS_N_INSNS (10);
9655
9656 if (speed_p)
9657 {
9658 *cost += 6 * extra_cost->alu.shift;
9659 *cost += 3 * extra_cost->alu.logical;
9660 }
9661 }
9662 else
9663 {
9664 *cost = COSTS_N_INSNS (5);
9665
9666 if (speed_p)
9667 {
9668 *cost += 2 * extra_cost->alu.shift;
9669 *cost += extra_cost->alu.arith_shift;
9670 *cost += 2 * extra_cost->alu.logical;
9671 }
9672 }
9673 return true;
9674 }
9675 return false;
9676
9677 case MINUS:
9678 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9679 && (mode == SFmode || !TARGET_VFP_SINGLE))
9680 {
9681 *cost = COSTS_N_INSNS (1);
9682 if (GET_CODE (XEXP (x, 0)) == MULT
9683 || GET_CODE (XEXP (x, 1)) == MULT)
9684 {
9685 rtx mul_op0, mul_op1, sub_op;
9686
9687 if (speed_p)
9688 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9689
9690 if (GET_CODE (XEXP (x, 0)) == MULT)
9691 {
9692 mul_op0 = XEXP (XEXP (x, 0), 0);
9693 mul_op1 = XEXP (XEXP (x, 0), 1);
9694 sub_op = XEXP (x, 1);
9695 }
9696 else
9697 {
9698 mul_op0 = XEXP (XEXP (x, 1), 0);
9699 mul_op1 = XEXP (XEXP (x, 1), 1);
9700 sub_op = XEXP (x, 0);
9701 }
9702
9703 /* The first operand of the multiply may be optionally
9704 negated. */
9705 if (GET_CODE (mul_op0) == NEG)
9706 mul_op0 = XEXP (mul_op0, 0);
9707
9708 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9709 + rtx_cost (mul_op1, code, 0, speed_p)
9710 + rtx_cost (sub_op, code, 0, speed_p));
9711
9712 return true;
9713 }
9714
9715 if (speed_p)
9716 *cost += extra_cost->fp[mode != SFmode].addsub;
9717 return false;
9718 }
9719
9720 if (mode == SImode)
9721 {
9722 rtx shift_by_reg = NULL;
9723 rtx shift_op;
9724 rtx non_shift_op;
9725
9726 *cost = COSTS_N_INSNS (1);
9727
9728 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9729 if (shift_op == NULL)
9730 {
9731 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9732 non_shift_op = XEXP (x, 0);
9733 }
9734 else
9735 non_shift_op = XEXP (x, 1);
9736
9737 if (shift_op != NULL)
9738 {
9739 if (shift_by_reg != NULL)
9740 {
9741 if (speed_p)
9742 *cost += extra_cost->alu.arith_shift_reg;
9743 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9744 }
9745 else if (speed_p)
9746 *cost += extra_cost->alu.arith_shift;
9747
9748 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9749 + rtx_cost (non_shift_op, code, 0, speed_p));
9750 return true;
9751 }
9752
9753 if (arm_arch_thumb2
9754 && GET_CODE (XEXP (x, 1)) == MULT)
9755 {
9756 /* MLS. */
9757 if (speed_p)
9758 *cost += extra_cost->mult[0].add;
9759 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9760 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9761 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9762 return true;
9763 }
9764
9765 if (CONST_INT_P (XEXP (x, 0)))
9766 {
9767 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9768 INTVAL (XEXP (x, 0)), NULL_RTX,
9769 NULL_RTX, 1, 0);
9770 *cost = COSTS_N_INSNS (insns);
9771 if (speed_p)
9772 *cost += insns * extra_cost->alu.arith;
9773 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9774 return true;
9775 }
9776 else if (speed_p)
9777 *cost += extra_cost->alu.arith;
9778
9779 return false;
9780 }
9781
9782 if (GET_MODE_CLASS (mode) == MODE_INT
9783 && GET_MODE_SIZE (mode) < 4)
9784 {
9785 rtx shift_op, shift_reg;
9786 shift_reg = NULL;
9787
9788 /* We check both sides of the MINUS for shifter operands since,
9789 unlike PLUS, it's not commutative. */
9790
9791 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9792 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9793
9794 /* Slightly disparage, as we might need to widen the result. */
9795 *cost = 1 + COSTS_N_INSNS (1);
9796 if (speed_p)
9797 *cost += extra_cost->alu.arith;
9798
9799 if (CONST_INT_P (XEXP (x, 0)))
9800 {
9801 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9802 return true;
9803 }
9804
9805 return false;
9806 }
9807
9808 if (mode == DImode)
9809 {
9810 *cost = COSTS_N_INSNS (2);
9811
9812 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9813 {
9814 rtx op1 = XEXP (x, 1);
9815
9816 if (speed_p)
9817 *cost += 2 * extra_cost->alu.arith;
9818
9819 if (GET_CODE (op1) == ZERO_EXTEND)
9820 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9821 else
9822 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9823 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9824 0, speed_p);
9825 return true;
9826 }
9827 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9828 {
9829 if (speed_p)
9830 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9831 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9832 0, speed_p)
9833 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9834 return true;
9835 }
9836 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9837 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9838 {
9839 if (speed_p)
9840 *cost += (extra_cost->alu.arith
9841 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9842 ? extra_cost->alu.arith
9843 : extra_cost->alu.arith_shift));
9844 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9845 + rtx_cost (XEXP (XEXP (x, 1), 0),
9846 GET_CODE (XEXP (x, 1)), 0, speed_p));
9847 return true;
9848 }
9849
9850 if (speed_p)
9851 *cost += 2 * extra_cost->alu.arith;
9852 return false;
9853 }
9854
9855 /* Vector mode? */
9856
9857 *cost = LIBCALL_COST (2);
9858 return false;
9859
9860 case PLUS:
9861 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9862 && (mode == SFmode || !TARGET_VFP_SINGLE))
9863 {
9864 *cost = COSTS_N_INSNS (1);
9865 if (GET_CODE (XEXP (x, 0)) == MULT)
9866 {
9867 rtx mul_op0, mul_op1, add_op;
9868
9869 if (speed_p)
9870 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9871
9872 mul_op0 = XEXP (XEXP (x, 0), 0);
9873 mul_op1 = XEXP (XEXP (x, 0), 1);
9874 add_op = XEXP (x, 1);
9875
9876 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9877 + rtx_cost (mul_op1, code, 0, speed_p)
9878 + rtx_cost (add_op, code, 0, speed_p));
9879
9880 return true;
9881 }
9882
9883 if (speed_p)
9884 *cost += extra_cost->fp[mode != SFmode].addsub;
9885 return false;
9886 }
9887 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9888 {
9889 *cost = LIBCALL_COST (2);
9890 return false;
9891 }
9892
9893 /* Narrow modes can be synthesized in SImode, but the range
9894 of useful sub-operations is limited. Check for shift operations
9895 on one of the operands. Only left shifts can be used in the
9896 narrow modes. */
9897 if (GET_MODE_CLASS (mode) == MODE_INT
9898 && GET_MODE_SIZE (mode) < 4)
9899 {
9900 rtx shift_op, shift_reg;
9901 shift_reg = NULL;
9902
9903 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9904
9905 if (CONST_INT_P (XEXP (x, 1)))
9906 {
9907 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9908 INTVAL (XEXP (x, 1)), NULL_RTX,
9909 NULL_RTX, 1, 0);
9910 *cost = COSTS_N_INSNS (insns);
9911 if (speed_p)
9912 *cost += insns * extra_cost->alu.arith;
9913 /* Slightly penalize a narrow operation as the result may
9914 need widening. */
9915 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9916 return true;
9917 }
9918
9919 /* Slightly penalize a narrow operation as the result may
9920 need widening. */
9921 *cost = 1 + COSTS_N_INSNS (1);
9922 if (speed_p)
9923 *cost += extra_cost->alu.arith;
9924
9925 return false;
9926 }
9927
9928 if (mode == SImode)
9929 {
9930 rtx shift_op, shift_reg;
9931
9932 *cost = COSTS_N_INSNS (1);
9933 if (TARGET_INT_SIMD
9934 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9935 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9936 {
9937 /* UXTA[BH] or SXTA[BH]. */
9938 if (speed_p)
9939 *cost += extra_cost->alu.extend_arith;
9940 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9941 speed_p)
9942 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9943 return true;
9944 }
9945
9946 shift_reg = NULL;
9947 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9948 if (shift_op != NULL)
9949 {
9950 if (shift_reg)
9951 {
9952 if (speed_p)
9953 *cost += extra_cost->alu.arith_shift_reg;
9954 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9955 }
9956 else if (speed_p)
9957 *cost += extra_cost->alu.arith_shift;
9958
9959 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9960 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9961 return true;
9962 }
9963 if (GET_CODE (XEXP (x, 0)) == MULT)
9964 {
9965 rtx mul_op = XEXP (x, 0);
9966
9967 *cost = COSTS_N_INSNS (1);
9968
9969 if (TARGET_DSP_MULTIPLY
9970 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9971 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9972 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9973 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9974 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9975 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9976 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9977 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9978 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9979 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9980 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9981 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9982 == 16))))))
9983 {
9984 /* SMLA[BT][BT]. */
9985 if (speed_p)
9986 *cost += extra_cost->mult[0].extend_add;
9987 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9988 SIGN_EXTEND, 0, speed_p)
9989 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9990 SIGN_EXTEND, 0, speed_p)
9991 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9992 return true;
9993 }
9994
9995 if (speed_p)
9996 *cost += extra_cost->mult[0].add;
9997 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9998 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9999 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10000 return true;
10001 }
10002 if (CONST_INT_P (XEXP (x, 1)))
10003 {
10004 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10005 INTVAL (XEXP (x, 1)), NULL_RTX,
10006 NULL_RTX, 1, 0);
10007 *cost = COSTS_N_INSNS (insns);
10008 if (speed_p)
10009 *cost += insns * extra_cost->alu.arith;
10010 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10011 return true;
10012 }
10013 else if (speed_p)
10014 *cost += extra_cost->alu.arith;
10015
10016 return false;
10017 }
10018
10019 if (mode == DImode)
10020 {
10021 if (arm_arch3m
10022 && GET_CODE (XEXP (x, 0)) == MULT
10023 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10024 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10025 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10026 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10027 {
10028 *cost = COSTS_N_INSNS (1);
10029 if (speed_p)
10030 *cost += extra_cost->mult[1].extend_add;
10031 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10032 ZERO_EXTEND, 0, speed_p)
10033 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10034 ZERO_EXTEND, 0, speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10037 }
10038
10039 *cost = COSTS_N_INSNS (2);
10040
10041 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10042 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10043 {
10044 if (speed_p)
10045 *cost += (extra_cost->alu.arith
10046 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10047 ? extra_cost->alu.arith
10048 : extra_cost->alu.arith_shift));
10049
10050 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10051 speed_p)
10052 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10053 return true;
10054 }
10055
10056 if (speed_p)
10057 *cost += 2 * extra_cost->alu.arith;
10058 return false;
10059 }
10060
10061 /* Vector mode? */
10062 *cost = LIBCALL_COST (2);
10063 return false;
10064 case IOR:
10065 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10066 {
10067 *cost = COSTS_N_INSNS (1);
10068 if (speed_p)
10069 *cost += extra_cost->alu.rev;
10070
10071 return true;
10072 }
10073 /* Fall through. */
10074 case AND: case XOR:
10075 if (mode == SImode)
10076 {
10077 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10078 rtx op0 = XEXP (x, 0);
10079 rtx shift_op, shift_reg;
10080
10081 *cost = COSTS_N_INSNS (1);
10082
10083 if (subcode == NOT
10084 && (code == AND
10085 || (code == IOR && TARGET_THUMB2)))
10086 op0 = XEXP (op0, 0);
10087
10088 shift_reg = NULL;
10089 shift_op = shifter_op_p (op0, &shift_reg);
10090 if (shift_op != NULL)
10091 {
10092 if (shift_reg)
10093 {
10094 if (speed_p)
10095 *cost += extra_cost->alu.log_shift_reg;
10096 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10097 }
10098 else if (speed_p)
10099 *cost += extra_cost->alu.log_shift;
10100
10101 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10102 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10103 return true;
10104 }
10105
10106 if (CONST_INT_P (XEXP (x, 1)))
10107 {
10108 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10109 INTVAL (XEXP (x, 1)), NULL_RTX,
10110 NULL_RTX, 1, 0);
10111
10112 *cost = COSTS_N_INSNS (insns);
10113 if (speed_p)
10114 *cost += insns * extra_cost->alu.logical;
10115 *cost += rtx_cost (op0, code, 0, speed_p);
10116 return true;
10117 }
10118
10119 if (speed_p)
10120 *cost += extra_cost->alu.logical;
10121 *cost += (rtx_cost (op0, code, 0, speed_p)
10122 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10123 return true;
10124 }
10125
10126 if (mode == DImode)
10127 {
10128 rtx op0 = XEXP (x, 0);
10129 enum rtx_code subcode = GET_CODE (op0);
10130
10131 *cost = COSTS_N_INSNS (2);
10132
10133 if (subcode == NOT
10134 && (code == AND
10135 || (code == IOR && TARGET_THUMB2)))
10136 op0 = XEXP (op0, 0);
10137
10138 if (GET_CODE (op0) == ZERO_EXTEND)
10139 {
10140 if (speed_p)
10141 *cost += 2 * extra_cost->alu.logical;
10142
10143 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10144 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10145 return true;
10146 }
10147 else if (GET_CODE (op0) == SIGN_EXTEND)
10148 {
10149 if (speed_p)
10150 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10151
10152 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10153 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10154 return true;
10155 }
10156
10157 if (speed_p)
10158 *cost += 2 * extra_cost->alu.logical;
10159
10160 return true;
10161 }
10162 /* Vector mode? */
10163
10164 *cost = LIBCALL_COST (2);
10165 return false;
10166
10167 case MULT:
10168 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10169 && (mode == SFmode || !TARGET_VFP_SINGLE))
10170 {
10171 rtx op0 = XEXP (x, 0);
10172
10173 *cost = COSTS_N_INSNS (1);
10174
10175 if (GET_CODE (op0) == NEG)
10176 op0 = XEXP (op0, 0);
10177
10178 if (speed_p)
10179 *cost += extra_cost->fp[mode != SFmode].mult;
10180
10181 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10182 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10183 return true;
10184 }
10185 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10186 {
10187 *cost = LIBCALL_COST (2);
10188 return false;
10189 }
10190
10191 if (mode == SImode)
10192 {
10193 *cost = COSTS_N_INSNS (1);
10194 if (TARGET_DSP_MULTIPLY
10195 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10196 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10197 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10198 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10199 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10200 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10201 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10202 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10203 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10204 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10205 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10206 && (INTVAL (XEXP (XEXP (x, 1), 1))
10207 == 16))))))
10208 {
10209 /* SMUL[TB][TB]. */
10210 if (speed_p)
10211 *cost += extra_cost->mult[0].extend;
10212 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10213 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10214 return true;
10215 }
10216 if (speed_p)
10217 *cost += extra_cost->mult[0].simple;
10218 return false;
10219 }
10220
10221 if (mode == DImode)
10222 {
10223 if (arm_arch3m
10224 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10225 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10226 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10227 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10228 {
10229 *cost = COSTS_N_INSNS (1);
10230 if (speed_p)
10231 *cost += extra_cost->mult[1].extend;
10232 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10233 ZERO_EXTEND, 0, speed_p)
10234 + rtx_cost (XEXP (XEXP (x, 1), 0),
10235 ZERO_EXTEND, 0, speed_p));
10236 return true;
10237 }
10238
10239 *cost = LIBCALL_COST (2);
10240 return false;
10241 }
10242
10243 /* Vector mode? */
10244 *cost = LIBCALL_COST (2);
10245 return false;
10246
10247 case NEG:
10248 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10249 && (mode == SFmode || !TARGET_VFP_SINGLE))
10250 {
10251 *cost = COSTS_N_INSNS (1);
10252 if (speed_p)
10253 *cost += extra_cost->fp[mode != SFmode].neg;
10254
10255 return false;
10256 }
10257 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10258 {
10259 *cost = LIBCALL_COST (1);
10260 return false;
10261 }
10262
10263 if (mode == SImode)
10264 {
10265 if (GET_CODE (XEXP (x, 0)) == ABS)
10266 {
10267 *cost = COSTS_N_INSNS (2);
10268 /* Assume the non-flag-changing variant. */
10269 if (speed_p)
10270 *cost += (extra_cost->alu.log_shift
10271 + extra_cost->alu.arith_shift);
10272 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10273 return true;
10274 }
10275
10276 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10277 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10278 {
10279 *cost = COSTS_N_INSNS (2);
10280 /* No extra cost for MOV imm and MVN imm. */
10281 /* If the comparison op is using the flags, there's no further
10282 cost, otherwise we need to add the cost of the comparison. */
10283 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10284 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10285 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10286 {
10287 *cost += (COSTS_N_INSNS (1)
10288 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10289 speed_p)
10290 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10291 speed_p));
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10294 }
10295 return true;
10296 }
10297 *cost = COSTS_N_INSNS (1);
10298 if (speed_p)
10299 *cost += extra_cost->alu.arith;
10300 return false;
10301 }
10302
10303 if (GET_MODE_CLASS (mode) == MODE_INT
10304 && GET_MODE_SIZE (mode) < 4)
10305 {
10306 /* Slightly disparage, as we might need an extend operation. */
10307 *cost = 1 + COSTS_N_INSNS (1);
10308 if (speed_p)
10309 *cost += extra_cost->alu.arith;
10310 return false;
10311 }
10312
10313 if (mode == DImode)
10314 {
10315 *cost = COSTS_N_INSNS (2);
10316 if (speed_p)
10317 *cost += 2 * extra_cost->alu.arith;
10318 return false;
10319 }
10320
10321 /* Vector mode? */
10322 *cost = LIBCALL_COST (1);
10323 return false;
10324
10325 case NOT:
10326 if (mode == SImode)
10327 {
10328 rtx shift_op;
10329 rtx shift_reg = NULL;
10330
10331 *cost = COSTS_N_INSNS (1);
10332 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10333
10334 if (shift_op)
10335 {
10336 if (shift_reg != NULL)
10337 {
10338 if (speed_p)
10339 *cost += extra_cost->alu.log_shift_reg;
10340 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10341 }
10342 else if (speed_p)
10343 *cost += extra_cost->alu.log_shift;
10344 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10345 return true;
10346 }
10347
10348 if (speed_p)
10349 *cost += extra_cost->alu.logical;
10350 return false;
10351 }
10352 if (mode == DImode)
10353 {
10354 *cost = COSTS_N_INSNS (2);
10355 return false;
10356 }
10357
10358 /* Vector mode? */
10359
10360 *cost += LIBCALL_COST (1);
10361 return false;
10362
10363 case IF_THEN_ELSE:
10364 {
10365 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10366 {
10367 *cost = COSTS_N_INSNS (4);
10368 return true;
10369 }
10370 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10371 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10372
10373 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10374 /* Assume that if one arm of the if_then_else is a register, it will
10375 be tied with the result, eliminating the
10376 conditional insn. */
10377 if (REG_P (XEXP (x, 1)))
10378 *cost += op2cost;
10379 else if (REG_P (XEXP (x, 2)))
10380 *cost += op1cost;
10381 else
10382 {
10383 if (speed_p)
10384 {
10385 if (extra_cost->alu.non_exec_costs_exec)
10386 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10387 else
10388 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10389 }
10390 else
10391 *cost += op1cost + op2cost;
10392 }
10393 }
10394 return true;
10395
10396 case COMPARE:
10397 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10398 *cost = 0;
10399 else
10400 {
10401 machine_mode op0mode;
10402 /* We'll mostly assume that the cost of a compare is the cost of the
10403 LHS. However, there are some notable exceptions. */
10404
10405 /* Floating point compares are never done as side-effects. */
10406 op0mode = GET_MODE (XEXP (x, 0));
10407 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10408 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10409 {
10410 *cost = COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->fp[op0mode != SFmode].compare;
10413
10414 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10415 {
10416 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10417 return true;
10418 }
10419
10420 return false;
10421 }
10422 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10423 {
10424 *cost = LIBCALL_COST (2);
10425 return false;
10426 }
10427
10428 /* DImode compares normally take two insns. */
10429 if (op0mode == DImode)
10430 {
10431 *cost = COSTS_N_INSNS (2);
10432 if (speed_p)
10433 *cost += 2 * extra_cost->alu.arith;
10434 return false;
10435 }
10436
10437 if (op0mode == SImode)
10438 {
10439 rtx shift_op;
10440 rtx shift_reg;
10441
10442 if (XEXP (x, 1) == const0_rtx
10443 && !(REG_P (XEXP (x, 0))
10444 || (GET_CODE (XEXP (x, 0)) == SUBREG
10445 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10446 {
10447 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10448
10449 /* Multiply operations that set the flags are often
10450 significantly more expensive. */
10451 if (speed_p
10452 && GET_CODE (XEXP (x, 0)) == MULT
10453 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10454 *cost += extra_cost->mult[0].flag_setting;
10455
10456 if (speed_p
10457 && GET_CODE (XEXP (x, 0)) == PLUS
10458 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10459 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10460 0), 1), mode))
10461 *cost += extra_cost->mult[0].flag_setting;
10462 return true;
10463 }
10464
10465 shift_reg = NULL;
10466 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10467 if (shift_op != NULL)
10468 {
10469 *cost = COSTS_N_INSNS (1);
10470 if (shift_reg != NULL)
10471 {
10472 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10473 if (speed_p)
10474 *cost += extra_cost->alu.arith_shift_reg;
10475 }
10476 else if (speed_p)
10477 *cost += extra_cost->alu.arith_shift;
10478 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10479 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10480 return true;
10481 }
10482
10483 *cost = COSTS_N_INSNS (1);
10484 if (speed_p)
10485 *cost += extra_cost->alu.arith;
10486 if (CONST_INT_P (XEXP (x, 1))
10487 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10488 {
10489 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10490 return true;
10491 }
10492 return false;
10493 }
10494
10495 /* Vector mode? */
10496
10497 *cost = LIBCALL_COST (2);
10498 return false;
10499 }
10500 return true;
10501
10502 case EQ:
10503 case NE:
10504 case LT:
10505 case LE:
10506 case GT:
10507 case GE:
10508 case LTU:
10509 case LEU:
10510 case GEU:
10511 case GTU:
10512 case ORDERED:
10513 case UNORDERED:
10514 case UNEQ:
10515 case UNLE:
10516 case UNLT:
10517 case UNGE:
10518 case UNGT:
10519 case LTGT:
10520 if (outer_code == SET)
10521 {
10522 /* Is it a store-flag operation? */
10523 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10524 && XEXP (x, 1) == const0_rtx)
10525 {
10526 /* Thumb also needs an IT insn. */
10527 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10528 return true;
10529 }
10530 if (XEXP (x, 1) == const0_rtx)
10531 {
10532 switch (code)
10533 {
10534 case LT:
10535 /* LSR Rd, Rn, #31. */
10536 *cost = COSTS_N_INSNS (1);
10537 if (speed_p)
10538 *cost += extra_cost->alu.shift;
10539 break;
10540
10541 case EQ:
10542 /* RSBS T1, Rn, #0
10543 ADC Rd, Rn, T1. */
10544
10545 case NE:
10546 /* SUBS T1, Rn, #1
10547 SBC Rd, Rn, T1. */
10548 *cost = COSTS_N_INSNS (2);
10549 break;
10550
10551 case LE:
10552 /* RSBS T1, Rn, Rn, LSR #31
10553 ADC Rd, Rn, T1. */
10554 *cost = COSTS_N_INSNS (2);
10555 if (speed_p)
10556 *cost += extra_cost->alu.arith_shift;
10557 break;
10558
10559 case GT:
10560 /* RSB Rd, Rn, Rn, ASR #1
10561 LSR Rd, Rd, #31. */
10562 *cost = COSTS_N_INSNS (2);
10563 if (speed_p)
10564 *cost += (extra_cost->alu.arith_shift
10565 + extra_cost->alu.shift);
10566 break;
10567
10568 case GE:
10569 /* ASR Rd, Rn, #31
10570 ADD Rd, Rn, #1. */
10571 *cost = COSTS_N_INSNS (2);
10572 if (speed_p)
10573 *cost += extra_cost->alu.shift;
10574 break;
10575
10576 default:
10577 /* Remaining cases are either meaningless or would take
10578 three insns anyway. */
10579 *cost = COSTS_N_INSNS (3);
10580 break;
10581 }
10582 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10583 return true;
10584 }
10585 else
10586 {
10587 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10588 if (CONST_INT_P (XEXP (x, 1))
10589 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10590 {
10591 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10592 return true;
10593 }
10594
10595 return false;
10596 }
10597 }
10598 /* Not directly inside a set. If it involves the condition code
10599 register it must be the condition for a branch, cond_exec or
10600 I_T_E operation. Since the comparison is performed elsewhere
10601 this is just the control part which has no additional
10602 cost. */
10603 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10604 && XEXP (x, 1) == const0_rtx)
10605 {
10606 *cost = 0;
10607 return true;
10608 }
10609 return false;
10610
10611 case ABS:
10612 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10613 && (mode == SFmode || !TARGET_VFP_SINGLE))
10614 {
10615 *cost = COSTS_N_INSNS (1);
10616 if (speed_p)
10617 *cost += extra_cost->fp[mode != SFmode].neg;
10618
10619 return false;
10620 }
10621 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10622 {
10623 *cost = LIBCALL_COST (1);
10624 return false;
10625 }
10626
10627 if (mode == SImode)
10628 {
10629 *cost = COSTS_N_INSNS (1);
10630 if (speed_p)
10631 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10632 return false;
10633 }
10634 /* Vector mode? */
10635 *cost = LIBCALL_COST (1);
10636 return false;
10637
10638 case SIGN_EXTEND:
10639 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10640 && MEM_P (XEXP (x, 0)))
10641 {
10642 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10643
10644 if (mode == DImode)
10645 *cost += COSTS_N_INSNS (1);
10646
10647 if (!speed_p)
10648 return true;
10649
10650 if (GET_MODE (XEXP (x, 0)) == SImode)
10651 *cost += extra_cost->ldst.load;
10652 else
10653 *cost += extra_cost->ldst.load_sign_extend;
10654
10655 if (mode == DImode)
10656 *cost += extra_cost->alu.shift;
10657
10658 return true;
10659 }
10660
10661 /* Widening from less than 32 bits requires an extend operation. */
10662 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10663 {
10664 /* We have SXTB/SXTH. */
10665 *cost = COSTS_N_INSNS (1);
10666 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10667 if (speed_p)
10668 *cost += extra_cost->alu.extend;
10669 }
10670 else if (GET_MODE (XEXP (x, 0)) != SImode)
10671 {
10672 /* Needs two shifts. */
10673 *cost = COSTS_N_INSNS (2);
10674 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10675 if (speed_p)
10676 *cost += 2 * extra_cost->alu.shift;
10677 }
10678
10679 /* Widening beyond 32 bits requires one more insn. */
10680 if (mode == DImode)
10681 {
10682 *cost += COSTS_N_INSNS (1);
10683 if (speed_p)
10684 *cost += extra_cost->alu.shift;
10685 }
10686
10687 return true;
10688
10689 case ZERO_EXTEND:
10690 if ((arm_arch4
10691 || GET_MODE (XEXP (x, 0)) == SImode
10692 || GET_MODE (XEXP (x, 0)) == QImode)
10693 && MEM_P (XEXP (x, 0)))
10694 {
10695 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10696
10697 if (mode == DImode)
10698 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10699
10700 return true;
10701 }
10702
10703 /* Widening from less than 32 bits requires an extend operation. */
10704 if (GET_MODE (XEXP (x, 0)) == QImode)
10705 {
10706 /* UXTB can be a shorter instruction in Thumb2, but it might
10707 be slower than the AND Rd, Rn, #255 alternative. When
10708 optimizing for speed it should never be slower to use
10709 AND, and we don't really model 16-bit vs 32-bit insns
10710 here. */
10711 *cost = COSTS_N_INSNS (1);
10712 if (speed_p)
10713 *cost += extra_cost->alu.logical;
10714 }
10715 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10716 {
10717 /* We have UXTB/UXTH. */
10718 *cost = COSTS_N_INSNS (1);
10719 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10720 if (speed_p)
10721 *cost += extra_cost->alu.extend;
10722 }
10723 else if (GET_MODE (XEXP (x, 0)) != SImode)
10724 {
10725 /* Needs two shifts. It's marginally preferable to use
10726 shifts rather than two BIC instructions as the second
10727 shift may merge with a subsequent insn as a shifter
10728 op. */
10729 *cost = COSTS_N_INSNS (2);
10730 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10731 if (speed_p)
10732 *cost += 2 * extra_cost->alu.shift;
10733 }
10734 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10735 *cost = COSTS_N_INSNS (1);
10736
10737 /* Widening beyond 32 bits requires one more insn. */
10738 if (mode == DImode)
10739 {
10740 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10741 }
10742
10743 return true;
10744
10745 case CONST_INT:
10746 *cost = 0;
10747 /* CONST_INT has no mode, so we cannot tell for sure how many
10748 insns are really going to be needed. The best we can do is
10749 look at the value passed. If it fits in SImode, then assume
10750 that's the mode it will be used for. Otherwise assume it
10751 will be used in DImode. */
10752 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10753 mode = SImode;
10754 else
10755 mode = DImode;
10756
10757 /* Avoid blowing up in arm_gen_constant (). */
10758 if (!(outer_code == PLUS
10759 || outer_code == AND
10760 || outer_code == IOR
10761 || outer_code == XOR
10762 || outer_code == MINUS))
10763 outer_code = SET;
10764
10765 const_int_cost:
10766 if (mode == SImode)
10767 {
10768 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10769 INTVAL (x), NULL, NULL,
10770 0, 0));
10771 /* Extra costs? */
10772 }
10773 else
10774 {
10775 *cost += COSTS_N_INSNS (arm_gen_constant
10776 (outer_code, SImode, NULL,
10777 trunc_int_for_mode (INTVAL (x), SImode),
10778 NULL, NULL, 0, 0)
10779 + arm_gen_constant (outer_code, SImode, NULL,
10780 INTVAL (x) >> 32, NULL,
10781 NULL, 0, 0));
10782 /* Extra costs? */
10783 }
10784
10785 return true;
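/* For the DImode path just above, a constant such as 0x100000001 is
   costed as synthesizing its two 32-bit halves independently, one insn
   for each half in this case.  */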
10786
10787 case CONST:
10788 case LABEL_REF:
10789 case SYMBOL_REF:
10790 if (speed_p)
10791 {
10792 if (arm_arch_thumb2 && !flag_pic)
10793 *cost = COSTS_N_INSNS (2);
10794 else
10795 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10796 }
10797 else
10798 *cost = COSTS_N_INSNS (2);
10799
10800 if (flag_pic)
10801 {
10802 *cost += COSTS_N_INSNS (1);
10803 if (speed_p)
10804 *cost += extra_cost->alu.arith;
10805 }
10806
10807 return true;
10808
10809 case CONST_FIXED:
10810 *cost = COSTS_N_INSNS (4);
10811 /* Fixme. */
10812 return true;
10813
10814 case CONST_DOUBLE:
10815 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10816 && (mode == SFmode || !TARGET_VFP_SINGLE))
10817 {
10818 if (vfp3_const_double_rtx (x))
10819 {
10820 *cost = COSTS_N_INSNS (1);
10821 if (speed_p)
10822 *cost += extra_cost->fp[mode == DFmode].fpconst;
10823 return true;
10824 }
10825
10826 if (speed_p)
10827 {
10828 *cost = COSTS_N_INSNS (1);
10829 if (mode == DFmode)
10830 *cost += extra_cost->ldst.loadd;
10831 else
10832 *cost += extra_cost->ldst.loadf;
10833 }
10834 else
10835 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10836
10837 return true;
10838 }
10839 *cost = COSTS_N_INSNS (4);
10840 return true;
10841
10842 case CONST_VECTOR:
10843 /* Fixme. */
10844 if (TARGET_NEON
10845 && TARGET_HARD_FLOAT
10846 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10847 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10848 *cost = COSTS_N_INSNS (1);
10849 else
10850 *cost = COSTS_N_INSNS (4);
10851 return true;
10852
10853 case HIGH:
10854 case LO_SUM:
10855 *cost = COSTS_N_INSNS (1);
10856 /* When optimizing for size, we prefer constant pool entries to
10857 MOVW/MOVT pairs, so bump the cost of these slightly. */
10858 if (!speed_p)
10859 *cost += 1;
10860 return true;
10861
10862 case CLZ:
10863 *cost = COSTS_N_INSNS (1);
10864 if (speed_p)
10865 *cost += extra_cost->alu.clz;
10866 return false;
10867
10868 case SMIN:
10869 if (XEXP (x, 1) == const0_rtx)
10870 {
10871 *cost = COSTS_N_INSNS (1);
10872 if (speed_p)
10873 *cost += extra_cost->alu.log_shift;
10874 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10875 return true;
10876 }
10877 /* Fall through. */
10878 case SMAX:
10879 case UMIN:
10880 case UMAX:
10881 *cost = COSTS_N_INSNS (2);
10882 return false;
10883
10884 case TRUNCATE:
10885 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10886 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10887 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10888 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10889 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10890 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10891 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10892 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10893 == ZERO_EXTEND))))
10894 {
10895 *cost = COSTS_N_INSNS (1);
10896 if (speed_p)
10897 *cost += extra_cost->mult[1].extend;
10898 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10899 speed_p)
10900 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10901 0, speed_p));
10902 return true;
10903 }
10904 *cost = LIBCALL_COST (1);
10905 return false;
10906
10907 case UNSPEC_VOLATILE:
10908 case UNSPEC:
10909 return arm_unspec_cost (x, outer_code, speed_p, cost);
10910
10911 case PC:
10912 /* Reading the PC is like reading any other register. Writing it
10913 is more expensive, but we take that into account elsewhere. */
10914 *cost = 0;
10915 return true;
10916
10917 case ZERO_EXTRACT:
10918 /* TODO: Simple zero_extract of bottom bits using AND. */
10919 /* Fall through. */
10920 case SIGN_EXTRACT:
10921 if (arm_arch6
10922 && mode == SImode
10923 && CONST_INT_P (XEXP (x, 1))
10924 && CONST_INT_P (XEXP (x, 2)))
10925 {
10926 *cost = COSTS_N_INSNS (1);
10927 if (speed_p)
10928 *cost += extra_cost->alu.bfx;
10929 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10930 return true;
10931 }
10932 /* Without UBFX/SBFX, need to resort to shift operations. */
10933 *cost = COSTS_N_INSNS (2);
10934 if (speed_p)
10935 *cost += 2 * extra_cost->alu.shift;
10936 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10937 return true;
10938
10939 case FLOAT_EXTEND:
10940 if (TARGET_HARD_FLOAT)
10941 {
10942 *cost = COSTS_N_INSNS (1);
10943 if (speed_p)
10944 *cost += extra_cost->fp[mode == DFmode].widen;
10945 if (!TARGET_FPU_ARMV8
10946 && GET_MODE (XEXP (x, 0)) == HFmode)
10947 {
10948 /* Pre v8, widening HF->DF is a two-step process, first
10949 widening to SFmode. */
10950 *cost += COSTS_N_INSNS (1);
10951 if (speed_p)
10952 *cost += extra_cost->fp[0].widen;
10953 }
10954 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10955 return true;
10956 }
10957
10958 *cost = LIBCALL_COST (1);
10959 return false;
10960
10961 case FLOAT_TRUNCATE:
10962 if (TARGET_HARD_FLOAT)
10963 {
10964 *cost = COSTS_N_INSNS (1);
10965 if (speed_p)
10966 *cost += extra_cost->fp[mode == DFmode].narrow;
10967 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10968 return true;
10969 /* Vector modes? */
10970 }
10971 *cost = LIBCALL_COST (1);
10972 return false;
10973
10974 case FMA:
10975 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10976 {
10977 rtx op0 = XEXP (x, 0);
10978 rtx op1 = XEXP (x, 1);
10979 rtx op2 = XEXP (x, 2);
10980
10981 *cost = COSTS_N_INSNS (1);
10982
10983 /* vfms or vfnma. */
10984 if (GET_CODE (op0) == NEG)
10985 op0 = XEXP (op0, 0);
10986
10987 /* vfnms or vfnma. */
10988 if (GET_CODE (op2) == NEG)
10989 op2 = XEXP (op2, 0);
10990
10991 *cost += rtx_cost (op0, FMA, 0, speed_p);
10992 *cost += rtx_cost (op1, FMA, 1, speed_p);
10993 *cost += rtx_cost (op2, FMA, 2, speed_p);
10994
10995 if (speed_p)
10996 *cost += extra_cost->fp[mode == DFmode].fma;
10997
10998 return true;
10999 }
11000
11001 *cost = LIBCALL_COST (3);
11002 return false;
11003
11004 case FIX:
11005 case UNSIGNED_FIX:
11006 if (TARGET_HARD_FLOAT)
11007 {
11008 if (GET_MODE_CLASS (mode) == MODE_INT)
11009 {
11010 *cost = COSTS_N_INSNS (1);
11011 if (speed_p)
11012 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11013 /* Strip off the 'cost' of rounding towards zero. */
11014 if (GET_CODE (XEXP (x, 0)) == FIX)
11015 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11016 else
11017 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11018 /* ??? Increase the cost to deal with transferring from
11019 FP -> CORE registers? */
11020 return true;
11021 }
11022 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11023 && TARGET_FPU_ARMV8)
11024 {
11025 *cost = COSTS_N_INSNS (1);
11026 if (speed_p)
11027 *cost += extra_cost->fp[mode == DFmode].roundint;
11028 return false;
11029 }
11030 /* Vector costs? */
11031 }
11032 *cost = LIBCALL_COST (1);
11033 return false;
11034
11035 case FLOAT:
11036 case UNSIGNED_FLOAT:
11037 if (TARGET_HARD_FLOAT)
11038 {
11039 /* ??? Increase the cost to deal with transferring from CORE
11040 -> FP registers? */
11041 *cost = COSTS_N_INSNS (1);
11042 if (speed_p)
11043 *cost += extra_cost->fp[mode == DFmode].fromint;
11044 return false;
11045 }
11046 *cost = LIBCALL_COST (1);
11047 return false;
11048
11049 case CALL:
11050 *cost = COSTS_N_INSNS (1);
11051 return true;
11052
11053 case ASM_OPERANDS:
11054 {
11055 /* Just a guess: the number of instructions in the asm template
11056 plus one insn per input operand, but always a minimum of
11057 COSTS_N_INSNS (1) (see PR60663). */
11058 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11059 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11060
11061 *cost = COSTS_N_INSNS (asm_length + num_operands);
11062 return true;
11063 }
11064 default:
11065 if (mode != VOIDmode)
11066 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11067 else
11068 *cost = COSTS_N_INSNS (4); /* Who knows? */
11069 return false;
11070 }
11071 }
11072
11073 #undef HANDLE_NARROW_SHIFT_ARITH
11074
11075 /* Top-level RTX cost function: dispatch between the old per-core cost functions and the new table-driven costs, for both speed and size. */
11076 static bool
11077 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11078 int *total, bool speed)
11079 {
11080 bool result;
11081
11082 if (TARGET_OLD_RTX_COSTS
11083 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11084 {
11085 /* Old way. (Deprecated.) */
11086 if (!speed)
11087 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11088 (enum rtx_code) outer_code, total);
11089 else
11090 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11091 (enum rtx_code) outer_code, total,
11092 speed);
11093 }
11094 else
11095 {
11096 /* New way. */
11097 if (current_tune->insn_extra_cost)
11098 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11099 (enum rtx_code) outer_code,
11100 current_tune->insn_extra_cost,
11101 total, speed);
11102 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11103 && current_tune->insn_extra_cost == NULL */
11104 else
11105 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11106 (enum rtx_code) outer_code,
11107 &generic_extra_costs, total, speed);
11108 }
11109
11110 if (dump_file && (dump_flags & TDF_DETAILS))
11111 {
11112 print_rtl_single (dump_file, x);
11113 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11114 *total, result ? "final" : "partial");
11115 }
11116 return result;
11117 }
11118
11119 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11120 supported on any "slowmul" cores, so it can be ignored. */
11121
11122 static bool
11123 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11124 int *total, bool speed)
11125 {
11126 machine_mode mode = GET_MODE (x);
11127
11128 if (TARGET_THUMB)
11129 {
11130 *total = thumb1_rtx_costs (x, code, outer_code);
11131 return true;
11132 }
11133
11134 switch (code)
11135 {
11136 case MULT:
11137 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11138 || mode == DImode)
11139 {
11140 *total = COSTS_N_INSNS (20);
11141 return false;
11142 }
11143
11144 if (CONST_INT_P (XEXP (x, 1)))
11145 {
11146 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11147 & (unsigned HOST_WIDE_INT) 0xffffffff);
11148 int cost, const_ok = const_ok_for_arm (i);
11149 int j, booth_unit_size;
11150
11151 /* Tune as appropriate. */
11152 cost = const_ok ? 4 : 8;
11153 booth_unit_size = 2;
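/* A rough illustration of the loop below (not taken from a
   cycle-accurate model): one extra insn-equivalent is charged per
   BOOTH_UNIT_SIZE bits of the constant that remain nonzero.  With the
   2-bit step, a const_ok_for_arm value such as 0xff starts at cost 4
   and takes four iterations, ending at COSTS_N_INSNS (8).  */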
11154 for (j = 0; i && j < 32; j += booth_unit_size)
11155 {
11156 i >>= booth_unit_size;
11157 cost++;
11158 }
11159
11160 *total = COSTS_N_INSNS (cost);
11161 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11162 return true;
11163 }
11164
11165 *total = COSTS_N_INSNS (20);
11166 return false;
11167
11168 default:
11169 return arm_rtx_costs_1 (x, outer_code, total, speed);
11170 }
11171 }
11172
11173
11174 /* RTX cost for cores with a fast multiply unit (M variants). */
11175
11176 static bool
11177 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11178 int *total, bool speed)
11179 {
11180 machine_mode mode = GET_MODE (x);
11181
11182 if (TARGET_THUMB1)
11183 {
11184 *total = thumb1_rtx_costs (x, code, outer_code);
11185 return true;
11186 }
11187
11188 /* ??? should thumb2 use different costs? */
11189 switch (code)
11190 {
11191 case MULT:
11192 /* There is no point basing this on the tuning, since it is always the
11193 fast variant if it exists at all. */
11194 if (mode == DImode
11195 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11196 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11197 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11198 {
11199 *total = COSTS_N_INSNS (2);
11200 return false;
11201 }
11202
11203
11204 if (mode == DImode)
11205 {
11206 *total = COSTS_N_INSNS (5);
11207 return false;
11208 }
11209
11210 if (CONST_INT_P (XEXP (x, 1)))
11211 {
11212 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11213 & (unsigned HOST_WIDE_INT) 0xffffffff);
11214 int cost, const_ok = const_ok_for_arm (i);
11215 int j, booth_unit_size;
11216
11217 /* Tune as appropriate. */
11218 cost = const_ok ? 4 : 8;
11219 booth_unit_size = 8;
11220 for (j = 0; i && j < 32; j += booth_unit_size)
11221 {
11222 i >>= booth_unit_size;
11223 cost++;
11224 }
11225
11226 *total = COSTS_N_INSNS (cost);
11227 return false;
11228 }
11229
11230 if (mode == SImode)
11231 {
11232 *total = COSTS_N_INSNS (4);
11233 return false;
11234 }
11235
11236 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11237 {
11238 if (TARGET_HARD_FLOAT
11239 && (mode == SFmode
11240 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11241 {
11242 *total = COSTS_N_INSNS (1);
11243 return false;
11244 }
11245 }
11246
11247 /* Requires a lib call */
11248 *total = COSTS_N_INSNS (20);
11249 return false;
11250
11251 default:
11252 return arm_rtx_costs_1 (x, outer_code, total, speed);
11253 }
11254 }
11255
11256
11257 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11258 so it can be ignored. */
11259
11260 static bool
11261 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11262 int *total, bool speed)
11263 {
11264 machine_mode mode = GET_MODE (x);
11265
11266 if (TARGET_THUMB)
11267 {
11268 *total = thumb1_rtx_costs (x, code, outer_code);
11269 return true;
11270 }
11271
11272 switch (code)
11273 {
11274 case COMPARE:
11275 if (GET_CODE (XEXP (x, 0)) != MULT)
11276 return arm_rtx_costs_1 (x, outer_code, total, speed);
11277
11278 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11279 will stall until the multiplication is complete. */
11280 *total = COSTS_N_INSNS (3);
11281 return false;
11282
11283 case MULT:
11284 /* There is no point basing this on the tuning, since it is always the
11285 fast variant if it exists at all. */
11286 if (mode == DImode
11287 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11288 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11289 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11290 {
11291 *total = COSTS_N_INSNS (2);
11292 return false;
11293 }
11294
11295
11296 if (mode == DImode)
11297 {
11298 *total = COSTS_N_INSNS (5);
11299 return false;
11300 }
11301
11302 if (CONST_INT_P (XEXP (x, 1)))
11303 {
11304 /* If operand 1 is a constant we can more accurately
11305 calculate the cost of the multiply. The multiplier can
11306 retire 15 bits on the first cycle and a further 12 on the
11307 second. We do, of course, have to load the constant into
11308 a register first. */
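/* Purely as an illustration of the masks below: a small constant such
   as 0x7f has no bits in the 0xffff8000 range, so only the general
   overhead applies and the multiply is costed at COSTS_N_INSNS (1),
   while a value such as 0x12345678 hits both the 0xffff8000 and the
   0xf8000000 masks and is costed at COSTS_N_INSNS (3).  */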
11309 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11310 /* There's a general overhead of one cycle. */
11311 int cost = 1;
11312 unsigned HOST_WIDE_INT masked_const;
11313
11314 if (i & 0x80000000)
11315 i = ~i;
11316
11317 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11318
11319 masked_const = i & 0xffff8000;
11320 if (masked_const != 0)
11321 {
11322 cost++;
11323 masked_const = i & 0xf8000000;
11324 if (masked_const != 0)
11325 cost++;
11326 }
11327 *total = COSTS_N_INSNS (cost);
11328 return false;
11329 }
11330
11331 if (mode == SImode)
11332 {
11333 *total = COSTS_N_INSNS (3);
11334 return false;
11335 }
11336
11337 /* Requires a lib call */
11338 *total = COSTS_N_INSNS (20);
11339 return false;
11340
11341 default:
11342 return arm_rtx_costs_1 (x, outer_code, total, speed);
11343 }
11344 }
11345
11346
11347 /* RTX costs for 9e (and later) cores. */
11348
11349 static bool
11350 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11351 int *total, bool speed)
11352 {
11353 machine_mode mode = GET_MODE (x);
11354
11355 if (TARGET_THUMB1)
11356 {
11357 switch (code)
11358 {
11359 case MULT:
11360 /* Small multiply: 32 cycles for an integer multiply inst. */
11361 if (arm_arch6m && arm_m_profile_small_mul)
11362 *total = COSTS_N_INSNS (32);
11363 else
11364 *total = COSTS_N_INSNS (3);
11365 return true;
11366
11367 default:
11368 *total = thumb1_rtx_costs (x, code, outer_code);
11369 return true;
11370 }
11371 }
11372
11373 switch (code)
11374 {
11375 case MULT:
11376 /* There is no point basing this on the tuning, since it is always the
11377 fast variant if it exists at all. */
11378 if (mode == DImode
11379 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11380 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11381 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11382 {
11383 *total = COSTS_N_INSNS (2);
11384 return false;
11385 }
11386
11387
11388 if (mode == DImode)
11389 {
11390 *total = COSTS_N_INSNS (5);
11391 return false;
11392 }
11393
11394 if (mode == SImode)
11395 {
11396 *total = COSTS_N_INSNS (2);
11397 return false;
11398 }
11399
11400 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11401 {
11402 if (TARGET_HARD_FLOAT
11403 && (mode == SFmode
11404 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11405 {
11406 *total = COSTS_N_INSNS (1);
11407 return false;
11408 }
11409 }
11410
11411 *total = COSTS_N_INSNS (20);
11412 return false;
11413
11414 default:
11415 return arm_rtx_costs_1 (x, outer_code, total, speed);
11416 }
11417 }
11418 /* All address computations that can be done are free, but rtx cost returns
11419 the same for practically all of them. So we weight the different types
11420 of address here in the order (most pref first):
11421 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
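/* For instance (an informal reading of the weights below): a pre/post
   increment address costs 0, a reg+constant address such as [r0, #4]
   costs 2, a sum involving a shift or other arithmetic costs 3, any
   other sum costs 4, a bare register costs 6 and a MEM, LABEL_REF or
   SYMBOL_REF costs 10.  */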
11422 static inline int
11423 arm_arm_address_cost (rtx x)
11424 {
11425 enum rtx_code c = GET_CODE (x);
11426
11427 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11428 return 0;
11429 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11430 return 10;
11431
11432 if (c == PLUS)
11433 {
11434 if (CONST_INT_P (XEXP (x, 1)))
11435 return 2;
11436
11437 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11438 return 3;
11439
11440 return 4;
11441 }
11442
11443 return 6;
11444 }
11445
11446 static inline int
11447 arm_thumb_address_cost (rtx x)
11448 {
11449 enum rtx_code c = GET_CODE (x);
11450
11451 if (c == REG)
11452 return 1;
11453 if (c == PLUS
11454 && REG_P (XEXP (x, 0))
11455 && CONST_INT_P (XEXP (x, 1)))
11456 return 1;
11457
11458 return 2;
11459 }
11460
11461 static int
11462 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11463 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11464 {
11465 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11466 }
11467
11468 /* Adjust cost hook for XScale. */
11469 static bool
11470 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11471 {
11472 /* Some true dependencies can have a higher cost depending
11473 on precisely how certain input operands are used. */
11474 if (REG_NOTE_KIND (link) == 0
11475 && recog_memoized (insn) >= 0
11476 && recog_memoized (dep) >= 0)
11477 {
11478 int shift_opnum = get_attr_shift (insn);
11479 enum attr_type attr_type = get_attr_type (dep);
11480
11481 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11482 operand for INSN. If we have a shifted input operand and the
11483 instruction we depend on is another ALU instruction, then we may
11484 have to account for an additional stall. */
11485 if (shift_opnum != 0
11486 && (attr_type == TYPE_ALU_SHIFT_IMM
11487 || attr_type == TYPE_ALUS_SHIFT_IMM
11488 || attr_type == TYPE_LOGIC_SHIFT_IMM
11489 || attr_type == TYPE_LOGICS_SHIFT_IMM
11490 || attr_type == TYPE_ALU_SHIFT_REG
11491 || attr_type == TYPE_ALUS_SHIFT_REG
11492 || attr_type == TYPE_LOGIC_SHIFT_REG
11493 || attr_type == TYPE_LOGICS_SHIFT_REG
11494 || attr_type == TYPE_MOV_SHIFT
11495 || attr_type == TYPE_MVN_SHIFT
11496 || attr_type == TYPE_MOV_SHIFT_REG
11497 || attr_type == TYPE_MVN_SHIFT_REG))
11498 {
11499 rtx shifted_operand;
11500 int opno;
11501
11502 /* Get the shifted operand. */
11503 extract_insn (insn);
11504 shifted_operand = recog_data.operand[shift_opnum];
11505
11506 /* Iterate over all the operands in DEP. If we write an operand
11507 that overlaps with SHIFTED_OPERAND, then we have to increase the
11508 cost of this dependency. */
11509 extract_insn (dep);
11510 preprocess_constraints (dep);
11511 for (opno = 0; opno < recog_data.n_operands; opno++)
11512 {
11513 /* We can ignore strict inputs. */
11514 if (recog_data.operand_type[opno] == OP_IN)
11515 continue;
11516
11517 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11518 shifted_operand))
11519 {
11520 *cost = 2;
11521 return false;
11522 }
11523 }
11524 }
11525 }
11526 return true;
11527 }
11528
11529 /* Adjust cost hook for Cortex A9. */
11530 static bool
11531 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11532 {
11533 switch (REG_NOTE_KIND (link))
11534 {
11535 case REG_DEP_ANTI:
11536 *cost = 0;
11537 return false;
11538
11539 case REG_DEP_TRUE:
11540 case REG_DEP_OUTPUT:
11541 if (recog_memoized (insn) >= 0
11542 && recog_memoized (dep) >= 0)
11543 {
11544 if (GET_CODE (PATTERN (insn)) == SET)
11545 {
11546 if (GET_MODE_CLASS
11547 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11548 || GET_MODE_CLASS
11549 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11550 {
11551 enum attr_type attr_type_insn = get_attr_type (insn);
11552 enum attr_type attr_type_dep = get_attr_type (dep);
11553
11554 /* By default all dependencies of the form
11555 s0 = s0 <op> s1
11556 s0 = s0 <op> s2
11557 have an extra latency of 1 cycle because
11558 of the input and output dependency in this
11559 case. However, this gets modeled as a true
11560 dependency, and hence all these checks. */
11561 if (REG_P (SET_DEST (PATTERN (insn)))
11562 && REG_P (SET_DEST (PATTERN (dep)))
11563 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11564 SET_DEST (PATTERN (dep))))
11565 {
11566 /* FMACS is a special case where the dependent
11567 instruction can be issued 3 cycles before
11568 the normal latency in case of an output
11569 dependency. */
11570 if ((attr_type_insn == TYPE_FMACS
11571 || attr_type_insn == TYPE_FMACD)
11572 && (attr_type_dep == TYPE_FMACS
11573 || attr_type_dep == TYPE_FMACD))
11574 {
11575 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11576 *cost = insn_default_latency (dep) - 3;
11577 else
11578 *cost = insn_default_latency (dep);
11579 return false;
11580 }
11581 else
11582 {
11583 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11584 *cost = insn_default_latency (dep) + 1;
11585 else
11586 *cost = insn_default_latency (dep);
11587 }
11588 return false;
11589 }
11590 }
11591 }
11592 }
11593 break;
11594
11595 default:
11596 gcc_unreachable ();
11597 }
11598
11599 return true;
11600 }
11601
11602 /* Adjust cost hook for FA726TE. */
11603 static bool
11604 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11605 {
11606 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11607 has a penalty of 3. */
11608 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11609 && recog_memoized (insn) >= 0
11610 && recog_memoized (dep) >= 0
11611 && get_attr_conds (dep) == CONDS_SET)
11612 {
11613 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11614 if (get_attr_conds (insn) == CONDS_USE
11615 && get_attr_type (insn) != TYPE_BRANCH)
11616 {
11617 *cost = 3;
11618 return false;
11619 }
11620
11621 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11622 || get_attr_conds (insn) == CONDS_USE)
11623 {
11624 *cost = 0;
11625 return false;
11626 }
11627 }
11628
11629 return true;
11630 }
11631
11632 /* Implement TARGET_REGISTER_MOVE_COST.
11633
11634 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11635 it is typically more expensive than a single memory access. We set
11636 the cost to less than two memory accesses so that floating
11637 point to integer conversion does not go through memory. */
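/* For example, with the values used below a VFP<->core move costs 15,
   which sits between one arm_memory_move_cost of 10 and two of them,
   so a float-to-int conversion is kept in registers.  (Illustrative
   only: these are the heuristic constants in this file, not measured
   latencies.)  */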
11638
11639 int
11640 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11641 reg_class_t from, reg_class_t to)
11642 {
11643 if (TARGET_32BIT)
11644 {
11645 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11646 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11647 return 15;
11648 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11649 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11650 return 4;
11651 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11652 return 20;
11653 else
11654 return 2;
11655 }
11656 else
11657 {
11658 if (from == HI_REGS || to == HI_REGS)
11659 return 4;
11660 else
11661 return 2;
11662 }
11663 }
11664
11665 /* Implement TARGET_MEMORY_MOVE_COST. */
11666
11667 int
11668 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11669 bool in ATTRIBUTE_UNUSED)
11670 {
11671 if (TARGET_32BIT)
11672 return 10;
11673 else
11674 {
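/* Thumb-1 (an illustrative reading of the heuristic values below, not
   measured latencies): sub-word values cost 8; an SImode value costs
   2 * 4 = 8 in LO_REGS and twice that in other classes.  */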
11675 if (GET_MODE_SIZE (mode) < 4)
11676 return 8;
11677 else
11678 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11679 }
11680 }
11681
11682 /* Vectorizer cost model implementation. */
11683
11684 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11685 static int
11686 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11687 tree vectype,
11688 int misalign ATTRIBUTE_UNUSED)
11689 {
11690 unsigned elements;
11691
11692 switch (type_of_cost)
11693 {
11694 case scalar_stmt:
11695 return current_tune->vec_costs->scalar_stmt_cost;
11696
11697 case scalar_load:
11698 return current_tune->vec_costs->scalar_load_cost;
11699
11700 case scalar_store:
11701 return current_tune->vec_costs->scalar_store_cost;
11702
11703 case vector_stmt:
11704 return current_tune->vec_costs->vec_stmt_cost;
11705
11706 case vector_load:
11707 return current_tune->vec_costs->vec_align_load_cost;
11708
11709 case vector_store:
11710 return current_tune->vec_costs->vec_store_cost;
11711
11712 case vec_to_scalar:
11713 return current_tune->vec_costs->vec_to_scalar_cost;
11714
11715 case scalar_to_vec:
11716 return current_tune->vec_costs->scalar_to_vec_cost;
11717
11718 case unaligned_load:
11719 return current_tune->vec_costs->vec_unalign_load_cost;
11720
11721 case unaligned_store:
11722 return current_tune->vec_costs->vec_unalign_store_cost;
11723
11724 case cond_branch_taken:
11725 return current_tune->vec_costs->cond_taken_branch_cost;
11726
11727 case cond_branch_not_taken:
11728 return current_tune->vec_costs->cond_not_taken_branch_cost;
11729
11730 case vec_perm:
11731 case vec_promote_demote:
11732 return current_tune->vec_costs->vec_stmt_cost;
11733
11734 case vec_construct:
11735 elements = TYPE_VECTOR_SUBPARTS (vectype);
11736 return elements / 2 + 1;
11737
11738 default:
11739 gcc_unreachable ();
11740 }
11741 }
11742
11743 /* Implement targetm.vectorize.add_stmt_cost. */
11744
11745 static unsigned
11746 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11747 struct _stmt_vec_info *stmt_info, int misalign,
11748 enum vect_cost_model_location where)
11749 {
11750 unsigned *cost = (unsigned *) data;
11751 unsigned retval = 0;
11752
11753 if (flag_vect_cost_model)
11754 {
11755 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11756 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11757
11758 /* Statements in an inner loop relative to the loop being
11759 vectorized are weighted more heavily. The value here is
11760 arbitrary and could potentially be improved with analysis. */
11761 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11762 count *= 50; /* FIXME. */
11763
11764 retval = (unsigned) (count * stmt_cost);
11765 cost[where] += retval;
11766 }
11767
11768 return retval;
11769 }
11770
11771 /* Return true if and only if this insn can dual-issue only as older. */
11772 static bool
11773 cortexa7_older_only (rtx_insn *insn)
11774 {
11775 if (recog_memoized (insn) < 0)
11776 return false;
11777
11778 switch (get_attr_type (insn))
11779 {
11780 case TYPE_ALU_DSP_REG:
11781 case TYPE_ALU_SREG:
11782 case TYPE_ALUS_SREG:
11783 case TYPE_LOGIC_REG:
11784 case TYPE_LOGICS_REG:
11785 case TYPE_ADC_REG:
11786 case TYPE_ADCS_REG:
11787 case TYPE_ADR:
11788 case TYPE_BFM:
11789 case TYPE_REV:
11790 case TYPE_MVN_REG:
11791 case TYPE_SHIFT_IMM:
11792 case TYPE_SHIFT_REG:
11793 case TYPE_LOAD_BYTE:
11794 case TYPE_LOAD1:
11795 case TYPE_STORE1:
11796 case TYPE_FFARITHS:
11797 case TYPE_FADDS:
11798 case TYPE_FFARITHD:
11799 case TYPE_FADDD:
11800 case TYPE_FMOV:
11801 case TYPE_F_CVT:
11802 case TYPE_FCMPS:
11803 case TYPE_FCMPD:
11804 case TYPE_FCONSTS:
11805 case TYPE_FCONSTD:
11806 case TYPE_FMULS:
11807 case TYPE_FMACS:
11808 case TYPE_FMULD:
11809 case TYPE_FMACD:
11810 case TYPE_FDIVS:
11811 case TYPE_FDIVD:
11812 case TYPE_F_MRC:
11813 case TYPE_F_MRRC:
11814 case TYPE_F_FLAG:
11815 case TYPE_F_LOADS:
11816 case TYPE_F_STORES:
11817 return true;
11818 default:
11819 return false;
11820 }
11821 }
11822
11823 /* Return true if and only if this insn can dual-issue as younger. */
11824 static bool
11825 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11826 {
11827 if (recog_memoized (insn) < 0)
11828 {
11829 if (verbose > 5)
11830 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11831 return false;
11832 }
11833
11834 switch (get_attr_type (insn))
11835 {
11836 case TYPE_ALU_IMM:
11837 case TYPE_ALUS_IMM:
11838 case TYPE_LOGIC_IMM:
11839 case TYPE_LOGICS_IMM:
11840 case TYPE_EXTEND:
11841 case TYPE_MVN_IMM:
11842 case TYPE_MOV_IMM:
11843 case TYPE_MOV_REG:
11844 case TYPE_MOV_SHIFT:
11845 case TYPE_MOV_SHIFT_REG:
11846 case TYPE_BRANCH:
11847 case TYPE_CALL:
11848 return true;
11849 default:
11850 return false;
11851 }
11852 }
11853
11854
11855 /* Look for an instruction that can dual issue only as an older
11856 instruction, and move it in front of any instructions that can
11857 dual-issue as younger, while preserving the relative order of all
11858 other instructions in the ready list. This is a heuristic to help
11859 dual-issue in later cycles, by postponing issue of more flexible
11860 instructions. This heuristic may affect dual issue opportunities
11861 in the current cycle. */
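/* A sketch of the effect on a hypothetical ready list (issue order
   left to right): { younger, younger, older-only } becomes
   { older-only, younger, younger }, so the older-only insn issues
   first and a younger insn is still available to pair with it.  */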
11862 static void
11863 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11864 int *n_readyp, int clock)
11865 {
11866 int i;
11867 int first_older_only = -1, first_younger = -1;
11868
11869 if (verbose > 5)
11870 fprintf (file,
11871 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11872 clock,
11873 *n_readyp);
11874
11875 /* Traverse the ready list from the head (the instruction to issue
11876 first), looking for the first instruction that can issue as
11877 younger and the first instruction that can dual-issue only as
11878 older. */
11879 for (i = *n_readyp - 1; i >= 0; i--)
11880 {
11881 rtx_insn *insn = ready[i];
11882 if (cortexa7_older_only (insn))
11883 {
11884 first_older_only = i;
11885 if (verbose > 5)
11886 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11887 break;
11888 }
11889 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11890 first_younger = i;
11891 }
11892
11893 /* Nothing to reorder because either no younger insn found or insn
11894 that can dual-issue only as older appears before any insn that
11895 can dual-issue as younger. */
11896 if (first_younger == -1)
11897 {
11898 if (verbose > 5)
11899 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11900 return;
11901 }
11902
11903 /* Nothing to reorder because no older-only insn in the ready list. */
11904 if (first_older_only == -1)
11905 {
11906 if (verbose > 5)
11907 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11908 return;
11909 }
11910
11911 /* Move first_older_only insn before first_younger. */
11912 if (verbose > 5)
11913 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11914 INSN_UID (ready[first_older_only]),
11915 INSN_UID (ready[first_younger]));
11916 rtx_insn *first_older_only_insn = ready[first_older_only];
11917 for (i = first_older_only; i < first_younger; i++)
11918 {
11919 ready[i] = ready[i+1];
11920 }
11921
11922 ready[i] = first_older_only_insn;
11923 return;
11924 }
11925
11926 /* Implement TARGET_SCHED_REORDER. */
11927 static int
11928 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11929 int clock)
11930 {
11931 switch (arm_tune)
11932 {
11933 case cortexa7:
11934 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11935 break;
11936 default:
11937 /* Do nothing for other cores. */
11938 break;
11939 }
11940
11941 return arm_issue_rate ();
11942 }
11943
11944 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11945 It corrects the value of COST based on the relationship between
11946 INSN and DEP through the dependence LINK. It returns the new
11947 value. There is a per-core adjust_cost hook to adjust scheduler costs
11948 and the per-core hook can choose to completely override the generic
11949 adjust_cost function. Only put bits of code into arm_adjust_cost that
11950 are common across all cores. */
11951 static int
11952 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11953 {
11954 rtx i_pat, d_pat;
11955
11956 /* When generating Thumb-1 code, we want to place flag-setting operations
11957 close to a conditional branch which depends on them, so that we can
11958 omit the comparison. */
11959 if (TARGET_THUMB1
11960 && REG_NOTE_KIND (link) == 0
11961 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11962 && recog_memoized (dep) >= 0
11963 && get_attr_conds (dep) == CONDS_SET)
11964 return 0;
11965
11966 if (current_tune->sched_adjust_cost != NULL)
11967 {
11968 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11969 return cost;
11970 }
11971
11972 /* XXX Is this strictly true? */
11973 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11974 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11975 return 0;
11976
11977 /* Call insns don't incur a stall, even if they follow a load. */
11978 if (REG_NOTE_KIND (link) == 0
11979 && CALL_P (insn))
11980 return 1;
11981
11982 if ((i_pat = single_set (insn)) != NULL
11983 && MEM_P (SET_SRC (i_pat))
11984 && (d_pat = single_set (dep)) != NULL
11985 && MEM_P (SET_DEST (d_pat)))
11986 {
11987 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11988 /* This is a load after a store, there is no conflict if the load reads
11989 from a cached area. Assume that loads from the stack, and from the
11990 constant pool are cached, and that others will miss. This is a
11991 hack. */
11992
11993 if ((GET_CODE (src_mem) == SYMBOL_REF
11994 && CONSTANT_POOL_ADDRESS_P (src_mem))
11995 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11996 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11997 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11998 return 1;
11999 }
12000
12001 return cost;
12002 }
12003
12004 int
12005 arm_max_conditional_execute (void)
12006 {
12007 return max_insns_skipped;
12008 }
12009
12010 static int
12011 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12012 {
12013 if (TARGET_32BIT)
12014 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12015 else
12016 return (optimize > 0) ? 2 : 0;
12017 }
12018
12019 static int
12020 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12021 {
12022 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12023 }
12024
12025 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12026 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12027 sequences of non-executed instructions in IT blocks probably take the same
12028 amount of time as executed instructions (and the IT instruction itself takes
12029 space in icache). This function was experimentally determined to give good
12030 results on a popular embedded benchmark. */
12031
12032 static int
12033 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12034 {
12035 return (TARGET_32BIT && speed_p) ? 1
12036 : arm_default_branch_cost (speed_p, predictable_p);
12037 }
12038
12039 static int
12040 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12041 {
12042 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12043 }
12044
12045 static bool fp_consts_inited = false;
12046
12047 static REAL_VALUE_TYPE value_fp0;
12048
12049 static void
12050 init_fp_table (void)
12051 {
12052 REAL_VALUE_TYPE r;
12053
12054 r = REAL_VALUE_ATOF ("0", DFmode);
12055 value_fp0 = r;
12056 fp_consts_inited = true;
12057 }
12058
12059 /* Return TRUE if rtx X is a valid immediate FP constant. */
12060 int
12061 arm_const_double_rtx (rtx x)
12062 {
12063 REAL_VALUE_TYPE r;
12064
12065 if (!fp_consts_inited)
12066 init_fp_table ();
12067
12068 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12069 if (REAL_VALUE_MINUS_ZERO (r))
12070 return 0;
12071
12072 if (REAL_VALUES_EQUAL (r, value_fp0))
12073 return 1;
12074
12075 return 0;
12076 }
12077
12078 /* VFPv3 has a fairly wide range of representable immediates, formed from
12079 "quarter-precision" floating-point values. These can be evaluated using this
12080 formula (with ^ for exponentiation):
12081
12082 -1^s * n * 2^-r
12083
12084 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12085 16 <= n <= 31 and 0 <= r <= 7.
12086
12087 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12088
12089 - A (most-significant) is the sign bit.
12090 - BCD are the exponent (encoded as r XOR 3).
12091 - EFGH are the mantissa (encoded as n - 16).
12092 */
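/* Worked example, derived only from the formula above: 1.0 is
   16 * 2^-4, so s = 0, n = 16 and r = 4, giving A = 0,
   BCD = 4 XOR 3 = 0b111 and EFGH = 16 - 16 = 0b0000, i.e. the
   8-bit index 0x70.  */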
12093
12094 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12095 fconst[sd] instruction, or -1 if X isn't suitable. */
12096 static int
12097 vfp3_const_double_index (rtx x)
12098 {
12099 REAL_VALUE_TYPE r, m;
12100 int sign, exponent;
12101 unsigned HOST_WIDE_INT mantissa, mant_hi;
12102 unsigned HOST_WIDE_INT mask;
12103 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12104 bool fail;
12105
12106 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12107 return -1;
12108
12109 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12110
12111 /* We can't represent these things, so detect them first. */
12112 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12113 return -1;
12114
12115 /* Extract sign, exponent and mantissa. */
12116 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12117 r = real_value_abs (&r);
12118 exponent = REAL_EXP (&r);
12119 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12120 highest (sign) bit, with a fixed binary point at bit point_pos.
12121 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12122 bits for the mantissa, this may fail (low bits would be lost). */
12123 real_ldexp (&m, &r, point_pos - exponent);
12124 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12125 mantissa = w.elt (0);
12126 mant_hi = w.elt (1);
12127
12128 /* If there are bits set in the low part of the mantissa, we can't
12129 represent this value. */
12130 if (mantissa != 0)
12131 return -1;
12132
12133 /* Now make it so that mantissa contains the most-significant bits, and move
12134 the point_pos to indicate that the least-significant bits have been
12135 discarded. */
12136 point_pos -= HOST_BITS_PER_WIDE_INT;
12137 mantissa = mant_hi;
12138
12139 /* We can permit four significant bits of mantissa only, plus a high bit
12140 which is always 1. */
12141 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12142 if ((mantissa & mask) != 0)
12143 return -1;
12144
12145 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12146 mantissa >>= point_pos - 5;
12147
12148 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12149 floating-point immediate zero with Neon using an integer-zero load, but
12150 that case is handled elsewhere.) */
12151 if (mantissa == 0)
12152 return -1;
12153
12154 gcc_assert (mantissa >= 16 && mantissa <= 31);
12155
12156 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12157 normalized significands are in the range [1, 2). (Our mantissa is shifted
12158 left 4 places at this point relative to normalized IEEE754 values). GCC
12159 internally uses [0.5, 1) (see real.c), so the exponent returned from
12160 REAL_EXP must be altered. */
12161 exponent = 5 - exponent;
12162
12163 if (exponent < 0 || exponent > 7)
12164 return -1;
12165
12166 /* Sign, mantissa and exponent are now in the correct form to plug into the
12167 formula described in the comment above. */
12168 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12169 }
12170
12171 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12172 int
12173 vfp3_const_double_rtx (rtx x)
12174 {
12175 if (!TARGET_VFP3)
12176 return 0;
12177
12178 return vfp3_const_double_index (x) != -1;
12179 }
12180
12181 /* Recognize immediates which can be used in various Neon instructions. Legal
12182 immediates are described by the following table (for VMVN variants, the
12183 bitwise inverse of the constant shown is recognized. In either case, VMOV
12184 is output and the correct instruction to use for a given constant is chosen
12185 by the assembler). The constant shown is replicated across all elements of
12186 the destination vector.
12187
12188 insn elems variant constant (binary)
12189 ---- ----- ------- -----------------
12190 vmov i32 0 00000000 00000000 00000000 abcdefgh
12191 vmov i32 1 00000000 00000000 abcdefgh 00000000
12192 vmov i32 2 00000000 abcdefgh 00000000 00000000
12193 vmov i32 3 abcdefgh 00000000 00000000 00000000
12194 vmov i16 4 00000000 abcdefgh
12195 vmov i16 5 abcdefgh 00000000
12196 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12197 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12198 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12199 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12200 vmvn i16 10 00000000 abcdefgh
12201 vmvn i16 11 abcdefgh 00000000
12202 vmov i32 12 00000000 00000000 abcdefgh 11111111
12203 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12204 vmov i32 14 00000000 abcdefgh 11111111 11111111
12205 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12206 vmov i8 16 abcdefgh
12207 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12208 eeeeeeee ffffffff gggggggg hhhhhhhh
12209 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12210 vmov f32 19 00000000 00000000 00000000 00000000
12211
12212 For case 18, B = !b. Representable values are exactly those accepted by
12213 vfp3_const_double_index, but are output as floating-point numbers rather
12214 than indices.
12215
12216 For case 19, we will change it to vmov.i32 when assembling.
12217
12218 Variants 0-5 (inclusive) may also be used as immediates for the second
12219 operand of VORR/VBIC instructions.
12220
12221 The INVERSE argument causes the bitwise inverse of the given operand to be
12222 recognized instead (used for recognizing legal immediates for the VAND/VORN
12223 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12224 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12225 output, rather than the real insns vbic/vorr).
12226
12227 INVERSE makes no difference to the recognition of float vectors.
12228
12229 The return value is the variant of immediate as shown in the above table, or
12230 -1 if the given value doesn't match any of the listed patterns.
12231 */
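/* By way of example, reading straight off the table above: a V4SImode
   constant with every element equal to 0x000000ab matches variant 0
   (element width 32), elements of 0x0000ab00 match variant 1, and an
   all-0xab vector of QImode elements matches variant 16 (element
   width 8).  */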
12232 static int
12233 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12234 rtx *modconst, int *elementwidth)
12235 {
12236 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12237 matches = 1; \
12238 for (i = 0; i < idx; i += (STRIDE)) \
12239 if (!(TEST)) \
12240 matches = 0; \
12241 if (matches) \
12242 { \
12243 immtype = (CLASS); \
12244 elsize = (ELSIZE); \
12245 break; \
12246 }
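/* The CHECK macro scans the splatted byte image below in steps of
   STRIDE bytes; if TEST holds at every step it records the variant
   number (CLASS) and element size in bits (ELSIZE) and breaks out of
   the do/while that tries the variants in order.  */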
12247
12248 unsigned int i, elsize = 0, idx = 0, n_elts;
12249 unsigned int innersize;
12250 unsigned char bytes[16];
12251 int immtype = -1, matches;
12252 unsigned int invmask = inverse ? 0xff : 0;
12253 bool vector = GET_CODE (op) == CONST_VECTOR;
12254
12255 if (vector)
12256 {
12257 n_elts = CONST_VECTOR_NUNITS (op);
12258 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12259 }
12260 else
12261 {
12262 n_elts = 1;
12263 if (mode == VOIDmode)
12264 mode = DImode;
12265 innersize = GET_MODE_SIZE (mode);
12266 }
12267
12268 /* Vectors of float constants. */
12269 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12270 {
12271 rtx el0 = CONST_VECTOR_ELT (op, 0);
12272 REAL_VALUE_TYPE r0;
12273
12274 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12275 return -1;
12276
12277 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12278
12279 for (i = 1; i < n_elts; i++)
12280 {
12281 rtx elt = CONST_VECTOR_ELT (op, i);
12282 REAL_VALUE_TYPE re;
12283
12284 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12285
12286 if (!REAL_VALUES_EQUAL (r0, re))
12287 return -1;
12288 }
12289
12290 if (modconst)
12291 *modconst = CONST_VECTOR_ELT (op, 0);
12292
12293 if (elementwidth)
12294 *elementwidth = 0;
12295
12296 if (el0 == CONST0_RTX (GET_MODE (el0)))
12297 return 19;
12298 else
12299 return 18;
12300 }
12301
12302 /* Splat vector constant out into a byte vector. */
12303 for (i = 0; i < n_elts; i++)
12304 {
12305 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12306 unsigned HOST_WIDE_INT elpart;
12307 unsigned int part, parts;
12308
12309 if (CONST_INT_P (el))
12310 {
12311 elpart = INTVAL (el);
12312 parts = 1;
12313 }
12314 else if (CONST_DOUBLE_P (el))
12315 {
12316 elpart = CONST_DOUBLE_LOW (el);
12317 parts = 2;
12318 }
12319 else
12320 gcc_unreachable ();
12321
12322 for (part = 0; part < parts; part++)
12323 {
12324 unsigned int byte;
12325 for (byte = 0; byte < innersize; byte++)
12326 {
12327 bytes[idx++] = (elpart & 0xff) ^ invmask;
12328 elpart >>= BITS_PER_UNIT;
12329 }
12330 if (CONST_DOUBLE_P (el))
12331 elpart = CONST_DOUBLE_HIGH (el);
12332 }
12333 }
12334
12335 /* Sanity check. */
12336 gcc_assert (idx == GET_MODE_SIZE (mode));
12337
12338 do
12339 {
12340 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12341 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12342
12343 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12344 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12345
12346 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12347 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12348
12349 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12350 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12351
12352 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12353
12354 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12355
12356 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12357 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12358
12359 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12360 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12361
12362 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12363 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12364
12365 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12366 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12367
12368 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12369
12370 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12371
12372 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12373 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12374
12375 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12376 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12377
12378 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12379 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12380
12381 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12382 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12383
12384 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12385
12386 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12387 && bytes[i] == bytes[(i + 8) % idx]);
12388 }
12389 while (0);
12390
12391 if (immtype == -1)
12392 return -1;
12393
12394 if (elementwidth)
12395 *elementwidth = elsize;
12396
12397 if (modconst)
12398 {
12399 unsigned HOST_WIDE_INT imm = 0;
12400
12401 /* Un-invert bytes of recognized vector, if necessary. */
12402 if (invmask != 0)
12403 for (i = 0; i < idx; i++)
12404 bytes[i] ^= invmask;
12405
12406 if (immtype == 17)
12407 {
12408 /* FIXME: Broken on 32-bit H_W_I hosts. */
12409 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12410
12411 for (i = 0; i < 8; i++)
12412 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12413 << (i * BITS_PER_UNIT);
12414
12415 *modconst = GEN_INT (imm);
12416 }
12417 else
12418 {
12419 unsigned HOST_WIDE_INT imm = 0;
12420
12421 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12422 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12423
12424 *modconst = GEN_INT (imm);
12425 }
12426 }
12427
12428 return immtype;
12429 #undef CHECK
12430 }
12431
12432 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12433 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12434 float elements), and a modified constant (whatever should be output for a
12435 VMOV) in *MODCONST. */
12436
12437 int
12438 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12439 rtx *modconst, int *elementwidth)
12440 {
12441 rtx tmpconst;
12442 int tmpwidth;
12443 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12444
12445 if (retval == -1)
12446 return 0;
12447
12448 if (modconst)
12449 *modconst = tmpconst;
12450
12451 if (elementwidth)
12452 *elementwidth = tmpwidth;
12453
12454 return 1;
12455 }
12456
12457 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12458 the immediate is valid, write a constant suitable for using as an operand
12459 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12460 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12461
12462 int
12463 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12464 rtx *modconst, int *elementwidth)
12465 {
12466 rtx tmpconst;
12467 int tmpwidth;
12468 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12469
12470 if (retval < 0 || retval > 5)
12471 return 0;
12472
12473 if (modconst)
12474 *modconst = tmpconst;
12475
12476 if (elementwidth)
12477 *elementwidth = tmpwidth;
12478
12479 return 1;
12480 }
12481
12482 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12483 the immediate is valid, write a constant suitable for using as an operand
12484 to VSHR/VSHL to *MODCONST and the corresponding element width to
12485 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12486 which have different limitations. */
12487
12488 int
12489 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12490 rtx *modconst, int *elementwidth,
12491 bool isleftshift)
12492 {
12493 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12494 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12495 unsigned HOST_WIDE_INT last_elt = 0;
12496 unsigned HOST_WIDE_INT maxshift;
12497
12498 /* Split vector constant out into a byte vector. */
12499 for (i = 0; i < n_elts; i++)
12500 {
12501 rtx el = CONST_VECTOR_ELT (op, i);
12502 unsigned HOST_WIDE_INT elpart;
12503
12504 if (CONST_INT_P (el))
12505 elpart = INTVAL (el);
12506 else if (CONST_DOUBLE_P (el))
12507 return 0;
12508 else
12509 gcc_unreachable ();
12510
12511 if (i != 0 && elpart != last_elt)
12512 return 0;
12513
12514 last_elt = elpart;
12515 }
12516
12517 /* Shift less than element size. */
12518 maxshift = innersize * 8;
12519
12520 if (isleftshift)
12521 {
12522 /* Left shift immediate value can be from 0 to <size>-1. */
12523 if (last_elt >= maxshift)
12524 return 0;
12525 }
12526 else
12527 {
12528 /* Right shift immediate value can be from 1 to <size>. */
12529 if (last_elt == 0 || last_elt > maxshift)
12530 return 0;
12531 }
12532
12533 if (elementwidth)
12534 *elementwidth = innersize * 8;
12535
12536 if (modconst)
12537 *modconst = CONST_VECTOR_ELT (op, 0);
12538
12539 return 1;
12540 }
12541
12542 /* Return a string suitable for output of Neon immediate logic operation
12543 MNEM. */
12544
12545 char *
12546 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12547 int inverse, int quad)
12548 {
12549 int width, is_valid;
12550 static char templ[40];
12551
12552 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12553
12554 gcc_assert (is_valid != 0);
12555
12556 if (quad)
12557 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12558 else
12559 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12560
12561 return templ;
12562 }
12563
12564 /* Return a string suitable for output of Neon immediate shift operation
12565 (VSHR or VSHL) MNEM. */
12566
12567 char *
12568 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12569 machine_mode mode, int quad,
12570 bool isleftshift)
12571 {
12572 int width, is_valid;
12573 static char templ[40];
12574
12575 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12576 gcc_assert (is_valid != 0);
12577
12578 if (quad)
12579 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12580 else
12581 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12582
12583 return templ;
12584 }
12585
12586 /* Output a sequence of pairwise operations to implement a reduction.
12587 NOTE: We do "too much work" here, because pairwise operations work on two
12588 registers' worth of operands in one go. Unfortunately, I don't think we can
12589 exploit those extra calculations to do the full operation in fewer steps.
12590 Although all vector elements of the result but the first are ignored, we
12591 actually calculate the same result in each of the elements. An alternative
12592 such as initially loading a vector with zero to use as each of the second
12593 operands would use up an additional register and take an extra instruction,
12594 for no particular gain. */
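/* As a sketch of the emitted sequence: reducing a four-element vector
   takes two pairwise steps (i = 2, then i = 1), the last of which
   writes OP0; each step halves the number of lanes that still
   matter.  */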
12595
12596 void
12597 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12598 rtx (*reduc) (rtx, rtx, rtx))
12599 {
12600 machine_mode inner = GET_MODE_INNER (mode);
12601 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12602 rtx tmpsum = op1;
12603
12604 for (i = parts / 2; i >= 1; i /= 2)
12605 {
12606 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12607 emit_insn (reduc (dest, tmpsum, tmpsum));
12608 tmpsum = dest;
12609 }
12610 }
12611
12612 /* If VALS is a vector constant that can be loaded into a register
12613 using VDUP, generate instructions to do so and return an RTX to
12614 assign to the register. Otherwise return NULL_RTX. */
12615
12616 static rtx
12617 neon_vdup_constant (rtx vals)
12618 {
12619 machine_mode mode = GET_MODE (vals);
12620 machine_mode inner_mode = GET_MODE_INNER (mode);
12621 int n_elts = GET_MODE_NUNITS (mode);
12622 bool all_same = true;
12623 rtx x;
12624 int i;
12625
12626 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12627 return NULL_RTX;
12628
12629 for (i = 0; i < n_elts; ++i)
12630 {
12631 x = XVECEXP (vals, 0, i);
12632 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12633 all_same = false;
12634 }
12635
12636 if (!all_same)
12637 /* The elements are not all the same. We could handle repeating
12638 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12639 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12640 vdup.i16). */
12641 return NULL_RTX;
12642
12643 /* We can load this constant by using VDUP and a constant in a
12644 single ARM register. This will be cheaper than a vector
12645 load. */
12646
12647 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12648 return gen_rtx_VEC_DUPLICATE (mode, x);
12649 }
12650
12651 /* Generate code to load VALS, which is a PARALLEL containing only
12652 constants (for vec_init) or CONST_VECTOR, efficiently into a
12653 register. Returns an RTX to copy into the register, or NULL_RTX
12654 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12655
12656 rtx
12657 neon_make_constant (rtx vals)
12658 {
12659 machine_mode mode = GET_MODE (vals);
12660 rtx target;
12661 rtx const_vec = NULL_RTX;
12662 int n_elts = GET_MODE_NUNITS (mode);
12663 int n_const = 0;
12664 int i;
12665
12666 if (GET_CODE (vals) == CONST_VECTOR)
12667 const_vec = vals;
12668 else if (GET_CODE (vals) == PARALLEL)
12669 {
12670 /* A CONST_VECTOR must contain only CONST_INTs and
12671 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12672 Only store valid constants in a CONST_VECTOR. */
12673 for (i = 0; i < n_elts; ++i)
12674 {
12675 rtx x = XVECEXP (vals, 0, i);
12676 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12677 n_const++;
12678 }
12679 if (n_const == n_elts)
12680 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12681 }
12682 else
12683 gcc_unreachable ();
12684
12685 if (const_vec != NULL
12686 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12687 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12688 return const_vec;
12689 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12690 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12691 pipeline cycle; creating the constant takes one or two ARM
12692 pipeline cycles. */
12693 return target;
12694 else if (const_vec != NULL_RTX)
12695 /* Load from constant pool. On Cortex-A8 this takes two cycles
12696 (for either double or quad vectors). We can not take advantage
12697 of single-cycle VLD1 because we need a PC-relative addressing
12698 mode. */
12699 return const_vec;
12700 else
12701 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12702 We can not construct an initializer. */
12703 return NULL_RTX;
12704 }
12705
12706 /* Initialize vector TARGET to VALS. */
12707
12708 void
12709 neon_expand_vector_init (rtx target, rtx vals)
12710 {
12711 machine_mode mode = GET_MODE (target);
12712 machine_mode inner_mode = GET_MODE_INNER (mode);
12713 int n_elts = GET_MODE_NUNITS (mode);
12714 int n_var = 0, one_var = -1;
12715 bool all_same = true;
12716 rtx x, mem;
12717 int i;
12718
12719 for (i = 0; i < n_elts; ++i)
12720 {
12721 x = XVECEXP (vals, 0, i);
12722 if (!CONSTANT_P (x))
12723 ++n_var, one_var = i;
12724
12725 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12726 all_same = false;
12727 }
12728
12729 if (n_var == 0)
12730 {
12731 rtx constant = neon_make_constant (vals);
12732 if (constant != NULL_RTX)
12733 {
12734 emit_move_insn (target, constant);
12735 return;
12736 }
12737 }
12738
12739 /* Splat a single non-constant element if we can. */
12740 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12741 {
12742 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12743 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12744 return;
12745 }
12746
12747 /* One field is non-constant. Load constant then overwrite varying
12748 field. This is more efficient than using the stack. */
12749 if (n_var == 1)
12750 {
12751 rtx copy = copy_rtx (vals);
12752 rtx index = GEN_INT (one_var);
12753
12754 /* Load constant part of vector, substitute neighboring value for
12755 varying element. */
12756 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12757 neon_expand_vector_init (target, copy);
12758
12759 /* Insert variable. */
12760 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12761 switch (mode)
12762 {
12763 case V8QImode:
12764 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12765 break;
12766 case V16QImode:
12767 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12768 break;
12769 case V4HImode:
12770 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12771 break;
12772 case V8HImode:
12773 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12774 break;
12775 case V2SImode:
12776 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12777 break;
12778 case V4SImode:
12779 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12780 break;
12781 case V2SFmode:
12782 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12783 break;
12784 case V4SFmode:
12785 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12786 break;
12787 case V2DImode:
12788 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12789 break;
12790 default:
12791 gcc_unreachable ();
12792 }
12793 return;
12794 }
12795
12796 /* Construct the vector in memory one field at a time
12797 and load the whole vector. */
12798 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12799 for (i = 0; i < n_elts; i++)
12800 emit_move_insn (adjust_address_nv (mem, inner_mode,
12801 i * GET_MODE_SIZE (inner_mode)),
12802 XVECEXP (vals, 0, i));
12803 emit_move_insn (target, mem);
12804 }
12805
12806 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12807 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12808 reported source locations are bogus. */
12809
12810 static void
12811 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12812 const char *err)
12813 {
12814 HOST_WIDE_INT lane;
12815
12816 gcc_assert (CONST_INT_P (operand));
12817
12818 lane = INTVAL (operand);
12819
12820 if (lane < low || lane >= high)
12821 error (err);
12822 }
12823
12824 /* Bounds-check lanes. */
12825
12826 void
12827 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12828 {
12829 bounds_check (operand, low, high, "lane out of range");
12830 }
12831
12832 /* Bounds-check constants. */
12833
12834 void
12835 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12836 {
12837 bounds_check (operand, low, high, "constant out of range");
12838 }
12839
12840 HOST_WIDE_INT
12841 neon_element_bits (machine_mode mode)
12842 {
12843 if (mode == DImode)
12844 return GET_MODE_BITSIZE (mode);
12845 else
12846 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12847 }
12848
12849 \f
12850 /* Predicates for `match_operand' and `match_operator'. */
12851
12852 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12853 WB is true if full writeback address modes are allowed and is false
12854 if limited writeback address modes (POST_INC and PRE_DEC) are
12855 allowed. */
12856
12857 int
12858 arm_coproc_mem_operand (rtx op, bool wb)
12859 {
12860 rtx ind;
12861
12862 /* Reject eliminable registers. */
12863 if (! (reload_in_progress || reload_completed || lra_in_progress)
12864 && ( reg_mentioned_p (frame_pointer_rtx, op)
12865 || reg_mentioned_p (arg_pointer_rtx, op)
12866 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12867 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12868 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12869 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12870 return FALSE;
12871
12872 /* Constants are converted into offsets from labels. */
12873 if (!MEM_P (op))
12874 return FALSE;
12875
12876 ind = XEXP (op, 0);
12877
12878 if (reload_completed
12879 && (GET_CODE (ind) == LABEL_REF
12880 || (GET_CODE (ind) == CONST
12881 && GET_CODE (XEXP (ind, 0)) == PLUS
12882 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12883 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12884 return TRUE;
12885
12886 /* Match: (mem (reg)). */
12887 if (REG_P (ind))
12888 return arm_address_register_rtx_p (ind, 0);
12889
12890 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
12891 acceptable in any case (subject to verification by
12892 arm_address_register_rtx_p). We need WB to be true to accept
12893 PRE_INC and POST_DEC. */
12894 if (GET_CODE (ind) == POST_INC
12895 || GET_CODE (ind) == PRE_DEC
12896 || (wb
12897 && (GET_CODE (ind) == PRE_INC
12898 || GET_CODE (ind) == POST_DEC)))
12899 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12900
12901 if (wb
12902 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12903 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12904 && GET_CODE (XEXP (ind, 1)) == PLUS
12905 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12906 ind = XEXP (ind, 1);
12907
12908 /* Match:
12909 (plus (reg)
12910 (const)). */
12911 if (GET_CODE (ind) == PLUS
12912 && REG_P (XEXP (ind, 0))
12913 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12914 && CONST_INT_P (XEXP (ind, 1))
12915 && INTVAL (XEXP (ind, 1)) > -1024
12916 && INTVAL (XEXP (ind, 1)) < 1024
12917 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12918 return TRUE;
12919
12920 return FALSE;
12921 }
12922
12923 /* Return TRUE if OP is a memory operand from/to which we can load or store
12924 a vector. TYPE is one of the following values:
12925 0 - Vector load/store (vldr)
12926 1 - Core registers (ldm)
12927 2 - Element/structure loads (vld1)
12928 */
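/* Rough illustrative summary of the checks below (the code is
   authoritative):
     type 0 (vldr/vstr): [rN], post_inc, pre_dec, and [rN, #imm] with
                         imm a multiple of 4 in (-1024, 1024)
                         (limited to 1016 for quad modes);
     type 1 (ldm):       [rN] only;
     type 2 (vld1):      [rN], post_inc, and post-increment by a
                         register (post_modify). */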
12929 int
12930 neon_vector_mem_operand (rtx op, int type, bool strict)
12931 {
12932 rtx ind;
12933
12934 /* Reject eliminable registers. */
12935 if (! (reload_in_progress || reload_completed)
12936 && ( reg_mentioned_p (frame_pointer_rtx, op)
12937 || reg_mentioned_p (arg_pointer_rtx, op)
12938 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12939 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12940 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12941 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12942 return !strict;
12943
12944 /* Constants are converted into offsets from labels. */
12945 if (!MEM_P (op))
12946 return FALSE;
12947
12948 ind = XEXP (op, 0);
12949
12950 if (reload_completed
12951 && (GET_CODE (ind) == LABEL_REF
12952 || (GET_CODE (ind) == CONST
12953 && GET_CODE (XEXP (ind, 0)) == PLUS
12954 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12955 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12956 return TRUE;
12957
12958 /* Match: (mem (reg)). */
12959 if (REG_P (ind))
12960 return arm_address_register_rtx_p (ind, 0);
12961
12962 /* Allow post-increment with Neon registers. */
12963 if ((type != 1 && GET_CODE (ind) == POST_INC)
12964 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12965 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12966
12967 /* Allow post-increment by register for VLDn. */
12968 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12969 && GET_CODE (XEXP (ind, 1)) == PLUS
12970 && REG_P (XEXP (XEXP (ind, 1), 1)))
12971 return true;
12972
12973 /* Match:
12974 (plus (reg)
12975 (const)). */
12976 if (type == 0
12977 && GET_CODE (ind) == PLUS
12978 && REG_P (XEXP (ind, 0))
12979 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12980 && CONST_INT_P (XEXP (ind, 1))
12981 && INTVAL (XEXP (ind, 1)) > -1024
12982 /* For quad modes, we restrict the constant offset to be slightly less
12983 than what the instruction format permits. We have no such constraint
12984 on double mode offsets. (This must match arm_legitimate_index_p.) */
12985 && (INTVAL (XEXP (ind, 1))
12986 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12987 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12988 return TRUE;
12989
12990 return FALSE;
12991 }
12992
12993 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12994 type. */
12995 int
12996 neon_struct_mem_operand (rtx op)
12997 {
12998 rtx ind;
12999
13000 /* Reject eliminable registers. */
13001 if (! (reload_in_progress || reload_completed)
13002 && ( reg_mentioned_p (frame_pointer_rtx, op)
13003 || reg_mentioned_p (arg_pointer_rtx, op)
13004 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13005 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13006 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13007 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13008 return FALSE;
13009
13010 /* Constants are converted into offsets from labels. */
13011 if (!MEM_P (op))
13012 return FALSE;
13013
13014 ind = XEXP (op, 0);
13015
13016 if (reload_completed
13017 && (GET_CODE (ind) == LABEL_REF
13018 || (GET_CODE (ind) == CONST
13019 && GET_CODE (XEXP (ind, 0)) == PLUS
13020 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13021 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13022 return TRUE;
13023
13024 /* Match: (mem (reg)). */
13025 if (REG_P (ind))
13026 return arm_address_register_rtx_p (ind, 0);
13027
13028 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13029 if (GET_CODE (ind) == POST_INC
13030 || GET_CODE (ind) == PRE_DEC)
13031 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13032
13033 return FALSE;
13034 }
13035
13036 /* Return true if X is a register that will be eliminated later on. */
13037 int
13038 arm_eliminable_register (rtx x)
13039 {
13040 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13041 || REGNO (x) == ARG_POINTER_REGNUM
13042 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13043 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13044 }
13045
13046 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13047 coprocessor registers. Otherwise return NO_REGS. */
13048
13049 enum reg_class
13050 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13051 {
13052 if (mode == HFmode)
13053 {
13054 if (!TARGET_NEON_FP16)
13055 return GENERAL_REGS;
13056 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13057 return NO_REGS;
13058 return GENERAL_REGS;
13059 }
13060
13061 /* The neon move patterns handle all legitimate vector and struct
13062 addresses. */
13063 if (TARGET_NEON
13064 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13065 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13066 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13067 || VALID_NEON_STRUCT_MODE (mode)))
13068 return NO_REGS;
13069
13070 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13071 return NO_REGS;
13072
13073 return GENERAL_REGS;
13074 }
13075
13076 /* Values which must be returned in the most-significant end of the return
13077 register. */
13078
13079 static bool
13080 arm_return_in_msb (const_tree valtype)
13081 {
13082 return (TARGET_AAPCS_BASED
13083 && BYTES_BIG_ENDIAN
13084 && (AGGREGATE_TYPE_P (valtype)
13085 || TREE_CODE (valtype) == COMPLEX_TYPE
13086 || FIXED_POINT_TYPE_P (valtype)));
13087 }
13088
13089 /* Return TRUE if X references a SYMBOL_REF. */
13090 int
13091 symbol_mentioned_p (rtx x)
13092 {
13093 const char * fmt;
13094 int i;
13095
13096 if (GET_CODE (x) == SYMBOL_REF)
13097 return 1;
13098
13099 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13100 are constant offsets, not symbols. */
13101 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13102 return 0;
13103
13104 fmt = GET_RTX_FORMAT (GET_CODE (x));
13105
13106 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13107 {
13108 if (fmt[i] == 'E')
13109 {
13110 int j;
13111
13112 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13113 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13114 return 1;
13115 }
13116 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13117 return 1;
13118 }
13119
13120 return 0;
13121 }
13122
13123 /* Return TRUE if X references a LABEL_REF. */
13124 int
13125 label_mentioned_p (rtx x)
13126 {
13127 const char * fmt;
13128 int i;
13129
13130 if (GET_CODE (x) == LABEL_REF)
13131 return 1;
13132
13133 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13134 instruction, but they are constant offsets, not symbols. */
13135 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13136 return 0;
13137
13138 fmt = GET_RTX_FORMAT (GET_CODE (x));
13139 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13140 {
13141 if (fmt[i] == 'E')
13142 {
13143 int j;
13144
13145 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13146 if (label_mentioned_p (XVECEXP (x, i, j)))
13147 return 1;
13148 }
13149 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13150 return 1;
13151 }
13152
13153 return 0;
13154 }
13155
13156 int
13157 tls_mentioned_p (rtx x)
13158 {
13159 switch (GET_CODE (x))
13160 {
13161 case CONST:
13162 return tls_mentioned_p (XEXP (x, 0));
13163
13164 case UNSPEC:
13165 if (XINT (x, 1) == UNSPEC_TLS)
13166 return 1;
13167
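      /* Fall through.  */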
13168 default:
13169 return 0;
13170 }
13171 }
13172
13173 /* Must not copy any rtx that uses a pc-relative address. */
13174
13175 static bool
13176 arm_cannot_copy_insn_p (rtx_insn *insn)
13177 {
13178 /* The tls call insn cannot be copied, as it is paired with a data
13179 word. */
13180 if (recog_memoized (insn) == CODE_FOR_tlscall)
13181 return true;
13182
13183 subrtx_iterator::array_type array;
13184 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13185 {
13186 const_rtx x = *iter;
13187 if (GET_CODE (x) == UNSPEC
13188 && (XINT (x, 1) == UNSPEC_PIC_BASE
13189 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13190 return true;
13191 }
13192 return false;
13193 }
13194
13195 enum rtx_code
13196 minmax_code (rtx x)
13197 {
13198 enum rtx_code code = GET_CODE (x);
13199
13200 switch (code)
13201 {
13202 case SMAX:
13203 return GE;
13204 case SMIN:
13205 return LE;
13206 case UMIN:
13207 return LEU;
13208 case UMAX:
13209 return GEU;
13210 default:
13211 gcc_unreachable ();
13212 }
13213 }
13214
13215 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13216
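/* Worked example (illustrative): clamping to [0, 255] gives
   exact_log2 (255 + 1) == 8 and LO_BOUND == 0, so *MASK == 8 and
   *SIGNED_SAT == false, i.e. a "usat Rd, #8, Rm".  Clamping to
   [-128, 127] gives LO_BOUND == -HI_BOUND - 1 with log == 7, so
   *MASK == 8 and *SIGNED_SAT == true, i.e. an "ssat Rd, #8, Rm". */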
13217 bool
13218 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13219 int *mask, bool *signed_sat)
13220 {
13221 /* The high bound must be a power of two minus one. */
13222 int log = exact_log2 (INTVAL (hi_bound) + 1);
13223 if (log == -1)
13224 return false;
13225
13226 /* The low bound is either zero (for usat) or one less than the
13227 negation of the high bound (for ssat). */
13228 if (INTVAL (lo_bound) == 0)
13229 {
13230 if (mask)
13231 *mask = log;
13232 if (signed_sat)
13233 *signed_sat = false;
13234
13235 return true;
13236 }
13237
13238 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13239 {
13240 if (mask)
13241 *mask = log + 1;
13242 if (signed_sat)
13243 *signed_sat = true;
13244
13245 return true;
13246 }
13247
13248 return false;
13249 }
13250
13251 /* Return 1 if memory locations are adjacent. */
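/* For example (illustrative), (mem (plus (reg r4) (const_int 8))) and
   (mem (plus (reg r4) (const_int 12))) are adjacent: same base register,
   offsets differing by exactly 4.  On cores with load scheduling
   (arm_ld_sched) the pair is only treated as adjacent when optimizing
   for size; see the body below for the full conditions. */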
13252 int
13253 adjacent_mem_locations (rtx a, rtx b)
13254 {
13255 /* We don't guarantee to preserve the order of these memory refs. */
13256 if (volatile_refs_p (a) || volatile_refs_p (b))
13257 return 0;
13258
13259 if ((REG_P (XEXP (a, 0))
13260 || (GET_CODE (XEXP (a, 0)) == PLUS
13261 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13262 && (REG_P (XEXP (b, 0))
13263 || (GET_CODE (XEXP (b, 0)) == PLUS
13264 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13265 {
13266 HOST_WIDE_INT val0 = 0, val1 = 0;
13267 rtx reg0, reg1;
13268 int val_diff;
13269
13270 if (GET_CODE (XEXP (a, 0)) == PLUS)
13271 {
13272 reg0 = XEXP (XEXP (a, 0), 0);
13273 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13274 }
13275 else
13276 reg0 = XEXP (a, 0);
13277
13278 if (GET_CODE (XEXP (b, 0)) == PLUS)
13279 {
13280 reg1 = XEXP (XEXP (b, 0), 0);
13281 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13282 }
13283 else
13284 reg1 = XEXP (b, 0);
13285
13286 /* Don't accept any offset that will require multiple
13287 instructions to handle, since this would cause the
13288 arith_adjacentmem pattern to output an overlong sequence. */
13289 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13290 return 0;
13291
13292 /* Don't allow an eliminable register: register elimination can make
13293 the offset too large. */
13294 if (arm_eliminable_register (reg0))
13295 return 0;
13296
13297 val_diff = val1 - val0;
13298
13299 if (arm_ld_sched)
13300 {
13301 /* If the target has load delay slots, then there's no benefit
13302 to using an ldm instruction unless the offset is zero and
13303 we are optimizing for size. */
13304 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13305 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13306 && (val_diff == 4 || val_diff == -4));
13307 }
13308
13309 return ((REGNO (reg0) == REGNO (reg1))
13310 && (val_diff == 4 || val_diff == -4));
13311 }
13312
13313 return 0;
13314 }
13315
13316 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13317 for load operations, false for store operations. CONSECUTIVE is true
13318 if the register numbers in the operation must be consecutive in the register
13319 bank. RETURN_PC is true if the value is to be loaded into the PC.
13320 The pattern we are trying to match for load is:
13321 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13322 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13323 :
13324 :
13325 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13326 ]
13327 where
13328 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13329 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13330 3. If consecutive is TRUE, then for kth register being loaded,
13331 REGNO (R_dk) = REGNO (R_d0) + k.
13332 The pattern for store is similar. */
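/* A concrete (illustrative) load example for SImode without write-back:
     (parallel [(set (reg r4) (mem (reg r0)))
                (set (reg r5) (mem (plus (reg r0) (const_int 4))))])
   satisfies rules 1-3 above and is accepted with LOAD == true,
   MODE == SImode, CONSECUTIVE == false and RETURN_PC == false. */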
13333 bool
13334 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13335 bool consecutive, bool return_pc)
13336 {
13337 HOST_WIDE_INT count = XVECLEN (op, 0);
13338 rtx reg, mem, addr;
13339 unsigned regno;
13340 unsigned first_regno;
13341 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13342 rtx elt;
13343 bool addr_reg_in_reglist = false;
13344 bool update = false;
13345 int reg_increment;
13346 int offset_adj;
13347 int regs_per_val;
13348
13349 /* If not in SImode, then registers must be consecutive
13350 (e.g., VLDM instructions for DFmode). */
13351 gcc_assert ((mode == SImode) || consecutive);
13352 /* Setting return_pc for stores is illegal. */
13353 gcc_assert (!return_pc || load);
13354
13355 /* Set up the increments and the regs per val based on the mode. */
13356 reg_increment = GET_MODE_SIZE (mode);
13357 regs_per_val = reg_increment / 4;
13358 offset_adj = return_pc ? 1 : 0;
13359
13360 if (count <= 1
13361 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13362 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13363 return false;
13364
13365 /* Check if this is a write-back. */
13366 elt = XVECEXP (op, 0, offset_adj);
13367 if (GET_CODE (SET_SRC (elt)) == PLUS)
13368 {
13369 i++;
13370 base = 1;
13371 update = true;
13372
13373 /* The offset adjustment must be the number of registers being
13374 popped times the size of a single register. */
13375 if (!REG_P (SET_DEST (elt))
13376 || !REG_P (XEXP (SET_SRC (elt), 0))
13377 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13378 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13379 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13380 ((count - 1 - offset_adj) * reg_increment))
13381 return false;
13382 }
13383
13384 i = i + offset_adj;
13385 base = base + offset_adj;
13386 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13387 success depends on the type: VLDM can do just one reg,
13388 LDM must do at least two. */
13389 if ((count <= i) && (mode == SImode))
13390 return false;
13391
13392 elt = XVECEXP (op, 0, i - 1);
13393 if (GET_CODE (elt) != SET)
13394 return false;
13395
13396 if (load)
13397 {
13398 reg = SET_DEST (elt);
13399 mem = SET_SRC (elt);
13400 }
13401 else
13402 {
13403 reg = SET_SRC (elt);
13404 mem = SET_DEST (elt);
13405 }
13406
13407 if (!REG_P (reg) || !MEM_P (mem))
13408 return false;
13409
13410 regno = REGNO (reg);
13411 first_regno = regno;
13412 addr = XEXP (mem, 0);
13413 if (GET_CODE (addr) == PLUS)
13414 {
13415 if (!CONST_INT_P (XEXP (addr, 1)))
13416 return false;
13417
13418 offset = INTVAL (XEXP (addr, 1));
13419 addr = XEXP (addr, 0);
13420 }
13421
13422 if (!REG_P (addr))
13423 return false;
13424
13425 /* Don't allow SP to be loaded unless it is also the base register. It
13426 guarantees that SP is reset correctly when an LDM instruction
13427 is interrupted. Otherwise, we might end up with a corrupt stack. */
13428 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13429 return false;
13430
13431 for (; i < count; i++)
13432 {
13433 elt = XVECEXP (op, 0, i);
13434 if (GET_CODE (elt) != SET)
13435 return false;
13436
13437 if (load)
13438 {
13439 reg = SET_DEST (elt);
13440 mem = SET_SRC (elt);
13441 }
13442 else
13443 {
13444 reg = SET_SRC (elt);
13445 mem = SET_DEST (elt);
13446 }
13447
13448 if (!REG_P (reg)
13449 || GET_MODE (reg) != mode
13450 || REGNO (reg) <= regno
13451 || (consecutive
13452 && (REGNO (reg) !=
13453 (unsigned int) (first_regno + regs_per_val * (i - base))))
13454 /* Don't allow SP to be loaded unless it is also the base register. It
13455 guarantees that SP is reset correctly when an LDM instruction
13456 is interrupted. Otherwise, we might end up with a corrupt stack. */
13457 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13458 || !MEM_P (mem)
13459 || GET_MODE (mem) != mode
13460 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13461 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13462 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13463 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13464 offset + (i - base) * reg_increment))
13465 && (!REG_P (XEXP (mem, 0))
13466 || offset + (i - base) * reg_increment != 0)))
13467 return false;
13468
13469 regno = REGNO (reg);
13470 if (regno == REGNO (addr))
13471 addr_reg_in_reglist = true;
13472 }
13473
13474 if (load)
13475 {
13476 if (update && addr_reg_in_reglist)
13477 return false;
13478
13479 /* For Thumb-1, the address register is always modified, either by
13480 write-back or by an explicit load. If the pattern does not describe an
13481 update, then the address register must be in the list of loaded registers. */
13482 if (TARGET_THUMB1)
13483 return update || addr_reg_in_reglist;
13484 }
13485
13486 return true;
13487 }
13488
13489 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13490 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13491 instruction. ADD_OFFSET is nonzero if the base address register needs
13492 to be modified with an add instruction before we can use it. */
13493
13494 static bool
13495 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13496 int nops, HOST_WIDE_INT add_offset)
13497 {
13498 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an
13499 ldm if the offset isn't small enough. The reason 2 ldrs are faster
13500 is because these ARMs are able to do more than one cache access
13501 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13502 whilst the ARM8 has a double bandwidth cache. This means that
13503 these cores can do both an instruction fetch and a data fetch in
13504 a single cycle, so the trick of calculating the address into a
13505 scratch register (one of the result regs) and then doing a load
13506 multiple actually becomes slower (and no smaller in code size).
13507 That is the transformation
13508
13509 ldr rd1, [rbase + offset]
13510 ldr rd2, [rbase + offset + 4]
13511
13512 to
13513
13514 add rd1, rbase, offset
13515 ldmia rd1, {rd1, rd2}
13516
13517 produces worse code -- '3 cycles + any stalls on rd2' instead of
13518 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13519 access per cycle, the first sequence could never complete in less
13520 than 6 cycles, whereas the ldm sequence would only take 5 and
13521 would make better use of sequential accesses if not hitting the
13522 cache.
13523
13524 We cheat here and test 'arm_ld_sched' which we currently know to
13525 only be true for the ARM8, ARM9 and StrongARM. If this ever
13526 changes, then the test below needs to be reworked. */
13527 if (nops == 2 && arm_ld_sched && add_offset != 0)
13528 return false;
13529
13530 /* XScale has load-store double instructions, but they have stricter
13531 alignment requirements than load-store multiple, so we cannot
13532 use them.
13533
13534 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13535 the pipeline until completion.
13536
13537 NREGS CYCLES
13538 1 3
13539 2 4
13540 3 5
13541 4 6
13542
13543 An ldr instruction takes 1-3 cycles, but does not block the
13544 pipeline.
13545
13546 NREGS CYCLES
13547 1 1-3
13548 2 2-6
13549 3 3-9
13550 4 4-12
13551
13552 Best case ldr will always win. However, the more ldr instructions
13553 we issue, the less likely we are to be able to schedule them well.
13554 Using ldr instructions also increases code size.
13555
13556 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13557 for counts of 3 or 4 regs. */
13558 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13559 return false;
13560 return true;
13561 }
13562
13563 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13564 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13565 an array ORDER which describes the sequence to use when accessing the
13566 offsets that produces an ascending order. In this sequence, each
13567 offset must be larger by exactly 4 than the previous one. ORDER[0]
13568 must have been filled in with the lowest offset by the caller.
13569 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13570 we use to verify that ORDER produces an ascending order of registers.
13571 Return true if it was possible to construct such an order, false if
13572 not. */
13573
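/* Worked example (illustrative): UNSORTED_OFFSETS == {8, 4, 12, 0} with
   ORDER[0] == 3 (the index of the lowest offset) produces
   ORDER == {3, 1, 0, 2}, i.e. offsets 0, 4, 8, 12 in ascending order.
   A missing or duplicated offset makes the function return false. */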
13574 static bool
13575 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13576 int *unsorted_regs)
13577 {
13578 int i;
13579 for (i = 1; i < nops; i++)
13580 {
13581 int j;
13582
13583 order[i] = order[i - 1];
13584 for (j = 0; j < nops; j++)
13585 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13586 {
13587 /* We must find exactly one offset that is higher than the
13588 previous one by 4. */
13589 if (order[i] != order[i - 1])
13590 return false;
13591 order[i] = j;
13592 }
13593 if (order[i] == order[i - 1])
13594 return false;
13595 /* The register numbers must be ascending. */
13596 if (unsorted_regs != NULL
13597 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13598 return false;
13599 }
13600 return true;
13601 }
13602
13603 /* Used to determine in a peephole whether a sequence of load
13604 instructions can be changed into a load-multiple instruction.
13605 NOPS is the number of separate load instructions we are examining. The
13606 first NOPS entries in OPERANDS are the destination registers, the
13607 next NOPS entries are memory operands. If this function is
13608 successful, *BASE is set to the common base register of the memory
13609 accesses; *LOAD_OFFSET is set to the first memory location's offset
13610 from that base register.
13611 REGS is an array filled in with the destination register numbers.
13612 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13613 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13614 the sequence of registers in REGS matches the loads from ascending memory
13615 locations, and the function verifies that the register numbers are
13616 themselves ascending. If CHECK_REGS is false, the register numbers
13617 are stored in the order they are found in the operands. */
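/* Illustrative peephole input (assuming r0 is the common base register):
     ldr r4, [r0]
     ldr r5, [r0, #4]
   is recognized with *LOAD_OFFSET == 0 and ldm_case 1 (ldmia), letting
   the caller emit "ldm r0, {r4, r5}".  The other cases distinguish
   ldmib/ldmda/ldmdb and the variant that needs an add of the base
   register first (case 5). */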
13618 static int
13619 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13620 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13621 {
13622 int unsorted_regs[MAX_LDM_STM_OPS];
13623 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13624 int order[MAX_LDM_STM_OPS];
13625 rtx base_reg_rtx = NULL;
13626 int base_reg = -1;
13627 int i, ldm_case;
13628
13629 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13630 easily extended if required. */
13631 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13632
13633 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13634
13635 /* Loop over the operands and check that the memory references are
13636 suitable (i.e. immediate offsets from the same base register). At
13637 the same time, extract the target register, and the memory
13638 offsets. */
13639 for (i = 0; i < nops; i++)
13640 {
13641 rtx reg;
13642 rtx offset;
13643
13644 /* Convert a subreg of a mem into the mem itself. */
13645 if (GET_CODE (operands[nops + i]) == SUBREG)
13646 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13647
13648 gcc_assert (MEM_P (operands[nops + i]));
13649
13650 /* Don't reorder volatile memory references; it doesn't seem worth
13651 looking for the case where the order is ok anyway. */
13652 if (MEM_VOLATILE_P (operands[nops + i]))
13653 return 0;
13654
13655 offset = const0_rtx;
13656
13657 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13658 || (GET_CODE (reg) == SUBREG
13659 && REG_P (reg = SUBREG_REG (reg))))
13660 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13661 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13662 || (GET_CODE (reg) == SUBREG
13663 && REG_P (reg = SUBREG_REG (reg))))
13664 && (CONST_INT_P (offset
13665 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13666 {
13667 if (i == 0)
13668 {
13669 base_reg = REGNO (reg);
13670 base_reg_rtx = reg;
13671 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13672 return 0;
13673 }
13674 else if (base_reg != (int) REGNO (reg))
13675 /* Not addressed from the same base register. */
13676 return 0;
13677
13678 unsorted_regs[i] = (REG_P (operands[i])
13679 ? REGNO (operands[i])
13680 : REGNO (SUBREG_REG (operands[i])));
13681
13682 /* If it isn't an integer register, or if it overwrites the
13683 base register but isn't the last insn in the list, then
13684 we can't do this. */
13685 if (unsorted_regs[i] < 0
13686 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13687 || unsorted_regs[i] > 14
13688 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13689 return 0;
13690
13691 /* Don't allow SP to be loaded unless it is also the base
13692 register. It guarantees that SP is reset correctly when
13693 an LDM instruction is interrupted. Otherwise, we might
13694 end up with a corrupt stack. */
13695 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13696 return 0;
13697
13698 unsorted_offsets[i] = INTVAL (offset);
13699 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13700 order[0] = i;
13701 }
13702 else
13703 /* Not a suitable memory address. */
13704 return 0;
13705 }
13706
13707 /* All the useful information has now been extracted from the
13708 operands into unsorted_regs and unsorted_offsets; additionally,
13709 order[0] has been set to the lowest offset in the list. Sort
13710 the offsets into order, verifying that they are adjacent, and
13711 check that the register numbers are ascending. */
13712 if (!compute_offset_order (nops, unsorted_offsets, order,
13713 check_regs ? unsorted_regs : NULL))
13714 return 0;
13715
13716 if (saved_order)
13717 memcpy (saved_order, order, sizeof order);
13718
13719 if (base)
13720 {
13721 *base = base_reg;
13722
13723 for (i = 0; i < nops; i++)
13724 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13725
13726 *load_offset = unsorted_offsets[order[0]];
13727 }
13728
13729 if (TARGET_THUMB1
13730 && !peep2_reg_dead_p (nops, base_reg_rtx))
13731 return 0;
13732
13733 if (unsorted_offsets[order[0]] == 0)
13734 ldm_case = 1; /* ldmia */
13735 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13736 ldm_case = 2; /* ldmib */
13737 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13738 ldm_case = 3; /* ldmda */
13739 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13740 ldm_case = 4; /* ldmdb */
13741 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13742 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13743 ldm_case = 5;
13744 else
13745 return 0;
13746
13747 if (!multiple_operation_profitable_p (false, nops,
13748 ldm_case == 5
13749 ? unsorted_offsets[order[0]] : 0))
13750 return 0;
13751
13752 return ldm_case;
13753 }
13754
13755 /* Used to determine in a peephole whether a sequence of store instructions can
13756 be changed into a store-multiple instruction.
13757 NOPS is the number of separate store instructions we are examining.
13758 NOPS_TOTAL is the total number of instructions recognized by the peephole
13759 pattern.
13760 The first NOPS entries in OPERANDS are the source registers, the next
13761 NOPS entries are memory operands. If this function is successful, *BASE is
13762 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13763 to the first memory location's offset from that base register. REGS is an
13764 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13765 likewise filled with the corresponding rtx's.
13766 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13767 numbers to an ascending order of stores.
13768 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13769 from ascending memory locations, and the function verifies that the register
13770 numbers are themselves ascending. If CHECK_REGS is false, the register
13771 numbers are stored in the order they are found in the operands. */
13772 static int
13773 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13774 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13775 HOST_WIDE_INT *load_offset, bool check_regs)
13776 {
13777 int unsorted_regs[MAX_LDM_STM_OPS];
13778 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13779 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13780 int order[MAX_LDM_STM_OPS];
13781 int base_reg = -1;
13782 rtx base_reg_rtx = NULL;
13783 int i, stm_case;
13784
13785 /* Write-back of the base register is currently only supported for Thumb-1. */
13786 int base_writeback = TARGET_THUMB1;
13787
13788 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13789 easily extended if required. */
13790 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13791
13792 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13793
13794 /* Loop over the operands and check that the memory references are
13795 suitable (i.e. immediate offsets from the same base register). At
13796 the same time, extract the target register, and the memory
13797 offsets. */
13798 for (i = 0; i < nops; i++)
13799 {
13800 rtx reg;
13801 rtx offset;
13802
13803 /* Convert a subreg of a mem into the mem itself. */
13804 if (GET_CODE (operands[nops + i]) == SUBREG)
13805 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13806
13807 gcc_assert (MEM_P (operands[nops + i]));
13808
13809 /* Don't reorder volatile memory references; it doesn't seem worth
13810 looking for the case where the order is ok anyway. */
13811 if (MEM_VOLATILE_P (operands[nops + i]))
13812 return 0;
13813
13814 offset = const0_rtx;
13815
13816 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13817 || (GET_CODE (reg) == SUBREG
13818 && REG_P (reg = SUBREG_REG (reg))))
13819 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13820 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13821 || (GET_CODE (reg) == SUBREG
13822 && REG_P (reg = SUBREG_REG (reg))))
13823 && (CONST_INT_P (offset
13824 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13825 {
13826 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13827 ? operands[i] : SUBREG_REG (operands[i]));
13828 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13829
13830 if (i == 0)
13831 {
13832 base_reg = REGNO (reg);
13833 base_reg_rtx = reg;
13834 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13835 return 0;
13836 }
13837 else if (base_reg != (int) REGNO (reg))
13838 /* Not addressed from the same base register. */
13839 return 0;
13840
13841 /* If it isn't an integer register, then we can't do this. */
13842 if (unsorted_regs[i] < 0
13843 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13844 /* The effects are unpredictable if the base register is
13845 both updated and stored. */
13846 || (base_writeback && unsorted_regs[i] == base_reg)
13847 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13848 || unsorted_regs[i] > 14)
13849 return 0;
13850
13851 unsorted_offsets[i] = INTVAL (offset);
13852 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13853 order[0] = i;
13854 }
13855 else
13856 /* Not a suitable memory address. */
13857 return 0;
13858 }
13859
13860 /* All the useful information has now been extracted from the
13861 operands into unsorted_regs and unsorted_offsets; additionally,
13862 order[0] has been set to the lowest offset in the list. Sort
13863 the offsets into order, verifying that they are adjacent, and
13864 check that the register numbers are ascending. */
13865 if (!compute_offset_order (nops, unsorted_offsets, order,
13866 check_regs ? unsorted_regs : NULL))
13867 return 0;
13868
13869 if (saved_order)
13870 memcpy (saved_order, order, sizeof order);
13871
13872 if (base)
13873 {
13874 *base = base_reg;
13875
13876 for (i = 0; i < nops; i++)
13877 {
13878 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13879 if (reg_rtxs)
13880 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13881 }
13882
13883 *load_offset = unsorted_offsets[order[0]];
13884 }
13885
13886 if (TARGET_THUMB1
13887 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13888 return 0;
13889
13890 if (unsorted_offsets[order[0]] == 0)
13891 stm_case = 1; /* stmia */
13892 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13893 stm_case = 2; /* stmib */
13894 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13895 stm_case = 3; /* stmda */
13896 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13897 stm_case = 4; /* stmdb */
13898 else
13899 return 0;
13900
13901 if (!multiple_operation_profitable_p (false, nops, 0))
13902 return 0;
13903
13904 return stm_case;
13905 }
13906 \f
13907 /* Routines for use in generating RTL. */
13908
13909 /* Generate a load-multiple instruction. COUNT is the number of loads in
13910 the instruction; REGS and MEMS are arrays containing the operands.
13911 BASEREG is the base register to be used in addressing the memory operands.
13912 WBACK_OFFSET is nonzero if the instruction should update the base
13913 register. */
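/* For example (illustrative), COUNT == 2, REGS == {4, 5} and
   WBACK_OFFSET == 8 produce a PARALLEL of the form:
     (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                (set (reg r4) (mem ...))
                (set (reg r5) (mem ...))])
   When the multi-register form is not profitable, an equivalent sequence
   of single moves is generated and returned instead. */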
13914
13915 static rtx
13916 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13917 HOST_WIDE_INT wback_offset)
13918 {
13919 int i = 0, j;
13920 rtx result;
13921
13922 if (!multiple_operation_profitable_p (false, count, 0))
13923 {
13924 rtx seq;
13925
13926 start_sequence ();
13927
13928 for (i = 0; i < count; i++)
13929 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13930
13931 if (wback_offset != 0)
13932 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13933
13934 seq = get_insns ();
13935 end_sequence ();
13936
13937 return seq;
13938 }
13939
13940 result = gen_rtx_PARALLEL (VOIDmode,
13941 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13942 if (wback_offset != 0)
13943 {
13944 XVECEXP (result, 0, 0)
13945 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13946 i = 1;
13947 count++;
13948 }
13949
13950 for (j = 0; i < count; i++, j++)
13951 XVECEXP (result, 0, i)
13952 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13953
13954 return result;
13955 }
13956
13957 /* Generate a store-multiple instruction. COUNT is the number of stores in
13958 the instruction; REGS and MEMS are arrays containing the operands.
13959 BASEREG is the base register to be used in addressing the memory operands.
13960 WBACK_OFFSET is nonzero if the instruction should update the base
13961 register. */
13962
13963 static rtx
13964 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13965 HOST_WIDE_INT wback_offset)
13966 {
13967 int i = 0, j;
13968 rtx result;
13969
13970 if (GET_CODE (basereg) == PLUS)
13971 basereg = XEXP (basereg, 0);
13972
13973 if (!multiple_operation_profitable_p (false, count, 0))
13974 {
13975 rtx seq;
13976
13977 start_sequence ();
13978
13979 for (i = 0; i < count; i++)
13980 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13981
13982 if (wback_offset != 0)
13983 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13984
13985 seq = get_insns ();
13986 end_sequence ();
13987
13988 return seq;
13989 }
13990
13991 result = gen_rtx_PARALLEL (VOIDmode,
13992 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13993 if (wback_offset != 0)
13994 {
13995 XVECEXP (result, 0, 0)
13996 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13997 i = 1;
13998 count++;
13999 }
14000
14001 for (j = 0; i < count; i++, j++)
14002 XVECEXP (result, 0, i)
14003 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14004
14005 return result;
14006 }
14007
14008 /* Generate either a load-multiple or a store-multiple instruction. This
14009 function can be used in situations where we can start with a single MEM
14010 rtx and adjust its address upwards.
14011 COUNT is the number of operations in the instruction, not counting a
14012 possible update of the base register. REGS is an array containing the
14013 register operands.
14014 BASEREG is the base register to be used in addressing the memory operands,
14015 which are constructed from BASEMEM.
14016 WRITE_BACK specifies whether the generated instruction should include an
14017 update of the base register.
14018 OFFSETP is used to pass an offset to and from this function; this offset
14019 is not used when constructing the address (instead BASEMEM should have an
14020 appropriate offset in its address); it is used only for setting
14021 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14022
14023 static rtx
14024 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14025 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14026 {
14027 rtx mems[MAX_LDM_STM_OPS];
14028 HOST_WIDE_INT offset = *offsetp;
14029 int i;
14030
14031 gcc_assert (count <= MAX_LDM_STM_OPS);
14032
14033 if (GET_CODE (basereg) == PLUS)
14034 basereg = XEXP (basereg, 0);
14035
14036 for (i = 0; i < count; i++)
14037 {
14038 rtx addr = plus_constant (Pmode, basereg, i * 4);
14039 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14040 offset += 4;
14041 }
14042
14043 if (write_back)
14044 *offsetp = offset;
14045
14046 if (is_load)
14047 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14048 write_back ? 4 * count : 0);
14049 else
14050 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14051 write_back ? 4 * count : 0);
14052 }
14053
14054 rtx
14055 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14056 rtx basemem, HOST_WIDE_INT *offsetp)
14057 {
14058 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14059 offsetp);
14060 }
14061
14062 rtx
14063 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14064 rtx basemem, HOST_WIDE_INT *offsetp)
14065 {
14066 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14067 offsetp);
14068 }
14069
14070 /* Called from a peephole2 expander to turn a sequence of loads into an
14071 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14072 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14073 is true if we can reorder the registers because they are used commutatively
14074 subsequently.
14075 Returns true iff we could generate a new instruction. */
14076
14077 bool
14078 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14079 {
14080 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14081 rtx mems[MAX_LDM_STM_OPS];
14082 int i, j, base_reg;
14083 rtx base_reg_rtx;
14084 HOST_WIDE_INT offset;
14085 int write_back = FALSE;
14086 int ldm_case;
14087 rtx addr;
14088
14089 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14090 &base_reg, &offset, !sort_regs);
14091
14092 if (ldm_case == 0)
14093 return false;
14094
14095 if (sort_regs)
14096 for (i = 0; i < nops - 1; i++)
14097 for (j = i + 1; j < nops; j++)
14098 if (regs[i] > regs[j])
14099 {
14100 int t = regs[i];
14101 regs[i] = regs[j];
14102 regs[j] = t;
14103 }
14104 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14105
14106 if (TARGET_THUMB1)
14107 {
14108 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14109 gcc_assert (ldm_case == 1 || ldm_case == 5);
14110 write_back = TRUE;
14111 }
14112
14113 if (ldm_case == 5)
14114 {
14115 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14116 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14117 offset = 0;
14118 if (!TARGET_THUMB1)
14119 {
14120 base_reg = regs[0];
14121 base_reg_rtx = newbase;
14122 }
14123 }
14124
14125 for (i = 0; i < nops; i++)
14126 {
14127 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14128 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14129 SImode, addr, 0);
14130 }
14131 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14132 write_back ? offset + i * 4 : 0));
14133 return true;
14134 }
14135
14136 /* Called from a peephole2 expander to turn a sequence of stores into an
14137 STM instruction. OPERANDS are the operands found by the peephole matcher;
14138 NOPS indicates how many separate stores we are trying to combine.
14139 Returns true iff we could generate a new instruction. */
14140
14141 bool
14142 gen_stm_seq (rtx *operands, int nops)
14143 {
14144 int i;
14145 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14146 rtx mems[MAX_LDM_STM_OPS];
14147 int base_reg;
14148 rtx base_reg_rtx;
14149 HOST_WIDE_INT offset;
14150 int write_back = FALSE;
14151 int stm_case;
14152 rtx addr;
14153 bool base_reg_dies;
14154
14155 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14156 mem_order, &base_reg, &offset, true);
14157
14158 if (stm_case == 0)
14159 return false;
14160
14161 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14162
14163 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14164 if (TARGET_THUMB1)
14165 {
14166 gcc_assert (base_reg_dies);
14167 write_back = TRUE;
14168 }
14169
14170 if (stm_case == 5)
14171 {
14172 gcc_assert (base_reg_dies);
14173 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14174 offset = 0;
14175 }
14176
14177 addr = plus_constant (Pmode, base_reg_rtx, offset);
14178
14179 for (i = 0; i < nops; i++)
14180 {
14181 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14182 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14183 SImode, addr, 0);
14184 }
14185 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14186 write_back ? offset + i * 4 : 0));
14187 return true;
14188 }
14189
14190 /* Called from a peephole2 expander to turn a sequence of stores that are
14191 preceded by constant loads into an STM instruction. OPERANDS are the
14192 operands found by the peephole matcher; NOPS indicates how many
14193 separate stores we are trying to combine; there are 2 * NOPS
14194 instructions in the peephole.
14195 Returns true iff we could generate a new instruction. */
14196
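/* Illustrative peephole input (2 * NOPS insns, here NOPS == 2):
     mov r4, #1
     mov r5, #2
     str r4, [r0]
     str r5, [r0, #4]
   The constants are re-emitted into (possibly renamed) registers and the
   stores become a single "stm r0, {r4, r5}".  Any register that is still
   live after the sequence must keep its original value, otherwise the
   function gives up. */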
14197 bool
14198 gen_const_stm_seq (rtx *operands, int nops)
14199 {
14200 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14201 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14202 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14203 rtx mems[MAX_LDM_STM_OPS];
14204 int base_reg;
14205 rtx base_reg_rtx;
14206 HOST_WIDE_INT offset;
14207 int write_back = FALSE;
14208 int stm_case;
14209 rtx addr;
14210 bool base_reg_dies;
14211 int i, j;
14212 HARD_REG_SET allocated;
14213
14214 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14215 mem_order, &base_reg, &offset, false);
14216
14217 if (stm_case == 0)
14218 return false;
14219
14220 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14221
14222 /* If the same register is used more than once, try to find a free
14223 register. */
14224 CLEAR_HARD_REG_SET (allocated);
14225 for (i = 0; i < nops; i++)
14226 {
14227 for (j = i + 1; j < nops; j++)
14228 if (regs[i] == regs[j])
14229 {
14230 rtx t = peep2_find_free_register (0, nops * 2,
14231 TARGET_THUMB1 ? "l" : "r",
14232 SImode, &allocated);
14233 if (t == NULL_RTX)
14234 return false;
14235 reg_rtxs[i] = t;
14236 regs[i] = REGNO (t);
14237 }
14238 }
14239
14240 /* Compute an ordering that maps the register numbers to an ascending
14241 sequence. */
14242 reg_order[0] = 0;
14243 for (i = 0; i < nops; i++)
14244 if (regs[i] < regs[reg_order[0]])
14245 reg_order[0] = i;
14246
14247 for (i = 1; i < nops; i++)
14248 {
14249 int this_order = reg_order[i - 1];
14250 for (j = 0; j < nops; j++)
14251 if (regs[j] > regs[reg_order[i - 1]]
14252 && (this_order == reg_order[i - 1]
14253 || regs[j] < regs[this_order]))
14254 this_order = j;
14255 reg_order[i] = this_order;
14256 }
14257
14258 /* Ensure that registers that must be live after the instruction end
14259 up with the correct value. */
14260 for (i = 0; i < nops; i++)
14261 {
14262 int this_order = reg_order[i];
14263 if ((this_order != mem_order[i]
14264 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14265 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14266 return false;
14267 }
14268
14269 /* Load the constants. */
14270 for (i = 0; i < nops; i++)
14271 {
14272 rtx op = operands[2 * nops + mem_order[i]];
14273 sorted_regs[i] = regs[reg_order[i]];
14274 emit_move_insn (reg_rtxs[reg_order[i]], op);
14275 }
14276
14277 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14278
14279 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14280 if (TARGET_THUMB1)
14281 {
14282 gcc_assert (base_reg_dies);
14283 write_back = TRUE;
14284 }
14285
14286 if (stm_case == 5)
14287 {
14288 gcc_assert (base_reg_dies);
14289 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14290 offset = 0;
14291 }
14292
14293 addr = plus_constant (Pmode, base_reg_rtx, offset);
14294
14295 for (i = 0; i < nops; i++)
14296 {
14297 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14298 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14299 SImode, addr, 0);
14300 }
14301 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14302 write_back ? offset + i * 4 : 0));
14303 return true;
14304 }
14305
14306 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14307 unaligned copies on processors which support unaligned semantics for those
14308 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14309 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14310 An interleave factor of 1 (the minimum) will perform no interleaving.
14311 Load/store multiple are used for aligned addresses where possible. */
14312
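/* Sketch of the emitted pattern for INTERLEAVE_FACTOR == 2 with both
   source and destination unaligned (illustrative):
     ldr rA, [src]        @ unaligned word load
     ldr rB, [src, #4]
     str rA, [dst]        @ unaligned word store
     str rB, [dst, #4]
   repeated per 8-byte block, with any trailing halfword/byte copied
   using ldrh/strh and ldrb/strb afterwards. */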
14313 static void
14314 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14315 HOST_WIDE_INT length,
14316 unsigned int interleave_factor)
14317 {
14318 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14319 int *regnos = XALLOCAVEC (int, interleave_factor);
14320 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14321 HOST_WIDE_INT i, j;
14322 HOST_WIDE_INT remaining = length, words;
14323 rtx halfword_tmp = NULL, byte_tmp = NULL;
14324 rtx dst, src;
14325 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14326 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14327 HOST_WIDE_INT srcoffset, dstoffset;
14328 HOST_WIDE_INT src_autoinc, dst_autoinc;
14329 rtx mem, addr;
14330
14331 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14332
14333 /* Use hard registers if we have aligned source or destination so we can use
14334 load/store multiple with contiguous registers. */
14335 if (dst_aligned || src_aligned)
14336 for (i = 0; i < interleave_factor; i++)
14337 regs[i] = gen_rtx_REG (SImode, i);
14338 else
14339 for (i = 0; i < interleave_factor; i++)
14340 regs[i] = gen_reg_rtx (SImode);
14341
14342 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14343 src = copy_addr_to_reg (XEXP (srcbase, 0));
14344
14345 srcoffset = dstoffset = 0;
14346
14347 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14348 For copying the last bytes we want to subtract this offset again. */
14349 src_autoinc = dst_autoinc = 0;
14350
14351 for (i = 0; i < interleave_factor; i++)
14352 regnos[i] = i;
14353
14354 /* Copy BLOCK_SIZE_BYTES chunks. */
14355
14356 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14357 {
14358 /* Load words. */
14359 if (src_aligned && interleave_factor > 1)
14360 {
14361 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14362 TRUE, srcbase, &srcoffset));
14363 src_autoinc += UNITS_PER_WORD * interleave_factor;
14364 }
14365 else
14366 {
14367 for (j = 0; j < interleave_factor; j++)
14368 {
14369 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14370 - src_autoinc));
14371 mem = adjust_automodify_address (srcbase, SImode, addr,
14372 srcoffset + j * UNITS_PER_WORD);
14373 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14374 }
14375 srcoffset += block_size_bytes;
14376 }
14377
14378 /* Store words. */
14379 if (dst_aligned && interleave_factor > 1)
14380 {
14381 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14382 TRUE, dstbase, &dstoffset));
14383 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14384 }
14385 else
14386 {
14387 for (j = 0; j < interleave_factor; j++)
14388 {
14389 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14390 - dst_autoinc));
14391 mem = adjust_automodify_address (dstbase, SImode, addr,
14392 dstoffset + j * UNITS_PER_WORD);
14393 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14394 }
14395 dstoffset += block_size_bytes;
14396 }
14397
14398 remaining -= block_size_bytes;
14399 }
14400
14401 /* Copy any whole words left (note these aren't interleaved with any
14402 subsequent halfword/byte load/stores in the interests of simplicity). */
14403
14404 words = remaining / UNITS_PER_WORD;
14405
14406 gcc_assert (words < interleave_factor);
14407
14408 if (src_aligned && words > 1)
14409 {
14410 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14411 &srcoffset));
14412 src_autoinc += UNITS_PER_WORD * words;
14413 }
14414 else
14415 {
14416 for (j = 0; j < words; j++)
14417 {
14418 addr = plus_constant (Pmode, src,
14419 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14420 mem = adjust_automodify_address (srcbase, SImode, addr,
14421 srcoffset + j * UNITS_PER_WORD);
14422 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14423 }
14424 srcoffset += words * UNITS_PER_WORD;
14425 }
14426
14427 if (dst_aligned && words > 1)
14428 {
14429 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14430 &dstoffset));
14431 dst_autoinc += words * UNITS_PER_WORD;
14432 }
14433 else
14434 {
14435 for (j = 0; j < words; j++)
14436 {
14437 addr = plus_constant (Pmode, dst,
14438 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14439 mem = adjust_automodify_address (dstbase, SImode, addr,
14440 dstoffset + j * UNITS_PER_WORD);
14441 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14442 }
14443 dstoffset += words * UNITS_PER_WORD;
14444 }
14445
14446 remaining -= words * UNITS_PER_WORD;
14447
14448 gcc_assert (remaining < 4);
14449
14450 /* Copy a halfword if necessary. */
14451
14452 if (remaining >= 2)
14453 {
14454 halfword_tmp = gen_reg_rtx (SImode);
14455
14456 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14457 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14458 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14459
14460 /* Either write out immediately, or delay until we've loaded the last
14461 byte, depending on interleave factor. */
14462 if (interleave_factor == 1)
14463 {
14464 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14465 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14466 emit_insn (gen_unaligned_storehi (mem,
14467 gen_lowpart (HImode, halfword_tmp)));
14468 halfword_tmp = NULL;
14469 dstoffset += 2;
14470 }
14471
14472 remaining -= 2;
14473 srcoffset += 2;
14474 }
14475
14476 gcc_assert (remaining < 2);
14477
14478 /* Copy last byte. */
14479
14480 if ((remaining & 1) != 0)
14481 {
14482 byte_tmp = gen_reg_rtx (SImode);
14483
14484 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14485 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14486 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14487
14488 if (interleave_factor == 1)
14489 {
14490 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14491 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14492 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14493 byte_tmp = NULL;
14494 dstoffset++;
14495 }
14496
14497 remaining--;
14498 srcoffset++;
14499 }
14500
14501 /* Store last halfword if we haven't done so already. */
14502
14503 if (halfword_tmp)
14504 {
14505 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14506 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14507 emit_insn (gen_unaligned_storehi (mem,
14508 gen_lowpart (HImode, halfword_tmp)));
14509 dstoffset += 2;
14510 }
14511
14512 /* Likewise for last byte. */
14513
14514 if (byte_tmp)
14515 {
14516 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14517 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14518 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14519 dstoffset++;
14520 }
14521
14522 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14523 }
14524
14525 /* From mips_adjust_block_mem:
14526
14527 Helper function for doing a loop-based block operation on memory
14528 reference MEM. Each iteration of the loop will operate on LENGTH
14529 bytes of MEM.
14530
14531 Create a new base register for use within the loop and point it to
14532 the start of MEM. Create a new memory reference that uses this
14533 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14534
14535 static void
14536 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14537 rtx *loop_mem)
14538 {
14539 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14540
14541 /* Although the new mem does not refer to a known location,
14542 it does keep up to LENGTH bytes of alignment. */
14543 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14544 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14545 }
14546
14547 /* From mips_block_move_loop:
14548
14549 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14550 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14551 the memory regions do not overlap. */
14552
14553 static void
14554 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14555 unsigned int interleave_factor,
14556 HOST_WIDE_INT bytes_per_iter)
14557 {
14558 rtx src_reg, dest_reg, final_src, test;
14559 HOST_WIDE_INT leftover;
14560
14561 leftover = length % bytes_per_iter;
14562 length -= leftover;
14563
14564 /* Create registers and memory references for use within the loop. */
14565 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14566 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14567
14568 /* Calculate the value that SRC_REG should have after the last iteration of
14569 the loop. */
14570 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14571 0, 0, OPTAB_WIDEN);
14572
14573 /* Emit the start of the loop. */
14574 rtx_code_label *label = gen_label_rtx ();
14575 emit_label (label);
14576
14577 /* Emit the loop body. */
14578 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14579 interleave_factor);
14580
14581 /* Move on to the next block. */
14582 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14583 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14584
14585 /* Emit the loop condition. */
14586 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14587 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14588
14589 /* Mop up any left-over bytes. */
14590 if (leftover)
14591 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14592 }
14593
14594 /* Emit a block move when either the source or destination is unaligned (not
14595 aligned to a four-byte boundary). This may need further tuning depending on
14596 core type, optimize_size setting, etc. */
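/* An illustrative sketch of how the length thresholds below route a copy:
   a 40-byte unaligned copy compiled without -Os uses
   arm_block_move_unaligned_loop with an interleave factor of 4 and 16
   bytes per iteration (32 bytes in the loop, 8 bytes mopped up
   afterwards), whereas with -Os and neither buffer word-aligned it uses
   a 4-bytes-per-iteration loop with no interleaving. */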
14597
14598 static int
14599 arm_movmemqi_unaligned (rtx *operands)
14600 {
14601 HOST_WIDE_INT length = INTVAL (operands[2]);
14602
14603 if (optimize_size)
14604 {
14605 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14606 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14607 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14608 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14609 or dst_aligned though: allow more interleaving in those cases since the
14610 resulting code can be smaller. */
14611 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14612 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14613
14614 if (length > 12)
14615 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14616 interleave_factor, bytes_per_iter);
14617 else
14618 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14619 interleave_factor);
14620 }
14621 else
14622 {
14623 /* Note that the loop created by arm_block_move_unaligned_loop may be
14624 subject to loop unrolling, which makes tuning this condition a little
14625 redundant. */
14626 if (length > 32)
14627 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14628 else
14629 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14630 }
14631
14632 return 1;
14633 }
14634
14635 int
14636 arm_gen_movmemqi (rtx *operands)
14637 {
14638 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14639 HOST_WIDE_INT srcoffset, dstoffset;
14640 int i;
14641 rtx src, dst, srcbase, dstbase;
14642 rtx part_bytes_reg = NULL;
14643 rtx mem;
14644
14645 if (!CONST_INT_P (operands[2])
14646 || !CONST_INT_P (operands[3])
14647 || INTVAL (operands[2]) > 64)
14648 return 0;
14649
14650 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14651 return arm_movmemqi_unaligned (operands);
14652
14653 if (INTVAL (operands[3]) & 3)
14654 return 0;
14655
14656 dstbase = operands[0];
14657 srcbase = operands[1];
14658
14659 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14660 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14661
14662 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14663 out_words_to_go = INTVAL (operands[2]) / 4;
14664 last_bytes = INTVAL (operands[2]) & 3;
14665 dstoffset = srcoffset = 0;
14666
14667 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14668 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14669
14670 for (i = 0; in_words_to_go >= 2; i+=4)
14671 {
14672 if (in_words_to_go > 4)
14673 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14674 TRUE, srcbase, &srcoffset));
14675 else
14676 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14677 src, FALSE, srcbase,
14678 &srcoffset));
14679
14680 if (out_words_to_go)
14681 {
14682 if (out_words_to_go > 4)
14683 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14684 TRUE, dstbase, &dstoffset));
14685 else if (out_words_to_go != 1)
14686 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14687 out_words_to_go, dst,
14688 (last_bytes == 0
14689 ? FALSE : TRUE),
14690 dstbase, &dstoffset));
14691 else
14692 {
14693 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14694 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14695 if (last_bytes != 0)
14696 {
14697 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14698 dstoffset += 4;
14699 }
14700 }
14701 }
14702
14703 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14704 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14705 }
14706
14707 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14708 if (out_words_to_go)
14709 {
14710 rtx sreg;
14711
14712 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14713 sreg = copy_to_reg (mem);
14714
14715 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14716 emit_move_insn (mem, sreg);
14717 in_words_to_go--;
14718
14719 gcc_assert (!in_words_to_go); /* Sanity check */
14720 }
14721
14722 if (in_words_to_go)
14723 {
14724 gcc_assert (in_words_to_go > 0);
14725
14726 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14727 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14728 }
14729
14730 gcc_assert (!last_bytes || part_bytes_reg);
14731
14732 if (BYTES_BIG_ENDIAN && last_bytes)
14733 {
14734 rtx tmp = gen_reg_rtx (SImode);
14735
14736 /* The bytes we want are in the top end of the word. */
14737 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14738 GEN_INT (8 * (4 - last_bytes))));
14739 part_bytes_reg = tmp;
14740
14741 while (last_bytes)
14742 {
14743 mem = adjust_automodify_address (dstbase, QImode,
14744 plus_constant (Pmode, dst,
14745 last_bytes - 1),
14746 dstoffset + last_bytes - 1);
14747 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14748
14749 if (--last_bytes)
14750 {
14751 tmp = gen_reg_rtx (SImode);
14752 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14753 part_bytes_reg = tmp;
14754 }
14755 }
14756
14757 }
14758 else
14759 {
14760 if (last_bytes > 1)
14761 {
14762 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14763 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14764 last_bytes -= 2;
14765 if (last_bytes)
14766 {
14767 rtx tmp = gen_reg_rtx (SImode);
14768 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14769 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14770 part_bytes_reg = tmp;
14771 dstoffset += 2;
14772 }
14773 }
14774
14775 if (last_bytes)
14776 {
14777 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14778 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14779 }
14780 }
14781
14782 return 1;
14783 }
14784
14785 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14786 by mode size. */
14787 inline static rtx
14788 next_consecutive_mem (rtx mem)
14789 {
14790 machine_mode mode = GET_MODE (mem);
14791 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14792 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14793
14794 return adjust_automodify_address (mem, mode, addr, offset);
14795 }
14796
14797 /* Copy using LDRD/STRD instructions whenever possible.
14798 Returns true upon success. */
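/* A sketch of the resulting breakdown (illustrative): copying 14 bytes
   when both buffers are known to be word-aligned emits one doubleword
   load/store (LDRD/STRD where available), then a word copy, then a
   halfword copy, with no trailing byte copy. */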
14799 bool
14800 gen_movmem_ldrd_strd (rtx *operands)
14801 {
14802 unsigned HOST_WIDE_INT len;
14803 HOST_WIDE_INT align;
14804 rtx src, dst, base;
14805 rtx reg0;
14806 bool src_aligned, dst_aligned;
14807 bool src_volatile, dst_volatile;
14808
14809 gcc_assert (CONST_INT_P (operands[2]));
14810 gcc_assert (CONST_INT_P (operands[3]));
14811
14812 len = UINTVAL (operands[2]);
14813 if (len > 64)
14814 return false;
14815
14816 /* Maximum alignment we can assume for both src and dst buffers. */
14817 align = INTVAL (operands[3]);
14818
14819 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14820 return false;
14821
14822 /* Place src and dst addresses in registers
14823 and update the corresponding mem rtx. */
14824 dst = operands[0];
14825 dst_volatile = MEM_VOLATILE_P (dst);
14826 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14827 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14828 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14829
14830 src = operands[1];
14831 src_volatile = MEM_VOLATILE_P (src);
14832 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14833 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14834 src = adjust_automodify_address (src, VOIDmode, base, 0);
14835
14836 if (!unaligned_access && !(src_aligned && dst_aligned))
14837 return false;
14838
14839 if (src_volatile || dst_volatile)
14840 return false;
14841
14842 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14843 if (!(dst_aligned || src_aligned))
14844 return arm_gen_movmemqi (operands);
14845
14846 src = adjust_address (src, DImode, 0);
14847 dst = adjust_address (dst, DImode, 0);
14848 while (len >= 8)
14849 {
14850 len -= 8;
14851 reg0 = gen_reg_rtx (DImode);
14852 if (src_aligned)
14853 emit_move_insn (reg0, src);
14854 else
14855 emit_insn (gen_unaligned_loaddi (reg0, src));
14856
14857 if (dst_aligned)
14858 emit_move_insn (dst, reg0);
14859 else
14860 emit_insn (gen_unaligned_storedi (dst, reg0));
14861
14862 src = next_consecutive_mem (src);
14863 dst = next_consecutive_mem (dst);
14864 }
14865
14866 gcc_assert (len < 8);
14867 if (len >= 4)
14868 {
14869 /* More than a word but less than a double-word to copy. Copy a word. */
14870 reg0 = gen_reg_rtx (SImode);
14871 src = adjust_address (src, SImode, 0);
14872 dst = adjust_address (dst, SImode, 0);
14873 if (src_aligned)
14874 emit_move_insn (reg0, src);
14875 else
14876 emit_insn (gen_unaligned_loadsi (reg0, src));
14877
14878 if (dst_aligned)
14879 emit_move_insn (dst, reg0);
14880 else
14881 emit_insn (gen_unaligned_storesi (dst, reg0));
14882
14883 src = next_consecutive_mem (src);
14884 dst = next_consecutive_mem (dst);
14885 len -= 4;
14886 }
14887
14888 if (len == 0)
14889 return true;
14890
14891 /* Copy the remaining bytes. */
14892 if (len >= 2)
14893 {
14894 dst = adjust_address (dst, HImode, 0);
14895 src = adjust_address (src, HImode, 0);
14896 reg0 = gen_reg_rtx (SImode);
14897 if (src_aligned)
14898 emit_insn (gen_zero_extendhisi2 (reg0, src));
14899 else
14900 emit_insn (gen_unaligned_loadhiu (reg0, src));
14901
14902 if (dst_aligned)
14903 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14904 else
14905 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14906
14907 src = next_consecutive_mem (src);
14908 dst = next_consecutive_mem (dst);
14909 if (len == 2)
14910 return true;
14911 }
14912
14913 dst = adjust_address (dst, QImode, 0);
14914 src = adjust_address (src, QImode, 0);
14915 reg0 = gen_reg_rtx (QImode);
14916 emit_move_insn (reg0, src);
14917 emit_move_insn (dst, reg0);
14918 return true;
14919 }
14920
14921 /* Select a dominance comparison mode if possible for a test of the general
14922 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14923 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14924 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14925 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14926 In all cases OP will be either EQ or NE, but we don't need to know which
14927 here. If we are unable to support a dominance comparison we return
14928 CC mode. This will then fail to match for the RTL expressions that
14929 generate this call. */
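/* For example (illustrative): with X = (a == b), Y = (a >=u b) and
   COND_OR == DOM_CC_X_OR_Y, EQ dominates GEU (if EQ holds then GEU
   holds), so the function below returns CC_DGEUmode and the pair can be
   tested by a single dominated comparison. */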
14930 machine_mode
14931 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14932 {
14933 enum rtx_code cond1, cond2;
14934 int swapped = 0;
14935
14936 /* Currently we will probably get the wrong result if the individual
14937 comparisons are not simple. This also ensures that it is safe to
14938 reverse a comparison if necessary. */
14939 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14940 != CCmode)
14941 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14942 != CCmode))
14943 return CCmode;
14944
14945 /* The if_then_else variant of this tests the second condition if the
14946 first passes, but is true if the first fails. Reverse the first
14947 condition to get a true "inclusive-or" expression. */
14948 if (cond_or == DOM_CC_NX_OR_Y)
14949 cond1 = reverse_condition (cond1);
14950
14951 /* If the comparisons are not equal, and one doesn't dominate the other,
14952 then we can't do this. */
14953 if (cond1 != cond2
14954 && !comparison_dominates_p (cond1, cond2)
14955 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14956 return CCmode;
14957
14958 if (swapped)
14959 std::swap (cond1, cond2);
14960
14961 switch (cond1)
14962 {
14963 case EQ:
14964 if (cond_or == DOM_CC_X_AND_Y)
14965 return CC_DEQmode;
14966
14967 switch (cond2)
14968 {
14969 case EQ: return CC_DEQmode;
14970 case LE: return CC_DLEmode;
14971 case LEU: return CC_DLEUmode;
14972 case GE: return CC_DGEmode;
14973 case GEU: return CC_DGEUmode;
14974 default: gcc_unreachable ();
14975 }
14976
14977 case LT:
14978 if (cond_or == DOM_CC_X_AND_Y)
14979 return CC_DLTmode;
14980
14981 switch (cond2)
14982 {
14983 case LT:
14984 return CC_DLTmode;
14985 case LE:
14986 return CC_DLEmode;
14987 case NE:
14988 return CC_DNEmode;
14989 default:
14990 gcc_unreachable ();
14991 }
14992
14993 case GT:
14994 if (cond_or == DOM_CC_X_AND_Y)
14995 return CC_DGTmode;
14996
14997 switch (cond2)
14998 {
14999 case GT:
15000 return CC_DGTmode;
15001 case GE:
15002 return CC_DGEmode;
15003 case NE:
15004 return CC_DNEmode;
15005 default:
15006 gcc_unreachable ();
15007 }
15008
15009 case LTU:
15010 if (cond_or == DOM_CC_X_AND_Y)
15011 return CC_DLTUmode;
15012
15013 switch (cond2)
15014 {
15015 case LTU:
15016 return CC_DLTUmode;
15017 case LEU:
15018 return CC_DLEUmode;
15019 case NE:
15020 return CC_DNEmode;
15021 default:
15022 gcc_unreachable ();
15023 }
15024
15025 case GTU:
15026 if (cond_or == DOM_CC_X_AND_Y)
15027 return CC_DGTUmode;
15028
15029 switch (cond2)
15030 {
15031 case GTU:
15032 return CC_DGTUmode;
15033 case GEU:
15034 return CC_DGEUmode;
15035 case NE:
15036 return CC_DNEmode;
15037 default:
15038 gcc_unreachable ();
15039 }
15040
15041 /* The remaining cases only occur when both comparisons are the
15042 same. */
15043 case NE:
15044 gcc_assert (cond1 == cond2);
15045 return CC_DNEmode;
15046
15047 case LE:
15048 gcc_assert (cond1 == cond2);
15049 return CC_DLEmode;
15050
15051 case GE:
15052 gcc_assert (cond1 == cond2);
15053 return CC_DGEmode;
15054
15055 case LEU:
15056 gcc_assert (cond1 == cond2);
15057 return CC_DLEUmode;
15058
15059 case GEU:
15060 gcc_assert (cond1 == cond2);
15061 return CC_DGEUmode;
15062
15063 default:
15064 gcc_unreachable ();
15065 }
15066 }
15067
15068 machine_mode
15069 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15070 {
15071 /* All floating point compares return CCFP if it is an equality
15072 comparison, and CCFPE otherwise. */
15073 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15074 {
15075 switch (op)
15076 {
15077 case EQ:
15078 case NE:
15079 case UNORDERED:
15080 case ORDERED:
15081 case UNLT:
15082 case UNLE:
15083 case UNGT:
15084 case UNGE:
15085 case UNEQ:
15086 case LTGT:
15087 return CCFPmode;
15088
15089 case LT:
15090 case LE:
15091 case GT:
15092 case GE:
15093 return CCFPEmode;
15094
15095 default:
15096 gcc_unreachable ();
15097 }
15098 }
15099
15100 /* A compare with a shifted operand. Because of canonicalization, the
15101 comparison will have to be swapped when we emit the assembler. */
15102 if (GET_MODE (y) == SImode
15103 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15104 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15105 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15106 || GET_CODE (x) == ROTATERT))
15107 return CC_SWPmode;
15108
15109 /* This operation is performed swapped, but since we only rely on the Z
15110 flag we don't need an additional mode. */
15111 if (GET_MODE (y) == SImode
15112 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15113 && GET_CODE (x) == NEG
15114 && (op == EQ || op == NE))
15115 return CC_Zmode;
15116
15117 /* This is a special case that is used by combine to allow a
15118 comparison of a shifted byte load to be split into a zero-extend
15119 followed by a comparison of the shifted integer (only valid for
15120 equalities and unsigned inequalities). */
15121 if (GET_MODE (x) == SImode
15122 && GET_CODE (x) == ASHIFT
15123 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15124 && GET_CODE (XEXP (x, 0)) == SUBREG
15125 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15126 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15127 && (op == EQ || op == NE
15128 || op == GEU || op == GTU || op == LTU || op == LEU)
15129 && CONST_INT_P (y))
15130 return CC_Zmode;
15131
15132 /* A construct for a conditional compare: if the false arm contains
15133 0, then both conditions must be true; otherwise either condition
15134 must be true. Not all conditions are possible, so CCmode is
15135 returned if it can't be done. */
15136 if (GET_CODE (x) == IF_THEN_ELSE
15137 && (XEXP (x, 2) == const0_rtx
15138 || XEXP (x, 2) == const1_rtx)
15139 && COMPARISON_P (XEXP (x, 0))
15140 && COMPARISON_P (XEXP (x, 1)))
15141 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15142 INTVAL (XEXP (x, 2)));
15143
15144 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15145 if (GET_CODE (x) == AND
15146 && (op == EQ || op == NE)
15147 && COMPARISON_P (XEXP (x, 0))
15148 && COMPARISON_P (XEXP (x, 1)))
15149 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15150 DOM_CC_X_AND_Y);
15151
15152 if (GET_CODE (x) == IOR
15153 && (op == EQ || op == NE)
15154 && COMPARISON_P (XEXP (x, 0))
15155 && COMPARISON_P (XEXP (x, 1)))
15156 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15157 DOM_CC_X_OR_Y);
15158
15159 /* An operation (on Thumb) where we want to test for a single bit.
15160 This is done by shifting that bit up into the top bit of a
15161 scratch register; we can then branch on the sign bit. */
15162 if (TARGET_THUMB1
15163 && GET_MODE (x) == SImode
15164 && (op == EQ || op == NE)
15165 && GET_CODE (x) == ZERO_EXTRACT
15166 && XEXP (x, 1) == const1_rtx)
15167 return CC_Nmode;
15168
15169 /* An operation that sets the condition codes as a side-effect; the
15170 V flag is not set correctly, so we can only use comparisons where
15171 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15172 instead.) */
15173 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15174 if (GET_MODE (x) == SImode
15175 && y == const0_rtx
15176 && (op == EQ || op == NE || op == LT || op == GE)
15177 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15178 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15179 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15180 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15181 || GET_CODE (x) == LSHIFTRT
15182 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15183 || GET_CODE (x) == ROTATERT
15184 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15185 return CC_NOOVmode;
15186
15187 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15188 return CC_Zmode;
15189
15190 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15191 && GET_CODE (x) == PLUS
15192 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15193 return CC_Cmode;
15194
15195 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15196 {
15197 switch (op)
15198 {
15199 case EQ:
15200 case NE:
15201 /* A DImode comparison against zero can be implemented by
15202 or'ing the two halves together. */
15203 if (y == const0_rtx)
15204 return CC_Zmode;
15205
15206 /* We can do an equality test in three Thumb instructions. */
15207 if (!TARGET_32BIT)
15208 return CC_Zmode;
15209
15210 /* FALLTHROUGH */
15211
15212 case LTU:
15213 case LEU:
15214 case GTU:
15215 case GEU:
15216 /* DImode unsigned comparisons can be implemented by cmp +
15217 cmpeq without a scratch register. Not worth doing in
15218 Thumb-2. */
15219 if (TARGET_32BIT)
15220 return CC_CZmode;
15221
15222 /* FALLTHROUGH */
15223
15224 case LT:
15225 case LE:
15226 case GT:
15227 case GE:
15228 /* DImode signed and unsigned comparisons can be implemented
15229 by cmp + sbcs with a scratch register, but that does not
15230 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15231 gcc_assert (op != EQ && op != NE);
15232 return CC_NCVmode;
15233
15234 default:
15235 gcc_unreachable ();
15236 }
15237 }
15238
15239 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15240 return GET_MODE (x);
15241
15242 return CCmode;
15243 }
15244
15245 /* X and Y are two things to compare using CODE. Emit the compare insn and
15246 return the rtx for register 0 in the proper mode. FP means this is a
15247 floating point compare: I don't think that it is needed on the arm. */
15248 rtx
15249 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15250 {
15251 machine_mode mode;
15252 rtx cc_reg;
15253 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15254
15255 /* We might have X as a constant, Y as a register because of the predicates
15256 used for cmpdi. If so, force X to a register here. */
15257 if (dimode_comparison && !REG_P (x))
15258 x = force_reg (DImode, x);
15259
15260 mode = SELECT_CC_MODE (code, x, y);
15261 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15262
15263 if (dimode_comparison
15264 && mode != CC_CZmode)
15265 {
15266 rtx clobber, set;
15267
15268 /* To compare two non-zero values for equality, XOR them and
15269 then compare against zero. Not used for ARM mode; there
15270 CC_CZmode is cheaper. */
15271 if (mode == CC_Zmode && y != const0_rtx)
15272 {
15273 gcc_assert (!reload_completed);
15274 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15275 y = const0_rtx;
15276 }
15277
15278 /* A scratch register is required. */
15279 if (reload_completed)
15280 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15281 else
15282 scratch = gen_rtx_SCRATCH (SImode);
15283
15284 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15285 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15286 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15287 }
15288 else
15289 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15290
15291 return cc_reg;
15292 }
15293
15294 /* Generate a sequence of insns that will generate the correct return
15295 address mask depending on the physical architecture that the program
15296 is running on. */
15297 rtx
15298 arm_gen_return_addr_mask (void)
15299 {
15300 rtx reg = gen_reg_rtx (Pmode);
15301
15302 emit_insn (gen_return_addr_mask (reg));
15303 return reg;
15304 }
15305
15306 void
15307 arm_reload_in_hi (rtx *operands)
15308 {
15309 rtx ref = operands[1];
15310 rtx base, scratch;
15311 HOST_WIDE_INT offset = 0;
15312
15313 if (GET_CODE (ref) == SUBREG)
15314 {
15315 offset = SUBREG_BYTE (ref);
15316 ref = SUBREG_REG (ref);
15317 }
15318
15319 if (REG_P (ref))
15320 {
15321 /* We have a pseudo which has been spilt onto the stack; there
15322 are two cases here: the first where there is a simple
15323 stack-slot replacement and a second where the stack-slot is
15324 out of range, or is used as a subreg. */
15325 if (reg_equiv_mem (REGNO (ref)))
15326 {
15327 ref = reg_equiv_mem (REGNO (ref));
15328 base = find_replacement (&XEXP (ref, 0));
15329 }
15330 else
15331 /* The slot is out of range, or was dressed up in a SUBREG. */
15332 base = reg_equiv_address (REGNO (ref));
15333 }
15334 else
15335 base = find_replacement (&XEXP (ref, 0));
15336
15337 /* Handle the case where the address is too complex to be offset by 1. */
15338 if (GET_CODE (base) == MINUS
15339 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15340 {
15341 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15342
15343 emit_set_insn (base_plus, base);
15344 base = base_plus;
15345 }
15346 else if (GET_CODE (base) == PLUS)
15347 {
15348 /* The addend must be CONST_INT, or we would have dealt with it above. */
15349 HOST_WIDE_INT hi, lo;
15350
15351 offset += INTVAL (XEXP (base, 1));
15352 base = XEXP (base, 0);
15353
15354 /* Rework the address into a legal sequence of insns. */
15355 /* Valid range for lo is -4095 -> 4095 */
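/* For example, an offset of 0x1234 splits into lo = 0x234 and
   hi = 0x1000, while -0x1234 splits into lo = -0x234 and hi = -0x1000;
   in both cases hi + lo reconstructs the original offset and lo is
   within the +/-4095 range usable by the byte loads below. */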
15356 lo = (offset >= 0
15357 ? (offset & 0xfff)
15358 : -((-offset) & 0xfff));
15359
15360 /* Corner case: if lo is the max offset then we would be out of range
15361 once we have added the additional 1 below, so bump the msb into the
15362 pre-loading insn(s). */
15363 if (lo == 4095)
15364 lo &= 0x7ff;
15365
15366 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15367 ^ (HOST_WIDE_INT) 0x80000000)
15368 - (HOST_WIDE_INT) 0x80000000);
15369
15370 gcc_assert (hi + lo == offset);
15371
15372 if (hi != 0)
15373 {
15374 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15375
15376 /* Get the base address; addsi3 knows how to handle constants
15377 that require more than one insn. */
15378 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15379 base = base_plus;
15380 offset = lo;
15381 }
15382 }
15383
15384 /* Operands[2] may overlap operands[0] (though it won't overlap
15385 operands[1]); that is why we asked for a DImode reg -- so we can
15386 use the half that does not overlap. */
15387 if (REGNO (operands[2]) == REGNO (operands[0]))
15388 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15389 else
15390 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15391
15392 emit_insn (gen_zero_extendqisi2 (scratch,
15393 gen_rtx_MEM (QImode,
15394 plus_constant (Pmode, base,
15395 offset))));
15396 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15397 gen_rtx_MEM (QImode,
15398 plus_constant (Pmode, base,
15399 offset + 1))));
15400 if (!BYTES_BIG_ENDIAN)
15401 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15402 gen_rtx_IOR (SImode,
15403 gen_rtx_ASHIFT
15404 (SImode,
15405 gen_rtx_SUBREG (SImode, operands[0], 0),
15406 GEN_INT (8)),
15407 scratch));
15408 else
15409 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15410 gen_rtx_IOR (SImode,
15411 gen_rtx_ASHIFT (SImode, scratch,
15412 GEN_INT (8)),
15413 gen_rtx_SUBREG (SImode, operands[0], 0)));
15414 }
15415
15416 /* Handle storing a half-word to memory during reload by synthesizing it as two
15417 byte stores. Take care not to clobber the input values until after we
15418 have moved them somewhere safe. This code assumes that if the DImode
15419 scratch in operands[2] overlaps either the input value or output address
15420 in some way, then that value must die in this insn (we absolutely need
15421 two scratch registers for some corner cases). */
15422 void
15423 arm_reload_out_hi (rtx *operands)
15424 {
15425 rtx ref = operands[0];
15426 rtx outval = operands[1];
15427 rtx base, scratch;
15428 HOST_WIDE_INT offset = 0;
15429
15430 if (GET_CODE (ref) == SUBREG)
15431 {
15432 offset = SUBREG_BYTE (ref);
15433 ref = SUBREG_REG (ref);
15434 }
15435
15436 if (REG_P (ref))
15437 {
15438 /* We have a pseudo which has been spilt onto the stack; there
15439 are two cases here: the first where there is a simple
15440 stack-slot replacement and a second where the stack-slot is
15441 out of range, or is used as a subreg. */
15442 if (reg_equiv_mem (REGNO (ref)))
15443 {
15444 ref = reg_equiv_mem (REGNO (ref));
15445 base = find_replacement (&XEXP (ref, 0));
15446 }
15447 else
15448 /* The slot is out of range, or was dressed up in a SUBREG. */
15449 base = reg_equiv_address (REGNO (ref));
15450 }
15451 else
15452 base = find_replacement (&XEXP (ref, 0));
15453
15454 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15455
15456 /* Handle the case where the address is too complex to be offset by 1. */
15457 if (GET_CODE (base) == MINUS
15458 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15459 {
15460 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15461
15462 /* Be careful not to destroy OUTVAL. */
15463 if (reg_overlap_mentioned_p (base_plus, outval))
15464 {
15465 /* Updating base_plus might destroy outval, see if we can
15466 swap the scratch and base_plus. */
15467 if (!reg_overlap_mentioned_p (scratch, outval))
15468 std::swap (scratch, base_plus);
15469 else
15470 {
15471 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15472
15473 /* Be conservative and copy OUTVAL into the scratch now,
15474 this should only be necessary if outval is a subreg
15475 of something larger than a word. */
15476 /* XXX Might this clobber base? I can't see how it can,
15477 since scratch is known to overlap with OUTVAL, and
15478 must be wider than a word. */
15479 emit_insn (gen_movhi (scratch_hi, outval));
15480 outval = scratch_hi;
15481 }
15482 }
15483
15484 emit_set_insn (base_plus, base);
15485 base = base_plus;
15486 }
15487 else if (GET_CODE (base) == PLUS)
15488 {
15489 /* The addend must be CONST_INT, or we would have dealt with it above. */
15490 HOST_WIDE_INT hi, lo;
15491
15492 offset += INTVAL (XEXP (base, 1));
15493 base = XEXP (base, 0);
15494
15495 /* Rework the address into a legal sequence of insns. */
15496 /* Valid range for lo is -4095 -> 4095 */
15497 lo = (offset >= 0
15498 ? (offset & 0xfff)
15499 : -((-offset) & 0xfff));
15500
15501 /* Corner case: if lo is the max offset then we would be out of range
15502 once we have added the additional 1 below, so bump the msb into the
15503 pre-loading insn(s). */
15504 if (lo == 4095)
15505 lo &= 0x7ff;
15506
15507 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15508 ^ (HOST_WIDE_INT) 0x80000000)
15509 - (HOST_WIDE_INT) 0x80000000);
15510
15511 gcc_assert (hi + lo == offset);
15512
15513 if (hi != 0)
15514 {
15515 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15516
15517 /* Be careful not to destroy OUTVAL. */
15518 if (reg_overlap_mentioned_p (base_plus, outval))
15519 {
15520 /* Updating base_plus might destroy outval, see if we
15521 can swap the scratch and base_plus. */
15522 if (!reg_overlap_mentioned_p (scratch, outval))
15523 std::swap (scratch, base_plus);
15524 else
15525 {
15526 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15527
15528 /* Be conservative and copy outval into scratch now,
15529 this should only be necessary if outval is a
15530 subreg of something larger than a word. */
15531 /* XXX Might this clobber base? I can't see how it
15532 can, since scratch is known to overlap with
15533 outval. */
15534 emit_insn (gen_movhi (scratch_hi, outval));
15535 outval = scratch_hi;
15536 }
15537 }
15538
15539 /* Get the base address; addsi3 knows how to handle constants
15540 that require more than one insn. */
15541 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15542 base = base_plus;
15543 offset = lo;
15544 }
15545 }
15546
15547 if (BYTES_BIG_ENDIAN)
15548 {
15549 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15550 plus_constant (Pmode, base,
15551 offset + 1)),
15552 gen_lowpart (QImode, outval)));
15553 emit_insn (gen_lshrsi3 (scratch,
15554 gen_rtx_SUBREG (SImode, outval, 0),
15555 GEN_INT (8)));
15556 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15557 offset)),
15558 gen_lowpart (QImode, scratch)));
15559 }
15560 else
15561 {
15562 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15563 offset)),
15564 gen_lowpart (QImode, outval)));
15565 emit_insn (gen_lshrsi3 (scratch,
15566 gen_rtx_SUBREG (SImode, outval, 0),
15567 GEN_INT (8)));
15568 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15569 plus_constant (Pmode, base,
15570 offset + 1)),
15571 gen_lowpart (QImode, scratch)));
15572 }
15573 }
15574
15575 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15576 (padded to the size of a word) should be passed in a register. */
15577
15578 static bool
15579 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15580 {
15581 if (TARGET_AAPCS_BASED)
15582 return must_pass_in_stack_var_size (mode, type);
15583 else
15584 return must_pass_in_stack_var_size_or_pad (mode, type);
15585 }
15586
15587
15588 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15589 Return true if an argument passed on the stack should be padded upwards,
15590 i.e. if the least-significant byte has useful data.
15591 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15592 aggregate types are placed in the lowest memory address. */
15593
15594 bool
15595 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15596 {
15597 if (!TARGET_AAPCS_BASED)
15598 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15599
15600 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15601 return false;
15602
15603 return true;
15604 }
15605
15606
15607 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15608 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15609 register has useful data, and return the opposite if the most
15610 significant byte does. */
15611
15612 bool
15613 arm_pad_reg_upward (machine_mode mode,
15614 tree type, int first ATTRIBUTE_UNUSED)
15615 {
15616 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15617 {
15618 /* For AAPCS, small aggregates, small fixed-point types,
15619 and small complex types are always padded upwards. */
15620 if (type)
15621 {
15622 if ((AGGREGATE_TYPE_P (type)
15623 || TREE_CODE (type) == COMPLEX_TYPE
15624 || FIXED_POINT_TYPE_P (type))
15625 && int_size_in_bytes (type) <= 4)
15626 return true;
15627 }
15628 else
15629 {
15630 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15631 && GET_MODE_SIZE (mode) <= 4)
15632 return true;
15633 }
15634 }
15635
15636 /* Otherwise, use default padding. */
15637 return !BYTES_BIG_ENDIAN;
15638 }
15639
15640 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15641 assuming that the address in the base register is word aligned. */
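/* Illustrative limits implied by the encodings: Thumb-2 LDRD/STRD take an
   8-bit immediate scaled by 4, so offsets up to +/-1020 with the low two
   bits clear are accepted, while the ARM-mode encoding takes a plain
   8-bit immediate, giving +/-255. */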
15642 bool
15643 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15644 {
15645 HOST_WIDE_INT max_offset;
15646
15647 /* Offset must be a multiple of 4 in Thumb mode. */
15648 if (TARGET_THUMB2 && ((offset & 3) != 0))
15649 return false;
15650
15651 if (TARGET_THUMB2)
15652 max_offset = 1020;
15653 else if (TARGET_ARM)
15654 max_offset = 255;
15655 else
15656 return false;
15657
15658 return ((offset <= max_offset) && (offset >= -max_offset));
15659 }
15660
15661 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15662 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15663 Assumes that the address in the base register RN is word aligned. Pattern
15664 guarantees that both memory accesses use the same base register,
15665 the offsets are constants within the range, and the gap between the offsets is 4.
15666 If reload is complete then check that registers are legal. WBACK indicates whether
15667 address is updated. LOAD indicates whether memory access is load or store. */
15668 bool
15669 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15670 bool wback, bool load)
15671 {
15672 unsigned int t, t2, n;
15673
15674 if (!reload_completed)
15675 return true;
15676
15677 if (!offset_ok_for_ldrd_strd (offset))
15678 return false;
15679
15680 t = REGNO (rt);
15681 t2 = REGNO (rt2);
15682 n = REGNO (rn);
15683
15684 if ((TARGET_THUMB2)
15685 && ((wback && (n == t || n == t2))
15686 || (t == SP_REGNUM)
15687 || (t == PC_REGNUM)
15688 || (t2 == SP_REGNUM)
15689 || (t2 == PC_REGNUM)
15690 || (!load && (n == PC_REGNUM))
15691 || (load && (t == t2))
15692 /* Triggers Cortex-M3 LDRD errata. */
15693 || (!wback && load && fix_cm3_ldrd && (n == t))))
15694 return false;
15695
15696 if ((TARGET_ARM)
15697 && ((wback && (n == t || n == t2))
15698 || (t2 == PC_REGNUM)
15699 || (t % 2 != 0) /* First destination register is not even. */
15700 || (t2 != t + 1)
15701 /* PC can be used as base register (for offset addressing only),
15702 but it is deprecated. */
15703 || (n == PC_REGNUM)))
15704 return false;
15705
15706 return true;
15707 }
15708
15709 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15710 operand MEM's address contains an immediate offset from the base
15711 register and has no side effects, in which case it sets BASE and
15712 OFFSET accordingly. */
15713 static bool
15714 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15715 {
15716 rtx addr;
15717
15718 gcc_assert (base != NULL && offset != NULL);
15719
15720 /* TODO: Handle more general memory operand patterns, such as
15721 PRE_DEC and PRE_INC. */
15722
15723 if (side_effects_p (mem))
15724 return false;
15725
15726 /* Can't deal with subregs. */
15727 if (GET_CODE (mem) == SUBREG)
15728 return false;
15729
15730 gcc_assert (MEM_P (mem));
15731
15732 *offset = const0_rtx;
15733
15734 addr = XEXP (mem, 0);
15735
15736 /* If addr isn't valid for DImode, then we can't handle it. */
15737 if (!arm_legitimate_address_p (DImode, addr,
15738 reload_in_progress || reload_completed))
15739 return false;
15740
15741 if (REG_P (addr))
15742 {
15743 *base = addr;
15744 return true;
15745 }
15746 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15747 {
15748 *base = XEXP (addr, 0);
15749 *offset = XEXP (addr, 1);
15750 return (REG_P (*base) && CONST_INT_P (*offset));
15751 }
15752
15753 return false;
15754 }
15755
15756 /* Called from a peephole2 to replace two word-size accesses with a
15757 single LDRD/STRD instruction. Returns true iff we can generate a
15758 new instruction sequence. That is, both accesses use the same base
15759 register and the gap between constant offsets is 4. This function
15760 may reorder its operands to match ldrd/strd RTL templates.
15761 OPERANDS are the operands found by the peephole matcher;
15762 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15763 corresponding memory operands. LOAD indicates whether the access
15764 is load or store. CONST_STORE indicates a store of constant
15765 integer values held in OPERANDS[4,5] and assumes that the pattern
15766 is 4 insns long, for the purpose of checking dead registers.
15767 COMMUTE indicates that register operands may be reordered. */
15768 bool
15769 gen_operands_ldrd_strd (rtx *operands, bool load,
15770 bool const_store, bool commute)
15771 {
15772 int nops = 2;
15773 HOST_WIDE_INT offsets[2], offset;
15774 rtx base = NULL_RTX;
15775 rtx cur_base, cur_offset, tmp;
15776 int i, gap;
15777 HARD_REG_SET regset;
15778
15779 gcc_assert (!const_store || !load);
15780 /* Check that the memory references are immediate offsets from the
15781 same base register. Extract the base register, the destination
15782 registers, and the corresponding memory offsets. */
15783 for (i = 0; i < nops; i++)
15784 {
15785 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15786 return false;
15787
15788 if (i == 0)
15789 base = cur_base;
15790 else if (REGNO (base) != REGNO (cur_base))
15791 return false;
15792
15793 offsets[i] = INTVAL (cur_offset);
15794 if (GET_CODE (operands[i]) == SUBREG)
15795 {
15796 tmp = SUBREG_REG (operands[i]);
15797 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15798 operands[i] = tmp;
15799 }
15800 }
15801
15802 /* Make sure there is no dependency between the individual loads. */
15803 if (load && REGNO (operands[0]) == REGNO (base))
15804 return false; /* RAW */
15805
15806 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15807 return false; /* WAW */
15808
15809 /* If the same input register is used in both stores
15810 when storing different constants, try to find a free register.
15811 For example, the code
15812 mov r0, 0
15813 str r0, [r2]
15814 mov r0, 1
15815 str r0, [r2, #4]
15816 can be transformed into
15817 mov r1, 0
15818 strd r1, r0, [r2]
15819 in Thumb mode assuming that r1 is free. */
15820 if (const_store
15821 && REGNO (operands[0]) == REGNO (operands[1])
15822 && INTVAL (operands[4]) != INTVAL (operands[5]))
15823 {
15824 if (TARGET_THUMB2)
15825 {
15826 CLEAR_HARD_REG_SET (regset);
15827 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15828 if (tmp == NULL_RTX)
15829 return false;
15830
15831 /* Use the new register in the first load to ensure that
15832 if the original input register is not dead after peephole,
15833 then it will have the correct constant value. */
15834 operands[0] = tmp;
15835 }
15836 else if (TARGET_ARM)
15837 {
15838 return false; /* Not handled in ARM mode; the code below is unreachable. */
15839 int regno = REGNO (operands[0]);
15840 if (!peep2_reg_dead_p (4, operands[0]))
15841 {
15842 /* When the input register is even and is not dead after the
15843 pattern, it has to hold the second constant but we cannot
15844 form a legal STRD in ARM mode with this register as the second
15845 register. */
15846 if (regno % 2 == 0)
15847 return false;
15848
15849 /* Is regno-1 free? */
15850 SET_HARD_REG_SET (regset);
15851 CLEAR_HARD_REG_BIT(regset, regno - 1);
15852 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15853 if (tmp == NULL_RTX)
15854 return false;
15855
15856 operands[0] = tmp;
15857 }
15858 else
15859 {
15860 /* Find a DImode register. */
15861 CLEAR_HARD_REG_SET (regset);
15862 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15863 if (tmp != NULL_RTX)
15864 {
15865 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15866 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15867 }
15868 else
15869 {
15870 /* Can we use the input register to form a DI register? */
15871 SET_HARD_REG_SET (regset);
15872 CLEAR_HARD_REG_BIT(regset,
15873 regno % 2 == 0 ? regno + 1 : regno - 1);
15874 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15875 if (tmp == NULL_RTX)
15876 return false;
15877 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15878 }
15879 }
15880
15881 gcc_assert (operands[0] != NULL_RTX);
15882 gcc_assert (operands[1] != NULL_RTX);
15883 gcc_assert (REGNO (operands[0]) % 2 == 0);
15884 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15885 }
15886 }
15887
15888 /* Make sure the instructions are ordered with lower memory access first. */
15889 if (offsets[0] > offsets[1])
15890 {
15891 gap = offsets[0] - offsets[1];
15892 offset = offsets[1];
15893
15894 /* Swap the instructions such that lower memory is accessed first. */
15895 std::swap (operands[0], operands[1]);
15896 std::swap (operands[2], operands[3]);
15897 if (const_store)
15898 std::swap (operands[4], operands[5]);
15899 }
15900 else
15901 {
15902 gap = offsets[1] - offsets[0];
15903 offset = offsets[0];
15904 }
15905
15906 /* Make sure accesses are to consecutive memory locations. */
15907 if (gap != 4)
15908 return false;
15909
15910 /* Make sure we generate legal instructions. */
15911 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15912 false, load))
15913 return true;
15914
15915 /* In Thumb state, where registers are almost unconstrained, there
15916 is little hope to fix it. */
15917 if (TARGET_THUMB2)
15918 return false;
15919
15920 if (load && commute)
15921 {
15922 /* Try reordering registers. */
15923 std::swap (operands[0], operands[1]);
15924 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15925 false, load))
15926 return true;
15927 }
15928
15929 if (const_store)
15930 {
15931 /* If input registers are dead after this pattern, they can be
15932 reordered or replaced by other registers that are free in the
15933 current pattern. */
15934 if (!peep2_reg_dead_p (4, operands[0])
15935 || !peep2_reg_dead_p (4, operands[1]))
15936 return false;
15937
15938 /* Try to reorder the input registers. */
15939 /* For example, the code
15940 mov r0, 0
15941 mov r1, 1
15942 str r1, [r2]
15943 str r0, [r2, #4]
15944 can be transformed into
15945 mov r1, 0
15946 mov r0, 1
15947 strd r0, [r2]
15948 */
15949 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15950 false, false))
15951 {
15952 std::swap (operands[0], operands[1]);
15953 return true;
15954 }
15955
15956 /* Try to find a free DI register. */
15957 CLEAR_HARD_REG_SET (regset);
15958 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15959 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15960 while (true)
15961 {
15962 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15963 if (tmp == NULL_RTX)
15964 return false;
15965
15966 /* DREG must be an even-numbered register in DImode.
15967 Split it into SI registers. */
15968 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15969 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15970 gcc_assert (operands[0] != NULL_RTX);
15971 gcc_assert (operands[1] != NULL_RTX);
15972 gcc_assert (REGNO (operands[0]) % 2 == 0);
15973 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15974
15975 return (operands_ok_ldrd_strd (operands[0], operands[1],
15976 base, offset,
15977 false, load));
15978 }
15979 }
15980
15981 return false;
15982 }
15983
15984
15985
15986 \f
15987 /* Print a symbolic form of X to the debug file, F. */
15988 static void
15989 arm_print_value (FILE *f, rtx x)
15990 {
15991 switch (GET_CODE (x))
15992 {
15993 case CONST_INT:
15994 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15995 return;
15996
15997 case CONST_DOUBLE:
15998 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15999 return;
16000
16001 case CONST_VECTOR:
16002 {
16003 int i;
16004
16005 fprintf (f, "<");
16006 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16007 {
16008 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16009 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16010 fputc (',', f);
16011 }
16012 fprintf (f, ">");
16013 }
16014 return;
16015
16016 case CONST_STRING:
16017 fprintf (f, "\"%s\"", XSTR (x, 0));
16018 return;
16019
16020 case SYMBOL_REF:
16021 fprintf (f, "`%s'", XSTR (x, 0));
16022 return;
16023
16024 case LABEL_REF:
16025 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16026 return;
16027
16028 case CONST:
16029 arm_print_value (f, XEXP (x, 0));
16030 return;
16031
16032 case PLUS:
16033 arm_print_value (f, XEXP (x, 0));
16034 fprintf (f, "+");
16035 arm_print_value (f, XEXP (x, 1));
16036 return;
16037
16038 case PC:
16039 fprintf (f, "pc");
16040 return;
16041
16042 default:
16043 fprintf (f, "????");
16044 return;
16045 }
16046 }
16047 \f
16048 /* Routines for manipulation of the constant pool. */
16049
16050 /* Arm instructions cannot load a large constant directly into a
16051 register; they have to come from a pc relative load. The constant
16052 must therefore be placed in the addressable range of the pc
16053 relative load. Depending on the precise pc relative load
16054 instruction the range is somewhere between 256 bytes and 4k. This
16055 means that we often have to dump a constant inside a function, and
16056 generate code to branch around it.
16057
16058 It is important to minimize this, since the branches will slow
16059 things down and make the code larger.
16060
16061 Normally we can hide the table after an existing unconditional
16062 branch so that there is no interruption of the flow, but in the
16063 worst case the code looks like this:
16064
16065 ldr rn, L1
16066 ...
16067 b L2
16068 align
16069 L1: .long value
16070 L2:
16071 ...
16072
16073 ldr rn, L3
16074 ...
16075 b L4
16076 align
16077 L3: .long value
16078 L4:
16079 ...
16080
16081 We fix this by performing a scan after scheduling, which notices
16082 which instructions need to have their operands fetched from the
16083 constant table and builds the table.
16084
16085 The algorithm starts by building a table of all the constants that
16086 need fixing up and all the natural barriers in the function (places
16087 where a constant table can be dropped without breaking the flow).
16088 For each fixup we note how far the pc-relative replacement will be
16089 able to reach and the offset of the instruction into the function.
16090
16091 Having built the table we then group the fixes together to form
16092 tables that are as large as possible (subject to addressing
16093 constraints) and emit each table of constants after the last
16094 barrier that is within range of all the instructions in the group.
16095 If a group does not contain a barrier, then we forcibly create one
16096 by inserting a jump instruction into the flow. Once the table has
16097 been inserted, the insns are then modified to reference the
16098 relevant entry in the pool.
16099
16100 Possible enhancements to the algorithm (not implemented) are:
16101
16102 1) For some processors and object formats, there may be benefit in
16103 aligning the pools to the start of cache lines; this alignment
16104 would need to be taken into account when calculating addressability
16105 of a pool. */
16106
16107 /* These typedefs are located at the start of this file, so that
16108 they can be used in the prototypes there. This comment is to
16109 remind readers of that fact so that the following structures
16110 can be understood more easily.
16111
16112 typedef struct minipool_node Mnode;
16113 typedef struct minipool_fixup Mfix; */
16114
16115 struct minipool_node
16116 {
16117 /* Doubly linked chain of entries. */
16118 Mnode * next;
16119 Mnode * prev;
16120 /* The maximum offset into the code that this entry can be placed. While
16121 pushing fixes for forward references, all entries are sorted in order
16122 of increasing max_address. */
16123 HOST_WIDE_INT max_address;
16124 /* Similarly for an entry inserted for a backwards ref. */
16125 HOST_WIDE_INT min_address;
16126 /* The number of fixes referencing this entry. This can become zero
16127 if we "unpush" an entry. In this case we ignore the entry when we
16128 come to emit the code. */
16129 int refcount;
16130 /* The offset from the start of the minipool. */
16131 HOST_WIDE_INT offset;
16132 /* The value in table. */
16133 rtx value;
16134 /* The mode of value. */
16135 machine_mode mode;
16136 /* The size of the value. With iWMMXt enabled
16137 sizes > 4 also imply an alignment of 8 bytes. */
16138 int fix_size;
16139 };
16140
16141 struct minipool_fixup
16142 {
16143 Mfix * next;
16144 rtx_insn * insn;
16145 HOST_WIDE_INT address;
16146 rtx * loc;
16147 machine_mode mode;
16148 int fix_size;
16149 rtx value;
16150 Mnode * minipool;
16151 HOST_WIDE_INT forwards;
16152 HOST_WIDE_INT backwards;
16153 };
16154
16155 /* Fixes less than a word need padding out to a word boundary. */
16156 #define MINIPOOL_FIX_SIZE(mode) \
16157 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
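/* For example, MINIPOOL_FIX_SIZE (HImode) and MINIPOOL_FIX_SIZE (SImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) is 8. */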
16158
16159 static Mnode * minipool_vector_head;
16160 static Mnode * minipool_vector_tail;
16161 static rtx_code_label *minipool_vector_label;
16162 static int minipool_pad;
16163
16164 /* The linked list of all minipool fixes required for this function. */
16165 Mfix * minipool_fix_head;
16166 Mfix * minipool_fix_tail;
16167 /* The fix entry for the current minipool, once it has been placed. */
16168 Mfix * minipool_barrier;
16169
16170 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16171 #define JUMP_TABLES_IN_TEXT_SECTION 0
16172 #endif
16173
16174 static HOST_WIDE_INT
16175 get_jump_table_size (rtx_jump_table_data *insn)
16176 {
16177 /* ADDR_VECs only take room if read-only data goes into the text
16178 section. */
16179 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16180 {
16181 rtx body = PATTERN (insn);
16182 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16183 HOST_WIDE_INT size;
16184 HOST_WIDE_INT modesize;
16185
16186 modesize = GET_MODE_SIZE (GET_MODE (body));
16187 size = modesize * XVECLEN (body, elt);
16188 switch (modesize)
16189 {
16190 case 1:
16191 /* Round up size of TBB table to a halfword boundary. */
16192 size = (size + 1) & ~(HOST_WIDE_INT)1;
16193 break;
16194 case 2:
16195 /* No padding necessary for TBH. */
16196 break;
16197 case 4:
16198 /* Add two bytes for alignment on Thumb. */
16199 if (TARGET_THUMB)
16200 size += 2;
16201 break;
16202 default:
16203 gcc_unreachable ();
16204 }
16205 return size;
16206 }
16207
16208 return 0;
16209 }
16210
16211 /* Return the maximum amount of padding that will be inserted before
16212 label LABEL. */
16213
16214 static HOST_WIDE_INT
16215 get_label_padding (rtx label)
16216 {
16217 HOST_WIDE_INT align, min_insn_size;
16218
16219 align = 1 << label_to_alignment (label);
16220 min_insn_size = TARGET_THUMB ? 2 : 4;
16221 return align > min_insn_size ? align - min_insn_size : 0;
16222 }
16223
16224 /* Move a minipool fix MP from its current location to before MAX_MP.
16225 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16226 constraints may need updating. */
16227 static Mnode *
16228 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16229 HOST_WIDE_INT max_address)
16230 {
16231 /* The code below assumes these are different. */
16232 gcc_assert (mp != max_mp);
16233
16234 if (max_mp == NULL)
16235 {
16236 if (max_address < mp->max_address)
16237 mp->max_address = max_address;
16238 }
16239 else
16240 {
16241 if (max_address > max_mp->max_address - mp->fix_size)
16242 mp->max_address = max_mp->max_address - mp->fix_size;
16243 else
16244 mp->max_address = max_address;
16245
16246 /* Unlink MP from its current position. Since max_mp is non-null,
16247 mp->prev must be non-null. */
16248 mp->prev->next = mp->next;
16249 if (mp->next != NULL)
16250 mp->next->prev = mp->prev;
16251 else
16252 minipool_vector_tail = mp->prev;
16253
16254 /* Re-insert it before MAX_MP. */
16255 mp->next = max_mp;
16256 mp->prev = max_mp->prev;
16257 max_mp->prev = mp;
16258
16259 if (mp->prev != NULL)
16260 mp->prev->next = mp;
16261 else
16262 minipool_vector_head = mp;
16263 }
16264
16265 /* Save the new entry. */
16266 max_mp = mp;
16267
16268 /* Scan over the preceding entries and adjust their addresses as
16269 required. */
16270 while (mp->prev != NULL
16271 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16272 {
16273 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16274 mp = mp->prev;
16275 }
16276
16277 return max_mp;
16278 }
16279
16280 /* Add a constant to the minipool for a forward reference. Returns the
16281 node added or NULL if the constant will not fit in this pool. */
16282 static Mnode *
16283 add_minipool_forward_ref (Mfix *fix)
16284 {
16285 /* If set, max_mp is the first pool_entry that has a lower
16286 constraint than the one we are trying to add. */
16287 Mnode * max_mp = NULL;
16288 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16289 Mnode * mp;
16290
16291 /* If the minipool starts before the end of FIX->INSN then this FIX
16292 can not be placed into the current pool. Furthermore, adding the
16293 new constant pool entry may cause the pool to start FIX_SIZE bytes
16294 earlier. */
16295 if (minipool_vector_head &&
16296 (fix->address + get_attr_length (fix->insn)
16297 >= minipool_vector_head->max_address - fix->fix_size))
16298 return NULL;
16299
16300 /* Scan the pool to see if a constant with the same value has
16301 already been added. While we are doing this, also note the
16302 location where we must insert the constant if it doesn't already
16303 exist. */
16304 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16305 {
16306 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16307 && fix->mode == mp->mode
16308 && (!LABEL_P (fix->value)
16309 || (CODE_LABEL_NUMBER (fix->value)
16310 == CODE_LABEL_NUMBER (mp->value)))
16311 && rtx_equal_p (fix->value, mp->value))
16312 {
16313 /* More than one fix references this entry. */
16314 mp->refcount++;
16315 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16316 }
16317
16318 /* Note the insertion point if necessary. */
16319 if (max_mp == NULL
16320 && mp->max_address > max_address)
16321 max_mp = mp;
16322
16323 /* If we are inserting an 8-byte aligned quantity and
16324 we have not already found an insertion point, then
16325 make sure that all such 8-byte aligned quantities are
16326 placed at the start of the pool. */
16327 if (ARM_DOUBLEWORD_ALIGN
16328 && max_mp == NULL
16329 && fix->fix_size >= 8
16330 && mp->fix_size < 8)
16331 {
16332 max_mp = mp;
16333 max_address = mp->max_address;
16334 }
16335 }
16336
16337 /* The value is not currently in the minipool, so we need to create
16338 a new entry for it. If MAX_MP is NULL, the entry will be put on
16339 the end of the list since the placement is less constrained than
16340 any existing entry. Otherwise, we insert the new fix before
16341 MAX_MP and, if necessary, adjust the constraints on the other
16342 entries. */
16343 mp = XNEW (Mnode);
16344 mp->fix_size = fix->fix_size;
16345 mp->mode = fix->mode;
16346 mp->value = fix->value;
16347 mp->refcount = 1;
16348 /* Not yet required for a backwards ref. */
16349 mp->min_address = -65536;
16350
16351 if (max_mp == NULL)
16352 {
16353 mp->max_address = max_address;
16354 mp->next = NULL;
16355 mp->prev = minipool_vector_tail;
16356
16357 if (mp->prev == NULL)
16358 {
16359 minipool_vector_head = mp;
16360 minipool_vector_label = gen_label_rtx ();
16361 }
16362 else
16363 mp->prev->next = mp;
16364
16365 minipool_vector_tail = mp;
16366 }
16367 else
16368 {
16369 if (max_address > max_mp->max_address - mp->fix_size)
16370 mp->max_address = max_mp->max_address - mp->fix_size;
16371 else
16372 mp->max_address = max_address;
16373
16374 mp->next = max_mp;
16375 mp->prev = max_mp->prev;
16376 max_mp->prev = mp;
16377 if (mp->prev != NULL)
16378 mp->prev->next = mp;
16379 else
16380 minipool_vector_head = mp;
16381 }
16382
16383 /* Save the new entry. */
16384 max_mp = mp;
16385
16386 /* Scan over the preceding entries and adjust their addresses as
16387 required. */
16388 while (mp->prev != NULL
16389 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16390 {
16391 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16392 mp = mp->prev;
16393 }
16394
16395 return max_mp;
16396 }
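/* Illustrative sketch (simplified, hypothetical names): the pool is a doubly
   linked list kept ordered by MAX_ADDRESS.  A new entry is spliced in front
   of the first node with a larger MAX_ADDRESS, and the final loop above then
   tightens every predecessor so the entries ahead of the new one still fit
   before their own deadlines.  A stripped-down model of that tightening: */
#if 0
struct mini_node
{
  struct mini_node *prev, *next;
  long fix_size;       /* Bytes this entry occupies in the pool.  */
  long max_address;    /* Latest address at which it may be emitted.  */
};

/* Mirror of the clamping loop at the end of add_minipool_forward_ref.  */
static void
tighten_predecessors (struct mini_node *node)
{
  while (node->prev != NULL
         && node->prev->max_address > node->max_address - node->prev->fix_size)
    {
      node->prev->max_address = node->max_address - node->prev->fix_size;
      node = node->prev;
    }
}
#endif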
16397
16398 static Mnode *
16399 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16400 HOST_WIDE_INT min_address)
16401 {
16402 HOST_WIDE_INT offset;
16403
16404 /* The code below assumes these are different. */
16405 gcc_assert (mp != min_mp);
16406
16407 if (min_mp == NULL)
16408 {
16409 if (min_address > mp->min_address)
16410 mp->min_address = min_address;
16411 }
16412 else
16413 {
16414 /* We will adjust this below if it is too loose. */
16415 mp->min_address = min_address;
16416
16417 /* Unlink MP from its current position. Since min_mp is non-null,
16418 mp->next must be non-null. */
16419 mp->next->prev = mp->prev;
16420 if (mp->prev != NULL)
16421 mp->prev->next = mp->next;
16422 else
16423 minipool_vector_head = mp->next;
16424
16425 /* Reinsert it after MIN_MP. */
16426 mp->prev = min_mp;
16427 mp->next = min_mp->next;
16428 min_mp->next = mp;
16429 if (mp->next != NULL)
16430 mp->next->prev = mp;
16431 else
16432 minipool_vector_tail = mp;
16433 }
16434
16435 min_mp = mp;
16436
16437 offset = 0;
16438 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16439 {
16440 mp->offset = offset;
16441 if (mp->refcount > 0)
16442 offset += mp->fix_size;
16443
16444 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16445 mp->next->min_address = mp->min_address + mp->fix_size;
16446 }
16447
16448 return min_mp;
16449 }
16450
16451 /* Add a constant to the minipool for a backward reference. Returns the
16452 node added or NULL if the constant will not fit in this pool.
16453
16454 Note that the code for insertion for a backwards reference can be
16455 somewhat confusing because the calculated offsets for each fix do
16456 not take into account the size of the pool (which is still under
16457 construction). */
16458 static Mnode *
16459 add_minipool_backward_ref (Mfix *fix)
16460 {
16461 /* If set, min_mp is the last pool_entry that has a lower constraint
16462 than the one we are trying to add. */
16463 Mnode *min_mp = NULL;
16464 /* This can be negative, since it is only a constraint. */
16465 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16466 Mnode *mp;
16467
16468 /* If we can't reach the current pool from this insn, or if we can't
16469 insert this entry at the end of the pool without pushing other
16470 fixes out of range, then we don't try. This ensures that we
16471 can't fail later on. */
16472 if (min_address >= minipool_barrier->address
16473 || (minipool_vector_tail->min_address + fix->fix_size
16474 >= minipool_barrier->address))
16475 return NULL;
16476
16477 /* Scan the pool to see if a constant with the same value has
16478 already been added. While we are doing this, also note the
16479 location where we must insert the constant if it doesn't already
16480 exist. */
16481 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16482 {
16483 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16484 && fix->mode == mp->mode
16485 && (!LABEL_P (fix->value)
16486 || (CODE_LABEL_NUMBER (fix->value)
16487 == CODE_LABEL_NUMBER (mp->value)))
16488 && rtx_equal_p (fix->value, mp->value)
16489 /* Check that there is enough slack to move this entry to the
16490 end of the table (this is conservative). */
16491 && (mp->max_address
16492 > (minipool_barrier->address
16493 + minipool_vector_tail->offset
16494 + minipool_vector_tail->fix_size)))
16495 {
16496 mp->refcount++;
16497 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16498 }
16499
16500 if (min_mp != NULL)
16501 mp->min_address += fix->fix_size;
16502 else
16503 {
16504 /* Note the insertion point if necessary. */
16505 if (mp->min_address < min_address)
16506 {
16507 /* For now, we do not allow the insertion of nodes requiring 8-byte
16508 alignment anywhere but at the start of the pool. */
16509 if (ARM_DOUBLEWORD_ALIGN
16510 && fix->fix_size >= 8 && mp->fix_size < 8)
16511 return NULL;
16512 else
16513 min_mp = mp;
16514 }
16515 else if (mp->max_address
16516 < minipool_barrier->address + mp->offset + fix->fix_size)
16517 {
16518 /* Inserting before this entry would push the fix beyond
16519 its maximum address (which can happen if we have
16520 re-located a forwards fix); force the new fix to come
16521 after it. */
16522 if (ARM_DOUBLEWORD_ALIGN
16523 && fix->fix_size >= 8 && mp->fix_size < 8)
16524 return NULL;
16525 else
16526 {
16527 min_mp = mp;
16528 min_address = mp->min_address + fix->fix_size;
16529 }
16530 }
16531 /* Do not insert a non-8-byte aligned quantity before 8-byte
16532 aligned quantities. */
16533 else if (ARM_DOUBLEWORD_ALIGN
16534 && fix->fix_size < 8
16535 && mp->fix_size >= 8)
16536 {
16537 min_mp = mp;
16538 min_address = mp->min_address + fix->fix_size;
16539 }
16540 }
16541 }
16542
16543 /* We need to create a new entry. */
16544 mp = XNEW (Mnode);
16545 mp->fix_size = fix->fix_size;
16546 mp->mode = fix->mode;
16547 mp->value = fix->value;
16548 mp->refcount = 1;
16549 mp->max_address = minipool_barrier->address + 65536;
16550
16551 mp->min_address = min_address;
16552
16553 if (min_mp == NULL)
16554 {
16555 mp->prev = NULL;
16556 mp->next = minipool_vector_head;
16557
16558 if (mp->next == NULL)
16559 {
16560 minipool_vector_tail = mp;
16561 minipool_vector_label = gen_label_rtx ();
16562 }
16563 else
16564 mp->next->prev = mp;
16565
16566 minipool_vector_head = mp;
16567 }
16568 else
16569 {
16570 mp->next = min_mp->next;
16571 mp->prev = min_mp;
16572 min_mp->next = mp;
16573
16574 if (mp->next != NULL)
16575 mp->next->prev = mp;
16576 else
16577 minipool_vector_tail = mp;
16578 }
16579
16580 /* Save the new entry. */
16581 min_mp = mp;
16582
16583 if (mp->prev)
16584 mp = mp->prev;
16585 else
16586 mp->offset = 0;
16587
16588 /* Scan over the following entries and adjust their offsets. */
16589 while (mp->next != NULL)
16590 {
16591 if (mp->next->min_address < mp->min_address + mp->fix_size)
16592 mp->next->min_address = mp->min_address + mp->fix_size;
16593
16594 if (mp->refcount)
16595 mp->next->offset = mp->offset + mp->fix_size;
16596 else
16597 mp->next->offset = mp->offset;
16598
16599 mp = mp->next;
16600 }
16601
16602 return min_mp;
16603 }
16604
16605 static void
16606 assign_minipool_offsets (Mfix *barrier)
16607 {
16608 HOST_WIDE_INT offset = 0;
16609 Mnode *mp;
16610
16611 minipool_barrier = barrier;
16612
16613 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16614 {
16615 mp->offset = offset;
16616
16617 if (mp->refcount > 0)
16618 offset += mp->fix_size;
16619 }
16620 }
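/* Worked example (illustrative): for three entries of sizes 4, 8 and 4 where
   the middle one has REFCOUNT == 0, the loop above assigns offsets 0, 4 and
   4 -- a dead entry still receives an offset but contributes no size, and
   dump_minipool later skips it entirely.  */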
16621
16622 /* Output the literal table. */
16623 static void
16624 dump_minipool (rtx_insn *scan)
16625 {
16626 Mnode * mp;
16627 Mnode * nmp;
16628 int align64 = 0;
16629
16630 if (ARM_DOUBLEWORD_ALIGN)
16631 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16632 if (mp->refcount > 0 && mp->fix_size >= 8)
16633 {
16634 align64 = 1;
16635 break;
16636 }
16637
16638 if (dump_file)
16639 fprintf (dump_file,
16640 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16641 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16642
16643 scan = emit_label_after (gen_label_rtx (), scan);
16644 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16645 scan = emit_label_after (minipool_vector_label, scan);
16646
16647 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16648 {
16649 if (mp->refcount > 0)
16650 {
16651 if (dump_file)
16652 {
16653 fprintf (dump_file,
16654 ";; Offset %u, min %ld, max %ld ",
16655 (unsigned) mp->offset, (unsigned long) mp->min_address,
16656 (unsigned long) mp->max_address);
16657 arm_print_value (dump_file, mp->value);
16658 fputc ('\n', dump_file);
16659 }
16660
16661 switch (GET_MODE_SIZE (mp->mode))
16662 {
16663 #ifdef HAVE_consttable_1
16664 case 1:
16665 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16666 break;
16667
16668 #endif
16669 #ifdef HAVE_consttable_2
16670 case 2:
16671 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16672 break;
16673
16674 #endif
16675 #ifdef HAVE_consttable_4
16676 case 4:
16677 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16678 break;
16679
16680 #endif
16681 #ifdef HAVE_consttable_8
16682 case 8:
16683 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16684 break;
16685
16686 #endif
16687 #ifdef HAVE_consttable_16
16688 case 16:
16689 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16690 break;
16691
16692 #endif
16693 default:
16694 gcc_unreachable ();
16695 }
16696 }
16697
16698 nmp = mp->next;
16699 free (mp);
16700 }
16701
16702 minipool_vector_head = minipool_vector_tail = NULL;
16703 scan = emit_insn_after (gen_consttable_end (), scan);
16704 scan = emit_barrier_after (scan);
16705 }
16706
16707 /* Return the cost of forcibly inserting a barrier after INSN. */
16708 static int
16709 arm_barrier_cost (rtx_insn *insn)
16710 {
16711 /* Basing the location of the pool on the loop depth is preferable,
16712 but at the moment, the basic block information seems to be
16713 corrupt by this stage of the compilation. */
16714 int base_cost = 50;
16715 rtx_insn *next = next_nonnote_insn (insn);
16716
16717 if (next != NULL && LABEL_P (next))
16718 base_cost -= 20;
16719
16720 switch (GET_CODE (insn))
16721 {
16722 case CODE_LABEL:
16723 /* It will always be better to place the table before the label, rather
16724 than after it. */
16725 return 50;
16726
16727 case INSN:
16728 case CALL_INSN:
16729 return base_cost;
16730
16731 case JUMP_INSN:
16732 return base_cost - 10;
16733
16734 default:
16735 return base_cost + 10;
16736 }
16737 }
16738
16739 /* Find the best place in the insn stream in the range
16740 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16741 Create the barrier by inserting a jump and add a new fix entry for
16742 it. */
16743 static Mfix *
16744 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16745 {
16746 HOST_WIDE_INT count = 0;
16747 rtx_barrier *barrier;
16748 rtx_insn *from = fix->insn;
16749 /* The instruction after which we will insert the jump. */
16750 rtx_insn *selected = NULL;
16751 int selected_cost;
16752 /* The address at which the jump instruction will be placed. */
16753 HOST_WIDE_INT selected_address;
16754 Mfix * new_fix;
16755 HOST_WIDE_INT max_count = max_address - fix->address;
16756 rtx_code_label *label = gen_label_rtx ();
16757
16758 selected_cost = arm_barrier_cost (from);
16759 selected_address = fix->address;
16760
16761 while (from && count < max_count)
16762 {
16763 rtx_jump_table_data *tmp;
16764 int new_cost;
16765
16766 /* This code shouldn't have been called if there was a natural barrier
16767 within range. */
16768 gcc_assert (!BARRIER_P (from));
16769
16770 /* Count the length of this insn. This must stay in sync with the
16771 code that pushes minipool fixes. */
16772 if (LABEL_P (from))
16773 count += get_label_padding (from);
16774 else
16775 count += get_attr_length (from);
16776
16777 /* If there is a jump table, add its length. */
16778 if (tablejump_p (from, NULL, &tmp))
16779 {
16780 count += get_jump_table_size (tmp);
16781
16782 /* Jump tables aren't in a basic block, so base the cost on
16783 the dispatch insn. If we select this location, we will
16784 still put the pool after the table. */
16785 new_cost = arm_barrier_cost (from);
16786
16787 if (count < max_count
16788 && (!selected || new_cost <= selected_cost))
16789 {
16790 selected = tmp;
16791 selected_cost = new_cost;
16792 selected_address = fix->address + count;
16793 }
16794
16795 /* Continue after the dispatch table. */
16796 from = NEXT_INSN (tmp);
16797 continue;
16798 }
16799
16800 new_cost = arm_barrier_cost (from);
16801
16802 if (count < max_count
16803 && (!selected || new_cost <= selected_cost))
16804 {
16805 selected = from;
16806 selected_cost = new_cost;
16807 selected_address = fix->address + count;
16808 }
16809
16810 from = NEXT_INSN (from);
16811 }
16812
16813 /* Make sure that we found a place to insert the jump. */
16814 gcc_assert (selected);
16815
16816 /* Make sure we do not split a call and its corresponding
16817 CALL_ARG_LOCATION note. */
16818 if (CALL_P (selected))
16819 {
16820 rtx_insn *next = NEXT_INSN (selected);
16821 if (next && NOTE_P (next)
16822 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16823 selected = next;
16824 }
16825
16826 /* Create a new JUMP_INSN that branches around a barrier. */
16827 from = emit_jump_insn_after (gen_jump (label), selected);
16828 JUMP_LABEL (from) = label;
16829 barrier = emit_barrier_after (from);
16830 emit_label_after (label, barrier);
16831
16832 /* Create a minipool barrier entry for the new barrier. */
16833 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16834 new_fix->insn = barrier;
16835 new_fix->address = selected_address;
16836 new_fix->next = fix->next;
16837 fix->next = new_fix;
16838
16839 return new_fix;
16840 }
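/* Illustrative note (simplified): the net effect on the insn stream is a
   branch around the spot where the pool will later be dumped, e.g.

       b       .Lskip          @ the JUMP_INSN emitted above
       @ <barrier -- dump_minipool places the literal table here>
   .Lskip:

   so execution never falls through into the constant data.  The label name
   is illustrative.  */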
16841
16842 /* Record that there is a natural barrier in the insn stream at
16843 ADDRESS. */
16844 static void
16845 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16846 {
16847 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16848
16849 fix->insn = insn;
16850 fix->address = address;
16851
16852 fix->next = NULL;
16853 if (minipool_fix_head != NULL)
16854 minipool_fix_tail->next = fix;
16855 else
16856 minipool_fix_head = fix;
16857
16858 minipool_fix_tail = fix;
16859 }
16860
16861 /* Record INSN, which will need fixing up to load a value from the
16862 minipool. ADDRESS is the offset of the insn since the start of the
16863 function; LOC is a pointer to the part of the insn which requires
16864 fixing; VALUE is the constant that must be loaded, which is of type
16865 MODE. */
16866 static void
16867 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16868 machine_mode mode, rtx value)
16869 {
16870 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16871
16872 fix->insn = insn;
16873 fix->address = address;
16874 fix->loc = loc;
16875 fix->mode = mode;
16876 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16877 fix->value = value;
16878 fix->forwards = get_attr_pool_range (insn);
16879 fix->backwards = get_attr_neg_pool_range (insn);
16880 fix->minipool = NULL;
16881
16882 /* If an insn doesn't have a range defined for it, then it isn't
16883 expecting to be reworked by this code. Better to stop now than
16884 to generate duff assembly code. */
16885 gcc_assert (fix->forwards || fix->backwards);
16886
16887 /* If an entry requires 8-byte alignment then assume all constant pools
16888 require 4 bytes of padding. Trying to do this later on a per-pool
16889 basis is awkward because existing pool entries have to be modified. */
16890 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16891 minipool_pad = 4;
16892
16893 if (dump_file)
16894 {
16895 fprintf (dump_file,
16896 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16897 GET_MODE_NAME (mode),
16898 INSN_UID (insn), (unsigned long) address,
16899 -1 * (long)fix->backwards, (long)fix->forwards);
16900 arm_print_value (dump_file, fix->value);
16901 fprintf (dump_file, "\n");
16902 }
16903
16904 /* Add it to the chain of fixes. */
16905 fix->next = NULL;
16906
16907 if (minipool_fix_head != NULL)
16908 minipool_fix_tail->next = fix;
16909 else
16910 minipool_fix_head = fix;
16911
16912 minipool_fix_tail = fix;
16913 }
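/* Illustrative note: a fix at ADDRESS whose insn has pool_range FORWARDS and
   neg_pool_range BACKWARDS constrains its pool entry to roughly the window
   [ADDRESS - BACKWARDS, ADDRESS + FORWARDS - minipool_pad]; the forward and
   backward insertion routines above derive their MAX_ADDRESS and MIN_ADDRESS
   bounds from exactly these two attributes.  */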
16914
16915 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16916 constant inline rather than loading it from a literal pool.  Returns 99
16917 if we always want the value synthesized. */
16918 int
16919 arm_max_const_double_inline_cost ()
16920 {
16921 /* Let the value get synthesized to avoid the use of literal pools. */
16922 if (arm_disable_literal_pool)
16923 return 99;
16924
16925 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16926 }
16927
16928 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16929 Returns the number of insns needed, or 99 if we don't know how to
16930 do it. */
16931 int
16932 arm_const_double_inline_cost (rtx val)
16933 {
16934 rtx lowpart, highpart;
16935 machine_mode mode;
16936
16937 mode = GET_MODE (val);
16938
16939 if (mode == VOIDmode)
16940 mode = DImode;
16941
16942 gcc_assert (GET_MODE_SIZE (mode) == 8);
16943
16944 lowpart = gen_lowpart (SImode, val);
16945 highpart = gen_highpart_mode (SImode, mode, val);
16946
16947 gcc_assert (CONST_INT_P (lowpart));
16948 gcc_assert (CONST_INT_P (highpart));
16949
16950 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16951 NULL_RTX, NULL_RTX, 0, 0)
16952 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16953 NULL_RTX, NULL_RTX, 0, 0));
16954 }
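/* Illustrative sketch (not used by the compiler): the cost above is just the
   sum of the costs of the two 32-bit halves of the constant.  A
   self-contained model of that split on a plain 64-bit integer, with
   hypothetical names: */
#if 0
#include <stdint.h>

static void
split_di_constant (uint64_t val, uint32_t *lowpart, uint32_t *highpart)
{
  *lowpart = (uint32_t) (val & 0xffffffffULL);  /* like gen_lowpart (SImode, val) */
  *highpart = (uint32_t) (val >> 32);           /* like gen_highpart_mode (SImode, DImode, val) */
}
#endif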
16955
16956 /* Cost of loading a SImode constant. */
16957 static inline int
16958 arm_const_inline_cost (enum rtx_code code, rtx val)
16959 {
16960 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16961 NULL_RTX, NULL_RTX, 1, 0);
16962 }
16963
16964 /* Return true if it is worthwhile to split a 64-bit constant into two
16965 32-bit operations. This is the case if optimizing for size, or
16966 if we have load delay slots, or if one 32-bit part can be done with
16967 a single data operation. */
16968 bool
16969 arm_const_double_by_parts (rtx val)
16970 {
16971 machine_mode mode = GET_MODE (val);
16972 rtx part;
16973
16974 if (optimize_size || arm_ld_sched)
16975 return true;
16976
16977 if (mode == VOIDmode)
16978 mode = DImode;
16979
16980 part = gen_highpart_mode (SImode, mode, val);
16981
16982 gcc_assert (CONST_INT_P (part));
16983
16984 if (const_ok_for_arm (INTVAL (part))
16985 || const_ok_for_arm (~INTVAL (part)))
16986 return true;
16987
16988 part = gen_lowpart (SImode, val);
16989
16990 gcc_assert (CONST_INT_P (part));
16991
16992 if (const_ok_for_arm (INTVAL (part))
16993 || const_ok_for_arm (~INTVAL (part)))
16994 return true;
16995
16996 return false;
16997 }
16998
16999 /* Return true if it is possible to inline both the high and low parts
17000 of a 64-bit constant into 32-bit data processing instructions. */
17001 bool
17002 arm_const_double_by_immediates (rtx val)
17003 {
17004 machine_mode mode = GET_MODE (val);
17005 rtx part;
17006
17007 if (mode == VOIDmode)
17008 mode = DImode;
17009
17010 part = gen_highpart_mode (SImode, mode, val);
17011
17012 gcc_assert (CONST_INT_P (part));
17013
17014 if (!const_ok_for_arm (INTVAL (part)))
17015 return false;
17016
17017 part = gen_lowpart (SImode, val);
17018
17019 gcc_assert (CONST_INT_P (part));
17020
17021 if (!const_ok_for_arm (INTVAL (part)))
17022 return false;
17023
17024 return true;
17025 }
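/* Illustrative sketch (simplified, ARM state only, hypothetical names): the
   const_ok_for_arm checks above ultimately test whether each 32-bit half
   fits the classic data-processing immediate format -- an 8-bit value
   rotated right by an even amount.  A self-contained model: */
#if 0
#include <stdbool.h>
#include <stdint.h>

/* Rotate a 32-bit value left by N (0 <= N < 32).  */
static uint32_t
rol32 (uint32_t x, unsigned int n)
{
  return n ? (x << n) | (x >> (32 - n)) : x;
}

/* True if X can be encoded as an 8-bit value rotated right by an even
   amount; rotating left by that amount must recover a value <= 0xff.  */
static bool
arm_dp_immediate_ok (uint32_t x)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    if (rol32 (x, rot) <= 0xff)
      return true;
  return false;
}
#endif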
17026
17027 /* Scan INSN and note any of its operands that need fixing.
17028 If DO_PUSHES is false we do not actually push any of the fixups
17029 needed. */
17030 static void
17031 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17032 {
17033 int opno;
17034
17035 extract_constrain_insn (insn);
17036
17037 if (recog_data.n_alternatives == 0)
17038 return;
17039
17040 /* Fill in recog_op_alt with information about the constraints of
17041 this insn. */
17042 preprocess_constraints (insn);
17043
17044 const operand_alternative *op_alt = which_op_alt ();
17045 for (opno = 0; opno < recog_data.n_operands; opno++)
17046 {
17047 /* Things we need to fix can only occur in inputs. */
17048 if (recog_data.operand_type[opno] != OP_IN)
17049 continue;
17050
17051 /* If this alternative is a memory reference, then any mention
17052 of constants in this alternative is really to fool reload
17053 into allowing us to accept one there. We need to fix them up
17054 now so that we output the right code. */
17055 if (op_alt[opno].memory_ok)
17056 {
17057 rtx op = recog_data.operand[opno];
17058
17059 if (CONSTANT_P (op))
17060 {
17061 if (do_pushes)
17062 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17063 recog_data.operand_mode[opno], op);
17064 }
17065 else if (MEM_P (op)
17066 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17067 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17068 {
17069 if (do_pushes)
17070 {
17071 rtx cop = avoid_constant_pool_reference (op);
17072
17073 /* Casting the address of something to a mode narrower
17074 than a word can cause avoid_constant_pool_reference()
17075 to return the pool reference itself. That's no good to
17076 us here. Let's just hope that we can use the
17077 constant pool value directly. */
17078 if (op == cop)
17079 cop = get_pool_constant (XEXP (op, 0));
17080
17081 push_minipool_fix (insn, address,
17082 recog_data.operand_loc[opno],
17083 recog_data.operand_mode[opno], cop);
17084 }
17085
17086 }
17087 }
17088 }
17089
17090 return;
17091 }
17092
17093 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17094 be useful in the next conditional jump insn. */
17095
17096 static void
17097 thumb1_reorg (void)
17098 {
17099 basic_block bb;
17100
17101 FOR_EACH_BB_FN (bb, cfun)
17102 {
17103 rtx dest, src;
17104 rtx pat, op0, set = NULL;
17105 rtx_insn *prev, *insn = BB_END (bb);
17106 bool insn_clobbered = false;
17107
17108 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17109 insn = PREV_INSN (insn);
17110
17111 /* Find the last cbranchsi4_insn in basic block BB. */
17112 if (insn == BB_HEAD (bb)
17113 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17114 continue;
17115
17116 /* Get the register with which we are comparing. */
17117 pat = PATTERN (insn);
17118 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17119
17120 /* Find the first flag setting insn before INSN in basic block BB. */
17121 gcc_assert (insn != BB_HEAD (bb));
17122 for (prev = PREV_INSN (insn);
17123 (!insn_clobbered
17124 && prev != BB_HEAD (bb)
17125 && (NOTE_P (prev)
17126 || DEBUG_INSN_P (prev)
17127 || ((set = single_set (prev)) != NULL
17128 && get_attr_conds (prev) == CONDS_NOCOND)));
17129 prev = PREV_INSN (prev))
17130 {
17131 if (reg_set_p (op0, prev))
17132 insn_clobbered = true;
17133 }
17134
17135 /* Skip if op0 is clobbered by an insn other than PREV. */
17136 if (insn_clobbered)
17137 continue;
17138
17139 if (!set)
17140 continue;
17141
17142 dest = SET_DEST (set);
17143 src = SET_SRC (set);
17144 if (!low_register_operand (dest, SImode)
17145 || !low_register_operand (src, SImode))
17146 continue;
17147
17148 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17149 in INSN. Both src and dest of the move insn are checked. */
17150 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17151 {
17152 dest = copy_rtx (dest);
17153 src = copy_rtx (src);
17154 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17155 PATTERN (prev) = gen_rtx_SET (dest, src);
17156 INSN_CODE (prev) = -1;
17157 /* Set test register in INSN to dest. */
17158 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17159 INSN_CODE (insn) = -1;
17160 }
17161 }
17162 }
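/* Illustrative example (register numbers are made up): the rewrite above
   roughly turns

       mov     r1, r2          @ the flag-preserving move found as PREV
       ...
       cmp     r1, #0          @ part of the cbranchsi4_insn
       beq     .L3

   into

       subs    r1, r2, #0      @ sets Z exactly as the compare would
       ...
       beq     .L3

   so the separate compare can be omitted when the branch is re-output.  */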
17163
17164 /* Convert instructions to their cc-clobbering variant if possible, since
17165 that allows us to use smaller encodings. */
17166
17167 static void
17168 thumb2_reorg (void)
17169 {
17170 basic_block bb;
17171 regset_head live;
17172
17173 INIT_REG_SET (&live);
17174
17175 /* We are freeing block_for_insn in the toplev to keep compatibility
17176 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17177 compute_bb_for_insn ();
17178 df_analyze ();
17179
17180 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17181
17182 FOR_EACH_BB_FN (bb, cfun)
17183 {
17184 if ((current_tune->disparage_flag_setting_t16_encodings
17185 == tune_params::DISPARAGE_FLAGS_ALL)
17186 && optimize_bb_for_speed_p (bb))
17187 continue;
17188
17189 rtx_insn *insn;
17190 Convert_Action action = SKIP;
17191 Convert_Action action_for_partial_flag_setting
17192 = ((current_tune->disparage_flag_setting_t16_encodings
17193 != tune_params::DISPARAGE_FLAGS_NEITHER)
17194 && optimize_bb_for_speed_p (bb))
17195 ? SKIP : CONV;
17196
17197 COPY_REG_SET (&live, DF_LR_OUT (bb));
17198 df_simulate_initialize_backwards (bb, &live);
17199 FOR_BB_INSNS_REVERSE (bb, insn)
17200 {
17201 if (NONJUMP_INSN_P (insn)
17202 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17203 && GET_CODE (PATTERN (insn)) == SET)
17204 {
17205 action = SKIP;
17206 rtx pat = PATTERN (insn);
17207 rtx dst = XEXP (pat, 0);
17208 rtx src = XEXP (pat, 1);
17209 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17210
17211 if (UNARY_P (src) || BINARY_P (src))
17212 op0 = XEXP (src, 0);
17213
17214 if (BINARY_P (src))
17215 op1 = XEXP (src, 1);
17216
17217 if (low_register_operand (dst, SImode))
17218 {
17219 switch (GET_CODE (src))
17220 {
17221 case PLUS:
17222 /* Adding two registers and storing the result
17223 in the first source is already a 16-bit
17224 operation. */
17225 if (rtx_equal_p (dst, op0)
17226 && register_operand (op1, SImode))
17227 break;
17228
17229 if (low_register_operand (op0, SImode))
17230 {
17231 /* ADDS <Rd>,<Rn>,<Rm> */
17232 if (low_register_operand (op1, SImode))
17233 action = CONV;
17234 /* ADDS <Rdn>,#<imm8> */
17235 /* SUBS <Rdn>,#<imm8> */
17236 else if (rtx_equal_p (dst, op0)
17237 && CONST_INT_P (op1)
17238 && IN_RANGE (INTVAL (op1), -255, 255))
17239 action = CONV;
17240 /* ADDS <Rd>,<Rn>,#<imm3> */
17241 /* SUBS <Rd>,<Rn>,#<imm3> */
17242 else if (CONST_INT_P (op1)
17243 && IN_RANGE (INTVAL (op1), -7, 7))
17244 action = CONV;
17245 }
17246 /* ADCS <Rd>, <Rn> */
17247 else if (GET_CODE (XEXP (src, 0)) == PLUS
17248 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17249 && low_register_operand (XEXP (XEXP (src, 0), 1),
17250 SImode)
17251 && COMPARISON_P (op1)
17252 && cc_register (XEXP (op1, 0), VOIDmode)
17253 && maybe_get_arm_condition_code (op1) == ARM_CS
17254 && XEXP (op1, 1) == const0_rtx)
17255 action = CONV;
17256 break;
17257
17258 case MINUS:
17259 /* RSBS <Rd>,<Rn>,#0
17260 Not handled here: see NEG below. */
17261 /* SUBS <Rd>,<Rn>,#<imm3>
17262 SUBS <Rdn>,#<imm8>
17263 Not handled here: see PLUS above. */
17264 /* SUBS <Rd>,<Rn>,<Rm> */
17265 if (low_register_operand (op0, SImode)
17266 && low_register_operand (op1, SImode))
17267 action = CONV;
17268 break;
17269
17270 case MULT:
17271 /* MULS <Rdm>,<Rn>,<Rdm>
17272 As an exception to the rule, this is only used
17273 when optimizing for size since MULS is slow on all
17274 known implementations. We do not even want to use
17275 MULS in cold code, if optimizing for speed, so we
17276 test the global flag here. */
17277 if (!optimize_size)
17278 break;
17279 /* else fall through. */
17280 case AND:
17281 case IOR:
17282 case XOR:
17283 /* ANDS <Rdn>,<Rm> */
17284 if (rtx_equal_p (dst, op0)
17285 && low_register_operand (op1, SImode))
17286 action = action_for_partial_flag_setting;
17287 else if (rtx_equal_p (dst, op1)
17288 && low_register_operand (op0, SImode))
17289 action = action_for_partial_flag_setting == SKIP
17290 ? SKIP : SWAP_CONV;
17291 break;
17292
17293 case ASHIFTRT:
17294 case ASHIFT:
17295 case LSHIFTRT:
17296 /* ASRS <Rdn>,<Rm> */
17297 /* LSRS <Rdn>,<Rm> */
17298 /* LSLS <Rdn>,<Rm> */
17299 if (rtx_equal_p (dst, op0)
17300 && low_register_operand (op1, SImode))
17301 action = action_for_partial_flag_setting;
17302 /* ASRS <Rd>,<Rm>,#<imm5> */
17303 /* LSRS <Rd>,<Rm>,#<imm5> */
17304 /* LSLS <Rd>,<Rm>,#<imm5> */
17305 else if (low_register_operand (op0, SImode)
17306 && CONST_INT_P (op1)
17307 && IN_RANGE (INTVAL (op1), 0, 31))
17308 action = action_for_partial_flag_setting;
17309 break;
17310
17311 case ROTATERT:
17312 /* RORS <Rdn>,<Rm> */
17313 if (rtx_equal_p (dst, op0)
17314 && low_register_operand (op1, SImode))
17315 action = action_for_partial_flag_setting;
17316 break;
17317
17318 case NOT:
17319 /* MVNS <Rd>,<Rm> */
17320 if (low_register_operand (op0, SImode))
17321 action = action_for_partial_flag_setting;
17322 break;
17323
17324 case NEG:
17325 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17326 if (low_register_operand (op0, SImode))
17327 action = CONV;
17328 break;
17329
17330 case CONST_INT:
17331 /* MOVS <Rd>,#<imm8> */
17332 if (CONST_INT_P (src)
17333 && IN_RANGE (INTVAL (src), 0, 255))
17334 action = action_for_partial_flag_setting;
17335 break;
17336
17337 case REG:
17338 /* MOVS and MOV<c> with registers have different
17339 encodings, so are not relevant here. */
17340 break;
17341
17342 default:
17343 break;
17344 }
17345 }
17346
17347 if (action != SKIP)
17348 {
17349 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17350 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17351 rtvec vec;
17352
17353 if (action == SWAP_CONV)
17354 {
17355 src = copy_rtx (src);
17356 XEXP (src, 0) = op1;
17357 XEXP (src, 1) = op0;
17358 pat = gen_rtx_SET (dst, src);
17359 vec = gen_rtvec (2, pat, clobber);
17360 }
17361 else /* action == CONV */
17362 vec = gen_rtvec (2, pat, clobber);
17363
17364 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17365 INSN_CODE (insn) = -1;
17366 }
17367 }
17368
17369 if (NONDEBUG_INSN_P (insn))
17370 df_simulate_one_insn_backwards (bb, insn, &live);
17371 }
17372 }
17373
17374 CLEAR_REG_SET (&live);
17375 }
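/* Illustrative example: for a convertible insn the code above wraps the
   original SET in a PARALLEL with a condition-code clobber, e.g.

       (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))

   becomes

       (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
                  (clobber (reg:CC CC_REGNUM))])

   which matches the flag-clobbering patterns and so can be emitted with the
   shorter 16-bit flag-setting encodings (register numbers illustrative).  */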
17376
17377 /* GCC puts the pool in the wrong place for ARM, since we can only
17378 load addresses a limited distance around the pc. We do some
17379 special munging to move the constant pool values to the correct
17380 point in the code. */
17381 static void
17382 arm_reorg (void)
17383 {
17384 rtx_insn *insn;
17385 HOST_WIDE_INT address = 0;
17386 Mfix * fix;
17387
17388 if (TARGET_THUMB1)
17389 thumb1_reorg ();
17390 else if (TARGET_THUMB2)
17391 thumb2_reorg ();
17392
17393 /* Ensure all insns that must be split have been split at this point.
17394 Otherwise, the pool placement code below may compute incorrect
17395 insn lengths. Note that when optimizing, all insns have already
17396 been split at this point. */
17397 if (!optimize)
17398 split_all_insns_noflow ();
17399
17400 minipool_fix_head = minipool_fix_tail = NULL;
17401
17402 /* The first insn must always be a note, or the code below won't
17403 scan it properly. */
17404 insn = get_insns ();
17405 gcc_assert (NOTE_P (insn));
17406 minipool_pad = 0;
17407
17408 /* Scan all the insns and record the operands that will need fixing. */
17409 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17410 {
17411 if (BARRIER_P (insn))
17412 push_minipool_barrier (insn, address);
17413 else if (INSN_P (insn))
17414 {
17415 rtx_jump_table_data *table;
17416
17417 note_invalid_constants (insn, address, true);
17418 address += get_attr_length (insn);
17419
17420 /* If the insn is a vector jump, add the size of the table
17421 and skip the table. */
17422 if (tablejump_p (insn, NULL, &table))
17423 {
17424 address += get_jump_table_size (table);
17425 insn = table;
17426 }
17427 }
17428 else if (LABEL_P (insn))
17429 /* Add the worst-case padding due to alignment. We don't add
17430 the _current_ padding because the minipool insertions
17431 themselves might change it. */
17432 address += get_label_padding (insn);
17433 }
17434
17435 fix = minipool_fix_head;
17436
17437 /* Now scan the fixups and perform the required changes. */
17438 while (fix)
17439 {
17440 Mfix * ftmp;
17441 Mfix * fdel;
17442 Mfix * last_added_fix;
17443 Mfix * last_barrier = NULL;
17444 Mfix * this_fix;
17445
17446 /* Skip any further barriers before the next fix. */
17447 while (fix && BARRIER_P (fix->insn))
17448 fix = fix->next;
17449
17450 /* No more fixes. */
17451 if (fix == NULL)
17452 break;
17453
17454 last_added_fix = NULL;
17455
17456 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17457 {
17458 if (BARRIER_P (ftmp->insn))
17459 {
17460 if (ftmp->address >= minipool_vector_head->max_address)
17461 break;
17462
17463 last_barrier = ftmp;
17464 }
17465 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17466 break;
17467
17468 last_added_fix = ftmp; /* Keep track of the last fix added. */
17469 }
17470
17471 /* If we found a barrier, drop back to that; any fixes that we
17472 could have reached but come after the barrier will now go in
17473 the next mini-pool. */
17474 if (last_barrier != NULL)
17475 {
17476 /* Reduce the refcount for those fixes that won't go into this
17477 pool after all. */
17478 for (fdel = last_barrier->next;
17479 fdel && fdel != ftmp;
17480 fdel = fdel->next)
17481 {
17482 fdel->minipool->refcount--;
17483 fdel->minipool = NULL;
17484 }
17485
17486 ftmp = last_barrier;
17487 }
17488 else
17489 {
17490 /* ftmp is the first fix that we can't fit into this pool and
17491 there are no natural barriers that we could use. Insert a
17492 new barrier in the code somewhere between the previous
17493 fix and this one, and arrange to jump around it. */
17494 HOST_WIDE_INT max_address;
17495
17496 /* The last item on the list of fixes must be a barrier, so
17497 we can never run off the end of the list of fixes without
17498 last_barrier being set. */
17499 gcc_assert (ftmp);
17500
17501 max_address = minipool_vector_head->max_address;
17502 /* Check that there isn't another fix that is in range that
17503 we couldn't fit into this pool because the pool was
17504 already too large: we need to put the pool before such an
17505 instruction. The pool itself may come just after the
17506 fix because create_fix_barrier also allows space for a
17507 jump instruction. */
17508 if (ftmp->address < max_address)
17509 max_address = ftmp->address + 1;
17510
17511 last_barrier = create_fix_barrier (last_added_fix, max_address);
17512 }
17513
17514 assign_minipool_offsets (last_barrier);
17515
17516 while (ftmp)
17517 {
17518 if (!BARRIER_P (ftmp->insn)
17519 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17520 == NULL))
17521 break;
17522
17523 ftmp = ftmp->next;
17524 }
17525
17526 /* Scan over the fixes we have identified for this pool, fixing them
17527 up and adding the constants to the pool itself. */
17528 for (this_fix = fix; this_fix && ftmp != this_fix;
17529 this_fix = this_fix->next)
17530 if (!BARRIER_P (this_fix->insn))
17531 {
17532 rtx addr
17533 = plus_constant (Pmode,
17534 gen_rtx_LABEL_REF (VOIDmode,
17535 minipool_vector_label),
17536 this_fix->minipool->offset);
17537 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17538 }
17539
17540 dump_minipool (last_barrier->insn);
17541 fix = ftmp;
17542 }
17543
17544 /* From now on we must synthesize any constants that we can't handle
17545 directly. This can happen if the RTL gets split during final
17546 instruction generation. */
17547 cfun->machine->after_arm_reorg = 1;
17548
17549 /* Free the minipool memory. */
17550 obstack_free (&minipool_obstack, minipool_startobj);
17551 }
17552 \f
17553 /* Routines to output assembly language. */
17554
17555 /* Return the string representation of the passed-in real value. */
17556 static const char *
17557 fp_const_from_val (REAL_VALUE_TYPE *r)
17558 {
17559 if (!fp_consts_inited)
17560 init_fp_table ();
17561
17562 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17563 return "0";
17564 }
17565
17566 /* OPERANDS[0] is the entire list of insns that constitute pop,
17567 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17568 is in the list, UPDATE is true iff the list contains explicit
17569 update of base register. */
17570 void
17571 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17572 bool update)
17573 {
17574 int i;
17575 char pattern[100];
17576 int offset;
17577 const char *conditional;
17578 int num_saves = XVECLEN (operands[0], 0);
17579 unsigned int regno;
17580 unsigned int regno_base = REGNO (operands[1]);
17581
17582 offset = 0;
17583 offset += update ? 1 : 0;
17584 offset += return_pc ? 1 : 0;
17585
17586 /* Is the base register in the list? */
17587 for (i = offset; i < num_saves; i++)
17588 {
17589 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17590 /* If SP is in the list, then the base register must be SP. */
17591 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17592 /* If base register is in the list, there must be no explicit update. */
17593 if (regno == regno_base)
17594 gcc_assert (!update);
17595 }
17596
17597 conditional = reverse ? "%?%D0" : "%?%d0";
17598 if ((regno_base == SP_REGNUM) && TARGET_THUMB)
17599 {
17600 /* Output pop (not ldmfd) because it has a shorter encoding. */
17601 gcc_assert (update);
17602 sprintf (pattern, "pop%s\t{", conditional);
17603 }
17604 else
17605 {
17606 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17607 It's just a convention; their semantics are identical. */
17608 if (regno_base == SP_REGNUM)
17609 sprintf (pattern, "ldm%sfd\t", conditional);
17610 else if (TARGET_UNIFIED_ASM)
17611 sprintf (pattern, "ldmia%s\t", conditional);
17612 else
17613 sprintf (pattern, "ldm%sia\t", conditional);
17614
17615 strcat (pattern, reg_names[regno_base]);
17616 if (update)
17617 strcat (pattern, "!, {");
17618 else
17619 strcat (pattern, ", {");
17620 }
17621
17622 /* Output the first destination register. */
17623 strcat (pattern,
17624 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17625
17626 /* Output the rest of the destination registers. */
17627 for (i = offset + 1; i < num_saves; i++)
17628 {
17629 strcat (pattern, ", ");
17630 strcat (pattern,
17631 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17632 }
17633
17634 strcat (pattern, "}");
17635
17636 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17637 strcat (pattern, "^");
17638
17639 output_asm_insn (pattern, &cond);
17640 }
17641
17642
17643 /* Output the assembly for a store multiple. */
17644
17645 const char *
17646 vfp_output_vstmd (rtx * operands)
17647 {
17648 char pattern[100];
17649 int p;
17650 int base;
17651 int i;
17652 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17653 ? XEXP (operands[0], 0)
17654 : XEXP (XEXP (operands[0], 0), 0);
17655 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17656
17657 if (push_p)
17658 strcpy (pattern, "vpush%?.64\t{%P1");
17659 else
17660 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17661
17662 p = strlen (pattern);
17663
17664 gcc_assert (REG_P (operands[1]));
17665
17666 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17667 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17668 {
17669 p += sprintf (&pattern[p], ", d%d", base + i);
17670 }
17671 strcpy (&pattern[p], "}");
17672
17673 output_asm_insn (pattern, operands);
17674 return "";
17675 }
17676
17677
17678 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17679 number of bytes pushed. */
17680
17681 static int
17682 vfp_emit_fstmd (int base_reg, int count)
17683 {
17684 rtx par;
17685 rtx dwarf;
17686 rtx tmp, reg;
17687 int i;
17688
17689 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17690 register pairs are stored by a store multiple insn. We avoid this
17691 by pushing an extra pair. */
17692 if (count == 2 && !arm_arch6)
17693 {
17694 if (base_reg == LAST_VFP_REGNUM - 3)
17695 base_reg -= 2;
17696 count++;
17697 }
17698
17699 /* FSTMD may not store more than 16 doubleword registers at once. Split
17700 larger stores into multiple parts (up to a maximum of two, in
17701 practice). */
17702 if (count > 16)
17703 {
17704 int saved;
17705 /* NOTE: base_reg is an internal register number, so each D register
17706 counts as 2. */
17707 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17708 saved += vfp_emit_fstmd (base_reg, 16);
17709 return saved;
17710 }
17711
17712 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17713 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17714
17715 reg = gen_rtx_REG (DFmode, base_reg);
17716 base_reg += 2;
17717
17718 XVECEXP (par, 0, 0)
17719 = gen_rtx_SET (gen_frame_mem
17720 (BLKmode,
17721 gen_rtx_PRE_MODIFY (Pmode,
17722 stack_pointer_rtx,
17723 plus_constant
17724 (Pmode, stack_pointer_rtx,
17725 - (count * 8)))
17726 ),
17727 gen_rtx_UNSPEC (BLKmode,
17728 gen_rtvec (1, reg),
17729 UNSPEC_PUSH_MULT));
17730
17731 tmp = gen_rtx_SET (stack_pointer_rtx,
17732 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17733 RTX_FRAME_RELATED_P (tmp) = 1;
17734 XVECEXP (dwarf, 0, 0) = tmp;
17735
17736 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17737 RTX_FRAME_RELATED_P (tmp) = 1;
17738 XVECEXP (dwarf, 0, 1) = tmp;
17739
17740 for (i = 1; i < count; i++)
17741 {
17742 reg = gen_rtx_REG (DFmode, base_reg);
17743 base_reg += 2;
17744 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17745
17746 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17747 plus_constant (Pmode,
17748 stack_pointer_rtx,
17749 i * 8)),
17750 reg);
17751 RTX_FRAME_RELATED_P (tmp) = 1;
17752 XVECEXP (dwarf, 0, i + 1) = tmp;
17753 }
17754
17755 par = emit_insn (par);
17756 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17757 RTX_FRAME_RELATED_P (par) = 1;
17758
17759 return count * 8;
17760 }
17761
17762 /* Emit a call instruction with pattern PAT. ADDR is the address of
17763 the call target. */
17764
17765 void
17766 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17767 {
17768 rtx insn;
17769
17770 insn = emit_call_insn (pat);
17771
17772 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17773 If the call might use such an entry, add a use of the PIC register
17774 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17775 if (TARGET_VXWORKS_RTP
17776 && flag_pic
17777 && !sibcall
17778 && GET_CODE (addr) == SYMBOL_REF
17779 && (SYMBOL_REF_DECL (addr)
17780 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17781 : !SYMBOL_REF_LOCAL_P (addr)))
17782 {
17783 require_pic_register ();
17784 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17785 }
17786
17787 if (TARGET_AAPCS_BASED)
17788 {
17789 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17790 linker. We need to add an IP clobber to allow setting
17791 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17792 is not needed since it's a fixed register. */
17793 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17794 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17795 }
17796 }
17797
17798 /* Output a 'call' insn. */
17799 const char *
17800 output_call (rtx *operands)
17801 {
17802 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17803
17804 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17805 if (REGNO (operands[0]) == LR_REGNUM)
17806 {
17807 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17808 output_asm_insn ("mov%?\t%0, %|lr", operands);
17809 }
17810
17811 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17812
17813 if (TARGET_INTERWORK || arm_arch4t)
17814 output_asm_insn ("bx%?\t%0", operands);
17815 else
17816 output_asm_insn ("mov%?\t%|pc, %0", operands);
17817
17818 return "";
17819 }
17820
17821 /* Output a 'call' insn that is a reference in memory. This is
17822 disabled for ARMv5 and we prefer a blx instead because otherwise
17823 there's a significant performance overhead. */
17824 const char *
17825 output_call_mem (rtx *operands)
17826 {
17827 gcc_assert (!arm_arch5);
17828 if (TARGET_INTERWORK)
17829 {
17830 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17831 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17832 output_asm_insn ("bx%?\t%|ip", operands);
17833 }
17834 else if (regno_use_in (LR_REGNUM, operands[0]))
17835 {
17836 /* LR is used in the memory address. We load the address in the
17837 first instruction. It's safe to use IP as the target of the
17838 load since the call will kill it anyway. */
17839 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17840 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17841 if (arm_arch4t)
17842 output_asm_insn ("bx%?\t%|ip", operands);
17843 else
17844 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17845 }
17846 else
17847 {
17848 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17849 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17850 }
17851
17852 return "";
17853 }
17854
17855
17856 /* Output a move from ARM registers to ARM registers of a long double.
17857 OPERANDS[0] is the destination.
17858 OPERANDS[1] is the source. */
17859 const char *
17860 output_mov_long_double_arm_from_arm (rtx *operands)
17861 {
17862 /* We have to be careful here because the two might overlap. */
17863 int dest_start = REGNO (operands[0]);
17864 int src_start = REGNO (operands[1]);
17865 rtx ops[2];
17866 int i;
17867
17868 if (dest_start < src_start)
17869 {
17870 for (i = 0; i < 3; i++)
17871 {
17872 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17873 ops[1] = gen_rtx_REG (SImode, src_start + i);
17874 output_asm_insn ("mov%?\t%0, %1", ops);
17875 }
17876 }
17877 else
17878 {
17879 for (i = 2; i >= 0; i--)
17880 {
17881 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17882 ops[1] = gen_rtx_REG (SImode, src_start + i);
17883 output_asm_insn ("mov%?\t%0, %1", ops);
17884 }
17885 }
17886
17887 return "";
17888 }
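/* Illustrative sketch (simplified, hypothetical names): the forwards versus
   backwards loop choice above is the standard overlapping-copy trick.  A
   self-contained model on an array standing in for the register file: */
#if 0
static void
move_overlapping_regs (int *regs, int dest_start, int src_start, int n)
{
  int i;

  if (dest_start < src_start)
    /* Destination below source: copy upwards, so each source word is
       still unclobbered when it is read.  */
    for (i = 0; i < n; i++)
      regs[dest_start + i] = regs[src_start + i];
  else
    /* Destination above (or equal to) source: copy downwards for the
       same reason.  */
    for (i = n - 1; i >= 0; i--)
      regs[dest_start + i] = regs[src_start + i];
}
#endif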
17889
17890 void
17891 arm_emit_movpair (rtx dest, rtx src)
17892 {
17893 /* If the src is an immediate, simplify it. */
17894 if (CONST_INT_P (src))
17895 {
17896 HOST_WIDE_INT val = INTVAL (src);
17897 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17898 if ((val >> 16) & 0x0000ffff)
17899 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17900 GEN_INT (16)),
17901 GEN_INT ((val >> 16) & 0x0000ffff));
17902 return;
17903 }
17904 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17905 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17906 }
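/* Illustrative sketch (simplified, hypothetical names): the constant path
   above is the movw/movt-style split -- set the low 16 bits, then insert the
   high 16 bits only when they are non-zero.  A self-contained model on plain
   integers: */
#if 0
#include <stdint.h>

static uint32_t
movpair_constant (uint32_t val)
{
  uint32_t reg = val & 0x0000ffffu;                   /* movw: low half */

  if ((val >> 16) & 0xffffu)
    reg = (reg & 0x0000ffffu) | (val & 0xffff0000u);  /* movt: high half */
  return reg;
}
#endif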
17907
17908 /* Output a move between double words. It must be REG<-MEM
17909 or MEM<-REG. */
17910 const char *
17911 output_move_double (rtx *operands, bool emit, int *count)
17912 {
17913 enum rtx_code code0 = GET_CODE (operands[0]);
17914 enum rtx_code code1 = GET_CODE (operands[1]);
17915 rtx otherops[3];
17916 if (count)
17917 *count = 1;
17918
17919 /* The only case when this might happen is when
17920 you are looking at the length of a DImode instruction
17921 that has an invalid constant in it. */
17922 if (code0 == REG && code1 != MEM)
17923 {
17924 gcc_assert (!emit);
17925 *count = 2;
17926 return "";
17927 }
17928
17929 if (code0 == REG)
17930 {
17931 unsigned int reg0 = REGNO (operands[0]);
17932
17933 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17934
17935 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17936
17937 switch (GET_CODE (XEXP (operands[1], 0)))
17938 {
17939 case REG:
17940
17941 if (emit)
17942 {
17943 if (TARGET_LDRD
17944 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17945 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17946 else
17947 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17948 }
17949 break;
17950
17951 case PRE_INC:
17952 gcc_assert (TARGET_LDRD);
17953 if (emit)
17954 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17955 break;
17956
17957 case PRE_DEC:
17958 if (emit)
17959 {
17960 if (TARGET_LDRD)
17961 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17962 else
17963 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17964 }
17965 break;
17966
17967 case POST_INC:
17968 if (emit)
17969 {
17970 if (TARGET_LDRD)
17971 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17972 else
17973 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17974 }
17975 break;
17976
17977 case POST_DEC:
17978 gcc_assert (TARGET_LDRD);
17979 if (emit)
17980 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17981 break;
17982
17983 case PRE_MODIFY:
17984 case POST_MODIFY:
17985 /* Autoincrement addressing modes should never have overlapping
17986 base and destination registers, and overlapping index registers
17987 are already prohibited, so this doesn't need to worry about
17988 fix_cm3_ldrd. */
17989 otherops[0] = operands[0];
17990 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17991 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17992
17993 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17994 {
17995 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17996 {
17997 /* Registers overlap so split out the increment. */
17998 if (emit)
17999 {
18000 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18001 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18002 }
18003 if (count)
18004 *count = 2;
18005 }
18006 else
18007 {
18008 /* Use a single insn if we can.
18009 FIXME: IWMMXT allows offsets larger than ldrd can
18010 handle, fix these up with a pair of ldr. */
18011 if (TARGET_THUMB2
18012 || !CONST_INT_P (otherops[2])
18013 || (INTVAL (otherops[2]) > -256
18014 && INTVAL (otherops[2]) < 256))
18015 {
18016 if (emit)
18017 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18018 }
18019 else
18020 {
18021 if (emit)
18022 {
18023 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18024 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18025 }
18026 if (count)
18027 *count = 2;
18028
18029 }
18030 }
18031 }
18032 else
18033 {
18034 /* Use a single insn if we can.
18035 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18036 fix these up with a pair of ldr. */
18037 if (TARGET_THUMB2
18038 || !CONST_INT_P (otherops[2])
18039 || (INTVAL (otherops[2]) > -256
18040 && INTVAL (otherops[2]) < 256))
18041 {
18042 if (emit)
18043 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18044 }
18045 else
18046 {
18047 if (emit)
18048 {
18049 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18050 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18051 }
18052 if (count)
18053 *count = 2;
18054 }
18055 }
18056 break;
18057
18058 case LABEL_REF:
18059 case CONST:
18060 /* We might be able to use ldrd %0, %1 here. However, the range is
18061 different to ldr/adr, and it is broken on some ARMv7-M
18062 implementations. */
18063 /* Use the second register of the pair to avoid problematic
18064 overlap. */
18065 otherops[1] = operands[1];
18066 if (emit)
18067 output_asm_insn ("adr%?\t%0, %1", otherops);
18068 operands[1] = otherops[0];
18069 if (emit)
18070 {
18071 if (TARGET_LDRD)
18072 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18073 else
18074 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18075 }
18076
18077 if (count)
18078 *count = 2;
18079 break;
18080
18081 /* ??? This needs checking for thumb2. */
18082 default:
18083 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18084 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18085 {
18086 otherops[0] = operands[0];
18087 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18088 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18089
18090 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18091 {
18092 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18093 {
18094 switch ((int) INTVAL (otherops[2]))
18095 {
18096 case -8:
18097 if (emit)
18098 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18099 return "";
18100 case -4:
18101 if (TARGET_THUMB2)
18102 break;
18103 if (emit)
18104 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18105 return "";
18106 case 4:
18107 if (TARGET_THUMB2)
18108 break;
18109 if (emit)
18110 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18111 return "";
18112 }
18113 }
18114 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18115 operands[1] = otherops[0];
18116 if (TARGET_LDRD
18117 && (REG_P (otherops[2])
18118 || TARGET_THUMB2
18119 || (CONST_INT_P (otherops[2])
18120 && INTVAL (otherops[2]) > -256
18121 && INTVAL (otherops[2]) < 256)))
18122 {
18123 if (reg_overlap_mentioned_p (operands[0],
18124 otherops[2]))
18125 {
18126 /* Swap base and index registers over to
18127 avoid a conflict. */
18128 std::swap (otherops[1], otherops[2]);
18129 }
18130 /* If both registers conflict, it will usually
18131 have been fixed by a splitter. */
18132 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18133 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18134 {
18135 if (emit)
18136 {
18137 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18138 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18139 }
18140 if (count)
18141 *count = 2;
18142 }
18143 else
18144 {
18145 otherops[0] = operands[0];
18146 if (emit)
18147 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18148 }
18149 return "";
18150 }
18151
18152 if (CONST_INT_P (otherops[2]))
18153 {
18154 if (emit)
18155 {
18156 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18157 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18158 else
18159 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18160 }
18161 }
18162 else
18163 {
18164 if (emit)
18165 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18166 }
18167 }
18168 else
18169 {
18170 if (emit)
18171 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18172 }
18173
18174 if (count)
18175 *count = 2;
18176
18177 if (TARGET_LDRD)
18178 return "ldr%(d%)\t%0, [%1]";
18179
18180 return "ldm%(ia%)\t%1, %M0";
18181 }
18182 else
18183 {
18184 otherops[1] = adjust_address (operands[1], SImode, 4);
18185 /* Take care of overlapping base/data reg. */
18186 if (reg_mentioned_p (operands[0], operands[1]))
18187 {
18188 if (emit)
18189 {
18190 output_asm_insn ("ldr%?\t%0, %1", otherops);
18191 output_asm_insn ("ldr%?\t%0, %1", operands);
18192 }
18193 if (count)
18194 *count = 2;
18195
18196 }
18197 else
18198 {
18199 if (emit)
18200 {
18201 output_asm_insn ("ldr%?\t%0, %1", operands);
18202 output_asm_insn ("ldr%?\t%0, %1", otherops);
18203 }
18204 if (count)
18205 *count = 2;
18206 }
18207 }
18208 }
18209 }
18210 else
18211 {
18212 /* Constraints should ensure this. */
18213 gcc_assert (code0 == MEM && code1 == REG);
18214 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18215 || (TARGET_ARM && TARGET_LDRD));
18216
18217 switch (GET_CODE (XEXP (operands[0], 0)))
18218 {
18219 case REG:
18220 if (emit)
18221 {
18222 if (TARGET_LDRD)
18223 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18224 else
18225 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18226 }
18227 break;
18228
18229 case PRE_INC:
18230 gcc_assert (TARGET_LDRD);
18231 if (emit)
18232 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18233 break;
18234
18235 case PRE_DEC:
18236 if (emit)
18237 {
18238 if (TARGET_LDRD)
18239 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18240 else
18241 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18242 }
18243 break;
18244
18245 case POST_INC:
18246 if (emit)
18247 {
18248 if (TARGET_LDRD)
18249 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18250 else
18251 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18252 }
18253 break;
18254
18255 case POST_DEC:
18256 gcc_assert (TARGET_LDRD);
18257 if (emit)
18258 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18259 break;
18260
18261 case PRE_MODIFY:
18262 case POST_MODIFY:
18263 otherops[0] = operands[1];
18264 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18265 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18266
18267 /* IWMMXT allows offsets larger than ldrd can handle,
18268 fix these up with a pair of ldr. */
18269 if (!TARGET_THUMB2
18270 && CONST_INT_P (otherops[2])
18271 && (INTVAL(otherops[2]) <= -256
18272 || INTVAL(otherops[2]) >= 256))
18273 {
18274 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18275 {
18276 if (emit)
18277 {
18278 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18279 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18280 }
18281 if (count)
18282 *count = 2;
18283 }
18284 else
18285 {
18286 if (emit)
18287 {
18288 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18289 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18290 }
18291 if (count)
18292 *count = 2;
18293 }
18294 }
18295 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18296 {
18297 if (emit)
18298 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18299 }
18300 else
18301 {
18302 if (emit)
18303 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18304 }
18305 break;
18306
18307 case PLUS:
18308 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18309 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18310 {
18311 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18312 {
18313 case -8:
18314 if (emit)
18315 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18316 return "";
18317
18318 case -4:
18319 if (TARGET_THUMB2)
18320 break;
18321 if (emit)
18322 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18323 return "";
18324
18325 case 4:
18326 if (TARGET_THUMB2)
18327 break;
18328 if (emit)
18329 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18330 return "";
18331 }
18332 }
18333 if (TARGET_LDRD
18334 && (REG_P (otherops[2])
18335 || TARGET_THUMB2
18336 || (CONST_INT_P (otherops[2])
18337 && INTVAL (otherops[2]) > -256
18338 && INTVAL (otherops[2]) < 256)))
18339 {
18340 otherops[0] = operands[1];
18341 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18342 if (emit)
18343 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18344 return "";
18345 }
18346 /* Fall through */
18347
18348 default:
18349 otherops[0] = adjust_address (operands[0], SImode, 4);
18350 otherops[1] = operands[1];
18351 if (emit)
18352 {
18353 output_asm_insn ("str%?\t%1, %0", operands);
18354 output_asm_insn ("str%?\t%H1, %0", otherops);
18355 }
18356 if (count)
18357 *count = 2;
18358 }
18359 }
18360
18361 return "";
18362 }
18363
18364 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18365 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18366
18367 const char *
18368 output_move_quad (rtx *operands)
18369 {
18370 if (REG_P (operands[0]))
18371 {
18372 /* Load, or reg->reg move. */
18373
18374 if (MEM_P (operands[1]))
18375 {
18376 switch (GET_CODE (XEXP (operands[1], 0)))
18377 {
18378 case REG:
18379 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18380 break;
18381
18382 case LABEL_REF:
18383 case CONST:
18384 output_asm_insn ("adr%?\t%0, %1", operands);
18385 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18386 break;
18387
18388 default:
18389 gcc_unreachable ();
18390 }
18391 }
18392 else
18393 {
18394 rtx ops[2];
18395 int dest, src, i;
18396
18397 gcc_assert (REG_P (operands[1]));
18398
18399 dest = REGNO (operands[0]);
18400 src = REGNO (operands[1]);
18401
18402 /* This seems pretty dumb, but hopefully GCC won't try to do it
18403 very often. */
18404 if (dest < src)
18405 for (i = 0; i < 4; i++)
18406 {
18407 ops[0] = gen_rtx_REG (SImode, dest + i);
18408 ops[1] = gen_rtx_REG (SImode, src + i);
18409 output_asm_insn ("mov%?\t%0, %1", ops);
18410 }
18411 else
18412 for (i = 3; i >= 0; i--)
18413 {
18414 ops[0] = gen_rtx_REG (SImode, dest + i);
18415 ops[1] = gen_rtx_REG (SImode, src + i);
18416 output_asm_insn ("mov%?\t%0, %1", ops);
18417 }
18418 }
18419 }
18420 else
18421 {
18422 gcc_assert (MEM_P (operands[0]));
18423 gcc_assert (REG_P (operands[1]));
18424 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18425
18426 switch (GET_CODE (XEXP (operands[0], 0)))
18427 {
18428 case REG:
18429 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18430 break;
18431
18432 default:
18433 gcc_unreachable ();
18434 }
18435 }
18436
18437 return "";
18438 }
18439
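/* A standalone sketch, not part of the compiler itself: the reg->reg
   branch of output_move_quad above copies four core registers and picks
   the copy direction so that an overlapping source group is never
   clobbered before it is read.  A plain array stands in for the core
   registers here.  */
#include <stdio.h>

static void
copy_quad (unsigned regs[], int dest, int src)
{
  int i;

  if (dest < src)
    for (i = 0; i < 4; i++)        /* Copy upwards: lowest word first.  */
      regs[dest + i] = regs[src + i];
  else
    for (i = 3; i >= 0; i--)       /* Copy downwards: highest word first.  */
      regs[dest + i] = regs[src + i];
}

int
main (void)
{
  unsigned regs[8] = { 0, 0, 10, 11, 12, 13, 0, 0 };
  int i;

  /* Overlapping move {r2-r5} -> {r4-r7}: copying the highest word first
     keeps the values 10..13 intact.  */
  copy_quad (regs, 4, 2);
  for (i = 4; i < 8; i++)
    printf ("r%d = %u\n", i, regs[i]);
  return 0;
}
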
18440 /* Output a VFP load or store instruction. */
18441
18442 const char *
18443 output_move_vfp (rtx *operands)
18444 {
18445 rtx reg, mem, addr, ops[2];
18446 int load = REG_P (operands[0]);
18447 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18448 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18449 const char *templ;
18450 char buff[50];
18451 machine_mode mode;
18452
18453 reg = operands[!load];
18454 mem = operands[load];
18455
18456 mode = GET_MODE (reg);
18457
18458 gcc_assert (REG_P (reg));
18459 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18460 gcc_assert (mode == SFmode
18461 || mode == DFmode
18462 || mode == SImode
18463 || mode == DImode
18464 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18465 gcc_assert (MEM_P (mem));
18466
18467 addr = XEXP (mem, 0);
18468
18469 switch (GET_CODE (addr))
18470 {
18471 case PRE_DEC:
18472 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18473 ops[0] = XEXP (addr, 0);
18474 ops[1] = reg;
18475 break;
18476
18477 case POST_INC:
18478 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18479 ops[0] = XEXP (addr, 0);
18480 ops[1] = reg;
18481 break;
18482
18483 default:
18484 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18485 ops[0] = reg;
18486 ops[1] = mem;
18487 break;
18488 }
18489
18490 sprintf (buff, templ,
18491 load ? "ld" : "st",
18492 dp ? "64" : "32",
18493 dp ? "P" : "",
18494 integer_p ? "\t%@ int" : "");
18495 output_asm_insn (buff, ops);
18496
18497 return "";
18498 }
18499
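/* A standalone sketch, not part of the compiler itself: output_move_vfp
   above builds its assembler template by substituting into one of three
   skeleton strings.  This sketch runs the same sprintf on the
   default-case skeleton to show the templates produced for a
   double-precision load and a single-precision store; the %? and %P0
   escapes are operand codes expanded later by output_asm_insn.  */
#include <stdio.h>

int
main (void)
{
  const char *templ = "v%sr%%?.%s\t%%%s0, %%1%s";
  char buff[50];
  int load, dp, integer_p;

  /* A DFmode load: expands to "vldr%?.64\t%P0, %1".  */
  load = 1; dp = 1; integer_p = 0;
  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : "32",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  puts (buff);

  /* An SFmode store: expands to "vstr%?.32\t%0, %1".  */
  load = 0; dp = 0; integer_p = 0;
  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : "32",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  puts (buff);

  return 0;
}
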
18500 /* Output a Neon double-word or quad-word load or store, or a load
18501 or store for larger structure modes.
18502
18503 WARNING: The ordering of elements is weird in big-endian mode,
18504 because the EABI requires that vectors stored in memory appear
18505 as though they were stored by a VSTM instruction.
18506 GCC RTL defines element ordering based on in-memory order.
18507 This can be different from the architectural ordering of elements
18508 within a NEON register. The intrinsics defined in arm_neon.h use the
18509 NEON register element ordering, not the GCC RTL element ordering.
18510
18511 For example, the in-memory ordering of a big-endian quadword
18512 vector with 16-bit elements when stored from register pair {d0,d1}
18513 will be (lowest address first, d0[N] is NEON register element N):
18514
18515 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18516
18517 When necessary, quadword registers (dN, dN+1) are moved to ARM
18518 registers from rN in the order:
18519
18520 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18521
18522 So that STM/LDM can be used on vectors in ARM registers, and the
18523 same memory layout will result as if VSTM/VLDM were used.
18524
18525 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18526 possible, which allows use of appropriate alignment tags.
18527 Note that the choice of "64" is independent of the actual vector
18528 element size; this size simply ensures that the behavior is
18529 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18530
18531 Due to limitations of those instructions, use of VST1.64/VLD1.64
18532 is not possible if:
18533 - the address contains PRE_DEC, or
18534 - the mode refers to more than 4 double-word registers
18535
18536 In those cases, it would be possible to replace VSTM/VLDM by a
18537 sequence of instructions; this is not currently implemented since
18538 this is not certain to actually improve performance. */
18539
18540 const char *
18541 output_move_neon (rtx *operands)
18542 {
18543 rtx reg, mem, addr, ops[2];
18544 int regno, nregs, load = REG_P (operands[0]);
18545 const char *templ;
18546 char buff[50];
18547 machine_mode mode;
18548
18549 reg = operands[!load];
18550 mem = operands[load];
18551
18552 mode = GET_MODE (reg);
18553
18554 gcc_assert (REG_P (reg));
18555 regno = REGNO (reg);
18556 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18557 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18558 || NEON_REGNO_OK_FOR_QUAD (regno));
18559 gcc_assert (VALID_NEON_DREG_MODE (mode)
18560 || VALID_NEON_QREG_MODE (mode)
18561 || VALID_NEON_STRUCT_MODE (mode));
18562 gcc_assert (MEM_P (mem));
18563
18564 addr = XEXP (mem, 0);
18565
18566 /* Strip off const from addresses like (const (plus (...))). */
18567 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18568 addr = XEXP (addr, 0);
18569
18570 switch (GET_CODE (addr))
18571 {
18572 case POST_INC:
18573 /* We have to use vldm / vstm for too-large modes. */
18574 if (nregs > 4)
18575 {
18576 templ = "v%smia%%?\t%%0!, %%h1";
18577 ops[0] = XEXP (addr, 0);
18578 }
18579 else
18580 {
18581 templ = "v%s1.64\t%%h1, %%A0";
18582 ops[0] = mem;
18583 }
18584 ops[1] = reg;
18585 break;
18586
18587 case PRE_DEC:
18588 /* We have to use vldm / vstm in this case, since there is no
18589 pre-decrement form of the vld1 / vst1 instructions. */
18590 templ = "v%smdb%%?\t%%0!, %%h1";
18591 ops[0] = XEXP (addr, 0);
18592 ops[1] = reg;
18593 break;
18594
18595 case POST_MODIFY:
18596 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18597 gcc_unreachable ();
18598
18599 case REG:
18600 /* We have to use vldm / vstm for too-large modes. */
18601 if (nregs > 1)
18602 {
18603 if (nregs > 4)
18604 templ = "v%smia%%?\t%%m0, %%h1";
18605 else
18606 templ = "v%s1.64\t%%h1, %%A0";
18607
18608 ops[0] = mem;
18609 ops[1] = reg;
18610 break;
18611 }
18612 /* Fall through. */
18613 case LABEL_REF:
18614 case PLUS:
18615 {
18616 int i;
18617 int overlap = -1;
18618 for (i = 0; i < nregs; i++)
18619 {
18620 /* We're only using DImode here because it's a convenient size. */
18621 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18622 ops[1] = adjust_address (mem, DImode, 8 * i);
18623 if (reg_overlap_mentioned_p (ops[0], mem))
18624 {
18625 gcc_assert (overlap == -1);
18626 overlap = i;
18627 }
18628 else
18629 {
18630 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18631 output_asm_insn (buff, ops);
18632 }
18633 }
18634 if (overlap != -1)
18635 {
18636 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18637 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18638 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18639 output_asm_insn (buff, ops);
18640 }
18641
18642 return "";
18643 }
18644
18645 default:
18646 gcc_unreachable ();
18647 }
18648
18649 sprintf (buff, templ, load ? "ld" : "st");
18650 output_asm_insn (buff, ops);
18651
18652 return "";
18653 }
18654
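/* A standalone sketch, not part of the compiler itself: in the
   LABEL_REF/PLUS case of output_move_neon above, a double-word load
   whose destination overlaps the base address register is deferred
   until all the other loads have been emitted, so the address is not
   clobbered too early.  Plain arrays stand in for memory and for the
   core registers here.  */
#include <stdio.h>

static unsigned mem[16] = { 100, 101, 102, 103 };

/* Load N words from mem[regs[base_idx]...] into regs[first..first+n-1].
   BASE_IDX may fall inside the destination range; that slot is filled
   last, mirroring the overlap handling above.  */
static void
load_multi (unsigned regs[], int first, int n, int base_idx)
{
  int i, overlap = -1;

  for (i = 0; i < n; i++)
    {
      if (first + i == base_idx)
        {
          overlap = i;          /* Defer the slot holding the address.  */
          continue;
        }
      regs[first + i] = mem[regs[base_idx] + i];
    }

  if (overlap != -1)
    regs[first + overlap] = mem[regs[base_idx] + overlap];
}

int
main (void)
{
  unsigned regs[8] = { 0 };
  int i;

  regs[5] = 0;                  /* r5 holds the base address (index 0).  */
  load_multi (regs, 4, 4, 5);   /* Load mem[0..3] into r4-r7.  */
  for (i = 4; i < 8; i++)
    printf ("r%d = %u\n", i, regs[i]);
  return 0;
}
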
18655 /* Compute and return the length of neon_mov<mode>, where <mode> is
18656 one of VSTRUCT modes: EI, OI, CI or XI. */
18657 int
18658 arm_attr_length_move_neon (rtx_insn *insn)
18659 {
18660 rtx reg, mem, addr;
18661 int load;
18662 machine_mode mode;
18663
18664 extract_insn_cached (insn);
18665
18666 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18667 {
18668 mode = GET_MODE (recog_data.operand[0]);
18669 switch (mode)
18670 {
18671 case EImode:
18672 case OImode:
18673 return 8;
18674 case CImode:
18675 return 12;
18676 case XImode:
18677 return 16;
18678 default:
18679 gcc_unreachable ();
18680 }
18681 }
18682
18683 load = REG_P (recog_data.operand[0]);
18684 reg = recog_data.operand[!load];
18685 mem = recog_data.operand[load];
18686
18687 gcc_assert (MEM_P (mem));
18688
18689 mode = GET_MODE (reg);
18690 addr = XEXP (mem, 0);
18691
18692 /* Strip off const from addresses like (const (plus (...))). */
18693 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18694 addr = XEXP (addr, 0);
18695
18696 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18697 {
18698 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18699 return insns * 4;
18700 }
18701 else
18702 return 4;
18703 }
18704
18705 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18706 return zero. */
18707
18708 int
18709 arm_address_offset_is_imm (rtx_insn *insn)
18710 {
18711 rtx mem, addr;
18712
18713 extract_insn_cached (insn);
18714
18715 if (REG_P (recog_data.operand[0]))
18716 return 0;
18717
18718 mem = recog_data.operand[0];
18719
18720 gcc_assert (MEM_P (mem));
18721
18722 addr = XEXP (mem, 0);
18723
18724 if (REG_P (addr)
18725 || (GET_CODE (addr) == PLUS
18726 && REG_P (XEXP (addr, 0))
18727 && CONST_INT_P (XEXP (addr, 1))))
18728 return 1;
18729 else
18730 return 0;
18731 }
18732
18733 /* Output an ADD r, s, #n where n may be too big for one instruction.
18734 If adding zero to one register, output nothing. */
18735 const char *
18736 output_add_immediate (rtx *operands)
18737 {
18738 HOST_WIDE_INT n = INTVAL (operands[2]);
18739
18740 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18741 {
18742 if (n < 0)
18743 output_multi_immediate (operands,
18744 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18745 -n);
18746 else
18747 output_multi_immediate (operands,
18748 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18749 n);
18750 }
18751
18752 return "";
18753 }
18754
18755 /* Output a multiple immediate operation.
18756 OPERANDS is the vector of operands referred to in the output patterns.
18757 INSTR1 is the output pattern to use for the first constant.
18758 INSTR2 is the output pattern to use for subsequent constants.
18759 IMMED_OP is the index of the constant slot in OPERANDS.
18760 N is the constant value. */
18761 static const char *
18762 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18763 int immed_op, HOST_WIDE_INT n)
18764 {
18765 #if HOST_BITS_PER_WIDE_INT > 32
18766 n &= 0xffffffff;
18767 #endif
18768
18769 if (n == 0)
18770 {
18771 /* Quick and easy output. */
18772 operands[immed_op] = const0_rtx;
18773 output_asm_insn (instr1, operands);
18774 }
18775 else
18776 {
18777 int i;
18778 const char * instr = instr1;
18779
18780 /* Note that n is never zero here (which would give no output). */
18781 for (i = 0; i < 32; i += 2)
18782 {
18783 if (n & (3 << i))
18784 {
18785 operands[immed_op] = GEN_INT (n & (255 << i));
18786 output_asm_insn (instr, operands);
18787 instr = instr2;
18788 i += 6;
18789 }
18790 }
18791 }
18792
18793 return "";
18794 }
18795
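/* A standalone sketch, not part of the compiler itself:
   output_multi_immediate above splits a 32-bit constant into 8-bit
   fields whose position is a multiple of two bits, i.e. values that
   fit ARM's rotated 8-bit immediate encoding, and emits one add/sub
   per field.  This sketch runs the same loop and prints the fields.  */
#include <stdio.h>

static void
print_chunks (unsigned long n)
{
  int i;

  n &= 0xffffffff;
  printf ("0x%08lx ->", n);
  for (i = 0; i < 32; i += 2)
    if (n & (3UL << i))
      {
        printf (" 0x%08lx", n & (255UL << i));
        i += 6;         /* Together with the loop's i += 2, skip 8 bits.  */
      }
  printf ("\n");
}

int
main (void)
{
  print_chunks (0x0000ff00);    /* One field, one instruction.  */
  print_chunks (0x00012345);    /* Three fields: 0x45, 0x2300, 0x10000.  */
  return 0;
}
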
18796 /* Return the name of a shifter operation. */
18797 static const char *
18798 arm_shift_nmem(enum rtx_code code)
18799 {
18800 switch (code)
18801 {
18802 case ASHIFT:
18803 return ARM_LSL_NAME;
18804
18805 case ASHIFTRT:
18806 return "asr";
18807
18808 case LSHIFTRT:
18809 return "lsr";
18810
18811 case ROTATERT:
18812 return "ror";
18813
18814 default:
18815 abort();
18816 }
18817 }
18818
18819 /* Return the appropriate ARM instruction for the operation code.
18820 The returned result should not be overwritten. OP is the rtx of the
18821 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18822 was shifted. */
18823 const char *
18824 arithmetic_instr (rtx op, int shift_first_arg)
18825 {
18826 switch (GET_CODE (op))
18827 {
18828 case PLUS:
18829 return "add";
18830
18831 case MINUS:
18832 return shift_first_arg ? "rsb" : "sub";
18833
18834 case IOR:
18835 return "orr";
18836
18837 case XOR:
18838 return "eor";
18839
18840 case AND:
18841 return "and";
18842
18843 case ASHIFT:
18844 case ASHIFTRT:
18845 case LSHIFTRT:
18846 case ROTATERT:
18847 return arm_shift_nmem(GET_CODE(op));
18848
18849 default:
18850 gcc_unreachable ();
18851 }
18852 }
18853
18854 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18855 for the operation code. The returned result should not be overwritten.
18856 OP is the rtx of the shift.
18857 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18858 shift amount if the shift is by a constant. */
18859 static const char *
18860 shift_op (rtx op, HOST_WIDE_INT *amountp)
18861 {
18862 const char * mnem;
18863 enum rtx_code code = GET_CODE (op);
18864
18865 switch (code)
18866 {
18867 case ROTATE:
18868 if (!CONST_INT_P (XEXP (op, 1)))
18869 {
18870 output_operand_lossage ("invalid shift operand");
18871 return NULL;
18872 }
18873
18874 code = ROTATERT;
18875 *amountp = 32 - INTVAL (XEXP (op, 1));
18876 mnem = "ror";
18877 break;
18878
18879 case ASHIFT:
18880 case ASHIFTRT:
18881 case LSHIFTRT:
18882 case ROTATERT:
18883 mnem = arm_shift_nmem(code);
18884 if (CONST_INT_P (XEXP (op, 1)))
18885 {
18886 *amountp = INTVAL (XEXP (op, 1));
18887 }
18888 else if (REG_P (XEXP (op, 1)))
18889 {
18890 *amountp = -1;
18891 return mnem;
18892 }
18893 else
18894 {
18895 output_operand_lossage ("invalid shift operand");
18896 return NULL;
18897 }
18898 break;
18899
18900 case MULT:
18901 /* We never have to worry about the amount being other than a
18902 power of 2, since this case can never be reloaded from a reg. */
18903 if (!CONST_INT_P (XEXP (op, 1)))
18904 {
18905 output_operand_lossage ("invalid shift operand");
18906 return NULL;
18907 }
18908
18909 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18910
18911 /* Amount must be a power of two. */
18912 if (*amountp & (*amountp - 1))
18913 {
18914 output_operand_lossage ("invalid shift operand");
18915 return NULL;
18916 }
18917
18918 *amountp = int_log2 (*amountp);
18919 return ARM_LSL_NAME;
18920
18921 default:
18922 output_operand_lossage ("invalid shift operand");
18923 return NULL;
18924 }
18925
18926 /* This is not 100% correct, but follows from the desire to merge
18927 multiplication by a power of 2 with the recognizer for a
18928 shift. >=32 is not a valid shift for "lsl", so we must try and
18929 output a shift that produces the correct arithmetical result.
18930 Using lsr #32 is identical except for the fact that the carry bit
18931 is not set correctly if we set the flags; but we never use the
18932 carry bit from such an operation, so we can ignore that. */
18933 if (code == ROTATERT)
18934 /* Rotate is just modulo 32. */
18935 *amountp &= 31;
18936 else if (*amountp != (*amountp & 31))
18937 {
18938 if (code == ASHIFT)
18939 mnem = "lsr";
18940 *amountp = 32;
18941 }
18942
18943 /* Shifts of 0 are no-ops. */
18944 if (*amountp == 0)
18945 return NULL;
18946
18947 return mnem;
18948 }
18949
18950 /* Obtain the shift from the POWER of two. */
18951
18952 static HOST_WIDE_INT
18953 int_log2 (HOST_WIDE_INT power)
18954 {
18955 HOST_WIDE_INT shift = 0;
18956
18957 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18958 {
18959 gcc_assert (shift <= 31);
18960 shift++;
18961 }
18962
18963 return shift;
18964 }
18965
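/* A standalone sketch, not part of the compiler itself: the MULT case
   of shift_op above accepts only power-of-two multipliers and rewrites
   them as LSL by log2 of the amount; the power-of-two test is the
   usual n & (n - 1) trick and int_log2 then finds the bit position.  */
#include <stdio.h>

static int
mult_to_lsl (unsigned long amount)
{
  int shift = 0;

  amount &= 0xffffffff;
  if (amount == 0 || (amount & (amount - 1)) != 0)
    return -1;                  /* Not a power of two: rejected.  */

  while ((((unsigned long) 1 << shift) & amount) == 0)
    shift++;                    /* Same loop as int_log2.  */

  return shift;
}

int
main (void)
{
  printf ("x * 8  -> lsl #%d\n", mult_to_lsl (8));      /* lsl #3 */
  printf ("x * 64 -> lsl #%d\n", mult_to_lsl (64));     /* lsl #6 */
  printf ("x * 12 -> %d (not a shift)\n", mult_to_lsl (12));
  return 0;
}
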
18966 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18967 because /bin/as is horribly restrictive. The judgement about
18968 whether or not each character is 'printable' (and can be output as
18969 is) or not (and must be printed with an octal escape) must be made
18970 with reference to the *host* character set -- the situation is
18971 similar to that discussed in the comments above pp_c_char in
18972 c-pretty-print.c. */
18973
18974 #define MAX_ASCII_LEN 51
18975
18976 void
18977 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18978 {
18979 int i;
18980 int len_so_far = 0;
18981
18982 fputs ("\t.ascii\t\"", stream);
18983
18984 for (i = 0; i < len; i++)
18985 {
18986 int c = p[i];
18987
18988 if (len_so_far >= MAX_ASCII_LEN)
18989 {
18990 fputs ("\"\n\t.ascii\t\"", stream);
18991 len_so_far = 0;
18992 }
18993
18994 if (ISPRINT (c))
18995 {
18996 if (c == '\\' || c == '\"')
18997 {
18998 putc ('\\', stream);
18999 len_so_far++;
19000 }
19001 putc (c, stream);
19002 len_so_far++;
19003 }
19004 else
19005 {
19006 fprintf (stream, "\\%03o", c);
19007 len_so_far += 4;
19008 }
19009 }
19010
19011 fputs ("\"\n", stream);
19012 }
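
/* A standalone sketch, not part of the compiler itself:
   output_ascii_pseudo_op above backslash-escapes '\' and '"' and
   octal-escapes anything the host does not consider printable.  This
   sketch applies the same escaping rule (the MAX_ASCII_LEN wrap-around
   is omitted for brevity) and writes the directive to stdout.  */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void
emit_ascii (FILE *stream, const unsigned char *p, int len)
{
  int i;

  fputs ("\t.ascii\t\"", stream);
  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (isprint (c))
        {
          if (c == '\\' || c == '\"')
            putc ('\\', stream);
          putc (c, stream);
        }
      else
        fprintf (stream, "\\%03o", c);
    }
  fputs ("\"\n", stream);
}

int
main (void)
{
  const char *s = "say \"hi\"\n";

  /* Prints:     .ascii  "say \"hi\"\012\000"  */
  emit_ascii (stdout, (const unsigned char *) s, strlen (s) + 1);
  return 0;
}
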
19013 \f
19014 /* Whether a register is callee saved or not. This is necessary because high
19015 registers are marked as caller saved when optimizing for size on Thumb-1
19016 targets, despite being callee saved, in order to avoid using them. */
19017 #define callee_saved_reg_p(reg) \
19018 (!call_used_regs[reg] \
19019 || (TARGET_THUMB1 && optimize_size \
19020 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19021
19022 /* Compute the register save mask for registers 0 through 12
19023 inclusive. This code is used by arm_compute_save_reg_mask. */
19024
19025 static unsigned long
19026 arm_compute_save_reg0_reg12_mask (void)
19027 {
19028 unsigned long func_type = arm_current_func_type ();
19029 unsigned long save_reg_mask = 0;
19030 unsigned int reg;
19031
19032 if (IS_INTERRUPT (func_type))
19033 {
19034 unsigned int max_reg;
19035 /* Interrupt functions must not corrupt any registers,
19036 even call clobbered ones. If this is a leaf function
19037 we can just examine the registers used by the RTL, but
19038 otherwise we have to assume that whatever function is
19039 called might clobber anything, and so we have to save
19040 all the call-clobbered registers as well. */
19041 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19042 /* FIQ handlers have registers r8 - r12 banked, so
19043 we only need to check r0 - r7. Normal ISRs only
19044 bank r14 and r15, so we must check up to r12.
19045 r13 is the stack pointer which is always preserved,
19046 so we do not need to consider it here. */
19047 max_reg = 7;
19048 else
19049 max_reg = 12;
19050
19051 for (reg = 0; reg <= max_reg; reg++)
19052 if (df_regs_ever_live_p (reg)
19053 || (! crtl->is_leaf && call_used_regs[reg]))
19054 save_reg_mask |= (1 << reg);
19055
19056 /* Also save the pic base register if necessary. */
19057 if (flag_pic
19058 && !TARGET_SINGLE_PIC_BASE
19059 && arm_pic_register != INVALID_REGNUM
19060 && crtl->uses_pic_offset_table)
19061 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19062 }
19063 else if (IS_VOLATILE(func_type))
19064 {
19065 /* For noreturn functions we historically omitted register saves
19066 altogether. However this really messes up debugging. As a
19067 compromise save just the frame pointers. Combined with the link
19068 register saved elsewhere this should be sufficient to get
19069 a backtrace. */
19070 if (frame_pointer_needed)
19071 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19072 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19073 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19074 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19075 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19076 }
19077 else
19078 {
19079 /* In the normal case we only need to save those registers
19080 which are call saved and which are used by this function. */
19081 for (reg = 0; reg <= 11; reg++)
19082 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19083 save_reg_mask |= (1 << reg);
19084
19085 /* Handle the frame pointer as a special case. */
19086 if (frame_pointer_needed)
19087 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19088
19089 /* If we aren't loading the PIC register,
19090 don't stack it even though it may be live. */
19091 if (flag_pic
19092 && !TARGET_SINGLE_PIC_BASE
19093 && arm_pic_register != INVALID_REGNUM
19094 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19095 || crtl->uses_pic_offset_table))
19096 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19097
19098 /* The prologue will copy SP into R0, so save it. */
19099 if (IS_STACKALIGN (func_type))
19100 save_reg_mask |= 1;
19101 }
19102
19103 /* Save registers so the exception handler can modify them. */
19104 if (crtl->calls_eh_return)
19105 {
19106 unsigned int i;
19107
19108 for (i = 0; ; i++)
19109 {
19110 reg = EH_RETURN_DATA_REGNO (i);
19111 if (reg == INVALID_REGNUM)
19112 break;
19113 save_reg_mask |= 1 << reg;
19114 }
19115 }
19116
19117 return save_reg_mask;
19118 }
19119
19120 /* Return true if r3 is live at the start of the function. */
19121
19122 static bool
19123 arm_r3_live_at_start_p (void)
19124 {
19125 /* Just look at cfg info, which is still close enough to correct at this
19126 point. This gives false positives for broken functions that might use
19127 uninitialized data that happens to be allocated in r3, but who cares? */
19128 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19129 }
19130
19131 /* Compute the number of bytes used to store the static chain register on the
19132 stack, above the stack frame. We need to know this accurately to get the
19133 alignment of the rest of the stack frame correct. */
19134
19135 static int
19136 arm_compute_static_chain_stack_bytes (void)
19137 {
19138 /* See the defining assertion in arm_expand_prologue. */
19139 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19140 && IS_NESTED (arm_current_func_type ())
19141 && arm_r3_live_at_start_p ()
19142 && crtl->args.pretend_args_size == 0)
19143 return 4;
19144
19145 return 0;
19146 }
19147
19148 /* Compute a bit mask of which registers need to be
19149 saved on the stack for the current function.
19150 This is used by arm_get_frame_offsets, which may add extra registers. */
19151
19152 static unsigned long
19153 arm_compute_save_reg_mask (void)
19154 {
19155 unsigned int save_reg_mask = 0;
19156 unsigned long func_type = arm_current_func_type ();
19157 unsigned int reg;
19158
19159 if (IS_NAKED (func_type))
19160 /* This should never really happen. */
19161 return 0;
19162
19163 /* If we are creating a stack frame, then we must save the frame pointer,
19164 IP (which will hold the old stack pointer), LR and the PC. */
19165 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19166 save_reg_mask |=
19167 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19168 | (1 << IP_REGNUM)
19169 | (1 << LR_REGNUM)
19170 | (1 << PC_REGNUM);
19171
19172 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19173
19174 /* Decide if we need to save the link register.
19175 Interrupt routines have their own banked link register,
19176 so they never need to save it.
19177 Otherwise if we do not use the link register we do not need to save
19178 it. If we are pushing other registers onto the stack however, we
19179 can save an instruction in the epilogue by pushing the link register
19180 now and then popping it back into the PC. This incurs extra memory
19181 accesses though, so we only do it when optimizing for size, and only
19182 if we know that we will not need a fancy return sequence. */
19183 if (df_regs_ever_live_p (LR_REGNUM)
19184 || (save_reg_mask
19185 && optimize_size
19186 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19187 && !crtl->tail_call_emit
19188 && !crtl->calls_eh_return))
19189 save_reg_mask |= 1 << LR_REGNUM;
19190
19191 if (cfun->machine->lr_save_eliminated)
19192 save_reg_mask &= ~ (1 << LR_REGNUM);
19193
19194 if (TARGET_REALLY_IWMMXT
19195 && ((bit_count (save_reg_mask)
19196 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19197 arm_compute_static_chain_stack_bytes())
19198 ) % 2) != 0)
19199 {
19200 /* The total number of registers that are going to be pushed
19201 onto the stack is odd. We need to ensure that the stack
19202 is 64-bit aligned before we start to save iWMMXt registers,
19203 and also before we start to create locals. (A local variable
19204 might be a double or long long which we will load/store using
19205 an iWMMXt instruction). Therefore we need to push another
19206 ARM register, so that the stack will be 64-bit aligned. We
19207 try to avoid using the arg registers (r0 - r3) as they might be
19208 used to pass values in a tail call. */
19209 for (reg = 4; reg <= 12; reg++)
19210 if ((save_reg_mask & (1 << reg)) == 0)
19211 break;
19212
19213 if (reg <= 12)
19214 save_reg_mask |= (1 << reg);
19215 else
19216 {
19217 cfun->machine->sibcall_blocked = 1;
19218 save_reg_mask |= (1 << 3);
19219 }
19220 }
19221
19222 /* We may need to push an additional register for use initializing the
19223 PIC base register. */
19224 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19225 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19226 {
19227 reg = thumb_find_work_register (1 << 4);
19228 if (!call_used_regs[reg])
19229 save_reg_mask |= (1 << reg);
19230 }
19231
19232 return save_reg_mask;
19233 }
19234
19235
19236 /* Compute a bit mask of which registers need to be
19237 saved on the stack for the current function. */
19238 static unsigned long
19239 thumb1_compute_save_reg_mask (void)
19240 {
19241 unsigned long mask;
19242 unsigned reg;
19243
19244 mask = 0;
19245 for (reg = 0; reg < 12; reg ++)
19246 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19247 mask |= 1 << reg;
19248
19249 if (flag_pic
19250 && !TARGET_SINGLE_PIC_BASE
19251 && arm_pic_register != INVALID_REGNUM
19252 && crtl->uses_pic_offset_table)
19253 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19254
19255 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19256 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19257 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19258
19259 /* LR will also be pushed if any lo regs are pushed. */
19260 if (mask & 0xff || thumb_force_lr_save ())
19261 mask |= (1 << LR_REGNUM);
19262
19263 /* Make sure we have a low work register if we need one.
19264 We will need one if we are going to push a high register,
19265 but we are not currently intending to push a low register. */
19266 if ((mask & 0xff) == 0
19267 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19268 {
19269 /* Use thumb_find_work_register to choose which register
19270 we will use. If the register is live then we will
19271 have to push it. Use LAST_LO_REGNUM as our fallback
19272 choice for the register to select. */
19273 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19274 /* Make sure the register returned by thumb_find_work_register is
19275 not part of the return value. */
19276 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19277 reg = LAST_LO_REGNUM;
19278
19279 if (callee_saved_reg_p (reg))
19280 mask |= 1 << reg;
19281 }
19282
19283 /* The 504 below is 8 bytes less than 512 because there are two possible
19284 alignment words. We can't tell here if they will be present or not so we
19285 have to play it safe and assume that they are. */
19286 if ((CALLER_INTERWORKING_SLOT_SIZE +
19287 ROUND_UP_WORD (get_frame_size ()) +
19288 crtl->outgoing_args_size) >= 504)
19289 {
19290 /* This is the same as the code in thumb1_expand_prologue() which
19291 determines which register to use for stack decrement. */
19292 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19293 if (mask & (1 << reg))
19294 break;
19295
19296 if (reg > LAST_LO_REGNUM)
19297 {
19298 /* Make sure we have a register available for stack decrement. */
19299 mask |= 1 << LAST_LO_REGNUM;
19300 }
19301 }
19302
19303 return mask;
19304 }
19305
19306
19307 /* Return the number of bytes required to save VFP registers. */
19308 static int
19309 arm_get_vfp_saved_size (void)
19310 {
19311 unsigned int regno;
19312 int count;
19313 int saved;
19314
19315 saved = 0;
19316 /* Space for saved VFP registers. */
19317 if (TARGET_HARD_FLOAT && TARGET_VFP)
19318 {
19319 count = 0;
19320 for (regno = FIRST_VFP_REGNUM;
19321 regno < LAST_VFP_REGNUM;
19322 regno += 2)
19323 {
19324 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19325 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19326 {
19327 if (count > 0)
19328 {
19329 /* Workaround ARM10 VFPr1 bug. */
19330 if (count == 2 && !arm_arch6)
19331 count++;
19332 saved += count * 8;
19333 }
19334 count = 0;
19335 }
19336 else
19337 count++;
19338 }
19339 if (count > 0)
19340 {
19341 if (count == 2 && !arm_arch6)
19342 count++;
19343 saved += count * 8;
19344 }
19345 }
19346 return saved;
19347 }
19348
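/* A standalone sketch, not part of the compiler itself:
   arm_get_vfp_saved_size above charges 8 bytes per D-register, counting
   maximal runs of consecutive registers that must be saved and padding
   a run of exactly two registers to three on pre-ARMv6 cores (the
   ARM10 VFPr1 workaround).  A plain bool array per D-register stands in
   for the df_regs_ever_live_p / call_used_regs test.  */
#include <stdbool.h>
#include <stdio.h>

#define NUM_DREGS 16

static int
vfp_saved_size (const bool live[NUM_DREGS], bool arch6)
{
  int d, count = 0, saved = 0;

  for (d = 0; d < NUM_DREGS; d++)
    {
      if (!live[d])
        {
          if (count > 0)
            {
              if (count == 2 && !arch6)
                count++;        /* ARM10 VFPr1 workaround.  */
              saved += count * 8;
            }
          count = 0;
        }
      else
        count++;
    }
  if (count > 0)
    {
      if (count == 2 && !arch6)
        count++;
      saved += count * 8;
    }
  return saved;
}

int
main (void)
{
  bool live[NUM_DREGS] = { false };

  live[8] = live[9] = true;     /* d8 and d9 live: one run of two.  */
  printf ("ARMv6+: %d bytes\n", vfp_saved_size (live, true));   /* 16 */
  printf ("pre-v6: %d bytes\n", vfp_saved_size (live, false));  /* 24 */
  return 0;
}
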
19349
19350 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19351 everything bar the final return instruction. If simple_return is true,
19352 then do not output the epilogue, because it has already been emitted in RTL. */
19353 const char *
19354 output_return_instruction (rtx operand, bool really_return, bool reverse,
19355 bool simple_return)
19356 {
19357 char conditional[10];
19358 char instr[100];
19359 unsigned reg;
19360 unsigned long live_regs_mask;
19361 unsigned long func_type;
19362 arm_stack_offsets *offsets;
19363
19364 func_type = arm_current_func_type ();
19365
19366 if (IS_NAKED (func_type))
19367 return "";
19368
19369 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19370 {
19371 /* If this function was declared non-returning, and we have
19372 found a tail call, then we have to trust that the called
19373 function won't return. */
19374 if (really_return)
19375 {
19376 rtx ops[2];
19377
19378 /* Otherwise, trap an attempted return by aborting. */
19379 ops[0] = operand;
19380 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19381 : "abort");
19382 assemble_external_libcall (ops[1]);
19383 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19384 }
19385
19386 return "";
19387 }
19388
19389 gcc_assert (!cfun->calls_alloca || really_return);
19390
19391 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19392
19393 cfun->machine->return_used_this_function = 1;
19394
19395 offsets = arm_get_frame_offsets ();
19396 live_regs_mask = offsets->saved_regs_mask;
19397
19398 if (!simple_return && live_regs_mask)
19399 {
19400 const char * return_reg;
19401
19402 /* If we do not have any special requirements for function exit
19403 (e.g. interworking) then we can load the return address
19404 directly into the PC. Otherwise we must load it into LR. */
19405 if (really_return
19406 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19407 return_reg = reg_names[PC_REGNUM];
19408 else
19409 return_reg = reg_names[LR_REGNUM];
19410
19411 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19412 {
19413 /* There are three possible reasons for the IP register
19414 being saved. 1) a stack frame was created, in which case
19415 IP contains the old stack pointer, or 2) an ISR routine
19416 corrupted it, or 3) it was saved to align the stack on
19417 iWMMXt. In case 1, restore IP into SP, otherwise just
19418 restore IP. */
19419 if (frame_pointer_needed)
19420 {
19421 live_regs_mask &= ~ (1 << IP_REGNUM);
19422 live_regs_mask |= (1 << SP_REGNUM);
19423 }
19424 else
19425 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19426 }
19427
19428 /* On some ARM architectures it is faster to use LDR rather than
19429 LDM to load a single register. On other architectures, the
19430 cost is the same. In 26 bit mode, or for exception handlers,
19431 we have to use LDM to load the PC so that the CPSR is also
19432 restored. */
19433 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19434 if (live_regs_mask == (1U << reg))
19435 break;
19436
19437 if (reg <= LAST_ARM_REGNUM
19438 && (reg != LR_REGNUM
19439 || ! really_return
19440 || ! IS_INTERRUPT (func_type)))
19441 {
19442 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19443 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19444 }
19445 else
19446 {
19447 char *p;
19448 int first = 1;
19449
19450 /* Generate the load multiple instruction to restore the
19451 registers. Note we can get here, even if
19452 frame_pointer_needed is true, but only if sp already
19453 points to the base of the saved core registers. */
19454 if (live_regs_mask & (1 << SP_REGNUM))
19455 {
19456 unsigned HOST_WIDE_INT stack_adjust;
19457
19458 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19459 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19460
19461 if (stack_adjust && arm_arch5 && TARGET_ARM)
19462 if (TARGET_UNIFIED_ASM)
19463 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19464 else
19465 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19466 else
19467 {
19468 /* If we can't use ldmib (SA110 bug),
19469 then try to pop r3 instead. */
19470 if (stack_adjust)
19471 live_regs_mask |= 1 << 3;
19472
19473 if (TARGET_UNIFIED_ASM)
19474 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19475 else
19476 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19477 }
19478 }
19479 else
19480 if (TARGET_UNIFIED_ASM)
19481 sprintf (instr, "pop%s\t{", conditional);
19482 else
19483 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19484
19485 p = instr + strlen (instr);
19486
19487 for (reg = 0; reg <= SP_REGNUM; reg++)
19488 if (live_regs_mask & (1 << reg))
19489 {
19490 int l = strlen (reg_names[reg]);
19491
19492 if (first)
19493 first = 0;
19494 else
19495 {
19496 memcpy (p, ", ", 2);
19497 p += 2;
19498 }
19499
19500 memcpy (p, "%|", 2);
19501 memcpy (p + 2, reg_names[reg], l);
19502 p += l + 2;
19503 }
19504
19505 if (live_regs_mask & (1 << LR_REGNUM))
19506 {
19507 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19508 /* If returning from an interrupt, restore the CPSR. */
19509 if (IS_INTERRUPT (func_type))
19510 strcat (p, "^");
19511 }
19512 else
19513 strcpy (p, "}");
19514 }
19515
19516 output_asm_insn (instr, & operand);
19517
19518 /* See if we need to generate an extra instruction to
19519 perform the actual function return. */
19520 if (really_return
19521 && func_type != ARM_FT_INTERWORKED
19522 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19523 {
19524 /* The return has already been handled
19525 by loading the LR into the PC. */
19526 return "";
19527 }
19528 }
19529
19530 if (really_return)
19531 {
19532 switch ((int) ARM_FUNC_TYPE (func_type))
19533 {
19534 case ARM_FT_ISR:
19535 case ARM_FT_FIQ:
19536 /* ??? This is wrong for unified assembly syntax. */
19537 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19538 break;
19539
19540 case ARM_FT_INTERWORKED:
19541 sprintf (instr, "bx%s\t%%|lr", conditional);
19542 break;
19543
19544 case ARM_FT_EXCEPTION:
19545 /* ??? This is wrong for unified assembly syntax. */
19546 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19547 break;
19548
19549 default:
19550 /* Use bx if it's available. */
19551 if (arm_arch5 || arm_arch4t)
19552 sprintf (instr, "bx%s\t%%|lr", conditional);
19553 else
19554 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19555 break;
19556 }
19557
19558 output_asm_insn (instr, & operand);
19559 }
19560
19561 return "";
19562 }
19563
19564 /* Write the function name into the code section, directly preceding
19565 the function prologue.
19566
19567 Code will be output similar to this:
19568 t0
19569 .ascii "arm_poke_function_name", 0
19570 .align
19571 t1
19572 .word 0xff000000 + (t1 - t0)
19573 arm_poke_function_name
19574 mov ip, sp
19575 stmfd sp!, {fp, ip, lr, pc}
19576 sub fp, ip, #4
19577
19578 When performing a stack backtrace, code can inspect the value
19579 of 'pc' stored at 'fp' + 0. If the trace function then looks
19580 at location pc - 12 and the top 8 bits are set, then we know
19581 that there is a function name embedded immediately preceding this
19582 location, whose length is given by ((pc[-3]) & ~0xff000000).
19583
19584 We assume that pc is declared as a pointer to an unsigned long.
19585
19586 It is of no benefit to output the function name if we are assembling
19587 a leaf function. These function types will not contain a stack
19588 backtrace structure, therefore it is not possible to determine the
19589 function name. */
19590 void
19591 arm_poke_function_name (FILE *stream, const char *name)
19592 {
19593 unsigned long alignlength;
19594 unsigned long length;
19595 rtx x;
19596
19597 length = strlen (name) + 1;
19598 alignlength = ROUND_UP_WORD (length);
19599
19600 ASM_OUTPUT_ASCII (stream, name, length);
19601 ASM_OUTPUT_ALIGN (stream, 2);
19602 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19603 assemble_aligned_integer (UNITS_PER_WORD, x);
19604 }
19605
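/* A standalone sketch, not part of the compiler itself and only
   meaningful for code built with -mpoke-function-name: given a saved
   pc obtained from a frame walk (the frame walk itself is assumed
   here), the embedded function name can be recovered from the marker
   word at pc - 12, whose top byte is 0xff and whose low 24 bits give
   the word-aligned length of the name that precedes it.  The fake
   in-memory layout in main() is purely for demonstration.  */
#include <stdint.h>
#include <stdio.h>

static const char *
poked_function_name (const uint32_t *pc)
{
  uint32_t marker = pc[-3];             /* The word at pc - 12.  */

  if ((marker & 0xff000000) != 0xff000000)
    return NULL;                        /* No embedded name.  */

  /* The padded name ends where the marker word begins.  */
  return (const char *) (pc - 3) - (marker & 0x00ffffff);
}

int
main (void)
{
  /* Fake layout: a name padded to 8 bytes, the marker word, then three
     words standing in for the first instructions of the function.  */
  static const struct
  {
    char name[8];
    uint32_t marker;
    uint32_t code[3];
  } fake = { "foo", 0xff000000u + 8, { 0, 0, 0 } };

  const uint32_t *pc = &fake.code[2];   /* 12 bytes past the marker.  */

  printf ("%s\n", poked_function_name (pc));    /* Prints "foo".  */
  return 0;
}
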
19606 /* Place some comments into the assembler stream
19607 describing the current function. */
19608 static void
19609 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19610 {
19611 unsigned long func_type;
19612
19613 /* ??? Do we want to print some of the below anyway? */
19614 if (TARGET_THUMB1)
19615 return;
19616
19617 /* Sanity check. */
19618 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19619
19620 func_type = arm_current_func_type ();
19621
19622 switch ((int) ARM_FUNC_TYPE (func_type))
19623 {
19624 default:
19625 case ARM_FT_NORMAL:
19626 break;
19627 case ARM_FT_INTERWORKED:
19628 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19629 break;
19630 case ARM_FT_ISR:
19631 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19632 break;
19633 case ARM_FT_FIQ:
19634 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19635 break;
19636 case ARM_FT_EXCEPTION:
19637 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19638 break;
19639 }
19640
19641 if (IS_NAKED (func_type))
19642 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19643
19644 if (IS_VOLATILE (func_type))
19645 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19646
19647 if (IS_NESTED (func_type))
19648 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19649 if (IS_STACKALIGN (func_type))
19650 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19651
19652 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19653 crtl->args.size,
19654 crtl->args.pretend_args_size, frame_size);
19655
19656 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19657 frame_pointer_needed,
19658 cfun->machine->uses_anonymous_args);
19659
19660 if (cfun->machine->lr_save_eliminated)
19661 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19662
19663 if (crtl->calls_eh_return)
19664 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19665
19666 }
19667
19668 static void
19669 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19670 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19671 {
19672 arm_stack_offsets *offsets;
19673
19674 if (TARGET_THUMB1)
19675 {
19676 int regno;
19677
19678 /* Emit any call-via-reg trampolines that are needed for v4t support
19679 of call_reg and call_value_reg type insns. */
19680 for (regno = 0; regno < LR_REGNUM; regno++)
19681 {
19682 rtx label = cfun->machine->call_via[regno];
19683
19684 if (label != NULL)
19685 {
19686 switch_to_section (function_section (current_function_decl));
19687 targetm.asm_out.internal_label (asm_out_file, "L",
19688 CODE_LABEL_NUMBER (label));
19689 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19690 }
19691 }
19692
19693 /* ??? Probably not safe to set this here, since it assumes that a
19694 function will be emitted as assembly immediately after we generate
19695 RTL for it. This does not happen for inline functions. */
19696 cfun->machine->return_used_this_function = 0;
19697 }
19698 else /* TARGET_32BIT */
19699 {
19700 /* We need to take into account any stack-frame rounding. */
19701 offsets = arm_get_frame_offsets ();
19702
19703 gcc_assert (!use_return_insn (FALSE, NULL)
19704 || (cfun->machine->return_used_this_function != 0)
19705 || offsets->saved_regs == offsets->outgoing_args
19706 || frame_pointer_needed);
19707 }
19708 }
19709
19710 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19711 STR and STRD. If an even number of registers are being pushed, an
19712 STRD pattern is created for each register pair. If an
19713 odd number of registers are pushed, emit an initial STR followed by
19714 as many STRD instructions as are needed. This works best when the
19715 stack is initially 64-bit aligned (the normal case), since it
19716 ensures that each STRD is also 64-bit aligned. */
19717 static void
19718 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19719 {
19720 int num_regs = 0;
19721 int i;
19722 int regno;
19723 rtx par = NULL_RTX;
19724 rtx dwarf = NULL_RTX;
19725 rtx tmp;
19726 bool first = true;
19727
19728 num_regs = bit_count (saved_regs_mask);
19729
19730 /* Must be at least one register to save, and can't save SP or PC. */
19731 gcc_assert (num_regs > 0 && num_regs <= 14);
19732 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19733 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19734
19735 /* Create sequence for DWARF info. All the frame-related data for
19736 debugging is held in this wrapper. */
19737 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19738
19739 /* Describe the stack adjustment. */
19740 tmp = gen_rtx_SET (stack_pointer_rtx,
19741 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19742 RTX_FRAME_RELATED_P (tmp) = 1;
19743 XVECEXP (dwarf, 0, 0) = tmp;
19744
19745 /* Find the first register. */
19746 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19747 ;
19748
19749 i = 0;
19750
19751 /* If there's an odd number of registers to push, start off by
19752 pushing a single register. This ensures that subsequent strd
19753 operations are dword aligned (assuming that SP was originally
19754 64-bit aligned). */
19755 if ((num_regs & 1) != 0)
19756 {
19757 rtx reg, mem, insn;
19758
19759 reg = gen_rtx_REG (SImode, regno);
19760 if (num_regs == 1)
19761 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19762 stack_pointer_rtx));
19763 else
19764 mem = gen_frame_mem (Pmode,
19765 gen_rtx_PRE_MODIFY
19766 (Pmode, stack_pointer_rtx,
19767 plus_constant (Pmode, stack_pointer_rtx,
19768 -4 * num_regs)));
19769
19770 tmp = gen_rtx_SET (mem, reg);
19771 RTX_FRAME_RELATED_P (tmp) = 1;
19772 insn = emit_insn (tmp);
19773 RTX_FRAME_RELATED_P (insn) = 1;
19774 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19775 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19776 RTX_FRAME_RELATED_P (tmp) = 1;
19777 i++;
19778 regno++;
19779 XVECEXP (dwarf, 0, i) = tmp;
19780 first = false;
19781 }
19782
19783 while (i < num_regs)
19784 if (saved_regs_mask & (1 << regno))
19785 {
19786 rtx reg1, reg2, mem1, mem2;
19787 rtx tmp0, tmp1, tmp2;
19788 int regno2;
19789
19790 /* Find the register to pair with this one. */
19791 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19792 regno2++)
19793 ;
19794
19795 reg1 = gen_rtx_REG (SImode, regno);
19796 reg2 = gen_rtx_REG (SImode, regno2);
19797
19798 if (first)
19799 {
19800 rtx insn;
19801
19802 first = false;
19803 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19804 stack_pointer_rtx,
19805 -4 * num_regs));
19806 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19807 stack_pointer_rtx,
19808 -4 * (num_regs - 1)));
19809 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19810 plus_constant (Pmode, stack_pointer_rtx,
19811 -4 * (num_regs)));
19812 tmp1 = gen_rtx_SET (mem1, reg1);
19813 tmp2 = gen_rtx_SET (mem2, reg2);
19814 RTX_FRAME_RELATED_P (tmp0) = 1;
19815 RTX_FRAME_RELATED_P (tmp1) = 1;
19816 RTX_FRAME_RELATED_P (tmp2) = 1;
19817 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19818 XVECEXP (par, 0, 0) = tmp0;
19819 XVECEXP (par, 0, 1) = tmp1;
19820 XVECEXP (par, 0, 2) = tmp2;
19821 insn = emit_insn (par);
19822 RTX_FRAME_RELATED_P (insn) = 1;
19823 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19824 }
19825 else
19826 {
19827 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19828 stack_pointer_rtx,
19829 4 * i));
19830 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19831 stack_pointer_rtx,
19832 4 * (i + 1)));
19833 tmp1 = gen_rtx_SET (mem1, reg1);
19834 tmp2 = gen_rtx_SET (mem2, reg2);
19835 RTX_FRAME_RELATED_P (tmp1) = 1;
19836 RTX_FRAME_RELATED_P (tmp2) = 1;
19837 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19838 XVECEXP (par, 0, 0) = tmp1;
19839 XVECEXP (par, 0, 1) = tmp2;
19840 emit_insn (par);
19841 }
19842
19843 /* Create unwind information. This is an approximation. */
19844 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19845 plus_constant (Pmode,
19846 stack_pointer_rtx,
19847 4 * i)),
19848 reg1);
19849 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19850 plus_constant (Pmode,
19851 stack_pointer_rtx,
19852 4 * (i + 1))),
19853 reg2);
19854
19855 RTX_FRAME_RELATED_P (tmp1) = 1;
19856 RTX_FRAME_RELATED_P (tmp2) = 1;
19857 XVECEXP (dwarf, 0, i + 1) = tmp1;
19858 XVECEXP (dwarf, 0, i + 2) = tmp2;
19859 i += 2;
19860 regno = regno2 + 1;
19861 }
19862 else
19863 regno++;
19864
19865 return;
19866 }
19867
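/* A standalone sketch, not part of the compiler itself:
   thumb2_emit_strd_push above stores one register with a plain STR
   first when the count is odd, so that (assuming SP starts 64-bit
   aligned) the STRDs that follow are doubleword aligned, and then pairs
   up the remaining registers.  This sketch prints the resulting store
   plan for a given core-register mask.  */
#include <stdio.h>

static void
print_push_plan (unsigned long mask)
{
  int regno, regno2, num_regs = 0, i = 0;

  for (regno = 0; regno < 16; regno++)
    if (mask & (1UL << regno))
      num_regs++;

  /* Find the first register in the mask.  */
  for (regno = 0; (mask & (1UL << regno)) == 0; regno++)
    ;

  if (num_regs & 1)
    {
      printf ("str  r%d\n", regno);     /* Lone store keeps pairs aligned. */
      regno++;
      i++;
    }

  while (i < num_regs)
    if (mask & (1UL << regno))
      {
        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (mask & (1UL << regno2)) == 0; regno2++)
          ;
        printf ("strd r%d, r%d\n", regno, regno2);
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;
}

int
main (void)
{
  /* Push {r4, r5, r6, r7, lr}: five registers, so one STR then two STRDs. */
  print_push_plan ((1UL << 4) | (1UL << 5) | (1UL << 6) | (1UL << 7)
                   | (1UL << 14));
  return 0;
}
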
19868 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19869 whenever possible, otherwise it emits single-word stores. The first store
19870 also allocates stack space for all saved registers, using writeback with
19871 post-addressing mode. All other stores use offset addressing. If no STRD
19872 can be emitted, this function emits a sequence of single-word stores,
19873 and not an STM as before, because single-word stores provide more freedom
19874 scheduling and can be turned into an STM by peephole optimizations. */
19875 static void
19876 arm_emit_strd_push (unsigned long saved_regs_mask)
19877 {
19878 int num_regs = 0;
19879 int i, j, dwarf_index = 0;
19880 int offset = 0;
19881 rtx dwarf = NULL_RTX;
19882 rtx insn = NULL_RTX;
19883 rtx tmp, mem;
19884
19885 /* TODO: More efficient code can be emitted by changing the
19886 layout, e.g., first push all pairs that can use STRD to keep the
19887 stack aligned, and then push all other registers. */
19888 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19889 if (saved_regs_mask & (1 << i))
19890 num_regs++;
19891
19892 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19893 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19894 gcc_assert (num_regs > 0);
19895
19896 /* Create sequence for DWARF info. */
19897 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19898
19899 /* For dwarf info, we generate explicit stack update. */
19900 tmp = gen_rtx_SET (stack_pointer_rtx,
19901 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19902 RTX_FRAME_RELATED_P (tmp) = 1;
19903 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19904
19905 /* Save registers. */
19906 offset = - 4 * num_regs;
19907 j = 0;
19908 while (j <= LAST_ARM_REGNUM)
19909 if (saved_regs_mask & (1 << j))
19910 {
19911 if ((j % 2 == 0)
19912 && (saved_regs_mask & (1 << (j + 1))))
19913 {
19914 /* The current register and the next register form a register pair
19915 for which STRD can be generated. */
19916 if (offset < 0)
19917 {
19918 /* Allocate stack space for all saved registers. */
19919 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19920 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19921 mem = gen_frame_mem (DImode, tmp);
19922 offset = 0;
19923 }
19924 else if (offset > 0)
19925 mem = gen_frame_mem (DImode,
19926 plus_constant (Pmode,
19927 stack_pointer_rtx,
19928 offset));
19929 else
19930 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19931
19932 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19933 RTX_FRAME_RELATED_P (tmp) = 1;
19934 tmp = emit_insn (tmp);
19935
19936 /* Record the first store insn. */
19937 if (dwarf_index == 1)
19938 insn = tmp;
19939
19940 /* Generate dwarf info. */
19941 mem = gen_frame_mem (SImode,
19942 plus_constant (Pmode,
19943 stack_pointer_rtx,
19944 offset));
19945 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19946 RTX_FRAME_RELATED_P (tmp) = 1;
19947 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19948
19949 mem = gen_frame_mem (SImode,
19950 plus_constant (Pmode,
19951 stack_pointer_rtx,
19952 offset + 4));
19953 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19954 RTX_FRAME_RELATED_P (tmp) = 1;
19955 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19956
19957 offset += 8;
19958 j += 2;
19959 }
19960 else
19961 {
19962 /* Emit a single word store. */
19963 if (offset < 0)
19964 {
19965 /* Allocate stack space for all saved registers. */
19966 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19967 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19968 mem = gen_frame_mem (SImode, tmp);
19969 offset = 0;
19970 }
19971 else if (offset > 0)
19972 mem = gen_frame_mem (SImode,
19973 plus_constant (Pmode,
19974 stack_pointer_rtx,
19975 offset));
19976 else
19977 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19978
19979 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19980 RTX_FRAME_RELATED_P (tmp) = 1;
19981 tmp = emit_insn (tmp);
19982
19983 /* Record the first store insn. */
19984 if (dwarf_index == 1)
19985 insn = tmp;
19986
19987 /* Generate dwarf info. */
19988 mem = gen_frame_mem (SImode,
19989 plus_constant(Pmode,
19990 stack_pointer_rtx,
19991 offset));
19992 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19993 RTX_FRAME_RELATED_P (tmp) = 1;
19994 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19995
19996 offset += 4;
19997 j += 1;
19998 }
19999 }
20000 else
20001 j++;
20002
20003 /* Attach dwarf info to the first insn we generate. */
20004 gcc_assert (insn != NULL_RTX);
20005 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20006 RTX_FRAME_RELATED_P (insn) = 1;
20007 }
20008
20009 /* Generate and emit an insn that we will recognize as a push_multi.
20010 Unfortunately, since this insn does not reflect very well the actual
20011 semantics of the operation, we need to annotate the insn for the benefit
20012 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20013 MASK for registers that should be annotated for DWARF2 frame unwind
20014 information. */
20015 static rtx
20016 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20017 {
20018 int num_regs = 0;
20019 int num_dwarf_regs = 0;
20020 int i, j;
20021 rtx par;
20022 rtx dwarf;
20023 int dwarf_par_index;
20024 rtx tmp, reg;
20025
20026 /* We don't record the PC in the dwarf frame information. */
20027 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20028
20029 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20030 {
20031 if (mask & (1 << i))
20032 num_regs++;
20033 if (dwarf_regs_mask & (1 << i))
20034 num_dwarf_regs++;
20035 }
20036
20037 gcc_assert (num_regs && num_regs <= 16);
20038 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20039
20040 /* For the body of the insn we are going to generate an UNSPEC in
20041 parallel with several USEs. This allows the insn to be recognized
20042 by the push_multi pattern in the arm.md file.
20043
20044 The body of the insn looks something like this:
20045
20046 (parallel [
20047 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20048 (const_int:SI <num>)))
20049 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20050 (use (reg:SI XX))
20051 (use (reg:SI YY))
20052 ...
20053 ])
20054
20055 For the frame note however, we try to be more explicit and actually
20056 show each register being stored into the stack frame, plus a (single)
20057 decrement of the stack pointer. We do it this way in order to be
20058 friendly to the stack unwinding code, which only wants to see a single
20059 stack decrement per instruction. The RTL we generate for the note looks
20060 something like this:
20061
20062 (sequence [
20063 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20064 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20065 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20066 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20067 ...
20068 ])
20069
20070 FIXME: In an ideal world the PRE_MODIFY would not exist and
20071 instead we'd have a parallel expression detailing all
20072 the stores to the various memory addresses so that debug
20073 information is more up-to-date. Remember however while writing
20074 this to take care of the constraints with the push instruction.
20075
20076 Note also that this has to be taken care of for the VFP registers.
20077
20078 For more see PR43399. */
20079
20080 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20081 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20082 dwarf_par_index = 1;
20083
20084 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20085 {
20086 if (mask & (1 << i))
20087 {
20088 reg = gen_rtx_REG (SImode, i);
20089
20090 XVECEXP (par, 0, 0)
20091 = gen_rtx_SET (gen_frame_mem
20092 (BLKmode,
20093 gen_rtx_PRE_MODIFY (Pmode,
20094 stack_pointer_rtx,
20095 plus_constant
20096 (Pmode, stack_pointer_rtx,
20097 -4 * num_regs))
20098 ),
20099 gen_rtx_UNSPEC (BLKmode,
20100 gen_rtvec (1, reg),
20101 UNSPEC_PUSH_MULT));
20102
20103 if (dwarf_regs_mask & (1 << i))
20104 {
20105 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20106 reg);
20107 RTX_FRAME_RELATED_P (tmp) = 1;
20108 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20109 }
20110
20111 break;
20112 }
20113 }
20114
20115 for (j = 1, i++; j < num_regs; i++)
20116 {
20117 if (mask & (1 << i))
20118 {
20119 reg = gen_rtx_REG (SImode, i);
20120
20121 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20122
20123 if (dwarf_regs_mask & (1 << i))
20124 {
20125 tmp
20126 = gen_rtx_SET (gen_frame_mem
20127 (SImode,
20128 plus_constant (Pmode, stack_pointer_rtx,
20129 4 * j)),
20130 reg);
20131 RTX_FRAME_RELATED_P (tmp) = 1;
20132 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20133 }
20134
20135 j++;
20136 }
20137 }
20138
20139 par = emit_insn (par);
20140
20141 tmp = gen_rtx_SET (stack_pointer_rtx,
20142 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20143 RTX_FRAME_RELATED_P (tmp) = 1;
20144 XVECEXP (dwarf, 0, 0) = tmp;
20145
20146 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20147
20148 return par;
20149 }
20150
20151 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20152 SIZE is the offset to be adjusted.
20153 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20154 static void
20155 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20156 {
20157 rtx dwarf;
20158
20159 RTX_FRAME_RELATED_P (insn) = 1;
20160 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20161 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20162 }
20163
20164 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20165 SAVED_REGS_MASK shows which registers need to be restored.
20166
20167 Unfortunately, since this insn does not reflect very well the actual
20168 semantics of the operation, we need to annotate the insn for the benefit
20169 of DWARF2 frame unwind information. */
20170 static void
20171 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20172 {
20173 int num_regs = 0;
20174 int i, j;
20175 rtx par;
20176 rtx dwarf = NULL_RTX;
20177 rtx tmp, reg;
20178 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20179 int offset_adj;
20180 int emit_update;
20181
20182 offset_adj = return_in_pc ? 1 : 0;
20183 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20184 if (saved_regs_mask & (1 << i))
20185 num_regs++;
20186
20187 gcc_assert (num_regs && num_regs <= 16);
20188
20189 /* If SP is in reglist, then we don't emit SP update insn. */
20190 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20191
20192 /* The parallel needs to hold num_regs SETs
20193 and one SET for the stack update. */
20194 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20195
20196 if (return_in_pc)
20197 XVECEXP (par, 0, 0) = ret_rtx;
20198
20199 if (emit_update)
20200 {
20201 /* Increment the stack pointer, based on there being
20202 num_regs 4-byte registers to restore. */
20203 tmp = gen_rtx_SET (stack_pointer_rtx,
20204 plus_constant (Pmode,
20205 stack_pointer_rtx,
20206 4 * num_regs));
20207 RTX_FRAME_RELATED_P (tmp) = 1;
20208 XVECEXP (par, 0, offset_adj) = tmp;
20209 }
20210
20211 /* Now restore every reg, which may include PC. */
20212 for (j = 0, i = 0; j < num_regs; i++)
20213 if (saved_regs_mask & (1 << i))
20214 {
20215 reg = gen_rtx_REG (SImode, i);
20216 if ((num_regs == 1) && emit_update && !return_in_pc)
20217 {
20218 /* Emit single load with writeback. */
20219 tmp = gen_frame_mem (SImode,
20220 gen_rtx_POST_INC (Pmode,
20221 stack_pointer_rtx));
20222 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20223 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20224 return;
20225 }
20226
20227 tmp = gen_rtx_SET (reg,
20228 gen_frame_mem
20229 (SImode,
20230 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20231 RTX_FRAME_RELATED_P (tmp) = 1;
20232 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20233
20234 /* We need to maintain a sequence for DWARF info too. As dwarf info
20235 should not have PC, skip PC. */
20236 if (i != PC_REGNUM)
20237 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20238
20239 j++;
20240 }
20241
20242 if (return_in_pc)
20243 par = emit_jump_insn (par);
20244 else
20245 par = emit_insn (par);
20246
20247 REG_NOTES (par) = dwarf;
20248 if (!return_in_pc)
20249 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20250 stack_pointer_rtx, stack_pointer_rtx);
20251 }
20252
20253 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20254 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20255
20256 Unfortunately, since this insn does not reflect very well the actual
20257 semantics of the operation, we need to annotate the insn for the benefit
20258 of DWARF2 frame unwind information. */
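/* As a rough illustration: restoring d8-d10 with BASE_REG == SP (and
   assuming arm_arch6, so the ARM10 workaround below does not trigger)
   builds

     [(set sp (plus sp 24))
      (set d8 (mem:DF sp))
      (set d9 (mem:DF (plus sp 8)))
      (set d10 (mem:DF (plus sp 16)))]

   which assembles to something like

	vldmia	sp!, {d8-d10}

   with a REG_CFA_RESTORE note for each D register and, because the base
   is SP rather than IP, a 24-byte REG_CFA_ADJUST_CFA note.  */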
20259 static void
20260 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20261 {
20262 int i, j;
20263 rtx par;
20264 rtx dwarf = NULL_RTX;
20265 rtx tmp, reg;
20266
20267 gcc_assert (num_regs && num_regs <= 32);
20268
20269 /* Workaround ARM10 VFPr1 bug. */
20270 if (num_regs == 2 && !arm_arch6)
20271 {
20272 if (first_reg == 15)
20273 first_reg--;
20274
20275 num_regs++;
20276 }
20277
20278 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20279 there could be up to 32 D-registers to restore.
20280 If there are more than 16 D-registers, make two recursive calls,
20281 each of which emits one pop_multi instruction. */
20282 if (num_regs > 16)
20283 {
20284 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20285 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20286 return;
20287 }
20288
20289 /* The parallel needs to hold num_regs SETs
20290 and one SET for the stack update. */
20291 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20292
20293 /* Increment the stack pointer, based on there being
20294 num_regs 8-byte registers to restore. */
20295 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20296 RTX_FRAME_RELATED_P (tmp) = 1;
20297 XVECEXP (par, 0, 0) = tmp;
20298
20299 /* Now show every reg that will be restored, using a SET for each. */
20300 for (j = 0, i=first_reg; j < num_regs; i += 2)
20301 {
20302 reg = gen_rtx_REG (DFmode, i);
20303
20304 tmp = gen_rtx_SET (reg,
20305 gen_frame_mem
20306 (DFmode,
20307 plus_constant (Pmode, base_reg, 8 * j)));
20308 RTX_FRAME_RELATED_P (tmp) = 1;
20309 XVECEXP (par, 0, j + 1) = tmp;
20310
20311 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20312
20313 j++;
20314 }
20315
20316 par = emit_insn (par);
20317 REG_NOTES (par) = dwarf;
20318
20319 /* Make sure the CFA does not get left based on IP_REGNUM, to allow unwinding from FP. */
20320 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20321 {
20322 RTX_FRAME_RELATED_P (par) = 1;
20323 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20324 }
20325 else
20326 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20327 base_reg, base_reg);
20328 }
20329
20330 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20331 number of registers are being popped, multiple LDRD patterns are created for
20332 all register pairs. If odd number of registers are popped, last register is
20333 loaded by using LDR pattern. */
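/* As a rough illustration: for SAVED_REGS_MASK covering {r4, r5, r6, r7}
   (PC not included) the code below emits approximately

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16

   with REG_CFA_RESTORE notes on each LDRD and a REG_CFA_ADJUST_CFA note
   on the final stack adjustment.  */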
20334 static void
20335 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20336 {
20337 int num_regs = 0;
20338 int i, j;
20339 rtx par = NULL_RTX;
20340 rtx dwarf = NULL_RTX;
20341 rtx tmp, reg, tmp1;
20342 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20343
20344 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20345 if (saved_regs_mask & (1 << i))
20346 num_regs++;
20347
20348 gcc_assert (num_regs && num_regs <= 16);
20349
20350 /* We cannot generate an LDRD for PC. Hence, reduce the count if PC is
20351 to be popped: if num_regs was even, it now becomes odd and we can
20352 generate a pop with PC; if num_regs was odd, it becomes even and an
20353 LDR with return can be generated for PC. */
20354 if (return_in_pc)
20355 num_regs--;
20356
20357 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20358
20359 /* Var j iterates over all the register numbers to gather the registers in
20360 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
20361 A PARALLEL RTX of register-pair is created here, so that pattern for
20362 LDRD can be matched. As PC is always last register to be popped, and
20363 we have already decremented num_regs if PC, we don't have to worry
20364 about PC in this loop. */
20365 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20366 if (saved_regs_mask & (1 << j))
20367 {
20368 /* Create RTX for memory load. */
20369 reg = gen_rtx_REG (SImode, j);
20370 tmp = gen_rtx_SET (reg,
20371 gen_frame_mem (SImode,
20372 plus_constant (Pmode,
20373 stack_pointer_rtx, 4 * i)));
20374 RTX_FRAME_RELATED_P (tmp) = 1;
20375
20376 if (i % 2 == 0)
20377 {
20378 /* When saved-register index (i) is even, the RTX to be emitted is
20379 yet to be created. Hence create it first. The LDRD pattern we
20380 are generating is :
20381 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20382 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20383 where target registers need not be consecutive. */
20384 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20385 dwarf = NULL_RTX;
20386 }
20387
20388 /* The ith register is added to the PARALLEL RTX: if i is even, reg_i
20389 becomes the 0th element and if i is odd, reg_i becomes the 1st element
20390 of the LDRD pattern shown above. */
20391 XVECEXP (par, 0, (i % 2)) = tmp;
20392 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20393
20394 if ((i % 2) == 1)
20395 {
20396 /* When saved-register index (i) is odd, RTXs for both the registers
20397 to be loaded are generated in above given LDRD pattern, and the
20398 pattern can be emitted now. */
20399 par = emit_insn (par);
20400 REG_NOTES (par) = dwarf;
20401 RTX_FRAME_RELATED_P (par) = 1;
20402 }
20403
20404 i++;
20405 }
20406
20407 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20408 the number of registers is even AND return_in_pc is true, the last register
20409 is popped using LDR. It can be PC as well. Hence, adjust the stack first and
20410 then LDR with post increment. */
20411
20412 /* Increment the stack pointer, based on there being
20413 num_regs 4-byte registers to restore. */
20414 tmp = gen_rtx_SET (stack_pointer_rtx,
20415 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20416 RTX_FRAME_RELATED_P (tmp) = 1;
20417 tmp = emit_insn (tmp);
20418 if (!return_in_pc)
20419 {
20420 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20421 stack_pointer_rtx, stack_pointer_rtx);
20422 }
20423
20424 dwarf = NULL_RTX;
20425
20426 if (((num_regs % 2) == 1 && !return_in_pc)
20427 || ((num_regs % 2) == 0 && return_in_pc))
20428 {
20429 /* Scan for the single register to be popped. Skip until the saved
20430 register is found. */
20431 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20432
20433 /* Gen LDR with post increment here. */
20434 tmp1 = gen_rtx_MEM (SImode,
20435 gen_rtx_POST_INC (SImode,
20436 stack_pointer_rtx));
20437 set_mem_alias_set (tmp1, get_frame_alias_set ());
20438
20439 reg = gen_rtx_REG (SImode, j);
20440 tmp = gen_rtx_SET (reg, tmp1);
20441 RTX_FRAME_RELATED_P (tmp) = 1;
20442 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20443
20444 if (return_in_pc)
20445 {
20446 /* If return_in_pc, j must be PC_REGNUM. */
20447 gcc_assert (j == PC_REGNUM);
20448 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20449 XVECEXP (par, 0, 0) = ret_rtx;
20450 XVECEXP (par, 0, 1) = tmp;
20451 par = emit_jump_insn (par);
20452 }
20453 else
20454 {
20455 par = emit_insn (tmp);
20456 REG_NOTES (par) = dwarf;
20457 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20458 stack_pointer_rtx, stack_pointer_rtx);
20459 }
20460
20461 }
20462 else if ((num_regs % 2) == 1 && return_in_pc)
20463 {
20464 /* There are 2 registers to be popped. So, generate the pattern
20465 pop_multiple_with_stack_update_and_return to pop in PC. */
20466 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20467 }
20468
20469 return;
20470 }
20471
20472 /* LDRD in ARM mode needs consecutive registers as operands. This function
20473 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20474 offset addressing and then generates one separate stack update. This provides
20475 more scheduling freedom, compared to writeback on every load. However,
20476 if the function returns using load into PC directly
20477 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20478 before the last load. TODO: Add a peephole optimization to recognize
20479 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20480 peephole optimization to merge the load at stack-offset zero
20481 with the stack update instruction using load with writeback
20482 in post-index addressing mode. */
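/* As a rough illustration: for SAVED_REGS_MASK covering {r4, r5, r7}
   (PC not included) the code below emits approximately

	ldrd	r4, r5, [sp]
	ldr	r7, [sp, #8]
	add	sp, sp, #12

   using LDRD only for the consecutive even/odd pair r4/r5 and falling
   back to a single-word LDR for r7.  */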
20483 static void
20484 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20485 {
20486 int j = 0;
20487 int offset = 0;
20488 rtx par = NULL_RTX;
20489 rtx dwarf = NULL_RTX;
20490 rtx tmp, mem;
20491
20492 /* Restore saved registers. */
20493 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20494 j = 0;
20495 while (j <= LAST_ARM_REGNUM)
20496 if (saved_regs_mask & (1 << j))
20497 {
20498 if ((j % 2) == 0
20499 && (saved_regs_mask & (1 << (j + 1)))
20500 && (j + 1) != PC_REGNUM)
20501 {
20502 /* Current register and next register form register pair for which
20503 LDRD can be generated. PC is always the last register popped, and
20504 we handle it separately. */
20505 if (offset > 0)
20506 mem = gen_frame_mem (DImode,
20507 plus_constant (Pmode,
20508 stack_pointer_rtx,
20509 offset));
20510 else
20511 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20512
20513 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20514 tmp = emit_insn (tmp);
20515 RTX_FRAME_RELATED_P (tmp) = 1;
20516
20517 /* Generate dwarf info. */
20518
20519 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20520 gen_rtx_REG (SImode, j),
20521 NULL_RTX);
20522 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20523 gen_rtx_REG (SImode, j + 1),
20524 dwarf);
20525
20526 REG_NOTES (tmp) = dwarf;
20527
20528 offset += 8;
20529 j += 2;
20530 }
20531 else if (j != PC_REGNUM)
20532 {
20533 /* Emit a single word load. */
20534 if (offset > 0)
20535 mem = gen_frame_mem (SImode,
20536 plus_constant (Pmode,
20537 stack_pointer_rtx,
20538 offset));
20539 else
20540 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20541
20542 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20543 tmp = emit_insn (tmp);
20544 RTX_FRAME_RELATED_P (tmp) = 1;
20545
20546 /* Generate dwarf info. */
20547 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20548 gen_rtx_REG (SImode, j),
20549 NULL_RTX);
20550
20551 offset += 4;
20552 j += 1;
20553 }
20554 else /* j == PC_REGNUM */
20555 j++;
20556 }
20557 else
20558 j++;
20559
20560 /* Update the stack. */
20561 if (offset > 0)
20562 {
20563 tmp = gen_rtx_SET (stack_pointer_rtx,
20564 plus_constant (Pmode,
20565 stack_pointer_rtx,
20566 offset));
20567 tmp = emit_insn (tmp);
20568 arm_add_cfa_adjust_cfa_note (tmp, offset,
20569 stack_pointer_rtx, stack_pointer_rtx);
20570 offset = 0;
20571 }
20572
20573 if (saved_regs_mask & (1 << PC_REGNUM))
20574 {
20575 /* Only PC is to be popped. */
20576 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20577 XVECEXP (par, 0, 0) = ret_rtx;
20578 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20579 gen_frame_mem (SImode,
20580 gen_rtx_POST_INC (SImode,
20581 stack_pointer_rtx)));
20582 RTX_FRAME_RELATED_P (tmp) = 1;
20583 XVECEXP (par, 0, 1) = tmp;
20584 par = emit_jump_insn (par);
20585
20586 /* Generate dwarf info. */
20587 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20588 gen_rtx_REG (SImode, PC_REGNUM),
20589 NULL_RTX);
20590 REG_NOTES (par) = dwarf;
20591 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20592 stack_pointer_rtx, stack_pointer_rtx);
20593 }
20594 }
20595
20596 /* Calculate the size of the return value that is passed in registers. */
20597 static unsigned
20598 arm_size_return_regs (void)
20599 {
20600 machine_mode mode;
20601
20602 if (crtl->return_rtx != 0)
20603 mode = GET_MODE (crtl->return_rtx);
20604 else
20605 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20606
20607 return GET_MODE_SIZE (mode);
20608 }
20609
20610 /* Return true if the current function needs to save/restore LR. */
20611 static bool
20612 thumb_force_lr_save (void)
20613 {
20614 return !cfun->machine->lr_save_eliminated
20615 && (!leaf_function_p ()
20616 || thumb_far_jump_used_p ()
20617 || df_regs_ever_live_p (LR_REGNUM));
20618 }
20619
20620 /* Return true if CALL is an indirect tail call; with an indirect
20621 tail call we do not know whether r3 will be available in this
20622 particular case. */
20623 static bool
20624 is_indirect_tailcall_p (rtx call)
20625 {
20626 rtx pat = PATTERN (call);
20627
20628 /* Indirect tail call. */
20629 pat = XVECEXP (pat, 0, 0);
20630 if (GET_CODE (pat) == SET)
20631 pat = SET_SRC (pat);
20632
20633 pat = XEXP (XEXP (pat, 0), 0);
20634 return REG_P (pat);
20635 }
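
/* For illustration: a sibcall body whose first PARALLEL element is
   (set (reg r0) (call (mem:SI (reg:SI r3)) ...)) reaches the final test
   above with PAT == (reg:SI r3) and so returns true, whereas a direct
   call through (mem:SI (symbol_ref "foo")) ends with a SYMBOL_REF and
   returns false ("foo" is just a placeholder name here).  */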
20636
20637 /* Return true if r3 is used by any of the tail call insns in the
20638 current function. */
20639 static bool
20640 any_sibcall_could_use_r3 (void)
20641 {
20642 edge_iterator ei;
20643 edge e;
20644
20645 if (!crtl->tail_call_emit)
20646 return false;
20647 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20648 if (e->flags & EDGE_SIBCALL)
20649 {
20650 rtx call = BB_END (e->src);
20651 if (!CALL_P (call))
20652 call = prev_nonnote_nondebug_insn (call);
20653 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20654 if (find_regno_fusage (call, USE, 3)
20655 || is_indirect_tailcall_p (call))
20656 return true;
20657 }
20658 return false;
20659 }
20660
20661
20662 /* Compute the distance from register FROM to register TO.
20663 These can be the arg pointer (26), the soft frame pointer (25),
20664 the stack pointer (13) or the hard frame pointer (11).
20665 In thumb mode r7 is used as the soft frame pointer, if needed.
20666 Typical stack layout looks like this:
20667
20668       old stack pointer -> |    |
20669                             ----
20670                            |    | \
20671                            |    |   saved arguments for
20672                            |    |   vararg functions
20673                            |    | /
20674                              --
20675   hard FP & arg pointer -> |    | \
20676                            |    |   stack
20677                            |    |   frame
20678                            |    | /
20679                              --
20680                            |    | \
20681                            |    |   call saved
20682                            |    |   registers
20683      soft frame pointer -> |    | /
20684                              --
20685                            |    | \
20686                            |    |   local
20687                            |    |   variables
20688     locals base pointer -> |    | /
20689                              --
20690                            |    | \
20691                            |    |   outgoing
20692                            |    |   arguments
20693   current stack pointer -> |    | /
20694                              --
20695
20696 For a given function some or all of these stack components
20697 may not be needed, giving rise to the possibility of
20698 eliminating some of the registers.
20699
20700 The values returned by this function must reflect the behavior
20701 of arm_expand_prologue() and arm_compute_save_reg_mask().
20702
20703 The sign of the number returned reflects the direction of stack
20704 growth, so the values are positive for all eliminations except
20705 from the soft frame pointer to the hard frame pointer.
20706
20707 SFP may point just inside the local variables block to ensure correct
20708 alignment. */
20709
20710
20711 /* Calculate stack offsets. These are used to calculate register elimination
20712 offsets and in prologue/epilogue code. Also calculates which registers
20713 should be saved. */
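/* As a rough worked example (assuming an ARM-mode function with no
   pretend args, no static chain, no frame pointer, a zero
   CALLER_INTERWORKING_SLOT_SIZE, {r4, r5, lr} saved, 8 bytes of locals
   and no outgoing arguments), the fields computed below would be:

     saved_args    = 0
     frame         = 0
     saved_regs    = 12                (three 4-byte core registers)
     soft_frame    = 12, padded to 16  (doubleword alignment)
     locals_base   = 16 + 8 = 24
     outgoing_args = 24                (already doubleword aligned)  */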
20714
20715 static arm_stack_offsets *
20716 arm_get_frame_offsets (void)
20717 {
20718 struct arm_stack_offsets *offsets;
20719 unsigned long func_type;
20720 int leaf;
20721 int saved;
20722 int core_saved;
20723 HOST_WIDE_INT frame_size;
20724 int i;
20725
20726 offsets = &cfun->machine->stack_offsets;
20727
20728 /* We need to know if we are a leaf function. Unfortunately, it
20729 is possible to be called after start_sequence has been called,
20730 which causes get_insns to return the insns for the sequence,
20731 not the function, which will cause leaf_function_p to return
20732 the incorrect result.
20733
20734 However, we only need to know about leaf functions once reload has completed, and the
20735 frame size cannot be changed after that time, so we can safely
20736 use the cached value. */
20737
20738 if (reload_completed)
20739 return offsets;
20740
20741 /* Initially this is the size of the local variables. It will be translated
20742 into an offset once we have determined the size of preceding data. */
20743 frame_size = ROUND_UP_WORD (get_frame_size ());
20744
20745 leaf = leaf_function_p ();
20746
20747 /* Space for variadic functions. */
20748 offsets->saved_args = crtl->args.pretend_args_size;
20749
20750 /* In Thumb mode this is incorrect, but never used. */
20751 offsets->frame
20752 = (offsets->saved_args
20753 + arm_compute_static_chain_stack_bytes ()
20754 + (frame_pointer_needed ? 4 : 0));
20755
20756 if (TARGET_32BIT)
20757 {
20758 unsigned int regno;
20759
20760 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20761 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20762 saved = core_saved;
20763
20764 /* We know that SP will be doubleword aligned on entry, and we must
20765 preserve that condition at any subroutine call. We also require the
20766 soft frame pointer to be doubleword aligned. */
20767
20768 if (TARGET_REALLY_IWMMXT)
20769 {
20770 /* Check for the call-saved iWMMXt registers. */
20771 for (regno = FIRST_IWMMXT_REGNUM;
20772 regno <= LAST_IWMMXT_REGNUM;
20773 regno++)
20774 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20775 saved += 8;
20776 }
20777
20778 func_type = arm_current_func_type ();
20779 /* Space for saved VFP registers. */
20780 if (! IS_VOLATILE (func_type)
20781 && TARGET_HARD_FLOAT && TARGET_VFP)
20782 saved += arm_get_vfp_saved_size ();
20783 }
20784 else /* TARGET_THUMB1 */
20785 {
20786 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20787 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20788 saved = core_saved;
20789 if (TARGET_BACKTRACE)
20790 saved += 16;
20791 }
20792
20793 /* Saved registers include the stack frame. */
20794 offsets->saved_regs
20795 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20796 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20797
20798 /* A leaf function does not need any stack alignment if it has nothing
20799 on the stack. */
20800 if (leaf && frame_size == 0
20801 /* However if it calls alloca(), we have a dynamically allocated
20802 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20803 && ! cfun->calls_alloca)
20804 {
20805 offsets->outgoing_args = offsets->soft_frame;
20806 offsets->locals_base = offsets->soft_frame;
20807 return offsets;
20808 }
20809
20810 /* Ensure SFP has the correct alignment. */
20811 if (ARM_DOUBLEWORD_ALIGN
20812 && (offsets->soft_frame & 7))
20813 {
20814 offsets->soft_frame += 4;
20815 /* Try to align stack by pushing an extra reg. Don't bother doing this
20816 when there is a stack frame as the alignment will be rolled into
20817 the normal stack adjustment. */
20818 if (frame_size + crtl->outgoing_args_size == 0)
20819 {
20820 int reg = -1;
20821
20822 /* Register r3 is caller-saved. Normally it does not need to be
20823 saved on entry by the prologue. However if we choose to save
20824 it for padding then we may confuse the compiler into thinking
20825 a prologue sequence is required when in fact it is not. This
20826 will occur when shrink-wrapping if r3 is used as a scratch
20827 register and there are no other callee-saved writes.
20828
20829 This situation can be avoided when other callee-saved registers
20830 are available and r3 is not mandatory if we choose a callee-saved
20831 register for padding. */
20832 bool prefer_callee_reg_p = false;
20833
20834 /* If it is safe to use r3, then do so. This sometimes
20835 generates better code on Thumb-2 by avoiding the need to
20836 use 32-bit push/pop instructions. */
20837 if (! any_sibcall_could_use_r3 ()
20838 && arm_size_return_regs () <= 12
20839 && (offsets->saved_regs_mask & (1 << 3)) == 0
20840 && (TARGET_THUMB2
20841 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20842 {
20843 reg = 3;
20844 if (!TARGET_THUMB2)
20845 prefer_callee_reg_p = true;
20846 }
20847 if (reg == -1
20848 || prefer_callee_reg_p)
20849 {
20850 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20851 {
20852 /* Avoid fixed registers; they may be changed at
20853 arbitrary times so it's unsafe to restore them
20854 during the epilogue. */
20855 if (!fixed_regs[i]
20856 && (offsets->saved_regs_mask & (1 << i)) == 0)
20857 {
20858 reg = i;
20859 break;
20860 }
20861 }
20862 }
20863
20864 if (reg != -1)
20865 {
20866 offsets->saved_regs += 4;
20867 offsets->saved_regs_mask |= (1 << reg);
20868 }
20869 }
20870 }
20871
20872 offsets->locals_base = offsets->soft_frame + frame_size;
20873 offsets->outgoing_args = (offsets->locals_base
20874 + crtl->outgoing_args_size);
20875
20876 if (ARM_DOUBLEWORD_ALIGN)
20877 {
20878 /* Ensure SP remains doubleword aligned. */
20879 if (offsets->outgoing_args & 7)
20880 offsets->outgoing_args += 4;
20881 gcc_assert (!(offsets->outgoing_args & 7));
20882 }
20883
20884 return offsets;
20885 }
20886
20887
20888 /* Calculate the relative offsets for the different stack pointers. Positive
20889 offsets are in the direction of stack growth. */
20890
20891 HOST_WIDE_INT
20892 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20893 {
20894 arm_stack_offsets *offsets;
20895
20896 offsets = arm_get_frame_offsets ();
20897
20898 /* OK, now we have enough information to compute the distances.
20899 There must be an entry in these switch tables for each pair
20900 of registers in ELIMINABLE_REGS, even if some of the entries
20901 seem to be redundant or useless. */
20902 switch (from)
20903 {
20904 case ARG_POINTER_REGNUM:
20905 switch (to)
20906 {
20907 case THUMB_HARD_FRAME_POINTER_REGNUM:
20908 return 0;
20909
20910 case FRAME_POINTER_REGNUM:
20911 /* This is the reverse of the soft frame pointer
20912 to hard frame pointer elimination below. */
20913 return offsets->soft_frame - offsets->saved_args;
20914
20915 case ARM_HARD_FRAME_POINTER_REGNUM:
20916 /* This is only non-zero in the case where the static chain register
20917 is stored above the frame. */
20918 return offsets->frame - offsets->saved_args - 4;
20919
20920 case STACK_POINTER_REGNUM:
20921 /* If nothing has been pushed on the stack at all
20922 then this will return -4. This *is* correct! */
20923 return offsets->outgoing_args - (offsets->saved_args + 4);
20924
20925 default:
20926 gcc_unreachable ();
20927 }
20928 gcc_unreachable ();
20929
20930 case FRAME_POINTER_REGNUM:
20931 switch (to)
20932 {
20933 case THUMB_HARD_FRAME_POINTER_REGNUM:
20934 return 0;
20935
20936 case ARM_HARD_FRAME_POINTER_REGNUM:
20937 /* The hard frame pointer points to the top entry in the
20938 stack frame. The soft frame pointer to the bottom entry
20939 in the stack frame. If there is no stack frame at all,
20940 then they are identical. */
20941
20942 return offsets->frame - offsets->soft_frame;
20943
20944 case STACK_POINTER_REGNUM:
20945 return offsets->outgoing_args - offsets->soft_frame;
20946
20947 default:
20948 gcc_unreachable ();
20949 }
20950 gcc_unreachable ();
20951
20952 default:
20953 /* You cannot eliminate from the stack pointer.
20954 In theory you could eliminate from the hard frame
20955 pointer to the stack pointer, but this will never
20956 happen, since if a stack frame is not needed the
20957 hard frame pointer will never be used. */
20958 gcc_unreachable ();
20959 }
20960 }
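
/* Continuing the kind of hypothetical layout sketched above
   arm_get_frame_offsets (saved_args = 0, soft_frame = 16,
   outgoing_args = 24), the function above would give, for example:

     ARG_POINTER_REGNUM   -> STACK_POINTER_REGNUM : 24 - (0 + 4) = 20
     ARG_POINTER_REGNUM   -> FRAME_POINTER_REGNUM : 16 - 0       = 16
     FRAME_POINTER_REGNUM -> STACK_POINTER_REGNUM : 24 - 16      = 8   */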
20961
20962 /* Given FROM and TO register numbers, say whether this elimination is
20963 allowed. Frame pointer elimination is automatically handled.
20964
20965 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20966 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20967 pointer, we must eliminate FRAME_POINTER_REGNUM into
20968 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20969 ARG_POINTER_REGNUM. */
20970
20971 bool
20972 arm_can_eliminate (const int from, const int to)
20973 {
20974 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20975 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20976 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20977 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20978 true);
20979 }
20980
20981 /* Emit RTL to save coprocessor registers on function entry. Returns the
20982 number of bytes pushed. */
20983
20984 static int
20985 arm_save_coproc_regs(void)
20986 {
20987 int saved_size = 0;
20988 unsigned reg;
20989 unsigned start_reg;
20990 rtx insn;
20991
20992 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20993 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20994 {
20995 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20996 insn = gen_rtx_MEM (V2SImode, insn);
20997 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20998 RTX_FRAME_RELATED_P (insn) = 1;
20999 saved_size += 8;
21000 }
21001
21002 if (TARGET_HARD_FLOAT && TARGET_VFP)
21003 {
21004 start_reg = FIRST_VFP_REGNUM;
21005
21006 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21007 {
21008 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21009 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21010 {
21011 if (start_reg != reg)
21012 saved_size += vfp_emit_fstmd (start_reg,
21013 (reg - start_reg) / 2);
21014 start_reg = reg + 2;
21015 }
21016 }
21017 if (start_reg != reg)
21018 saved_size += vfp_emit_fstmd (start_reg,
21019 (reg - start_reg) / 2);
21020 }
21021 return saved_size;
21022 }
21023
21024
21025 /* Set the Thumb frame pointer from the stack pointer. */
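/* Roughly speaking, for a small offset this emits a single
   "add r7, sp, #amount" (with r7 as the Thumb frame pointer), while for
   amount >= 1024 the constant is loaded first and SP added to it, e.g.
   on Thumb-2

	mov	r7, #amount
	add	r7, sp, r7

   (Thumb-1 uses the "add r7, sp" form instead); in that case a
   REG_FRAME_RELATED_EXPR note describing FP = SP + amount is also
   attached.  */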
21026
21027 static void
21028 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21029 {
21030 HOST_WIDE_INT amount;
21031 rtx insn, dwarf;
21032
21033 amount = offsets->outgoing_args - offsets->locals_base;
21034 if (amount < 1024)
21035 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21036 stack_pointer_rtx, GEN_INT (amount)));
21037 else
21038 {
21039 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21040 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21041 expects the first two operands to be the same. */
21042 if (TARGET_THUMB2)
21043 {
21044 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21045 stack_pointer_rtx,
21046 hard_frame_pointer_rtx));
21047 }
21048 else
21049 {
21050 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21051 hard_frame_pointer_rtx,
21052 stack_pointer_rtx));
21053 }
21054 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21055 plus_constant (Pmode, stack_pointer_rtx, amount));
21056 RTX_FRAME_RELATED_P (dwarf) = 1;
21057 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21058 }
21059
21060 RTX_FRAME_RELATED_P (insn) = 1;
21061 }
21062
21063 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21064 function. */
21065 void
21066 arm_expand_prologue (void)
21067 {
21068 rtx amount;
21069 rtx insn;
21070 rtx ip_rtx;
21071 unsigned long live_regs_mask;
21072 unsigned long func_type;
21073 int fp_offset = 0;
21074 int saved_pretend_args = 0;
21075 int saved_regs = 0;
21076 unsigned HOST_WIDE_INT args_to_push;
21077 arm_stack_offsets *offsets;
21078
21079 func_type = arm_current_func_type ();
21080
21081 /* Naked functions don't have prologues. */
21082 if (IS_NAKED (func_type))
21083 return;
21084
21085 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21086 args_to_push = crtl->args.pretend_args_size;
21087
21088 /* Compute which register we will have to save onto the stack. */
21089 offsets = arm_get_frame_offsets ();
21090 live_regs_mask = offsets->saved_regs_mask;
21091
21092 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21093
21094 if (IS_STACKALIGN (func_type))
21095 {
21096 rtx r0, r1;
21097
21098 /* Handle a word-aligned stack pointer. We generate the following:
21099
21100 mov r0, sp
21101 bic r1, r0, #7
21102 mov sp, r1
21103 <save and restore r0 in normal prologue/epilogue>
21104 mov sp, r0
21105 bx lr
21106
21107 The unwinder doesn't need to know about the stack realignment.
21108 Just tell it we saved SP in r0. */
21109 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21110
21111 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21112 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21113
21114 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21115 RTX_FRAME_RELATED_P (insn) = 1;
21116 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21117
21118 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21119
21120 /* ??? The CFA changes here, which may cause GDB to conclude that it
21121 has entered a different function. That said, the unwind info is
21122 correct, individually, before and after this instruction because
21123 we've described the save of SP, which will override the default
21124 handling of SP as restoring from the CFA. */
21125 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21126 }
21127
21128 /* For APCS frames, if IP register is clobbered
21129 when creating frame, save that register in a special
21130 way. */
21131 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21132 {
21133 if (IS_INTERRUPT (func_type))
21134 {
21135 /* Interrupt functions must not corrupt any registers.
21136 Creating a frame pointer, however, corrupts the IP
21137 register, so we must push it first. */
21138 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21139
21140 /* Do not set RTX_FRAME_RELATED_P on this insn.
21141 The dwarf stack unwinding code only wants to see one
21142 stack decrement per function, and this is not it. If
21143 this instruction is labeled as being part of the frame
21144 creation sequence then dwarf2out_frame_debug_expr will
21145 die when it encounters the assignment of IP to FP
21146 later on, since the use of SP here establishes SP as
21147 the CFA register and not IP.
21148
21149 Anyway this instruction is not really part of the stack
21150 frame creation although it is part of the prologue. */
21151 }
21152 else if (IS_NESTED (func_type))
21153 {
21154 /* The static chain register is the same as the IP register
21155 used as a scratch register during stack frame creation.
21156 To get around this we need to find somewhere to store IP
21157 whilst the frame is being created. We try the following
21158 places in order:
21159
21160 1. The last argument register r3 if it is available.
21161 2. A slot on the stack above the frame if there are no
21162 arguments to push onto the stack.
21163 3. Register r3 again, after pushing the argument registers
21164 onto the stack, if this is a varargs function.
21165 4. The last slot on the stack created for the arguments to
21166 push, if this isn't a varargs function.
21167
21168 Note - we only need to tell the dwarf2 backend about the SP
21169 adjustment in the second variant; the static chain register
21170 doesn't need to be unwound, as it doesn't contain a value
21171 inherited from the caller. */
21172
21173 if (!arm_r3_live_at_start_p ())
21174 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21175 else if (args_to_push == 0)
21176 {
21177 rtx addr, dwarf;
21178
21179 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21180 saved_regs += 4;
21181
21182 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21183 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21184 fp_offset = 4;
21185
21186 /* Just tell the dwarf backend that we adjusted SP. */
21187 dwarf = gen_rtx_SET (stack_pointer_rtx,
21188 plus_constant (Pmode, stack_pointer_rtx,
21189 -fp_offset));
21190 RTX_FRAME_RELATED_P (insn) = 1;
21191 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21192 }
21193 else
21194 {
21195 /* Store the args on the stack. */
21196 if (cfun->machine->uses_anonymous_args)
21197 {
21198 insn
21199 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21200 (0xf0 >> (args_to_push / 4)) & 0xf);
21201 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21202 saved_pretend_args = 1;
21203 }
21204 else
21205 {
21206 rtx addr, dwarf;
21207
21208 if (args_to_push == 4)
21209 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21210 else
21211 addr
21212 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21213 plus_constant (Pmode,
21214 stack_pointer_rtx,
21215 -args_to_push));
21216
21217 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21218
21219 /* Just tell the dwarf backend that we adjusted SP. */
21220 dwarf
21221 = gen_rtx_SET (stack_pointer_rtx,
21222 plus_constant (Pmode, stack_pointer_rtx,
21223 -args_to_push));
21224 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21225 }
21226
21227 RTX_FRAME_RELATED_P (insn) = 1;
21228 fp_offset = args_to_push;
21229 args_to_push = 0;
21230 }
21231 }
21232
21233 insn = emit_set_insn (ip_rtx,
21234 plus_constant (Pmode, stack_pointer_rtx,
21235 fp_offset));
21236 RTX_FRAME_RELATED_P (insn) = 1;
21237 }
21238
21239 if (args_to_push)
21240 {
21241 /* Push the argument registers, or reserve space for them. */
21242 if (cfun->machine->uses_anonymous_args)
21243 insn = emit_multi_reg_push
21244 ((0xf0 >> (args_to_push / 4)) & 0xf,
21245 (0xf0 >> (args_to_push / 4)) & 0xf);
21246 else
21247 insn = emit_insn
21248 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21249 GEN_INT (- args_to_push)));
21250 RTX_FRAME_RELATED_P (insn) = 1;
21251 }
21252
21253 /* If this is an interrupt service routine, and the link register
21254 is going to be pushed, and we're not generating an extra
21255 push of IP (needed when a frame is created and the frame layout is APCS),
21256 subtracting four from LR now will mean that the function return
21257 can be done with a single instruction. */
21258 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21259 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21260 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21261 && TARGET_ARM)
21262 {
21263 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21264
21265 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21266 }
21267
21268 if (live_regs_mask)
21269 {
21270 unsigned long dwarf_regs_mask = live_regs_mask;
21271
21272 saved_regs += bit_count (live_regs_mask) * 4;
21273 if (optimize_size && !frame_pointer_needed
21274 && saved_regs == offsets->saved_regs - offsets->saved_args)
21275 {
21276 /* If no coprocessor registers are being pushed and we don't have
21277 to worry about a frame pointer then push extra registers to
21278 create the stack frame. This is done in a way that does not
21279 alter the frame layout, so is independent of the epilogue. */
21280 int n;
21281 int frame;
21282 n = 0;
21283 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21284 n++;
21285 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21286 if (frame && n * 4 >= frame)
21287 {
21288 n = frame / 4;
21289 live_regs_mask |= (1 << n) - 1;
21290 saved_regs += frame;
21291 }
21292 }
21293
21294 if (TARGET_LDRD
21295 && current_tune->prefer_ldrd_strd
21296 && !optimize_function_for_size_p (cfun))
21297 {
21298 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21299 if (TARGET_THUMB2)
21300 thumb2_emit_strd_push (live_regs_mask);
21301 else if (TARGET_ARM
21302 && !TARGET_APCS_FRAME
21303 && !IS_INTERRUPT (func_type))
21304 arm_emit_strd_push (live_regs_mask);
21305 else
21306 {
21307 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21308 RTX_FRAME_RELATED_P (insn) = 1;
21309 }
21310 }
21311 else
21312 {
21313 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21314 RTX_FRAME_RELATED_P (insn) = 1;
21315 }
21316 }
21317
21318 if (! IS_VOLATILE (func_type))
21319 saved_regs += arm_save_coproc_regs ();
21320
21321 if (frame_pointer_needed && TARGET_ARM)
21322 {
21323 /* Create the new frame pointer. */
21324 if (TARGET_APCS_FRAME)
21325 {
21326 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21327 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21328 RTX_FRAME_RELATED_P (insn) = 1;
21329
21330 if (IS_NESTED (func_type))
21331 {
21332 /* Recover the static chain register. */
21333 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21334 insn = gen_rtx_REG (SImode, 3);
21335 else
21336 {
21337 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21338 insn = gen_frame_mem (SImode, insn);
21339 }
21340 emit_set_insn (ip_rtx, insn);
21341 /* Add a USE to stop propagate_one_insn() from barfing. */
21342 emit_insn (gen_force_register_use (ip_rtx));
21343 }
21344 }
21345 else
21346 {
21347 insn = GEN_INT (saved_regs - 4);
21348 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21349 stack_pointer_rtx, insn));
21350 RTX_FRAME_RELATED_P (insn) = 1;
21351 }
21352 }
21353
21354 if (flag_stack_usage_info)
21355 current_function_static_stack_size
21356 = offsets->outgoing_args - offsets->saved_args;
21357
21358 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21359 {
21360 /* This add can produce multiple insns for a large constant, so we
21361 need to get tricky. */
21362 rtx_insn *last = get_last_insn ();
21363
21364 amount = GEN_INT (offsets->saved_args + saved_regs
21365 - offsets->outgoing_args);
21366
21367 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21368 amount));
21369 do
21370 {
21371 last = last ? NEXT_INSN (last) : get_insns ();
21372 RTX_FRAME_RELATED_P (last) = 1;
21373 }
21374 while (last != insn);
21375
21376 /* If the frame pointer is needed, emit a special barrier that
21377 will prevent the scheduler from moving stores to the frame
21378 before the stack adjustment. */
21379 if (frame_pointer_needed)
21380 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21381 hard_frame_pointer_rtx));
21382 }
21383
21384
21385 if (frame_pointer_needed && TARGET_THUMB2)
21386 thumb_set_frame_pointer (offsets);
21387
21388 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21389 {
21390 unsigned long mask;
21391
21392 mask = live_regs_mask;
21393 mask &= THUMB2_WORK_REGS;
21394 if (!IS_NESTED (func_type))
21395 mask |= (1 << IP_REGNUM);
21396 arm_load_pic_register (mask);
21397 }
21398
21399 /* If we are profiling, make sure no instructions are scheduled before
21400 the call to mcount. Similarly if the user has requested no
21401 scheduling in the prolog. Similarly if we want non-call exceptions
21402 using the EABI unwinder, to prevent faulting instructions from being
21403 swapped with a stack adjustment. */
21404 if (crtl->profile || !TARGET_SCHED_PROLOG
21405 || (arm_except_unwind_info (&global_options) == UI_TARGET
21406 && cfun->can_throw_non_call_exceptions))
21407 emit_insn (gen_blockage ());
21408
21409 /* If the link register is being kept alive, with the return address in it,
21410 then make sure that it does not get reused by the ce2 pass. */
21411 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21412 cfun->machine->lr_save_eliminated = 1;
21413 }
21414 \f
21415 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21416 static void
21417 arm_print_condition (FILE *stream)
21418 {
21419 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21420 {
21421 /* Branch conversion is not implemented for Thumb-2. */
21422 if (TARGET_THUMB)
21423 {
21424 output_operand_lossage ("predicated Thumb instruction");
21425 return;
21426 }
21427 if (current_insn_predicate != NULL)
21428 {
21429 output_operand_lossage
21430 ("predicated instruction in conditional sequence");
21431 return;
21432 }
21433
21434 fputs (arm_condition_codes[arm_current_cc], stream);
21435 }
21436 else if (current_insn_predicate)
21437 {
21438 enum arm_cond_code code;
21439
21440 if (TARGET_THUMB1)
21441 {
21442 output_operand_lossage ("predicated Thumb instruction");
21443 return;
21444 }
21445
21446 code = get_arm_condition_code (current_insn_predicate);
21447 fputs (arm_condition_codes[code], stream);
21448 }
21449 }
21450
21451
21452 /* Globally reserved letters: acln
21453 Punctuation letters currently used: @_|?().!#
21454 Lower case letters currently used: bcdefhimpqtvwxyz
21455 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21456 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21457
21458 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21459
21460 If CODE is 'd', then the X is a condition operand and the instruction
21461 should only be executed if the condition is true.
21462 if CODE is 'D', then the X is a condition operand and the instruction
21463 should only be executed if the condition is false: however, if the mode
21464 of the comparison is CCFPEmode, then always execute the instruction -- we
21465 do this because in these circumstances !GE does not necessarily imply LT;
21466 in these cases the instruction pattern will take care to make sure that
21467 an instruction containing %d will follow, thereby undoing the effects of
21468 doing this instruction unconditionally.
21469 If CODE is 'N' then X is a floating point operand that must be negated
21470 before output.
21471 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21472 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
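/* A few concrete examples of the codes handled below, for illustration:

     %B of (const_int 5)          prints -6       (bitwise inverse)
     %b of (const_int 8)          prints #3       (log2)
     %M of (reg:DI r4)            prints {r4-r5}  (ldm/stm style range)
     %d of a GE comparison        prints ge, while %D of the same
                                  comparison prints the inverse, lt.  */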
21473 static void
21474 arm_print_operand (FILE *stream, rtx x, int code)
21475 {
21476 switch (code)
21477 {
21478 case '@':
21479 fputs (ASM_COMMENT_START, stream);
21480 return;
21481
21482 case '_':
21483 fputs (user_label_prefix, stream);
21484 return;
21485
21486 case '|':
21487 fputs (REGISTER_PREFIX, stream);
21488 return;
21489
21490 case '?':
21491 arm_print_condition (stream);
21492 return;
21493
21494 case '(':
21495 /* Nothing in unified syntax, otherwise the current condition code. */
21496 if (!TARGET_UNIFIED_ASM)
21497 arm_print_condition (stream);
21498 break;
21499
21500 case ')':
21501 /* The current condition code in unified syntax, otherwise nothing. */
21502 if (TARGET_UNIFIED_ASM)
21503 arm_print_condition (stream);
21504 break;
21505
21506 case '.':
21507 /* The current condition code for a condition code setting instruction.
21508 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21509 if (TARGET_UNIFIED_ASM)
21510 {
21511 fputc('s', stream);
21512 arm_print_condition (stream);
21513 }
21514 else
21515 {
21516 arm_print_condition (stream);
21517 fputc('s', stream);
21518 }
21519 return;
21520
21521 case '!':
21522 /* If the instruction is conditionally executed then print
21523 the current condition code, otherwise print 's'. */
21524 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21525 if (current_insn_predicate)
21526 arm_print_condition (stream);
21527 else
21528 fputc('s', stream);
21529 break;
21530
21531 /* %# is a "break" sequence. It doesn't output anything, but is used to
21532 separate e.g. operand numbers from following text, if that text consists
21533 of further digits which we don't want to be part of the operand
21534 number. */
21535 case '#':
21536 return;
21537
21538 case 'N':
21539 {
21540 REAL_VALUE_TYPE r;
21541 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21542 r = real_value_negate (&r);
21543 fprintf (stream, "%s", fp_const_from_val (&r));
21544 }
21545 return;
21546
21547 /* An integer or symbol address without a preceding # sign. */
21548 case 'c':
21549 switch (GET_CODE (x))
21550 {
21551 case CONST_INT:
21552 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21553 break;
21554
21555 case SYMBOL_REF:
21556 output_addr_const (stream, x);
21557 break;
21558
21559 case CONST:
21560 if (GET_CODE (XEXP (x, 0)) == PLUS
21561 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21562 {
21563 output_addr_const (stream, x);
21564 break;
21565 }
21566 /* Fall through. */
21567
21568 default:
21569 output_operand_lossage ("Unsupported operand for code '%c'", code);
21570 }
21571 return;
21572
21573 /* An integer that we want to print in HEX. */
21574 case 'x':
21575 switch (GET_CODE (x))
21576 {
21577 case CONST_INT:
21578 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21579 break;
21580
21581 default:
21582 output_operand_lossage ("Unsupported operand for code '%c'", code);
21583 }
21584 return;
21585
21586 case 'B':
21587 if (CONST_INT_P (x))
21588 {
21589 HOST_WIDE_INT val;
21590 val = ARM_SIGN_EXTEND (~INTVAL (x));
21591 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21592 }
21593 else
21594 {
21595 putc ('~', stream);
21596 output_addr_const (stream, x);
21597 }
21598 return;
21599
21600 case 'b':
21601 /* Print the log2 of a CONST_INT. */
21602 {
21603 HOST_WIDE_INT val;
21604
21605 if (!CONST_INT_P (x)
21606 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21607 output_operand_lossage ("Unsupported operand for code '%c'", code);
21608 else
21609 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21610 }
21611 return;
21612
21613 case 'L':
21614 /* The low 16 bits of an immediate constant. */
21615 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21616 return;
21617
21618 case 'i':
21619 fprintf (stream, "%s", arithmetic_instr (x, 1));
21620 return;
21621
21622 case 'I':
21623 fprintf (stream, "%s", arithmetic_instr (x, 0));
21624 return;
21625
21626 case 'S':
21627 {
21628 HOST_WIDE_INT val;
21629 const char *shift;
21630
21631 shift = shift_op (x, &val);
21632
21633 if (shift)
21634 {
21635 fprintf (stream, ", %s ", shift);
21636 if (val == -1)
21637 arm_print_operand (stream, XEXP (x, 1), 0);
21638 else
21639 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21640 }
21641 }
21642 return;
21643
21644 /* An explanation of the 'Q', 'R' and 'H' register operands:
21645
21646 In a pair of registers containing a DI or DF value the 'Q'
21647 operand returns the register number of the register containing
21648 the least significant part of the value. The 'R' operand returns
21649 the register number of the register containing the most
21650 significant part of the value.
21651
21652 The 'H' operand returns the higher of the two register numbers.
21653 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21654 same as the 'Q' operand, since the most significant part of the
21655 value is held in the lower number register. The reverse is true
21656 on systems where WORDS_BIG_ENDIAN is false.
21657
21658 The purpose of these operands is to distinguish between cases
21659 where the endian-ness of the values is important (for example
21660 when they are added together), and cases where the endian-ness
21661 is irrelevant, but the order of register operations is important.
21662 For example when loading a value from memory into a register
21663 pair, the endian-ness does not matter. Provided that the value
21664 from the lower memory address is put into the lower numbered
21665 register, and the value from the higher address is put into the
21666 higher numbered register, the load will work regardless of whether
21667 the value being loaded is big-wordian or little-wordian. The
21668 order of the two register loads can matter however, if the address
21669 of the memory location is actually held in one of the registers
21670 being overwritten by the load.
21671
21672 The 'Q' and 'R' constraints are also available for 64-bit
21673 constants. */
21674 case 'Q':
21675 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21676 {
21677 rtx part = gen_lowpart (SImode, x);
21678 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21679 return;
21680 }
21681
21682 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21683 {
21684 output_operand_lossage ("invalid operand for code '%c'", code);
21685 return;
21686 }
21687
21688 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21689 return;
21690
21691 case 'R':
21692 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21693 {
21694 machine_mode mode = GET_MODE (x);
21695 rtx part;
21696
21697 if (mode == VOIDmode)
21698 mode = DImode;
21699 part = gen_highpart_mode (SImode, mode, x);
21700 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21701 return;
21702 }
21703
21704 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21705 {
21706 output_operand_lossage ("invalid operand for code '%c'", code);
21707 return;
21708 }
21709
21710 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21711 return;
21712
21713 case 'H':
21714 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21715 {
21716 output_operand_lossage ("invalid operand for code '%c'", code);
21717 return;
21718 }
21719
21720 asm_fprintf (stream, "%r", REGNO (x) + 1);
21721 return;
21722
21723 case 'J':
21724 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21725 {
21726 output_operand_lossage ("invalid operand for code '%c'", code);
21727 return;
21728 }
21729
21730 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21731 return;
21732
21733 case 'K':
21734 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21735 {
21736 output_operand_lossage ("invalid operand for code '%c'", code);
21737 return;
21738 }
21739
21740 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21741 return;
21742
21743 case 'm':
21744 asm_fprintf (stream, "%r",
21745 REG_P (XEXP (x, 0))
21746 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21747 return;
21748
21749 case 'M':
21750 asm_fprintf (stream, "{%r-%r}",
21751 REGNO (x),
21752 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21753 return;
21754
21755 /* Like 'M', but writing doubleword vector registers, for use by Neon
21756 insns. */
21757 case 'h':
21758 {
21759 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21760 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21761 if (numregs == 1)
21762 asm_fprintf (stream, "{d%d}", regno);
21763 else
21764 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21765 }
21766 return;
21767
21768 case 'd':
21769 /* CONST_TRUE_RTX means always -- that's the default. */
21770 if (x == const_true_rtx)
21771 return;
21772
21773 if (!COMPARISON_P (x))
21774 {
21775 output_operand_lossage ("invalid operand for code '%c'", code);
21776 return;
21777 }
21778
21779 fputs (arm_condition_codes[get_arm_condition_code (x)],
21780 stream);
21781 return;
21782
21783 case 'D':
21784 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21785 want to do that. */
21786 if (x == const_true_rtx)
21787 {
21788 output_operand_lossage ("instruction never executed");
21789 return;
21790 }
21791 if (!COMPARISON_P (x))
21792 {
21793 output_operand_lossage ("invalid operand for code '%c'", code);
21794 return;
21795 }
21796
21797 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21798 (get_arm_condition_code (x))],
21799 stream);
21800 return;
21801
21802 case 's':
21803 case 'V':
21804 case 'W':
21805 case 'X':
21806 case 'Y':
21807 case 'Z':
21808 /* Former Maverick support, removed after GCC-4.7. */
21809 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21810 return;
21811
21812 case 'U':
21813 if (!REG_P (x)
21814 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21815 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21816 /* Bad value for wCG register number. */
21817 {
21818 output_operand_lossage ("invalid operand for code '%c'", code);
21819 return;
21820 }
21821
21822 else
21823 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21824 return;
21825
21826 /* Print an iWMMXt control register name. */
21827 case 'w':
21828 if (!CONST_INT_P (x)
21829 || INTVAL (x) < 0
21830 || INTVAL (x) >= 16)
21831 /* Bad value for wC register number. */
21832 {
21833 output_operand_lossage ("invalid operand for code '%c'", code);
21834 return;
21835 }
21836
21837 else
21838 {
21839 static const char * wc_reg_names [16] =
21840 {
21841 "wCID", "wCon", "wCSSF", "wCASF",
21842 "wC4", "wC5", "wC6", "wC7",
21843 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21844 "wC12", "wC13", "wC14", "wC15"
21845 };
21846
21847 fputs (wc_reg_names [INTVAL (x)], stream);
21848 }
21849 return;
21850
21851 /* Print the high single-precision register of a VFP double-precision
21852 register. */
21853 case 'p':
21854 {
21855 machine_mode mode = GET_MODE (x);
21856 int regno;
21857
21858 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21859 {
21860 output_operand_lossage ("invalid operand for code '%c'", code);
21861 return;
21862 }
21863
21864 regno = REGNO (x);
21865 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21866 {
21867 output_operand_lossage ("invalid operand for code '%c'", code);
21868 return;
21869 }
21870
21871 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21872 }
21873 return;
21874
21875 /* Print a VFP/Neon double precision or quad precision register name. */
21876 case 'P':
21877 case 'q':
21878 {
21879 machine_mode mode = GET_MODE (x);
21880 int is_quad = (code == 'q');
21881 int regno;
21882
21883 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21884 {
21885 output_operand_lossage ("invalid operand for code '%c'", code);
21886 return;
21887 }
21888
21889 if (!REG_P (x)
21890 || !IS_VFP_REGNUM (REGNO (x)))
21891 {
21892 output_operand_lossage ("invalid operand for code '%c'", code);
21893 return;
21894 }
21895
21896 regno = REGNO (x);
21897 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21898 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21899 {
21900 output_operand_lossage ("invalid operand for code '%c'", code);
21901 return;
21902 }
21903
21904 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21905 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21906 }
21907 return;
21908
21909 /* These two codes print the low/high doubleword register of a Neon quad
21910 register, respectively. For pair-structure types, can also print
21911 low/high quadword registers. */
21912 case 'e':
21913 case 'f':
21914 {
21915 machine_mode mode = GET_MODE (x);
21916 int regno;
21917
21918 if ((GET_MODE_SIZE (mode) != 16
21919 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21920 {
21921 output_operand_lossage ("invalid operand for code '%c'", code);
21922 return;
21923 }
21924
21925 regno = REGNO (x);
21926 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21927 {
21928 output_operand_lossage ("invalid operand for code '%c'", code);
21929 return;
21930 }
21931
21932 if (GET_MODE_SIZE (mode) == 16)
21933 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21934 + (code == 'f' ? 1 : 0));
21935 else
21936 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21937 + (code == 'f' ? 1 : 0));
21938 }
21939 return;
21940
21941 /* Print a VFPv3 floating-point constant, represented as an integer
21942 index. */
21943 case 'G':
21944 {
21945 int index = vfp3_const_double_index (x);
21946 gcc_assert (index != -1);
21947 fprintf (stream, "%d", index);
21948 }
21949 return;
21950
21951 /* Print bits representing opcode features for Neon.
21952
21953 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21954 and polynomials as unsigned.
21955
21956 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21957
21958 Bit 2 is 1 for rounding functions, 0 otherwise. */
21959
21960 /* Identify the type as 's', 'u', 'p' or 'f'. */
21961 case 'T':
21962 {
21963 HOST_WIDE_INT bits = INTVAL (x);
21964 fputc ("uspf"[bits & 3], stream);
21965 }
21966 return;
21967
21968 /* Likewise, but signed and unsigned integers are both 'i'. */
21969 case 'F':
21970 {
21971 HOST_WIDE_INT bits = INTVAL (x);
21972 fputc ("iipf"[bits & 3], stream);
21973 }
21974 return;
21975
21976 /* As for 'T', but emit 'u' instead of 'p'. */
21977 case 't':
21978 {
21979 HOST_WIDE_INT bits = INTVAL (x);
21980 fputc ("usuf"[bits & 3], stream);
21981 }
21982 return;
21983
21984 /* Bit 2: rounding (vs none). */
21985 case 'O':
21986 {
21987 HOST_WIDE_INT bits = INTVAL (x);
21988 fputs ((bits & 4) != 0 ? "r" : "", stream);
21989 }
21990 return;
21991
21992 /* Memory operand for vld1/vst1 instruction. */
21993 case 'A':
21994 {
21995 rtx addr;
21996 bool postinc = FALSE;
21997 rtx postinc_reg = NULL;
21998 unsigned align, memsize, align_bits;
21999
22000 gcc_assert (MEM_P (x));
22001 addr = XEXP (x, 0);
22002 if (GET_CODE (addr) == POST_INC)
22003 {
22004 postinc = 1;
22005 addr = XEXP (addr, 0);
22006 }
22007 if (GET_CODE (addr) == POST_MODIFY)
22008 {
22009 postinc_reg = XEXP( XEXP (addr, 1), 1);
22010 addr = XEXP (addr, 0);
22011 }
22012 asm_fprintf (stream, "[%r", REGNO (addr));
22013
22014 /* We know the alignment of this access, so we can emit a hint in the
22015 instruction (for some alignments) as an aid to the memory subsystem
22016 of the target. */
22017 align = MEM_ALIGN (x) >> 3;
22018 memsize = MEM_SIZE (x);
22019
22020 /* Only certain alignment specifiers are supported by the hardware. */
22021 if (memsize == 32 && (align % 32) == 0)
22022 align_bits = 256;
22023 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22024 align_bits = 128;
22025 else if (memsize >= 8 && (align % 8) == 0)
22026 align_bits = 64;
22027 else
22028 align_bits = 0;
22029
22030 if (align_bits != 0)
22031 asm_fprintf (stream, ":%d", align_bits);
22032
22033 asm_fprintf (stream, "]");
22034
22035 if (postinc)
22036 fputs ("!", stream);
22037 if (postinc_reg)
22038 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22039 }
22040 return;
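/* For example, a 16-byte access whose address is known to be 16-byte
   aligned and uses a post-incremented base register would be printed as
   something like "[r0:128]!" (assuming the default register naming). */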
22041
22042 case 'C':
22043 {
22044 rtx addr;
22045
22046 gcc_assert (MEM_P (x));
22047 addr = XEXP (x, 0);
22048 gcc_assert (REG_P (addr));
22049 asm_fprintf (stream, "[%r]", REGNO (addr));
22050 }
22051 return;
22052
22053 /* Translate an S register number into a D register number and element index. */
22054 case 'y':
22055 {
22056 machine_mode mode = GET_MODE (x);
22057 int regno;
22058
22059 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22060 {
22061 output_operand_lossage ("invalid operand for code '%c'", code);
22062 return;
22063 }
22064
22065 regno = REGNO (x);
22066 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22067 {
22068 output_operand_lossage ("invalid operand for code '%c'", code);
22069 return;
22070 }
22071
22072 regno = regno - FIRST_VFP_REGNUM;
22073 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22074 }
22075 return;
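/* For example, the single-precision register s5 is printed as "d2[1]",
   i.e. lane 1 of the overlapping double-precision register d2. */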
22076
22077 case 'v':
22078 gcc_assert (CONST_DOUBLE_P (x));
22079 int result;
22080 result = vfp3_const_double_for_fract_bits (x);
22081 if (result == 0)
22082 result = vfp3_const_double_for_bits (x);
22083 fprintf (stream, "#%d", result);
22084 return;
22085
22086 /* Register specifier for vld1.16/vst1.16. Translate the S register
22087 number into a D register number and element index. */
22088 case 'z':
22089 {
22090 machine_mode mode = GET_MODE (x);
22091 int regno;
22092
22093 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22094 {
22095 output_operand_lossage ("invalid operand for code '%c'", code);
22096 return;
22097 }
22098
22099 regno = REGNO (x);
22100 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22101 {
22102 output_operand_lossage ("invalid operand for code '%c'", code);
22103 return;
22104 }
22105
22106 regno = regno - FIRST_VFP_REGNUM;
22107 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22108 }
22109 return;
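/* For example, s5 is printed here as "d2[2]": the scalar is addressed as
   half-word lane 2 of d2, the low half of the odd S register. */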
22110
22111 default:
22112 if (x == 0)
22113 {
22114 output_operand_lossage ("missing operand");
22115 return;
22116 }
22117
22118 switch (GET_CODE (x))
22119 {
22120 case REG:
22121 asm_fprintf (stream, "%r", REGNO (x));
22122 break;
22123
22124 case MEM:
22125 output_memory_reference_mode = GET_MODE (x);
22126 output_address (XEXP (x, 0));
22127 break;
22128
22129 case CONST_DOUBLE:
22130 {
22131 char fpstr[20];
22132 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22133 sizeof (fpstr), 0, 1);
22134 fprintf (stream, "#%s", fpstr);
22135 }
22136 break;
22137
22138 default:
22139 gcc_assert (GET_CODE (x) != NEG);
22140 fputc ('#', stream);
22141 if (GET_CODE (x) == HIGH)
22142 {
22143 fputs (":lower16:", stream);
22144 x = XEXP (x, 0);
22145 }
22146
22147 output_addr_const (stream, x);
22148 break;
22149 }
22150 }
22151 }
22152 \f
22153 /* Target hook for printing a memory address. */
22154 static void
22155 arm_print_operand_address (FILE *stream, rtx x)
22156 {
22157 if (TARGET_32BIT)
22158 {
22159 int is_minus = GET_CODE (x) == MINUS;
22160
22161 if (REG_P (x))
22162 asm_fprintf (stream, "[%r]", REGNO (x));
22163 else if (GET_CODE (x) == PLUS || is_minus)
22164 {
22165 rtx base = XEXP (x, 0);
22166 rtx index = XEXP (x, 1);
22167 HOST_WIDE_INT offset = 0;
22168 if (!REG_P (base)
22169 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22170 {
22171 /* Ensure that BASE is a register. */
22172 /* (one of them must be). */
22173 /* Also ensure the SP is not used as an index register. */
22174 std::swap (base, index);
22175 }
22176 switch (GET_CODE (index))
22177 {
22178 case CONST_INT:
22179 offset = INTVAL (index);
22180 if (is_minus)
22181 offset = -offset;
22182 asm_fprintf (stream, "[%r, #%wd]",
22183 REGNO (base), offset);
22184 break;
22185
22186 case REG:
22187 asm_fprintf (stream, "[%r, %s%r]",
22188 REGNO (base), is_minus ? "-" : "",
22189 REGNO (index));
22190 break;
22191
22192 case MULT:
22193 case ASHIFTRT:
22194 case LSHIFTRT:
22195 case ASHIFT:
22196 case ROTATERT:
22197 {
22198 asm_fprintf (stream, "[%r, %s%r",
22199 REGNO (base), is_minus ? "-" : "",
22200 REGNO (XEXP (index, 0)));
22201 arm_print_operand (stream, index, 'S');
22202 fputs ("]", stream);
22203 break;
22204 }
22205
22206 default:
22207 gcc_unreachable ();
22208 }
22209 }
22210 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22211 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22212 {
22213 extern machine_mode output_memory_reference_mode;
22214
22215 gcc_assert (REG_P (XEXP (x, 0)));
22216
22217 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22218 asm_fprintf (stream, "[%r, #%s%d]!",
22219 REGNO (XEXP (x, 0)),
22220 GET_CODE (x) == PRE_DEC ? "-" : "",
22221 GET_MODE_SIZE (output_memory_reference_mode));
22222 else
22223 asm_fprintf (stream, "[%r], #%s%d",
22224 REGNO (XEXP (x, 0)),
22225 GET_CODE (x) == POST_DEC ? "-" : "",
22226 GET_MODE_SIZE (output_memory_reference_mode));
22227 }
22228 else if (GET_CODE (x) == PRE_MODIFY)
22229 {
22230 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22231 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22232 asm_fprintf (stream, "#%wd]!",
22233 INTVAL (XEXP (XEXP (x, 1), 1)));
22234 else
22235 asm_fprintf (stream, "%r]!",
22236 REGNO (XEXP (XEXP (x, 1), 1)));
22237 }
22238 else if (GET_CODE (x) == POST_MODIFY)
22239 {
22240 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22241 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22242 asm_fprintf (stream, "#%wd",
22243 INTVAL (XEXP (XEXP (x, 1), 1)));
22244 else
22245 asm_fprintf (stream, "%r",
22246 REGNO (XEXP (XEXP (x, 1), 1)));
22247 }
22248 else output_addr_const (stream, x);
22249 }
22250 else
22251 {
22252 if (REG_P (x))
22253 asm_fprintf (stream, "[%r]", REGNO (x));
22254 else if (GET_CODE (x) == POST_INC)
22255 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22256 else if (GET_CODE (x) == PLUS)
22257 {
22258 gcc_assert (REG_P (XEXP (x, 0)));
22259 if (CONST_INT_P (XEXP (x, 1)))
22260 asm_fprintf (stream, "[%r, #%wd]",
22261 REGNO (XEXP (x, 0)),
22262 INTVAL (XEXP (x, 1)));
22263 else
22264 asm_fprintf (stream, "[%r, %r]",
22265 REGNO (XEXP (x, 0)),
22266 REGNO (XEXP (x, 1)));
22267 }
22268 else
22269 output_addr_const (stream, x);
22270 }
22271 }
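/* Illustrative outputs of arm_print_operand_address (assuming the default
   register naming): a plain register prints as "[r0]"; register plus
   constant as "[r1, #4]"; register minus register as "[r2, -r3]";
   a pre-decrement of a 4-byte value as "[r4, #-4]!"; and a
   post-increment of an 8-byte value as "[r5], #8". */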
22272 \f
22273 /* Target hook for indicating whether a punctuation character for
22274 TARGET_PRINT_OPERAND is valid. */
22275 static bool
22276 arm_print_operand_punct_valid_p (unsigned char code)
22277 {
22278 return (code == '@' || code == '|' || code == '.'
22279 || code == '(' || code == ')' || code == '#'
22280 || (TARGET_32BIT && (code == '?'))
22281 || (TARGET_THUMB2 && (code == '!'))
22282 || (TARGET_THUMB && (code == '_')));
22283 }
22284 \f
22285 /* Target hook for assembling integer objects. The ARM version needs to
22286 handle word-sized values specially. */
22287 static bool
22288 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22289 {
22290 machine_mode mode;
22291
22292 if (size == UNITS_PER_WORD && aligned_p)
22293 {
22294 fputs ("\t.word\t", asm_out_file);
22295 output_addr_const (asm_out_file, x);
22296
22297 /* Mark symbols as position independent. We only do this in the
22298 .text segment, not in the .data segment. */
22299 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22300 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22301 {
22302 /* See legitimize_pic_address for an explanation of the
22303 TARGET_VXWORKS_RTP check. */
22304 if (!arm_pic_data_is_text_relative
22305 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22306 fputs ("(GOT)", asm_out_file);
22307 else
22308 fputs ("(GOTOFF)", asm_out_file);
22309 }
22310 fputc ('\n', asm_out_file);
22311 return true;
22312 }
22313
22314 mode = GET_MODE (x);
22315
22316 if (arm_vector_mode_supported_p (mode))
22317 {
22318 int i, units;
22319
22320 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22321
22322 units = CONST_VECTOR_NUNITS (x);
22323 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22324
22325 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22326 for (i = 0; i < units; i++)
22327 {
22328 rtx elt = CONST_VECTOR_ELT (x, i);
22329 assemble_integer
22330 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22331 }
22332 else
22333 for (i = 0; i < units; i++)
22334 {
22335 rtx elt = CONST_VECTOR_ELT (x, i);
22336 REAL_VALUE_TYPE rval;
22337
22338 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22339
22340 assemble_real
22341 (rval, GET_MODE_INNER (mode),
22342 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22343 }
22344
22345 return true;
22346 }
22347
22348 return default_assemble_integer (x, size, aligned_p);
22349 }
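/* For example, when arm_assemble_integer emits a word for a PIC constant
   table, a hypothetical non-local symbol "foo" would be output as
   ".word foo(GOT)", while a text-relative local symbol gets "(GOTOFF)". */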
22350
22351 static void
22352 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22353 {
22354 section *s;
22355
22356 if (!TARGET_AAPCS_BASED)
22357 {
22358 (is_ctor ?
22359 default_named_section_asm_out_constructor
22360 : default_named_section_asm_out_destructor) (symbol, priority);
22361 return;
22362 }
22363
22364 /* Put these in the .init_array section, using a special relocation. */
22365 if (priority != DEFAULT_INIT_PRIORITY)
22366 {
22367 char buf[18];
22368 sprintf (buf, "%s.%.5u",
22369 is_ctor ? ".init_array" : ".fini_array",
22370 priority);
22371 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22372 }
22373 else if (is_ctor)
22374 s = ctors_section;
22375 else
22376 s = dtors_section;
22377
22378 switch_to_section (s);
22379 assemble_align (POINTER_SIZE);
22380 fputs ("\t.word\t", asm_out_file);
22381 output_addr_const (asm_out_file, symbol);
22382 fputs ("(target1)\n", asm_out_file);
22383 }
22384
22385 /* Add a function to the list of static constructors. */
22386
22387 static void
22388 arm_elf_asm_constructor (rtx symbol, int priority)
22389 {
22390 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22391 }
22392
22393 /* Add a function to the list of static destructors. */
22394
22395 static void
22396 arm_elf_asm_destructor (rtx symbol, int priority)
22397 {
22398 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22399 }
22400 \f
22401 /* A finite state machine takes care of noticing whether or not instructions
22402 can be conditionally executed, and thus decrease execution time and code
22403 size by deleting branch instructions. The fsm is controlled by
22404 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22405
22406 /* The states of the fsm controlling condition codes are:
22407 0: normal, do nothing special
22408 1: make ASM_OUTPUT_OPCODE not output this instruction
22409 2: make ASM_OUTPUT_OPCODE not output this instruction
22410 3: make instructions conditional
22411 4: make instructions conditional
22412
22413 State transitions (state->state by whom under condition):
22414 0 -> 1 final_prescan_insn if the `target' is a label
22415 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22416 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22417 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22418 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22419 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22420 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22421 (the target insn is arm_target_insn).
22422
22423 If the jump clobbers the conditions then we use states 2 and 4.
22424
22425 A similar thing can be done with conditional return insns.
22426
22427 XXX In case the `target' is an unconditional branch, this conditionalising
22428 of the instructions always reduces code size, but not always execution
22429 time. But then, I want to reduce the code size to somewhere near what
22430 /bin/cc produces. */
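/* As an illustration of the transformation this state machine performs
   (illustrative registers and label):

        cmp   r0, #0                    cmp   r0, #0
        beq   .L1              ==>      addne r1, r1, #1
        add   r1, r1, #1
   .L1:                            .L1:

   The conditional branch is not output and the skipped instruction is
   made conditional on the inverse condition. */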
22431
22432 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22433 instructions. When a COND_EXEC instruction is seen the subsequent
22434 instructions are scanned so that multiple conditional instructions can be
22435 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22436 specify the length and true/false mask for the IT block. These will be
22437 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22438
22439 /* Returns the index of the ARM condition code string in
22440 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22441 COMPARISON should be an rtx like `(eq (...) (...))'. */
22442
22443 enum arm_cond_code
22444 maybe_get_arm_condition_code (rtx comparison)
22445 {
22446 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22447 enum arm_cond_code code;
22448 enum rtx_code comp_code = GET_CODE (comparison);
22449
22450 if (GET_MODE_CLASS (mode) != MODE_CC)
22451 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22452 XEXP (comparison, 1));
22453
22454 switch (mode)
22455 {
22456 case CC_DNEmode: code = ARM_NE; goto dominance;
22457 case CC_DEQmode: code = ARM_EQ; goto dominance;
22458 case CC_DGEmode: code = ARM_GE; goto dominance;
22459 case CC_DGTmode: code = ARM_GT; goto dominance;
22460 case CC_DLEmode: code = ARM_LE; goto dominance;
22461 case CC_DLTmode: code = ARM_LT; goto dominance;
22462 case CC_DGEUmode: code = ARM_CS; goto dominance;
22463 case CC_DGTUmode: code = ARM_HI; goto dominance;
22464 case CC_DLEUmode: code = ARM_LS; goto dominance;
22465 case CC_DLTUmode: code = ARM_CC;
22466
22467 dominance:
22468 if (comp_code == EQ)
22469 return ARM_INVERSE_CONDITION_CODE (code);
22470 if (comp_code == NE)
22471 return code;
22472 return ARM_NV;
22473
22474 case CC_NOOVmode:
22475 switch (comp_code)
22476 {
22477 case NE: return ARM_NE;
22478 case EQ: return ARM_EQ;
22479 case GE: return ARM_PL;
22480 case LT: return ARM_MI;
22481 default: return ARM_NV;
22482 }
22483
22484 case CC_Zmode:
22485 switch (comp_code)
22486 {
22487 case NE: return ARM_NE;
22488 case EQ: return ARM_EQ;
22489 default: return ARM_NV;
22490 }
22491
22492 case CC_Nmode:
22493 switch (comp_code)
22494 {
22495 case NE: return ARM_MI;
22496 case EQ: return ARM_PL;
22497 default: return ARM_NV;
22498 }
22499
22500 case CCFPEmode:
22501 case CCFPmode:
22502 /* We can handle all cases except UNEQ and LTGT. */
22503 switch (comp_code)
22504 {
22505 case GE: return ARM_GE;
22506 case GT: return ARM_GT;
22507 case LE: return ARM_LS;
22508 case LT: return ARM_MI;
22509 case NE: return ARM_NE;
22510 case EQ: return ARM_EQ;
22511 case ORDERED: return ARM_VC;
22512 case UNORDERED: return ARM_VS;
22513 case UNLT: return ARM_LT;
22514 case UNLE: return ARM_LE;
22515 case UNGT: return ARM_HI;
22516 case UNGE: return ARM_PL;
22517 /* UNEQ and LTGT do not have a representation. */
22518 case UNEQ: /* Fall through. */
22519 case LTGT: /* Fall through. */
22520 default: return ARM_NV;
22521 }
22522
22523 case CC_SWPmode:
22524 switch (comp_code)
22525 {
22526 case NE: return ARM_NE;
22527 case EQ: return ARM_EQ;
22528 case GE: return ARM_LE;
22529 case GT: return ARM_LT;
22530 case LE: return ARM_GE;
22531 case LT: return ARM_GT;
22532 case GEU: return ARM_LS;
22533 case GTU: return ARM_CC;
22534 case LEU: return ARM_CS;
22535 case LTU: return ARM_HI;
22536 default: return ARM_NV;
22537 }
22538
22539 case CC_Cmode:
22540 switch (comp_code)
22541 {
22542 case LTU: return ARM_CS;
22543 case GEU: return ARM_CC;
22544 default: return ARM_NV;
22545 }
22546
22547 case CC_CZmode:
22548 switch (comp_code)
22549 {
22550 case NE: return ARM_NE;
22551 case EQ: return ARM_EQ;
22552 case GEU: return ARM_CS;
22553 case GTU: return ARM_HI;
22554 case LEU: return ARM_LS;
22555 case LTU: return ARM_CC;
22556 default: return ARM_NV;
22557 }
22558
22559 case CC_NCVmode:
22560 switch (comp_code)
22561 {
22562 case GE: return ARM_GE;
22563 case LT: return ARM_LT;
22564 case GEU: return ARM_CS;
22565 case LTU: return ARM_CC;
22566 default: return ARM_NV;
22567 }
22568
22569 case CCmode:
22570 switch (comp_code)
22571 {
22572 case NE: return ARM_NE;
22573 case EQ: return ARM_EQ;
22574 case GE: return ARM_GE;
22575 case GT: return ARM_GT;
22576 case LE: return ARM_LE;
22577 case LT: return ARM_LT;
22578 case GEU: return ARM_CS;
22579 case GTU: return ARM_HI;
22580 case LEU: return ARM_LS;
22581 case LTU: return ARM_CC;
22582 default: return ARM_NV;
22583 }
22584
22585 default: gcc_unreachable ();
22586 }
22587 }
22588
22589 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22590 static enum arm_cond_code
22591 get_arm_condition_code (rtx comparison)
22592 {
22593 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22594 gcc_assert (code != ARM_NV);
22595 return code;
22596 }
22597
22598 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22599 instructions. */
22600 void
22601 thumb2_final_prescan_insn (rtx_insn *insn)
22602 {
22603 rtx_insn *first_insn = insn;
22604 rtx body = PATTERN (insn);
22605 rtx predicate;
22606 enum arm_cond_code code;
22607 int n;
22608 int mask;
22609 int max;
22610
22611 /* max_insns_skipped in the tune was already taken into account in the
22612 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22613 we just emit the IT blocks as best we can. It does not make sense to split
22614 the IT blocks. */
22615 max = MAX_INSN_PER_IT_BLOCK;
22616
22617 /* Remove the previous insn from the count of insns to be output. */
22618 if (arm_condexec_count)
22619 arm_condexec_count--;
22620
22621 /* Nothing to do if we are already inside a conditional block. */
22622 if (arm_condexec_count)
22623 return;
22624
22625 if (GET_CODE (body) != COND_EXEC)
22626 return;
22627
22628 /* Conditional jumps are implemented directly. */
22629 if (JUMP_P (insn))
22630 return;
22631
22632 predicate = COND_EXEC_TEST (body);
22633 arm_current_cc = get_arm_condition_code (predicate);
22634
22635 n = get_attr_ce_count (insn);
22636 arm_condexec_count = 1;
22637 arm_condexec_mask = (1 << n) - 1;
22638 arm_condexec_masklen = n;
22639 /* See if subsequent instructions can be combined into the same block. */
22640 for (;;)
22641 {
22642 insn = next_nonnote_insn (insn);
22643
22644 /* Jumping into the middle of an IT block is illegal, so a label or
22645 barrier terminates the block. */
22646 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22647 break;
22648
22649 body = PATTERN (insn);
22650 /* USE and CLOBBER aren't really insns, so just skip them. */
22651 if (GET_CODE (body) == USE
22652 || GET_CODE (body) == CLOBBER)
22653 continue;
22654
22655 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22656 if (GET_CODE (body) != COND_EXEC)
22657 break;
22658 /* Maximum number of conditionally executed instructions in a block. */
22659 n = get_attr_ce_count (insn);
22660 if (arm_condexec_masklen + n > max)
22661 break;
22662
22663 predicate = COND_EXEC_TEST (body);
22664 code = get_arm_condition_code (predicate);
22665 mask = (1 << n) - 1;
22666 if (arm_current_cc == code)
22667 arm_condexec_mask |= (mask << arm_condexec_masklen);
22668 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22669 break;
22670
22671 arm_condexec_count++;
22672 arm_condexec_masklen += n;
22673
22674 /* A jump must be the last instruction in a conditional block. */
22675 if (JUMP_P (insn))
22676 break;
22677 }
22678 /* Restore recog_data (getting the attributes of other insns can
22679 destroy this array, but final.c assumes that it remains intact
22680 across this call). */
22681 extract_constrain_insn_cached (first_insn);
22682 }
22683
22684 void
22685 arm_final_prescan_insn (rtx_insn *insn)
22686 {
22687 /* BODY will hold the body of INSN. */
22688 rtx body = PATTERN (insn);
22689
22690 /* This will be 1 if trying to repeat the trick, and things need to be
22691 reversed if it appears to fail. */
22692 int reverse = 0;
22693
22694 /* If we start with a return insn, we only succeed if we find another one. */
22695 int seeking_return = 0;
22696 enum rtx_code return_code = UNKNOWN;
22697
22698 /* START_INSN will hold the insn from where we start looking. This is the
22699 first insn after the following code_label if REVERSE is true. */
22700 rtx_insn *start_insn = insn;
22701
22702 /* If in state 4, check if the target branch is reached, in order to
22703 change back to state 0. */
22704 if (arm_ccfsm_state == 4)
22705 {
22706 if (insn == arm_target_insn)
22707 {
22708 arm_target_insn = NULL;
22709 arm_ccfsm_state = 0;
22710 }
22711 return;
22712 }
22713
22714 /* If in state 3, it is possible to repeat the trick, if this insn is an
22715 unconditional branch to a label, and immediately following this branch
22716 is the previous target label which is only used once, and the label this
22717 branch jumps to is not too far off. */
22718 if (arm_ccfsm_state == 3)
22719 {
22720 if (simplejump_p (insn))
22721 {
22722 start_insn = next_nonnote_insn (start_insn);
22723 if (BARRIER_P (start_insn))
22724 {
22725 /* XXX Isn't this always a barrier? */
22726 start_insn = next_nonnote_insn (start_insn);
22727 }
22728 if (LABEL_P (start_insn)
22729 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22730 && LABEL_NUSES (start_insn) == 1)
22731 reverse = TRUE;
22732 else
22733 return;
22734 }
22735 else if (ANY_RETURN_P (body))
22736 {
22737 start_insn = next_nonnote_insn (start_insn);
22738 if (BARRIER_P (start_insn))
22739 start_insn = next_nonnote_insn (start_insn);
22740 if (LABEL_P (start_insn)
22741 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22742 && LABEL_NUSES (start_insn) == 1)
22743 {
22744 reverse = TRUE;
22745 seeking_return = 1;
22746 return_code = GET_CODE (body);
22747 }
22748 else
22749 return;
22750 }
22751 else
22752 return;
22753 }
22754
22755 gcc_assert (!arm_ccfsm_state || reverse);
22756 if (!JUMP_P (insn))
22757 return;
22758
22759 /* This jump might be paralleled with a clobber of the condition codes;
22760 the jump should always come first. */
22761 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22762 body = XVECEXP (body, 0, 0);
22763
22764 if (reverse
22765 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22766 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22767 {
22768 int insns_skipped;
22769 int fail = FALSE, succeed = FALSE;
22770 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22771 int then_not_else = TRUE;
22772 rtx_insn *this_insn = start_insn;
22773 rtx label = 0;
22774
22775 /* Register the insn jumped to. */
22776 if (reverse)
22777 {
22778 if (!seeking_return)
22779 label = XEXP (SET_SRC (body), 0);
22780 }
22781 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22782 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22783 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22784 {
22785 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22786 then_not_else = FALSE;
22787 }
22788 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22789 {
22790 seeking_return = 1;
22791 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22792 }
22793 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22794 {
22795 seeking_return = 1;
22796 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22797 then_not_else = FALSE;
22798 }
22799 else
22800 gcc_unreachable ();
22801
22802 /* See how many insns this branch skips, and what kind of insns. If all
22803 insns are okay, and the label or unconditional branch to the same
22804 label is not too far away, succeed. */
22805 for (insns_skipped = 0;
22806 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22807 {
22808 rtx scanbody;
22809
22810 this_insn = next_nonnote_insn (this_insn);
22811 if (!this_insn)
22812 break;
22813
22814 switch (GET_CODE (this_insn))
22815 {
22816 case CODE_LABEL:
22817 /* Succeed if it is the target label, otherwise fail since
22818 control falls in from somewhere else. */
22819 if (this_insn == label)
22820 {
22821 arm_ccfsm_state = 1;
22822 succeed = TRUE;
22823 }
22824 else
22825 fail = TRUE;
22826 break;
22827
22828 case BARRIER:
22829 /* Succeed if the following insn is the target label.
22830 Otherwise fail.
22831 If return insns are used then the last insn in a function
22832 will be a barrier. */
22833 this_insn = next_nonnote_insn (this_insn);
22834 if (this_insn && this_insn == label)
22835 {
22836 arm_ccfsm_state = 1;
22837 succeed = TRUE;
22838 }
22839 else
22840 fail = TRUE;
22841 break;
22842
22843 case CALL_INSN:
22844 /* The AAPCS says that conditional calls should not be
22845 used since they make interworking inefficient (the
22846 linker can't transform BL<cond> into BLX). That's
22847 only a problem if the machine has BLX. */
22848 if (arm_arch5)
22849 {
22850 fail = TRUE;
22851 break;
22852 }
22853
22854 /* Succeed if the following insn is the target label, or
22855 if the following two insns are a barrier and the
22856 target label. */
22857 this_insn = next_nonnote_insn (this_insn);
22858 if (this_insn && BARRIER_P (this_insn))
22859 this_insn = next_nonnote_insn (this_insn);
22860
22861 if (this_insn && this_insn == label
22862 && insns_skipped < max_insns_skipped)
22863 {
22864 arm_ccfsm_state = 1;
22865 succeed = TRUE;
22866 }
22867 else
22868 fail = TRUE;
22869 break;
22870
22871 case JUMP_INSN:
22872 /* If this is an unconditional branch to the same label, succeed.
22873 If it is to another label, do nothing. If it is conditional,
22874 fail. */
22875 /* XXX Probably, the tests for SET and the PC are
22876 unnecessary. */
22877
22878 scanbody = PATTERN (this_insn);
22879 if (GET_CODE (scanbody) == SET
22880 && GET_CODE (SET_DEST (scanbody)) == PC)
22881 {
22882 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22883 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22884 {
22885 arm_ccfsm_state = 2;
22886 succeed = TRUE;
22887 }
22888 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22889 fail = TRUE;
22890 }
22891 /* Fail if a conditional return is undesirable (e.g. on a
22892 StrongARM), but still allow this if optimizing for size. */
22893 else if (GET_CODE (scanbody) == return_code
22894 && !use_return_insn (TRUE, NULL)
22895 && !optimize_size)
22896 fail = TRUE;
22897 else if (GET_CODE (scanbody) == return_code)
22898 {
22899 arm_ccfsm_state = 2;
22900 succeed = TRUE;
22901 }
22902 else if (GET_CODE (scanbody) == PARALLEL)
22903 {
22904 switch (get_attr_conds (this_insn))
22905 {
22906 case CONDS_NOCOND:
22907 break;
22908 default:
22909 fail = TRUE;
22910 break;
22911 }
22912 }
22913 else
22914 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22915
22916 break;
22917
22918 case INSN:
22919 /* Instructions using or affecting the condition codes make it
22920 fail. */
22921 scanbody = PATTERN (this_insn);
22922 if (!(GET_CODE (scanbody) == SET
22923 || GET_CODE (scanbody) == PARALLEL)
22924 || get_attr_conds (this_insn) != CONDS_NOCOND)
22925 fail = TRUE;
22926 break;
22927
22928 default:
22929 break;
22930 }
22931 }
22932 if (succeed)
22933 {
22934 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22935 arm_target_label = CODE_LABEL_NUMBER (label);
22936 else
22937 {
22938 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22939
22940 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22941 {
22942 this_insn = next_nonnote_insn (this_insn);
22943 gcc_assert (!this_insn
22944 || (!BARRIER_P (this_insn)
22945 && !LABEL_P (this_insn)));
22946 }
22947 if (!this_insn)
22948 {
22949 /* Oh, dear! We ran off the end... give up. */
22950 extract_constrain_insn_cached (insn);
22951 arm_ccfsm_state = 0;
22952 arm_target_insn = NULL;
22953 return;
22954 }
22955 arm_target_insn = this_insn;
22956 }
22957
22958 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22959 what it was. */
22960 if (!reverse)
22961 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22962
22963 if (reverse || then_not_else)
22964 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22965 }
22966
22967 /* Restore recog_data (getting the attributes of other insns can
22968 destroy this array, but final.c assumes that it remains intact
22969 across this call). */
22970 extract_constrain_insn_cached (insn);
22971 }
22972 }
22973
22974 /* Output IT instructions. */
22975 void
22976 thumb2_asm_output_opcode (FILE * stream)
22977 {
22978 char buff[5];
22979 int n;
22980
22981 if (arm_condexec_mask)
22982 {
22983 for (n = 0; n < arm_condexec_masklen; n++)
22984 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22985 buff[n] = 0;
22986 asm_fprintf (stream, "i%s\t%s\n\t", buff,
22987 arm_condition_codes[arm_current_cc]);
22988 arm_condexec_mask = 0;
22989 }
22990 }
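/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101, the function above emits "itet eq" before the
   first instruction, so the three conditional instructions execute as eq,
   ne and eq respectively (illustrative values). */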
22991
22992 /* Returns true if REGNO is a valid register
22993 for holding a quantity of type MODE. */
22994 int
22995 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22996 {
22997 if (GET_MODE_CLASS (mode) == MODE_CC)
22998 return (regno == CC_REGNUM
22999 || (TARGET_HARD_FLOAT && TARGET_VFP
23000 && regno == VFPCC_REGNUM));
23001
23002 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23003 return false;
23004
23005 if (TARGET_THUMB1)
23006 /* For the Thumb we only allow values bigger than SImode in
23007 registers 0 - 6, so that there is always a second low
23008 register available to hold the upper part of the value.
23009 We probably ought to ensure that the register is the
23010 start of an even numbered register pair. */
23011 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23012
23013 if (TARGET_HARD_FLOAT && TARGET_VFP
23014 && IS_VFP_REGNUM (regno))
23015 {
23016 if (mode == SFmode || mode == SImode)
23017 return VFP_REGNO_OK_FOR_SINGLE (regno);
23018
23019 if (mode == DFmode)
23020 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23021
23022 /* VFP registers can hold HFmode values, but there is no point in
23023 putting them there unless we have hardware conversion insns. */
23024 if (mode == HFmode)
23025 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23026
23027 if (TARGET_NEON)
23028 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23029 || (VALID_NEON_QREG_MODE (mode)
23030 && NEON_REGNO_OK_FOR_QUAD (regno))
23031 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23032 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23033 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23034 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23035 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23036
23037 return FALSE;
23038 }
23039
23040 if (TARGET_REALLY_IWMMXT)
23041 {
23042 if (IS_IWMMXT_GR_REGNUM (regno))
23043 return mode == SImode;
23044
23045 if (IS_IWMMXT_REGNUM (regno))
23046 return VALID_IWMMXT_REG_MODE (mode);
23047 }
23048
23049 /* We allow almost any value to be stored in the general registers.
23050 Restrict doubleword quantities to even register pairs in ARM state
23051 so that we can use ldrd. Do not allow very large Neon structure
23052 opaque modes in general registers; they would use too many. */
23053 if (regno <= LAST_ARM_REGNUM)
23054 {
23055 if (ARM_NUM_REGS (mode) > 4)
23056 return FALSE;
23057
23058 if (TARGET_THUMB2)
23059 return TRUE;
23060
23061 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23062 }
23063
23064 if (regno == FRAME_POINTER_REGNUM
23065 || regno == ARG_POINTER_REGNUM)
23066 /* We only allow integers in the fake hard registers. */
23067 return GET_MODE_CLASS (mode) == MODE_INT;
23068
23069 return FALSE;
23070 }
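/* For instance, with TARGET_LDRD in ARM state arm_hard_regno_mode_ok accepts
   an 8-byte DImode value in an even-numbered core register such as r0 or r2
   (so ldrd can be used) but rejects it in an odd-numbered one such as r1;
   in Thumb-2 state either register is accepted (illustrative reading). */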
23071
23072 /* Implement MODES_TIEABLE_P. */
23073
23074 bool
23075 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23076 {
23077 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23078 return true;
23079
23080 /* We specifically want to allow elements of "structure" modes to
23081 be tieable to the structure. This more general condition allows
23082 other rarer situations too. */
23083 if (TARGET_NEON
23084 && (VALID_NEON_DREG_MODE (mode1)
23085 || VALID_NEON_QREG_MODE (mode1)
23086 || VALID_NEON_STRUCT_MODE (mode1))
23087 && (VALID_NEON_DREG_MODE (mode2)
23088 || VALID_NEON_QREG_MODE (mode2)
23089 || VALID_NEON_STRUCT_MODE (mode2)))
23090 return true;
23091
23092 return false;
23093 }
23094
23095 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23096 not used in arm mode. */
23097
23098 enum reg_class
23099 arm_regno_class (int regno)
23100 {
23101 if (regno == PC_REGNUM)
23102 return NO_REGS;
23103
23104 if (TARGET_THUMB1)
23105 {
23106 if (regno == STACK_POINTER_REGNUM)
23107 return STACK_REG;
23108 if (regno == CC_REGNUM)
23109 return CC_REG;
23110 if (regno < 8)
23111 return LO_REGS;
23112 return HI_REGS;
23113 }
23114
23115 if (TARGET_THUMB2 && regno < 8)
23116 return LO_REGS;
23117
23118 if ( regno <= LAST_ARM_REGNUM
23119 || regno == FRAME_POINTER_REGNUM
23120 || regno == ARG_POINTER_REGNUM)
23121 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23122
23123 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23124 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23125
23126 if (IS_VFP_REGNUM (regno))
23127 {
23128 if (regno <= D7_VFP_REGNUM)
23129 return VFP_D0_D7_REGS;
23130 else if (regno <= LAST_LO_VFP_REGNUM)
23131 return VFP_LO_REGS;
23132 else
23133 return VFP_HI_REGS;
23134 }
23135
23136 if (IS_IWMMXT_REGNUM (regno))
23137 return IWMMXT_REGS;
23138
23139 if (IS_IWMMXT_GR_REGNUM (regno))
23140 return IWMMXT_GR_REGS;
23141
23142 return NO_REGS;
23143 }
23144
23145 /* Handle a special case when computing the offset
23146 of an argument from the frame pointer. */
23147 int
23148 arm_debugger_arg_offset (int value, rtx addr)
23149 {
23150 rtx_insn *insn;
23151
23152 /* We are only interested if dbxout_parms() failed to compute the offset. */
23153 if (value != 0)
23154 return 0;
23155
23156 /* We can only cope with the case where the address is held in a register. */
23157 if (!REG_P (addr))
23158 return 0;
23159
23160 /* If we are using the frame pointer to point at the argument, then
23161 an offset of 0 is correct. */
23162 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23163 return 0;
23164
23165 /* If we are using the stack pointer to point at the
23166 argument, then an offset of 0 is correct. */
23167 /* ??? Check this is consistent with thumb2 frame layout. */
23168 if ((TARGET_THUMB || !frame_pointer_needed)
23169 && REGNO (addr) == SP_REGNUM)
23170 return 0;
23171
23172 /* Oh dear. The argument is pointed to by a register rather
23173 than being held in a register, or being stored at a known
23174 offset from the frame pointer. Since GDB only understands
23175 those two kinds of argument we must translate the address
23176 held in the register into an offset from the frame pointer.
23177 We do this by searching through the insns for the function
23178 looking to see where this register gets its value. If the
23179 register is initialized from the frame pointer plus an offset
23180 then we are in luck and we can continue, otherwise we give up.
23181
23182 This code is exercised by producing debugging information
23183 for a function with arguments like this:
23184
23185 double func (double a, double b, int c, double d) {return d;}
23186
23187 Without this code the stab for parameter 'd' will be set to
23188 an offset of 0 from the frame pointer, rather than 8. */
23189
23190 /* The if() statement says:
23191
23192 If the insn is a normal instruction
23193 and if the insn is setting the value in a register
23194 and if the register being set is the register holding the address of the argument
23195 and if the address is computed by an addition
23196 that involves adding to a register
23197 which is the frame pointer
23198 a constant integer
23199
23200 then... */
23201
23202 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23203 {
23204 if ( NONJUMP_INSN_P (insn)
23205 && GET_CODE (PATTERN (insn)) == SET
23206 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23207 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23208 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23209 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23210 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23211 )
23212 {
23213 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23214
23215 break;
23216 }
23217 }
23218
23219 if (value == 0)
23220 {
23221 debug_rtx (addr);
23222 warning (0, "unable to compute real location of stacked parameter");
23223 value = 8; /* XXX magic hack */
23224 }
23225
23226 return value;
23227 }
23228 \f
23229 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23230
23231 static const char *
23232 arm_invalid_parameter_type (const_tree t)
23233 {
23234 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23235 return N_("function parameters cannot have __fp16 type");
23236 return NULL;
23237 }
23238
23239 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23240
23241 static const char *
23242 arm_invalid_return_type (const_tree t)
23243 {
23244 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23245 return N_("functions cannot return __fp16 type");
23246 return NULL;
23247 }
23248
23249 /* Implement TARGET_PROMOTED_TYPE. */
23250
23251 static tree
23252 arm_promoted_type (const_tree t)
23253 {
23254 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23255 return float_type_node;
23256 return NULL_TREE;
23257 }
23258
23259 /* Implement TARGET_CONVERT_TO_TYPE.
23260 Specifically, this hook implements the peculiarity of the ARM
23261 half-precision floating-point C semantics that requires conversions between
23262 __fp16 to or from double to do an intermediate conversion to float. */
23263
23264 static tree
23265 arm_convert_to_type (tree type, tree expr)
23266 {
23267 tree fromtype = TREE_TYPE (expr);
23268 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23269 return NULL_TREE;
23270 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23271 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23272 return convert (type, convert (float_type_node, expr));
23273 return NULL_TREE;
23274 }
23275
23276 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23277 This simply adds HFmode as a supported mode; even though we don't
23278 implement arithmetic on this type directly, it's supported by
23279 optabs conversions, much the way the double-word arithmetic is
23280 special-cased in the default hook. */
23281
23282 static bool
23283 arm_scalar_mode_supported_p (machine_mode mode)
23284 {
23285 if (mode == HFmode)
23286 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23287 else if (ALL_FIXED_POINT_MODE_P (mode))
23288 return true;
23289 else
23290 return default_scalar_mode_supported_p (mode);
23291 }
23292
23293 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23294 void
23295 neon_reinterpret (rtx dest, rtx src)
23296 {
23297 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23298 }
23299
23300 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23301 not to early-clobber SRC registers in the process.
23302
23303 We assume that the operands described by SRC and DEST represent a
23304 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23305 number of components into which the copy has been decomposed. */
23306 void
23307 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23308 {
23309 unsigned int i;
23310
23311 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23312 || REGNO (operands[0]) < REGNO (operands[1]))
23313 {
23314 for (i = 0; i < count; i++)
23315 {
23316 operands[2 * i] = dest[i];
23317 operands[2 * i + 1] = src[i];
23318 }
23319 }
23320 else
23321 {
23322 for (i = 0; i < count; i++)
23323 {
23324 operands[2 * i] = dest[count - i - 1];
23325 operands[2 * i + 1] = src[count - i - 1];
23326 }
23327 }
23328 }
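/* For example, when copying the pair {d1, d2} into {d2, d3}, a low-to-high
   ordering would clobber d2 before it had been read, so the components are
   emitted in reverse (high-to-low) order instead (illustrative registers). */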
23329
23330 /* Split operands into moves from op[1] + op[2] into op[0]. */
23331
23332 void
23333 neon_split_vcombine (rtx operands[3])
23334 {
23335 unsigned int dest = REGNO (operands[0]);
23336 unsigned int src1 = REGNO (operands[1]);
23337 unsigned int src2 = REGNO (operands[2]);
23338 machine_mode halfmode = GET_MODE (operands[1]);
23339 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23340 rtx destlo, desthi;
23341
23342 if (src1 == dest && src2 == dest + halfregs)
23343 {
23344 /* No-op move. Can't split to nothing; emit something. */
23345 emit_note (NOTE_INSN_DELETED);
23346 return;
23347 }
23348
23349 /* Preserve register attributes for variable tracking. */
23350 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23351 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23352 GET_MODE_SIZE (halfmode));
23353
23354 /* Special case of reversed high/low parts. Use VSWP. */
23355 if (src2 == dest && src1 == dest + halfregs)
23356 {
23357 rtx x = gen_rtx_SET (destlo, operands[1]);
23358 rtx y = gen_rtx_SET (desthi, operands[2]);
23359 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23360 return;
23361 }
23362
23363 if (!reg_overlap_mentioned_p (operands[2], destlo))
23364 {
23365 /* Try to avoid unnecessary moves if part of the result
23366 is in the right place already. */
23367 if (src1 != dest)
23368 emit_move_insn (destlo, operands[1]);
23369 if (src2 != dest + halfregs)
23370 emit_move_insn (desthi, operands[2]);
23371 }
23372 else
23373 {
23374 if (src2 != dest + halfregs)
23375 emit_move_insn (desthi, operands[2]);
23376 if (src1 != dest)
23377 emit_move_insn (destlo, operands[1]);
23378 }
23379 }
23380 \f
23381 /* Return the number (counting from 0) of
23382 the least significant set bit in MASK. */
23383
23384 inline static int
23385 number_of_first_bit_set (unsigned mask)
23386 {
23387 return ctz_hwi (mask);
23388 }
23389
23390 /* Like emit_multi_reg_push, but allowing for a different set of
23391 registers to be described as saved. MASK is the set of registers
23392 to be saved; REAL_REGS is the set of registers to be described as
23393 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23394
23395 static rtx_insn *
23396 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23397 {
23398 unsigned long regno;
23399 rtx par[10], tmp, reg;
23400 rtx_insn *insn;
23401 int i, j;
23402
23403 /* Build the parallel of the registers actually being stored. */
23404 for (i = 0; mask; ++i, mask &= mask - 1)
23405 {
23406 regno = ctz_hwi (mask);
23407 reg = gen_rtx_REG (SImode, regno);
23408
23409 if (i == 0)
23410 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23411 else
23412 tmp = gen_rtx_USE (VOIDmode, reg);
23413
23414 par[i] = tmp;
23415 }
23416
23417 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23418 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23419 tmp = gen_frame_mem (BLKmode, tmp);
23420 tmp = gen_rtx_SET (tmp, par[0]);
23421 par[0] = tmp;
23422
23423 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23424 insn = emit_insn (tmp);
23425
23426 /* Always build the stack adjustment note for unwind info. */
23427 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23428 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23429 par[0] = tmp;
23430
23431 /* Build the parallel of the registers recorded as saved for unwind. */
23432 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23433 {
23434 regno = ctz_hwi (real_regs);
23435 reg = gen_rtx_REG (SImode, regno);
23436
23437 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23438 tmp = gen_frame_mem (SImode, tmp);
23439 tmp = gen_rtx_SET (tmp, reg);
23440 RTX_FRAME_RELATED_P (tmp) = 1;
23441 par[j + 1] = tmp;
23442 }
23443
23444 if (j == 0)
23445 tmp = par[0];
23446 else
23447 {
23448 RTX_FRAME_RELATED_P (par[0]) = 1;
23449 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23450 }
23451
23452 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23453
23454 return insn;
23455 }
23456
23457 /* Emit code to push or pop registers to or from the stack. F is the
23458 assembly file. MASK is the registers to pop. */
23459 static void
23460 thumb_pop (FILE *f, unsigned long mask)
23461 {
23462 int regno;
23463 int lo_mask = mask & 0xFF;
23464 int pushed_words = 0;
23465
23466 gcc_assert (mask);
23467
23468 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23469 {
23470 /* Special case. Do not generate a POP PC statement here, do it in
23471 thumb_exit(). */
23472 thumb_exit (f, -1);
23473 return;
23474 }
23475
23476 fprintf (f, "\tpop\t{");
23477
23478 /* Look at the low registers first. */
23479 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23480 {
23481 if (lo_mask & 1)
23482 {
23483 asm_fprintf (f, "%r", regno);
23484
23485 if ((lo_mask & ~1) != 0)
23486 fprintf (f, ", ");
23487
23488 pushed_words++;
23489 }
23490 }
23491
23492 if (mask & (1 << PC_REGNUM))
23493 {
23494 /* Catch popping the PC. */
23495 if (TARGET_INTERWORK || TARGET_BACKTRACE
23496 || crtl->calls_eh_return)
23497 {
23498 /* The PC is never popped directly; instead
23499 it is popped into r3 and then BX is used. */
23500 fprintf (f, "}\n");
23501
23502 thumb_exit (f, -1);
23503
23504 return;
23505 }
23506 else
23507 {
23508 if (mask & 0xFF)
23509 fprintf (f, ", ");
23510
23511 asm_fprintf (f, "%r", PC_REGNUM);
23512 }
23513 }
23514
23515 fprintf (f, "}\n");
23516 }
23517
23518 /* Generate code to return from a thumb function.
23519 If 'reg_containing_return_addr' is -1, then the return address is
23520 actually on the stack, at the stack pointer. */
23521 static void
23522 thumb_exit (FILE *f, int reg_containing_return_addr)
23523 {
23524 unsigned regs_available_for_popping;
23525 unsigned regs_to_pop;
23526 int pops_needed;
23527 unsigned available;
23528 unsigned required;
23529 machine_mode mode;
23530 int size;
23531 int restore_a4 = FALSE;
23532
23533 /* Compute the registers we need to pop. */
23534 regs_to_pop = 0;
23535 pops_needed = 0;
23536
23537 if (reg_containing_return_addr == -1)
23538 {
23539 regs_to_pop |= 1 << LR_REGNUM;
23540 ++pops_needed;
23541 }
23542
23543 if (TARGET_BACKTRACE)
23544 {
23545 /* Restore the (ARM) frame pointer and stack pointer. */
23546 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23547 pops_needed += 2;
23548 }
23549
23550 /* If there is nothing to pop then just emit the BX instruction and
23551 return. */
23552 if (pops_needed == 0)
23553 {
23554 if (crtl->calls_eh_return)
23555 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23556
23557 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23558 return;
23559 }
23560 /* Otherwise, if we are not supporting interworking, we have not created
23561 a backtrace structure, and the function was not entered in ARM mode,
23562 then just pop the return address straight into the PC. */
23563 else if (!TARGET_INTERWORK
23564 && !TARGET_BACKTRACE
23565 && !is_called_in_ARM_mode (current_function_decl)
23566 && !crtl->calls_eh_return)
23567 {
23568 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23569 return;
23570 }
23571
23572 /* Find out how many of the (return) argument registers we can corrupt. */
23573 regs_available_for_popping = 0;
23574
23575 /* If returning via __builtin_eh_return, the bottom three registers
23576 all contain information needed for the return. */
23577 if (crtl->calls_eh_return)
23578 size = 12;
23579 else
23580 {
23581 /* We can deduce the registers used from the function's
23582 return value. This is more reliable than examining
23583 df_regs_ever_live_p () because that will be set if the register is
23584 ever used in the function, not just if the register is used
23585 to hold a return value. */
23586
23587 if (crtl->return_rtx != 0)
23588 mode = GET_MODE (crtl->return_rtx);
23589 else
23590 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23591
23592 size = GET_MODE_SIZE (mode);
23593
23594 if (size == 0)
23595 {
23596 /* In a void function we can use any argument register.
23597 In a function that returns a structure on the stack
23598 we can use the second and third argument registers. */
23599 if (mode == VOIDmode)
23600 regs_available_for_popping =
23601 (1 << ARG_REGISTER (1))
23602 | (1 << ARG_REGISTER (2))
23603 | (1 << ARG_REGISTER (3));
23604 else
23605 regs_available_for_popping =
23606 (1 << ARG_REGISTER (2))
23607 | (1 << ARG_REGISTER (3));
23608 }
23609 else if (size <= 4)
23610 regs_available_for_popping =
23611 (1 << ARG_REGISTER (2))
23612 | (1 << ARG_REGISTER (3));
23613 else if (size <= 8)
23614 regs_available_for_popping =
23615 (1 << ARG_REGISTER (3));
23616 }
23617
23618 /* Match registers to be popped with registers into which we pop them. */
23619 for (available = regs_available_for_popping,
23620 required = regs_to_pop;
23621 required != 0 && available != 0;
23622 available &= ~(available & - available),
23623 required &= ~(required & - required))
23624 -- pops_needed;
23625
23626 /* If we have any popping registers left over, remove them. */
23627 if (available > 0)
23628 regs_available_for_popping &= ~available;
23629
23630 /* Otherwise if we need another popping register we can use
23631 the fourth argument register. */
23632 else if (pops_needed)
23633 {
23634 /* If we have not found any free argument registers and
23635 reg a4 contains the return address, we must move it. */
23636 if (regs_available_for_popping == 0
23637 && reg_containing_return_addr == LAST_ARG_REGNUM)
23638 {
23639 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23640 reg_containing_return_addr = LR_REGNUM;
23641 }
23642 else if (size > 12)
23643 {
23644 /* Register a4 is being used to hold part of the return value,
23645 but we have dire need of a free, low register. */
23646 restore_a4 = TRUE;
23647
23648 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23649 }
23650
23651 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23652 {
23653 /* The fourth argument register is available. */
23654 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23655
23656 --pops_needed;
23657 }
23658 }
23659
23660 /* Pop as many registers as we can. */
23661 thumb_pop (f, regs_available_for_popping);
23662
23663 /* Process the registers we popped. */
23664 if (reg_containing_return_addr == -1)
23665 {
23666 /* The return address was popped into the lowest numbered register. */
23667 regs_to_pop &= ~(1 << LR_REGNUM);
23668
23669 reg_containing_return_addr =
23670 number_of_first_bit_set (regs_available_for_popping);
23671
23672 /* Remove this register from the mask of available registers, so that
23673 the return address will not be corrupted by further pops. */
23674 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23675 }
23676
23677 /* If we popped other registers then handle them here. */
23678 if (regs_available_for_popping)
23679 {
23680 int frame_pointer;
23681
23682 /* Work out which register currently contains the frame pointer. */
23683 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23684
23685 /* Move it into the correct place. */
23686 asm_fprintf (f, "\tmov\t%r, %r\n",
23687 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23688
23689 /* (Temporarily) remove it from the mask of popped registers. */
23690 regs_available_for_popping &= ~(1 << frame_pointer);
23691 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23692
23693 if (regs_available_for_popping)
23694 {
23695 int stack_pointer;
23696
23697 /* We popped the stack pointer as well,
23698 find the register that contains it. */
23699 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23700
23701 /* Move it into the stack register. */
23702 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23703
23704 /* At this point we have popped all necessary registers, so
23705 do not worry about restoring regs_available_for_popping
23706 to its correct value:
23707
23708 assert (pops_needed == 0)
23709 assert (regs_available_for_popping == (1 << frame_pointer))
23710 assert (regs_to_pop == (1 << STACK_POINTER)) */
23711 }
23712 else
23713 {
23714 /* Since we have just moved the popped value into the frame
23715 pointer, the popping register is available for reuse, and
23716 we know that we still have the stack pointer left to pop. */
23717 regs_available_for_popping |= (1 << frame_pointer);
23718 }
23719 }
23720
23721 /* If we still have registers left on the stack, but we no longer have
23722 any registers into which we can pop them, then we must move the return
23723 address into the link register and make available the register that
23724 contained it. */
23725 if (regs_available_for_popping == 0 && pops_needed > 0)
23726 {
23727 regs_available_for_popping |= 1 << reg_containing_return_addr;
23728
23729 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23730 reg_containing_return_addr);
23731
23732 reg_containing_return_addr = LR_REGNUM;
23733 }
23734
23735 /* If we have registers left on the stack then pop some more.
23736 We know that at most we will want to pop FP and SP. */
23737 if (pops_needed > 0)
23738 {
23739 int popped_into;
23740 int move_to;
23741
23742 thumb_pop (f, regs_available_for_popping);
23743
23744 /* We have popped either FP or SP.
23745 Move whichever one it is into the correct register. */
23746 popped_into = number_of_first_bit_set (regs_available_for_popping);
23747 move_to = number_of_first_bit_set (regs_to_pop);
23748
23749 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23750
23751 regs_to_pop &= ~(1 << move_to);
23752
23753 --pops_needed;
23754 }
23755
23756 /* If we still have not popped everything then we must have only
23757 had one register available to us and we are now popping the SP. */
23758 if (pops_needed > 0)
23759 {
23760 int popped_into;
23761
23762 thumb_pop (f, regs_available_for_popping);
23763
23764 popped_into = number_of_first_bit_set (regs_available_for_popping);
23765
23766 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23767 /*
23768 assert (regs_to_pop == (1 << STACK_POINTER))
23769 assert (pops_needed == 1)
23770 */
23771 }
23772
23773 /* If necessary restore the a4 register. */
23774 if (restore_a4)
23775 {
23776 if (reg_containing_return_addr != LR_REGNUM)
23777 {
23778 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23779 reg_containing_return_addr = LR_REGNUM;
23780 }
23781
23782 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23783 }
23784
23785 if (crtl->calls_eh_return)
23786 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23787
23788 /* Return to caller. */
23789 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23790 }
23791 \f
23792 /* Scan INSN just before assembler is output for it.
23793 For Thumb-1, we track the status of the condition codes; this
23794 information is used in the cbranchsi4_insn pattern. */
23795 void
23796 thumb1_final_prescan_insn (rtx_insn *insn)
23797 {
23798 if (flag_print_asm_name)
23799 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23800 INSN_ADDRESSES (INSN_UID (insn)));
23801 /* Don't overwrite the previous setter when we get to a cbranch. */
23802 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23803 {
23804 enum attr_conds conds;
23805
23806 if (cfun->machine->thumb1_cc_insn)
23807 {
23808 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23809 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23810 CC_STATUS_INIT;
23811 }
23812 conds = get_attr_conds (insn);
23813 if (conds == CONDS_SET)
23814 {
23815 rtx set = single_set (insn);
23816 cfun->machine->thumb1_cc_insn = insn;
23817 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23818 cfun->machine->thumb1_cc_op1 = const0_rtx;
23819 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23820 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23821 {
23822 rtx src1 = XEXP (SET_SRC (set), 1);
23823 if (src1 == const0_rtx)
23824 cfun->machine->thumb1_cc_mode = CCmode;
23825 }
23826 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23827 {
23828 /* Record the src register operand instead of dest because
23829 cprop_hardreg pass propagates src. */
23830 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23831 }
23832 }
23833 else if (conds != CONDS_NOCOND)
23834 cfun->machine->thumb1_cc_insn = NULL_RTX;
23835 }
23836
23837 /* Check if unexpected far jump is used. */
23838 if (cfun->machine->lr_save_eliminated
23839 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23840 internal_error ("Unexpected thumb1 far jump");
23841 }
23842
23843 int
23844 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23845 {
23846 unsigned HOST_WIDE_INT mask = 0xff;
23847 int i;
23848
23849 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23850 if (val == 0) /* XXX */
23851 return 0;
23852
23853 for (i = 0; i < 25; i++)
23854 if ((val & (mask << i)) == val)
23855 return 1;
23856
23857 return 0;
23858 }
23859
23860 /* Returns nonzero if the current function contains,
23861 or might contain, a far jump. */
23862 static int
23863 thumb_far_jump_used_p (void)
23864 {
23865 rtx_insn *insn;
23866 bool far_jump = false;
23867 unsigned int func_size = 0;
23868
23869 /* This test is only important for leaf functions. */
23870 /* assert (!leaf_function_p ()); */
23871
23872 /* If we have already decided that far jumps may be used,
23873 do not bother checking again, and always return true even if
23874 it turns out that they are not being used. Once we have made
23875 the decision that far jumps are present (and that hence the link
23876 register will be pushed onto the stack) we cannot go back on it. */
23877 if (cfun->machine->far_jump_used)
23878 return 1;
23879
23880 /* If this function is not being called from the prologue/epilogue
23881 generation code then it must be being called from the
23882 INITIAL_ELIMINATION_OFFSET macro. */
23883 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23884 {
23885 /* In this case we know that we are being asked about the elimination
23886 of the arg pointer register. If that register is not being used,
23887 then there are no arguments on the stack, and we do not have to
23888 worry that a far jump might force the prologue to push the link
23889 register, changing the stack offsets. In this case we can just
23890 return false, since the presence of far jumps in the function will
23891 not affect stack offsets.
23892
23893 If the arg pointer is live (or if it was live, but has now been
23894 eliminated and so set to dead) then we do have to test to see if
23895 the function might contain a far jump. This test can lead to some
23896 false positives, since before reload is completed the length of
23897 branch instructions is not known, so gcc defaults to returning their
23898 longest length, which in turn sets the far jump attribute to true.
23899
23900 A false positive will not result in bad code being generated, but it
23901 will result in a needless push and pop of the link register. We
23902 hope that this does not occur too often.
23903
23904 If we need doubleword stack alignment this could affect the other
23905 elimination offsets so we can't risk getting it wrong. */
23906 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23907 cfun->machine->arg_pointer_live = 1;
23908 else if (!cfun->machine->arg_pointer_live)
23909 return 0;
23910 }
23911
23912 /* We should not change far_jump_used during or after reload, as there is
23913 no chance to change stack frame layout. */
23914 if (reload_in_progress || reload_completed)
23915 return 0;
23916
23917 /* Check to see if the function contains a branch
23918 insn with the far jump attribute set. */
23919 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23920 {
23921 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23922 {
23923 far_jump = true;
23924 }
23925 func_size += get_attr_length (insn);
23926 }
23927
23928 /* The far_jump attribute will always be true for thumb1 before the
23929 shorten_branch pass, so checking it before shorten_branch is not
23930 very useful.
23931
23932 The following heuristic tries to estimate more accurately whether a
23933 far jump may finally be used. The heuristic is very conservative, as
23934 there is no chance to roll back a decision not to use far jumps.
23935
23936 The Thumb1 long branch offset is -2048 to 2046. In the worst case
23937 each 2-byte insn is associated with a 4-byte constant pool entry, so
23938 using function size 2048/3 as the threshold is conservative enough. */
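/* For example, in the worst case of 2 + 4 = 6 bytes per 2-byte insn, a
   function with func_size == 683 gives 683 * 3 = 2049 >= 2048, so the
   decision to use far jumps is kept; func_size == 682 gives 2046 and the
   heuristic declines.  */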
23939 if (far_jump)
23940 {
23941 if ((func_size * 3) >= 2048)
23942 {
23943 /* Record the fact that we have decided that
23944 the function does use far jumps. */
23945 cfun->machine->far_jump_used = 1;
23946 return 1;
23947 }
23948 }
23949
23950 return 0;
23951 }
23952
23953 /* Return nonzero if FUNC must be entered in ARM mode. */
23954 static bool
23955 is_called_in_ARM_mode (tree func)
23956 {
23957 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23958
23959 /* Ignore the problem about functions whose address is taken. */
23960 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23961 return true;
23962
23963 #ifdef ARM_PE
23964 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23965 #else
23966 return false;
23967 #endif
23968 }
23969
23970 /* Given the stack offsets and register mask in OFFSETS, decide how
23971 many additional registers to push instead of subtracting a constant
23972 from SP. For epilogues the principle is the same except we use pop.
23973 FOR_PROLOGUE indicates which we're generating. */
23974 static int
23975 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23976 {
23977 HOST_WIDE_INT amount;
23978 unsigned long live_regs_mask = offsets->saved_regs_mask;
23979 /* Extract a mask of the ones we can give to the Thumb's push/pop
23980 instruction. */
23981 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23982 /* Then count how many other high registers will need to be pushed. */
23983 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23984 int n_free, reg_base, size;
23985
23986 if (!for_prologue && frame_pointer_needed)
23987 amount = offsets->locals_base - offsets->saved_regs;
23988 else
23989 amount = offsets->outgoing_args - offsets->saved_regs;
23990
23991 /* If the stack frame size is 512 exactly, we can save one load
23992 instruction, which should make this a win even when optimizing
23993 for speed. */
23994 if (!optimize_size && amount != 512)
23995 return 0;
23996
23997 /* Can't do this if there are high registers to push. */
23998 if (high_regs_pushed != 0)
23999 return 0;
24000
24001 /* Shouldn't do it in the prologue if no registers would normally
24002 be pushed at all. In the epilogue, also allow it if we'll have
24003 a pop insn for the PC. */
24004 if (l_mask == 0
24005 && (for_prologue
24006 || TARGET_BACKTRACE
24007 || (live_regs_mask & 1 << LR_REGNUM) == 0
24008 || TARGET_INTERWORK
24009 || crtl->args.pretend_args_size != 0))
24010 return 0;
24011
24012 /* Don't do this if thumb_expand_prologue wants to emit instructions
24013 between the push and the stack frame allocation. */
24014 if (for_prologue
24015 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24016 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24017 return 0;
24018
24019 reg_base = 0;
24020 n_free = 0;
24021 if (!for_prologue)
24022 {
24023 size = arm_size_return_regs ();
24024 reg_base = ARM_NUM_INTS (size);
24025 live_regs_mask >>= reg_base;
24026 }
24027
24028 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24029 && (for_prologue || call_used_regs[reg_base + n_free]))
24030 {
24031 live_regs_mask >>= 1;
24032 n_free++;
24033 }
24034
24035 if (n_free == 0)
24036 return 0;
24037 gcc_assert (amount / 4 * 4 == amount);
24038
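/* A Thumb-1 "sub sp, #imm" can only subtract up to 508 bytes (127 words),
   so frames of 512 bytes or more normally need an extra instruction.
   For example, amount == 516 with two free low registers returns 2:
   pushing two extra registers leaves a 508-byte adjustment, which fits
   in a single instruction.  */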
24039 if (amount >= 512 && (amount - n_free * 4) < 512)
24040 return (amount - 508) / 4;
24041 if (amount <= n_free * 4)
24042 return amount / 4;
24043 return 0;
24044 }
24045
24046 /* Output, as assembly text, the parts of the epilogue which aren't usefully expanded as rtl. */
24047 const char *
24048 thumb1_unexpanded_epilogue (void)
24049 {
24050 arm_stack_offsets *offsets;
24051 int regno;
24052 unsigned long live_regs_mask = 0;
24053 int high_regs_pushed = 0;
24054 int extra_pop;
24055 int had_to_push_lr;
24056 int size;
24057
24058 if (cfun->machine->return_used_this_function != 0)
24059 return "";
24060
24061 if (IS_NAKED (arm_current_func_type ()))
24062 return "";
24063
24064 offsets = arm_get_frame_offsets ();
24065 live_regs_mask = offsets->saved_regs_mask;
24066 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24067
24068 /* We can deduce the registers used from the function's return value.
24069 This is more reliable than examining df_regs_ever_live_p () because that
24070 will be set if the register is ever used in the function, not just if
24071 the register is used to hold a return value. */
24072 size = arm_size_return_regs ();
24073
24074 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24075 if (extra_pop > 0)
24076 {
24077 unsigned long extra_mask = (1 << extra_pop) - 1;
24078 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24079 }
24080
24081 /* The prolog may have pushed some high registers to use as
24082 work registers. e.g. the testsuite file:
24083 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24084 compiles to produce:
24085 push {r4, r5, r6, r7, lr}
24086 mov r7, r9
24087 mov r6, r8
24088 push {r6, r7}
24089 as part of the prolog. We have to undo that pushing here. */
24090
24091 if (high_regs_pushed)
24092 {
24093 unsigned long mask = live_regs_mask & 0xff;
24094 int next_hi_reg;
24095
24096 /* The available low registers depend on the size of the value we are
24097 returning. */
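/* The return value occupies r0-r3; r3 is free once the value fits in
   12 bytes, and r2 as well once it fits in 8.  */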
24098 if (size <= 12)
24099 mask |= 1 << 3;
24100 if (size <= 8)
24101 mask |= 1 << 2;
24102
24103 if (mask == 0)
24104 /* Oh dear! We have no low registers into which we can pop
24105 high registers! */
24106 internal_error
24107 ("no low registers available for popping high registers");
24108
24109 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24110 if (live_regs_mask & (1 << next_hi_reg))
24111 break;
24112
24113 while (high_regs_pushed)
24114 {
24115 /* Find lo register(s) into which the high register(s) can
24116 be popped. */
24117 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24118 {
24119 if (mask & (1 << regno))
24120 high_regs_pushed--;
24121 if (high_regs_pushed == 0)
24122 break;
24123 }
24124
24125 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24126
24127 /* Pop the values into the low register(s). */
24128 thumb_pop (asm_out_file, mask);
24129
24130 /* Move the value(s) into the high registers. */
24131 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24132 {
24133 if (mask & (1 << regno))
24134 {
24135 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24136 regno);
24137
24138 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24139 if (live_regs_mask & (1 << next_hi_reg))
24140 break;
24141 }
24142 }
24143 }
24144 live_regs_mask &= ~0x0f00;
24145 }
24146
24147 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24148 live_regs_mask &= 0xff;
24149
24150 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24151 {
24152 /* Pop the return address into the PC. */
24153 if (had_to_push_lr)
24154 live_regs_mask |= 1 << PC_REGNUM;
24155
24156 /* Either no argument registers were pushed or a backtrace
24157 structure was created which includes an adjusted stack
24158 pointer, so just pop everything. */
24159 if (live_regs_mask)
24160 thumb_pop (asm_out_file, live_regs_mask);
24161
24162 /* We have either just popped the return address into the
24163 PC or it was kept in LR for the entire function.
24164 Note that thumb_pop has already called thumb_exit if the
24165 PC was in the list. */
24166 if (!had_to_push_lr)
24167 thumb_exit (asm_out_file, LR_REGNUM);
24168 }
24169 else
24170 {
24171 /* Pop everything but the return address. */
24172 if (live_regs_mask)
24173 thumb_pop (asm_out_file, live_regs_mask);
24174
24175 if (had_to_push_lr)
24176 {
24177 if (size > 12)
24178 {
24179 /* We have no free low regs, so save one. */
24180 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24181 LAST_ARG_REGNUM);
24182 }
24183
24184 /* Get the return address into a temporary register. */
24185 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24186
24187 if (size > 12)
24188 {
24189 /* Move the return address to lr. */
24190 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24191 LAST_ARG_REGNUM);
24192 /* Restore the low register. */
24193 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24194 IP_REGNUM);
24195 regno = LR_REGNUM;
24196 }
24197 else
24198 regno = LAST_ARG_REGNUM;
24199 }
24200 else
24201 regno = LR_REGNUM;
24202
24203 /* Remove the argument registers that were pushed onto the stack. */
24204 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24205 SP_REGNUM, SP_REGNUM,
24206 crtl->args.pretend_args_size);
24207
24208 thumb_exit (asm_out_file, regno);
24209 }
24210
24211 return "";
24212 }
24213
24214 /* Functions to save and restore machine-specific function data. */
24215 static struct machine_function *
24216 arm_init_machine_status (void)
24217 {
24218 struct machine_function *machine;
24219 machine = ggc_cleared_alloc<machine_function> ();
24220
24221 #if ARM_FT_UNKNOWN != 0
24222 machine->func_type = ARM_FT_UNKNOWN;
24223 #endif
24224 return machine;
24225 }
24226
24227 /* Return an RTX indicating where the return address to the
24228 calling function can be found. */
24229 rtx
24230 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24231 {
24232 if (count != 0)
24233 return NULL_RTX;
24234
24235 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24236 }
24237
24238 /* Do anything needed before RTL is emitted for each function. */
24239 void
24240 arm_init_expanders (void)
24241 {
24242 /* Arrange to initialize and mark the machine per-function status. */
24243 init_machine_status = arm_init_machine_status;
24244
24245 /* This is to stop the combine pass optimizing away the alignment
24246 adjustment of va_arg. */
24247 /* ??? It is claimed that this should not be necessary. */
24248 if (cfun)
24249 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24250 }
24251
24252 /* Return true if FUNC's target options select a different instruction set mode (ARM vs. Thumb) from the mode currently in effect. */
24253
24254 bool
24255 arm_change_mode_p (tree func)
24256 {
24257 if (TREE_CODE (func) != FUNCTION_DECL)
24258 return false;
24259
24260 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24261
24262 if (!callee_tree)
24263 callee_tree = target_option_default_node;
24264
24265 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24266 int flags = callee_opts->x_target_flags;
24267
24268 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24269 }
24270
24271 /* Like arm_compute_initial_elimination_offset. Simpler because there
24272 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24273 to point at the base of the local variables after static stack
24274 space for a function has been allocated. */
24275
24276 HOST_WIDE_INT
24277 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24278 {
24279 arm_stack_offsets *offsets;
24280
24281 offsets = arm_get_frame_offsets ();
24282
24283 switch (from)
24284 {
24285 case ARG_POINTER_REGNUM:
24286 switch (to)
24287 {
24288 case STACK_POINTER_REGNUM:
24289 return offsets->outgoing_args - offsets->saved_args;
24290
24291 case FRAME_POINTER_REGNUM:
24292 return offsets->soft_frame - offsets->saved_args;
24293
24294 case ARM_HARD_FRAME_POINTER_REGNUM:
24295 return offsets->saved_regs - offsets->saved_args;
24296
24297 case THUMB_HARD_FRAME_POINTER_REGNUM:
24298 return offsets->locals_base - offsets->saved_args;
24299
24300 default:
24301 gcc_unreachable ();
24302 }
24303 break;
24304
24305 case FRAME_POINTER_REGNUM:
24306 switch (to)
24307 {
24308 case STACK_POINTER_REGNUM:
24309 return offsets->outgoing_args - offsets->soft_frame;
24310
24311 case ARM_HARD_FRAME_POINTER_REGNUM:
24312 return offsets->saved_regs - offsets->soft_frame;
24313
24314 case THUMB_HARD_FRAME_POINTER_REGNUM:
24315 return offsets->locals_base - offsets->soft_frame;
24316
24317 default:
24318 gcc_unreachable ();
24319 }
24320 break;
24321
24322 default:
24323 gcc_unreachable ();
24324 }
24325 }
24326
24327 /* Generate the function's prologue. */
24328
24329 void
24330 thumb1_expand_prologue (void)
24331 {
24332 rtx_insn *insn;
24333
24334 HOST_WIDE_INT amount;
24335 arm_stack_offsets *offsets;
24336 unsigned long func_type;
24337 int regno;
24338 unsigned long live_regs_mask;
24339 unsigned long l_mask;
24340 unsigned high_regs_pushed = 0;
24341
24342 func_type = arm_current_func_type ();
24343
24344 /* Naked functions don't have prologues. */
24345 if (IS_NAKED (func_type))
24346 return;
24347
24348 if (IS_INTERRUPT (func_type))
24349 {
24350 error ("interrupt Service Routines cannot be coded in Thumb mode");
24351 return;
24352 }
24353
24354 if (is_called_in_ARM_mode (current_function_decl))
24355 emit_insn (gen_prologue_thumb1_interwork ());
24356
24357 offsets = arm_get_frame_offsets ();
24358 live_regs_mask = offsets->saved_regs_mask;
24359
24360 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24361 l_mask = live_regs_mask & 0x40ff;
24362 /* Then count how many other high registers will need to be pushed. */
24363 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24364
24365 if (crtl->args.pretend_args_size)
24366 {
24367 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24368
24369 if (cfun->machine->uses_anonymous_args)
24370 {
24371 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24372 unsigned long mask;
24373
24374 mask = 1ul << (LAST_ARG_REGNUM + 1);
24375 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
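/* For example, with LAST_ARG_REGNUM == 3 and num_pushes == 2 this
   selects the two highest argument registers, r2 and r3 (mask == 0xc).  */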
24376
24377 insn = thumb1_emit_multi_reg_push (mask, 0);
24378 }
24379 else
24380 {
24381 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24382 stack_pointer_rtx, x));
24383 }
24384 RTX_FRAME_RELATED_P (insn) = 1;
24385 }
24386
24387 if (TARGET_BACKTRACE)
24388 {
24389 HOST_WIDE_INT offset = 0;
24390 unsigned work_register;
24391 rtx work_reg, x, arm_hfp_rtx;
24392
24393 /* We have been asked to create a stack backtrace structure.
24394 The code looks like this:
24395
24396 0 .align 2
24397 0 func:
24398 0 sub SP, #16 Reserve space for 4 registers.
24399 2 push {R7} Push low registers.
24400 4 add R7, SP, #20 Get the stack pointer before the push.
24401 6 str R7, [SP, #8] Store the stack pointer
24402 (before reserving the space).
24403 8 mov R7, PC Get hold of the start of this code + 12.
24404 10 str R7, [SP, #16] Store it.
24405 12 mov R7, FP Get hold of the current frame pointer.
24406 14 str R7, [SP, #4] Store it.
24407 16 mov R7, LR Get hold of the current return address.
24408 18 str R7, [SP, #12] Store it.
24409 20 add R7, SP, #16 Point at the start of the
24410 backtrace structure.
24411 22 mov FP, R7 Put this value into the frame pointer. */
24412
24413 work_register = thumb_find_work_register (live_regs_mask);
24414 work_reg = gen_rtx_REG (SImode, work_register);
24415 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24416
24417 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24418 stack_pointer_rtx, GEN_INT (-16)));
24419 RTX_FRAME_RELATED_P (insn) = 1;
24420
24421 if (l_mask)
24422 {
24423 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24424 RTX_FRAME_RELATED_P (insn) = 1;
24425
24426 offset = bit_count (l_mask) * UNITS_PER_WORD;
24427 }
24428
24429 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24430 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24431
24432 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24433 x = gen_frame_mem (SImode, x);
24434 emit_move_insn (x, work_reg);
24435
24436 /* Make sure that the instruction fetching the PC is in the right place
24437 to calculate "start of backtrace creation code + 12". */
24438 /* ??? The stores using the common WORK_REG ought to be enough to
24439 prevent the scheduler from doing anything weird. Failing that
24440 we could always move all of the following into an UNSPEC_VOLATILE. */
24441 if (l_mask)
24442 {
24443 x = gen_rtx_REG (SImode, PC_REGNUM);
24444 emit_move_insn (work_reg, x);
24445
24446 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24447 x = gen_frame_mem (SImode, x);
24448 emit_move_insn (x, work_reg);
24449
24450 emit_move_insn (work_reg, arm_hfp_rtx);
24451
24452 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24453 x = gen_frame_mem (SImode, x);
24454 emit_move_insn (x, work_reg);
24455 }
24456 else
24457 {
24458 emit_move_insn (work_reg, arm_hfp_rtx);
24459
24460 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24461 x = gen_frame_mem (SImode, x);
24462 emit_move_insn (x, work_reg);
24463
24464 x = gen_rtx_REG (SImode, PC_REGNUM);
24465 emit_move_insn (work_reg, x);
24466
24467 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24468 x = gen_frame_mem (SImode, x);
24469 emit_move_insn (x, work_reg);
24470 }
24471
24472 x = gen_rtx_REG (SImode, LR_REGNUM);
24473 emit_move_insn (work_reg, x);
24474
24475 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24476 x = gen_frame_mem (SImode, x);
24477 emit_move_insn (x, work_reg);
24478
24479 x = GEN_INT (offset + 12);
24480 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24481
24482 emit_move_insn (arm_hfp_rtx, work_reg);
24483 }
24484 /* Optimization: If we are not pushing any low registers but we are going
24485 to push some high registers then delay our first push. This will just
24486 be a push of LR and we can combine it with the push of the first high
24487 register. */
24488 else if ((l_mask & 0xff) != 0
24489 || (high_regs_pushed == 0 && l_mask))
24490 {
24491 unsigned long mask = l_mask;
24492 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24493 insn = thumb1_emit_multi_reg_push (mask, mask);
24494 RTX_FRAME_RELATED_P (insn) = 1;
24495 }
24496
24497 if (high_regs_pushed)
24498 {
24499 unsigned pushable_regs;
24500 unsigned next_hi_reg;
24501 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24502 : crtl->args.info.nregs;
24503 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24504
24505 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24506 if (live_regs_mask & (1 << next_hi_reg))
24507 break;
24508
24509 /* Here we need to mask out registers used for passing arguments,
24510 even if they could otherwise be pushed: using them to stash the
24511 high registers would clobber the argument values. */
24512 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24513
24514 if (pushable_regs == 0)
24515 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24516
24517 while (high_regs_pushed > 0)
24518 {
24519 unsigned long real_regs_mask = 0;
24520
24521 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24522 {
24523 if (pushable_regs & (1 << regno))
24524 {
24525 emit_move_insn (gen_rtx_REG (SImode, regno),
24526 gen_rtx_REG (SImode, next_hi_reg));
24527
24528 high_regs_pushed --;
24529 real_regs_mask |= (1 << next_hi_reg);
24530
24531 if (high_regs_pushed)
24532 {
24533 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24534 next_hi_reg --)
24535 if (live_regs_mask & (1 << next_hi_reg))
24536 break;
24537 }
24538 else
24539 {
24540 pushable_regs &= ~((1 << regno) - 1);
24541 break;
24542 }
24543 }
24544 }
24545
24546 /* If we had to find a work register and we have not yet
24547 saved the LR then add it to the list of regs to push. */
24548 if (l_mask == (1 << LR_REGNUM))
24549 {
24550 pushable_regs |= l_mask;
24551 real_regs_mask |= l_mask;
24552 l_mask = 0;
24553 }
24554
24555 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24556 RTX_FRAME_RELATED_P (insn) = 1;
24557 }
24558 }
24559
24560 /* Load the pic register before setting the frame pointer,
24561 so we can use r7 as a temporary work register. */
24562 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24563 arm_load_pic_register (live_regs_mask);
24564
24565 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24566 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24567 stack_pointer_rtx);
24568
24569 if (flag_stack_usage_info)
24570 current_function_static_stack_size
24571 = offsets->outgoing_args - offsets->saved_args;
24572
24573 amount = offsets->outgoing_args - offsets->saved_regs;
24574 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24575 if (amount)
24576 {
24577 if (amount < 512)
24578 {
24579 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24580 GEN_INT (- amount)));
24581 RTX_FRAME_RELATED_P (insn) = 1;
24582 }
24583 else
24584 {
24585 rtx reg, dwarf;
24586
24587 /* The stack decrement is too big for an immediate value in a single
24588 insn. In theory we could issue multiple subtracts, but after
24589 three of them it becomes more space efficient to place the full
24590 value in the constant pool and load into a register. (Also the
24591 ARM debugger really likes to see only one stack decrement per
24592 function). So instead we look for a scratch register into which
24593 we can load the decrement, and then we subtract this from the
24594 stack pointer. Unfortunately on the thumb the only available
24595 scratch registers are the argument registers, and we cannot use
24596 these as they may hold arguments to the function. Instead we
24597 attempt to locate a call preserved register which is used by this
24598 function. If we can find one, then we know that it will have
24599 been pushed at the start of the prologue and so we can corrupt
24600 it now. */
24601 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24602 if (live_regs_mask & (1 << regno))
24603 break;
24604
24605 gcc_assert(regno <= LAST_LO_REGNUM);
24606
24607 reg = gen_rtx_REG (SImode, regno);
24608
24609 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24610
24611 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24612 stack_pointer_rtx, reg));
24613
24614 dwarf = gen_rtx_SET (stack_pointer_rtx,
24615 plus_constant (Pmode, stack_pointer_rtx,
24616 -amount));
24617 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24618 RTX_FRAME_RELATED_P (insn) = 1;
24619 }
24620 }
24621
24622 if (frame_pointer_needed)
24623 thumb_set_frame_pointer (offsets);
24624
24625 /* If we are profiling, make sure no instructions are scheduled before
24626 the call to mcount. Similarly if the user has requested no
24627 scheduling in the prolog. Similarly if we want non-call exceptions
24628 using the EABI unwinder, to prevent faulting instructions from being
24629 swapped with a stack adjustment. */
24630 if (crtl->profile || !TARGET_SCHED_PROLOG
24631 || (arm_except_unwind_info (&global_options) == UI_TARGET
24632 && cfun->can_throw_non_call_exceptions))
24633 emit_insn (gen_blockage ());
24634
24635 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24636 if (live_regs_mask & 0xff)
24637 cfun->machine->lr_save_eliminated = 0;
24638 }
24639
24640 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24641 single POP instruction can be generated. LR should be replaced by PC. All
24642 the checks required are already done by USE_RETURN_INSN (). Hence,
24643 all we really need to check here is whether a single register or
24644 multiple registers are to be popped. */
24645 void
24646 thumb2_expand_return (bool simple_return)
24647 {
24648 int i, num_regs;
24649 unsigned long saved_regs_mask;
24650 arm_stack_offsets *offsets;
24651
24652 offsets = arm_get_frame_offsets ();
24653 saved_regs_mask = offsets->saved_regs_mask;
24654
24655 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24656 if (saved_regs_mask & (1 << i))
24657 num_regs++;
24658
24659 if (!simple_return && saved_regs_mask)
24660 {
24661 if (num_regs == 1)
24662 {
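/* Restore the single saved register (LR, per the USE_RETURN_INSN checks
   noted above) straight into the PC with a post-incremented SP load,
   combined with the return in one PARALLEL.  */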
24663 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24664 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24665 rtx addr = gen_rtx_MEM (SImode,
24666 gen_rtx_POST_INC (SImode,
24667 stack_pointer_rtx));
24668 set_mem_alias_set (addr, get_frame_alias_set ());
24669 XVECEXP (par, 0, 0) = ret_rtx;
24670 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24671 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24672 emit_jump_insn (par);
24673 }
24674 else
24675 {
24676 saved_regs_mask &= ~ (1 << LR_REGNUM);
24677 saved_regs_mask |= (1 << PC_REGNUM);
24678 arm_emit_multi_reg_pop (saved_regs_mask);
24679 }
24680 }
24681 else
24682 {
24683 emit_jump_insn (simple_return_rtx);
24684 }
24685 }
24686
24687 void
24688 thumb1_expand_epilogue (void)
24689 {
24690 HOST_WIDE_INT amount;
24691 arm_stack_offsets *offsets;
24692 int regno;
24693
24694 /* Naked functions don't have epilogues. */
24695 if (IS_NAKED (arm_current_func_type ()))
24696 return;
24697
24698 offsets = arm_get_frame_offsets ();
24699 amount = offsets->outgoing_args - offsets->saved_regs;
24700
24701 if (frame_pointer_needed)
24702 {
24703 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24704 amount = offsets->locals_base - offsets->saved_regs;
24705 }
24706 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24707
24708 gcc_assert (amount >= 0);
24709 if (amount)
24710 {
24711 emit_insn (gen_blockage ());
24712
24713 if (amount < 512)
24714 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24715 GEN_INT (amount)));
24716 else
24717 {
24718 /* r3 is always free in the epilogue. */
24719 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24720
24721 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24722 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24723 }
24724 }
24725
24726 /* Emit a USE (stack_pointer_rtx), so that
24727 the stack adjustment will not be deleted. */
24728 emit_insn (gen_force_register_use (stack_pointer_rtx));
24729
24730 if (crtl->profile || !TARGET_SCHED_PROLOG)
24731 emit_insn (gen_blockage ());
24732
24733 /* Emit a clobber for each register that will be restored in the epilogue,
24734 so that flow2 will get register lifetimes correct. */
24735 for (regno = 0; regno < 13; regno++)
24736 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24737 emit_clobber (gen_rtx_REG (SImode, regno));
24738
24739 if (! df_regs_ever_live_p (LR_REGNUM))
24740 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24741 }
24742
24743 /* Epilogue code for APCS frame. */
24744 static void
24745 arm_expand_epilogue_apcs_frame (bool really_return)
24746 {
24747 unsigned long func_type;
24748 unsigned long saved_regs_mask;
24749 int num_regs = 0;
24750 int i;
24751 int floats_from_frame = 0;
24752 arm_stack_offsets *offsets;
24753
24754 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24755 func_type = arm_current_func_type ();
24756
24757 /* Get frame offsets for ARM. */
24758 offsets = arm_get_frame_offsets ();
24759 saved_regs_mask = offsets->saved_regs_mask;
24760
24761 /* Find the offset of the floating-point save area in the frame. */
24762 floats_from_frame
24763 = (offsets->saved_args
24764 + arm_compute_static_chain_stack_bytes ()
24765 - offsets->frame);
24766
24767 /* Compute how many core registers are saved and how far away the floats are. */
24768 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24769 if (saved_regs_mask & (1 << i))
24770 {
24771 num_regs++;
24772 floats_from_frame += 4;
24773 }
24774
24775 if (TARGET_HARD_FLOAT && TARGET_VFP)
24776 {
24777 int start_reg;
24778 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24779
24780 /* The offset is from IP_REGNUM. */
24781 int saved_size = arm_get_vfp_saved_size ();
24782 if (saved_size > 0)
24783 {
24784 rtx_insn *insn;
24785 floats_from_frame += saved_size;
24786 insn = emit_insn (gen_addsi3 (ip_rtx,
24787 hard_frame_pointer_rtx,
24788 GEN_INT (-floats_from_frame)));
24789 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24790 ip_rtx, hard_frame_pointer_rtx);
24791 }
24792
24793 /* Generate VFP register multi-pop. */
24794 start_reg = FIRST_VFP_REGNUM;
24795
24796 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24797 /* Look for a case where a reg does not need restoring. */
24798 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24799 && (!df_regs_ever_live_p (i + 1)
24800 || call_used_regs[i + 1]))
24801 {
24802 if (start_reg != i)
24803 arm_emit_vfp_multi_reg_pop (start_reg,
24804 (i - start_reg) / 2,
24805 gen_rtx_REG (SImode,
24806 IP_REGNUM));
24807 start_reg = i + 2;
24808 }
24809
24810 /* Restore the remaining regs that we have discovered (or possibly
24811 even all of them, if the conditional in the for loop never
24812 fired). */
24813 if (start_reg != i)
24814 arm_emit_vfp_multi_reg_pop (start_reg,
24815 (i - start_reg) / 2,
24816 gen_rtx_REG (SImode, IP_REGNUM));
24817 }
24818
24819 if (TARGET_IWMMXT)
24820 {
24821 /* The frame pointer is guaranteed to be non-double-word aligned, as
24822 it is set to double-word-aligned old_stack_pointer - 4. */
24823 rtx_insn *insn;
24824 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24825
24826 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24827 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24828 {
24829 rtx addr = gen_frame_mem (V2SImode,
24830 plus_constant (Pmode, hard_frame_pointer_rtx,
24831 - lrm_count * 4));
24832 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24833 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24834 gen_rtx_REG (V2SImode, i),
24835 NULL_RTX);
24836 lrm_count += 2;
24837 }
24838 }
24839
24840 /* saved_regs_mask should contain IP, which holds the old stack pointer
24841 from the time the frame was created. Since SP and IP are adjacent
24842 registers, we can restore the value directly into SP. */
24843 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24844 saved_regs_mask &= ~(1 << IP_REGNUM);
24845 saved_regs_mask |= (1 << SP_REGNUM);
24846
24847 /* There are two registers left in saved_regs_mask - LR and PC. We
24848 only need to restore LR (the return address), but to
24849 save time we can load it directly into PC, unless we need a
24850 special function exit sequence, or we are not really returning. */
24851 if (really_return
24852 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24853 && !crtl->calls_eh_return)
24854 /* Delete LR from the register mask, so that LR on
24855 the stack is loaded into the PC in the register mask. */
24856 saved_regs_mask &= ~(1 << LR_REGNUM);
24857 else
24858 saved_regs_mask &= ~(1 << PC_REGNUM);
24859
24860 num_regs = bit_count (saved_regs_mask);
24861 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24862 {
24863 rtx_insn *insn;
24864 emit_insn (gen_blockage ());
24865 /* Unwind the stack to just below the saved registers. */
24866 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24867 hard_frame_pointer_rtx,
24868 GEN_INT (- 4 * num_regs)));
24869
24870 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24871 stack_pointer_rtx, hard_frame_pointer_rtx);
24872 }
24873
24874 arm_emit_multi_reg_pop (saved_regs_mask);
24875
24876 if (IS_INTERRUPT (func_type))
24877 {
24878 /* Interrupt handlers will have pushed the
24879 IP onto the stack, so restore it now. */
24880 rtx_insn *insn;
24881 rtx addr = gen_rtx_MEM (SImode,
24882 gen_rtx_POST_INC (SImode,
24883 stack_pointer_rtx));
24884 set_mem_alias_set (addr, get_frame_alias_set ());
24885 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24886 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24887 gen_rtx_REG (SImode, IP_REGNUM),
24888 NULL_RTX);
24889 }
24890
24891 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24892 return;
24893
24894 if (crtl->calls_eh_return)
24895 emit_insn (gen_addsi3 (stack_pointer_rtx,
24896 stack_pointer_rtx,
24897 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24898
24899 if (IS_STACKALIGN (func_type))
24900 /* Restore the original stack pointer. Before prologue, the stack was
24901 realigned and the original stack pointer saved in r0. For details,
24902 see comment in arm_expand_prologue. */
24903 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24904
24905 emit_jump_insn (simple_return_rtx);
24906 }
24907
24908 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24909 function is not a sibcall. */
24910 void
24911 arm_expand_epilogue (bool really_return)
24912 {
24913 unsigned long func_type;
24914 unsigned long saved_regs_mask;
24915 int num_regs = 0;
24916 int i;
24917 int amount;
24918 arm_stack_offsets *offsets;
24919
24920 func_type = arm_current_func_type ();
24921
24922 /* Naked functions don't have an epilogue, so just generate the return
24923 pattern and let output_return_instruction take care of any instruction emission. */
24924 if (IS_NAKED (func_type)
24925 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24926 {
24927 if (really_return)
24928 emit_jump_insn (simple_return_rtx);
24929 return;
24930 }
24931
24932 /* If we are throwing an exception, then we really must be doing a
24933 return, so we can't tail-call. */
24934 gcc_assert (!crtl->calls_eh_return || really_return);
24935
24936 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24937 {
24938 arm_expand_epilogue_apcs_frame (really_return);
24939 return;
24940 }
24941
24942 /* Get frame offsets for ARM. */
24943 offsets = arm_get_frame_offsets ();
24944 saved_regs_mask = offsets->saved_regs_mask;
24945 num_regs = bit_count (saved_regs_mask);
24946
24947 if (frame_pointer_needed)
24948 {
24949 rtx_insn *insn;
24950 /* Restore stack pointer if necessary. */
24951 if (TARGET_ARM)
24952 {
24953 /* In ARM mode, frame pointer points to first saved register.
24954 Restore stack pointer to last saved register. */
24955 amount = offsets->frame - offsets->saved_regs;
24956
24957 /* Force out any pending memory operations that reference stacked data
24958 before stack de-allocation occurs. */
24959 emit_insn (gen_blockage ());
24960 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24961 hard_frame_pointer_rtx,
24962 GEN_INT (amount)));
24963 arm_add_cfa_adjust_cfa_note (insn, amount,
24964 stack_pointer_rtx,
24965 hard_frame_pointer_rtx);
24966
24967 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24968 deleted. */
24969 emit_insn (gen_force_register_use (stack_pointer_rtx));
24970 }
24971 else
24972 {
24973 /* In Thumb-2 mode, the frame pointer points to the last saved
24974 register. */
24975 amount = offsets->locals_base - offsets->saved_regs;
24976 if (amount)
24977 {
24978 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24979 hard_frame_pointer_rtx,
24980 GEN_INT (amount)));
24981 arm_add_cfa_adjust_cfa_note (insn, amount,
24982 hard_frame_pointer_rtx,
24983 hard_frame_pointer_rtx);
24984 }
24985
24986 /* Force out any pending memory operations that reference stacked data
24987 before stack de-allocation occurs. */
24988 emit_insn (gen_blockage ());
24989 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24990 hard_frame_pointer_rtx));
24991 arm_add_cfa_adjust_cfa_note (insn, 0,
24992 stack_pointer_rtx,
24993 hard_frame_pointer_rtx);
24994 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24995 deleted. */
24996 emit_insn (gen_force_register_use (stack_pointer_rtx));
24997 }
24998 }
24999 else
25000 {
25001 /* Pop off outgoing args and local frame to adjust stack pointer to
25002 last saved register. */
25003 amount = offsets->outgoing_args - offsets->saved_regs;
25004 if (amount)
25005 {
25006 rtx_insn *tmp;
25007 /* Force out any pending memory operations that reference stacked data
25008 before stack de-allocation occurs. */
25009 emit_insn (gen_blockage ());
25010 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25011 stack_pointer_rtx,
25012 GEN_INT (amount)));
25013 arm_add_cfa_adjust_cfa_note (tmp, amount,
25014 stack_pointer_rtx, stack_pointer_rtx);
25015 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25016 not deleted. */
25017 emit_insn (gen_force_register_use (stack_pointer_rtx));
25018 }
25019 }
25020
25021 if (TARGET_HARD_FLOAT && TARGET_VFP)
25022 {
25023 /* Generate VFP register multi-pop. */
25024 int end_reg = LAST_VFP_REGNUM + 1;
25025
25026 /* Scan the registers in reverse order. We need to match
25027 any groupings made in the prologue and generate matching
25028 vldm operations. We have to match the groups because,
25029 unlike pop, vldm can only restore consecutive registers. */
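/* For example, if two separate runs of D registers were saved, each run
   gets its own vldm, with the higher-numbered run restored first.  */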
25030 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25031 /* Look for a case where a reg does not need restoring. */
25032 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25033 && (!df_regs_ever_live_p (i + 1)
25034 || call_used_regs[i + 1]))
25035 {
25036 /* Restore the regs discovered so far (from reg+2 to
25037 end_reg). */
25038 if (end_reg > i + 2)
25039 arm_emit_vfp_multi_reg_pop (i + 2,
25040 (end_reg - (i + 2)) / 2,
25041 stack_pointer_rtx);
25042 end_reg = i;
25043 }
25044
25045 /* Restore the remaining regs that we have discovered (or possibly
25046 even all of them, if the conditional in the for loop never
25047 fired). */
25048 if (end_reg > i + 2)
25049 arm_emit_vfp_multi_reg_pop (i + 2,
25050 (end_reg - (i + 2)) / 2,
25051 stack_pointer_rtx);
25052 }
25053
25054 if (TARGET_IWMMXT)
25055 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25056 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25057 {
25058 rtx_insn *insn;
25059 rtx addr = gen_rtx_MEM (V2SImode,
25060 gen_rtx_POST_INC (SImode,
25061 stack_pointer_rtx));
25062 set_mem_alias_set (addr, get_frame_alias_set ());
25063 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25064 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25065 gen_rtx_REG (V2SImode, i),
25066 NULL_RTX);
25067 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25068 stack_pointer_rtx, stack_pointer_rtx);
25069 }
25070
25071 if (saved_regs_mask)
25072 {
25073 rtx insn;
25074 bool return_in_pc = false;
25075
25076 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25077 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25078 && !IS_STACKALIGN (func_type)
25079 && really_return
25080 && crtl->args.pretend_args_size == 0
25081 && saved_regs_mask & (1 << LR_REGNUM)
25082 && !crtl->calls_eh_return)
25083 {
25084 saved_regs_mask &= ~(1 << LR_REGNUM);
25085 saved_regs_mask |= (1 << PC_REGNUM);
25086 return_in_pc = true;
25087 }
25088
25089 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25090 {
25091 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25092 if (saved_regs_mask & (1 << i))
25093 {
25094 rtx addr = gen_rtx_MEM (SImode,
25095 gen_rtx_POST_INC (SImode,
25096 stack_pointer_rtx));
25097 set_mem_alias_set (addr, get_frame_alias_set ());
25098
25099 if (i == PC_REGNUM)
25100 {
25101 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25102 XVECEXP (insn, 0, 0) = ret_rtx;
25103 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25104 addr);
25105 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25106 insn = emit_jump_insn (insn);
25107 }
25108 else
25109 {
25110 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25111 addr));
25112 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25113 gen_rtx_REG (SImode, i),
25114 NULL_RTX);
25115 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25116 stack_pointer_rtx,
25117 stack_pointer_rtx);
25118 }
25119 }
25120 }
25121 else
25122 {
25123 if (TARGET_LDRD
25124 && current_tune->prefer_ldrd_strd
25125 && !optimize_function_for_size_p (cfun))
25126 {
25127 if (TARGET_THUMB2)
25128 thumb2_emit_ldrd_pop (saved_regs_mask);
25129 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25130 arm_emit_ldrd_pop (saved_regs_mask);
25131 else
25132 arm_emit_multi_reg_pop (saved_regs_mask);
25133 }
25134 else
25135 arm_emit_multi_reg_pop (saved_regs_mask);
25136 }
25137
25138 if (return_in_pc)
25139 return;
25140 }
25141
25142 if (crtl->args.pretend_args_size)
25143 {
25144 int i, j;
25145 rtx dwarf = NULL_RTX;
25146 rtx_insn *tmp =
25147 emit_insn (gen_addsi3 (stack_pointer_rtx,
25148 stack_pointer_rtx,
25149 GEN_INT (crtl->args.pretend_args_size)));
25150
25151 RTX_FRAME_RELATED_P (tmp) = 1;
25152
25153 if (cfun->machine->uses_anonymous_args)
25154 {
25155 /* Restore pretend args. See arm_expand_prologue for how the
25156 pretend args are saved on the stack. */
25157 int num_regs = crtl->args.pretend_args_size / 4;
25158 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25159 for (j = 0, i = 0; j < num_regs; i++)
25160 if (saved_regs_mask & (1 << i))
25161 {
25162 rtx reg = gen_rtx_REG (SImode, i);
25163 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25164 j++;
25165 }
25166 REG_NOTES (tmp) = dwarf;
25167 }
25168 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25169 stack_pointer_rtx, stack_pointer_rtx);
25170 }
25171
25172 if (!really_return)
25173 return;
25174
25175 if (crtl->calls_eh_return)
25176 emit_insn (gen_addsi3 (stack_pointer_rtx,
25177 stack_pointer_rtx,
25178 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25179
25180 if (IS_STACKALIGN (func_type))
25181 /* Restore the original stack pointer. Before prologue, the stack was
25182 realigned and the original stack pointer saved in r0. For details,
25183 see comment in arm_expand_prologue. */
25184 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25185
25186 emit_jump_insn (simple_return_rtx);
25187 }
25188
25189 /* Implementation of insn prologue_thumb1_interwork. This is the first
25190 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25191
25192 const char *
25193 thumb1_output_interwork (void)
25194 {
25195 const char * name;
25196 FILE *f = asm_out_file;
25197
25198 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25199 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25200 == SYMBOL_REF);
25201 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25202
25203 /* Generate code sequence to switch us into Thumb mode. */
25204 /* The .code 32 directive has already been emitted by
25205 ASM_DECLARE_FUNCTION_NAME. */
25206 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25207 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25208
25209 /* Generate a label, so that the debugger will notice the
25210 change in instruction sets. This label is also used by
25211 the assembler to bypass the ARM code when this function
25212 is called from a Thumb encoded function elsewhere in the
25213 same file. Hence the definition of STUB_NAME here must
25214 agree with the definition in gas/config/tc-arm.c. */
25215
25216 #define STUB_NAME ".real_start_of"
25217
25218 fprintf (f, "\t.code\t16\n");
25219 #ifdef ARM_PE
25220 if (arm_dllexport_name_p (name))
25221 name = arm_strip_name_encoding (name);
25222 #endif
25223 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25224 fprintf (f, "\t.thumb_func\n");
25225 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25226
25227 return "";
25228 }
25229
25230 /* Handle the case of a double word load into a low register from
25231 a computed memory address. The computed address may involve a
25232 register which is overwritten by the load. */
25233 const char *
25234 thumb_load_double_from_address (rtx *operands)
25235 {
25236 rtx addr;
25237 rtx base;
25238 rtx offset;
25239 rtx arg1;
25240 rtx arg2;
25241
25242 gcc_assert (REG_P (operands[0]));
25243 gcc_assert (MEM_P (operands[1]));
25244
25245 /* Get the memory address. */
25246 addr = XEXP (operands[1], 0);
25247
25248 /* Work out how the memory address is computed. */
25249 switch (GET_CODE (addr))
25250 {
25251 case REG:
25252 operands[2] = adjust_address (operands[1], SImode, 4);
25253
25254 if (REGNO (operands[0]) == REGNO (addr))
25255 {
25256 output_asm_insn ("ldr\t%H0, %2", operands);
25257 output_asm_insn ("ldr\t%0, %1", operands);
25258 }
25259 else
25260 {
25261 output_asm_insn ("ldr\t%0, %1", operands);
25262 output_asm_insn ("ldr\t%H0, %2", operands);
25263 }
25264 break;
25265
25266 case CONST:
25267 /* Compute <address> + 4 for the high order load. */
25268 operands[2] = adjust_address (operands[1], SImode, 4);
25269
25270 output_asm_insn ("ldr\t%0, %1", operands);
25271 output_asm_insn ("ldr\t%H0, %2", operands);
25272 break;
25273
25274 case PLUS:
25275 arg1 = XEXP (addr, 0);
25276 arg2 = XEXP (addr, 1);
25277
25278 if (CONSTANT_P (arg1))
25279 base = arg2, offset = arg1;
25280 else
25281 base = arg1, offset = arg2;
25282
25283 gcc_assert (REG_P (base));
25284
25285 /* Catch the case of <address> = <reg> + <reg> */
25286 if (REG_P (offset))
25287 {
25288 int reg_offset = REGNO (offset);
25289 int reg_base = REGNO (base);
25290 int reg_dest = REGNO (operands[0]);
25291
25292 /* Add the base and offset registers together into the
25293 higher destination register. */
25294 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r\n",
25295 reg_dest + 1, reg_base, reg_offset);
25296
25297 /* Load the lower destination register from the address in
25298 the higher destination register. */
25299 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]\n",
25300 reg_dest, reg_dest + 1);
25301
25302 /* Load the higher destination register from its own address
25303 plus 4. */
25304 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]\n",
25305 reg_dest + 1, reg_dest + 1);
25306 }
25307 else
25308 {
25309 /* Compute <address> + 4 for the high order load. */
25310 operands[2] = adjust_address (operands[1], SImode, 4);
25311
25312 /* If the computed address is held in the low order register
25313 then load the high order register first, otherwise always
25314 load the low order register first. */
25315 if (REGNO (operands[0]) == REGNO (base))
25316 {
25317 output_asm_insn ("ldr\t%H0, %2", operands);
25318 output_asm_insn ("ldr\t%0, %1", operands);
25319 }
25320 else
25321 {
25322 output_asm_insn ("ldr\t%0, %1", operands);
25323 output_asm_insn ("ldr\t%H0, %2", operands);
25324 }
25325 }
25326 break;
25327
25328 case LABEL_REF:
25329 /* With no registers to worry about we can just load the value
25330 directly. */
25331 operands[2] = adjust_address (operands[1], SImode, 4);
25332
25333 output_asm_insn ("ldr\t%H0, %2", operands);
25334 output_asm_insn ("ldr\t%0, %1", operands);
25335 break;
25336
25337 default:
25338 gcc_unreachable ();
25339 }
25340
25341 return "";
25342 }
25343
25344 const char *
25345 thumb_output_move_mem_multiple (int n, rtx *operands)
25346 {
25347 rtx tmp;
25348
25349 switch (n)
25350 {
25351 case 2:
25352 if (REGNO (operands[4]) > REGNO (operands[5]))
25353 {
25354 tmp = operands[4];
25355 operands[4] = operands[5];
25356 operands[5] = tmp;
25357 }
25358 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25359 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25360 break;
25361
25362 case 3:
25363 if (REGNO (operands[4]) > REGNO (operands[5]))
25364 std::swap (operands[4], operands[5]);
25365 if (REGNO (operands[5]) > REGNO (operands[6]))
25366 std::swap (operands[5], operands[6]);
25367 if (REGNO (operands[4]) > REGNO (operands[5]))
25368 std::swap (operands[4], operands[5]);
25369
25370 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25371 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25372 break;
25373
25374 default:
25375 gcc_unreachable ();
25376 }
25377
25378 return "";
25379 }
25380
25381 /* Output a call-via instruction for thumb state. */
25382 const char *
25383 thumb_call_via_reg (rtx reg)
25384 {
25385 int regno = REGNO (reg);
25386 rtx *labelp;
25387
25388 gcc_assert (regno < LR_REGNUM);
25389
25390 /* If we are in the normal text section we can use a single instance
25391 per compilation unit. If we are doing function sections, then we need
25392 an entry per section, since we can't rely on reachability. */
25393 if (in_section == text_section)
25394 {
25395 thumb_call_reg_needed = 1;
25396
25397 if (thumb_call_via_label[regno] == NULL)
25398 thumb_call_via_label[regno] = gen_label_rtx ();
25399 labelp = thumb_call_via_label + regno;
25400 }
25401 else
25402 {
25403 if (cfun->machine->call_via[regno] == NULL)
25404 cfun->machine->call_via[regno] = gen_label_rtx ();
25405 labelp = cfun->machine->call_via + regno;
25406 }
25407
25408 output_asm_insn ("bl\t%a0", labelp);
25409 return "";
25410 }
25411
25412 /* Routines for generating rtl. */
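/* Expand a block copy of constant length OPERANDS[2] bytes from
   OPERANDS[1] to OPERANDS[0] for Thumb: copy 12- and 8-byte chunks with
   the movmem12b/movmem8b patterns and mop up the remainder with word,
   half-word and byte moves.  */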
25413 void
25414 thumb_expand_movmemqi (rtx *operands)
25415 {
25416 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25417 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25418 HOST_WIDE_INT len = INTVAL (operands[2]);
25419 HOST_WIDE_INT offset = 0;
25420
25421 while (len >= 12)
25422 {
25423 emit_insn (gen_movmem12b (out, in, out, in));
25424 len -= 12;
25425 }
25426
25427 if (len >= 8)
25428 {
25429 emit_insn (gen_movmem8b (out, in, out, in));
25430 len -= 8;
25431 }
25432
25433 if (len >= 4)
25434 {
25435 rtx reg = gen_reg_rtx (SImode);
25436 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25437 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25438 len -= 4;
25439 offset += 4;
25440 }
25441
25442 if (len >= 2)
25443 {
25444 rtx reg = gen_reg_rtx (HImode);
25445 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25446 plus_constant (Pmode, in,
25447 offset))));
25448 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25449 offset)),
25450 reg));
25451 len -= 2;
25452 offset += 2;
25453 }
25454
25455 if (len)
25456 {
25457 rtx reg = gen_reg_rtx (QImode);
25458 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25459 plus_constant (Pmode, in,
25460 offset))));
25461 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25462 offset)),
25463 reg));
25464 }
25465 }
25466
25467 void
25468 thumb_reload_out_hi (rtx *operands)
25469 {
25470 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25471 }
25472
25473 /* Handle reading a half-word from memory during reload. */
25474 void
25475 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25476 {
25477 gcc_unreachable ();
25478 }
25479
25480 /* Return the length of a function name prefix
25481 that starts with the character 'c'. */
25482 static int
25483 arm_get_strip_length (int c)
25484 {
25485 switch (c)
25486 {
25487 ARM_NAME_ENCODING_LENGTHS
25488 default: return 0;
25489 }
25490 }
25491
25492 /* Return a pointer to a function's name with any
25493 and all prefix encodings stripped from it. */
25494 const char *
25495 arm_strip_name_encoding (const char *name)
25496 {
25497 int skip;
25498
25499 while ((skip = arm_get_strip_length (* name)))
25500 name += skip;
25501
25502 return name;
25503 }
25504
25505 /* If there is a '*' anywhere in the name's prefix, then
25506 emit the stripped name verbatim, otherwise prepend an
25507 underscore if leading underscores are being used. */
25508 void
25509 arm_asm_output_labelref (FILE *stream, const char *name)
25510 {
25511 int skip;
25512 int verbatim = 0;
25513
25514 while ((skip = arm_get_strip_length (* name)))
25515 {
25516 verbatim |= (*name == '*');
25517 name += skip;
25518 }
25519
25520 if (verbatim)
25521 fputs (name, stream);
25522 else
25523 asm_fprintf (stream, "%U%s", name);
25524 }
25525
25526 /* This function is used to emit an EABI tag and its associated value.
25527 We emit the numerical value of the tag in case the assembler does not
25528 support textual tags (e.g. gas prior to 2.20). If requested we include
25529 the tag name in a comment so that anyone reading the assembler output
25530 will know which tag is being set.
25531
25532 This function is not static because arm-c.c needs it too. */
25533
25534 void
25535 arm_emit_eabi_attribute (const char *name, int num, int val)
25536 {
25537 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25538 if (flag_verbose_asm || flag_debug_asm)
25539 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25540 asm_fprintf (asm_out_file, "\n");
25541 }
25542
25543 /* This function is used to print CPU tuning information as a comment
25544 in the assembler file. Pointers are not printed for now. */
25545
25546 void
25547 arm_print_tune_info (void)
25548 {
25549 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25550 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25551 current_tune->constant_limit);
25552 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25553 current_tune->max_insns_skipped);
25554 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25555 current_tune->prefetch.num_slots);
25556 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25557 current_tune->prefetch.l1_cache_size);
25558 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25559 current_tune->prefetch.l1_cache_line_size);
25560 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25561 (int) current_tune->prefer_constant_pool);
25562 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25563 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25564 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25565 current_tune->branch_cost (false, false));
25566 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25567 current_tune->branch_cost (false, true));
25568 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25569 current_tune->branch_cost (true, false));
25570 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25571 current_tune->branch_cost (true, true));
25572 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25573 (int) current_tune->prefer_ldrd_strd);
25574 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25575 (int) current_tune->logical_op_non_short_circuit_thumb,
25576 (int) current_tune->logical_op_non_short_circuit_arm);
25577 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25578 (int) current_tune->prefer_neon_for_64bits);
25579 asm_fprintf (asm_out_file,
25580 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25581 (int) current_tune->disparage_flag_setting_t16_encodings);
25582 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25583 (int) current_tune->string_ops_prefer_neon);
25584 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25585 current_tune->max_insns_inline_memset);
25586 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25587 current_tune->fusible_ops);
25588 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25589 (int) current_tune->sched_autopref);
25590 }
25591
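/* Output the start of the assembler file.  For EABI (BPABI) targets this
   emits the .arch or .cpu directive, optionally the tuning parameters, the
   .fpu directive and the EABI build attributes describing the FP, enum,
   alignment and optimization choices, and then calls the generic
   file-start handling.  */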
25592 static void
25593 arm_file_start (void)
25594 {
25595 int val;
25596
25597 if (TARGET_BPABI)
25598 {
25599 const char *fpu_name;
25600 if (arm_selected_arch)
25601 {
25602 /* armv7ve doesn't support any extensions. */
25603 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25604 {
25605 /* Keep backward compatibility for assemblers
25606 which don't support armv7ve. */
25607 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25608 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25609 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25610 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25611 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25612 }
25613 else
25614 {
25615 const char* pos = strchr (arm_selected_arch->name, '+');
25616 if (pos)
25617 {
25618 char buf[15];
25619 gcc_assert (strlen (arm_selected_arch->name)
25620 <= sizeof (buf) / sizeof (*pos));
25621 strncpy (buf, arm_selected_arch->name,
25622 (pos - arm_selected_arch->name) * sizeof (*pos));
25623 buf[pos - arm_selected_arch->name] = '\0';
25624 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25625 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25626 }
25627 else
25628 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25629 }
25630 }
25631 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25632 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25633 else
25634 {
25635 const char* truncated_name
25636 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25637 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25638 }
25639
25640 if (print_tune_info)
25641 arm_print_tune_info ();
25642
25643 if (TARGET_SOFT_FLOAT)
25644 {
25645 fpu_name = "softvfp";
25646 }
25647 else
25648 {
25649 fpu_name = arm_fpu_desc->name;
25650 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25651 {
25652 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25653 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25654
25655 if (TARGET_HARD_FLOAT_ABI)
25656 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25657 }
25658 }
25659 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25660
25661 /* Some of these attributes only apply when the corresponding features
25662 are used. However we don't have any easy way of figuring this out.
25663 Conservatively record the setting that would have been used. */
25664
25665 if (flag_rounding_math)
25666 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25667
25668 if (!flag_unsafe_math_optimizations)
25669 {
25670 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25671 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25672 }
25673 if (flag_signaling_nans)
25674 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25675
25676 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25677 flag_finite_math_only ? 1 : 3);
25678
25679 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25680 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25681 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25682 flag_short_enums ? 1 : 2);
25683
25684 /* Tag_ABI_optimization_goals. */
25685 if (optimize_size)
25686 val = 4;
25687 else if (optimize >= 2)
25688 val = 2;
25689 else if (optimize)
25690 val = 1;
25691 else
25692 val = 6;
25693 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25694
25695 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25696 unaligned_access);
25697
25698 if (arm_fp16_format)
25699 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25700 (int) arm_fp16_format);
25701
25702 if (arm_lang_output_object_attributes_hook)
25703 arm_lang_output_object_attributes_hook();
25704 }
25705
25706 default_file_start ();
25707 }
25708
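/* Output anything that must appear at the end of the assembler file: the
   .note.GNU-stack marker when required, and the "bx rN" stubs behind the
   labels used for Thumb calls made through registers.  */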
25709 static void
25710 arm_file_end (void)
25711 {
25712 int regno;
25713
25714 if (NEED_INDICATE_EXEC_STACK)
25715 /* Add .note.GNU-stack. */
25716 file_end_indicate_exec_stack ();
25717
25718 if (! thumb_call_reg_needed)
25719 return;
25720
25721 switch_to_section (text_section);
25722 asm_fprintf (asm_out_file, "\t.code 16\n");
25723 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25724
25725 for (regno = 0; regno < LR_REGNUM; regno++)
25726 {
25727 rtx label = thumb_call_via_label[regno];
25728
25729 if (label != 0)
25730 {
25731 targetm.asm_out.internal_label (asm_out_file, "L",
25732 CODE_LABEL_NUMBER (label));
25733 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25734 }
25735 }
25736 }
25737
25738 #ifndef ARM_PE
25739 /* Symbols in the text segment can be accessed without indirecting via the
25740 constant pool; it may take an extra binary operation, but this is still
25741 faster than indirecting via memory. Don't do this when not optimizing,
25742 since we won't be calculating all of the offsets necessary to do this
25743 simplification. */
25744
25745 static void
25746 arm_encode_section_info (tree decl, rtx rtl, int first)
25747 {
25748 if (optimize > 0 && TREE_CONSTANT (decl))
25749 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25750
25751 default_encode_section_info (decl, rtl, first);
25752 }
25753 #endif /* !ARM_PE */
25754
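/* Output an internal label, first resetting the conditional-execution
   state machine if this is the label the current sequence was skipping
   to.  */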
25755 static void
25756 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25757 {
25758 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25759 && !strcmp (prefix, "L"))
25760 {
25761 arm_ccfsm_state = 0;
25762 arm_target_insn = NULL;
25763 }
25764 default_internal_label (stream, prefix, labelno);
25765 }
25766
25767 /* Output code to add DELTA to the first argument, and then jump
25768 to FUNCTION. Used for C++ multiple inheritance. */
25769 static void
25770 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25771 HOST_WIDE_INT delta,
25772 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25773 tree function)
25774 {
25775 static int thunk_label = 0;
25776 char label[256];
25777 char labelpc[256];
25778 int mi_delta = delta;
25779 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25780 int shift = 0;
25781 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25782 ? 1 : 0);
25783 if (mi_delta < 0)
25784 mi_delta = - mi_delta;
25785
25786 final_start_function (emit_barrier (), file, 1);
25787
25788 if (TARGET_THUMB1)
25789 {
25790 int labelno = thunk_label++;
25791 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25792 /* Thunks are entered in ARM mode when available. */
25793 if (TARGET_THUMB1_ONLY)
25794 {
25795 /* push r3 so we can use it as a temporary. */
25796 /* TODO: Omit this save if r3 is not used. */
25797 fputs ("\tpush {r3}\n", file);
25798 fputs ("\tldr\tr3, ", file);
25799 }
25800 else
25801 {
25802 fputs ("\tldr\tr12, ", file);
25803 }
25804 assemble_name (file, label);
25805 fputc ('\n', file);
25806 if (flag_pic)
25807 {
25808 /* If we are generating PIC, the ldr instruction below loads
25809 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25810 the address of the add + 8, so we have:
25811
25812 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25813 = target + 1.
25814
25815 Note that we have "+ 1" because some versions of GNU ld
25816 don't set the low bit of the result for R_ARM_REL32
25817 relocations against thumb function symbols.
25818 On ARMv6M this is +4, not +8. */
25819 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25820 assemble_name (file, labelpc);
25821 fputs (":\n", file);
25822 if (TARGET_THUMB1_ONLY)
25823 {
25824 /* This is 2 insns after the start of the thunk, so we know it
25825 is 4-byte aligned. */
25826 fputs ("\tadd\tr3, pc, r3\n", file);
25827 fputs ("\tmov r12, r3\n", file);
25828 }
25829 else
25830 fputs ("\tadd\tr12, pc, r12\n", file);
25831 }
25832 else if (TARGET_THUMB1_ONLY)
25833 fputs ("\tmov r12, r3\n", file);
25834 }
25835 if (TARGET_THUMB1_ONLY)
25836 {
25837 if (mi_delta > 255)
25838 {
25839 fputs ("\tldr\tr3, ", file);
25840 assemble_name (file, label);
25841 fputs ("+4\n", file);
25842 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25843 mi_op, this_regno, this_regno);
25844 }
25845 else if (mi_delta != 0)
25846 {
25847 /* Thumb1 unified syntax requires an s suffix in the instruction name
25848 when one of the operands is an immediate. */
25849 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25850 mi_op, this_regno, this_regno,
25851 mi_delta);
25852 }
25853 }
25854 else
25855 {
25856 /* TODO: Use movw/movt for large constants when available. */
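/* For example (a sketch, assuming a positive delta and "this" in r0), a
   delta of 0x1234 is split into "add r0, r0, #564" followed by
   "add r0, r0, #4096", both valid 8-bit rotated immediates.  */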
25857 while (mi_delta != 0)
25858 {
25859 if ((mi_delta & (3 << shift)) == 0)
25860 shift += 2;
25861 else
25862 {
25863 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25864 mi_op, this_regno, this_regno,
25865 mi_delta & (0xff << shift));
25866 mi_delta &= ~(0xff << shift);
25867 shift += 8;
25868 }
25869 }
25870 }
25871 if (TARGET_THUMB1)
25872 {
25873 if (TARGET_THUMB1_ONLY)
25874 fputs ("\tpop\t{r3}\n", file);
25875
25876 fprintf (file, "\tbx\tr12\n");
25877 ASM_OUTPUT_ALIGN (file, 2);
25878 assemble_name (file, label);
25879 fputs (":\n", file);
25880 if (flag_pic)
25881 {
25882 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25883 rtx tem = XEXP (DECL_RTL (function), 0);
25884 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25885 pipeline offset is four rather than eight. Adjust the offset
25886 accordingly. */
25887 tem = plus_constant (GET_MODE (tem), tem,
25888 TARGET_THUMB1_ONLY ? -3 : -7);
25889 tem = gen_rtx_MINUS (GET_MODE (tem),
25890 tem,
25891 gen_rtx_SYMBOL_REF (Pmode,
25892 ggc_strdup (labelpc)));
25893 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25894 }
25895 else
25896 /* Output ".word .LTHUNKn". */
25897 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25898
25899 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25900 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25901 }
25902 else
25903 {
25904 fputs ("\tb\t", file);
25905 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25906 if (NEED_PLT_RELOC)
25907 fputs ("(PLT)", file);
25908 fputc ('\n', file);
25909 }
25910
25911 final_end_function ();
25912 }
25913
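/* Output the CONST_VECTOR X as a single hexadecimal literal, concatenating
   the elements from the highest-numbered one down.  Only the 64-bit vector
   modes are handled.  Always returns 1.  */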
25914 int
25915 arm_emit_vector_const (FILE *file, rtx x)
25916 {
25917 int i;
25918 const char * pattern;
25919
25920 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25921
25922 switch (GET_MODE (x))
25923 {
25924 case V2SImode: pattern = "%08x"; break;
25925 case V4HImode: pattern = "%04x"; break;
25926 case V8QImode: pattern = "%02x"; break;
25927 default: gcc_unreachable ();
25928 }
25929
25930 fprintf (file, "0x");
25931 for (i = CONST_VECTOR_NUNITS (x); i--;)
25932 {
25933 rtx element;
25934
25935 element = CONST_VECTOR_ELT (x, i);
25936 fprintf (file, pattern, INTVAL (element));
25937 }
25938
25939 return 1;
25940 }
25941
25942 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25943 HFmode constant pool entries are actually loaded with ldr. */
25944 void
25945 arm_emit_fp16_const (rtx c)
25946 {
25947 REAL_VALUE_TYPE r;
25948 long bits;
25949
25950 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25951 bits = real_to_target (NULL, &r, HFmode);
25952 if (WORDS_BIG_ENDIAN)
25953 assemble_zeros (2);
25954 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25955 if (!WORDS_BIG_ENDIAN)
25956 assemble_zeros (2);
25957 }
25958
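/* Output the assembly to load iWMMXt GR register OPERANDS[0] from memory
   OPERANDS[1].  An in-range address needs only a single wldrw; otherwise
   the base register is spilled, used to load the value with a core ldr,
   transferred with tmcr, and then restored.  */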
25959 const char *
25960 arm_output_load_gr (rtx *operands)
25961 {
25962 rtx reg;
25963 rtx offset;
25964 rtx wcgr;
25965 rtx sum;
25966
25967 if (!MEM_P (operands [1])
25968 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25969 || !REG_P (reg = XEXP (sum, 0))
25970 || !CONST_INT_P (offset = XEXP (sum, 1))
25971 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25972 return "wldrw%?\t%0, %1";
25973
25974 /* Fix up an out-of-range load of a GR register. */
25975 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25976 wcgr = operands[0];
25977 operands[0] = reg;
25978 output_asm_insn ("ldr%?\t%0, %1", operands);
25979
25980 operands[0] = wcgr;
25981 operands[1] = reg;
25982 output_asm_insn ("tmcr%?\t%0, %1", operands);
25983 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25984
25985 return "";
25986 }
25987
25988 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25989
25990 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25991 named arg and all anonymous args onto the stack.
25992 XXX I know the prologue shouldn't be pushing registers, but it is faster
25993 that way. */
25994
25995 static void
25996 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25997 machine_mode mode,
25998 tree type,
25999 int *pretend_size,
26000 int second_time ATTRIBUTE_UNUSED)
26001 {
26002 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26003 int nregs;
26004
26005 cfun->machine->uses_anonymous_args = 1;
26006 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26007 {
26008 nregs = pcum->aapcs_ncrn;
26009 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26010 nregs++;
26011 }
26012 else
26013 nregs = pcum->nregs;
26014
26015 if (nregs < NUM_ARG_REGS)
26016 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26017 }
26018
26019 /* We can't rely on the caller doing the proper promotion when
26020 using APCS or ATPCS. */
26021
26022 static bool
26023 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26024 {
26025 return !TARGET_AAPCS_BASED;
26026 }
26027
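/* Promote integer arguments and return values narrower than a word to
   SImode.  */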
26028 static machine_mode
26029 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26030 machine_mode mode,
26031 int *punsignedp ATTRIBUTE_UNUSED,
26032 const_tree fntype ATTRIBUTE_UNUSED,
26033 int for_return ATTRIBUTE_UNUSED)
26034 {
26035 if (GET_MODE_CLASS (mode) == MODE_INT
26036 && GET_MODE_SIZE (mode) < 4)
26037 return SImode;
26038
26039 return mode;
26040 }
26041
26042 /* AAPCS based ABIs use short enums by default. */
26043
26044 static bool
26045 arm_default_short_enums (void)
26046 {
26047 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26048 }
26049
26050
26051 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26052
26053 static bool
26054 arm_align_anon_bitfield (void)
26055 {
26056 return TARGET_AAPCS_BASED;
26057 }
26058
26059
26060 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26061
26062 static tree
26063 arm_cxx_guard_type (void)
26064 {
26065 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26066 }
26067
26068
26069 /* The EABI says test the least significant bit of a guard variable. */
26070
26071 static bool
26072 arm_cxx_guard_mask_bit (void)
26073 {
26074 return TARGET_AAPCS_BASED;
26075 }
26076
26077
26078 /* The EABI specifies that all array cookies are 8 bytes long. */
26079
26080 static tree
26081 arm_get_cookie_size (tree type)
26082 {
26083 tree size;
26084
26085 if (!TARGET_AAPCS_BASED)
26086 return default_cxx_get_cookie_size (type);
26087
26088 size = build_int_cst (sizetype, 8);
26089 return size;
26090 }
26091
26092
26093 /* The EABI says that array cookies should also contain the element size. */
26094
26095 static bool
26096 arm_cookie_has_size (void)
26097 {
26098 return TARGET_AAPCS_BASED;
26099 }
26100
26101
26102 /* The EABI says constructors and destructors should return a pointer to
26103 the object constructed/destroyed. */
26104
26105 static bool
26106 arm_cxx_cdtor_returns_this (void)
26107 {
26108 return TARGET_AAPCS_BASED;
26109 }
26110
26111 /* The EABI says that an inline function may never be the key
26112 method. */
26113
26114 static bool
26115 arm_cxx_key_method_may_be_inline (void)
26116 {
26117 return !TARGET_AAPCS_BASED;
26118 }
26119
26120 static void
26121 arm_cxx_determine_class_data_visibility (tree decl)
26122 {
26123 if (!TARGET_AAPCS_BASED
26124 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26125 return;
26126
26127 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26128 is exported. However, on systems without dynamic vague linkage,
26129 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26130 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26131 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26132 else
26133 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26134 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26135 }
26136
26137 static bool
26138 arm_cxx_class_data_always_comdat (void)
26139 {
26140 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26141 vague linkage if the class has no key function. */
26142 return !TARGET_AAPCS_BASED;
26143 }
26144
26145
26146 /* The EABI says __aeabi_atexit should be used to register static
26147 destructors. */
26148
26149 static bool
26150 arm_cxx_use_aeabi_atexit (void)
26151 {
26152 return TARGET_AAPCS_BASED;
26153 }
26154
26155
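/* Set the current function's return address to SOURCE.  If LR was not
   saved, simply move SOURCE into LR; otherwise store it into LR's save
   slot, using SCRATCH to form the address when the offset is too large
   for a single load/store offset.  */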
26156 void
26157 arm_set_return_address (rtx source, rtx scratch)
26158 {
26159 arm_stack_offsets *offsets;
26160 HOST_WIDE_INT delta;
26161 rtx addr;
26162 unsigned long saved_regs;
26163
26164 offsets = arm_get_frame_offsets ();
26165 saved_regs = offsets->saved_regs_mask;
26166
26167 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26168 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26169 else
26170 {
26171 if (frame_pointer_needed)
26172 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26173 else
26174 {
26175 /* LR will be the first saved register. */
26176 delta = offsets->outgoing_args - (offsets->frame + 4);
26177
26178
26179 if (delta >= 4096)
26180 {
26181 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26182 GEN_INT (delta & ~4095)));
26183 addr = scratch;
26184 delta &= 4095;
26185 }
26186 else
26187 addr = stack_pointer_rtx;
26188
26189 addr = plus_constant (Pmode, addr, delta);
26190 }
26191 /* The store needs to be marked as frame related in order to prevent
26192 DSE from deleting it as dead if it is based on fp. */
26193 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26194 RTX_FRAME_RELATED_P (insn) = 1;
26195 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26196 }
26197 }
26198
26199
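/* As above, but for Thumb.  The offset range of the store is much more
   limited (128 bytes when a Thumb-1 frame pointer is used, 1024 bytes
   otherwise), and the Thumb-1 backtrace frame is allowed for.  */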
26200 void
26201 thumb_set_return_address (rtx source, rtx scratch)
26202 {
26203 arm_stack_offsets *offsets;
26204 HOST_WIDE_INT delta;
26205 HOST_WIDE_INT limit;
26206 int reg;
26207 rtx addr;
26208 unsigned long mask;
26209
26210 emit_use (source);
26211
26212 offsets = arm_get_frame_offsets ();
26213 mask = offsets->saved_regs_mask;
26214 if (mask & (1 << LR_REGNUM))
26215 {
26216 limit = 1024;
26217 /* Find the saved regs. */
26218 if (frame_pointer_needed)
26219 {
26220 delta = offsets->soft_frame - offsets->saved_args;
26221 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26222 if (TARGET_THUMB1)
26223 limit = 128;
26224 }
26225 else
26226 {
26227 delta = offsets->outgoing_args - offsets->saved_args;
26228 reg = SP_REGNUM;
26229 }
26230 /* Allow for the stack frame. */
26231 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26232 delta -= 16;
26233 /* The link register is always the first saved register. */
26234 delta -= 4;
26235
26236 /* Construct the address. */
26237 addr = gen_rtx_REG (SImode, reg);
26238 if (delta > limit)
26239 {
26240 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26241 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26242 addr = scratch;
26243 }
26244 else
26245 addr = plus_constant (Pmode, addr, delta);
26246
26247 /* The store needs to be marked as frame related in order to prevent
26248 DSE from deleting it as dead if it is based on fp. */
26249 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26250 RTX_FRAME_RELATED_P (insn) = 1;
26251 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26252 }
26253 else
26254 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26255 }
26256
26257 /* Implements target hook vector_mode_supported_p. */
26258 bool
26259 arm_vector_mode_supported_p (machine_mode mode)
26260 {
26261 /* Neon also supports V2SImode, etc. listed in the clause below. */
26262 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26263 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26264 return true;
26265
26266 if ((TARGET_NEON || TARGET_IWMMXT)
26267 && ((mode == V2SImode)
26268 || (mode == V4HImode)
26269 || (mode == V8QImode)))
26270 return true;
26271
26272 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26273 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26274 || mode == V2HAmode))
26275 return true;
26276
26277 return false;
26278 }
26279
26280 /* Implements target hook array_mode_supported_p. */
26281
26282 static bool
26283 arm_array_mode_supported_p (machine_mode mode,
26284 unsigned HOST_WIDE_INT nelems)
26285 {
26286 if (TARGET_NEON
26287 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26288 && (nelems >= 2 && nelems <= 4))
26289 return true;
26290
26291 return false;
26292 }
26293
26294 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26295 registers when autovectorizing for Neon, at least until multiple vector
26296 widths are supported properly by the middle-end. */
26297
26298 static machine_mode
26299 arm_preferred_simd_mode (machine_mode mode)
26300 {
26301 if (TARGET_NEON)
26302 switch (mode)
26303 {
26304 case SFmode:
26305 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26306 case SImode:
26307 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26308 case HImode:
26309 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26310 case QImode:
26311 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26312 case DImode:
26313 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26314 return V2DImode;
26315 break;
26316
26317 default:;
26318 }
26319
26320 if (TARGET_REALLY_IWMMXT)
26321 switch (mode)
26322 {
26323 case SImode:
26324 return V2SImode;
26325 case HImode:
26326 return V4HImode;
26327 case QImode:
26328 return V8QImode;
26329
26330 default:;
26331 }
26332
26333 return word_mode;
26334 }
26335
26336 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26337
26338 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26339 using r0-r4 for function arguments, r7 for the stack frame and don't have
26340 enough left over to do doubleword arithmetic. For Thumb-2 all the
26341 potentially problematic instructions accept high registers so this is not
26342 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26343 that require many low registers. */
26344 static bool
26345 arm_class_likely_spilled_p (reg_class_t rclass)
26346 {
26347 if ((TARGET_THUMB1 && rclass == LO_REGS)
26348 || rclass == CC_REG)
26349 return true;
26350
26351 return false;
26352 }
26353
26354 /* Implements target hook small_register_classes_for_mode_p. */
26355 bool
26356 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26357 {
26358 return TARGET_THUMB1;
26359 }
26360
26361 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26362 ARM insns and therefore guarantee that the shift count is modulo 256.
26363 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26364 guarantee no particular behavior for out-of-range counts. */
26365
26366 static unsigned HOST_WIDE_INT
26367 arm_shift_truncation_mask (machine_mode mode)
26368 {
26369 return mode == SImode ? 255 : 0;
26370 }
26371
26372
26373 /* Map internal gcc register numbers to DWARF2 register numbers. */
26374
26375 unsigned int
26376 arm_dbx_register_number (unsigned int regno)
26377 {
26378 if (regno < 16)
26379 return regno;
26380
26381 if (IS_VFP_REGNUM (regno))
26382 {
26383 /* See comment in arm_dwarf_register_span. */
26384 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26385 return 64 + regno - FIRST_VFP_REGNUM;
26386 else
26387 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26388 }
26389
26390 if (IS_IWMMXT_GR_REGNUM (regno))
26391 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26392
26393 if (IS_IWMMXT_REGNUM (regno))
26394 return 112 + regno - FIRST_IWMMXT_REGNUM;
26395
26396 return DWARF_FRAME_REGISTERS;
26397 }
26398
26399 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26400 GCC models them as 64 32-bit registers, so we need to describe this to
26401 the DWARF generation code. Other registers can use the default. */
26402 static rtx
26403 arm_dwarf_register_span (rtx rtl)
26404 {
26405 machine_mode mode;
26406 unsigned regno;
26407 rtx parts[16];
26408 int nregs;
26409 int i;
26410
26411 regno = REGNO (rtl);
26412 if (!IS_VFP_REGNUM (regno))
26413 return NULL_RTX;
26414
26415 /* XXX FIXME: The EABI defines two VFP register ranges:
26416 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26417 256-287: D0-D31
26418 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26419 corresponding D register. Until GDB supports this, we shall use the
26420 legacy encodings. We also use these encodings for D0-D15 for
26421 compatibility with older debuggers. */
26422 mode = GET_MODE (rtl);
26423 if (GET_MODE_SIZE (mode) < 8)
26424 return NULL_RTX;
26425
26426 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26427 {
26428 nregs = GET_MODE_SIZE (mode) / 4;
26429 for (i = 0; i < nregs; i += 2)
26430 if (TARGET_BIG_END)
26431 {
26432 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26433 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26434 }
26435 else
26436 {
26437 parts[i] = gen_rtx_REG (SImode, regno + i);
26438 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26439 }
26440 }
26441 else
26442 {
26443 nregs = GET_MODE_SIZE (mode) / 8;
26444 for (i = 0; i < nregs; i++)
26445 parts[i] = gen_rtx_REG (DImode, regno + i);
26446 }
26447
26448 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26449 }
26450
26451 #if ARM_UNWIND_INFO
26452 /* Emit unwind directives for a store-multiple instruction or stack pointer
26453 push during alignment.
26454 These should only ever be generated by the function prologue code, so
26455 expect them to have a particular form.
26456 The store-multiple instruction sometimes pushes pc as the last register,
26457 although that should not be tracked in the unwind information, or for -Os it
26458 sometimes pushes some dummy registers before the first register that needs
26459 to be tracked in the unwind information; such dummy registers are there just
26460 to avoid a separate stack adjustment, and will not be restored in the
26461 epilogue. */
26462
26463 static void
26464 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26465 {
26466 int i;
26467 HOST_WIDE_INT offset;
26468 HOST_WIDE_INT nregs;
26469 int reg_size;
26470 unsigned reg;
26471 unsigned lastreg;
26472 unsigned padfirst = 0, padlast = 0;
26473 rtx e;
26474
26475 e = XVECEXP (p, 0, 0);
26476 gcc_assert (GET_CODE (e) == SET);
26477
26478 /* First insn will adjust the stack pointer. */
26479 gcc_assert (GET_CODE (e) == SET
26480 && REG_P (SET_DEST (e))
26481 && REGNO (SET_DEST (e)) == SP_REGNUM
26482 && GET_CODE (SET_SRC (e)) == PLUS);
26483
26484 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26485 nregs = XVECLEN (p, 0) - 1;
26486 gcc_assert (nregs);
26487
26488 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26489 if (reg < 16)
26490 {
26491 /* For -Os dummy registers can be pushed at the beginning to
26492 avoid separate stack pointer adjustment. */
26493 e = XVECEXP (p, 0, 1);
26494 e = XEXP (SET_DEST (e), 0);
26495 if (GET_CODE (e) == PLUS)
26496 padfirst = INTVAL (XEXP (e, 1));
26497 gcc_assert (padfirst == 0 || optimize_size);
26498 /* The function prologue may also push pc, but not annotate it as it is
26499 never restored. We turn this into a stack pointer adjustment. */
26500 e = XVECEXP (p, 0, nregs);
26501 e = XEXP (SET_DEST (e), 0);
26502 if (GET_CODE (e) == PLUS)
26503 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26504 else
26505 padlast = offset - 4;
26506 gcc_assert (padlast == 0 || padlast == 4);
26507 if (padlast == 4)
26508 fprintf (asm_out_file, "\t.pad #4\n");
26509 reg_size = 4;
26510 fprintf (asm_out_file, "\t.save {");
26511 }
26512 else if (IS_VFP_REGNUM (reg))
26513 {
26514 reg_size = 8;
26515 fprintf (asm_out_file, "\t.vsave {");
26516 }
26517 else
26518 /* Unknown register type. */
26519 gcc_unreachable ();
26520
26521 /* If the stack increment doesn't match the size of the saved registers,
26522 something has gone horribly wrong. */
26523 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26524
26525 offset = padfirst;
26526 lastreg = 0;
26527 /* The remaining insns will describe the stores. */
26528 for (i = 1; i <= nregs; i++)
26529 {
26530 /* Expect (set (mem <addr>) (reg)).
26531 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26532 e = XVECEXP (p, 0, i);
26533 gcc_assert (GET_CODE (e) == SET
26534 && MEM_P (SET_DEST (e))
26535 && REG_P (SET_SRC (e)));
26536
26537 reg = REGNO (SET_SRC (e));
26538 gcc_assert (reg >= lastreg);
26539
26540 if (i != 1)
26541 fprintf (asm_out_file, ", ");
26542 /* We can't use %r for vfp because we need to use the
26543 double precision register names. */
26544 if (IS_VFP_REGNUM (reg))
26545 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26546 else
26547 asm_fprintf (asm_out_file, "%r", reg);
26548
26549 #ifdef ENABLE_CHECKING
26550 /* Check that the addresses are consecutive. */
26551 e = XEXP (SET_DEST (e), 0);
26552 if (GET_CODE (e) == PLUS)
26553 gcc_assert (REG_P (XEXP (e, 0))
26554 && REGNO (XEXP (e, 0)) == SP_REGNUM
26555 && CONST_INT_P (XEXP (e, 1))
26556 && offset == INTVAL (XEXP (e, 1)));
26557 else
26558 gcc_assert (i == 1
26559 && REG_P (e)
26560 && REGNO (e) == SP_REGNUM);
26561 offset += reg_size;
26562 #endif
26563 }
26564 fprintf (asm_out_file, "}\n");
26565 if (padfirst)
26566 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26567 }
26568
26569 /* Emit unwind directives for a SET. */
26570
26571 static void
26572 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26573 {
26574 rtx e0;
26575 rtx e1;
26576 unsigned reg;
26577
26578 e0 = XEXP (p, 0);
26579 e1 = XEXP (p, 1);
26580 switch (GET_CODE (e0))
26581 {
26582 case MEM:
26583 /* Pushing a single register. */
26584 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26585 || !REG_P (XEXP (XEXP (e0, 0), 0))
26586 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26587 abort ();
26588
26589 asm_fprintf (asm_out_file, "\t.save ");
26590 if (IS_VFP_REGNUM (REGNO (e1)))
26591 asm_fprintf(asm_out_file, "{d%d}\n",
26592 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26593 else
26594 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26595 break;
26596
26597 case REG:
26598 if (REGNO (e0) == SP_REGNUM)
26599 {
26600 /* A stack increment. */
26601 if (GET_CODE (e1) != PLUS
26602 || !REG_P (XEXP (e1, 0))
26603 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26604 || !CONST_INT_P (XEXP (e1, 1)))
26605 abort ();
26606
26607 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26608 -INTVAL (XEXP (e1, 1)));
26609 }
26610 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26611 {
26612 HOST_WIDE_INT offset;
26613
26614 if (GET_CODE (e1) == PLUS)
26615 {
26616 if (!REG_P (XEXP (e1, 0))
26617 || !CONST_INT_P (XEXP (e1, 1)))
26618 abort ();
26619 reg = REGNO (XEXP (e1, 0));
26620 offset = INTVAL (XEXP (e1, 1));
26621 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26622 HARD_FRAME_POINTER_REGNUM, reg,
26623 offset);
26624 }
26625 else if (REG_P (e1))
26626 {
26627 reg = REGNO (e1);
26628 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26629 HARD_FRAME_POINTER_REGNUM, reg);
26630 }
26631 else
26632 abort ();
26633 }
26634 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26635 {
26636 /* Move from sp to reg. */
26637 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26638 }
26639 else if (GET_CODE (e1) == PLUS
26640 && REG_P (XEXP (e1, 0))
26641 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26642 && CONST_INT_P (XEXP (e1, 1)))
26643 {
26644 /* Set reg to offset from sp. */
26645 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26646 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26647 }
26648 else
26649 abort ();
26650 break;
26651
26652 default:
26653 abort ();
26654 }
26655 }
26656
26657
26658 /* Emit unwind directives for the given insn. */
26659
26660 static void
26661 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26662 {
26663 rtx note, pat;
26664 bool handled_one = false;
26665
26666 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26667 return;
26668
26669 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26670 && (TREE_NOTHROW (current_function_decl)
26671 || crtl->all_throwers_are_sibcalls))
26672 return;
26673
26674 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26675 return;
26676
26677 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26678 {
26679 switch (REG_NOTE_KIND (note))
26680 {
26681 case REG_FRAME_RELATED_EXPR:
26682 pat = XEXP (note, 0);
26683 goto found;
26684
26685 case REG_CFA_REGISTER:
26686 pat = XEXP (note, 0);
26687 if (pat == NULL)
26688 {
26689 pat = PATTERN (insn);
26690 if (GET_CODE (pat) == PARALLEL)
26691 pat = XVECEXP (pat, 0, 0);
26692 }
26693
26694 /* Only emitted for IS_STACKALIGN re-alignment. */
26695 {
26696 rtx dest, src;
26697 unsigned reg;
26698
26699 src = SET_SRC (pat);
26700 dest = SET_DEST (pat);
26701
26702 gcc_assert (src == stack_pointer_rtx);
26703 reg = REGNO (dest);
26704 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26705 reg + 0x90, reg);
26706 }
26707 handled_one = true;
26708 break;
26709
26710 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26711 to get correct dwarf information for shrink-wrap. We should not
26712 emit unwind information for it because these are used either for
26713 pretend arguments or notes to adjust sp and restore registers from
26714 stack. */
26715 case REG_CFA_DEF_CFA:
26716 case REG_CFA_ADJUST_CFA:
26717 case REG_CFA_RESTORE:
26718 return;
26719
26720 case REG_CFA_EXPRESSION:
26721 case REG_CFA_OFFSET:
26722 /* ??? Only handling here what we actually emit. */
26723 gcc_unreachable ();
26724
26725 default:
26726 break;
26727 }
26728 }
26729 if (handled_one)
26730 return;
26731 pat = PATTERN (insn);
26732 found:
26733
26734 switch (GET_CODE (pat))
26735 {
26736 case SET:
26737 arm_unwind_emit_set (asm_out_file, pat);
26738 break;
26739
26740 case SEQUENCE:
26741 /* Store multiple. */
26742 arm_unwind_emit_sequence (asm_out_file, pat);
26743 break;
26744
26745 default:
26746 abort();
26747 }
26748 }
26749
26750
26751 /* Output a reference from a function exception table to the type_info
26752 object X. The EABI specifies that the symbol should be relocated by
26753 an R_ARM_TARGET2 relocation. */
26754
26755 static bool
26756 arm_output_ttype (rtx x)
26757 {
26758 fputs ("\t.word\t", asm_out_file);
26759 output_addr_const (asm_out_file, x);
26760 /* Use special relocations for symbol references. */
26761 if (!CONST_INT_P (x))
26762 fputs ("(TARGET2)", asm_out_file);
26763 fputc ('\n', asm_out_file);
26764
26765 return TRUE;
26766 }
26767
26768 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26769
26770 static void
26771 arm_asm_emit_except_personality (rtx personality)
26772 {
26773 fputs ("\t.personality\t", asm_out_file);
26774 output_addr_const (asm_out_file, personality);
26775 fputc ('\n', asm_out_file);
26776 }
26777
26778 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26779
26780 static void
26781 arm_asm_init_sections (void)
26782 {
26783 exception_section = get_unnamed_section (0, output_section_asm_op,
26784 "\t.handlerdata");
26785 }
26786 #endif /* ARM_UNWIND_INFO */
26787
26788 /* Output unwind directives for the start/end of a function. */
26789
26790 void
26791 arm_output_fn_unwind (FILE * f, bool prologue)
26792 {
26793 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26794 return;
26795
26796 if (prologue)
26797 fputs ("\t.fnstart\n", f);
26798 else
26799 {
26800 /* If this function will never be unwound, then mark it as such.
26801 The same condition is used in arm_unwind_emit to suppress
26802 the frame annotations. */
26803 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26804 && (TREE_NOTHROW (current_function_decl)
26805 || crtl->all_throwers_are_sibcalls))
26806 fputs("\t.cantunwind\n", f);
26807
26808 fputs ("\t.fnend\n", f);
26809 }
26810 }
26811
26812 static bool
26813 arm_emit_tls_decoration (FILE *fp, rtx x)
26814 {
26815 enum tls_reloc reloc;
26816 rtx val;
26817
26818 val = XVECEXP (x, 0, 0);
26819 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26820
26821 output_addr_const (fp, val);
26822
26823 switch (reloc)
26824 {
26825 case TLS_GD32:
26826 fputs ("(tlsgd)", fp);
26827 break;
26828 case TLS_LDM32:
26829 fputs ("(tlsldm)", fp);
26830 break;
26831 case TLS_LDO32:
26832 fputs ("(tlsldo)", fp);
26833 break;
26834 case TLS_IE32:
26835 fputs ("(gottpoff)", fp);
26836 break;
26837 case TLS_LE32:
26838 fputs ("(tpoff)", fp);
26839 break;
26840 case TLS_DESCSEQ:
26841 fputs ("(tlsdesc)", fp);
26842 break;
26843 default:
26844 gcc_unreachable ();
26845 }
26846
26847 switch (reloc)
26848 {
26849 case TLS_GD32:
26850 case TLS_LDM32:
26851 case TLS_IE32:
26852 case TLS_DESCSEQ:
26853 fputs (" + (. - ", fp);
26854 output_addr_const (fp, XVECEXP (x, 0, 2));
26855 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
26856 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26857 output_addr_const (fp, XVECEXP (x, 0, 3));
26858 fputc (')', fp);
26859 break;
26860 default:
26861 break;
26862 }
26863
26864 return TRUE;
26865 }
26866
26867 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26868
26869 static void
26870 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26871 {
26872 gcc_assert (size == 4);
26873 fputs ("\t.word\t", file);
26874 output_addr_const (file, x);
26875 fputs ("(tlsldo)", file);
26876 }
26877
26878 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26879
26880 static bool
26881 arm_output_addr_const_extra (FILE *fp, rtx x)
26882 {
26883 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26884 return arm_emit_tls_decoration (fp, x);
26885 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26886 {
26887 char label[256];
26888 int labelno = INTVAL (XVECEXP (x, 0, 0));
26889
26890 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26891 assemble_name_raw (fp, label);
26892
26893 return TRUE;
26894 }
26895 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26896 {
26897 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26898 if (GOT_PCREL)
26899 fputs ("+.", fp);
26900 fputs ("-(", fp);
26901 output_addr_const (fp, XVECEXP (x, 0, 0));
26902 fputc (')', fp);
26903 return TRUE;
26904 }
26905 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26906 {
26907 output_addr_const (fp, XVECEXP (x, 0, 0));
26908 if (GOT_PCREL)
26909 fputs ("+.", fp);
26910 fputs ("-(", fp);
26911 output_addr_const (fp, XVECEXP (x, 0, 1));
26912 fputc (')', fp);
26913 return TRUE;
26914 }
26915 else if (GET_CODE (x) == CONST_VECTOR)
26916 return arm_emit_vector_const (fp, x);
26917
26918 return FALSE;
26919 }
26920
26921 /* Output assembly for a shift instruction.
26922 SET_FLAGS determines how the instruction modifies the condition codes.
26923 0 - Do not set condition codes.
26924 1 - Set condition codes.
26925 2 - Use smallest instruction. */
26926 const char *
26927 arm_output_shift(rtx * operands, int set_flags)
26928 {
26929 char pattern[100];
26930 static const char flag_chars[3] = {'?', '.', '!'};
26931 const char *shift;
26932 HOST_WIDE_INT val;
26933 char c;
26934
26935 c = flag_chars[set_flags];
26936 if (TARGET_UNIFIED_ASM)
26937 {
26938 shift = shift_op(operands[3], &val);
26939 if (shift)
26940 {
26941 if (val != -1)
26942 operands[2] = GEN_INT(val);
26943 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26944 }
26945 else
26946 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26947 }
26948 else
26949 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26950 output_asm_insn (pattern, operands);
26951 return "";
26952 }
26953
26954 /* Output assembly for a WMMX immediate shift instruction. */
26955 const char *
26956 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26957 {
26958 int shift = INTVAL (operands[2]);
26959 char templ[50];
26960 machine_mode opmode = GET_MODE (operands[0]);
26961
26962 gcc_assert (shift >= 0);
26963
26964 /* Handle a shift value larger than the type width, i.e. > 63 (for the D
26965 qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
26966 if (((opmode == V4HImode) && (shift > 15))
26967 || ((opmode == V2SImode) && (shift > 31))
26968 || ((opmode == DImode) && (shift > 63)))
26969 {
26970 if (wror_or_wsra)
26971 {
26972 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26973 output_asm_insn (templ, operands);
26974 if (opmode == DImode)
26975 {
26976 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26977 output_asm_insn (templ, operands);
26978 }
26979 }
26980 else
26981 {
26982 /* The destination register will contain all zeros. */
26983 sprintf (templ, "wzero\t%%0");
26984 output_asm_insn (templ, operands);
26985 }
26986 return "";
26987 }
26988
26989 if ((opmode == DImode) && (shift > 32))
26990 {
26991 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26992 output_asm_insn (templ, operands);
26993 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26994 output_asm_insn (templ, operands);
26995 }
26996 else
26997 {
26998 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26999 output_asm_insn (templ, operands);
27000 }
27001 return "";
27002 }
27003
27004 /* Output assembly for a WMMX tinsr instruction. */
27005 const char *
27006 arm_output_iwmmxt_tinsr (rtx *operands)
27007 {
27008 int mask = INTVAL (operands[3]);
27009 int i;
27010 char templ[50];
27011 int units = mode_nunits[GET_MODE (operands[0])];
27012 gcc_assert ((mask & (mask - 1)) == 0);
27013 for (i = 0; i < units; ++i)
27014 {
27015 if ((mask & 0x01) == 1)
27016 {
27017 break;
27018 }
27019 mask >>= 1;
27020 }
27021 gcc_assert (i < units);
27022 {
27023 switch (GET_MODE (operands[0]))
27024 {
27025 case V8QImode:
27026 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27027 break;
27028 case V4HImode:
27029 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27030 break;
27031 case V2SImode:
27032 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27033 break;
27034 default:
27035 gcc_unreachable ();
27036 break;
27037 }
27038 output_asm_insn (templ, operands);
27039 }
27040 return "";
27041 }
27042
27043 /* Output a Thumb-1 casesi dispatch sequence. */
27044 const char *
27045 thumb1_output_casesi (rtx *operands)
27046 {
27047 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27048
27049 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27050
27051 switch (GET_MODE(diff_vec))
27052 {
27053 case QImode:
27054 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27055 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27056 case HImode:
27057 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27058 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27059 case SImode:
27060 return "bl\t%___gnu_thumb1_case_si";
27061 default:
27062 gcc_unreachable ();
27063 }
27064 }
27065
27066 /* Output a Thumb-2 casesi instruction. */
27067 const char *
27068 thumb2_output_casesi (rtx *operands)
27069 {
27070 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27071
27072 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27073
27074 output_asm_insn ("cmp\t%0, %1", operands);
27075 output_asm_insn ("bhi\t%l3", operands);
27076 switch (GET_MODE(diff_vec))
27077 {
27078 case QImode:
27079 return "tbb\t[%|pc, %0]";
27080 case HImode:
27081 return "tbh\t[%|pc, %0, lsl #1]";
27082 case SImode:
27083 if (flag_pic)
27084 {
27085 output_asm_insn ("adr\t%4, %l2", operands);
27086 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27087 output_asm_insn ("add\t%4, %4, %5", operands);
27088 return "bx\t%4";
27089 }
27090 else
27091 {
27092 output_asm_insn ("adr\t%4, %l2", operands);
27093 return "ldr\t%|pc, [%4, %0, lsl #2]";
27094 }
27095 default:
27096 gcc_unreachable ();
27097 }
27098 }
27099
27100 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27101 per-core tuning structs. */
27102 static int
27103 arm_issue_rate (void)
27104 {
27105 return current_tune->issue_rate;
27106 }
27107
27108 /* Return how many instructions the scheduler should look ahead to choose
27109 the best one. */
27110 static int
27111 arm_first_cycle_multipass_dfa_lookahead (void)
27112 {
27113 int issue_rate = arm_issue_rate ();
27114
27115 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27116 }
27117
27118 /* Enable modeling of L2 auto-prefetcher. */
27119 static int
27120 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27121 {
27122 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27123 }
27124
27125 const char *
27126 arm_mangle_type (const_tree type)
27127 {
27128 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27129 has to be mangled as if it is in the "std" namespace. */
27130 if (TARGET_AAPCS_BASED
27131 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27132 return "St9__va_list";
27133
27134 /* Half-precision float. */
27135 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27136 return "Dh";
27137
27138 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27139 builtin type. */
27140 if (TYPE_NAME (type) != NULL)
27141 return arm_mangle_builtin_type (type);
27142
27143 /* Use the default mangling. */
27144 return NULL;
27145 }
27146
27147 /* Order of allocation of core registers for Thumb: this allocation is
27148 written over the corresponding initial entries of the array
27149 initialized with REG_ALLOC_ORDER. We allocate all low registers
27150 first. Saving and restoring a low register is usually cheaper than
27151 using a call-clobbered high register. */
27152
27153 static const int thumb_core_reg_alloc_order[] =
27154 {
27155 3, 2, 1, 0, 4, 5, 6, 7,
27156 14, 12, 8, 9, 10, 11
27157 };
27158
27159 /* Adjust register allocation order when compiling for Thumb. */
27160
27161 void
27162 arm_order_regs_for_local_alloc (void)
27163 {
27164 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27165 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27166 if (TARGET_THUMB)
27167 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27168 sizeof (thumb_core_reg_alloc_order));
27169 }
27170
27171 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27172
27173 bool
27174 arm_frame_pointer_required (void)
27175 {
27176 return (cfun->has_nonlocal_label
27177 || SUBTARGET_FRAME_POINTER_REQUIRED
27178 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27179 }
27180
27181 /* Only thumb1 can't support conditional execution, so return true if
27182 the target is not thumb1. */
27183 static bool
27184 arm_have_conditional_execution (void)
27185 {
27186 return !TARGET_THUMB1;
27187 }
27188
27189 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27190 static HOST_WIDE_INT
27191 arm_vector_alignment (const_tree type)
27192 {
27193 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27194
27195 if (TARGET_AAPCS_BASED)
27196 align = MIN (align, 64);
27197
27198 return align;
27199 }
27200
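/* Return the vector sizes, in bytes, that the auto-vectorizer may try:
   16 and 8 for Neon, or none when -mvectorize-with-neon-double restricts
   us to the preferred (doubleword) mode.  */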
27201 static unsigned int
27202 arm_autovectorize_vector_sizes (void)
27203 {
27204 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27205 }
27206
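/* Return true if the desired alignment of a vector of TYPE is reachable;
   with little-endian Neon unaligned access enabled, anything that is not
   packed qualifies.  */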
27207 static bool
27208 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27209 {
27210 /* Vectors which aren't in packed structures will not be less aligned than
27211 the natural alignment of their element type, so this is safe. */
27212 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27213 return !is_packed;
27214
27215 return default_builtin_vector_alignment_reachable (type, is_packed);
27216 }
27217
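/* Return true if a misaligned vector access can be supported: with
   little-endian Neon unaligned access this is any access that is not to a
   packed field and whose misalignment, when known, is a multiple of the
   element alignment; otherwise defer to the generic handling.  */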
27218 static bool
27219 arm_builtin_support_vector_misalignment (machine_mode mode,
27220 const_tree type, int misalignment,
27221 bool is_packed)
27222 {
27223 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27224 {
27225 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27226
27227 if (is_packed)
27228 return align == 1;
27229
27230 /* If the misalignment is unknown, we should be able to handle the access
27231 so long as it is not to a member of a packed data structure. */
27232 if (misalignment == -1)
27233 return true;
27234
27235 /* Return true if the misalignment is a multiple of the natural alignment
27236 of the vector's element type. This is probably always going to be
27237 true in practice, since we've already established that this isn't a
27238 packed access. */
27239 return ((misalignment % align) == 0);
27240 }
27241
27242 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27243 is_packed);
27244 }
27245
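/* Adjust the sets of fixed and call-used registers for the selected
   target: avoid the high registers when optimizing Thumb-1 for size, make
   LR unavailable on Thumb-1, enable the VFP and iWMMXt banks where
   present, and reserve the PIC and frame-pointer registers as needed.  */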
27246 static void
27247 arm_conditional_register_usage (void)
27248 {
27249 int regno;
27250
27251 if (TARGET_THUMB1 && optimize_size)
27252 {
27253 /* When optimizing for size on Thumb-1, it's better not
27254 to use the HI regs, because of the overhead of
27255 stacking them. */
27256 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27257 fixed_regs[regno] = call_used_regs[regno] = 1;
27258 }
27259
27260 /* The link register can be clobbered by any branch insn,
27261 but we have no way to track that at present, so mark
27262 it as unavailable. */
27263 if (TARGET_THUMB1)
27264 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27265
27266 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27267 {
27268 /* VFPv3 registers are disabled when earlier VFP
27269 versions are selected due to the definition of
27270 LAST_VFP_REGNUM. */
27271 for (regno = FIRST_VFP_REGNUM;
27272 regno <= LAST_VFP_REGNUM; ++ regno)
27273 {
27274 fixed_regs[regno] = 0;
27275 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27276 || regno >= FIRST_VFP_REGNUM + 32;
27277 }
27278 }
27279
27280 if (TARGET_REALLY_IWMMXT)
27281 {
27282 regno = FIRST_IWMMXT_GR_REGNUM;
27283 /* The 2002/10/09 revision of the XScale ABI has wCG0
27284 and wCG1 as call-preserved registers. The 2002/11/21
27285 revision changed this so that all wCG registers are
27286 scratch registers. */
27287 for (regno = FIRST_IWMMXT_GR_REGNUM;
27288 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27289 fixed_regs[regno] = 0;
27290 /* The XScale ABI has wR0 - wR9 as scratch registers,
27291 the rest as call-preserved registers. */
27292 for (regno = FIRST_IWMMXT_REGNUM;
27293 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27294 {
27295 fixed_regs[regno] = 0;
27296 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27297 }
27298 }
27299
27300 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27301 {
27302 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27303 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27304 }
27305 else if (TARGET_APCS_STACK)
27306 {
27307 fixed_regs[10] = 1;
27308 call_used_regs[10] = 1;
27309 }
27310 /* -mcaller-super-interworking reserves r11 for calls to
27311 _interwork_r11_call_via_rN(). Making the register global
27312 is an easy way of ensuring that it remains valid for all
27313 calls. */
27314 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27315 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27316 {
27317 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27318 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27319 if (TARGET_CALLER_INTERWORKING)
27320 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27321 }
27322 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27323 }
27324
27325 static reg_class_t
27326 arm_preferred_rename_class (reg_class_t rclass)
27327 {
27328 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27329 using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS
27330 so that code size can be reduced. */
27331 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27332 return LO_REGS;
27333 else
27334 return NO_REGS;
27335 }
27336
27337 /* Compute the attribute "length" of insn "*push_multi".
27338 So this function MUST be kept in sync with that insn pattern. */
27339 int
27340 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27341 {
27342 int i, regno, hi_reg;
27343 int num_saves = XVECLEN (parallel_op, 0);
27344
27345 /* ARM mode. */
27346 if (TARGET_ARM)
27347 return 4;
27348 /* Thumb1 mode. */
27349 if (TARGET_THUMB1)
27350 return 2;
27351
27352 /* Thumb2 mode. */
27353 regno = REGNO (first_op);
27354 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27355 for (i = 1; i < num_saves && !hi_reg; i++)
27356 {
27357 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27358 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27359 }
27360
27361 if (!hi_reg)
27362 return 2;
27363 return 4;
27364 }
27365
27366 /* Compute the number of instructions emitted by output_move_double. */
27367 int
27368 arm_count_output_move_double_insns (rtx *operands)
27369 {
27370 int count;
27371 rtx ops[2];
27372 /* output_move_double may modify the operands array, so call it
27373 here on a copy of the array. */
27374 ops[0] = operands[0];
27375 ops[1] = operands[1];
27376 output_move_double (ops, false, &count);
27377 return count;
27378 }
27379
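/* If OPERAND is a positive CONST_DOUBLE whose reciprocal is an exact
   power of two, return that power, i.e. the number of fraction bits
   (e.g. 0.125 yields 3); otherwise return 0.  */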
27380 int
27381 vfp3_const_double_for_fract_bits (rtx operand)
27382 {
27383 REAL_VALUE_TYPE r0;
27384
27385 if (!CONST_DOUBLE_P (operand))
27386 return 0;
27387
27388 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27389 if (exact_real_inverse (DFmode, &r0)
27390 && !REAL_VALUE_NEGATIVE (r0))
27391 {
27392 if (exact_real_truncate (DFmode, &r0))
27393 {
27394 HOST_WIDE_INT value = real_to_integer (&r0);
27395 value = value & 0xffffffff;
27396 if ((value != 0) && ( (value & (value - 1)) == 0))
27397 return int_log2 (value);
27398 }
27399 }
27400 return 0;
27401 }
27402
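/* Likewise, but for a CONST_DOUBLE that is itself an exact power of two:
   return its base-two logarithm (e.g. 8.0 yields 3), or 0 if it is not.  */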
27403 int
27404 vfp3_const_double_for_bits (rtx operand)
27405 {
27406 REAL_VALUE_TYPE r0;
27407
27408 if (!CONST_DOUBLE_P (operand))
27409 return 0;
27410
27411 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27412 if (exact_real_truncate (DFmode, &r0))
27413 {
27414 HOST_WIDE_INT value = real_to_integer (&r0);
27415 value = value & 0xffffffff;
27416 if ((value != 0) && ( (value & (value - 1)) == 0))
27417 return int_log2 (value);
27418 }
27419
27420 return 0;
27421 }
27422 \f
27423 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27424
27425 static void
27426 arm_pre_atomic_barrier (enum memmodel model)
27427 {
27428 if (need_atomic_barrier_p (model, true))
27429 emit_insn (gen_memory_barrier ());
27430 }
27431
27432 static void
27433 arm_post_atomic_barrier (enum memmodel model)
27434 {
27435 if (need_atomic_barrier_p (model, false))
27436 emit_insn (gen_memory_barrier ());
27437 }
27438
27439 /* Emit the load-exclusive and store-exclusive instructions.
27440 Use acquire and release versions if necessary. */
27441
27442 static void
27443 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27444 {
27445 rtx (*gen) (rtx, rtx);
27446
27447 if (acq)
27448 {
27449 switch (mode)
27450 {
27451 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27452 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27453 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27454 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27455 default:
27456 gcc_unreachable ();
27457 }
27458 }
27459 else
27460 {
27461 switch (mode)
27462 {
27463 case QImode: gen = gen_arm_load_exclusiveqi; break;
27464 case HImode: gen = gen_arm_load_exclusivehi; break;
27465 case SImode: gen = gen_arm_load_exclusivesi; break;
27466 case DImode: gen = gen_arm_load_exclusivedi; break;
27467 default:
27468 gcc_unreachable ();
27469 }
27470 }
27471
27472 emit_insn (gen (rval, mem));
27473 }
27474
27475 static void
27476 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27477 rtx mem, bool rel)
27478 {
27479 rtx (*gen) (rtx, rtx, rtx);
27480
27481 if (rel)
27482 {
27483 switch (mode)
27484 {
27485 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27486 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27487 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27488 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27489 default:
27490 gcc_unreachable ();
27491 }
27492 }
27493 else
27494 {
27495 switch (mode)
27496 {
27497 case QImode: gen = gen_arm_store_exclusiveqi; break;
27498 case HImode: gen = gen_arm_store_exclusivehi; break;
27499 case SImode: gen = gen_arm_store_exclusivesi; break;
27500 case DImode: gen = gen_arm_store_exclusivedi; break;
27501 default:
27502 gcc_unreachable ();
27503 }
27504 }
27505
27506 emit_insn (gen (bval, rval, mem));
27507 }
27508
27509 /* Emit the jump instruction INSN and mark it as very unlikely to be taken. */
27510
27511 static void
27512 emit_unlikely_jump (rtx insn)
27513 {
27514 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27515
27516 insn = emit_jump_insn (insn);
27517 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27518 }
27519
27520 /* Expand a compare and swap pattern. */
27521
27522 void
27523 arm_expand_compare_and_swap (rtx operands[])
27524 {
27525 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27526 machine_mode mode;
27527 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27528
27529 bval = operands[0];
27530 rval = operands[1];
27531 mem = operands[2];
27532 oldval = operands[3];
27533 newval = operands[4];
27534 is_weak = operands[5];
27535 mod_s = operands[6];
27536 mod_f = operands[7];
27537 mode = GET_MODE (mem);
27538
27539 /* Normally the succ memory model must be stronger than fail, but in the
27540 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27541 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27542
27543 if (TARGET_HAVE_LDACQ
27544 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27545 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27546 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27547
27548 switch (mode)
27549 {
27550 case QImode:
27551 case HImode:
27552 /* For narrow modes, we're going to perform the comparison in SImode,
27553 so do the zero-extension now. */
27554 rval = gen_reg_rtx (SImode);
27555 oldval = convert_modes (SImode, mode, oldval, true);
27556 /* FALLTHRU */
27557
27558 case SImode:
27559 /* Force the value into a register if needed. We waited until after
27560 the zero-extension above to do this properly. */
27561 if (!arm_add_operand (oldval, SImode))
27562 oldval = force_reg (SImode, oldval);
27563 break;
27564
27565 case DImode:
27566 if (!cmpdi_operand (oldval, mode))
27567 oldval = force_reg (mode, oldval);
27568 break;
27569
27570 default:
27571 gcc_unreachable ();
27572 }
27573
27574 switch (mode)
27575 {
27576 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27577 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27578 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27579 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27580 default:
27581 gcc_unreachable ();
27582 }
27583
27584 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27585
27586 if (mode == QImode || mode == HImode)
27587 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27588
27589 /* In all cases, we arrange for success to be signaled by Z set.
27590 This arrangement allows for the boolean result to be used directly
27591 in a subsequent branch, post optimization. */
27592 x = gen_rtx_REG (CCmode, CC_REGNUM);
27593 x = gen_rtx_EQ (SImode, x, const0_rtx);
27594 emit_insn (gen_rtx_SET (bval, x));
27595 }
27596
27597 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27598 another memory store between the load-exclusive and store-exclusive can
27599 reset the monitor from Exclusive to Open state. This means we must wait
27600 until after reload to split the pattern, lest we get a register spill in
27601 the middle of the atomic sequence. */
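/* As an illustrative sketch only (barriers and acquire/release variants
   omitted), a strong SImode compare-and-swap splits into roughly:

       retry:
         ldrex   rval, [mem]
         cmp     rval, oldval
         bne     done                @ mismatch: fail with Z clear
         strex   scratch, newval, [mem]
         cmp     scratch, #0
         bne     retry               @ reservation lost: try again
       done:                                                            */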
27602
27603 void
27604 arm_split_compare_and_swap (rtx operands[])
27605 {
27606 rtx rval, mem, oldval, newval, scratch;
27607 machine_mode mode;
27608 enum memmodel mod_s, mod_f;
27609 bool is_weak;
27610 rtx_code_label *label1, *label2;
27611 rtx x, cond;
27612
27613 rval = operands[0];
27614 mem = operands[1];
27615 oldval = operands[2];
27616 newval = operands[3];
27617 is_weak = (operands[4] != const0_rtx);
27618 mod_s = memmodel_from_int (INTVAL (operands[5]));
27619 mod_f = memmodel_from_int (INTVAL (operands[6]));
27620 scratch = operands[7];
27621 mode = GET_MODE (mem);
27622
27623 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27624
27625 bool use_acquire = TARGET_HAVE_LDACQ
27626 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27627 || is_mm_release (mod_s));
27628
27629 bool use_release = TARGET_HAVE_LDACQ
27630 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27631 || is_mm_acquire (mod_s));
27632
27633 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27634 a full barrier is emitted after the store-release. */
27635 if (is_armv8_sync)
27636 use_acquire = false;
27637
27638 /* Checks whether a barrier is needed and emits one accordingly. */
27639 if (!(use_acquire || use_release))
27640 arm_pre_atomic_barrier (mod_s);
27641
27642 label1 = NULL;
27643 if (!is_weak)
27644 {
27645 label1 = gen_label_rtx ();
27646 emit_label (label1);
27647 }
27648 label2 = gen_label_rtx ();
27649
27650 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27651
27652 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27653 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27654 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27655 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27656 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27657
27658 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27659
27660 /* Weak or strong, we want EQ to be true for success, so that we
27661 match the flags that we got from the compare above. */
27662 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27663 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27664 emit_insn (gen_rtx_SET (cond, x));
27665
27666 if (!is_weak)
27667 {
27668 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27669 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27670 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27671 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27672 }
27673
27674 if (!is_mm_relaxed (mod_f))
27675 emit_label (label2);
27676
27677 /* Checks whether a barrier is needed and emits one accordingly. */
27678 if (is_armv8_sync
27679 || !(use_acquire || use_release))
27680 arm_post_atomic_barrier (mod_s);
27681
27682 if (is_mm_relaxed (mod_f))
27683 emit_label (label2);
27684 }
27685
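/* Split an atomic read-modify-write operation into a load-exclusive /
   compute / store-exclusive retry loop.  OLD_OUT (if non-null) receives the
   value loaded from MEM, NEW_OUT the value stored back, and COND is a
   scratch register used for the store-exclusive status.  As an illustrative
   sketch only (barriers and acquire/release variants omitted), an SImode
   fetch-and-add splits into roughly:

       retry:
         ldrex   old, [mem]
         add     new, old, value
         strex   tmp, new, [mem]
         cmp     tmp, #0
         bne     retry               @ reservation lost: try again      */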
27686 void
27687 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27688 rtx value, rtx model_rtx, rtx cond)
27689 {
27690 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
27691 machine_mode mode = GET_MODE (mem);
27692 machine_mode wmode = (mode == DImode ? DImode : SImode);
27693 rtx_code_label *label;
27694 rtx x;
27695
27696 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
27697
27698 bool use_acquire = TARGET_HAVE_LDACQ
27699 && !(is_mm_relaxed (model) || is_mm_consume (model)
27700 || is_mm_release (model));
27701
27702 bool use_release = TARGET_HAVE_LDACQ
27703 && !(is_mm_relaxed (model) || is_mm_consume (model)
27704 || is_mm_acquire (model));
27705
27706 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
27707 a full barrier is emitted after the store-release. */
27708 if (is_armv8_sync)
27709 use_acquire = false;
27710
27711 /* Checks whether a barrier is needed and emits one accordingly. */
27712 if (!(use_acquire || use_release))
27713 arm_pre_atomic_barrier (model);
27714
27715 label = gen_label_rtx ();
27716 emit_label (label);
27717
27718 if (new_out)
27719 new_out = gen_lowpart (wmode, new_out);
27720 if (old_out)
27721 old_out = gen_lowpart (wmode, old_out);
27722 else
27723 old_out = new_out;
27724 value = simplify_gen_subreg (wmode, value, mode, 0);
27725
27726 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27727
27728 switch (code)
27729 {
27730 case SET:
27731 new_out = value;
27732 break;
27733
27734 case NOT:
27735 x = gen_rtx_AND (wmode, old_out, value);
27736 emit_insn (gen_rtx_SET (new_out, x));
27737 x = gen_rtx_NOT (wmode, new_out);
27738 emit_insn (gen_rtx_SET (new_out, x));
27739 break;
27740
27741 case MINUS:
27742 if (CONST_INT_P (value))
27743 {
27744 value = GEN_INT (-INTVAL (value));
27745 code = PLUS;
27746 }
27747 /* FALLTHRU */
27748
27749 case PLUS:
27750 if (mode == DImode)
27751 {
27752 /* DImode plus/minus need to clobber flags. */
27753 /* The adddi3 and subdi3 patterns are incorrectly written so that
27754 they require matching operands, even when we could easily support
27755 three operands. Thankfully, this can be fixed up post-splitting,
27756 as the individual add+adc patterns do accept three operands and
27757 post-reload cprop can make these moves go away. */
27758 emit_move_insn (new_out, old_out);
27759 if (code == PLUS)
27760 x = gen_adddi3 (new_out, new_out, value);
27761 else
27762 x = gen_subdi3 (new_out, new_out, value);
27763 emit_insn (x);
27764 break;
27765 }
27766 /* FALLTHRU */
27767
27768 default:
27769 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27770 emit_insn (gen_rtx_SET (new_out, x));
27771 break;
27772 }
27773
27774 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27775 use_release);
27776
27777 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27778 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27779
27780 /* Checks whether a barrier is needed and emits one accordingly. */
27781 if (is_armv8_sync
27782 || !(use_acquire || use_release))
27783 arm_post_atomic_barrier (model);
27784 }
27785 \f
27786 #define MAX_VECT_LEN 16
27787
27788 struct expand_vec_perm_d
27789 {
27790 rtx target, op0, op1;
27791 unsigned char perm[MAX_VECT_LEN];
27792 machine_mode vmode;
27793 unsigned char nelt;
27794 bool one_vector_p;
27795 bool testing_p;
27796 };
27797
27798 /* Generate a variable permutation. */
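/* As an illustrative example, when OP0 == OP1 in V8QImode, a SEL of
   { 7, 6, 5, 4, 3, 2, 1, 0 } makes the single vtbl emitted below reverse
   the bytes of OP0.  */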
27799
27800 static void
27801 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27802 {
27803 machine_mode vmode = GET_MODE (target);
27804 bool one_vector_p = rtx_equal_p (op0, op1);
27805
27806 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27807 gcc_checking_assert (GET_MODE (op0) == vmode);
27808 gcc_checking_assert (GET_MODE (op1) == vmode);
27809 gcc_checking_assert (GET_MODE (sel) == vmode);
27810 gcc_checking_assert (TARGET_NEON);
27811
27812 if (one_vector_p)
27813 {
27814 if (vmode == V8QImode)
27815 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27816 else
27817 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27818 }
27819 else
27820 {
27821 rtx pair;
27822
27823 if (vmode == V8QImode)
27824 {
27825 pair = gen_reg_rtx (V16QImode);
27826 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27827 pair = gen_lowpart (TImode, pair);
27828 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27829 }
27830 else
27831 {
27832 pair = gen_reg_rtx (OImode);
27833 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27834 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27835 }
27836 }
27837 }
27838
27839 void
27840 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27841 {
27842 machine_mode vmode = GET_MODE (target);
27843 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27844 bool one_vector_p = rtx_equal_p (op0, op1);
27845 rtx rmask[MAX_VECT_LEN], mask;
27846
27847 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27848 numbering of elements for big-endian, we must reverse the order. */
27849 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27850
27851 /* The VTBL instruction does not use a modulo index, so we must take care
27852 of that ourselves. */
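   /* For example, with a single 8-element input a selector element of 9 is
      reduced to 9 & 7 == 1 by the AND below, matching the modulo semantics
      that VEC_PERM_EXPR requires.  */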
27853 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27854 for (i = 0; i < nelt; ++i)
27855 rmask[i] = mask;
27856 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27857 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27858
27859 arm_expand_vec_perm_1 (target, op0, op1, sel);
27860 }
27861
27862 /* Generate or test for an insn that supports a constant permutation. */
27863
27864 /* Recognize patterns for the VUZP insns. */
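/* For example (illustrative only), in V8QImode with two distinct inputs the
   selector { 0, 2, 4, 6, 8, 10, 12, 14 } (the even-numbered elements of the
   16-element concatenation) is matched here and corresponds to the first
   output of a vuzp.8.  */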
27865
27866 static bool
27867 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27868 {
27869 unsigned int i, odd, mask, nelt = d->nelt;
27870 rtx out0, out1, in0, in1, x;
27871 rtx (*gen)(rtx, rtx, rtx, rtx);
27872
27873 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27874 return false;
27875
27876 /* Note that these are little-endian tests. Adjust for big-endian later. */
27877 if (d->perm[0] == 0)
27878 odd = 0;
27879 else if (d->perm[0] == 1)
27880 odd = 1;
27881 else
27882 return false;
27883 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27884
27885 for (i = 0; i < nelt; i++)
27886 {
27887 unsigned elt = (i * 2 + odd) & mask;
27888 if (d->perm[i] != elt)
27889 return false;
27890 }
27891
27892 /* Success! */
27893 if (d->testing_p)
27894 return true;
27895
27896 switch (d->vmode)
27897 {
27898 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27899 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27900 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27901 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27902 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27903 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27904 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27905 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27906 default:
27907 gcc_unreachable ();
27908 }
27909
27910 in0 = d->op0;
27911 in1 = d->op1;
27912 if (BYTES_BIG_ENDIAN)
27913 {
27914 x = in0, in0 = in1, in1 = x;
27915 odd = !odd;
27916 }
27917
27918 out0 = d->target;
27919 out1 = gen_reg_rtx (d->vmode);
27920 if (odd)
27921 x = out0, out0 = out1, out1 = x;
27922
27923 emit_insn (gen (out0, in0, in1, out1));
27924 return true;
27925 }
27926
27927 /* Recognize patterns for the VZIP insns. */
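/* For example (illustrative only), in V8QImode with two distinct inputs the
   selector { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the
   two vectors and corresponds to the first output of a vzip.8.  */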
27928
27929 static bool
27930 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27931 {
27932 unsigned int i, high, mask, nelt = d->nelt;
27933 rtx out0, out1, in0, in1, x;
27934 rtx (*gen)(rtx, rtx, rtx, rtx);
27935
27936 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27937 return false;
27938
27939 /* Note that these are little-endian tests. Adjust for big-endian later. */
27940 high = nelt / 2;
27941 if (d->perm[0] == high)
27942 ;
27943 else if (d->perm[0] == 0)
27944 high = 0;
27945 else
27946 return false;
27947 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27948
27949 for (i = 0; i < nelt / 2; i++)
27950 {
27951 unsigned elt = (i + high) & mask;
27952 if (d->perm[i * 2] != elt)
27953 return false;
27954 elt = (elt + nelt) & mask;
27955 if (d->perm[i * 2 + 1] != elt)
27956 return false;
27957 }
27958
27959 /* Success! */
27960 if (d->testing_p)
27961 return true;
27962
27963 switch (d->vmode)
27964 {
27965 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27966 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27967 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27968 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27969 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27970 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27971 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27972 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27973 default:
27974 gcc_unreachable ();
27975 }
27976
27977 in0 = d->op0;
27978 in1 = d->op1;
27979 if (BYTES_BIG_ENDIAN)
27980 {
27981 x = in0, in0 = in1, in1 = x;
27982 high = !high;
27983 }
27984
27985 out0 = d->target;
27986 out1 = gen_reg_rtx (d->vmode);
27987 if (high)
27988 x = out0, out0 = out1, out1 = x;
27989
27990 emit_insn (gen (out0, in0, in1, out1));
27991 return true;
27992 }
27993
27994 /* Recognize patterns for the VREV insns. */
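/* For example (illustrative only), in V8HImode the single-input selector
   { 1, 0, 3, 2, 5, 4, 7, 6 } has diff == 1 and maps to vrev32.16, which
   reverses the 16-bit elements within each 32-bit word.  */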
27995
27996 static bool
27997 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27998 {
27999 unsigned int i, j, diff, nelt = d->nelt;
28000 rtx (*gen)(rtx, rtx);
28001
28002 if (!d->one_vector_p)
28003 return false;
28004
28005 diff = d->perm[0];
28006 switch (diff)
28007 {
28008 case 7:
28009 switch (d->vmode)
28010 {
28011 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28012 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28013 default:
28014 return false;
28015 }
28016 break;
28017 case 3:
28018 switch (d->vmode)
28019 {
28020 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28021 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28022 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28023 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28024 default:
28025 return false;
28026 }
28027 break;
28028 case 1:
28029 switch (d->vmode)
28030 {
28031 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28032 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28033 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28034 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28035 case V4SImode: gen = gen_neon_vrev64v4si; break;
28036 case V2SImode: gen = gen_neon_vrev64v2si; break;
28037 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28038 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28039 default:
28040 return false;
28041 }
28042 break;
28043 default:
28044 return false;
28045 }
28046
28047 for (i = 0; i < nelt ; i += diff + 1)
28048 for (j = 0; j <= diff; j += 1)
28049 {
28050 /* This is guaranteed to be true as the value of diff
28051 is 7, 3 or 1, and there are always enough elements left
28052 in the vector to check. Getting a selector with a
28053 value of diff other than these values implies that
28054 something is wrong by the time we get here. */
28055 gcc_assert (i + j < nelt);
28056 if (d->perm[i + j] != i + diff - j)
28057 return false;
28058 }
28059
28060 /* Success! */
28061 if (d->testing_p)
28062 return true;
28063
28064 emit_insn (gen (d->target, d->op0));
28065 return true;
28066 }
28067
28068 /* Recognize patterns for the VTRN insns. */
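/* For example (illustrative only), in V8QImode with two distinct inputs the
   selector { 0, 8, 2, 10, 4, 12, 6, 14 } is matched here (odd == 0) and
   corresponds to the first output of a vtrn.8.  */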
28069
28070 static bool
28071 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28072 {
28073 unsigned int i, odd, mask, nelt = d->nelt;
28074 rtx out0, out1, in0, in1, x;
28075 rtx (*gen)(rtx, rtx, rtx, rtx);
28076
28077 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28078 return false;
28079
28080 /* Note that these are little-endian tests. Adjust for big-endian later. */
28081 if (d->perm[0] == 0)
28082 odd = 0;
28083 else if (d->perm[0] == 1)
28084 odd = 1;
28085 else
28086 return false;
28087 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28088
28089 for (i = 0; i < nelt; i += 2)
28090 {
28091 if (d->perm[i] != i + odd)
28092 return false;
28093 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28094 return false;
28095 }
28096
28097 /* Success! */
28098 if (d->testing_p)
28099 return true;
28100
28101 switch (d->vmode)
28102 {
28103 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28104 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28105 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28106 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28107 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28108 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28109 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28110 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28111 default:
28112 gcc_unreachable ();
28113 }
28114
28115 in0 = d->op0;
28116 in1 = d->op1;
28117 if (BYTES_BIG_ENDIAN)
28118 {
28119 x = in0, in0 = in1, in1 = x;
28120 odd = !odd;
28121 }
28122
28123 out0 = d->target;
28124 out1 = gen_reg_rtx (d->vmode);
28125 if (odd)
28126 x = out0, out0 = out1, out1 = x;
28127
28128 emit_insn (gen (out0, in0, in1, out1));
28129 return true;
28130 }
28131
28132 /* Recognize patterns for the VEXT insns. */
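/* For example (illustrative only), the single-input V8QImode selector
   { 3, 4, 5, 6, 7, 0, 1, 2 } is a rotation by three bytes and maps to a
   vext.8 with an immediate of 3.  */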
28133
28134 static bool
28135 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28136 {
28137 unsigned int i, nelt = d->nelt;
28138 rtx (*gen) (rtx, rtx, rtx, rtx);
28139 rtx offset;
28140
28141 unsigned int location;
28142
28143 unsigned int next = d->perm[0] + 1;
28144
28145 /* TODO: Handle GCC's numbering of elements for big-endian. */
28146 if (BYTES_BIG_ENDIAN)
28147 return false;
28148
28149 /* Check if the extracted indexes are increasing by one. */
28150 for (i = 1; i < nelt; next++, i++)
28151 {
28152 /* If we hit the most significant element of the 2nd vector in
28153 the previous iteration, no need to test further. */
28154 if (next == 2 * nelt)
28155 return false;
28156
28157 /* If we are operating on only one vector: it could be a
28158 rotation. If there are only two elements of size < 64, let
28159 arm_evpc_neon_vrev catch it. */
28160 if (d->one_vector_p && (next == nelt))
28161 {
28162 if ((nelt == 2) && (d->vmode != V2DImode))
28163 return false;
28164 else
28165 next = 0;
28166 }
28167
28168 if (d->perm[i] != next)
28169 return false;
28170 }
28171
28172 location = d->perm[0];
28173
28174 switch (d->vmode)
28175 {
28176 case V16QImode: gen = gen_neon_vextv16qi; break;
28177 case V8QImode: gen = gen_neon_vextv8qi; break;
28178 case V4HImode: gen = gen_neon_vextv4hi; break;
28179 case V8HImode: gen = gen_neon_vextv8hi; break;
28180 case V2SImode: gen = gen_neon_vextv2si; break;
28181 case V4SImode: gen = gen_neon_vextv4si; break;
28182 case V2SFmode: gen = gen_neon_vextv2sf; break;
28183 case V4SFmode: gen = gen_neon_vextv4sf; break;
28184 case V2DImode: gen = gen_neon_vextv2di; break;
28185 default:
28186 return false;
28187 }
28188
28189 /* Success! */
28190 if (d->testing_p)
28191 return true;
28192
28193 offset = GEN_INT (location);
28194 emit_insn (gen (d->target, d->op0, d->op1, offset));
28195 return true;
28196 }
28197
28198 /* The NEON VTBL instruction is a fully variable permutation that's even
28199 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28200 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28201 can do slightly better by expanding this as a constant where we don't
28202 have to apply a mask. */
28203
28204 static bool
28205 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28206 {
28207 rtx rperm[MAX_VECT_LEN], sel;
28208 machine_mode vmode = d->vmode;
28209 unsigned int i, nelt = d->nelt;
28210
28211 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28212 numbering of elements for big-endian, we must reverse the order. */
28213 if (BYTES_BIG_ENDIAN)
28214 return false;
28215
28216 if (d->testing_p)
28217 return true;
28218
28219 /* Generic code will try constant permutation twice. Once with the
28220 original mode and again with the elements lowered to QImode.
28221 So wait and don't do the selector expansion ourselves. */
28222 if (vmode != V8QImode && vmode != V16QImode)
28223 return false;
28224
28225 for (i = 0; i < nelt; ++i)
28226 rperm[i] = GEN_INT (d->perm[i]);
28227 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28228 sel = force_reg (vmode, sel);
28229
28230 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28231 return true;
28232 }
28233
28234 static bool
28235 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28236 {
28237 /* Check if the input mask matches vext before reordering the
28238 operands. */
28239 if (TARGET_NEON)
28240 if (arm_evpc_neon_vext (d))
28241 return true;
28242
28243 /* The pattern matching functions above are written to look for a small
28244 number to begin the sequence (0, 1, N/2). If we begin with an index
28245 from the second operand, we can swap the operands. */
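  /* For example, with nelt == 4 the selector { 4, 0, 6, 2 } is rewritten
     below to { 0, 4, 2, 6 } with the operands swapped, which the VTRN
     matcher can then recognize.  */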
28246 if (d->perm[0] >= d->nelt)
28247 {
28248 unsigned i, nelt = d->nelt;
28249 rtx x;
28250
28251 for (i = 0; i < nelt; ++i)
28252 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28253
28254 x = d->op0;
28255 d->op0 = d->op1;
28256 d->op1 = x;
28257 }
28258
28259 if (TARGET_NEON)
28260 {
28261 if (arm_evpc_neon_vuzp (d))
28262 return true;
28263 if (arm_evpc_neon_vzip (d))
28264 return true;
28265 if (arm_evpc_neon_vrev (d))
28266 return true;
28267 if (arm_evpc_neon_vtrn (d))
28268 return true;
28269 return arm_evpc_neon_vtbl (d);
28270 }
28271 return false;
28272 }
28273
28274 /* Expand a vec_perm_const pattern. */
28275
28276 bool
28277 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28278 {
28279 struct expand_vec_perm_d d;
28280 int i, nelt, which;
28281
28282 d.target = target;
28283 d.op0 = op0;
28284 d.op1 = op1;
28285
28286 d.vmode = GET_MODE (target);
28287 gcc_assert (VECTOR_MODE_P (d.vmode));
28288 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28289 d.testing_p = false;
28290
28291 for (i = which = 0; i < nelt; ++i)
28292 {
28293 rtx e = XVECEXP (sel, 0, i);
28294 int ei = INTVAL (e) & (2 * nelt - 1);
28295 which |= (ei < nelt ? 1 : 2);
28296 d.perm[i] = ei;
28297 }
28298
28299 switch (which)
28300 {
28301 default:
28302 gcc_unreachable();
28303
28304 case 3:
28305 d.one_vector_p = false;
28306 if (!rtx_equal_p (op0, op1))
28307 break;
28308
28309 /* The elements of PERM do not suggest that only the first operand
28310 is used, but both operands are identical. Allow easier matching
28311 of the permutation by folding the permutation into the single
28312 input vector. */
28313 /* FALLTHRU */
28314 case 2:
28315 for (i = 0; i < nelt; ++i)
28316 d.perm[i] &= nelt - 1;
28317 d.op0 = op1;
28318 d.one_vector_p = true;
28319 break;
28320
28321 case 1:
28322 d.op1 = op0;
28323 d.one_vector_p = true;
28324 break;
28325 }
28326
28327 return arm_expand_vec_perm_const_1 (&d);
28328 }
28329
28330 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28331
28332 static bool
28333 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28334 const unsigned char *sel)
28335 {
28336 struct expand_vec_perm_d d;
28337 unsigned int i, nelt, which;
28338 bool ret;
28339
28340 d.vmode = vmode;
28341 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28342 d.testing_p = true;
28343 memcpy (d.perm, sel, nelt);
28344
28345 /* Categorize the set of elements in the selector. */
28346 for (i = which = 0; i < nelt; ++i)
28347 {
28348 unsigned char e = d.perm[i];
28349 gcc_assert (e < 2 * nelt);
28350 which |= (e < nelt ? 1 : 2);
28351 }
28352
28353 /* For all elements from second vector, fold the elements to first. */
28354 if (which == 2)
28355 for (i = 0; i < nelt; ++i)
28356 d.perm[i] -= nelt;
28357
28358 /* Check whether the mask can be applied to the vector type. */
28359 d.one_vector_p = (which != 3);
28360
28361 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28362 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28363 if (!d.one_vector_p)
28364 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28365
28366 start_sequence ();
28367 ret = arm_expand_vec_perm_const_1 (&d);
28368 end_sequence ();
28369
28370 return ret;
28371 }
28372
28373 bool
28374 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28375 {
28376 /* If we are soft float and we do not have ldrd
28377 then all auto increment forms are ok. */
28378 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28379 return true;
28380
28381 switch (code)
28382 {
28383 /* Post-increment is supported for all instruction forms; pre-decrement
28384 is supported for all forms except the vector ones. */
28385 case ARM_POST_INC:
28386 case ARM_PRE_DEC:
28387 if (VECTOR_MODE_P (mode))
28388 {
28389 if (code != ARM_PRE_DEC)
28390 return true;
28391 else
28392 return false;
28393 }
28394
28395 return true;
28396
28397 case ARM_POST_DEC:
28398 case ARM_PRE_INC:
28399 /* Without LDRD and mode size greater than
28400 word size, there is no point in auto-incrementing
28401 because ldm and stm will not have these forms. */
28402 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28403 return false;
28404
28405 /* Vector and floating point modes do not support
28406 these auto increment forms. */
28407 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28408 return false;
28409
28410 return true;
28411
28412 default:
28413 return false;
28414
28415 }
28416
28417 return false;
28418 }
28419
28420 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28421 on ARM, since we know that shifts by negative amounts are no-ops.
28422 Additionally, the default expansion code is not available or suitable
28423 for post-reload insn splits (this can occur when the register allocator
28424 chooses not to do a shift in NEON).
28425
28426 This function is used in both initial expand and post-reload splits, and
28427 handles all kinds of 64-bit shifts.
28428
28429 Input requirements:
28430 - It is safe for the input and output to be the same register, but
28431 early-clobber rules apply for the shift amount and scratch registers.
28432 - Shift by register requires both scratch registers. In all other cases
28433 the scratch registers may be NULL.
28434 - Ashiftrt by a register also clobbers the CC register. */
28435 void
28436 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28437 rtx amount, rtx scratch1, rtx scratch2)
28438 {
28439 rtx out_high = gen_highpart (SImode, out);
28440 rtx out_low = gen_lowpart (SImode, out);
28441 rtx in_high = gen_highpart (SImode, in);
28442 rtx in_low = gen_lowpart (SImode, in);
28443
28444 /* Terminology:
28445 in = the register pair containing the input value.
28446 out = the destination register pair.
28447 up = the high- or low-part of each pair.
28448 down = the opposite part to "up".
28449 In a shift, we can consider bits to shift from "up"-stream to
28450 "down"-stream, so in a left-shift "up" is the low-part and "down"
28451 is the high-part of each register pair. */
28452
28453 rtx out_up = code == ASHIFT ? out_low : out_high;
28454 rtx out_down = code == ASHIFT ? out_high : out_low;
28455 rtx in_up = code == ASHIFT ? in_low : in_high;
28456 rtx in_down = code == ASHIFT ? in_high : in_low;
28457
28458 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28459 gcc_assert (out
28460 && (REG_P (out) || GET_CODE (out) == SUBREG)
28461 && GET_MODE (out) == DImode);
28462 gcc_assert (in
28463 && (REG_P (in) || GET_CODE (in) == SUBREG)
28464 && GET_MODE (in) == DImode);
28465 gcc_assert (amount
28466 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28467 && GET_MODE (amount) == SImode)
28468 || CONST_INT_P (amount)));
28469 gcc_assert (scratch1 == NULL
28470 || (GET_CODE (scratch1) == SCRATCH)
28471 || (GET_MODE (scratch1) == SImode
28472 && REG_P (scratch1)));
28473 gcc_assert (scratch2 == NULL
28474 || (GET_CODE (scratch2) == SCRATCH)
28475 || (GET_MODE (scratch2) == SImode
28476 && REG_P (scratch2)));
28477 gcc_assert (!REG_P (out) || !REG_P (amount)
28478 || !HARD_REGISTER_P (out)
28479 || (REGNO (out) != REGNO (amount)
28480 && REGNO (out) + 1 != REGNO (amount)));
28481
28482 /* Macros to make following code more readable. */
28483 #define SUB_32(DEST,SRC) \
28484 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28485 #define RSB_32(DEST,SRC) \
28486 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28487 #define SUB_S_32(DEST,SRC) \
28488 gen_addsi3_compare0 ((DEST), (SRC), \
28489 GEN_INT (-32))
28490 #define SET(DEST,SRC) \
28491 gen_rtx_SET ((DEST), (SRC))
28492 #define SHIFT(CODE,SRC,AMOUNT) \
28493 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28494 #define LSHIFT(CODE,SRC,AMOUNT) \
28495 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28496 SImode, (SRC), (AMOUNT))
28497 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28498 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28499 SImode, (SRC), (AMOUNT))
28500 #define ORR(A,B) \
28501 gen_rtx_IOR (SImode, (A), (B))
28502 #define BRANCH(COND,LABEL) \
28503 gen_arm_cond_branch ((LABEL), \
28504 gen_rtx_ ## COND (CCmode, cc_reg, \
28505 const0_rtx), \
28506 cc_reg)
28507
28508 /* Shifts by register and shifts by constant are handled separately. */
28509 if (CONST_INT_P (amount))
28510 {
28511 /* We have a shift-by-constant. */
28512
28513 /* First, handle out-of-range shift amounts.
28514 In both cases we try to match the result that an ARM instruction in a
28515 shift-by-register would give. This helps reduce execution
28516 differences between optimization levels, but it won't stop other
28517 parts of the compiler doing different things. This is "undefined
28518 behaviour", in any case. */
28519 if (INTVAL (amount) <= 0)
28520 emit_insn (gen_movdi (out, in));
28521 else if (INTVAL (amount) >= 64)
28522 {
28523 if (code == ASHIFTRT)
28524 {
28525 rtx const31_rtx = GEN_INT (31);
28526 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28527 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28528 }
28529 else
28530 emit_insn (gen_movdi (out, const0_rtx));
28531 }
28532
28533 /* Now handle valid shifts. */
28534 else if (INTVAL (amount) < 32)
28535 {
28536 /* Shifts by a constant less than 32. */
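	  /* As an illustration, for a left shift by a constant N < 32 ("down"
	     is then the high word and "up" the low word) the three insns
	     below compute:
		 out_hi  = in_hi << N;
		 out_hi |= (unsigned) in_lo >> (32 - N);
		 out_lo  = in_lo << N;  */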
28537 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28538
28539 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28540 emit_insn (SET (out_down,
28541 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28542 out_down)));
28543 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28544 }
28545 else
28546 {
28547 /* Shifts by a constant greater than 31. */
28548 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28549
28550 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28551 if (code == ASHIFTRT)
28552 emit_insn (gen_ashrsi3 (out_up, in_up,
28553 GEN_INT (31)));
28554 else
28555 emit_insn (SET (out_up, const0_rtx));
28556 }
28557 }
28558 else
28559 {
28560 /* We have a shift-by-register. */
28561 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28562
28563 /* This alternative requires the scratch registers. */
28564 gcc_assert (scratch1 && REG_P (scratch1));
28565 gcc_assert (scratch2 && REG_P (scratch2));
28566
28567 /* We will need the values "amount-32" and "32-amount" later.
28568 Swapping them around now allows the later code to be more general. */
28569 switch (code)
28570 {
28571 case ASHIFT:
28572 emit_insn (SUB_32 (scratch1, amount));
28573 emit_insn (RSB_32 (scratch2, amount));
28574 break;
28575 case ASHIFTRT:
28576 emit_insn (RSB_32 (scratch1, amount));
28577 /* Also set CC = amount > 32. */
28578 emit_insn (SUB_S_32 (scratch2, amount));
28579 break;
28580 case LSHIFTRT:
28581 emit_insn (RSB_32 (scratch1, amount));
28582 emit_insn (SUB_32 (scratch2, amount));
28583 break;
28584 default:
28585 gcc_unreachable ();
28586 }
28587
28588 /* Emit code like this:
28589
28590 arithmetic-left:
28591 out_down = in_down << amount;
28592 out_down = (in_up << (amount - 32)) | out_down;
28593 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28594 out_up = in_up << amount;
28595
28596 arithmetic-right:
28597 out_down = in_down >> amount;
28598 out_down = (in_up << (32 - amount)) | out_down;
28599 if (amount < 32)
28600 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28601 out_up = in_up << amount;
28602
28603 logical-right:
28604 out_down = in_down >> amount;
28605 out_down = (in_up << (32 - amount)) | out_down;
28606 if (amount < 32)
28607 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28608 out_up = in_up << amount;
28609
28610 The ARM and Thumb2 variants are the same but implemented slightly
28611 differently. If this were only called during expand we could just
28612 use the Thumb2 case and let combine do the right thing, but this
28613 can also be called from post-reload splitters. */
28614
28615 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28616
28617 if (!TARGET_THUMB2)
28618 {
28619 /* Emit code for ARM mode. */
28620 emit_insn (SET (out_down,
28621 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28622 if (code == ASHIFTRT)
28623 {
28624 rtx_code_label *done_label = gen_label_rtx ();
28625 emit_jump_insn (BRANCH (LT, done_label));
28626 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28627 out_down)));
28628 emit_label (done_label);
28629 }
28630 else
28631 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28632 out_down)));
28633 }
28634 else
28635 {
28636 /* Emit code for Thumb2 mode.
28637 Thumb2 can't do shift and or in one insn. */
28638 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28639 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28640
28641 if (code == ASHIFTRT)
28642 {
28643 rtx_code_label *done_label = gen_label_rtx ();
28644 emit_jump_insn (BRANCH (LT, done_label));
28645 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28646 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28647 emit_label (done_label);
28648 }
28649 else
28650 {
28651 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28652 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28653 }
28654 }
28655
28656 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28657 }
28658
28659 #undef SUB_32
28660 #undef RSB_32
28661 #undef SUB_S_32
28662 #undef SET
28663 #undef SHIFT
28664 #undef LSHIFT
28665 #undef REV_LSHIFT
28666 #undef ORR
28667 #undef BRANCH
28668 }
28669
28670
28671 /* Return true if *COMPARISON is a comparison operation we can handle,
28672 after canonicalizing it and forcing the operands into a valid form. */
28673 bool
28674 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28675 {
28676 enum rtx_code code = GET_CODE (*comparison);
28677 int code_int;
28678 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28679 ? GET_MODE (*op2) : GET_MODE (*op1);
28680
28681 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28682
28683 if (code == UNEQ || code == LTGT)
28684 return false;
28685
28686 code_int = (int)code;
28687 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28688 PUT_CODE (*comparison, (enum rtx_code)code_int);
28689
28690 switch (mode)
28691 {
28692 case SImode:
28693 if (!arm_add_operand (*op1, mode))
28694 *op1 = force_reg (mode, *op1);
28695 if (!arm_add_operand (*op2, mode))
28696 *op2 = force_reg (mode, *op2);
28697 return true;
28698
28699 case DImode:
28700 if (!cmpdi_operand (*op1, mode))
28701 *op1 = force_reg (mode, *op1);
28702 if (!cmpdi_operand (*op2, mode))
28703 *op2 = force_reg (mode, *op2);
28704 return true;
28705
28706 case SFmode:
28707 case DFmode:
28708 if (!arm_float_compare_operand (*op1, mode))
28709 *op1 = force_reg (mode, *op1);
28710 if (!arm_float_compare_operand (*op2, mode))
28711 *op2 = force_reg (mode, *op2);
28712 return true;
28713 default:
28714 break;
28715 }
28716
28717 return false;
28718
28719 }
28720
28721 /* Maximum number of instructions to set block of memory. */
28722 static int
28723 arm_block_set_max_insns (void)
28724 {
28725 if (optimize_function_for_size_p (cfun))
28726 return 4;
28727 else
28728 return current_tune->max_insns_inline_memset;
28729 }
28730
28731 /* Return TRUE if it's profitable to set a block of memory in the
28732 non-vectorized case. VAL is the value to set the memory
28733 with. LENGTH is the number of bytes to set. ALIGN is the
28734 alignment of the destination memory in bytes. UNALIGNED_P
28735 is TRUE if we can only set the memory with instructions
28736 meeting alignment requirements. USE_STRD_P is TRUE if we
28737 can use strd to set the memory. */
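/* Purely as an illustration, with LENGTH == 14, a word-aligned destination
   and no strd, the cost computed below is the cost of loading the constant
   plus 14 >> 2 == 3 word stores plus leftover[14 & 3] == 1 halfword store.  */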
28738 static bool
28739 arm_block_set_non_vect_profit_p (rtx val,
28740 unsigned HOST_WIDE_INT length,
28741 unsigned HOST_WIDE_INT align,
28742 bool unaligned_p, bool use_strd_p)
28743 {
28744 int num = 0;
28745 /* For a leftover of 0-7 bytes, we can finish setting the block using
28746 strb/strh/str with the minimum instruction counts given below. */
28747 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28748
28749 if (unaligned_p)
28750 {
28751 num = arm_const_inline_cost (SET, val);
28752 num += length / align + length % align;
28753 }
28754 else if (use_strd_p)
28755 {
28756 num = arm_const_double_inline_cost (val);
28757 num += (length >> 3) + leftover[length & 7];
28758 }
28759 else
28760 {
28761 num = arm_const_inline_cost (SET, val);
28762 num += (length >> 2) + leftover[length & 3];
28763 }
28764
28765 /* We may be able to combine the last STRH/STRB pair into a single STR
28766 by shifting one byte back. */
28767 if (unaligned_access && length > 3 && (length & 3) == 3)
28768 num--;
28769
28770 return (num <= arm_block_set_max_insns ());
28771 }
28772
28773 /* Return TRUE if it's profitable to set a block of memory in the
28774 vectorized case. LENGTH is the number of bytes to set.
28775 ALIGN is the alignment of destination memory in bytes.
28776 MODE is the vector mode used to set the memory. */
28777 static bool
28778 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28779 unsigned HOST_WIDE_INT align,
28780 machine_mode mode)
28781 {
28782 int num;
28783 bool unaligned_p = ((align & 3) != 0);
28784 unsigned int nelt = GET_MODE_NUNITS (mode);
28785
28786 /* Instruction loading constant value. */
28787 num = 1;
28788 /* Instructions storing the memory. */
28789 num += (length + nelt - 1) / nelt;
28790 /* Instructions adjusting the address expression. Only need to
28791 adjust address expression if it's 4 bytes aligned and bytes
28792 leftover can only be stored by mis-aligned store instruction. */
28793 if (!unaligned_p && (length & 3) != 0)
28794 num++;
28795
28796 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28797 if (!unaligned_p && mode == V16QImode)
28798 num--;
28799
28800 return (num <= arm_block_set_max_insns ());
28801 }
28802
28803 /* Set a block of memory using vectorization instructions for the
28804 unaligned case. We fill the first LENGTH bytes of the memory
28805 area starting from DSTBASE with byte constant VALUE. ALIGN is
28806 the alignment requirement of memory. Return TRUE if succeeded. */
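/* For example (when the profitability check passes), with LENGTH == 20 the
   loop below emits one misaligned v16qi store covering bytes 0-15 and then
   an overlapping misaligned v8qi store covering bytes 12-19.  */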
28807 static bool
28808 arm_block_set_unaligned_vect (rtx dstbase,
28809 unsigned HOST_WIDE_INT length,
28810 unsigned HOST_WIDE_INT value,
28811 unsigned HOST_WIDE_INT align)
28812 {
28813 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28814 rtx dst, mem;
28815 rtx val_elt, val_vec, reg;
28816 rtx rval[MAX_VECT_LEN];
28817 rtx (*gen_func) (rtx, rtx);
28818 machine_mode mode;
28819 unsigned HOST_WIDE_INT v = value;
28820
28821 gcc_assert ((align & 0x3) != 0);
28822 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28823 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28824 if (length >= nelt_v16)
28825 {
28826 mode = V16QImode;
28827 gen_func = gen_movmisalignv16qi;
28828 }
28829 else
28830 {
28831 mode = V8QImode;
28832 gen_func = gen_movmisalignv8qi;
28833 }
28834 nelt_mode = GET_MODE_NUNITS (mode);
28835 gcc_assert (length >= nelt_mode);
28836 /* Skip if it isn't profitable. */
28837 if (!arm_block_set_vect_profit_p (length, align, mode))
28838 return false;
28839
28840 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28841 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28842
28843 v = sext_hwi (v, BITS_PER_WORD);
28844 val_elt = GEN_INT (v);
28845 for (j = 0; j < nelt_mode; j++)
28846 rval[j] = val_elt;
28847
28848 reg = gen_reg_rtx (mode);
28849 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28850 /* Emit instruction loading the constant value. */
28851 emit_move_insn (reg, val_vec);
28852
28853 /* Handle nelt_mode bytes in a vector. */
28854 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28855 {
28856 emit_insn ((*gen_func) (mem, reg));
28857 if (i + 2 * nelt_mode <= length)
28858 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28859 }
28860
28861 /* If at least nelt_v8 bytes are left over, we must be in
28862 V16QImode. */
28863 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28864
28865 /* Handle (8, 16) bytes leftover. */
28866 if (i + nelt_v8 < length)
28867 {
28868 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28869 /* We are shifting bytes back, set the alignment accordingly. */
28870 if ((length & 1) != 0 && align >= 2)
28871 set_mem_align (mem, BITS_PER_UNIT);
28872
28873 emit_insn (gen_movmisalignv16qi (mem, reg));
28874 }
28875 /* Handle (0, 8] bytes leftover. */
28876 else if (i < length && i + nelt_v8 >= length)
28877 {
28878 if (mode == V16QImode)
28879 {
28880 reg = gen_lowpart (V8QImode, reg);
28881 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28882 }
28883 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28884 + (nelt_mode - nelt_v8))));
28885 /* We are shifting bytes back, set the alignment accordingly. */
28886 if ((length & 1) != 0 && align >= 2)
28887 set_mem_align (mem, BITS_PER_UNIT);
28888
28889 emit_insn (gen_movmisalignv8qi (mem, reg));
28890 }
28891
28892 return true;
28893 }
28894
28895 /* Set a block of memory using vectorization instructions for the
28896 aligned case. We fill the first LENGTH bytes of the memory area
28897 starting from DSTBASE with byte constant VALUE. ALIGN is the
28898 alignment requirement of memory. Return TRUE if succeeded. */
28899 static bool
28900 arm_block_set_aligned_vect (rtx dstbase,
28901 unsigned HOST_WIDE_INT length,
28902 unsigned HOST_WIDE_INT value,
28903 unsigned HOST_WIDE_INT align)
28904 {
28905 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28906 rtx dst, addr, mem;
28907 rtx val_elt, val_vec, reg;
28908 rtx rval[MAX_VECT_LEN];
28909 machine_mode mode;
28910 unsigned HOST_WIDE_INT v = value;
28911
28912 gcc_assert ((align & 0x3) == 0);
28913 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28914 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28915 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28916 mode = V16QImode;
28917 else
28918 mode = V8QImode;
28919
28920 nelt_mode = GET_MODE_NUNITS (mode);
28921 gcc_assert (length >= nelt_mode);
28922 /* Skip if it isn't profitable. */
28923 if (!arm_block_set_vect_profit_p (length, align, mode))
28924 return false;
28925
28926 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28927
28928 v = sext_hwi (v, BITS_PER_WORD);
28929 val_elt = GEN_INT (v);
28930 for (j = 0; j < nelt_mode; j++)
28931 rval[j] = val_elt;
28932
28933 reg = gen_reg_rtx (mode);
28934 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28935 /* Emit instruction loading the constant value. */
28936 emit_move_insn (reg, val_vec);
28937
28938 i = 0;
28939 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28940 if (mode == V16QImode)
28941 {
28942 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28943 emit_insn (gen_movmisalignv16qi (mem, reg));
28944 i += nelt_mode;
28945 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28946 if (i + nelt_v8 < length && i + nelt_v16 > length)
28947 {
28948 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28949 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28950 /* We are shifting bytes back, set the alignment accordingly. */
28951 if ((length & 0x3) == 0)
28952 set_mem_align (mem, BITS_PER_UNIT * 4);
28953 else if ((length & 0x1) == 0)
28954 set_mem_align (mem, BITS_PER_UNIT * 2);
28955 else
28956 set_mem_align (mem, BITS_PER_UNIT);
28957
28958 emit_insn (gen_movmisalignv16qi (mem, reg));
28959 return true;
28960 }
28961 /* Fall through for bytes leftover. */
28962 mode = V8QImode;
28963 nelt_mode = GET_MODE_NUNITS (mode);
28964 reg = gen_lowpart (V8QImode, reg);
28965 }
28966
28967 /* Handle 8 bytes in a vector. */
28968 for (; (i + nelt_mode <= length); i += nelt_mode)
28969 {
28970 addr = plus_constant (Pmode, dst, i);
28971 mem = adjust_automodify_address (dstbase, mode, addr, i);
28972 emit_move_insn (mem, reg);
28973 }
28974
28975 /* Handle single word leftover by shifting 4 bytes back. We can
28976 use aligned access for this case. */
28977 if (i + UNITS_PER_WORD == length)
28978 {
28979 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28980 mem = adjust_automodify_address (dstbase, mode,
28981 addr, i - UNITS_PER_WORD);
28982 /* We are shifting 4 bytes back, set the alignment accordingly. */
28983 if (align > UNITS_PER_WORD)
28984 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28985
28986 emit_move_insn (mem, reg);
28987 }
28988 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28989 We have to use unaligned access for this case. */
28990 else if (i < length)
28991 {
28992 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28993 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28994 /* We are shifting bytes back, set the alignment accordingly. */
28995 if ((length & 1) == 0)
28996 set_mem_align (mem, BITS_PER_UNIT * 2);
28997 else
28998 set_mem_align (mem, BITS_PER_UNIT);
28999
29000 emit_insn (gen_movmisalignv8qi (mem, reg));
29001 }
29002
29003 return true;
29004 }
29005
29006 /* Set a block of memory using plain strh/strb instructions, only
29007 using instructions allowed by ALIGN on the processor. We fill the
29008 first LENGTH bytes of the memory area starting from DSTBASE
29009 with byte constant VALUE. ALIGN is the alignment requirement
29010 of memory. */
29011 static bool
29012 arm_block_set_unaligned_non_vect (rtx dstbase,
29013 unsigned HOST_WIDE_INT length,
29014 unsigned HOST_WIDE_INT value,
29015 unsigned HOST_WIDE_INT align)
29016 {
29017 unsigned int i;
29018 rtx dst, addr, mem;
29019 rtx val_exp, val_reg, reg;
29020 machine_mode mode;
29021 HOST_WIDE_INT v = value;
29022
29023 gcc_assert (align == 1 || align == 2);
29024
29025 if (align == 2)
29026 v |= (value << BITS_PER_UNIT);
29027
29028 v = sext_hwi (v, BITS_PER_WORD);
29029 val_exp = GEN_INT (v);
29030 /* Skip if it isn't profitable. */
29031 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29032 align, true, false))
29033 return false;
29034
29035 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29036 mode = (align == 2 ? HImode : QImode);
29037 val_reg = force_reg (SImode, val_exp);
29038 reg = gen_lowpart (mode, val_reg);
29039
29040 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29041 {
29042 addr = plus_constant (Pmode, dst, i);
29043 mem = adjust_automodify_address (dstbase, mode, addr, i);
29044 emit_move_insn (mem, reg);
29045 }
29046
29047 /* Handle single byte leftover. */
29048 if (i + 1 == length)
29049 {
29050 reg = gen_lowpart (QImode, val_reg);
29051 addr = plus_constant (Pmode, dst, i);
29052 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29053 emit_move_insn (mem, reg);
29054 i++;
29055 }
29056
29057 gcc_assert (i == length);
29058 return true;
29059 }
29060
29061 /* Set a block of memory using plain strd/str/strh/strb instructions,
29062 to permit unaligned copies on processors which support unaligned
29063 semantics for those instructions. We fill the first LENGTH bytes
29064 of the memory area starting from DSTBASE with byte constant VALUE.
29065 ALIGN is the alignment requirement of memory. */
29066 static bool
29067 arm_block_set_aligned_non_vect (rtx dstbase,
29068 unsigned HOST_WIDE_INT length,
29069 unsigned HOST_WIDE_INT value,
29070 unsigned HOST_WIDE_INT align)
29071 {
29072 unsigned int i;
29073 rtx dst, addr, mem;
29074 rtx val_exp, val_reg, reg;
29075 unsigned HOST_WIDE_INT v;
29076 bool use_strd_p;
29077
29078 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29079 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29080
29081 v = (value | (value << 8) | (value << 16) | (value << 24));
29082 if (length < UNITS_PER_WORD)
29083 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29084
29085 if (use_strd_p)
29086 v |= (v << BITS_PER_WORD);
29087 else
29088 v = sext_hwi (v, BITS_PER_WORD);
29089
29090 val_exp = GEN_INT (v);
29091 /* Skip if it isn't profitable. */
29092 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29093 align, false, use_strd_p))
29094 {
29095 if (!use_strd_p)
29096 return false;
29097
29098 /* Try without strd. */
29099 v = (v >> BITS_PER_WORD);
29100 v = sext_hwi (v, BITS_PER_WORD);
29101 val_exp = GEN_INT (v);
29102 use_strd_p = false;
29103 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29104 align, false, use_strd_p))
29105 return false;
29106 }
29107
29108 i = 0;
29109 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29110 /* Handle double words using strd if possible. */
29111 if (use_strd_p)
29112 {
29113 val_reg = force_reg (DImode, val_exp);
29114 reg = val_reg;
29115 for (; (i + 8 <= length); i += 8)
29116 {
29117 addr = plus_constant (Pmode, dst, i);
29118 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29119 emit_move_insn (mem, reg);
29120 }
29121 }
29122 else
29123 val_reg = force_reg (SImode, val_exp);
29124
29125 /* Handle words. */
29126 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29127 for (; (i + 4 <= length); i += 4)
29128 {
29129 addr = plus_constant (Pmode, dst, i);
29130 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29131 if ((align & 3) == 0)
29132 emit_move_insn (mem, reg);
29133 else
29134 emit_insn (gen_unaligned_storesi (mem, reg));
29135 }
29136
29137 /* Merge last pair of STRH and STRB into a STR if possible. */
29138 if (unaligned_access && i > 0 && (i + 3) == length)
29139 {
29140 addr = plus_constant (Pmode, dst, i - 1);
29141 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29142 /* We are shifting one byte back, set the alignment accordingly. */
29143 if ((align & 1) == 0)
29144 set_mem_align (mem, BITS_PER_UNIT);
29145
29146 /* Most likely this is an unaligned access, and we can't tell at
29147 compilation time. */
29148 emit_insn (gen_unaligned_storesi (mem, reg));
29149 return true;
29150 }
29151
29152 /* Handle half word leftover. */
29153 if (i + 2 <= length)
29154 {
29155 reg = gen_lowpart (HImode, val_reg);
29156 addr = plus_constant (Pmode, dst, i);
29157 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29158 if ((align & 1) == 0)
29159 emit_move_insn (mem, reg);
29160 else
29161 emit_insn (gen_unaligned_storehi (mem, reg));
29162
29163 i += 2;
29164 }
29165
29166 /* Handle single byte leftover. */
29167 if (i + 1 == length)
29168 {
29169 reg = gen_lowpart (QImode, val_reg);
29170 addr = plus_constant (Pmode, dst, i);
29171 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29172 emit_move_insn (mem, reg);
29173 }
29174
29175 return true;
29176 }
29177
29178 /* Set a block of memory using vectorization instructions for both
29179 aligned and unaligned cases. We fill the first LENGTH bytes of
29180 the memory area starting from DSTBASE with byte constant VALUE.
29181 ALIGN is the alignment requirement of memory. */
29182 static bool
29183 arm_block_set_vect (rtx dstbase,
29184 unsigned HOST_WIDE_INT length,
29185 unsigned HOST_WIDE_INT value,
29186 unsigned HOST_WIDE_INT align)
29187 {
29188 /* Check whether we need to use unaligned store instruction. */
29189 if (((align & 3) != 0 || (length & 3) != 0)
29190 /* Check whether unaligned store instruction is available. */
29191 && (!unaligned_access || BYTES_BIG_ENDIAN))
29192 return false;
29193
29194 if ((align & 3) == 0)
29195 return arm_block_set_aligned_vect (dstbase, length, value, align);
29196 else
29197 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29198 }
29199
29200 /* Expand a string store (memset) operation. First we try to do it using
29201 vectorization instructions, then fall back to ARM unaligned access and
29202 double-word stores if profitable. OPERANDS[0] is the destination,
29203 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value used to
29204 initialize the memory, OPERANDS[3] is the known alignment of the
29205 destination. */
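/* As an illustration (the exact code depends on the target and tuning), a
   memset of 8 bytes of 0xAB to a word-aligned destination is typically
   expanded by arm_block_set_aligned_non_vect into word stores of the
   replicated constant 0xABABABAB, or a single strd of the doubled constant
   where ldrd/strd is preferred, subject to the profitability checks in the
   helpers above.  */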
29206 bool
29207 arm_gen_setmem (rtx *operands)
29208 {
29209 rtx dstbase = operands[0];
29210 unsigned HOST_WIDE_INT length;
29211 unsigned HOST_WIDE_INT value;
29212 unsigned HOST_WIDE_INT align;
29213
29214 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29215 return false;
29216
29217 length = UINTVAL (operands[1]);
29218 if (length > 64)
29219 return false;
29220
29221 value = (UINTVAL (operands[2]) & 0xFF);
29222 align = UINTVAL (operands[3]);
29223 if (TARGET_NEON && length >= 8
29224 && current_tune->string_ops_prefer_neon
29225 && arm_block_set_vect (dstbase, length, value, align))
29226 return true;
29227
29228 if (!unaligned_access && (align & 3) != 0)
29229 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29230
29231 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29232 }
29233
29234
29235 static bool
29236 arm_macro_fusion_p (void)
29237 {
29238 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29239 }
29240
29241
29242 static bool
29243 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29244 {
29245 rtx set_dest;
29246 rtx prev_set = single_set (prev);
29247 rtx curr_set = single_set (curr);
29248
29249 if (!prev_set
29250 || !curr_set)
29251 return false;
29252
29253 if (any_condjump_p (curr))
29254 return false;
29255
29256 if (!arm_macro_fusion_p ())
29257 return false;
29258
29259 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29260 {
29261 /* We are trying to fuse
29262 movw imm / movt imm
29263 instructions as a group that gets scheduled together. */
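	  /* For example (GNU assembler syntax), the pair

	         movw    r0, #:lower16:sym
	         movt    r0, #:upper16:sym

	     materializes a 32-bit address; keeping the two halves adjacent
	     in the schedule lets cores that can fuse the pair benefit.  */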
29264
29265 set_dest = SET_DEST (curr_set);
29266
29267 if (GET_MODE (set_dest) != SImode)
29268 return false;
29269
29270 /* We are trying to match:
29271 prev (movw) == (set (reg r0) (const_int imm16))
29272 curr (movt) == (set (zero_extract (reg r0)
29273 (const_int 16)
29274 (const_int 16))
29275 (const_int imm16_1))
29276 or
29277 prev (movw) == (set (reg r1)
29278 (high (symbol_ref ("SYM"))))
29279 curr (movt) == (set (reg r0)
29280 (lo_sum (reg r1)
29281 (symbol_ref ("SYM")))) */
29282 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29283 {
29284 if (CONST_INT_P (SET_SRC (curr_set))
29285 && CONST_INT_P (SET_SRC (prev_set))
29286 && REG_P (XEXP (set_dest, 0))
29287 && REG_P (SET_DEST (prev_set))
29288 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29289 return true;
29290 }
29291 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29292 && REG_P (SET_DEST (curr_set))
29293 && REG_P (SET_DEST (prev_set))
29294 && GET_CODE (SET_SRC (prev_set)) == HIGH
29295 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29296 return true;
29297 }
29298 return false;
29299 }
29300
29301 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29302
29303 static unsigned HOST_WIDE_INT
29304 arm_asan_shadow_offset (void)
29305 {
29306 return (unsigned HOST_WIDE_INT) 1 << 29;
29307 }
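/* With the usual shadow granularity of 8 application bytes per shadow
   byte, this means an application address ADDR is checked via the shadow
   byte at (ADDR >> 3) + 0x20000000, 1 << 29 being the offset returned
   above.  */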
29308
29309
29310 /* This is a temporary fix for PR60655.  Ideally we should
29311 handle most of these cases in the generic part, but
29312 currently we reject minus (..) (sym_ref).  We try to
29313 ameliorate the case of minus (sym_ref1) (sym_ref2)
29314 where both are in the same section.  */
29315
29316 static bool
29317 arm_const_not_ok_for_debug_p (rtx p)
29318 {
29319 tree decl_op0 = NULL;
29320 tree decl_op1 = NULL;
29321
29322 if (GET_CODE (p) == MINUS)
29323 {
29324 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29325 {
29326 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29327 if (decl_op1
29328 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29329 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29330 {
29331 if ((TREE_CODE (decl_op1) == VAR_DECL
29332 || TREE_CODE (decl_op1) == CONST_DECL)
29333 && (TREE_CODE (decl_op0) == VAR_DECL
29334 || TREE_CODE (decl_op0) == CONST_DECL))
29335 return (get_variable_section (decl_op1, false)
29336 != get_variable_section (decl_op0, false));
29337
29338 if (TREE_CODE (decl_op1) == LABEL_DECL
29339 && TREE_CODE (decl_op0) == LABEL_DECL)
29340 return (DECL_CONTEXT (decl_op1)
29341 != DECL_CONTEXT (decl_op0));
29342 }
29343
29344 return true;
29345 }
29346 }
29347
29348 return false;
29349 }
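/* For instance, a difference of two labels in the same function, or of
   two variables placed in the same section, stays representable in debug
   info and the hook returns false; a minus of symbols from different
   sections, or where a decl cannot be determined, is rejected.  */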
29350
29351 /* Return TRUE if X is a reference to a value in a constant pool.  */
29352 extern bool
29353 arm_is_constant_pool_ref (rtx x)
29354 {
29355 return (MEM_P (x)
29356 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29357 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29358 }
29359
29360 /* Remember the last target of arm_set_current_function. */
29361 static GTY(()) tree arm_previous_fndecl;
29362
29363 /* Invalidate arm_previous_fndecl. */
29364 void
29365 arm_reset_previous_fndecl (void)
29366 {
29367 arm_previous_fndecl = NULL_TREE;
29368 }
29369
29370 /* Establish appropriate back-end context for processing the function
29371 FNDECL. The argument might be NULL to indicate processing at top
29372 level, outside of any function scope. */
29373 static void
29374 arm_set_current_function (tree fndecl)
29375 {
29376 if (!fndecl || fndecl == arm_previous_fndecl)
29377 return;
29378
29379 tree old_tree = (arm_previous_fndecl
29380 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29381 : NULL_TREE);
29382
29383 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29384
29385 arm_previous_fndecl = fndecl;
29386 if (old_tree == new_tree)
29387 return;
29388
29389 if (new_tree && new_tree != target_option_default_node)
29390 {
29391 cl_target_option_restore (&global_options,
29392 TREE_TARGET_OPTION (new_tree));
29393
29394 if (TREE_TARGET_GLOBALS (new_tree))
29395 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29396 else
29397 TREE_TARGET_GLOBALS (new_tree)
29398 = save_target_globals_default_opts ();
29399 }
29400
29401 else if (old_tree && old_tree != target_option_default_node)
29402 {
29403 new_tree = target_option_current_node;
29404
29405 cl_target_option_restore (&global_options,
29406 TREE_TARGET_OPTION (new_tree));
29407 if (TREE_TARGET_GLOBALS (new_tree))
29408 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29409 else if (new_tree == target_option_default_node)
29410 restore_target_globals (&default_target_globals);
29411 else
29412 TREE_TARGET_GLOBALS (new_tree)
29413 = save_target_globals_default_opts ();
29414 }
29415
29416 arm_option_params_internal (&global_options);
29417 }
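/* As an example, when compilation switches from a function carrying
   __attribute__ ((target ("thumb"))) to one that only uses the
   command-line options, NEW_TREE is NULL or the default node, so the
   else-branch above restores the options recorded in
   target_option_current_node.  */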
29418
29419 /* Hook to determine if one function can safely inline another. */
29420
29421 static bool
29422 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
29423 {
29424 /* Override the default hook: it is always OK to inline between different
29425 modes.  Functions with mode-specific instructions, e.g. using asm, must be
29426 explicitly protected with noinline.  */
29427 return true;
29428 }
29429
29430 /* Inner function to process the attribute ((target (...))): take an argument
29431 and set the current options from it.  If we have a list, recursively go
29432 over the list.  */
29433
29434 static bool
29435 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29436 {
29437 if (TREE_CODE (args) == TREE_LIST)
29438 {
29439 bool ret = true;
29440 for (; args; args = TREE_CHAIN (args))
29441 if (TREE_VALUE (args)
29442 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29443 ret = false;
29444 return ret;
29445 }
29446
29447 else if (TREE_CODE (args) != STRING_CST)
29448 {
29449 error ("attribute %<target%> argument not a string");
29450 return false;
29451 }
29452
29453 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29454 while (argstr && *argstr != '\0')
29455 {
29456 while (ISSPACE (*argstr))
29457 argstr++;
29458
29459 if (!strcmp (argstr, "thumb"))
29460 {
29461 opts->x_target_flags |= MASK_THUMB;
29462 arm_option_check_internal (opts);
29463 return true;
29464 }
29465
29466 if (!strcmp (argstr, "arm"))
29467 {
29468 opts->x_target_flags &= ~MASK_THUMB;
29469 arm_option_check_internal (opts);
29470 return true;
29471 }
29472
29473 warning (0, "attribute(target(\"%s\")) is unknown", argstr);
29474 return false;
29475 }
29476
29477 return false;
29478 }
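/* Concretely, this handles declarations such as

     __attribute__ ((target ("thumb"))) int foo (int x);
     __attribute__ ((target ("arm")))   int bar (int x);

   which set or clear MASK_THUMB for the individual function as above.
   Any other string is currently diagnosed as unknown.  */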
29479
29480 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29481
29482 tree
29483 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29484 struct gcc_options *opts_set)
29485 {
29486 if (!arm_valid_target_attribute_rec (args, opts))
29487 return NULL_TREE;
29488
29489 /* Do any overrides, such as the global option arch=xxx.  */
29490 arm_option_override_internal (opts, opts_set);
29491
29492 return build_target_option_node (opts);
29493 }
29494
29495 static void
29496 add_attribute (const char * mode, tree *attributes)
29497 {
29498 size_t len = strlen (mode);
29499 tree value = build_string (len, mode);
29500
29501 TREE_TYPE (value) = build_array_type (char_type_node,
29502 build_index_type (size_int (len)));
29503
29504 *attributes = tree_cons (get_identifier ("target"),
29505 build_tree_list (NULL_TREE, value),
29506 *attributes);
29507 }
29508
29509 /* For testing.  Insert thumb or arm modes alternately on functions.  */
29510
29511 static void
29512 arm_insert_attributes (tree fndecl, tree * attributes)
29513 {
29514 const char *mode;
29515
29516 if (! TARGET_FLIP_THUMB)
29517 return;
29518
29519 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
29520 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
29521 return;
29522
29523 /* Nested definitions must inherit mode. */
29524 if (current_function_decl)
29525 {
29526 mode = TARGET_THUMB ? "thumb" : "arm";
29527 add_attribute (mode, attributes);
29528 return;
29529 }
29530
29531 /* If there is already a setting don't change it. */
29532 if (lookup_attribute ("target", *attributes) != NULL)
29533 return;
29534
29535 mode = thumb_flipper ? "thumb" : "arm";
29536 add_attribute (mode, attributes);
29537
29538 thumb_flipper = !thumb_flipper;
29539 }
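/* In other words, when TARGET_FLIP_THUMB is enabled for testing, successive
   top-level function definitions are alternately given target ("thumb") and
   target ("arm") attributes, while nested functions simply inherit the mode
   of their enclosing function and explicit target attributes are left
   untouched.  */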
29540
29541 /* Hook to validate attribute((target("string"))). */
29542
29543 static bool
29544 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29545 tree args, int ARG_UNUSED (flags))
29546 {
29547 bool ret = true;
29548 struct gcc_options func_options;
29549 tree cur_tree, new_optimize;
29550 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29551
29552 /* Get the optimization options of the current function. */
29553 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29554
29555 /* If the function changed the optimization levels as well as setting target
29556 options, start with the optimizations specified. */
29557 if (!func_optimize)
29558 func_optimize = optimization_default_node;
29559
29560 /* Init func_options. */
29561 memset (&func_options, 0, sizeof (func_options));
29562 init_options_struct (&func_options, NULL);
29563 lang_hooks.init_options_struct (&func_options);
29564
29565 /* Initialize func_options to the defaults. */
29566 cl_optimization_restore (&func_options,
29567 TREE_OPTIMIZATION (func_optimize));
29568
29569 cl_target_option_restore (&func_options,
29570 TREE_TARGET_OPTION (target_option_default_node));
29571
29572 /* Set func_options flags with new target mode. */
29573 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29574 &global_options_set);
29575
29576 if (cur_tree == NULL_TREE)
29577 ret = false;
29578
29579 new_optimize = build_optimization_node (&func_options);
29580
29581 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
29582
29583 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
29584
29585 return ret;
29586 }
29587
29588 void
29589 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29590 {
29591 if (TARGET_UNIFIED_ASM)
29592 fprintf (stream, "\t.syntax unified\n");
29593 else
29594 fprintf (stream, "\t.syntax divided\n");
29595
29596 if (TARGET_THUMB)
29597 {
29598 if (is_called_in_ARM_mode (decl)
29599 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29600 && cfun->is_thunk))
29601 fprintf (stream, "\t.code 32\n");
29602 else if (TARGET_THUMB1)
29603 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29604 else
29605 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29606 }
29607 else
29608 fprintf (stream, "\t.arm\n");
29609
29610 if (TARGET_POKE_FUNCTION_NAME)
29611 arm_poke_function_name (stream, (const char *) name);
29612 }
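/* For instance, for an ordinary Thumb-2 function under unified syntax the
   directives emitted above amount to roughly

     .syntax unified
     .thumb
     .thumb_func

   while an ARM-mode function instead gets ".arm", and a Thumb function
   that is actually entered in ARM state gets ".code 32".  */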
29613
29614 /* If MEM is in the form of [base+offset], extract the two parts
29615 of the address and set them in BASE and OFFSET; otherwise return FALSE
29616 after clearing BASE and OFFSET.  */
29617
29618 static bool
29619 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29620 {
29621 rtx addr;
29622
29623 gcc_assert (MEM_P (mem));
29624
29625 addr = XEXP (mem, 0);
29626
29627 /* Strip off const from addresses like (const (addr)). */
29628 if (GET_CODE (addr) == CONST)
29629 addr = XEXP (addr, 0);
29630
29631 if (GET_CODE (addr) == REG)
29632 {
29633 *base = addr;
29634 *offset = const0_rtx;
29635 return true;
29636 }
29637
29638 if (GET_CODE (addr) == PLUS
29639 && GET_CODE (XEXP (addr, 0)) == REG
29640 && CONST_INT_P (XEXP (addr, 1)))
29641 {
29642 *base = XEXP (addr, 0);
29643 *offset = XEXP (addr, 1);
29644 return true;
29645 }
29646
29647 *base = NULL_RTX;
29648 *offset = NULL_RTX;
29649
29650 return false;
29651 }
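/* The accepted address forms are thus, for example:

     (mem (reg r1))                        -> BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))   -> BASE = r1, OFFSET = 8

   whereas register-plus-register or auto-modify addresses are rejected
   and BASE/OFFSET are cleared.  */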
29652
29653 /* If INSN is a load or store of an address in the form of [base+offset],
29654 extract the two parts and set them in BASE and OFFSET.  IS_LOAD is set
29655 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
29656 otherwise return FALSE.  */
29657
29658 static bool
29659 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29660 {
29661 rtx x, dest, src;
29662
29663 gcc_assert (INSN_P (insn));
29664 x = PATTERN (insn);
29665 if (GET_CODE (x) != SET)
29666 return false;
29667
29668 src = SET_SRC (x);
29669 dest = SET_DEST (x);
29670 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29671 {
29672 *is_load = false;
29673 extract_base_offset_in_addr (dest, base, offset);
29674 }
29675 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29676 {
29677 *is_load = true;
29678 extract_base_offset_in_addr (src, base, offset);
29679 }
29680 else
29681 return false;
29682
29683 return (*base != NULL_RTX && *offset != NULL_RTX);
29684 }
29685
29686 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29687
29688 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29689 and PRI are only calculated for these instructions.  For other instructions,
29690 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
29691 instruction fusion can be supported by returning different priorities.
29692
29693 It's important that irrelevant instructions get the largest FUSION_PRI. */
29694
29695 static void
29696 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29697 int *fusion_pri, int *pri)
29698 {
29699 int tmp, off_val;
29700 bool is_load;
29701 rtx base, offset;
29702
29703 gcc_assert (INSN_P (insn));
29704
29705 tmp = max_pri - 1;
29706 if (!fusion_load_store (insn, &base, &offset, &is_load))
29707 {
29708 *pri = tmp;
29709 *fusion_pri = tmp;
29710 return;
29711 }
29712
29713 /* Load goes first. */
29714 if (is_load)
29715 *fusion_pri = tmp - 1;
29716 else
29717 *fusion_pri = tmp - 2;
29718
29719 tmp /= 2;
29720
29721 /* INSN with smaller base register goes first. */
29722 tmp -= ((REGNO (base) & 0xff) << 20);
29723
29724 /* INSN with smaller offset goes first. */
29725 off_val = (int)(INTVAL (offset));
29726 if (off_val >= 0)
29727 tmp -= (off_val & 0xfffff);
29728 else
29729 tmp += ((- off_val) & 0xfffff);
29730
29731 *pri = tmp;
29732 return;
29733 }
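/* As a worked example, two loads from [r1, #0] and [r1, #4] both get
   FUSION_PRI = MAX_PRI - 2 (loads rank ahead of stores, which would get
   MAX_PRI - 3), while their PRI values start from (MAX_PRI - 1) / 2,
   minus (1 & 0xff) << 20 for base register r1, minus the offset, so the
   access at offset 0 ends up with the higher PRI of the pair.  */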
29734 #include "gt-arm.h"