]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/arm/arm.c
2015-06-17 Andrew MacLeod <amacleod@redhat.com>
[thirdparty/gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "stringpool.h"
33 #include "stor-layout.h"
34 #include "calls.h"
35 #include "varasm.h"
36 #include "obstack.h"
37 #include "regs.h"
38 #include "hard-reg-set.h"
39 #include "insn-config.h"
40 #include "conditions.h"
41 #include "output.h"
42 #include "insn-attr.h"
43 #include "flags.h"
44 #include "reload.h"
45 #include "function.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "emit-rtl.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "insn-codes.h"
53 #include "optabs.h"
54 #include "diagnostic-core.h"
55 #include "recog.h"
56 #include "predict.h"
57 #include "dominance.h"
58 #include "cfg.h"
59 #include "cfgrtl.h"
60 #include "cfganal.h"
61 #include "lcm.h"
62 #include "cfgbuild.h"
63 #include "cfgcleanup.h"
64 #include "basic-block.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "except.h"
69 #include "tm_p.h"
70 #include "target.h"
71 #include "sched-int.h"
72 #include "target-def.h"
73 #include "debug.h"
74 #include "langhooks.h"
75 #include "df.h"
76 #include "intl.h"
77 #include "libfuncs.h"
78 #include "params.h"
79 #include "opts.h"
80 #include "dumpfile.h"
81 #include "gimple-expr.h"
82 #include "target-globals.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
86 #include "sched-int.h"
87
88 /* Forward definitions of types. */
89 typedef struct minipool_node Mnode;
90 typedef struct minipool_fixup Mfix;
91
92 void (*arm_lang_output_object_attributes_hook)(void);
93
94 struct four_ints
95 {
96 int i[4];
97 };
98
99 /* Forward function declarations. */
100 static bool arm_const_not_ok_for_debug_p (rtx);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static bool is_called_in_ARM_mode (tree);
111 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
112 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
113 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
114 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
115 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
116 inline static int thumb1_index_register_rtx_p (rtx, int);
117 static int thumb_far_jump_used_p (void);
118 static bool thumb_force_lr_save (void);
119 static unsigned arm_size_return_regs (void);
120 static bool arm_assemble_integer (rtx, unsigned int, int);
121 static void arm_print_operand (FILE *, rtx, int);
122 static void arm_print_operand_address (FILE *, rtx);
123 static bool arm_print_operand_punct_valid_p (unsigned char code);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
125 static arm_cc get_arm_condition_code (rtx);
126 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
127 static const char *output_multi_immediate (rtx *, const char *, const char *,
128 int, HOST_WIDE_INT);
129 static const char *shift_op (rtx, HOST_WIDE_INT *);
130 static struct machine_function *arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
133 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_forward_ref (Mfix *);
135 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
136 static Mnode *add_minipool_backward_ref (Mfix *);
137 static void assign_minipool_offsets (Mfix *);
138 static void arm_print_value (FILE *, rtx);
139 static void dump_minipool (rtx_insn *);
140 static int arm_barrier_cost (rtx_insn *);
141 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
142 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
143 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
144 machine_mode, rtx);
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree);
150 static unsigned long arm_compute_func_type (void);
151 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void emit_constant_insn (rtx cond, rtx pattern);
197 static rtx_insn *emit_set_insn (rtx, rtx);
198 static rtx emit_multi_reg_push (unsigned long, unsigned long);
199 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
200 tree, bool);
201 static rtx arm_function_arg (cumulative_args_t, machine_mode,
202 const_tree, bool);
203 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
204 const_tree, bool);
205 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
206 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
207 const_tree);
208 static rtx aapcs_libcall_value (machine_mode);
209 static int aapcs_select_return_coproc (const_tree, const_tree);
210
211 #ifdef OBJECT_FORMAT_ELF
212 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
213 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
214 #endif
215 #ifndef ARM_PE
216 static void arm_encode_section_info (tree, rtx, int);
217 #endif
218
219 static void arm_file_end (void);
220 static void arm_file_start (void);
221
222 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
223 tree, int *, int);
224 static bool arm_pass_by_reference (cumulative_args_t,
225 machine_mode, const_tree, bool);
226 static bool arm_promote_prototypes (const_tree);
227 static bool arm_default_short_enums (void);
228 static bool arm_align_anon_bitfield (void);
229 static bool arm_return_in_msb (const_tree);
230 static bool arm_must_pass_in_stack (machine_mode, const_tree);
231 static bool arm_return_in_memory (const_tree, const_tree);
232 #if ARM_UNWIND_INFO
233 static void arm_unwind_emit (FILE *, rtx_insn *);
234 static bool arm_output_ttype (rtx);
235 static void arm_asm_emit_except_personality (rtx);
236 static void arm_asm_init_sections (void);
237 #endif
238 static rtx arm_dwarf_register_span (rtx);
239
240 static tree arm_cxx_guard_type (void);
241 static bool arm_cxx_guard_mask_bit (void);
242 static tree arm_get_cookie_size (tree);
243 static bool arm_cookie_has_size (void);
244 static bool arm_cxx_cdtor_returns_this (void);
245 static bool arm_cxx_key_method_may_be_inline (void);
246 static void arm_cxx_determine_class_data_visibility (tree);
247 static bool arm_cxx_class_data_always_comdat (void);
248 static bool arm_cxx_use_aeabi_atexit (void);
249 static void arm_init_libfuncs (void);
250 static tree arm_build_builtin_va_list (void);
251 static void arm_expand_builtin_va_start (tree, rtx);
252 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
253 static void arm_option_override (void);
254 static void arm_set_current_function (tree);
255 static bool arm_can_inline_p (tree, tree);
256 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
257 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static const char *arm_invalid_parameter_type (const_tree t);
268 static const char *arm_invalid_return_type (const_tree t);
269 static tree arm_promoted_type (const_tree t);
270 static tree arm_convert_to_type (tree type, tree expr);
271 static bool arm_scalar_mode_supported_p (machine_mode);
272 static bool arm_frame_pointer_required (void);
273 static bool arm_can_eliminate (const int, const int);
274 static void arm_asm_trampoline_template (FILE *);
275 static void arm_trampoline_init (rtx, tree, rtx);
276 static rtx arm_trampoline_adjust_address (rtx);
277 static rtx arm_pic_static_addr (rtx orig, rtx reg);
278 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
279 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool arm_array_mode_supported_p (machine_mode,
282 unsigned HOST_WIDE_INT);
283 static machine_mode arm_preferred_simd_mode (machine_mode);
284 static bool arm_class_likely_spilled_p (reg_class_t);
285 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
286 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
287 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
288 const_tree type,
289 int misalignment,
290 bool is_packed);
291 static void arm_conditional_register_usage (void);
292 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
293 static unsigned int arm_autovectorize_vector_sizes (void);
294 static int arm_default_branch_cost (bool, bool);
295 static int arm_cortex_a5_branch_cost (bool, bool);
296 static int arm_cortex_m_branch_cost (bool, bool);
297 static int arm_cortex_m7_branch_cost (bool, bool);
298
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
300 const unsigned char *sel);
301
302 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
303
304 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
305 tree vectype,
306 int misalign ATTRIBUTE_UNUSED);
307 static unsigned arm_add_stmt_cost (void *data, int count,
308 enum vect_cost_for_stmt kind,
309 struct _stmt_vec_info *stmt_info,
310 int misalign,
311 enum vect_cost_model_location where);
312
313 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
314 bool op0_preserve_value);
315 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
316
317 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
318 \f
319 /* Table of machine attributes. */
320 static const struct attribute_spec arm_attribute_table[] =
321 {
322 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
323 affects_type_identity } */
324 /* Function calls made to this symbol must be done indirectly, because
325 it may lie outside of the 26 bit addressing range of a normal function
326 call. */
327 { "long_call", 0, 0, false, true, true, NULL, false },
328 /* Whereas these functions are always known to reside within the 26 bit
329 addressing range. */
330 { "short_call", 0, 0, false, true, true, NULL, false },
331 /* Specify the procedure call conventions for a function. */
332 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
333 false },
334 /* Interrupt Service Routines have special prologue and epilogue requirements. */
335 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
336 false },
337 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
340 false },
341 #ifdef ARM_PE
342 /* ARM/PE has three new attributes:
343 interfacearm - ?
344 dllexport - for exporting a function/variable that will live in a dll
345 dllimport - for importing a function/variable from a dll
346
347 Microsoft allows multiple declspecs in one __declspec, separating
348 them with spaces. We do NOT support this. Instead, use __declspec
349 multiple times.
350 */
351 { "dllimport", 0, 0, true, false, false, NULL, false },
352 { "dllexport", 0, 0, true, false, false, NULL, false },
353 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
354 false },
355 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
357 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
358 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
359 false },
360 #endif
361 { NULL, 0, 0, false, false, false, NULL, false }
362 };
363 \f
364 /* Initialize the GCC target structure. */
365 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 #undef TARGET_MERGE_DECL_ATTRIBUTES
367 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
368 #endif
369
370 #undef TARGET_LEGITIMIZE_ADDRESS
371 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
372
373 #undef TARGET_LRA_P
374 #define TARGET_LRA_P hook_bool_void_true
375
376 #undef TARGET_ATTRIBUTE_TABLE
377 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
378
379 #undef TARGET_ASM_FILE_START
380 #define TARGET_ASM_FILE_START arm_file_start
381 #undef TARGET_ASM_FILE_END
382 #define TARGET_ASM_FILE_END arm_file_end
383
384 #undef TARGET_ASM_ALIGNED_SI_OP
385 #define TARGET_ASM_ALIGNED_SI_OP NULL
386 #undef TARGET_ASM_INTEGER
387 #define TARGET_ASM_INTEGER arm_assemble_integer
388
389 #undef TARGET_PRINT_OPERAND
390 #define TARGET_PRINT_OPERAND arm_print_operand
391 #undef TARGET_PRINT_OPERAND_ADDRESS
392 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
393 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
394 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
395
396 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
397 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
398
399 #undef TARGET_ASM_FUNCTION_PROLOGUE
400 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
401
402 #undef TARGET_ASM_FUNCTION_EPILOGUE
403 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
404
405 #undef TARGET_CAN_INLINE_P
406 #define TARGET_CAN_INLINE_P arm_can_inline_p
407
408 #undef TARGET_OPTION_OVERRIDE
409 #define TARGET_OPTION_OVERRIDE arm_option_override
410
411 #undef TARGET_COMP_TYPE_ATTRIBUTES
412 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
413
414 #undef TARGET_SCHED_MACRO_FUSION_P
415 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
416
417 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
418 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
419
420 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
421 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
422
423 #undef TARGET_SCHED_ADJUST_COST
424 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
425
426 #undef TARGET_SET_CURRENT_FUNCTION
427 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
428
429 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
430 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
431
432 #undef TARGET_SCHED_REORDER
433 #define TARGET_SCHED_REORDER arm_sched_reorder
434
435 #undef TARGET_REGISTER_MOVE_COST
436 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
437
438 #undef TARGET_MEMORY_MOVE_COST
439 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
440
441 #undef TARGET_ENCODE_SECTION_INFO
442 #ifdef ARM_PE
443 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
444 #else
445 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
446 #endif
447
448 #undef TARGET_STRIP_NAME_ENCODING
449 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
450
451 #undef TARGET_ASM_INTERNAL_LABEL
452 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
453
454 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
455 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
456
457 #undef TARGET_FUNCTION_VALUE
458 #define TARGET_FUNCTION_VALUE arm_function_value
459
460 #undef TARGET_LIBCALL_VALUE
461 #define TARGET_LIBCALL_VALUE arm_libcall_value
462
463 #undef TARGET_FUNCTION_VALUE_REGNO_P
464 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
465
466 #undef TARGET_ASM_OUTPUT_MI_THUNK
467 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
468 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
469 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
470
471 #undef TARGET_RTX_COSTS
472 #define TARGET_RTX_COSTS arm_rtx_costs
473 #undef TARGET_ADDRESS_COST
474 #define TARGET_ADDRESS_COST arm_address_cost
475
476 #undef TARGET_SHIFT_TRUNCATION_MASK
477 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
478 #undef TARGET_VECTOR_MODE_SUPPORTED_P
479 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
480 #undef TARGET_ARRAY_MODE_SUPPORTED_P
481 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
482 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
483 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
484 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
485 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
486 arm_autovectorize_vector_sizes
487
488 #undef TARGET_MACHINE_DEPENDENT_REORG
489 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
490
491 #undef TARGET_INIT_BUILTINS
492 #define TARGET_INIT_BUILTINS arm_init_builtins
493 #undef TARGET_EXPAND_BUILTIN
494 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
495 #undef TARGET_BUILTIN_DECL
496 #define TARGET_BUILTIN_DECL arm_builtin_decl
497
498 #undef TARGET_INIT_LIBFUNCS
499 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
500
501 #undef TARGET_PROMOTE_FUNCTION_MODE
502 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
503 #undef TARGET_PROMOTE_PROTOTYPES
504 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
505 #undef TARGET_PASS_BY_REFERENCE
506 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
507 #undef TARGET_ARG_PARTIAL_BYTES
508 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
509 #undef TARGET_FUNCTION_ARG
510 #define TARGET_FUNCTION_ARG arm_function_arg
511 #undef TARGET_FUNCTION_ARG_ADVANCE
512 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
513 #undef TARGET_FUNCTION_ARG_BOUNDARY
514 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
515
516 #undef TARGET_SETUP_INCOMING_VARARGS
517 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
518
519 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
520 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
521
522 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
523 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
524 #undef TARGET_TRAMPOLINE_INIT
525 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
526 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
527 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
528
529 #undef TARGET_WARN_FUNC_RETURN
530 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
531
532 #undef TARGET_DEFAULT_SHORT_ENUMS
533 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
534
535 #undef TARGET_ALIGN_ANON_BITFIELD
536 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
537
538 #undef TARGET_NARROW_VOLATILE_BITFIELD
539 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
540
541 #undef TARGET_CXX_GUARD_TYPE
542 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
543
544 #undef TARGET_CXX_GUARD_MASK_BIT
545 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
546
547 #undef TARGET_CXX_GET_COOKIE_SIZE
548 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
549
550 #undef TARGET_CXX_COOKIE_HAS_SIZE
551 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
552
553 #undef TARGET_CXX_CDTOR_RETURNS_THIS
554 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
555
556 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
557 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
558
559 #undef TARGET_CXX_USE_AEABI_ATEXIT
560 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
561
562 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
563 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
564 arm_cxx_determine_class_data_visibility
565
566 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
567 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
568
569 #undef TARGET_RETURN_IN_MSB
570 #define TARGET_RETURN_IN_MSB arm_return_in_msb
571
572 #undef TARGET_RETURN_IN_MEMORY
573 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
574
575 #undef TARGET_MUST_PASS_IN_STACK
576 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
577
578 #if ARM_UNWIND_INFO
579 #undef TARGET_ASM_UNWIND_EMIT
580 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
581
582 /* EABI unwinding tables use a different format for the typeinfo tables. */
583 #undef TARGET_ASM_TTYPE
584 #define TARGET_ASM_TTYPE arm_output_ttype
585
586 #undef TARGET_ARM_EABI_UNWINDER
587 #define TARGET_ARM_EABI_UNWINDER true
588
589 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
590 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
591
592 #undef TARGET_ASM_INIT_SECTIONS
593 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
594 #endif /* ARM_UNWIND_INFO */
595
596 #undef TARGET_DWARF_REGISTER_SPAN
597 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
598
599 #undef TARGET_CANNOT_COPY_INSN_P
600 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
601
602 #ifdef HAVE_AS_TLS
603 #undef TARGET_HAVE_TLS
604 #define TARGET_HAVE_TLS true
605 #endif
606
607 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
608 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
609
610 #undef TARGET_LEGITIMATE_CONSTANT_P
611 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
612
613 #undef TARGET_CANNOT_FORCE_CONST_MEM
614 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
615
616 #undef TARGET_MAX_ANCHOR_OFFSET
617 #define TARGET_MAX_ANCHOR_OFFSET 4095
618
619 /* The minimum is set such that the total size of the block
620 for a particular anchor is -4088 + 1 + 4095 bytes, which is
621 divisible by eight, ensuring natural spacing of anchors. */
622 #undef TARGET_MIN_ANCHOR_OFFSET
623 #define TARGET_MIN_ANCHOR_OFFSET -4088
624
625 #undef TARGET_SCHED_ISSUE_RATE
626 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
627
628 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
629 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
630 arm_first_cycle_multipass_dfa_lookahead
631
632 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
633 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
634 arm_first_cycle_multipass_dfa_lookahead_guard
635
636 #undef TARGET_MANGLE_TYPE
637 #define TARGET_MANGLE_TYPE arm_mangle_type
638
639 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
640 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
641
642 #undef TARGET_BUILD_BUILTIN_VA_LIST
643 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
644 #undef TARGET_EXPAND_BUILTIN_VA_START
645 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
646 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
647 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
648
649 #ifdef HAVE_AS_TLS
650 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
651 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
652 #endif
653
654 #undef TARGET_LEGITIMATE_ADDRESS_P
655 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
656
657 #undef TARGET_PREFERRED_RELOAD_CLASS
658 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
659
660 #undef TARGET_INVALID_PARAMETER_TYPE
661 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
662
663 #undef TARGET_INVALID_RETURN_TYPE
664 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
665
666 #undef TARGET_PROMOTED_TYPE
667 #define TARGET_PROMOTED_TYPE arm_promoted_type
668
669 #undef TARGET_CONVERT_TO_TYPE
670 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
671
672 #undef TARGET_SCALAR_MODE_SUPPORTED_P
673 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
674
675 #undef TARGET_FRAME_POINTER_REQUIRED
676 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
677
678 #undef TARGET_CAN_ELIMINATE
679 #define TARGET_CAN_ELIMINATE arm_can_eliminate
680
681 #undef TARGET_CONDITIONAL_REGISTER_USAGE
682 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
683
684 #undef TARGET_CLASS_LIKELY_SPILLED_P
685 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
686
687 #undef TARGET_VECTORIZE_BUILTINS
688 #define TARGET_VECTORIZE_BUILTINS
689
690 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
691 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
692 arm_builtin_vectorized_function
693
694 #undef TARGET_VECTOR_ALIGNMENT
695 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
696
697 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
698 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
699 arm_vector_alignment_reachable
700
701 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
702 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
703 arm_builtin_support_vector_misalignment
704
705 #undef TARGET_PREFERRED_RENAME_CLASS
706 #define TARGET_PREFERRED_RENAME_CLASS \
707 arm_preferred_rename_class
708
709 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
710 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
711 arm_vectorize_vec_perm_const_ok
712
713 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
714 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
715 arm_builtin_vectorization_cost
716 #undef TARGET_VECTORIZE_ADD_STMT_COST
717 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
718
719 #undef TARGET_CANONICALIZE_COMPARISON
720 #define TARGET_CANONICALIZE_COMPARISON \
721 arm_canonicalize_comparison
722
723 #undef TARGET_ASAN_SHADOW_OFFSET
724 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
725
726 #undef MAX_INSN_PER_IT_BLOCK
727 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
728
729 #undef TARGET_CAN_USE_DOLOOP_P
730 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
731
732 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
733 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
734
735 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
736 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
737
738 #undef TARGET_SCHED_FUSION_PRIORITY
739 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
740
741 struct gcc_target targetm = TARGET_INITIALIZER;
742 \f
743 /* Obstack for minipool constant handling. */
744 static struct obstack minipool_obstack;
745 static char * minipool_startobj;
746
747 /* The maximum number of insns skipped which
748 will be conditionalised if possible. */
749 static int max_insns_skipped = 5;
750
751 extern FILE * asm_out_file;
752
753 /* True if we are currently building a constant table. */
754 int making_const_table;
755
756 /* The processor for which instructions should be scheduled. */
757 enum processor_type arm_tune = arm_none;
758
759 /* The current tuning set. */
760 const struct tune_params *current_tune;
761
762 /* Which floating point hardware to schedule for. */
763 int arm_fpu_attr;
764
765 /* Which floating popint hardware to use. */
766 const struct arm_fpu_desc *arm_fpu_desc;
767
768 /* Used for Thumb call_via trampolines. */
769 rtx thumb_call_via_label[14];
770 static int thumb_call_reg_needed;
771
772 /* The bits in this mask specify which
773 instructions we are allowed to generate. */
774 unsigned long insn_flags = 0;
775
776 /* The bits in this mask specify which instruction scheduling options should
777 be used. */
778 unsigned long tune_flags = 0;
779
780 /* The highest ARM architecture version supported by the
781 target. */
782 enum base_architecture arm_base_arch = BASE_ARCH_0;
783
784 /* The following are used in the arm.md file as equivalents to bits
785 in the above two flag variables. */
786
787 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
788 int arm_arch3m = 0;
789
790 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
791 int arm_arch4 = 0;
792
793 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
794 int arm_arch4t = 0;
795
796 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
797 int arm_arch5 = 0;
798
799 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
800 int arm_arch5e = 0;
801
802 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
803 int arm_arch6 = 0;
804
805 /* Nonzero if this chip supports the ARM 6K extensions. */
806 int arm_arch6k = 0;
807
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
810
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
813
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
816
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
819
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
822
823 /* Nonzero if this chip can benefit from load scheduling. */
824 int arm_ld_sched = 0;
825
826 /* Nonzero if this chip is a StrongARM. */
827 int arm_tune_strongarm = 0;
828
829 /* Nonzero if this chip supports Intel Wireless MMX technology. */
830 int arm_arch_iwmmxt = 0;
831
832 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
833 int arm_arch_iwmmxt2 = 0;
834
835 /* Nonzero if this chip is an XScale. */
836 int arm_arch_xscale = 0;
837
838 /* Nonzero if tuning for XScale */
839 int arm_tune_xscale = 0;
840
841 /* Nonzero if we want to tune for stores that access the write-buffer.
842 This typically means an ARM6 or ARM7 with MMU or MPU. */
843 int arm_tune_wbuf = 0;
844
845 /* Nonzero if tuning for Cortex-A9. */
846 int arm_tune_cortex_a9 = 0;
847
848 /* Nonzero if we should define __THUMB_INTERWORK__ in the
849 preprocessor.
850 XXX This is a bit of a hack, it's intended to help work around
851 problems in GLD which doesn't understand that armv5t code is
852 interworking clean. */
853 int arm_cpp_interwork = 0;
854
855 /* Nonzero if chip supports Thumb 2. */
856 int arm_arch_thumb2;
857
858 /* Nonzero if chip supports integer division instruction. */
859 int arm_arch_arm_hwdiv;
860 int arm_arch_thumb_hwdiv;
861
862 /* Nonzero if chip disallows volatile memory access in IT block. */
863 int arm_arch_no_volatile_ce;
864
865 /* Nonzero if we should use Neon to handle 64-bits operations rather
866 than core registers. */
867 int prefer_neon_for_64bits = 0;
868
869 /* Nonzero if we shouldn't use literal pools. */
870 bool arm_disable_literal_pool = false;
871
872 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
873 we must report the mode of the memory reference from
874 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
875 machine_mode output_memory_reference_mode;
876
877 /* The register number to be used for the PIC offset register. */
878 unsigned arm_pic_register = INVALID_REGNUM;
879
880 enum arm_pcs arm_pcs_default;
881
882 /* For an explanation of these variables, see final_prescan_insn below. */
883 int arm_ccfsm_state;
884 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
885 enum arm_cond_code arm_current_cc;
886
887 rtx arm_target_insn;
888 int arm_target_label;
889 /* The number of conditionally executed insns, including the current insn. */
890 int arm_condexec_count = 0;
891 /* A bitmask specifying the patterns for the IT block.
892 Zero means do not output an IT block before this insn. */
893 int arm_condexec_mask = 0;
894 /* The number of bits used in arm_condexec_mask. */
895 int arm_condexec_masklen = 0;
896
897 /* Nonzero if chip supports the ARMv8 CRC instructions. */
898 int arm_arch_crc = 0;
899
900 /* Nonzero if the core has a very small, high-latency, multiply unit. */
901 int arm_m_profile_small_mul = 0;
902
903 /* The condition codes of the ARM, and the inverse function. */
904 static const char * const arm_condition_codes[] =
905 {
906 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
907 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
908 };
909
910 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
911 int arm_regs_in_sequence[] =
912 {
913 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
914 };
915
916 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
917 #define streq(string1, string2) (strcmp (string1, string2) == 0)
918
919 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
920 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
921 | (1 << PIC_OFFSET_TABLE_REGNUM)))
922 \f
923 /* Initialization code. */
924
925 struct processors
926 {
927 const char *const name;
928 enum processor_type core;
929 const char *arch;
930 enum base_architecture base_arch;
931 const unsigned long flags;
932 const struct tune_params *const tune;
933 };
934
935
936 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
937 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
938 { \
939 num_slots, \
940 l1_size, \
941 l1_line_size \
942 }
943
944 /* arm generic vectorizer costs. */
945 static const
946 struct cpu_vec_costs arm_default_vec_cost = {
947 1, /* scalar_stmt_cost. */
948 1, /* scalar load_cost. */
949 1, /* scalar_store_cost. */
950 1, /* vec_stmt_cost. */
951 1, /* vec_to_scalar_cost. */
952 1, /* scalar_to_vec_cost. */
953 1, /* vec_align_load_cost. */
954 1, /* vec_unalign_load_cost. */
955 1, /* vec_unalign_store_cost. */
956 1, /* vec_store_cost. */
957 3, /* cond_taken_branch_cost. */
958 1, /* cond_not_taken_branch_cost. */
959 };
960
961 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
962 #include "aarch-cost-tables.h"
963
964
965
966 const struct cpu_cost_table cortexa9_extra_costs =
967 {
968 /* ALU */
969 {
970 0, /* arith. */
971 0, /* logical. */
972 0, /* shift. */
973 COSTS_N_INSNS (1), /* shift_reg. */
974 COSTS_N_INSNS (1), /* arith_shift. */
975 COSTS_N_INSNS (2), /* arith_shift_reg. */
976 0, /* log_shift. */
977 COSTS_N_INSNS (1), /* log_shift_reg. */
978 COSTS_N_INSNS (1), /* extend. */
979 COSTS_N_INSNS (2), /* extend_arith. */
980 COSTS_N_INSNS (1), /* bfi. */
981 COSTS_N_INSNS (1), /* bfx. */
982 0, /* clz. */
983 0, /* rev. */
984 0, /* non_exec. */
985 true /* non_exec_costs_exec. */
986 },
987 {
988 /* MULT SImode */
989 {
990 COSTS_N_INSNS (3), /* simple. */
991 COSTS_N_INSNS (3), /* flag_setting. */
992 COSTS_N_INSNS (2), /* extend. */
993 COSTS_N_INSNS (3), /* add. */
994 COSTS_N_INSNS (2), /* extend_add. */
995 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
996 },
997 /* MULT DImode */
998 {
999 0, /* simple (N/A). */
1000 0, /* flag_setting (N/A). */
1001 COSTS_N_INSNS (4), /* extend. */
1002 0, /* add (N/A). */
1003 COSTS_N_INSNS (4), /* extend_add. */
1004 0 /* idiv (N/A). */
1005 }
1006 },
1007 /* LD/ST */
1008 {
1009 COSTS_N_INSNS (2), /* load. */
1010 COSTS_N_INSNS (2), /* load_sign_extend. */
1011 COSTS_N_INSNS (2), /* ldrd. */
1012 COSTS_N_INSNS (2), /* ldm_1st. */
1013 1, /* ldm_regs_per_insn_1st. */
1014 2, /* ldm_regs_per_insn_subsequent. */
1015 COSTS_N_INSNS (5), /* loadf. */
1016 COSTS_N_INSNS (5), /* loadd. */
1017 COSTS_N_INSNS (1), /* load_unaligned. */
1018 COSTS_N_INSNS (2), /* store. */
1019 COSTS_N_INSNS (2), /* strd. */
1020 COSTS_N_INSNS (2), /* stm_1st. */
1021 1, /* stm_regs_per_insn_1st. */
1022 2, /* stm_regs_per_insn_subsequent. */
1023 COSTS_N_INSNS (1), /* storef. */
1024 COSTS_N_INSNS (1), /* stored. */
1025 COSTS_N_INSNS (1), /* store_unaligned. */
1026 COSTS_N_INSNS (1), /* loadv. */
1027 COSTS_N_INSNS (1) /* storev. */
1028 },
1029 {
1030 /* FP SFmode */
1031 {
1032 COSTS_N_INSNS (14), /* div. */
1033 COSTS_N_INSNS (4), /* mult. */
1034 COSTS_N_INSNS (7), /* mult_addsub. */
1035 COSTS_N_INSNS (30), /* fma. */
1036 COSTS_N_INSNS (3), /* addsub. */
1037 COSTS_N_INSNS (1), /* fpconst. */
1038 COSTS_N_INSNS (1), /* neg. */
1039 COSTS_N_INSNS (3), /* compare. */
1040 COSTS_N_INSNS (3), /* widen. */
1041 COSTS_N_INSNS (3), /* narrow. */
1042 COSTS_N_INSNS (3), /* toint. */
1043 COSTS_N_INSNS (3), /* fromint. */
1044 COSTS_N_INSNS (3) /* roundint. */
1045 },
1046 /* FP DFmode */
1047 {
1048 COSTS_N_INSNS (24), /* div. */
1049 COSTS_N_INSNS (5), /* mult. */
1050 COSTS_N_INSNS (8), /* mult_addsub. */
1051 COSTS_N_INSNS (30), /* fma. */
1052 COSTS_N_INSNS (3), /* addsub. */
1053 COSTS_N_INSNS (1), /* fpconst. */
1054 COSTS_N_INSNS (1), /* neg. */
1055 COSTS_N_INSNS (3), /* compare. */
1056 COSTS_N_INSNS (3), /* widen. */
1057 COSTS_N_INSNS (3), /* narrow. */
1058 COSTS_N_INSNS (3), /* toint. */
1059 COSTS_N_INSNS (3), /* fromint. */
1060 COSTS_N_INSNS (3) /* roundint. */
1061 }
1062 },
1063 /* Vector */
1064 {
1065 COSTS_N_INSNS (1) /* alu. */
1066 }
1067 };
1068
1069 const struct cpu_cost_table cortexa8_extra_costs =
1070 {
1071 /* ALU */
1072 {
1073 0, /* arith. */
1074 0, /* logical. */
1075 COSTS_N_INSNS (1), /* shift. */
1076 0, /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 0, /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 0, /* log_shift_reg. */
1081 0, /* extend. */
1082 0, /* extend_arith. */
1083 0, /* bfi. */
1084 0, /* bfx. */
1085 0, /* clz. */
1086 0, /* rev. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1089 },
1090 {
1091 /* MULT SImode */
1092 {
1093 COSTS_N_INSNS (1), /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1099 },
1100 /* MULT DImode */
1101 {
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (2), /* extend. */
1105 0, /* add (N/A). */
1106 COSTS_N_INSNS (2), /* extend_add. */
1107 0 /* idiv (N/A). */
1108 }
1109 },
1110 /* LD/ST */
1111 {
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (1), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* loadf. */
1119 COSTS_N_INSNS (1), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (1), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* storef. */
1127 COSTS_N_INSNS (1), /* stored. */
1128 COSTS_N_INSNS (1), /* store_unaligned. */
1129 COSTS_N_INSNS (1), /* loadv. */
1130 COSTS_N_INSNS (1) /* storev. */
1131 },
1132 {
1133 /* FP SFmode */
1134 {
1135 COSTS_N_INSNS (36), /* div. */
1136 COSTS_N_INSNS (11), /* mult. */
1137 COSTS_N_INSNS (20), /* mult_addsub. */
1138 COSTS_N_INSNS (30), /* fma. */
1139 COSTS_N_INSNS (9), /* addsub. */
1140 COSTS_N_INSNS (3), /* fpconst. */
1141 COSTS_N_INSNS (3), /* neg. */
1142 COSTS_N_INSNS (6), /* compare. */
1143 COSTS_N_INSNS (4), /* widen. */
1144 COSTS_N_INSNS (4), /* narrow. */
1145 COSTS_N_INSNS (8), /* toint. */
1146 COSTS_N_INSNS (8), /* fromint. */
1147 COSTS_N_INSNS (8) /* roundint. */
1148 },
1149 /* FP DFmode */
1150 {
1151 COSTS_N_INSNS (64), /* div. */
1152 COSTS_N_INSNS (16), /* mult. */
1153 COSTS_N_INSNS (25), /* mult_addsub. */
1154 COSTS_N_INSNS (30), /* fma. */
1155 COSTS_N_INSNS (9), /* addsub. */
1156 COSTS_N_INSNS (3), /* fpconst. */
1157 COSTS_N_INSNS (3), /* neg. */
1158 COSTS_N_INSNS (6), /* compare. */
1159 COSTS_N_INSNS (6), /* widen. */
1160 COSTS_N_INSNS (6), /* narrow. */
1161 COSTS_N_INSNS (8), /* toint. */
1162 COSTS_N_INSNS (8), /* fromint. */
1163 COSTS_N_INSNS (8) /* roundint. */
1164 }
1165 },
1166 /* Vector */
1167 {
1168 COSTS_N_INSNS (1) /* alu. */
1169 }
1170 };
1171
1172 const struct cpu_cost_table cortexa5_extra_costs =
1173 {
1174 /* ALU */
1175 {
1176 0, /* arith. */
1177 0, /* logical. */
1178 COSTS_N_INSNS (1), /* shift. */
1179 COSTS_N_INSNS (1), /* shift_reg. */
1180 COSTS_N_INSNS (1), /* arith_shift. */
1181 COSTS_N_INSNS (1), /* arith_shift_reg. */
1182 COSTS_N_INSNS (1), /* log_shift. */
1183 COSTS_N_INSNS (1), /* log_shift_reg. */
1184 COSTS_N_INSNS (1), /* extend. */
1185 COSTS_N_INSNS (1), /* extend_arith. */
1186 COSTS_N_INSNS (1), /* bfi. */
1187 COSTS_N_INSNS (1), /* bfx. */
1188 COSTS_N_INSNS (1), /* clz. */
1189 COSTS_N_INSNS (1), /* rev. */
1190 0, /* non_exec. */
1191 true /* non_exec_costs_exec. */
1192 },
1193
1194 {
1195 /* MULT SImode */
1196 {
1197 0, /* simple. */
1198 COSTS_N_INSNS (1), /* flag_setting. */
1199 COSTS_N_INSNS (1), /* extend. */
1200 COSTS_N_INSNS (1), /* add. */
1201 COSTS_N_INSNS (1), /* extend_add. */
1202 COSTS_N_INSNS (7) /* idiv. */
1203 },
1204 /* MULT DImode */
1205 {
1206 0, /* simple (N/A). */
1207 0, /* flag_setting (N/A). */
1208 COSTS_N_INSNS (1), /* extend. */
1209 0, /* add. */
1210 COSTS_N_INSNS (2), /* extend_add. */
1211 0 /* idiv (N/A). */
1212 }
1213 },
1214 /* LD/ST */
1215 {
1216 COSTS_N_INSNS (1), /* load. */
1217 COSTS_N_INSNS (1), /* load_sign_extend. */
1218 COSTS_N_INSNS (6), /* ldrd. */
1219 COSTS_N_INSNS (1), /* ldm_1st. */
1220 1, /* ldm_regs_per_insn_1st. */
1221 2, /* ldm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* loadf. */
1223 COSTS_N_INSNS (4), /* loadd. */
1224 COSTS_N_INSNS (1), /* load_unaligned. */
1225 COSTS_N_INSNS (1), /* store. */
1226 COSTS_N_INSNS (3), /* strd. */
1227 COSTS_N_INSNS (1), /* stm_1st. */
1228 1, /* stm_regs_per_insn_1st. */
1229 2, /* stm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* storef. */
1231 COSTS_N_INSNS (2), /* stored. */
1232 COSTS_N_INSNS (1), /* store_unaligned. */
1233 COSTS_N_INSNS (1), /* loadv. */
1234 COSTS_N_INSNS (1) /* storev. */
1235 },
1236 {
1237 /* FP SFmode */
1238 {
1239 COSTS_N_INSNS (15), /* div. */
1240 COSTS_N_INSNS (3), /* mult. */
1241 COSTS_N_INSNS (7), /* mult_addsub. */
1242 COSTS_N_INSNS (7), /* fma. */
1243 COSTS_N_INSNS (3), /* addsub. */
1244 COSTS_N_INSNS (3), /* fpconst. */
1245 COSTS_N_INSNS (3), /* neg. */
1246 COSTS_N_INSNS (3), /* compare. */
1247 COSTS_N_INSNS (3), /* widen. */
1248 COSTS_N_INSNS (3), /* narrow. */
1249 COSTS_N_INSNS (3), /* toint. */
1250 COSTS_N_INSNS (3), /* fromint. */
1251 COSTS_N_INSNS (3) /* roundint. */
1252 },
1253 /* FP DFmode */
1254 {
1255 COSTS_N_INSNS (30), /* div. */
1256 COSTS_N_INSNS (6), /* mult. */
1257 COSTS_N_INSNS (10), /* mult_addsub. */
1258 COSTS_N_INSNS (7), /* fma. */
1259 COSTS_N_INSNS (3), /* addsub. */
1260 COSTS_N_INSNS (3), /* fpconst. */
1261 COSTS_N_INSNS (3), /* neg. */
1262 COSTS_N_INSNS (3), /* compare. */
1263 COSTS_N_INSNS (3), /* widen. */
1264 COSTS_N_INSNS (3), /* narrow. */
1265 COSTS_N_INSNS (3), /* toint. */
1266 COSTS_N_INSNS (3), /* fromint. */
1267 COSTS_N_INSNS (3) /* roundint. */
1268 }
1269 },
1270 /* Vector */
1271 {
1272 COSTS_N_INSNS (1) /* alu. */
1273 }
1274 };
1275
1276
1277 const struct cpu_cost_table cortexa7_extra_costs =
1278 {
1279 /* ALU */
1280 {
1281 0, /* arith. */
1282 0, /* logical. */
1283 COSTS_N_INSNS (1), /* shift. */
1284 COSTS_N_INSNS (1), /* shift_reg. */
1285 COSTS_N_INSNS (1), /* arith_shift. */
1286 COSTS_N_INSNS (1), /* arith_shift_reg. */
1287 COSTS_N_INSNS (1), /* log_shift. */
1288 COSTS_N_INSNS (1), /* log_shift_reg. */
1289 COSTS_N_INSNS (1), /* extend. */
1290 COSTS_N_INSNS (1), /* extend_arith. */
1291 COSTS_N_INSNS (1), /* bfi. */
1292 COSTS_N_INSNS (1), /* bfx. */
1293 COSTS_N_INSNS (1), /* clz. */
1294 COSTS_N_INSNS (1), /* rev. */
1295 0, /* non_exec. */
1296 true /* non_exec_costs_exec. */
1297 },
1298
1299 {
1300 /* MULT SImode */
1301 {
1302 0, /* simple. */
1303 COSTS_N_INSNS (1), /* flag_setting. */
1304 COSTS_N_INSNS (1), /* extend. */
1305 COSTS_N_INSNS (1), /* add. */
1306 COSTS_N_INSNS (1), /* extend_add. */
1307 COSTS_N_INSNS (7) /* idiv. */
1308 },
1309 /* MULT DImode */
1310 {
1311 0, /* simple (N/A). */
1312 0, /* flag_setting (N/A). */
1313 COSTS_N_INSNS (1), /* extend. */
1314 0, /* add. */
1315 COSTS_N_INSNS (2), /* extend_add. */
1316 0 /* idiv (N/A). */
1317 }
1318 },
1319 /* LD/ST */
1320 {
1321 COSTS_N_INSNS (1), /* load. */
1322 COSTS_N_INSNS (1), /* load_sign_extend. */
1323 COSTS_N_INSNS (3), /* ldrd. */
1324 COSTS_N_INSNS (1), /* ldm_1st. */
1325 1, /* ldm_regs_per_insn_1st. */
1326 2, /* ldm_regs_per_insn_subsequent. */
1327 COSTS_N_INSNS (2), /* loadf. */
1328 COSTS_N_INSNS (2), /* loadd. */
1329 COSTS_N_INSNS (1), /* load_unaligned. */
1330 COSTS_N_INSNS (1), /* store. */
1331 COSTS_N_INSNS (3), /* strd. */
1332 COSTS_N_INSNS (1), /* stm_1st. */
1333 1, /* stm_regs_per_insn_1st. */
1334 2, /* stm_regs_per_insn_subsequent. */
1335 COSTS_N_INSNS (2), /* storef. */
1336 COSTS_N_INSNS (2), /* stored. */
1337 COSTS_N_INSNS (1), /* store_unaligned. */
1338 COSTS_N_INSNS (1), /* loadv. */
1339 COSTS_N_INSNS (1) /* storev. */
1340 },
1341 {
1342 /* FP SFmode */
1343 {
1344 COSTS_N_INSNS (15), /* div. */
1345 COSTS_N_INSNS (3), /* mult. */
1346 COSTS_N_INSNS (7), /* mult_addsub. */
1347 COSTS_N_INSNS (7), /* fma. */
1348 COSTS_N_INSNS (3), /* addsub. */
1349 COSTS_N_INSNS (3), /* fpconst. */
1350 COSTS_N_INSNS (3), /* neg. */
1351 COSTS_N_INSNS (3), /* compare. */
1352 COSTS_N_INSNS (3), /* widen. */
1353 COSTS_N_INSNS (3), /* narrow. */
1354 COSTS_N_INSNS (3), /* toint. */
1355 COSTS_N_INSNS (3), /* fromint. */
1356 COSTS_N_INSNS (3) /* roundint. */
1357 },
1358 /* FP DFmode */
1359 {
1360 COSTS_N_INSNS (30), /* div. */
1361 COSTS_N_INSNS (6), /* mult. */
1362 COSTS_N_INSNS (10), /* mult_addsub. */
1363 COSTS_N_INSNS (7), /* fma. */
1364 COSTS_N_INSNS (3), /* addsub. */
1365 COSTS_N_INSNS (3), /* fpconst. */
1366 COSTS_N_INSNS (3), /* neg. */
1367 COSTS_N_INSNS (3), /* compare. */
1368 COSTS_N_INSNS (3), /* widen. */
1369 COSTS_N_INSNS (3), /* narrow. */
1370 COSTS_N_INSNS (3), /* toint. */
1371 COSTS_N_INSNS (3), /* fromint. */
1372 COSTS_N_INSNS (3) /* roundint. */
1373 }
1374 },
1375 /* Vector */
1376 {
1377 COSTS_N_INSNS (1) /* alu. */
1378 }
1379 };
1380
1381 const struct cpu_cost_table cortexa12_extra_costs =
1382 {
1383 /* ALU */
1384 {
1385 0, /* arith. */
1386 0, /* logical. */
1387 0, /* shift. */
1388 COSTS_N_INSNS (1), /* shift_reg. */
1389 COSTS_N_INSNS (1), /* arith_shift. */
1390 COSTS_N_INSNS (1), /* arith_shift_reg. */
1391 COSTS_N_INSNS (1), /* log_shift. */
1392 COSTS_N_INSNS (1), /* log_shift_reg. */
1393 0, /* extend. */
1394 COSTS_N_INSNS (1), /* extend_arith. */
1395 0, /* bfi. */
1396 COSTS_N_INSNS (1), /* bfx. */
1397 COSTS_N_INSNS (1), /* clz. */
1398 COSTS_N_INSNS (1), /* rev. */
1399 0, /* non_exec. */
1400 true /* non_exec_costs_exec. */
1401 },
1402 /* MULT SImode */
1403 {
1404 {
1405 COSTS_N_INSNS (2), /* simple. */
1406 COSTS_N_INSNS (3), /* flag_setting. */
1407 COSTS_N_INSNS (2), /* extend. */
1408 COSTS_N_INSNS (3), /* add. */
1409 COSTS_N_INSNS (2), /* extend_add. */
1410 COSTS_N_INSNS (18) /* idiv. */
1411 },
1412 /* MULT DImode */
1413 {
1414 0, /* simple (N/A). */
1415 0, /* flag_setting (N/A). */
1416 COSTS_N_INSNS (3), /* extend. */
1417 0, /* add (N/A). */
1418 COSTS_N_INSNS (3), /* extend_add. */
1419 0 /* idiv (N/A). */
1420 }
1421 },
1422 /* LD/ST */
1423 {
1424 COSTS_N_INSNS (3), /* load. */
1425 COSTS_N_INSNS (3), /* load_sign_extend. */
1426 COSTS_N_INSNS (3), /* ldrd. */
1427 COSTS_N_INSNS (3), /* ldm_1st. */
1428 1, /* ldm_regs_per_insn_1st. */
1429 2, /* ldm_regs_per_insn_subsequent. */
1430 COSTS_N_INSNS (3), /* loadf. */
1431 COSTS_N_INSNS (3), /* loadd. */
1432 0, /* load_unaligned. */
1433 0, /* store. */
1434 0, /* strd. */
1435 0, /* stm_1st. */
1436 1, /* stm_regs_per_insn_1st. */
1437 2, /* stm_regs_per_insn_subsequent. */
1438 COSTS_N_INSNS (2), /* storef. */
1439 COSTS_N_INSNS (2), /* stored. */
1440 0, /* store_unaligned. */
1441 COSTS_N_INSNS (1), /* loadv. */
1442 COSTS_N_INSNS (1) /* storev. */
1443 },
1444 {
1445 /* FP SFmode */
1446 {
1447 COSTS_N_INSNS (17), /* div. */
1448 COSTS_N_INSNS (4), /* mult. */
1449 COSTS_N_INSNS (8), /* mult_addsub. */
1450 COSTS_N_INSNS (8), /* fma. */
1451 COSTS_N_INSNS (4), /* addsub. */
1452 COSTS_N_INSNS (2), /* fpconst. */
1453 COSTS_N_INSNS (2), /* neg. */
1454 COSTS_N_INSNS (2), /* compare. */
1455 COSTS_N_INSNS (4), /* widen. */
1456 COSTS_N_INSNS (4), /* narrow. */
1457 COSTS_N_INSNS (4), /* toint. */
1458 COSTS_N_INSNS (4), /* fromint. */
1459 COSTS_N_INSNS (4) /* roundint. */
1460 },
1461 /* FP DFmode */
1462 {
1463 COSTS_N_INSNS (31), /* div. */
1464 COSTS_N_INSNS (4), /* mult. */
1465 COSTS_N_INSNS (8), /* mult_addsub. */
1466 COSTS_N_INSNS (8), /* fma. */
1467 COSTS_N_INSNS (4), /* addsub. */
1468 COSTS_N_INSNS (2), /* fpconst. */
1469 COSTS_N_INSNS (2), /* neg. */
1470 COSTS_N_INSNS (2), /* compare. */
1471 COSTS_N_INSNS (4), /* widen. */
1472 COSTS_N_INSNS (4), /* narrow. */
1473 COSTS_N_INSNS (4), /* toint. */
1474 COSTS_N_INSNS (4), /* fromint. */
1475 COSTS_N_INSNS (4) /* roundint. */
1476 }
1477 },
1478 /* Vector */
1479 {
1480 COSTS_N_INSNS (1) /* alu. */
1481 }
1482 };
1483
1484 const struct cpu_cost_table cortexa15_extra_costs =
1485 {
1486 /* ALU */
1487 {
1488 0, /* arith. */
1489 0, /* logical. */
1490 0, /* shift. */
1491 0, /* shift_reg. */
1492 COSTS_N_INSNS (1), /* arith_shift. */
1493 COSTS_N_INSNS (1), /* arith_shift_reg. */
1494 COSTS_N_INSNS (1), /* log_shift. */
1495 COSTS_N_INSNS (1), /* log_shift_reg. */
1496 0, /* extend. */
1497 COSTS_N_INSNS (1), /* extend_arith. */
1498 COSTS_N_INSNS (1), /* bfi. */
1499 0, /* bfx. */
1500 0, /* clz. */
1501 0, /* rev. */
1502 0, /* non_exec. */
1503 true /* non_exec_costs_exec. */
1504 },
1505 /* MULT SImode */
1506 {
1507 {
1508 COSTS_N_INSNS (2), /* simple. */
1509 COSTS_N_INSNS (3), /* flag_setting. */
1510 COSTS_N_INSNS (2), /* extend. */
1511 COSTS_N_INSNS (2), /* add. */
1512 COSTS_N_INSNS (2), /* extend_add. */
1513 COSTS_N_INSNS (18) /* idiv. */
1514 },
1515 /* MULT DImode */
1516 {
1517 0, /* simple (N/A). */
1518 0, /* flag_setting (N/A). */
1519 COSTS_N_INSNS (3), /* extend. */
1520 0, /* add (N/A). */
1521 COSTS_N_INSNS (3), /* extend_add. */
1522 0 /* idiv (N/A). */
1523 }
1524 },
1525 /* LD/ST */
1526 {
1527 COSTS_N_INSNS (3), /* load. */
1528 COSTS_N_INSNS (3), /* load_sign_extend. */
1529 COSTS_N_INSNS (3), /* ldrd. */
1530 COSTS_N_INSNS (4), /* ldm_1st. */
1531 1, /* ldm_regs_per_insn_1st. */
1532 2, /* ldm_regs_per_insn_subsequent. */
1533 COSTS_N_INSNS (4), /* loadf. */
1534 COSTS_N_INSNS (4), /* loadd. */
1535 0, /* load_unaligned. */
1536 0, /* store. */
1537 0, /* strd. */
1538 COSTS_N_INSNS (1), /* stm_1st. */
1539 1, /* stm_regs_per_insn_1st. */
1540 2, /* stm_regs_per_insn_subsequent. */
1541 0, /* storef. */
1542 0, /* stored. */
1543 0, /* store_unaligned. */
1544 COSTS_N_INSNS (1), /* loadv. */
1545 COSTS_N_INSNS (1) /* storev. */
1546 },
1547 {
1548 /* FP SFmode */
1549 {
1550 COSTS_N_INSNS (17), /* div. */
1551 COSTS_N_INSNS (4), /* mult. */
1552 COSTS_N_INSNS (8), /* mult_addsub. */
1553 COSTS_N_INSNS (8), /* fma. */
1554 COSTS_N_INSNS (4), /* addsub. */
1555 COSTS_N_INSNS (2), /* fpconst. */
1556 COSTS_N_INSNS (2), /* neg. */
1557 COSTS_N_INSNS (5), /* compare. */
1558 COSTS_N_INSNS (4), /* widen. */
1559 COSTS_N_INSNS (4), /* narrow. */
1560 COSTS_N_INSNS (4), /* toint. */
1561 COSTS_N_INSNS (4), /* fromint. */
1562 COSTS_N_INSNS (4) /* roundint. */
1563 },
1564 /* FP DFmode */
1565 {
1566 COSTS_N_INSNS (31), /* div. */
1567 COSTS_N_INSNS (4), /* mult. */
1568 COSTS_N_INSNS (8), /* mult_addsub. */
1569 COSTS_N_INSNS (8), /* fma. */
1570 COSTS_N_INSNS (4), /* addsub. */
1571 COSTS_N_INSNS (2), /* fpconst. */
1572 COSTS_N_INSNS (2), /* neg. */
1573 COSTS_N_INSNS (2), /* compare. */
1574 COSTS_N_INSNS (4), /* widen. */
1575 COSTS_N_INSNS (4), /* narrow. */
1576 COSTS_N_INSNS (4), /* toint. */
1577 COSTS_N_INSNS (4), /* fromint. */
1578 COSTS_N_INSNS (4) /* roundint. */
1579 }
1580 },
1581 /* Vector */
1582 {
1583 COSTS_N_INSNS (1) /* alu. */
1584 }
1585 };
1586
1587 const struct cpu_cost_table v7m_extra_costs =
1588 {
1589 /* ALU */
1590 {
1591 0, /* arith. */
1592 0, /* logical. */
1593 0, /* shift. */
1594 0, /* shift_reg. */
1595 0, /* arith_shift. */
1596 COSTS_N_INSNS (1), /* arith_shift_reg. */
1597 0, /* log_shift. */
1598 COSTS_N_INSNS (1), /* log_shift_reg. */
1599 0, /* extend. */
1600 COSTS_N_INSNS (1), /* extend_arith. */
1601 0, /* bfi. */
1602 0, /* bfx. */
1603 0, /* clz. */
1604 0, /* rev. */
1605 COSTS_N_INSNS (1), /* non_exec. */
1606 false /* non_exec_costs_exec. */
1607 },
1608 {
1609 /* MULT SImode */
1610 {
1611 COSTS_N_INSNS (1), /* simple. */
1612 COSTS_N_INSNS (1), /* flag_setting. */
1613 COSTS_N_INSNS (2), /* extend. */
1614 COSTS_N_INSNS (1), /* add. */
1615 COSTS_N_INSNS (3), /* extend_add. */
1616 COSTS_N_INSNS (8) /* idiv. */
1617 },
1618 /* MULT DImode */
1619 {
1620 0, /* simple (N/A). */
1621 0, /* flag_setting (N/A). */
1622 COSTS_N_INSNS (2), /* extend. */
1623 0, /* add (N/A). */
1624 COSTS_N_INSNS (3), /* extend_add. */
1625 0 /* idiv (N/A). */
1626 }
1627 },
1628 /* LD/ST */
1629 {
1630 COSTS_N_INSNS (2), /* load. */
1631 0, /* load_sign_extend. */
1632 COSTS_N_INSNS (3), /* ldrd. */
1633 COSTS_N_INSNS (2), /* ldm_1st. */
1634 1, /* ldm_regs_per_insn_1st. */
1635 1, /* ldm_regs_per_insn_subsequent. */
1636 COSTS_N_INSNS (2), /* loadf. */
1637 COSTS_N_INSNS (3), /* loadd. */
1638 COSTS_N_INSNS (1), /* load_unaligned. */
1639 COSTS_N_INSNS (2), /* store. */
1640 COSTS_N_INSNS (3), /* strd. */
1641 COSTS_N_INSNS (2), /* stm_1st. */
1642 1, /* stm_regs_per_insn_1st. */
1643 1, /* stm_regs_per_insn_subsequent. */
1644 COSTS_N_INSNS (2), /* storef. */
1645 COSTS_N_INSNS (3), /* stored. */
1646 COSTS_N_INSNS (1), /* store_unaligned. */
1647 COSTS_N_INSNS (1), /* loadv. */
1648 COSTS_N_INSNS (1) /* storev. */
1649 },
1650 {
1651 /* FP SFmode */
1652 {
1653 COSTS_N_INSNS (7), /* div. */
1654 COSTS_N_INSNS (2), /* mult. */
1655 COSTS_N_INSNS (5), /* mult_addsub. */
1656 COSTS_N_INSNS (3), /* fma. */
1657 COSTS_N_INSNS (1), /* addsub. */
1658 0, /* fpconst. */
1659 0, /* neg. */
1660 0, /* compare. */
1661 0, /* widen. */
1662 0, /* narrow. */
1663 0, /* toint. */
1664 0, /* fromint. */
1665 0 /* roundint. */
1666 },
1667 /* FP DFmode */
1668 {
1669 COSTS_N_INSNS (15), /* div. */
1670 COSTS_N_INSNS (5), /* mult. */
1671 COSTS_N_INSNS (7), /* mult_addsub. */
1672 COSTS_N_INSNS (7), /* fma. */
1673 COSTS_N_INSNS (3), /* addsub. */
1674 0, /* fpconst. */
1675 0, /* neg. */
1676 0, /* compare. */
1677 0, /* widen. */
1678 0, /* narrow. */
1679 0, /* toint. */
1680 0, /* fromint. */
1681 0 /* roundint. */
1682 }
1683 },
1684 /* Vector */
1685 {
1686 COSTS_N_INSNS (1) /* alu. */
1687 }
1688 };
1689
1690 const struct tune_params arm_slowmul_tune =
1691 {
1692 arm_slowmul_rtx_costs,
1693 NULL, /* Insn extra costs. */
1694 NULL, /* Sched adj cost. */
1695 arm_default_branch_cost,
1696 &arm_default_vec_cost,
1697 3, /* Constant limit. */
1698 5, /* Max cond insns. */
1699 8, /* Memset max inline. */
1700 1, /* Issue rate. */
1701 ARM_PREFETCH_NOT_BENEFICIAL,
1702 tune_params::PREF_CONST_POOL_TRUE,
1703 tune_params::PREF_LDRD_FALSE,
1704 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1705 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1706 tune_params::DISPARAGE_FLAGS_NEITHER,
1707 tune_params::PREF_NEON_64_FALSE,
1708 tune_params::PREF_NEON_STRINGOPS_FALSE,
1709 tune_params::FUSE_NOTHING,
1710 tune_params::SCHED_AUTOPREF_OFF
1711 };
1712
1713 const struct tune_params arm_fastmul_tune =
1714 {
1715 arm_fastmul_rtx_costs,
1716 NULL, /* Insn extra costs. */
1717 NULL, /* Sched adj cost. */
1718 arm_default_branch_cost,
1719 &arm_default_vec_cost,
1720 1, /* Constant limit. */
1721 5, /* Max cond insns. */
1722 8, /* Memset max inline. */
1723 1, /* Issue rate. */
1724 ARM_PREFETCH_NOT_BENEFICIAL,
1725 tune_params::PREF_CONST_POOL_TRUE,
1726 tune_params::PREF_LDRD_FALSE,
1727 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1728 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1729 tune_params::DISPARAGE_FLAGS_NEITHER,
1730 tune_params::PREF_NEON_64_FALSE,
1731 tune_params::PREF_NEON_STRINGOPS_FALSE,
1732 tune_params::FUSE_NOTHING,
1733 tune_params::SCHED_AUTOPREF_OFF
1734 };
1735
1736 /* StrongARM has early execution of branches, so a sequence that is worth
1737 skipping is shorter. Set max_insns_skipped to a lower value. */
1738
1739 const struct tune_params arm_strongarm_tune =
1740 {
1741 arm_fastmul_rtx_costs,
1742 NULL, /* Insn extra costs. */
1743 NULL, /* Sched adj cost. */
1744 arm_default_branch_cost,
1745 &arm_default_vec_cost,
1746 1, /* Constant limit. */
1747 3, /* Max cond insns. */
1748 8, /* Memset max inline. */
1749 1, /* Issue rate. */
1750 ARM_PREFETCH_NOT_BENEFICIAL,
1751 tune_params::PREF_CONST_POOL_TRUE,
1752 tune_params::PREF_LDRD_FALSE,
1753 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1754 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1755 tune_params::DISPARAGE_FLAGS_NEITHER,
1756 tune_params::PREF_NEON_64_FALSE,
1757 tune_params::PREF_NEON_STRINGOPS_FALSE,
1758 tune_params::FUSE_NOTHING,
1759 tune_params::SCHED_AUTOPREF_OFF
1760 };
1761
1762 const struct tune_params arm_xscale_tune =
1763 {
1764 arm_xscale_rtx_costs,
1765 NULL, /* Insn extra costs. */
1766 xscale_sched_adjust_cost,
1767 arm_default_branch_cost,
1768 &arm_default_vec_cost,
1769 2, /* Constant limit. */
1770 3, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 tune_params::PREF_CONST_POOL_TRUE,
1775 tune_params::PREF_LDRD_FALSE,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER,
1779 tune_params::PREF_NEON_64_FALSE,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE,
1781 tune_params::FUSE_NOTHING,
1782 tune_params::SCHED_AUTOPREF_OFF
1783 };
1784
1785 const struct tune_params arm_9e_tune =
1786 {
1787 arm_9e_rtx_costs,
1788 NULL, /* Insn extra costs. */
1789 NULL, /* Sched adj cost. */
1790 arm_default_branch_cost,
1791 &arm_default_vec_cost,
1792 1, /* Constant limit. */
1793 5, /* Max cond insns. */
1794 8, /* Memset max inline. */
1795 1, /* Issue rate. */
1796 ARM_PREFETCH_NOT_BENEFICIAL,
1797 tune_params::PREF_CONST_POOL_TRUE,
1798 tune_params::PREF_LDRD_FALSE,
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1800 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1801 tune_params::DISPARAGE_FLAGS_NEITHER,
1802 tune_params::PREF_NEON_64_FALSE,
1803 tune_params::PREF_NEON_STRINGOPS_FALSE,
1804 tune_params::FUSE_NOTHING,
1805 tune_params::SCHED_AUTOPREF_OFF
1806 };
1807
1808 const struct tune_params arm_marvell_pj4_tune =
1809 {
1810 arm_9e_rtx_costs,
1811 NULL, /* Insn extra costs. */
1812 NULL, /* Sched adj cost. */
1813 arm_default_branch_cost,
1814 &arm_default_vec_cost,
1815 1, /* Constant limit. */
1816 5, /* Max cond insns. */
1817 8, /* Memset max inline. */
1818 2, /* Issue rate. */
1819 ARM_PREFETCH_NOT_BENEFICIAL,
1820 tune_params::PREF_CONST_POOL_TRUE,
1821 tune_params::PREF_LDRD_FALSE,
1822 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1824 tune_params::DISPARAGE_FLAGS_NEITHER,
1825 tune_params::PREF_NEON_64_FALSE,
1826 tune_params::PREF_NEON_STRINGOPS_FALSE,
1827 tune_params::FUSE_NOTHING,
1828 tune_params::SCHED_AUTOPREF_OFF
1829 };
1830
1831 const struct tune_params arm_v6t2_tune =
1832 {
1833 arm_9e_rtx_costs,
1834 NULL, /* Insn extra costs. */
1835 NULL, /* Sched adj cost. */
1836 arm_default_branch_cost,
1837 &arm_default_vec_cost,
1838 1, /* Constant limit. */
1839 5, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 tune_params::PREF_CONST_POOL_FALSE,
1844 tune_params::PREF_LDRD_FALSE,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER,
1848 tune_params::PREF_NEON_64_FALSE,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE,
1850 tune_params::FUSE_NOTHING,
1851 tune_params::SCHED_AUTOPREF_OFF
1852 };
1853
1854
1855 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1856 const struct tune_params arm_cortex_tune =
1857 {
1858 arm_9e_rtx_costs,
1859 &generic_extra_costs,
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_FALSE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1877 };
1878
1879 const struct tune_params arm_cortex_a8_tune =
1880 {
1881 arm_9e_rtx_costs,
1882 &cortexa8_extra_costs,
1883 NULL, /* Sched adj cost. */
1884 arm_default_branch_cost,
1885 &arm_default_vec_cost,
1886 1, /* Constant limit. */
1887 5, /* Max cond insns. */
1888 8, /* Memset max inline. */
1889 2, /* Issue rate. */
1890 ARM_PREFETCH_NOT_BENEFICIAL,
1891 tune_params::PREF_CONST_POOL_FALSE,
1892 tune_params::PREF_LDRD_FALSE,
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1894 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1895 tune_params::DISPARAGE_FLAGS_NEITHER,
1896 tune_params::PREF_NEON_64_FALSE,
1897 tune_params::PREF_NEON_STRINGOPS_TRUE,
1898 tune_params::FUSE_NOTHING,
1899 tune_params::SCHED_AUTOPREF_OFF
1900 };
1901
1902 const struct tune_params arm_cortex_a7_tune =
1903 {
1904 arm_9e_rtx_costs,
1905 &cortexa7_extra_costs,
1906 NULL, /* Sched adj cost. */
1907 arm_default_branch_cost,
1908 &arm_default_vec_cost,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 8, /* Memset max inline. */
1912 2, /* Issue rate. */
1913 ARM_PREFETCH_NOT_BENEFICIAL,
1914 tune_params::PREF_CONST_POOL_FALSE,
1915 tune_params::PREF_LDRD_FALSE,
1916 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1917 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1918 tune_params::DISPARAGE_FLAGS_NEITHER,
1919 tune_params::PREF_NEON_64_FALSE,
1920 tune_params::PREF_NEON_STRINGOPS_TRUE,
1921 tune_params::FUSE_NOTHING,
1922 tune_params::SCHED_AUTOPREF_OFF
1923 };
1924
1925 const struct tune_params arm_cortex_a15_tune =
1926 {
1927 arm_9e_rtx_costs,
1928 &cortexa15_extra_costs,
1929 NULL, /* Sched adj cost. */
1930 arm_default_branch_cost,
1931 &arm_default_vec_cost,
1932 1, /* Constant limit. */
1933 2, /* Max cond insns. */
1934 8, /* Memset max inline. */
1935 3, /* Issue rate. */
1936 ARM_PREFETCH_NOT_BENEFICIAL,
1937 tune_params::PREF_CONST_POOL_FALSE,
1938 tune_params::PREF_LDRD_TRUE,
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1940 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1941 tune_params::DISPARAGE_FLAGS_ALL,
1942 tune_params::PREF_NEON_64_FALSE,
1943 tune_params::PREF_NEON_STRINGOPS_TRUE,
1944 tune_params::FUSE_NOTHING,
1945 tune_params::SCHED_AUTOPREF_FULL
1946 };
1947
1948 const struct tune_params arm_cortex_a53_tune =
1949 {
1950 arm_9e_rtx_costs,
1951 &cortexa53_extra_costs,
1952 NULL, /* Sched adj cost. */
1953 arm_default_branch_cost,
1954 &arm_default_vec_cost,
1955 1, /* Constant limit. */
1956 5, /* Max cond insns. */
1957 8, /* Memset max inline. */
1958 2, /* Issue rate. */
1959 ARM_PREFETCH_NOT_BENEFICIAL,
1960 tune_params::PREF_CONST_POOL_FALSE,
1961 tune_params::PREF_LDRD_FALSE,
1962 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1963 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1964 tune_params::DISPARAGE_FLAGS_NEITHER,
1965 tune_params::PREF_NEON_64_FALSE,
1966 tune_params::PREF_NEON_STRINGOPS_TRUE,
1967 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1968 tune_params::SCHED_AUTOPREF_OFF
1969 };
1970
1971 const struct tune_params arm_cortex_a57_tune =
1972 {
1973 arm_9e_rtx_costs,
1974 &cortexa57_extra_costs,
1975 NULL, /* Sched adj cost. */
1976 arm_default_branch_cost,
1977 &arm_default_vec_cost,
1978 1, /* Constant limit. */
1979 2, /* Max cond insns. */
1980 8, /* Memset max inline. */
1981 3, /* Issue rate. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 tune_params::PREF_CONST_POOL_FALSE,
1984 tune_params::PREF_LDRD_TRUE,
1985 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1987 tune_params::DISPARAGE_FLAGS_ALL,
1988 tune_params::PREF_NEON_64_FALSE,
1989 tune_params::PREF_NEON_STRINGOPS_TRUE,
1990 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1991 tune_params::SCHED_AUTOPREF_FULL
1992 };
1993
1994 const struct tune_params arm_xgene1_tune =
1995 {
1996 arm_9e_rtx_costs,
1997 &xgene1_extra_costs,
1998 NULL, /* Sched adj cost. */
1999 arm_default_branch_cost,
2000 &arm_default_vec_cost,
2001 1, /* Constant limit. */
2002 2, /* Max cond insns. */
2003 32, /* Memset max inline. */
2004 4, /* Issue rate. */
2005 ARM_PREFETCH_NOT_BENEFICIAL,
2006 tune_params::PREF_CONST_POOL_FALSE,
2007 tune_params::PREF_LDRD_TRUE,
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2009 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2010 tune_params::DISPARAGE_FLAGS_ALL,
2011 tune_params::PREF_NEON_64_FALSE,
2012 tune_params::PREF_NEON_STRINGOPS_FALSE,
2013 tune_params::FUSE_NOTHING,
2014 tune_params::SCHED_AUTOPREF_OFF
2015 };
2016
2017 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2018 less appealing. Set max_insns_skipped to a low value. */
2019
2020 const struct tune_params arm_cortex_a5_tune =
2021 {
2022 arm_9e_rtx_costs,
2023 &cortexa5_extra_costs,
2024 NULL, /* Sched adj cost. */
2025 arm_cortex_a5_branch_cost,
2026 &arm_default_vec_cost,
2027 1, /* Constant limit. */
2028 1, /* Max cond insns. */
2029 8, /* Memset max inline. */
2030 2, /* Issue rate. */
2031 ARM_PREFETCH_NOT_BENEFICIAL,
2032 tune_params::PREF_CONST_POOL_FALSE,
2033 tune_params::PREF_LDRD_FALSE,
2034 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2035 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2036 tune_params::DISPARAGE_FLAGS_NEITHER,
2037 tune_params::PREF_NEON_64_FALSE,
2038 tune_params::PREF_NEON_STRINGOPS_TRUE,
2039 tune_params::FUSE_NOTHING,
2040 tune_params::SCHED_AUTOPREF_OFF
2041 };
2042
2043 const struct tune_params arm_cortex_a9_tune =
2044 {
2045 arm_9e_rtx_costs,
2046 &cortexa9_extra_costs,
2047 cortex_a9_sched_adjust_cost,
2048 arm_default_branch_cost,
2049 &arm_default_vec_cost,
2050 1, /* Constant limit. */
2051 5, /* Max cond insns. */
2052 8, /* Memset max inline. */
2053 2, /* Issue rate. */
2054 ARM_PREFETCH_BENEFICIAL(4,32,32),
2055 tune_params::PREF_CONST_POOL_FALSE,
2056 tune_params::PREF_LDRD_FALSE,
2057 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2059 tune_params::DISPARAGE_FLAGS_NEITHER,
2060 tune_params::PREF_NEON_64_FALSE,
2061 tune_params::PREF_NEON_STRINGOPS_FALSE,
2062 tune_params::FUSE_NOTHING,
2063 tune_params::SCHED_AUTOPREF_OFF
2064 };
2065
2066 const struct tune_params arm_cortex_a12_tune =
2067 {
2068 arm_9e_rtx_costs,
2069 &cortexa12_extra_costs,
2070 NULL, /* Sched adj cost. */
2071 arm_default_branch_cost,
2072 &arm_default_vec_cost, /* Vectorizer costs. */
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 2, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL,
2078 tune_params::PREF_CONST_POOL_FALSE,
2079 tune_params::PREF_LDRD_TRUE,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL,
2083 tune_params::PREF_NEON_64_FALSE,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2086 tune_params::SCHED_AUTOPREF_OFF
2087 };
2088
2089 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2090 cycle to execute each. An LDR from the constant pool also takes two cycles
2091 to execute, but mildly increases pipelining opportunity (consecutive
2092 loads/stores can be pipelined together, saving one cycle), and may also
2093 improve icache utilisation. Hence we prefer the constant pool for such
2094 processors. */
2095
2096 const struct tune_params arm_v7m_tune =
2097 {
2098 arm_9e_rtx_costs,
2099 &v7m_extra_costs,
2100 NULL, /* Sched adj cost. */
2101 arm_cortex_m_branch_cost,
2102 &arm_default_vec_cost,
2103 1, /* Constant limit. */
2104 2, /* Max cond insns. */
2105 8, /* Memset max inline. */
2106 1, /* Issue rate. */
2107 ARM_PREFETCH_NOT_BENEFICIAL,
2108 tune_params::PREF_CONST_POOL_TRUE,
2109 tune_params::PREF_LDRD_FALSE,
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2112 tune_params::DISPARAGE_FLAGS_NEITHER,
2113 tune_params::PREF_NEON_64_FALSE,
2114 tune_params::PREF_NEON_STRINGOPS_FALSE,
2115 tune_params::FUSE_NOTHING,
2116 tune_params::SCHED_AUTOPREF_OFF
2117 };
2118
2119 /* Cortex-M7 tuning. */
2120
2121 const struct tune_params arm_cortex_m7_tune =
2122 {
2123 arm_9e_rtx_costs,
2124 &v7m_extra_costs,
2125 NULL, /* Sched adj cost. */
2126 arm_cortex_m7_branch_cost,
2127 &arm_default_vec_cost,
2128 0, /* Constant limit. */
2129 1, /* Max cond insns. */
2130 8, /* Memset max inline. */
2131 2, /* Issue rate. */
2132 ARM_PREFETCH_NOT_BENEFICIAL,
2133 tune_params::PREF_CONST_POOL_TRUE,
2134 tune_params::PREF_LDRD_FALSE,
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2137 tune_params::DISPARAGE_FLAGS_NEITHER,
2138 tune_params::PREF_NEON_64_FALSE,
2139 tune_params::PREF_NEON_STRINGOPS_FALSE,
2140 tune_params::FUSE_NOTHING,
2141 tune_params::SCHED_AUTOPREF_OFF
2142 };
2143
2144 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2145 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2146 const struct tune_params arm_v6m_tune =
2147 {
2148 arm_9e_rtx_costs,
2149 NULL, /* Insn extra costs. */
2150 NULL, /* Sched adj cost. */
2151 arm_default_branch_cost,
2152 &arm_default_vec_cost, /* Vectorizer costs. */
2153 1, /* Constant limit. */
2154 5, /* Max cond insns. */
2155 8, /* Memset max inline. */
2156 1, /* Issue rate. */
2157 ARM_PREFETCH_NOT_BENEFICIAL,
2158 tune_params::PREF_CONST_POOL_FALSE,
2159 tune_params::PREF_LDRD_FALSE,
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2162 tune_params::DISPARAGE_FLAGS_NEITHER,
2163 tune_params::PREF_NEON_64_FALSE,
2164 tune_params::PREF_NEON_STRINGOPS_FALSE,
2165 tune_params::FUSE_NOTHING,
2166 tune_params::SCHED_AUTOPREF_OFF
2167 };
2168
2169 const struct tune_params arm_fa726te_tune =
2170 {
2171 arm_9e_rtx_costs,
2172 NULL, /* Insn extra costs. */
2173 fa726te_sched_adjust_cost,
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost,
2176 1, /* Constant limit. */
2177 5, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_TRUE,
2182 tune_params::PREF_LDRD_FALSE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_NEITHER,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_FALSE,
2188 tune_params::FUSE_NOTHING,
2189 tune_params::SCHED_AUTOPREF_OFF
2190 };
2191
2192
2193 /* Not all of these give usefully different compilation alternatives,
2194 but there is no simple way of generalizing them. */
2195 static const struct processors all_cores[] =
2196 {
2197 /* ARM Cores */
2198 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2199 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2200 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2201 #include "arm-cores.def"
2202 #undef ARM_CORE
2203 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2204 };
2205
2206 static const struct processors all_architectures[] =
2207 {
2208 /* ARM Architectures */
2209 /* We don't specify tuning costs here as it will be figured out
2210 from the core. */
2211
2212 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2213 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2214 #include "arm-arches.def"
2215 #undef ARM_ARCH
2216 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2217 };
2218
2219
2220 /* These are populated as commandline arguments are processed, or NULL
2221 if not specified. */
2222 static const struct processors *arm_selected_arch;
2223 static const struct processors *arm_selected_cpu;
2224 static const struct processors *arm_selected_tune;
2225
2226 /* The name of the preprocessor macro to define for this architecture. */
2227
2228 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2229
2230 /* Available values for -mfpu=. */
2231
2232 static const struct arm_fpu_desc all_fpus[] =
2233 {
2234 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2235 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2236 #include "arm-fpus.def"
2237 #undef ARM_FPU
2238 };
2239
2240
2241 /* Supported TLS relocations. */
2242
2243 enum tls_reloc {
2244 TLS_GD32,
2245 TLS_LDM32,
2246 TLS_LDO32,
2247 TLS_IE32,
2248 TLS_LE32,
2249 TLS_DESCSEQ /* GNU scheme */
2250 };
2251
2252 /* The maximum number of insns to be used when loading a constant. */
2253 inline static int
2254 arm_constant_limit (bool size_p)
2255 {
2256 return size_p ? 1 : current_tune->constant_limit;
2257 }
2258
2259 /* Emit an insn that's a simple single-set. Both the operands must be known
2260 to be valid. */
2261 inline static rtx_insn *
2262 emit_set_insn (rtx x, rtx y)
2263 {
2264 return emit_insn (gen_rtx_SET (x, y));
2265 }
2266
2267 /* Return the number of bits set in VALUE. */
2268 static unsigned
2269 bit_count (unsigned long value)
2270 {
2271 unsigned long count = 0;
2272
2273 while (value)
2274 {
2275 count++;
2276 value &= value - 1; /* Clear the least-significant set bit. */
2277 }
2278
2279 return count;
2280 }
2281
2282 typedef struct
2283 {
2284 machine_mode mode;
2285 const char *name;
2286 } arm_fixed_mode_set;
2287
2288 /* A small helper for setting fixed-point library libfuncs. */
2289
2290 static void
2291 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2292 const char *funcname, const char *modename,
2293 int num_suffix)
2294 {
2295 char buffer[50];
2296
2297 if (num_suffix == 0)
2298 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2299 else
2300 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2301
2302 set_optab_libfunc (optable, mode, buffer);
2303 }
2304
2305 static void
2306 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2307 machine_mode from, const char *funcname,
2308 const char *toname, const char *fromname)
2309 {
2310 char buffer[50];
2311 const char *maybe_suffix_2 = "";
2312
2313 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2314 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2315 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2316 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2317 maybe_suffix_2 = "2";
2318
2319 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2320 maybe_suffix_2);
2321
2322 set_conv_libfunc (optable, to, from, buffer);
2323 }
2324
2325 /* Set up library functions unique to ARM. */
2326
2327 static void
2328 arm_init_libfuncs (void)
2329 {
2330 /* For Linux, we have access to kernel support for atomic operations. */
2331 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2332 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2333
2334 /* There are no special library functions unless we are using the
2335 ARM BPABI. */
2336 if (!TARGET_BPABI)
2337 return;
2338
2339 /* The functions below are described in Section 4 of the "Run-Time
2340 ABI for the ARM architecture", Version 1.0. */
2341
2342 /* Double-precision floating-point arithmetic. Table 2. */
2343 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2344 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2345 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2346 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2347 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2348
2349 /* Double-precision comparisons. Table 3. */
2350 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2351 set_optab_libfunc (ne_optab, DFmode, NULL);
2352 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2353 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2354 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2355 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2356 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2357
2358 /* Single-precision floating-point arithmetic. Table 4. */
2359 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2360 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2361 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2362 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2363 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2364
2365 /* Single-precision comparisons. Table 5. */
2366 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2367 set_optab_libfunc (ne_optab, SFmode, NULL);
2368 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2369 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2370 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2371 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2372 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2373
2374 /* Floating-point to integer conversions. Table 6. */
2375 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2376 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2377 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2378 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2379 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2380 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2381 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2382 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2383
2384 /* Conversions between floating types. Table 7. */
2385 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2386 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2387
2388 /* Integer to floating-point conversions. Table 8. */
2389 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2390 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2391 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2392 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2393 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2394 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2395 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2396 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2397
2398 /* Long long. Table 9. */
2399 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2400 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2401 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2402 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2403 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2404 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2405 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2406 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2407
2408 /* Integer (32/32->32) division. \S 4.3.1. */
2409 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2410 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2411
2412 /* The divmod functions are designed so that they can be used for
2413 plain division, even though they return both the quotient and the
2414 remainder. The quotient is returned in the usual location (i.e.,
2415 r0 for SImode, {r0, r1} for DImode), just as would be expected
2416 for an ordinary division routine. Because the AAPCS calling
2417 conventions specify that all of { r0, r1, r2, r3 } are
2418 callee-saved registers, there is no need to tell the compiler
2419 explicitly that those registers are clobbered by these
2420 routines. */
2421 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2422 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2423
2424 /* For SImode division the ABI provides div-without-mod routines,
2425 which are faster. */
2426 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2427 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2428
2429 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2430 divmod libcalls instead. */
2431 set_optab_libfunc (smod_optab, DImode, NULL);
2432 set_optab_libfunc (umod_optab, DImode, NULL);
2433 set_optab_libfunc (smod_optab, SImode, NULL);
2434 set_optab_libfunc (umod_optab, SImode, NULL);
2435
2436 /* Half-precision float operations. The compiler handles all operations
2437 with NULL libfuncs by converting the SFmode. */
2438 switch (arm_fp16_format)
2439 {
2440 case ARM_FP16_FORMAT_IEEE:
2441 case ARM_FP16_FORMAT_ALTERNATIVE:
2442
2443 /* Conversions. */
2444 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2445 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2446 ? "__gnu_f2h_ieee"
2447 : "__gnu_f2h_alternative"));
2448 set_conv_libfunc (sext_optab, SFmode, HFmode,
2449 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2450 ? "__gnu_h2f_ieee"
2451 : "__gnu_h2f_alternative"));
2452
2453 /* Arithmetic. */
2454 set_optab_libfunc (add_optab, HFmode, NULL);
2455 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2456 set_optab_libfunc (smul_optab, HFmode, NULL);
2457 set_optab_libfunc (neg_optab, HFmode, NULL);
2458 set_optab_libfunc (sub_optab, HFmode, NULL);
2459
2460 /* Comparisons. */
2461 set_optab_libfunc (eq_optab, HFmode, NULL);
2462 set_optab_libfunc (ne_optab, HFmode, NULL);
2463 set_optab_libfunc (lt_optab, HFmode, NULL);
2464 set_optab_libfunc (le_optab, HFmode, NULL);
2465 set_optab_libfunc (ge_optab, HFmode, NULL);
2466 set_optab_libfunc (gt_optab, HFmode, NULL);
2467 set_optab_libfunc (unord_optab, HFmode, NULL);
2468 break;
2469
2470 default:
2471 break;
2472 }
2473
2474 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2475 {
2476 const arm_fixed_mode_set fixed_arith_modes[] =
2477 {
2478 { QQmode, "qq" },
2479 { UQQmode, "uqq" },
2480 { HQmode, "hq" },
2481 { UHQmode, "uhq" },
2482 { SQmode, "sq" },
2483 { USQmode, "usq" },
2484 { DQmode, "dq" },
2485 { UDQmode, "udq" },
2486 { TQmode, "tq" },
2487 { UTQmode, "utq" },
2488 { HAmode, "ha" },
2489 { UHAmode, "uha" },
2490 { SAmode, "sa" },
2491 { USAmode, "usa" },
2492 { DAmode, "da" },
2493 { UDAmode, "uda" },
2494 { TAmode, "ta" },
2495 { UTAmode, "uta" }
2496 };
2497 const arm_fixed_mode_set fixed_conv_modes[] =
2498 {
2499 { QQmode, "qq" },
2500 { UQQmode, "uqq" },
2501 { HQmode, "hq" },
2502 { UHQmode, "uhq" },
2503 { SQmode, "sq" },
2504 { USQmode, "usq" },
2505 { DQmode, "dq" },
2506 { UDQmode, "udq" },
2507 { TQmode, "tq" },
2508 { UTQmode, "utq" },
2509 { HAmode, "ha" },
2510 { UHAmode, "uha" },
2511 { SAmode, "sa" },
2512 { USAmode, "usa" },
2513 { DAmode, "da" },
2514 { UDAmode, "uda" },
2515 { TAmode, "ta" },
2516 { UTAmode, "uta" },
2517 { QImode, "qi" },
2518 { HImode, "hi" },
2519 { SImode, "si" },
2520 { DImode, "di" },
2521 { TImode, "ti" },
2522 { SFmode, "sf" },
2523 { DFmode, "df" }
2524 };
2525 unsigned int i, j;
2526
2527 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2528 {
2529 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2530 "add", fixed_arith_modes[i].name, 3);
2531 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2532 "ssadd", fixed_arith_modes[i].name, 3);
2533 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2534 "usadd", fixed_arith_modes[i].name, 3);
2535 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2536 "sub", fixed_arith_modes[i].name, 3);
2537 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2538 "sssub", fixed_arith_modes[i].name, 3);
2539 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2540 "ussub", fixed_arith_modes[i].name, 3);
2541 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2542 "mul", fixed_arith_modes[i].name, 3);
2543 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2544 "ssmul", fixed_arith_modes[i].name, 3);
2545 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2546 "usmul", fixed_arith_modes[i].name, 3);
2547 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2548 "div", fixed_arith_modes[i].name, 3);
2549 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2550 "udiv", fixed_arith_modes[i].name, 3);
2551 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2552 "ssdiv", fixed_arith_modes[i].name, 3);
2553 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2554 "usdiv", fixed_arith_modes[i].name, 3);
2555 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2556 "neg", fixed_arith_modes[i].name, 2);
2557 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2558 "ssneg", fixed_arith_modes[i].name, 2);
2559 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2560 "usneg", fixed_arith_modes[i].name, 2);
2561 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2562 "ashl", fixed_arith_modes[i].name, 3);
2563 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2564 "ashr", fixed_arith_modes[i].name, 3);
2565 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2566 "lshr", fixed_arith_modes[i].name, 3);
2567 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2568 "ssashl", fixed_arith_modes[i].name, 3);
2569 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2570 "usashl", fixed_arith_modes[i].name, 3);
2571 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2572 "cmp", fixed_arith_modes[i].name, 2);
2573 }
2574
2575 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2576 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2577 {
2578 if (i == j
2579 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2580 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2581 continue;
2582
2583 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2584 fixed_conv_modes[j].mode, "fract",
2585 fixed_conv_modes[i].name,
2586 fixed_conv_modes[j].name);
2587 arm_set_fixed_conv_libfunc (satfract_optab,
2588 fixed_conv_modes[i].mode,
2589 fixed_conv_modes[j].mode, "satfract",
2590 fixed_conv_modes[i].name,
2591 fixed_conv_modes[j].name);
2592 arm_set_fixed_conv_libfunc (fractuns_optab,
2593 fixed_conv_modes[i].mode,
2594 fixed_conv_modes[j].mode, "fractuns",
2595 fixed_conv_modes[i].name,
2596 fixed_conv_modes[j].name);
2597 arm_set_fixed_conv_libfunc (satfractuns_optab,
2598 fixed_conv_modes[i].mode,
2599 fixed_conv_modes[j].mode, "satfractuns",
2600 fixed_conv_modes[i].name,
2601 fixed_conv_modes[j].name);
2602 }
2603 }
2604
2605 if (TARGET_AAPCS_BASED)
2606 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2607 }
2608
2609 /* On AAPCS systems, this is the "struct __va_list". */
2610 static GTY(()) tree va_list_type;
2611
2612 /* Return the type to use as __builtin_va_list. */
2613 static tree
2614 arm_build_builtin_va_list (void)
2615 {
2616 tree va_list_name;
2617 tree ap_field;
2618
2619 if (!TARGET_AAPCS_BASED)
2620 return std_build_builtin_va_list ();
2621
2622 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2623 defined as:
2624
2625 struct __va_list
2626 {
2627 void *__ap;
2628 };
2629
2630 The C Library ABI further reinforces this definition in \S
2631 4.1.
2632
2633 We must follow this definition exactly. The structure tag
2634 name is visible in C++ mangled names, and thus forms a part
2635 of the ABI. The field name may be used by people who
2636 #include <stdarg.h>. */
2637 /* Create the type. */
2638 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2639 /* Give it the required name. */
2640 va_list_name = build_decl (BUILTINS_LOCATION,
2641 TYPE_DECL,
2642 get_identifier ("__va_list"),
2643 va_list_type);
2644 DECL_ARTIFICIAL (va_list_name) = 1;
2645 TYPE_NAME (va_list_type) = va_list_name;
2646 TYPE_STUB_DECL (va_list_type) = va_list_name;
2647 /* Create the __ap field. */
2648 ap_field = build_decl (BUILTINS_LOCATION,
2649 FIELD_DECL,
2650 get_identifier ("__ap"),
2651 ptr_type_node);
2652 DECL_ARTIFICIAL (ap_field) = 1;
2653 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2654 TYPE_FIELDS (va_list_type) = ap_field;
2655 /* Compute its layout. */
2656 layout_type (va_list_type);
2657
2658 return va_list_type;
2659 }
2660
2661 /* Return an expression of type "void *" pointing to the next
2662 available argument in a variable-argument list. VALIST is the
2663 user-level va_list object, of type __builtin_va_list. */
2664 static tree
2665 arm_extract_valist_ptr (tree valist)
2666 {
2667 if (TREE_TYPE (valist) == error_mark_node)
2668 return error_mark_node;
2669
2670 /* On an AAPCS target, the pointer is stored within "struct
2671 va_list". */
2672 if (TARGET_AAPCS_BASED)
2673 {
2674 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2675 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2676 valist, ap_field, NULL_TREE);
2677 }
2678
2679 return valist;
2680 }
2681
2682 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2683 static void
2684 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2685 {
2686 valist = arm_extract_valist_ptr (valist);
2687 std_expand_builtin_va_start (valist, nextarg);
2688 }
2689
2690 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2691 static tree
2692 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2693 gimple_seq *post_p)
2694 {
2695 valist = arm_extract_valist_ptr (valist);
2696 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2697 }
2698
2699 /* Check any incompatible options that the user has specified. */
2700 static void
2701 arm_option_check_internal (struct gcc_options *opts)
2702 {
2703 int flags = opts->x_target_flags;
2704
2705 /* Make sure that the processor choice does not conflict with any of the
2706 other command line choices. */
2707 if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM))
2708 error ("target CPU does not support ARM mode");
2709
2710 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2711 from here where no function is being compiled currently. */
2712 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2713 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2714
2715 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2716 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2717
2718 /* If this target is normally configured to use APCS frames, warn if they
2719 are turned off and debugging is turned on. */
2720 if (TARGET_ARM_P (flags)
2721 && write_symbols != NO_DEBUG
2722 && !TARGET_APCS_FRAME
2723 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2724 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2725
2726 /* iWMMXt unsupported under Thumb mode. */
2727 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2728 error ("iWMMXt unsupported under Thumb mode");
2729
2730 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2731 error ("can not use -mtp=cp15 with 16-bit Thumb");
2732
2733 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2734 {
2735 error ("RTP PIC is incompatible with Thumb");
2736 flag_pic = 0;
2737 }
2738
2739 /* We only support -mslow-flash-data on armv7-m targets. */
2740 if (target_slow_flash_data
2741 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2742 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2743 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2744 }
2745
2746 /* Set params depending on attributes and optimization options. */
2747 static void
2748 arm_option_params_internal (struct gcc_options *opts)
2749 {
2750 int flags = opts->x_target_flags;
2751
2752 /* If we are not using the default (ARM mode) section anchor offset
2753 ranges, then set the correct ranges now. */
2754 if (TARGET_THUMB1_P (flags))
2755 {
2756 /* Thumb-1 LDR instructions cannot have negative offsets.
2757 Permissible positive offset ranges are 5-bit (for byte loads),
2758 6-bit (for halfword loads), or 7-bit (for word loads).
2759 Empirical results suggest a 7-bit anchor range gives the best
2760 overall code size. */
2761 targetm.min_anchor_offset = 0;
2762 targetm.max_anchor_offset = 127;
2763 }
2764 else if (TARGET_THUMB2_P (flags))
2765 {
2766 /* The minimum is set such that the total size of the block
2767 for a particular anchor is 248 + 1 + 4095 bytes, which is
2768 divisible by eight, ensuring natural spacing of anchors. */
2769 targetm.min_anchor_offset = -248;
2770 targetm.max_anchor_offset = 4095;
2771 }
2772 else
2773 {
2774 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2775 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2776 }
2777
2778 if (optimize_size)
2779 {
2780 /* If optimizing for size, bump the number of instructions that we
2781 are prepared to conditionally execute (even on a StrongARM). */
2782 max_insns_skipped = 6;
2783
2784 /* For THUMB2, we limit the conditional sequence to one IT block. */
2785 if (TARGET_THUMB2_P (flags))
2786 max_insns_skipped = opts->x_arm_restrict_it ? 1 : 4;
2787 }
2788 else
2789 /* When -mrestrict-it is in use tone down the if-conversion. */
2790 max_insns_skipped
2791 = (TARGET_THUMB2_P (opts->x_target_flags) && opts->x_arm_restrict_it)
2792 ? 1 : current_tune->max_insns_skipped;
2793 }
2794
2795 /* Options after initial target override. */
2796 static GTY(()) tree init_optimize;
2797
2798 /* Reset options between modes that the user has specified. */
2799 static void
2800 arm_option_override_internal (struct gcc_options *opts,
2801 struct gcc_options *opts_set)
2802 {
2803 if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB))
2804 {
2805 warning (0, "target CPU does not support THUMB instructions");
2806 opts->x_target_flags &= ~MASK_THUMB;
2807 }
2808
2809 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2810 {
2811 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2812 opts->x_target_flags &= ~MASK_APCS_FRAME;
2813 }
2814
2815 /* Callee super interworking implies thumb interworking. Adding
2816 this to the flags here simplifies the logic elsewhere. */
2817 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2818 opts->x_target_flags |= MASK_INTERWORK;
2819
2820 /* need to remember initial values so combinaisons of options like
2821 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2822 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2823
2824 if (! opts_set->x_arm_restrict_it)
2825 opts->x_arm_restrict_it = arm_arch8;
2826
2827 if (!TARGET_THUMB2_P (opts->x_target_flags))
2828 opts->x_arm_restrict_it = 0;
2829
2830 /* Don't warn since it's on by default in -O2. */
2831 if (TARGET_THUMB1_P (opts->x_target_flags))
2832 opts->x_flag_schedule_insns = 0;
2833 else
2834 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2835
2836 /* Disable shrink-wrap when optimizing function for size, since it tends to
2837 generate additional returns. */
2838 if (optimize_function_for_size_p (cfun)
2839 && TARGET_THUMB2_P (opts->x_target_flags))
2840 opts->x_flag_shrink_wrap = false;
2841 else
2842 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2843
2844 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2845 - epilogue_insns - does not accurately model the corresponding insns
2846 emitted in the asm file. In particular, see the comment in thumb_exit
2847 'Find out how many of the (return) argument registers we can corrupt'.
2848 As a consequence, the epilogue may clobber registers without fipa-ra
2849 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2850 TODO: Accurately model clobbers for epilogue_insns and reenable
2851 fipa-ra. */
2852 if (TARGET_THUMB1_P (opts->x_target_flags))
2853 opts->x_flag_ipa_ra = 0;
2854 else
2855 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
2856
2857 /* Thumb2 inline assembly code should always use unified syntax.
2858 This will apply to ARM and Thumb1 eventually. */
2859 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
2860 }
2861
2862 /* Fix up any incompatible options that the user has specified. */
2863 static void
2864 arm_option_override (void)
2865 {
2866 arm_selected_arch = NULL;
2867 arm_selected_cpu = NULL;
2868 arm_selected_tune = NULL;
2869
2870 if (global_options_set.x_arm_arch_option)
2871 arm_selected_arch = &all_architectures[arm_arch_option];
2872
2873 if (global_options_set.x_arm_cpu_option)
2874 {
2875 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2876 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2877 }
2878
2879 if (global_options_set.x_arm_tune_option)
2880 arm_selected_tune = &all_cores[(int) arm_tune_option];
2881
2882 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2883 SUBTARGET_OVERRIDE_OPTIONS;
2884 #endif
2885
2886 if (arm_selected_arch)
2887 {
2888 if (arm_selected_cpu)
2889 {
2890 /* Check for conflict between mcpu and march. */
2891 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2892 {
2893 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2894 arm_selected_cpu->name, arm_selected_arch->name);
2895 /* -march wins for code generation.
2896 -mcpu wins for default tuning. */
2897 if (!arm_selected_tune)
2898 arm_selected_tune = arm_selected_cpu;
2899
2900 arm_selected_cpu = arm_selected_arch;
2901 }
2902 else
2903 /* -mcpu wins. */
2904 arm_selected_arch = NULL;
2905 }
2906 else
2907 /* Pick a CPU based on the architecture. */
2908 arm_selected_cpu = arm_selected_arch;
2909 }
2910
2911 /* If the user did not specify a processor, choose one for them. */
2912 if (!arm_selected_cpu)
2913 {
2914 const struct processors * sel;
2915 unsigned int sought;
2916
2917 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2918 if (!arm_selected_cpu->name)
2919 {
2920 #ifdef SUBTARGET_CPU_DEFAULT
2921 /* Use the subtarget default CPU if none was specified by
2922 configure. */
2923 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2924 #endif
2925 /* Default to ARM6. */
2926 if (!arm_selected_cpu->name)
2927 arm_selected_cpu = &all_cores[arm6];
2928 }
2929
2930 sel = arm_selected_cpu;
2931 insn_flags = sel->flags;
2932
2933 /* Now check to see if the user has specified some command line
2934 switch that require certain abilities from the cpu. */
2935 sought = 0;
2936
2937 if (TARGET_INTERWORK || TARGET_THUMB)
2938 {
2939 sought |= (FL_THUMB | FL_MODE32);
2940
2941 /* There are no ARM processors that support both APCS-26 and
2942 interworking. Therefore we force FL_MODE26 to be removed
2943 from insn_flags here (if it was set), so that the search
2944 below will always be able to find a compatible processor. */
2945 insn_flags &= ~FL_MODE26;
2946 }
2947
2948 if (sought != 0 && ((sought & insn_flags) != sought))
2949 {
2950 /* Try to locate a CPU type that supports all of the abilities
2951 of the default CPU, plus the extra abilities requested by
2952 the user. */
2953 for (sel = all_cores; sel->name != NULL; sel++)
2954 if ((sel->flags & sought) == (sought | insn_flags))
2955 break;
2956
2957 if (sel->name == NULL)
2958 {
2959 unsigned current_bit_count = 0;
2960 const struct processors * best_fit = NULL;
2961
2962 /* Ideally we would like to issue an error message here
2963 saying that it was not possible to find a CPU compatible
2964 with the default CPU, but which also supports the command
2965 line options specified by the programmer, and so they
2966 ought to use the -mcpu=<name> command line option to
2967 override the default CPU type.
2968
2969 If we cannot find a cpu that has both the
2970 characteristics of the default cpu and the given
2971 command line options we scan the array again looking
2972 for a best match. */
2973 for (sel = all_cores; sel->name != NULL; sel++)
2974 if ((sel->flags & sought) == sought)
2975 {
2976 unsigned count;
2977
2978 count = bit_count (sel->flags & insn_flags);
2979
2980 if (count >= current_bit_count)
2981 {
2982 best_fit = sel;
2983 current_bit_count = count;
2984 }
2985 }
2986
2987 gcc_assert (best_fit);
2988 sel = best_fit;
2989 }
2990
2991 arm_selected_cpu = sel;
2992 }
2993 }
2994
2995 gcc_assert (arm_selected_cpu);
2996 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2997 if (!arm_selected_tune)
2998 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2999
3000 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3001 insn_flags = arm_selected_cpu->flags;
3002 arm_base_arch = arm_selected_cpu->base_arch;
3003
3004 arm_tune = arm_selected_tune->core;
3005 tune_flags = arm_selected_tune->flags;
3006 current_tune = arm_selected_tune->tune;
3007
3008 /* TBD: Dwarf info for apcs frame is not handled yet. */
3009 if (TARGET_APCS_FRAME)
3010 flag_shrink_wrap = false;
3011
3012 /* BPABI targets use linker tricks to allow interworking on cores
3013 without thumb support. */
3014 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
3015 {
3016 warning (0, "target CPU does not support interworking" );
3017 target_flags &= ~MASK_INTERWORK;
3018 }
3019
3020 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3021 {
3022 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3023 target_flags |= MASK_APCS_FRAME;
3024 }
3025
3026 if (TARGET_POKE_FUNCTION_NAME)
3027 target_flags |= MASK_APCS_FRAME;
3028
3029 if (TARGET_APCS_REENT && flag_pic)
3030 error ("-fpic and -mapcs-reent are incompatible");
3031
3032 if (TARGET_APCS_REENT)
3033 warning (0, "APCS reentrant code not supported. Ignored");
3034
3035 if (TARGET_APCS_FLOAT)
3036 warning (0, "passing floating point arguments in fp regs not yet supported");
3037
3038 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3039 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
3040 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
3041 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
3042 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
3043 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
3044 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
3045 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
3046 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
3047 arm_arch6m = arm_arch6 && !arm_arch_notm;
3048 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
3049 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
3050 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
3051 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
3052 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
3053
3054 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
3055 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
3056 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
3057 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
3058 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
3059 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
3060 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
3061 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
3062 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
3063 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3064 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
3065 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
3066
3067 /* V5 code we generate is completely interworking capable, so we turn off
3068 TARGET_INTERWORK here to avoid many tests later on. */
3069
3070 /* XXX However, we must pass the right pre-processor defines to CPP
3071 or GLD can get confused. This is a hack. */
3072 if (TARGET_INTERWORK)
3073 arm_cpp_interwork = 1;
3074
3075 if (arm_arch5)
3076 target_flags &= ~MASK_INTERWORK;
3077
3078 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3079 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3080
3081 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3082 error ("iwmmxt abi requires an iwmmxt capable cpu");
3083
3084 if (!global_options_set.x_arm_fpu_index)
3085 {
3086 const char *target_fpu_name;
3087 bool ok;
3088
3089 #ifdef FPUTYPE_DEFAULT
3090 target_fpu_name = FPUTYPE_DEFAULT;
3091 #else
3092 target_fpu_name = "vfp";
3093 #endif
3094
3095 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3096 CL_TARGET);
3097 gcc_assert (ok);
3098 }
3099
3100 arm_fpu_desc = &all_fpus[arm_fpu_index];
3101
3102 switch (arm_fpu_desc->model)
3103 {
3104 case ARM_FP_MODEL_VFP:
3105 arm_fpu_attr = FPU_VFP;
3106 break;
3107
3108 default:
3109 gcc_unreachable();
3110 }
3111
3112 if (TARGET_AAPCS_BASED)
3113 {
3114 if (TARGET_CALLER_INTERWORKING)
3115 error ("AAPCS does not support -mcaller-super-interworking");
3116 else
3117 if (TARGET_CALLEE_INTERWORKING)
3118 error ("AAPCS does not support -mcallee-super-interworking");
3119 }
3120
3121 /* iWMMXt and NEON are incompatible. */
3122 if (TARGET_IWMMXT && TARGET_NEON)
3123 error ("iWMMXt and NEON are incompatible");
3124
3125 /* __fp16 support currently assumes the core has ldrh. */
3126 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3127 sorry ("__fp16 and no ldrh");
3128
3129 /* If soft-float is specified then don't use FPU. */
3130 if (TARGET_SOFT_FLOAT)
3131 arm_fpu_attr = FPU_NONE;
3132
3133 if (TARGET_AAPCS_BASED)
3134 {
3135 if (arm_abi == ARM_ABI_IWMMXT)
3136 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3137 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3138 && TARGET_HARD_FLOAT
3139 && TARGET_VFP)
3140 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3141 else
3142 arm_pcs_default = ARM_PCS_AAPCS;
3143 }
3144 else
3145 {
3146 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3147 sorry ("-mfloat-abi=hard and VFP");
3148
3149 if (arm_abi == ARM_ABI_APCS)
3150 arm_pcs_default = ARM_PCS_APCS;
3151 else
3152 arm_pcs_default = ARM_PCS_ATPCS;
3153 }
3154
3155 /* For arm2/3 there is no need to do any scheduling if we are doing
3156 software floating-point. */
3157 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
3158 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3159
3160 /* Use the cp15 method if it is available. */
3161 if (target_thread_pointer == TP_AUTO)
3162 {
3163 if (arm_arch6k && !TARGET_THUMB1)
3164 target_thread_pointer = TP_CP15;
3165 else
3166 target_thread_pointer = TP_SOFT;
3167 }
3168
3169 /* Override the default structure alignment for AAPCS ABI. */
3170 if (!global_options_set.x_arm_structure_size_boundary)
3171 {
3172 if (TARGET_AAPCS_BASED)
3173 arm_structure_size_boundary = 8;
3174 }
3175 else
3176 {
3177 if (arm_structure_size_boundary != 8
3178 && arm_structure_size_boundary != 32
3179 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3180 {
3181 if (ARM_DOUBLEWORD_ALIGN)
3182 warning (0,
3183 "structure size boundary can only be set to 8, 32 or 64");
3184 else
3185 warning (0, "structure size boundary can only be set to 8 or 32");
3186 arm_structure_size_boundary
3187 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3188 }
3189 }
3190
3191 /* If stack checking is disabled, we can use r10 as the PIC register,
3192 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3193 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3194 {
3195 if (TARGET_VXWORKS_RTP)
3196 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3197 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3198 }
3199
3200 if (flag_pic && TARGET_VXWORKS_RTP)
3201 arm_pic_register = 9;
3202
3203 if (arm_pic_register_string != NULL)
3204 {
3205 int pic_register = decode_reg_name (arm_pic_register_string);
3206
3207 if (!flag_pic)
3208 warning (0, "-mpic-register= is useless without -fpic");
3209
3210 /* Prevent the user from choosing an obviously stupid PIC register. */
3211 else if (pic_register < 0 || call_used_regs[pic_register]
3212 || pic_register == HARD_FRAME_POINTER_REGNUM
3213 || pic_register == STACK_POINTER_REGNUM
3214 || pic_register >= PC_REGNUM
3215 || (TARGET_VXWORKS_RTP
3216 && (unsigned int) pic_register != arm_pic_register))
3217 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3218 else
3219 arm_pic_register = pic_register;
3220 }
3221
3222 if (TARGET_VXWORKS_RTP
3223 && !global_options_set.x_arm_pic_data_is_text_relative)
3224 arm_pic_data_is_text_relative = 0;
3225
3226 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3227 if (fix_cm3_ldrd == 2)
3228 {
3229 if (arm_selected_cpu->core == cortexm3)
3230 fix_cm3_ldrd = 1;
3231 else
3232 fix_cm3_ldrd = 0;
3233 }
3234
3235 /* Enable -munaligned-access by default for
3236 - all ARMv6 architecture-based processors
3237 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3238 - ARMv8 architecture-base processors.
3239
3240 Disable -munaligned-access by default for
3241 - all pre-ARMv6 architecture-based processors
3242 - ARMv6-M architecture-based processors. */
3243
3244 if (unaligned_access == 2)
3245 {
3246 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3247 unaligned_access = 1;
3248 else
3249 unaligned_access = 0;
3250 }
3251 else if (unaligned_access == 1
3252 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3253 {
3254 warning (0, "target CPU does not support unaligned accesses");
3255 unaligned_access = 0;
3256 }
3257
3258 /* Hot/Cold partitioning is not currently supported, since we can't
3259 handle literal pool placement in that case. */
3260 if (flag_reorder_blocks_and_partition)
3261 {
3262 inform (input_location,
3263 "-freorder-blocks-and-partition not supported on this architecture");
3264 flag_reorder_blocks_and_partition = 0;
3265 flag_reorder_blocks = 1;
3266 }
3267
3268 if (flag_pic)
3269 /* Hoisting PIC address calculations more aggressively provides a small,
3270 but measurable, size reduction for PIC code. Therefore, we decrease
3271 the bar for unrestricted expression hoisting to the cost of PIC address
3272 calculation, which is 2 instructions. */
3273 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3274 global_options.x_param_values,
3275 global_options_set.x_param_values);
3276
3277 /* ARM EABI defaults to strict volatile bitfields. */
3278 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3279 && abi_version_at_least(2))
3280 flag_strict_volatile_bitfields = 1;
3281
3282 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3283 have deemed it beneficial (signified by setting
3284 prefetch.num_slots to 1 or more). */
3285 if (flag_prefetch_loop_arrays < 0
3286 && HAVE_prefetch
3287 && optimize >= 3
3288 && current_tune->prefetch.num_slots > 0)
3289 flag_prefetch_loop_arrays = 1;
3290
3291 /* Set up parameters to be used in prefetching algorithm. Do not
3292 override the defaults unless we are tuning for a core we have
3293 researched values for. */
3294 if (current_tune->prefetch.num_slots > 0)
3295 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3296 current_tune->prefetch.num_slots,
3297 global_options.x_param_values,
3298 global_options_set.x_param_values);
3299 if (current_tune->prefetch.l1_cache_line_size >= 0)
3300 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3301 current_tune->prefetch.l1_cache_line_size,
3302 global_options.x_param_values,
3303 global_options_set.x_param_values);
3304 if (current_tune->prefetch.l1_cache_size >= 0)
3305 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3306 current_tune->prefetch.l1_cache_size,
3307 global_options.x_param_values,
3308 global_options_set.x_param_values);
3309
3310 /* Use Neon to perform 64-bits operations rather than core
3311 registers. */
3312 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3313 if (use_neon_for_64bits == 1)
3314 prefer_neon_for_64bits = true;
3315
3316 /* Use the alternative scheduling-pressure algorithm by default. */
3317 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3318 global_options.x_param_values,
3319 global_options_set.x_param_values);
3320
3321 /* Look through ready list and all of queue for instructions
3322 relevant for L2 auto-prefetcher. */
3323 int param_sched_autopref_queue_depth;
3324
3325 switch (current_tune->sched_autopref)
3326 {
3327 case tune_params::SCHED_AUTOPREF_OFF:
3328 param_sched_autopref_queue_depth = -1;
3329 break;
3330
3331 case tune_params::SCHED_AUTOPREF_RANK:
3332 param_sched_autopref_queue_depth = 0;
3333 break;
3334
3335 case tune_params::SCHED_AUTOPREF_FULL:
3336 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3337 break;
3338
3339 default:
3340 gcc_unreachable ();
3341 }
3342
3343 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3344 param_sched_autopref_queue_depth,
3345 global_options.x_param_values,
3346 global_options_set.x_param_values);
3347
3348 /* Currently, for slow flash data, we just disable literal pools. */
3349 if (target_slow_flash_data)
3350 arm_disable_literal_pool = true;
3351
3352 /* Disable scheduling fusion by default if it's not armv7 processor
3353 or doesn't prefer ldrd/strd. */
3354 if (flag_schedule_fusion == 2
3355 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3356 flag_schedule_fusion = 0;
3357
3358 /* Need to remember initial options before they are overriden. */
3359 init_optimize = build_optimization_node (&global_options);
3360
3361 arm_option_override_internal (&global_options, &global_options_set);
3362 arm_option_check_internal (&global_options);
3363 arm_option_params_internal (&global_options);
3364
3365 /* Register global variables with the garbage collector. */
3366 arm_add_gc_roots ();
3367
3368 /* Save the initial options in case the user does function specific
3369 options. */
3370 target_option_default_node = target_option_current_node
3371 = build_target_option_node (&global_options);
3372 }
3373
3374 static void
3375 arm_add_gc_roots (void)
3376 {
3377 gcc_obstack_init(&minipool_obstack);
3378 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3379 }
3380 \f
3381 /* A table of known ARM exception types.
3382 For use with the interrupt function attribute. */
3383
3384 typedef struct
3385 {
3386 const char *const arg;
3387 const unsigned long return_value;
3388 }
3389 isr_attribute_arg;
3390
3391 static const isr_attribute_arg isr_attribute_args [] =
3392 {
3393 { "IRQ", ARM_FT_ISR },
3394 { "irq", ARM_FT_ISR },
3395 { "FIQ", ARM_FT_FIQ },
3396 { "fiq", ARM_FT_FIQ },
3397 { "ABORT", ARM_FT_ISR },
3398 { "abort", ARM_FT_ISR },
3399 { "ABORT", ARM_FT_ISR },
3400 { "abort", ARM_FT_ISR },
3401 { "UNDEF", ARM_FT_EXCEPTION },
3402 { "undef", ARM_FT_EXCEPTION },
3403 { "SWI", ARM_FT_EXCEPTION },
3404 { "swi", ARM_FT_EXCEPTION },
3405 { NULL, ARM_FT_NORMAL }
3406 };
3407
3408 /* Returns the (interrupt) function type of the current
3409 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3410
3411 static unsigned long
3412 arm_isr_value (tree argument)
3413 {
3414 const isr_attribute_arg * ptr;
3415 const char * arg;
3416
3417 if (!arm_arch_notm)
3418 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3419
3420 /* No argument - default to IRQ. */
3421 if (argument == NULL_TREE)
3422 return ARM_FT_ISR;
3423
3424 /* Get the value of the argument. */
3425 if (TREE_VALUE (argument) == NULL_TREE
3426 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3427 return ARM_FT_UNKNOWN;
3428
3429 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3430
3431 /* Check it against the list of known arguments. */
3432 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3433 if (streq (arg, ptr->arg))
3434 return ptr->return_value;
3435
3436 /* An unrecognized interrupt type. */
3437 return ARM_FT_UNKNOWN;
3438 }
3439
3440 /* Computes the type of the current function. */
3441
3442 static unsigned long
3443 arm_compute_func_type (void)
3444 {
3445 unsigned long type = ARM_FT_UNKNOWN;
3446 tree a;
3447 tree attr;
3448
3449 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3450
3451 /* Decide if the current function is volatile. Such functions
3452 never return, and many memory cycles can be saved by not storing
3453 register values that will never be needed again. This optimization
3454 was added to speed up context switching in a kernel application. */
3455 if (optimize > 0
3456 && (TREE_NOTHROW (current_function_decl)
3457 || !(flag_unwind_tables
3458 || (flag_exceptions
3459 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3460 && TREE_THIS_VOLATILE (current_function_decl))
3461 type |= ARM_FT_VOLATILE;
3462
3463 if (cfun->static_chain_decl != NULL)
3464 type |= ARM_FT_NESTED;
3465
3466 attr = DECL_ATTRIBUTES (current_function_decl);
3467
3468 a = lookup_attribute ("naked", attr);
3469 if (a != NULL_TREE)
3470 type |= ARM_FT_NAKED;
3471
3472 a = lookup_attribute ("isr", attr);
3473 if (a == NULL_TREE)
3474 a = lookup_attribute ("interrupt", attr);
3475
3476 if (a == NULL_TREE)
3477 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3478 else
3479 type |= arm_isr_value (TREE_VALUE (a));
3480
3481 return type;
3482 }
3483
3484 /* Returns the type of the current function. */
3485
3486 unsigned long
3487 arm_current_func_type (void)
3488 {
3489 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3490 cfun->machine->func_type = arm_compute_func_type ();
3491
3492 return cfun->machine->func_type;
3493 }
3494
3495 bool
3496 arm_allocate_stack_slots_for_args (void)
3497 {
3498 /* Naked functions should not allocate stack slots for arguments. */
3499 return !IS_NAKED (arm_current_func_type ());
3500 }
3501
3502 static bool
3503 arm_warn_func_return (tree decl)
3504 {
3505 /* Naked functions are implemented entirely in assembly, including the
3506 return sequence, so suppress warnings about this. */
3507 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3508 }
3509
3510 \f
3511 /* Output assembler code for a block containing the constant parts
3512 of a trampoline, leaving space for the variable parts.
3513
3514 On the ARM, (if r8 is the static chain regnum, and remembering that
3515 referencing pc adds an offset of 8) the trampoline looks like:
3516 ldr r8, [pc, #0]
3517 ldr pc, [pc]
3518 .word static chain value
3519 .word function's address
3520 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3521
3522 static void
3523 arm_asm_trampoline_template (FILE *f)
3524 {
3525 if (TARGET_UNIFIED_ASM)
3526 fprintf (f, "\t.syntax unified\n");
3527 else
3528 fprintf (f, "\t.syntax divided\n");
3529
3530 if (TARGET_ARM)
3531 {
3532 fprintf (f, "\t.arm\n");
3533 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3534 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3535 }
3536 else if (TARGET_THUMB2)
3537 {
3538 fprintf (f, "\t.thumb\n");
3539 /* The Thumb-2 trampoline is similar to the arm implementation.
3540 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3541 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3542 STATIC_CHAIN_REGNUM, PC_REGNUM);
3543 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3544 }
3545 else
3546 {
3547 ASM_OUTPUT_ALIGN (f, 2);
3548 fprintf (f, "\t.code\t16\n");
3549 fprintf (f, ".Ltrampoline_start:\n");
3550 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3551 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3552 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3553 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3554 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3555 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3556 }
3557 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3558 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3559 }
3560
3561 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3562
3563 static void
3564 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3565 {
3566 rtx fnaddr, mem, a_tramp;
3567
3568 emit_block_move (m_tramp, assemble_trampoline_template (),
3569 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3570
3571 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3572 emit_move_insn (mem, chain_value);
3573
3574 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3575 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3576 emit_move_insn (mem, fnaddr);
3577
3578 a_tramp = XEXP (m_tramp, 0);
3579 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3580 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3581 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3582 }
3583
3584 /* Thumb trampolines should be entered in thumb mode, so set
3585 the bottom bit of the address. */
3586
3587 static rtx
3588 arm_trampoline_adjust_address (rtx addr)
3589 {
3590 if (TARGET_THUMB)
3591 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3592 NULL, 0, OPTAB_LIB_WIDEN);
3593 return addr;
3594 }
3595 \f
3596 /* Return 1 if it is possible to return using a single instruction.
3597 If SIBLING is non-null, this is a test for a return before a sibling
3598 call. SIBLING is the call insn, so we can examine its register usage. */
3599
3600 int
3601 use_return_insn (int iscond, rtx sibling)
3602 {
3603 int regno;
3604 unsigned int func_type;
3605 unsigned long saved_int_regs;
3606 unsigned HOST_WIDE_INT stack_adjust;
3607 arm_stack_offsets *offsets;
3608
3609 /* Never use a return instruction before reload has run. */
3610 if (!reload_completed)
3611 return 0;
3612
3613 func_type = arm_current_func_type ();
3614
3615 /* Naked, volatile and stack alignment functions need special
3616 consideration. */
3617 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3618 return 0;
3619
3620 /* So do interrupt functions that use the frame pointer and Thumb
3621 interrupt functions. */
3622 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3623 return 0;
3624
3625 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3626 && !optimize_function_for_size_p (cfun))
3627 return 0;
3628
3629 offsets = arm_get_frame_offsets ();
3630 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3631
3632 /* As do variadic functions. */
3633 if (crtl->args.pretend_args_size
3634 || cfun->machine->uses_anonymous_args
3635 /* Or if the function calls __builtin_eh_return () */
3636 || crtl->calls_eh_return
3637 /* Or if the function calls alloca */
3638 || cfun->calls_alloca
3639 /* Or if there is a stack adjustment. However, if the stack pointer
3640 is saved on the stack, we can use a pre-incrementing stack load. */
3641 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3642 && stack_adjust == 4)))
3643 return 0;
3644
3645 saved_int_regs = offsets->saved_regs_mask;
3646
3647 /* Unfortunately, the insn
3648
3649 ldmib sp, {..., sp, ...}
3650
3651 triggers a bug on most SA-110 based devices, such that the stack
3652 pointer won't be correctly restored if the instruction takes a
3653 page fault. We work around this problem by popping r3 along with
3654 the other registers, since that is never slower than executing
3655 another instruction.
3656
3657 We test for !arm_arch5 here, because code for any architecture
3658 less than this could potentially be run on one of the buggy
3659 chips. */
3660 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3661 {
3662 /* Validate that r3 is a call-clobbered register (always true in
3663 the default abi) ... */
3664 if (!call_used_regs[3])
3665 return 0;
3666
3667 /* ... that it isn't being used for a return value ... */
3668 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3669 return 0;
3670
3671 /* ... or for a tail-call argument ... */
3672 if (sibling)
3673 {
3674 gcc_assert (CALL_P (sibling));
3675
3676 if (find_regno_fusage (sibling, USE, 3))
3677 return 0;
3678 }
3679
3680 /* ... and that there are no call-saved registers in r0-r2
3681 (always true in the default ABI). */
3682 if (saved_int_regs & 0x7)
3683 return 0;
3684 }
3685
3686 /* Can't be done if interworking with Thumb, and any registers have been
3687 stacked. */
3688 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3689 return 0;
3690
3691 /* On StrongARM, conditional returns are expensive if they aren't
3692 taken and multiple registers have been stacked. */
3693 if (iscond && arm_tune_strongarm)
3694 {
3695 /* Conditional return when just the LR is stored is a simple
3696 conditional-load instruction, that's not expensive. */
3697 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3698 return 0;
3699
3700 if (flag_pic
3701 && arm_pic_register != INVALID_REGNUM
3702 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3703 return 0;
3704 }
3705
3706 /* If there are saved registers but the LR isn't saved, then we need
3707 two instructions for the return. */
3708 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3709 return 0;
3710
3711 /* Can't be done if any of the VFP regs are pushed,
3712 since this also requires an insn. */
3713 if (TARGET_HARD_FLOAT && TARGET_VFP)
3714 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3715 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3716 return 0;
3717
3718 if (TARGET_REALLY_IWMMXT)
3719 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3720 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3721 return 0;
3722
3723 return 1;
3724 }
3725
3726 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3727 shrink-wrapping if possible. This is the case if we need to emit a
3728 prologue, which we can test by looking at the offsets. */
3729 bool
3730 use_simple_return_p (void)
3731 {
3732 arm_stack_offsets *offsets;
3733
3734 offsets = arm_get_frame_offsets ();
3735 return offsets->outgoing_args != 0;
3736 }
3737
3738 /* Return TRUE if int I is a valid immediate ARM constant. */
3739
3740 int
3741 const_ok_for_arm (HOST_WIDE_INT i)
3742 {
3743 int lowbit;
3744
3745 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3746 be all zero, or all one. */
3747 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3748 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3749 != ((~(unsigned HOST_WIDE_INT) 0)
3750 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3751 return FALSE;
3752
3753 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3754
3755 /* Fast return for 0 and small values. We must do this for zero, since
3756 the code below can't handle that one case. */
3757 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3758 return TRUE;
3759
3760 /* Get the number of trailing zeros. */
3761 lowbit = ffs((int) i) - 1;
3762
3763 /* Only even shifts are allowed in ARM mode so round down to the
3764 nearest even number. */
3765 if (TARGET_ARM)
3766 lowbit &= ~1;
3767
3768 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3769 return TRUE;
3770
3771 if (TARGET_ARM)
3772 {
3773 /* Allow rotated constants in ARM mode. */
3774 if (lowbit <= 4
3775 && ((i & ~0xc000003f) == 0
3776 || (i & ~0xf000000f) == 0
3777 || (i & ~0xfc000003) == 0))
3778 return TRUE;
3779 }
3780 else
3781 {
3782 HOST_WIDE_INT v;
3783
3784 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3785 v = i & 0xff;
3786 v |= v << 16;
3787 if (i == v || i == (v | (v << 8)))
3788 return TRUE;
3789
3790 /* Allow repeated pattern 0xXY00XY00. */
3791 v = i & 0xff00;
3792 v |= v << 16;
3793 if (i == v)
3794 return TRUE;
3795 }
3796
3797 return FALSE;
3798 }
3799
3800 /* Return true if I is a valid constant for the operation CODE. */
3801 int
3802 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3803 {
3804 if (const_ok_for_arm (i))
3805 return 1;
3806
3807 switch (code)
3808 {
3809 case SET:
3810 /* See if we can use movw. */
3811 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3812 return 1;
3813 else
3814 /* Otherwise, try mvn. */
3815 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3816
3817 case PLUS:
3818 /* See if we can use addw or subw. */
3819 if (TARGET_THUMB2
3820 && ((i & 0xfffff000) == 0
3821 || ((-i) & 0xfffff000) == 0))
3822 return 1;
3823 /* else fall through. */
3824
3825 case COMPARE:
3826 case EQ:
3827 case NE:
3828 case GT:
3829 case LE:
3830 case LT:
3831 case GE:
3832 case GEU:
3833 case LTU:
3834 case GTU:
3835 case LEU:
3836 case UNORDERED:
3837 case ORDERED:
3838 case UNEQ:
3839 case UNGE:
3840 case UNLT:
3841 case UNGT:
3842 case UNLE:
3843 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3844
3845 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3846 case XOR:
3847 return 0;
3848
3849 case IOR:
3850 if (TARGET_THUMB2)
3851 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3852 return 0;
3853
3854 case AND:
3855 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3856
3857 default:
3858 gcc_unreachable ();
3859 }
3860 }
3861
3862 /* Return true if I is a valid di mode constant for the operation CODE. */
3863 int
3864 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3865 {
3866 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3867 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3868 rtx hi = GEN_INT (hi_val);
3869 rtx lo = GEN_INT (lo_val);
3870
3871 if (TARGET_THUMB1)
3872 return 0;
3873
3874 switch (code)
3875 {
3876 case AND:
3877 case IOR:
3878 case XOR:
3879 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3880 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3881 case PLUS:
3882 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3883
3884 default:
3885 return 0;
3886 }
3887 }
3888
3889 /* Emit a sequence of insns to handle a large constant.
3890 CODE is the code of the operation required, it can be any of SET, PLUS,
3891 IOR, AND, XOR, MINUS;
3892 MODE is the mode in which the operation is being performed;
3893 VAL is the integer to operate on;
3894 SOURCE is the other operand (a register, or a null-pointer for SET);
3895 SUBTARGETS means it is safe to create scratch registers if that will
3896 either produce a simpler sequence, or we will want to cse the values.
3897 Return value is the number of insns emitted. */
3898
3899 /* ??? Tweak this for thumb2. */
3900 int
3901 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3902 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3903 {
3904 rtx cond;
3905
3906 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3907 cond = COND_EXEC_TEST (PATTERN (insn));
3908 else
3909 cond = NULL_RTX;
3910
3911 if (subtargets || code == SET
3912 || (REG_P (target) && REG_P (source)
3913 && REGNO (target) != REGNO (source)))
3914 {
3915 /* After arm_reorg has been called, we can't fix up expensive
3916 constants by pushing them into memory so we must synthesize
3917 them in-line, regardless of the cost. This is only likely to
3918 be more costly on chips that have load delay slots and we are
3919 compiling without running the scheduler (so no splitting
3920 occurred before the final instruction emission).
3921
3922 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3923 */
3924 if (!cfun->machine->after_arm_reorg
3925 && !cond
3926 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3927 1, 0)
3928 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3929 + (code != SET))))
3930 {
3931 if (code == SET)
3932 {
3933 /* Currently SET is the only monadic value for CODE, all
3934 the rest are diadic. */
3935 if (TARGET_USE_MOVT)
3936 arm_emit_movpair (target, GEN_INT (val));
3937 else
3938 emit_set_insn (target, GEN_INT (val));
3939
3940 return 1;
3941 }
3942 else
3943 {
3944 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3945
3946 if (TARGET_USE_MOVT)
3947 arm_emit_movpair (temp, GEN_INT (val));
3948 else
3949 emit_set_insn (temp, GEN_INT (val));
3950
3951 /* For MINUS, the value is subtracted from, since we never
3952 have subtraction of a constant. */
3953 if (code == MINUS)
3954 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3955 else
3956 emit_set_insn (target,
3957 gen_rtx_fmt_ee (code, mode, source, temp));
3958 return 2;
3959 }
3960 }
3961 }
3962
3963 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3964 1);
3965 }
3966
3967 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3968 ARM/THUMB2 immediates, and add up to VAL.
3969 Thr function return value gives the number of insns required. */
3970 static int
3971 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3972 struct four_ints *return_sequence)
3973 {
3974 int best_consecutive_zeros = 0;
3975 int i;
3976 int best_start = 0;
3977 int insns1, insns2;
3978 struct four_ints tmp_sequence;
3979
3980 /* If we aren't targeting ARM, the best place to start is always at
3981 the bottom, otherwise look more closely. */
3982 if (TARGET_ARM)
3983 {
3984 for (i = 0; i < 32; i += 2)
3985 {
3986 int consecutive_zeros = 0;
3987
3988 if (!(val & (3 << i)))
3989 {
3990 while ((i < 32) && !(val & (3 << i)))
3991 {
3992 consecutive_zeros += 2;
3993 i += 2;
3994 }
3995 if (consecutive_zeros > best_consecutive_zeros)
3996 {
3997 best_consecutive_zeros = consecutive_zeros;
3998 best_start = i - consecutive_zeros;
3999 }
4000 i -= 2;
4001 }
4002 }
4003 }
4004
4005 /* So long as it won't require any more insns to do so, it's
4006 desirable to emit a small constant (in bits 0...9) in the last
4007 insn. This way there is more chance that it can be combined with
4008 a later addressing insn to form a pre-indexed load or store
4009 operation. Consider:
4010
4011 *((volatile int *)0xe0000100) = 1;
4012 *((volatile int *)0xe0000110) = 2;
4013
4014 We want this to wind up as:
4015
4016 mov rA, #0xe0000000
4017 mov rB, #1
4018 str rB, [rA, #0x100]
4019 mov rB, #2
4020 str rB, [rA, #0x110]
4021
4022 rather than having to synthesize both large constants from scratch.
4023
4024 Therefore, we calculate how many insns would be required to emit
4025 the constant starting from `best_start', and also starting from
4026 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4027 yield a shorter sequence, we may as well use zero. */
4028 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4029 if (best_start != 0
4030 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
4031 {
4032 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4033 if (insns2 <= insns1)
4034 {
4035 *return_sequence = tmp_sequence;
4036 insns1 = insns2;
4037 }
4038 }
4039
4040 return insns1;
4041 }
4042
4043 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4044 static int
4045 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4046 struct four_ints *return_sequence, int i)
4047 {
4048 int remainder = val & 0xffffffff;
4049 int insns = 0;
4050
4051 /* Try and find a way of doing the job in either two or three
4052 instructions.
4053
4054 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4055 location. We start at position I. This may be the MSB, or
4056 optimial_immediate_sequence may have positioned it at the largest block
4057 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4058 wrapping around to the top of the word when we drop off the bottom.
4059 In the worst case this code should produce no more than four insns.
4060
4061 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4062 constants, shifted to any arbitrary location. We should always start
4063 at the MSB. */
4064 do
4065 {
4066 int end;
4067 unsigned int b1, b2, b3, b4;
4068 unsigned HOST_WIDE_INT result;
4069 int loc;
4070
4071 gcc_assert (insns < 4);
4072
4073 if (i <= 0)
4074 i += 32;
4075
4076 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4077 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4078 {
4079 loc = i;
4080 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4081 /* We can use addw/subw for the last 12 bits. */
4082 result = remainder;
4083 else
4084 {
4085 /* Use an 8-bit shifted/rotated immediate. */
4086 end = i - 8;
4087 if (end < 0)
4088 end += 32;
4089 result = remainder & ((0x0ff << end)
4090 | ((i < end) ? (0xff >> (32 - end))
4091 : 0));
4092 i -= 8;
4093 }
4094 }
4095 else
4096 {
4097 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4098 arbitrary shifts. */
4099 i -= TARGET_ARM ? 2 : 1;
4100 continue;
4101 }
4102
4103 /* Next, see if we can do a better job with a thumb2 replicated
4104 constant.
4105
4106 We do it this way around to catch the cases like 0x01F001E0 where
4107 two 8-bit immediates would work, but a replicated constant would
4108 make it worse.
4109
4110 TODO: 16-bit constants that don't clear all the bits, but still win.
4111 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4112 if (TARGET_THUMB2)
4113 {
4114 b1 = (remainder & 0xff000000) >> 24;
4115 b2 = (remainder & 0x00ff0000) >> 16;
4116 b3 = (remainder & 0x0000ff00) >> 8;
4117 b4 = remainder & 0xff;
4118
4119 if (loc > 24)
4120 {
4121 /* The 8-bit immediate already found clears b1 (and maybe b2),
4122 but must leave b3 and b4 alone. */
4123
4124 /* First try to find a 32-bit replicated constant that clears
4125 almost everything. We can assume that we can't do it in one,
4126 or else we wouldn't be here. */
4127 unsigned int tmp = b1 & b2 & b3 & b4;
4128 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4129 + (tmp << 24);
4130 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4131 + (tmp == b3) + (tmp == b4);
4132 if (tmp
4133 && (matching_bytes >= 3
4134 || (matching_bytes == 2
4135 && const_ok_for_op (remainder & ~tmp2, code))))
4136 {
4137 /* At least 3 of the bytes match, and the fourth has at
4138 least as many bits set, or two of the bytes match
4139 and it will only require one more insn to finish. */
4140 result = tmp2;
4141 i = tmp != b1 ? 32
4142 : tmp != b2 ? 24
4143 : tmp != b3 ? 16
4144 : 8;
4145 }
4146
4147 /* Second, try to find a 16-bit replicated constant that can
4148 leave three of the bytes clear. If b2 or b4 is already
4149 zero, then we can. If the 8-bit from above would not
4150 clear b2 anyway, then we still win. */
4151 else if (b1 == b3 && (!b2 || !b4
4152 || (remainder & 0x00ff0000 & ~result)))
4153 {
4154 result = remainder & 0xff00ff00;
4155 i = 24;
4156 }
4157 }
4158 else if (loc > 16)
4159 {
4160 /* The 8-bit immediate already found clears b2 (and maybe b3)
4161 and we don't get here unless b1 is alredy clear, but it will
4162 leave b4 unchanged. */
4163
4164 /* If we can clear b2 and b4 at once, then we win, since the
4165 8-bits couldn't possibly reach that far. */
4166 if (b2 == b4)
4167 {
4168 result = remainder & 0x00ff00ff;
4169 i = 16;
4170 }
4171 }
4172 }
4173
4174 return_sequence->i[insns++] = result;
4175 remainder &= ~result;
4176
4177 if (code == SET || code == MINUS)
4178 code = PLUS;
4179 }
4180 while (remainder);
4181
4182 return insns;
4183 }
4184
4185 /* Emit an instruction with the indicated PATTERN. If COND is
4186 non-NULL, conditionalize the execution of the instruction on COND
4187 being true. */
4188
4189 static void
4190 emit_constant_insn (rtx cond, rtx pattern)
4191 {
4192 if (cond)
4193 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4194 emit_insn (pattern);
4195 }
4196
4197 /* As above, but extra parameter GENERATE which, if clear, suppresses
4198 RTL generation. */
4199
4200 static int
4201 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4202 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4203 int generate)
4204 {
4205 int can_invert = 0;
4206 int can_negate = 0;
4207 int final_invert = 0;
4208 int i;
4209 int set_sign_bit_copies = 0;
4210 int clear_sign_bit_copies = 0;
4211 int clear_zero_bit_copies = 0;
4212 int set_zero_bit_copies = 0;
4213 int insns = 0, neg_insns, inv_insns;
4214 unsigned HOST_WIDE_INT temp1, temp2;
4215 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4216 struct four_ints *immediates;
4217 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4218
4219 /* Find out which operations are safe for a given CODE. Also do a quick
4220 check for degenerate cases; these can occur when DImode operations
4221 are split. */
4222 switch (code)
4223 {
4224 case SET:
4225 can_invert = 1;
4226 break;
4227
4228 case PLUS:
4229 can_negate = 1;
4230 break;
4231
4232 case IOR:
4233 if (remainder == 0xffffffff)
4234 {
4235 if (generate)
4236 emit_constant_insn (cond,
4237 gen_rtx_SET (target,
4238 GEN_INT (ARM_SIGN_EXTEND (val))));
4239 return 1;
4240 }
4241
4242 if (remainder == 0)
4243 {
4244 if (reload_completed && rtx_equal_p (target, source))
4245 return 0;
4246
4247 if (generate)
4248 emit_constant_insn (cond, gen_rtx_SET (target, source));
4249 return 1;
4250 }
4251 break;
4252
4253 case AND:
4254 if (remainder == 0)
4255 {
4256 if (generate)
4257 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4258 return 1;
4259 }
4260 if (remainder == 0xffffffff)
4261 {
4262 if (reload_completed && rtx_equal_p (target, source))
4263 return 0;
4264 if (generate)
4265 emit_constant_insn (cond, gen_rtx_SET (target, source));
4266 return 1;
4267 }
4268 can_invert = 1;
4269 break;
4270
4271 case XOR:
4272 if (remainder == 0)
4273 {
4274 if (reload_completed && rtx_equal_p (target, source))
4275 return 0;
4276 if (generate)
4277 emit_constant_insn (cond, gen_rtx_SET (target, source));
4278 return 1;
4279 }
4280
4281 if (remainder == 0xffffffff)
4282 {
4283 if (generate)
4284 emit_constant_insn (cond,
4285 gen_rtx_SET (target,
4286 gen_rtx_NOT (mode, source)));
4287 return 1;
4288 }
4289 final_invert = 1;
4290 break;
4291
4292 case MINUS:
4293 /* We treat MINUS as (val - source), since (source - val) is always
4294 passed as (source + (-val)). */
4295 if (remainder == 0)
4296 {
4297 if (generate)
4298 emit_constant_insn (cond,
4299 gen_rtx_SET (target,
4300 gen_rtx_NEG (mode, source)));
4301 return 1;
4302 }
4303 if (const_ok_for_arm (val))
4304 {
4305 if (generate)
4306 emit_constant_insn (cond,
4307 gen_rtx_SET (target,
4308 gen_rtx_MINUS (mode, GEN_INT (val),
4309 source)));
4310 return 1;
4311 }
4312
4313 break;
4314
4315 default:
4316 gcc_unreachable ();
4317 }
4318
4319 /* If we can do it in one insn get out quickly. */
4320 if (const_ok_for_op (val, code))
4321 {
4322 if (generate)
4323 emit_constant_insn (cond,
4324 gen_rtx_SET (target,
4325 (source
4326 ? gen_rtx_fmt_ee (code, mode, source,
4327 GEN_INT (val))
4328 : GEN_INT (val))));
4329 return 1;
4330 }
4331
4332 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4333 insn. */
4334 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4335 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4336 {
4337 if (generate)
4338 {
4339 if (mode == SImode && i == 16)
4340 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4341 smaller insn. */
4342 emit_constant_insn (cond,
4343 gen_zero_extendhisi2
4344 (target, gen_lowpart (HImode, source)));
4345 else
4346 /* Extz only supports SImode, but we can coerce the operands
4347 into that mode. */
4348 emit_constant_insn (cond,
4349 gen_extzv_t2 (gen_lowpart (SImode, target),
4350 gen_lowpart (SImode, source),
4351 GEN_INT (i), const0_rtx));
4352 }
4353
4354 return 1;
4355 }
4356
4357 /* Calculate a few attributes that may be useful for specific
4358 optimizations. */
4359 /* Count number of leading zeros. */
4360 for (i = 31; i >= 0; i--)
4361 {
4362 if ((remainder & (1 << i)) == 0)
4363 clear_sign_bit_copies++;
4364 else
4365 break;
4366 }
4367
4368 /* Count number of leading 1's. */
4369 for (i = 31; i >= 0; i--)
4370 {
4371 if ((remainder & (1 << i)) != 0)
4372 set_sign_bit_copies++;
4373 else
4374 break;
4375 }
4376
4377 /* Count number of trailing zero's. */
4378 for (i = 0; i <= 31; i++)
4379 {
4380 if ((remainder & (1 << i)) == 0)
4381 clear_zero_bit_copies++;
4382 else
4383 break;
4384 }
4385
4386 /* Count number of trailing 1's. */
4387 for (i = 0; i <= 31; i++)
4388 {
4389 if ((remainder & (1 << i)) != 0)
4390 set_zero_bit_copies++;
4391 else
4392 break;
4393 }
4394
4395 switch (code)
4396 {
4397 case SET:
4398 /* See if we can do this by sign_extending a constant that is known
4399 to be negative. This is a good, way of doing it, since the shift
4400 may well merge into a subsequent insn. */
4401 if (set_sign_bit_copies > 1)
4402 {
4403 if (const_ok_for_arm
4404 (temp1 = ARM_SIGN_EXTEND (remainder
4405 << (set_sign_bit_copies - 1))))
4406 {
4407 if (generate)
4408 {
4409 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4410 emit_constant_insn (cond,
4411 gen_rtx_SET (new_src, GEN_INT (temp1)));
4412 emit_constant_insn (cond,
4413 gen_ashrsi3 (target, new_src,
4414 GEN_INT (set_sign_bit_copies - 1)));
4415 }
4416 return 2;
4417 }
4418 /* For an inverted constant, we will need to set the low bits,
4419 these will be shifted out of harm's way. */
4420 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4421 if (const_ok_for_arm (~temp1))
4422 {
4423 if (generate)
4424 {
4425 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4426 emit_constant_insn (cond,
4427 gen_rtx_SET (new_src, GEN_INT (temp1)));
4428 emit_constant_insn (cond,
4429 gen_ashrsi3 (target, new_src,
4430 GEN_INT (set_sign_bit_copies - 1)));
4431 }
4432 return 2;
4433 }
4434 }
4435
4436 /* See if we can calculate the value as the difference between two
4437 valid immediates. */
4438 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4439 {
4440 int topshift = clear_sign_bit_copies & ~1;
4441
4442 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4443 & (0xff000000 >> topshift));
4444
4445 /* If temp1 is zero, then that means the 9 most significant
4446 bits of remainder were 1 and we've caused it to overflow.
4447 When topshift is 0 we don't need to do anything since we
4448 can borrow from 'bit 32'. */
4449 if (temp1 == 0 && topshift != 0)
4450 temp1 = 0x80000000 >> (topshift - 1);
4451
4452 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4453
4454 if (const_ok_for_arm (temp2))
4455 {
4456 if (generate)
4457 {
4458 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4459 emit_constant_insn (cond,
4460 gen_rtx_SET (new_src, GEN_INT (temp1)));
4461 emit_constant_insn (cond,
4462 gen_addsi3 (target, new_src,
4463 GEN_INT (-temp2)));
4464 }
4465
4466 return 2;
4467 }
4468 }
4469
4470 /* See if we can generate this by setting the bottom (or the top)
4471 16 bits, and then shifting these into the other half of the
4472 word. We only look for the simplest cases, to do more would cost
4473 too much. Be careful, however, not to generate this when the
4474 alternative would take fewer insns. */
4475 if (val & 0xffff0000)
4476 {
4477 temp1 = remainder & 0xffff0000;
4478 temp2 = remainder & 0x0000ffff;
4479
4480 /* Overlaps outside this range are best done using other methods. */
4481 for (i = 9; i < 24; i++)
4482 {
4483 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4484 && !const_ok_for_arm (temp2))
4485 {
4486 rtx new_src = (subtargets
4487 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4488 : target);
4489 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4490 source, subtargets, generate);
4491 source = new_src;
4492 if (generate)
4493 emit_constant_insn
4494 (cond,
4495 gen_rtx_SET
4496 (target,
4497 gen_rtx_IOR (mode,
4498 gen_rtx_ASHIFT (mode, source,
4499 GEN_INT (i)),
4500 source)));
4501 return insns + 1;
4502 }
4503 }
4504
4505 /* Don't duplicate cases already considered. */
4506 for (i = 17; i < 24; i++)
4507 {
4508 if (((temp1 | (temp1 >> i)) == remainder)
4509 && !const_ok_for_arm (temp1))
4510 {
4511 rtx new_src = (subtargets
4512 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4513 : target);
4514 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4515 source, subtargets, generate);
4516 source = new_src;
4517 if (generate)
4518 emit_constant_insn
4519 (cond,
4520 gen_rtx_SET (target,
4521 gen_rtx_IOR
4522 (mode,
4523 gen_rtx_LSHIFTRT (mode, source,
4524 GEN_INT (i)),
4525 source)));
4526 return insns + 1;
4527 }
4528 }
4529 }
4530 break;
4531
4532 case IOR:
4533 case XOR:
4534 /* If we have IOR or XOR, and the constant can be loaded in a
4535 single instruction, and we can find a temporary to put it in,
4536 then this can be done in two instructions instead of 3-4. */
4537 if (subtargets
4538 /* TARGET can't be NULL if SUBTARGETS is 0 */
4539 || (reload_completed && !reg_mentioned_p (target, source)))
4540 {
4541 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4542 {
4543 if (generate)
4544 {
4545 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4546
4547 emit_constant_insn (cond,
4548 gen_rtx_SET (sub, GEN_INT (val)));
4549 emit_constant_insn (cond,
4550 gen_rtx_SET (target,
4551 gen_rtx_fmt_ee (code, mode,
4552 source, sub)));
4553 }
4554 return 2;
4555 }
4556 }
4557
4558 if (code == XOR)
4559 break;
4560
4561 /* Convert.
4562 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4563 and the remainder 0s for e.g. 0xfff00000)
4564 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4565
4566 This can be done in 2 instructions by using shifts with mov or mvn.
4567 e.g. for
4568 x = x | 0xfff00000;
4569 we generate.
4570 mvn r0, r0, asl #12
4571 mvn r0, r0, lsr #12 */
4572 if (set_sign_bit_copies > 8
4573 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4574 {
4575 if (generate)
4576 {
4577 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4578 rtx shift = GEN_INT (set_sign_bit_copies);
4579
4580 emit_constant_insn
4581 (cond,
4582 gen_rtx_SET (sub,
4583 gen_rtx_NOT (mode,
4584 gen_rtx_ASHIFT (mode,
4585 source,
4586 shift))));
4587 emit_constant_insn
4588 (cond,
4589 gen_rtx_SET (target,
4590 gen_rtx_NOT (mode,
4591 gen_rtx_LSHIFTRT (mode, sub,
4592 shift))));
4593 }
4594 return 2;
4595 }
4596
4597 /* Convert
4598 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4599 to
4600 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4601
4602 For eg. r0 = r0 | 0xfff
4603 mvn r0, r0, lsr #12
4604 mvn r0, r0, asl #12
4605
4606 */
4607 if (set_zero_bit_copies > 8
4608 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4609 {
4610 if (generate)
4611 {
4612 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4613 rtx shift = GEN_INT (set_zero_bit_copies);
4614
4615 emit_constant_insn
4616 (cond,
4617 gen_rtx_SET (sub,
4618 gen_rtx_NOT (mode,
4619 gen_rtx_LSHIFTRT (mode,
4620 source,
4621 shift))));
4622 emit_constant_insn
4623 (cond,
4624 gen_rtx_SET (target,
4625 gen_rtx_NOT (mode,
4626 gen_rtx_ASHIFT (mode, sub,
4627 shift))));
4628 }
4629 return 2;
4630 }
4631
4632 /* This will never be reached for Thumb2 because orn is a valid
4633 instruction. This is for Thumb1 and the ARM 32 bit cases.
4634
4635 x = y | constant (such that ~constant is a valid constant)
4636 Transform this to
4637 x = ~(~y & ~constant).
4638 */
4639 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4640 {
4641 if (generate)
4642 {
4643 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4644 emit_constant_insn (cond,
4645 gen_rtx_SET (sub,
4646 gen_rtx_NOT (mode, source)));
4647 source = sub;
4648 if (subtargets)
4649 sub = gen_reg_rtx (mode);
4650 emit_constant_insn (cond,
4651 gen_rtx_SET (sub,
4652 gen_rtx_AND (mode, source,
4653 GEN_INT (temp1))));
4654 emit_constant_insn (cond,
4655 gen_rtx_SET (target,
4656 gen_rtx_NOT (mode, sub)));
4657 }
4658 return 3;
4659 }
4660 break;
4661
4662 case AND:
4663 /* See if two shifts will do 2 or more insn's worth of work. */
4664 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4665 {
4666 HOST_WIDE_INT shift_mask = ((0xffffffff
4667 << (32 - clear_sign_bit_copies))
4668 & 0xffffffff);
4669
4670 if ((remainder | shift_mask) != 0xffffffff)
4671 {
4672 HOST_WIDE_INT new_val
4673 = ARM_SIGN_EXTEND (remainder | shift_mask);
4674
4675 if (generate)
4676 {
4677 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4678 insns = arm_gen_constant (AND, SImode, cond, new_val,
4679 new_src, source, subtargets, 1);
4680 source = new_src;
4681 }
4682 else
4683 {
4684 rtx targ = subtargets ? NULL_RTX : target;
4685 insns = arm_gen_constant (AND, mode, cond, new_val,
4686 targ, source, subtargets, 0);
4687 }
4688 }
4689
4690 if (generate)
4691 {
4692 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4693 rtx shift = GEN_INT (clear_sign_bit_copies);
4694
4695 emit_insn (gen_ashlsi3 (new_src, source, shift));
4696 emit_insn (gen_lshrsi3 (target, new_src, shift));
4697 }
4698
4699 return insns + 2;
4700 }
4701
4702 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4703 {
4704 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4705
4706 if ((remainder | shift_mask) != 0xffffffff)
4707 {
4708 HOST_WIDE_INT new_val
4709 = ARM_SIGN_EXTEND (remainder | shift_mask);
4710 if (generate)
4711 {
4712 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4713
4714 insns = arm_gen_constant (AND, mode, cond, new_val,
4715 new_src, source, subtargets, 1);
4716 source = new_src;
4717 }
4718 else
4719 {
4720 rtx targ = subtargets ? NULL_RTX : target;
4721
4722 insns = arm_gen_constant (AND, mode, cond, new_val,
4723 targ, source, subtargets, 0);
4724 }
4725 }
4726
4727 if (generate)
4728 {
4729 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4730 rtx shift = GEN_INT (clear_zero_bit_copies);
4731
4732 emit_insn (gen_lshrsi3 (new_src, source, shift));
4733 emit_insn (gen_ashlsi3 (target, new_src, shift));
4734 }
4735
4736 return insns + 2;
4737 }
4738
4739 break;
4740
4741 default:
4742 break;
4743 }
4744
4745 /* Calculate what the instruction sequences would be if we generated it
4746 normally, negated, or inverted. */
4747 if (code == AND)
4748 /* AND cannot be split into multiple insns, so invert and use BIC. */
4749 insns = 99;
4750 else
4751 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4752
4753 if (can_negate)
4754 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4755 &neg_immediates);
4756 else
4757 neg_insns = 99;
4758
4759 if (can_invert || final_invert)
4760 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4761 &inv_immediates);
4762 else
4763 inv_insns = 99;
4764
4765 immediates = &pos_immediates;
4766
4767 /* Is the negated immediate sequence more efficient? */
4768 if (neg_insns < insns && neg_insns <= inv_insns)
4769 {
4770 insns = neg_insns;
4771 immediates = &neg_immediates;
4772 }
4773 else
4774 can_negate = 0;
4775
4776 /* Is the inverted immediate sequence more efficient?
4777 We must allow for an extra NOT instruction for XOR operations, although
4778 there is some chance that the final 'mvn' will get optimized later. */
4779 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4780 {
4781 insns = inv_insns;
4782 immediates = &inv_immediates;
4783 }
4784 else
4785 {
4786 can_invert = 0;
4787 final_invert = 0;
4788 }
4789
4790 /* Now output the chosen sequence as instructions. */
4791 if (generate)
4792 {
4793 for (i = 0; i < insns; i++)
4794 {
4795 rtx new_src, temp1_rtx;
4796
4797 temp1 = immediates->i[i];
4798
4799 if (code == SET || code == MINUS)
4800 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4801 else if ((final_invert || i < (insns - 1)) && subtargets)
4802 new_src = gen_reg_rtx (mode);
4803 else
4804 new_src = target;
4805
4806 if (can_invert)
4807 temp1 = ~temp1;
4808 else if (can_negate)
4809 temp1 = -temp1;
4810
4811 temp1 = trunc_int_for_mode (temp1, mode);
4812 temp1_rtx = GEN_INT (temp1);
4813
4814 if (code == SET)
4815 ;
4816 else if (code == MINUS)
4817 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4818 else
4819 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4820
4821 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4822 source = new_src;
4823
4824 if (code == SET)
4825 {
4826 can_negate = can_invert;
4827 can_invert = 0;
4828 code = PLUS;
4829 }
4830 else if (code == MINUS)
4831 code = PLUS;
4832 }
4833 }
4834
4835 if (final_invert)
4836 {
4837 if (generate)
4838 emit_constant_insn (cond, gen_rtx_SET (target,
4839 gen_rtx_NOT (mode, source)));
4840 insns++;
4841 }
4842
4843 return insns;
4844 }
4845
4846 /* Canonicalize a comparison so that we are more likely to recognize it.
4847 This can be done for a few constant compares, where we can make the
4848 immediate value easier to load. */
4849
4850 static void
4851 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4852 bool op0_preserve_value)
4853 {
4854 machine_mode mode;
4855 unsigned HOST_WIDE_INT i, maxval;
4856
4857 mode = GET_MODE (*op0);
4858 if (mode == VOIDmode)
4859 mode = GET_MODE (*op1);
4860
4861 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4862
4863 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4864 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4865 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4866 for GTU/LEU in Thumb mode. */
4867 if (mode == DImode)
4868 {
4869
4870 if (*code == GT || *code == LE
4871 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4872 {
4873 /* Missing comparison. First try to use an available
4874 comparison. */
4875 if (CONST_INT_P (*op1))
4876 {
4877 i = INTVAL (*op1);
4878 switch (*code)
4879 {
4880 case GT:
4881 case LE:
4882 if (i != maxval
4883 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4884 {
4885 *op1 = GEN_INT (i + 1);
4886 *code = *code == GT ? GE : LT;
4887 return;
4888 }
4889 break;
4890 case GTU:
4891 case LEU:
4892 if (i != ~((unsigned HOST_WIDE_INT) 0)
4893 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4894 {
4895 *op1 = GEN_INT (i + 1);
4896 *code = *code == GTU ? GEU : LTU;
4897 return;
4898 }
4899 break;
4900 default:
4901 gcc_unreachable ();
4902 }
4903 }
4904
4905 /* If that did not work, reverse the condition. */
4906 if (!op0_preserve_value)
4907 {
4908 std::swap (*op0, *op1);
4909 *code = (int)swap_condition ((enum rtx_code)*code);
4910 }
4911 }
4912 return;
4913 }
4914
4915 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4916 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4917 to facilitate possible combining with a cmp into 'ands'. */
4918 if (mode == SImode
4919 && GET_CODE (*op0) == ZERO_EXTEND
4920 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4921 && GET_MODE (XEXP (*op0, 0)) == QImode
4922 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4923 && subreg_lowpart_p (XEXP (*op0, 0))
4924 && *op1 == const0_rtx)
4925 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4926 GEN_INT (255));
4927
4928 /* Comparisons smaller than DImode. Only adjust comparisons against
4929 an out-of-range constant. */
4930 if (!CONST_INT_P (*op1)
4931 || const_ok_for_arm (INTVAL (*op1))
4932 || const_ok_for_arm (- INTVAL (*op1)))
4933 return;
4934
4935 i = INTVAL (*op1);
4936
4937 switch (*code)
4938 {
4939 case EQ:
4940 case NE:
4941 return;
4942
4943 case GT:
4944 case LE:
4945 if (i != maxval
4946 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4947 {
4948 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4949 *code = *code == GT ? GE : LT;
4950 return;
4951 }
4952 break;
4953
4954 case GE:
4955 case LT:
4956 if (i != ~maxval
4957 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4958 {
4959 *op1 = GEN_INT (i - 1);
4960 *code = *code == GE ? GT : LE;
4961 return;
4962 }
4963 break;
4964
4965 case GTU:
4966 case LEU:
4967 if (i != ~((unsigned HOST_WIDE_INT) 0)
4968 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4969 {
4970 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4971 *code = *code == GTU ? GEU : LTU;
4972 return;
4973 }
4974 break;
4975
4976 case GEU:
4977 case LTU:
4978 if (i != 0
4979 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4980 {
4981 *op1 = GEN_INT (i - 1);
4982 *code = *code == GEU ? GTU : LEU;
4983 return;
4984 }
4985 break;
4986
4987 default:
4988 gcc_unreachable ();
4989 }
4990 }
4991
4992
4993 /* Define how to find the value returned by a function. */
4994
4995 static rtx
4996 arm_function_value(const_tree type, const_tree func,
4997 bool outgoing ATTRIBUTE_UNUSED)
4998 {
4999 machine_mode mode;
5000 int unsignedp ATTRIBUTE_UNUSED;
5001 rtx r ATTRIBUTE_UNUSED;
5002
5003 mode = TYPE_MODE (type);
5004
5005 if (TARGET_AAPCS_BASED)
5006 return aapcs_allocate_return_reg (mode, type, func);
5007
5008 /* Promote integer types. */
5009 if (INTEGRAL_TYPE_P (type))
5010 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5011
5012 /* Promotes small structs returned in a register to full-word size
5013 for big-endian AAPCS. */
5014 if (arm_return_in_msb (type))
5015 {
5016 HOST_WIDE_INT size = int_size_in_bytes (type);
5017 if (size % UNITS_PER_WORD != 0)
5018 {
5019 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5020 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5021 }
5022 }
5023
5024 return arm_libcall_value_1 (mode);
5025 }
5026
5027 /* libcall hashtable helpers. */
5028
5029 struct libcall_hasher : typed_noop_remove <rtx_def>
5030 {
5031 typedef const rtx_def *value_type;
5032 typedef const rtx_def *compare_type;
5033 static inline hashval_t hash (const rtx_def *);
5034 static inline bool equal (const rtx_def *, const rtx_def *);
5035 static inline void remove (rtx_def *);
5036 };
5037
5038 inline bool
5039 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5040 {
5041 return rtx_equal_p (p1, p2);
5042 }
5043
5044 inline hashval_t
5045 libcall_hasher::hash (const rtx_def *p1)
5046 {
5047 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5048 }
5049
5050 typedef hash_table<libcall_hasher> libcall_table_type;
5051
5052 static void
5053 add_libcall (libcall_table_type *htab, rtx libcall)
5054 {
5055 *htab->find_slot (libcall, INSERT) = libcall;
5056 }
5057
5058 static bool
5059 arm_libcall_uses_aapcs_base (const_rtx libcall)
5060 {
5061 static bool init_done = false;
5062 static libcall_table_type *libcall_htab = NULL;
5063
5064 if (!init_done)
5065 {
5066 init_done = true;
5067
5068 libcall_htab = new libcall_table_type (31);
5069 add_libcall (libcall_htab,
5070 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5071 add_libcall (libcall_htab,
5072 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5073 add_libcall (libcall_htab,
5074 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5075 add_libcall (libcall_htab,
5076 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5077
5078 add_libcall (libcall_htab,
5079 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5080 add_libcall (libcall_htab,
5081 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5082 add_libcall (libcall_htab,
5083 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5084 add_libcall (libcall_htab,
5085 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5086
5087 add_libcall (libcall_htab,
5088 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5089 add_libcall (libcall_htab,
5090 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5091 add_libcall (libcall_htab,
5092 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5093 add_libcall (libcall_htab,
5094 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5095 add_libcall (libcall_htab,
5096 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5097 add_libcall (libcall_htab,
5098 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5099 add_libcall (libcall_htab,
5100 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5101 add_libcall (libcall_htab,
5102 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5103
5104 /* Values from double-precision helper functions are returned in core
5105 registers if the selected core only supports single-precision
5106 arithmetic, even if we are using the hard-float ABI. The same is
5107 true for single-precision helpers, but we will never be using the
5108 hard-float ABI on a CPU which doesn't support single-precision
5109 operations in hardware. */
5110 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5111 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5112 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5113 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5114 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5115 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5116 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5117 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5118 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5119 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5120 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5121 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5122 SFmode));
5123 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5124 DFmode));
5125 }
5126
5127 return libcall && libcall_htab->find (libcall) != NULL;
5128 }
5129
5130 static rtx
5131 arm_libcall_value_1 (machine_mode mode)
5132 {
5133 if (TARGET_AAPCS_BASED)
5134 return aapcs_libcall_value (mode);
5135 else if (TARGET_IWMMXT_ABI
5136 && arm_vector_mode_supported_p (mode))
5137 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5138 else
5139 return gen_rtx_REG (mode, ARG_REGISTER (1));
5140 }
5141
5142 /* Define how to find the value returned by a library function
5143 assuming the value has mode MODE. */
5144
5145 static rtx
5146 arm_libcall_value (machine_mode mode, const_rtx libcall)
5147 {
5148 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5149 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5150 {
5151 /* The following libcalls return their result in integer registers,
5152 even though they return a floating point value. */
5153 if (arm_libcall_uses_aapcs_base (libcall))
5154 return gen_rtx_REG (mode, ARG_REGISTER(1));
5155
5156 }
5157
5158 return arm_libcall_value_1 (mode);
5159 }
5160
5161 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5162
5163 static bool
5164 arm_function_value_regno_p (const unsigned int regno)
5165 {
5166 if (regno == ARG_REGISTER (1)
5167 || (TARGET_32BIT
5168 && TARGET_AAPCS_BASED
5169 && TARGET_VFP
5170 && TARGET_HARD_FLOAT
5171 && regno == FIRST_VFP_REGNUM)
5172 || (TARGET_IWMMXT_ABI
5173 && regno == FIRST_IWMMXT_REGNUM))
5174 return true;
5175
5176 return false;
5177 }
5178
5179 /* Determine the amount of memory needed to store the possible return
5180 registers of an untyped call. */
5181 int
5182 arm_apply_result_size (void)
5183 {
5184 int size = 16;
5185
5186 if (TARGET_32BIT)
5187 {
5188 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5189 size += 32;
5190 if (TARGET_IWMMXT_ABI)
5191 size += 8;
5192 }
5193
5194 return size;
5195 }
5196
5197 /* Decide whether TYPE should be returned in memory (true)
5198 or in a register (false). FNTYPE is the type of the function making
5199 the call. */
5200 static bool
5201 arm_return_in_memory (const_tree type, const_tree fntype)
5202 {
5203 HOST_WIDE_INT size;
5204
5205 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5206
5207 if (TARGET_AAPCS_BASED)
5208 {
5209 /* Simple, non-aggregate types (ie not including vectors and
5210 complex) are always returned in a register (or registers).
5211 We don't care about which register here, so we can short-cut
5212 some of the detail. */
5213 if (!AGGREGATE_TYPE_P (type)
5214 && TREE_CODE (type) != VECTOR_TYPE
5215 && TREE_CODE (type) != COMPLEX_TYPE)
5216 return false;
5217
5218 /* Any return value that is no larger than one word can be
5219 returned in r0. */
5220 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5221 return false;
5222
5223 /* Check any available co-processors to see if they accept the
5224 type as a register candidate (VFP, for example, can return
5225 some aggregates in consecutive registers). These aren't
5226 available if the call is variadic. */
5227 if (aapcs_select_return_coproc (type, fntype) >= 0)
5228 return false;
5229
5230 /* Vector values should be returned using ARM registers, not
5231 memory (unless they're over 16 bytes, which will break since
5232 we only have four call-clobbered registers to play with). */
5233 if (TREE_CODE (type) == VECTOR_TYPE)
5234 return (size < 0 || size > (4 * UNITS_PER_WORD));
5235
5236 /* The rest go in memory. */
5237 return true;
5238 }
5239
5240 if (TREE_CODE (type) == VECTOR_TYPE)
5241 return (size < 0 || size > (4 * UNITS_PER_WORD));
5242
5243 if (!AGGREGATE_TYPE_P (type) &&
5244 (TREE_CODE (type) != VECTOR_TYPE))
5245 /* All simple types are returned in registers. */
5246 return false;
5247
5248 if (arm_abi != ARM_ABI_APCS)
5249 {
5250 /* ATPCS and later return aggregate types in memory only if they are
5251 larger than a word (or are variable size). */
5252 return (size < 0 || size > UNITS_PER_WORD);
5253 }
5254
5255 /* For the arm-wince targets we choose to be compatible with Microsoft's
5256 ARM and Thumb compilers, which always return aggregates in memory. */
5257 #ifndef ARM_WINCE
5258 /* All structures/unions bigger than one word are returned in memory.
5259 Also catch the case where int_size_in_bytes returns -1. In this case
5260 the aggregate is either huge or of variable size, and in either case
5261 we will want to return it via memory and not in a register. */
5262 if (size < 0 || size > UNITS_PER_WORD)
5263 return true;
5264
5265 if (TREE_CODE (type) == RECORD_TYPE)
5266 {
5267 tree field;
5268
5269 /* For a struct the APCS says that we only return in a register
5270 if the type is 'integer like' and every addressable element
5271 has an offset of zero. For practical purposes this means
5272 that the structure can have at most one non bit-field element
5273 and that this element must be the first one in the structure. */
5274
5275 /* Find the first field, ignoring non FIELD_DECL things which will
5276 have been created by C++. */
5277 for (field = TYPE_FIELDS (type);
5278 field && TREE_CODE (field) != FIELD_DECL;
5279 field = DECL_CHAIN (field))
5280 continue;
5281
5282 if (field == NULL)
5283 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5284
5285 /* Check that the first field is valid for returning in a register. */
5286
5287 /* ... Floats are not allowed */
5288 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5289 return true;
5290
5291 /* ... Aggregates that are not themselves valid for returning in
5292 a register are not allowed. */
5293 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5294 return true;
5295
5296 /* Now check the remaining fields, if any. Only bitfields are allowed,
5297 since they are not addressable. */
5298 for (field = DECL_CHAIN (field);
5299 field;
5300 field = DECL_CHAIN (field))
5301 {
5302 if (TREE_CODE (field) != FIELD_DECL)
5303 continue;
5304
5305 if (!DECL_BIT_FIELD_TYPE (field))
5306 return true;
5307 }
5308
5309 return false;
5310 }
5311
5312 if (TREE_CODE (type) == UNION_TYPE)
5313 {
5314 tree field;
5315
5316 /* Unions can be returned in registers if every element is
5317 integral, or can be returned in an integer register. */
5318 for (field = TYPE_FIELDS (type);
5319 field;
5320 field = DECL_CHAIN (field))
5321 {
5322 if (TREE_CODE (field) != FIELD_DECL)
5323 continue;
5324
5325 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5326 return true;
5327
5328 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5329 return true;
5330 }
5331
5332 return false;
5333 }
5334 #endif /* not ARM_WINCE */
5335
5336 /* Return all other types in memory. */
5337 return true;
5338 }
5339
5340 const struct pcs_attribute_arg
5341 {
5342 const char *arg;
5343 enum arm_pcs value;
5344 } pcs_attribute_args[] =
5345 {
5346 {"aapcs", ARM_PCS_AAPCS},
5347 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5348 #if 0
5349 /* We could recognize these, but changes would be needed elsewhere
5350 * to implement them. */
5351 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5352 {"atpcs", ARM_PCS_ATPCS},
5353 {"apcs", ARM_PCS_APCS},
5354 #endif
5355 {NULL, ARM_PCS_UNKNOWN}
5356 };
5357
5358 static enum arm_pcs
5359 arm_pcs_from_attribute (tree attr)
5360 {
5361 const struct pcs_attribute_arg *ptr;
5362 const char *arg;
5363
5364 /* Get the value of the argument. */
5365 if (TREE_VALUE (attr) == NULL_TREE
5366 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5367 return ARM_PCS_UNKNOWN;
5368
5369 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5370
5371 /* Check it against the list of known arguments. */
5372 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5373 if (streq (arg, ptr->arg))
5374 return ptr->value;
5375
5376 /* An unrecognized interrupt type. */
5377 return ARM_PCS_UNKNOWN;
5378 }
5379
5380 /* Get the PCS variant to use for this call. TYPE is the function's type
5381 specification, DECL is the specific declartion. DECL may be null if
5382 the call could be indirect or if this is a library call. */
5383 static enum arm_pcs
5384 arm_get_pcs_model (const_tree type, const_tree decl)
5385 {
5386 bool user_convention = false;
5387 enum arm_pcs user_pcs = arm_pcs_default;
5388 tree attr;
5389
5390 gcc_assert (type);
5391
5392 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5393 if (attr)
5394 {
5395 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5396 user_convention = true;
5397 }
5398
5399 if (TARGET_AAPCS_BASED)
5400 {
5401 /* Detect varargs functions. These always use the base rules
5402 (no argument is ever a candidate for a co-processor
5403 register). */
5404 bool base_rules = stdarg_p (type);
5405
5406 if (user_convention)
5407 {
5408 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5409 sorry ("non-AAPCS derived PCS variant");
5410 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5411 error ("variadic functions must use the base AAPCS variant");
5412 }
5413
5414 if (base_rules)
5415 return ARM_PCS_AAPCS;
5416 else if (user_convention)
5417 return user_pcs;
5418 else if (decl && flag_unit_at_a_time)
5419 {
5420 /* Local functions never leak outside this compilation unit,
5421 so we are free to use whatever conventions are
5422 appropriate. */
5423 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5424 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5425 if (i && i->local)
5426 return ARM_PCS_AAPCS_LOCAL;
5427 }
5428 }
5429 else if (user_convention && user_pcs != arm_pcs_default)
5430 sorry ("PCS variant");
5431
5432 /* For everything else we use the target's default. */
5433 return arm_pcs_default;
5434 }
5435
5436
5437 static void
5438 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5439 const_tree fntype ATTRIBUTE_UNUSED,
5440 rtx libcall ATTRIBUTE_UNUSED,
5441 const_tree fndecl ATTRIBUTE_UNUSED)
5442 {
5443 /* Record the unallocated VFP registers. */
5444 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5445 pcum->aapcs_vfp_reg_alloc = 0;
5446 }
5447
5448 /* Walk down the type tree of TYPE counting consecutive base elements.
5449 If *MODEP is VOIDmode, then set it to the first valid floating point
5450 type. If a non-floating point type is found, or if a floating point
5451 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5452 otherwise return the count in the sub-tree. */
5453 static int
5454 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5455 {
5456 machine_mode mode;
5457 HOST_WIDE_INT size;
5458
5459 switch (TREE_CODE (type))
5460 {
5461 case REAL_TYPE:
5462 mode = TYPE_MODE (type);
5463 if (mode != DFmode && mode != SFmode)
5464 return -1;
5465
5466 if (*modep == VOIDmode)
5467 *modep = mode;
5468
5469 if (*modep == mode)
5470 return 1;
5471
5472 break;
5473
5474 case COMPLEX_TYPE:
5475 mode = TYPE_MODE (TREE_TYPE (type));
5476 if (mode != DFmode && mode != SFmode)
5477 return -1;
5478
5479 if (*modep == VOIDmode)
5480 *modep = mode;
5481
5482 if (*modep == mode)
5483 return 2;
5484
5485 break;
5486
5487 case VECTOR_TYPE:
5488 /* Use V2SImode and V4SImode as representatives of all 64-bit
5489 and 128-bit vector types, whether or not those modes are
5490 supported with the present options. */
5491 size = int_size_in_bytes (type);
5492 switch (size)
5493 {
5494 case 8:
5495 mode = V2SImode;
5496 break;
5497 case 16:
5498 mode = V4SImode;
5499 break;
5500 default:
5501 return -1;
5502 }
5503
5504 if (*modep == VOIDmode)
5505 *modep = mode;
5506
5507 /* Vector modes are considered to be opaque: two vectors are
5508 equivalent for the purposes of being homogeneous aggregates
5509 if they are the same size. */
5510 if (*modep == mode)
5511 return 1;
5512
5513 break;
5514
5515 case ARRAY_TYPE:
5516 {
5517 int count;
5518 tree index = TYPE_DOMAIN (type);
5519
5520 /* Can't handle incomplete types nor sizes that are not
5521 fixed. */
5522 if (!COMPLETE_TYPE_P (type)
5523 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5524 return -1;
5525
5526 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5527 if (count == -1
5528 || !index
5529 || !TYPE_MAX_VALUE (index)
5530 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5531 || !TYPE_MIN_VALUE (index)
5532 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5533 || count < 0)
5534 return -1;
5535
5536 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5537 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5538
5539 /* There must be no padding. */
5540 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5541 return -1;
5542
5543 return count;
5544 }
5545
5546 case RECORD_TYPE:
5547 {
5548 int count = 0;
5549 int sub_count;
5550 tree field;
5551
5552 /* Can't handle incomplete types nor sizes that are not
5553 fixed. */
5554 if (!COMPLETE_TYPE_P (type)
5555 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5556 return -1;
5557
5558 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5559 {
5560 if (TREE_CODE (field) != FIELD_DECL)
5561 continue;
5562
5563 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5564 if (sub_count < 0)
5565 return -1;
5566 count += sub_count;
5567 }
5568
5569 /* There must be no padding. */
5570 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5571 return -1;
5572
5573 return count;
5574 }
5575
5576 case UNION_TYPE:
5577 case QUAL_UNION_TYPE:
5578 {
5579 /* These aren't very interesting except in a degenerate case. */
5580 int count = 0;
5581 int sub_count;
5582 tree field;
5583
5584 /* Can't handle incomplete types nor sizes that are not
5585 fixed. */
5586 if (!COMPLETE_TYPE_P (type)
5587 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5588 return -1;
5589
5590 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5591 {
5592 if (TREE_CODE (field) != FIELD_DECL)
5593 continue;
5594
5595 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5596 if (sub_count < 0)
5597 return -1;
5598 count = count > sub_count ? count : sub_count;
5599 }
5600
5601 /* There must be no padding. */
5602 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5603 return -1;
5604
5605 return count;
5606 }
5607
5608 default:
5609 break;
5610 }
5611
5612 return -1;
5613 }
5614
5615 /* Return true if PCS_VARIANT should use VFP registers. */
5616 static bool
5617 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5618 {
5619 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5620 {
5621 static bool seen_thumb1_vfp = false;
5622
5623 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5624 {
5625 sorry ("Thumb-1 hard-float VFP ABI");
5626 /* sorry() is not immediately fatal, so only display this once. */
5627 seen_thumb1_vfp = true;
5628 }
5629
5630 return true;
5631 }
5632
5633 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5634 return false;
5635
5636 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5637 (TARGET_VFP_DOUBLE || !is_double));
5638 }
5639
5640 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5641 suitable for passing or returning in VFP registers for the PCS
5642 variant selected. If it is, then *BASE_MODE is updated to contain
5643 a machine mode describing each element of the argument's type and
5644 *COUNT to hold the number of such elements. */
5645 static bool
5646 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5647 machine_mode mode, const_tree type,
5648 machine_mode *base_mode, int *count)
5649 {
5650 machine_mode new_mode = VOIDmode;
5651
5652 /* If we have the type information, prefer that to working things
5653 out from the mode. */
5654 if (type)
5655 {
5656 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5657
5658 if (ag_count > 0 && ag_count <= 4)
5659 *count = ag_count;
5660 else
5661 return false;
5662 }
5663 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5664 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5665 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5666 {
5667 *count = 1;
5668 new_mode = mode;
5669 }
5670 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5671 {
5672 *count = 2;
5673 new_mode = (mode == DCmode ? DFmode : SFmode);
5674 }
5675 else
5676 return false;
5677
5678
5679 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5680 return false;
5681
5682 *base_mode = new_mode;
5683 return true;
5684 }
5685
5686 static bool
5687 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5688 machine_mode mode, const_tree type)
5689 {
5690 int count ATTRIBUTE_UNUSED;
5691 machine_mode ag_mode ATTRIBUTE_UNUSED;
5692
5693 if (!use_vfp_abi (pcs_variant, false))
5694 return false;
5695 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5696 &ag_mode, &count);
5697 }
5698
5699 static bool
5700 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5701 const_tree type)
5702 {
5703 if (!use_vfp_abi (pcum->pcs_variant, false))
5704 return false;
5705
5706 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5707 &pcum->aapcs_vfp_rmode,
5708 &pcum->aapcs_vfp_rcount);
5709 }
5710
5711 static bool
5712 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5713 const_tree type ATTRIBUTE_UNUSED)
5714 {
5715 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5716 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5717 int regno;
5718
5719 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5720 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5721 {
5722 pcum->aapcs_vfp_reg_alloc = mask << regno;
5723 if (mode == BLKmode
5724 || (mode == TImode && ! TARGET_NEON)
5725 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5726 {
5727 int i;
5728 int rcount = pcum->aapcs_vfp_rcount;
5729 int rshift = shift;
5730 machine_mode rmode = pcum->aapcs_vfp_rmode;
5731 rtx par;
5732 if (!TARGET_NEON)
5733 {
5734 /* Avoid using unsupported vector modes. */
5735 if (rmode == V2SImode)
5736 rmode = DImode;
5737 else if (rmode == V4SImode)
5738 {
5739 rmode = DImode;
5740 rcount *= 2;
5741 rshift /= 2;
5742 }
5743 }
5744 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5745 for (i = 0; i < rcount; i++)
5746 {
5747 rtx tmp = gen_rtx_REG (rmode,
5748 FIRST_VFP_REGNUM + regno + i * rshift);
5749 tmp = gen_rtx_EXPR_LIST
5750 (VOIDmode, tmp,
5751 GEN_INT (i * GET_MODE_SIZE (rmode)));
5752 XVECEXP (par, 0, i) = tmp;
5753 }
5754
5755 pcum->aapcs_reg = par;
5756 }
5757 else
5758 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5759 return true;
5760 }
5761 return false;
5762 }
5763
5764 static rtx
5765 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5766 machine_mode mode,
5767 const_tree type ATTRIBUTE_UNUSED)
5768 {
5769 if (!use_vfp_abi (pcs_variant, false))
5770 return NULL;
5771
5772 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5773 {
5774 int count;
5775 machine_mode ag_mode;
5776 int i;
5777 rtx par;
5778 int shift;
5779
5780 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5781 &ag_mode, &count);
5782
5783 if (!TARGET_NEON)
5784 {
5785 if (ag_mode == V2SImode)
5786 ag_mode = DImode;
5787 else if (ag_mode == V4SImode)
5788 {
5789 ag_mode = DImode;
5790 count *= 2;
5791 }
5792 }
5793 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5794 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5795 for (i = 0; i < count; i++)
5796 {
5797 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5798 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5799 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5800 XVECEXP (par, 0, i) = tmp;
5801 }
5802
5803 return par;
5804 }
5805
5806 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5807 }
5808
5809 static void
5810 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5811 machine_mode mode ATTRIBUTE_UNUSED,
5812 const_tree type ATTRIBUTE_UNUSED)
5813 {
5814 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5815 pcum->aapcs_vfp_reg_alloc = 0;
5816 return;
5817 }
5818
5819 #define AAPCS_CP(X) \
5820 { \
5821 aapcs_ ## X ## _cum_init, \
5822 aapcs_ ## X ## _is_call_candidate, \
5823 aapcs_ ## X ## _allocate, \
5824 aapcs_ ## X ## _is_return_candidate, \
5825 aapcs_ ## X ## _allocate_return_reg, \
5826 aapcs_ ## X ## _advance \
5827 }
5828
5829 /* Table of co-processors that can be used to pass arguments in
5830 registers. Idealy no arugment should be a candidate for more than
5831 one co-processor table entry, but the table is processed in order
5832 and stops after the first match. If that entry then fails to put
5833 the argument into a co-processor register, the argument will go on
5834 the stack. */
5835 static struct
5836 {
5837 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5838 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5839
5840 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5841 BLKmode) is a candidate for this co-processor's registers; this
5842 function should ignore any position-dependent state in
5843 CUMULATIVE_ARGS and only use call-type dependent information. */
5844 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5845
5846 /* Return true if the argument does get a co-processor register; it
5847 should set aapcs_reg to an RTX of the register allocated as is
5848 required for a return from FUNCTION_ARG. */
5849 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5850
5851 /* Return true if a result of mode MODE (or type TYPE if MODE is
5852 BLKmode) is can be returned in this co-processor's registers. */
5853 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5854
5855 /* Allocate and return an RTX element to hold the return type of a
5856 call, this routine must not fail and will only be called if
5857 is_return_candidate returned true with the same parameters. */
5858 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5859
5860 /* Finish processing this argument and prepare to start processing
5861 the next one. */
5862 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5863 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5864 {
5865 AAPCS_CP(vfp)
5866 };
5867
5868 #undef AAPCS_CP
5869
5870 static int
5871 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5872 const_tree type)
5873 {
5874 int i;
5875
5876 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5877 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5878 return i;
5879
5880 return -1;
5881 }
5882
5883 static int
5884 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5885 {
5886 /* We aren't passed a decl, so we can't check that a call is local.
5887 However, it isn't clear that that would be a win anyway, since it
5888 might limit some tail-calling opportunities. */
5889 enum arm_pcs pcs_variant;
5890
5891 if (fntype)
5892 {
5893 const_tree fndecl = NULL_TREE;
5894
5895 if (TREE_CODE (fntype) == FUNCTION_DECL)
5896 {
5897 fndecl = fntype;
5898 fntype = TREE_TYPE (fntype);
5899 }
5900
5901 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5902 }
5903 else
5904 pcs_variant = arm_pcs_default;
5905
5906 if (pcs_variant != ARM_PCS_AAPCS)
5907 {
5908 int i;
5909
5910 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5911 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5912 TYPE_MODE (type),
5913 type))
5914 return i;
5915 }
5916 return -1;
5917 }
5918
5919 static rtx
5920 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5921 const_tree fntype)
5922 {
5923 /* We aren't passed a decl, so we can't check that a call is local.
5924 However, it isn't clear that that would be a win anyway, since it
5925 might limit some tail-calling opportunities. */
5926 enum arm_pcs pcs_variant;
5927 int unsignedp ATTRIBUTE_UNUSED;
5928
5929 if (fntype)
5930 {
5931 const_tree fndecl = NULL_TREE;
5932
5933 if (TREE_CODE (fntype) == FUNCTION_DECL)
5934 {
5935 fndecl = fntype;
5936 fntype = TREE_TYPE (fntype);
5937 }
5938
5939 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5940 }
5941 else
5942 pcs_variant = arm_pcs_default;
5943
5944 /* Promote integer types. */
5945 if (type && INTEGRAL_TYPE_P (type))
5946 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5947
5948 if (pcs_variant != ARM_PCS_AAPCS)
5949 {
5950 int i;
5951
5952 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5953 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5954 type))
5955 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5956 mode, type);
5957 }
5958
5959 /* Promotes small structs returned in a register to full-word size
5960 for big-endian AAPCS. */
5961 if (type && arm_return_in_msb (type))
5962 {
5963 HOST_WIDE_INT size = int_size_in_bytes (type);
5964 if (size % UNITS_PER_WORD != 0)
5965 {
5966 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5967 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5968 }
5969 }
5970
5971 return gen_rtx_REG (mode, R0_REGNUM);
5972 }
5973
5974 static rtx
5975 aapcs_libcall_value (machine_mode mode)
5976 {
5977 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5978 && GET_MODE_SIZE (mode) <= 4)
5979 mode = SImode;
5980
5981 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5982 }
5983
5984 /* Lay out a function argument using the AAPCS rules. The rule
5985 numbers referred to here are those in the AAPCS. */
5986 static void
5987 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5988 const_tree type, bool named)
5989 {
5990 int nregs, nregs2;
5991 int ncrn;
5992
5993 /* We only need to do this once per argument. */
5994 if (pcum->aapcs_arg_processed)
5995 return;
5996
5997 pcum->aapcs_arg_processed = true;
5998
5999 /* Special case: if named is false then we are handling an incoming
6000 anonymous argument which is on the stack. */
6001 if (!named)
6002 return;
6003
6004 /* Is this a potential co-processor register candidate? */
6005 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6006 {
6007 int slot = aapcs_select_call_coproc (pcum, mode, type);
6008 pcum->aapcs_cprc_slot = slot;
6009
6010 /* We don't have to apply any of the rules from part B of the
6011 preparation phase, these are handled elsewhere in the
6012 compiler. */
6013
6014 if (slot >= 0)
6015 {
6016 /* A Co-processor register candidate goes either in its own
6017 class of registers or on the stack. */
6018 if (!pcum->aapcs_cprc_failed[slot])
6019 {
6020 /* C1.cp - Try to allocate the argument to co-processor
6021 registers. */
6022 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6023 return;
6024
6025 /* C2.cp - Put the argument on the stack and note that we
6026 can't assign any more candidates in this slot. We also
6027 need to note that we have allocated stack space, so that
6028 we won't later try to split a non-cprc candidate between
6029 core registers and the stack. */
6030 pcum->aapcs_cprc_failed[slot] = true;
6031 pcum->can_split = false;
6032 }
6033
6034 /* We didn't get a register, so this argument goes on the
6035 stack. */
6036 gcc_assert (pcum->can_split == false);
6037 return;
6038 }
6039 }
6040
6041 /* C3 - For double-word aligned arguments, round the NCRN up to the
6042 next even number. */
6043 ncrn = pcum->aapcs_ncrn;
6044 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6045 ncrn++;
6046
6047 nregs = ARM_NUM_REGS2(mode, type);
6048
6049 /* Sigh, this test should really assert that nregs > 0, but a GCC
6050 extension allows empty structs and then gives them empty size; it
6051 then allows such a structure to be passed by value. For some of
6052 the code below we have to pretend that such an argument has
6053 non-zero size so that we 'locate' it correctly either in
6054 registers or on the stack. */
6055 gcc_assert (nregs >= 0);
6056
6057 nregs2 = nregs ? nregs : 1;
6058
6059 /* C4 - Argument fits entirely in core registers. */
6060 if (ncrn + nregs2 <= NUM_ARG_REGS)
6061 {
6062 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6063 pcum->aapcs_next_ncrn = ncrn + nregs;
6064 return;
6065 }
6066
6067 /* C5 - Some core registers left and there are no arguments already
6068 on the stack: split this argument between the remaining core
6069 registers and the stack. */
6070 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6071 {
6072 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6073 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6074 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6075 return;
6076 }
6077
6078 /* C6 - NCRN is set to 4. */
6079 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6080
6081 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6082 return;
6083 }
6084
6085 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6086 for a call to a function whose data type is FNTYPE.
6087 For a library call, FNTYPE is NULL. */
6088 void
6089 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6090 rtx libname,
6091 tree fndecl ATTRIBUTE_UNUSED)
6092 {
6093 /* Long call handling. */
6094 if (fntype)
6095 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6096 else
6097 pcum->pcs_variant = arm_pcs_default;
6098
6099 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6100 {
6101 if (arm_libcall_uses_aapcs_base (libname))
6102 pcum->pcs_variant = ARM_PCS_AAPCS;
6103
6104 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6105 pcum->aapcs_reg = NULL_RTX;
6106 pcum->aapcs_partial = 0;
6107 pcum->aapcs_arg_processed = false;
6108 pcum->aapcs_cprc_slot = -1;
6109 pcum->can_split = true;
6110
6111 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6112 {
6113 int i;
6114
6115 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6116 {
6117 pcum->aapcs_cprc_failed[i] = false;
6118 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6119 }
6120 }
6121 return;
6122 }
6123
6124 /* Legacy ABIs */
6125
6126 /* On the ARM, the offset starts at 0. */
6127 pcum->nregs = 0;
6128 pcum->iwmmxt_nregs = 0;
6129 pcum->can_split = true;
6130
6131 /* Varargs vectors are treated the same as long long.
6132 named_count avoids having to change the way arm handles 'named' */
6133 pcum->named_count = 0;
6134 pcum->nargs = 0;
6135
6136 if (TARGET_REALLY_IWMMXT && fntype)
6137 {
6138 tree fn_arg;
6139
6140 for (fn_arg = TYPE_ARG_TYPES (fntype);
6141 fn_arg;
6142 fn_arg = TREE_CHAIN (fn_arg))
6143 pcum->named_count += 1;
6144
6145 if (! pcum->named_count)
6146 pcum->named_count = INT_MAX;
6147 }
6148 }
6149
6150 /* Return true if mode/type need doubleword alignment. */
6151 static bool
6152 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6153 {
6154 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6155 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
6156 }
6157
6158
6159 /* Determine where to put an argument to a function.
6160 Value is zero to push the argument on the stack,
6161 or a hard register in which to store the argument.
6162
6163 MODE is the argument's machine mode.
6164 TYPE is the data type of the argument (as a tree).
6165 This is null for libcalls where that information may
6166 not be available.
6167 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6168 the preceding args and about the function being called.
6169 NAMED is nonzero if this argument is a named parameter
6170 (otherwise it is an extra parameter matching an ellipsis).
6171
6172 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6173 other arguments are passed on the stack. If (NAMED == 0) (which happens
6174 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6175 defined), say it is passed in the stack (function_prologue will
6176 indeed make it pass in the stack if necessary). */
6177
6178 static rtx
6179 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6180 const_tree type, bool named)
6181 {
6182 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6183 int nregs;
6184
6185 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6186 a call insn (op3 of a call_value insn). */
6187 if (mode == VOIDmode)
6188 return const0_rtx;
6189
6190 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6191 {
6192 aapcs_layout_arg (pcum, mode, type, named);
6193 return pcum->aapcs_reg;
6194 }
6195
6196 /* Varargs vectors are treated the same as long long.
6197 named_count avoids having to change the way arm handles 'named' */
6198 if (TARGET_IWMMXT_ABI
6199 && arm_vector_mode_supported_p (mode)
6200 && pcum->named_count > pcum->nargs + 1)
6201 {
6202 if (pcum->iwmmxt_nregs <= 9)
6203 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6204 else
6205 {
6206 pcum->can_split = false;
6207 return NULL_RTX;
6208 }
6209 }
6210
6211 /* Put doubleword aligned quantities in even register pairs. */
6212 if (pcum->nregs & 1
6213 && ARM_DOUBLEWORD_ALIGN
6214 && arm_needs_doubleword_align (mode, type))
6215 pcum->nregs++;
6216
6217 /* Only allow splitting an arg between regs and memory if all preceding
6218 args were allocated to regs. For args passed by reference we only count
6219 the reference pointer. */
6220 if (pcum->can_split)
6221 nregs = 1;
6222 else
6223 nregs = ARM_NUM_REGS2 (mode, type);
6224
6225 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6226 return NULL_RTX;
6227
6228 return gen_rtx_REG (mode, pcum->nregs);
6229 }
6230
6231 static unsigned int
6232 arm_function_arg_boundary (machine_mode mode, const_tree type)
6233 {
6234 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6235 ? DOUBLEWORD_ALIGNMENT
6236 : PARM_BOUNDARY);
6237 }
6238
6239 static int
6240 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6241 tree type, bool named)
6242 {
6243 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6244 int nregs = pcum->nregs;
6245
6246 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6247 {
6248 aapcs_layout_arg (pcum, mode, type, named);
6249 return pcum->aapcs_partial;
6250 }
6251
6252 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6253 return 0;
6254
6255 if (NUM_ARG_REGS > nregs
6256 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6257 && pcum->can_split)
6258 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6259
6260 return 0;
6261 }
6262
6263 /* Update the data in PCUM to advance over an argument
6264 of mode MODE and data type TYPE.
6265 (TYPE is null for libcalls where that information may not be available.) */
6266
6267 static void
6268 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6269 const_tree type, bool named)
6270 {
6271 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6272
6273 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6274 {
6275 aapcs_layout_arg (pcum, mode, type, named);
6276
6277 if (pcum->aapcs_cprc_slot >= 0)
6278 {
6279 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6280 type);
6281 pcum->aapcs_cprc_slot = -1;
6282 }
6283
6284 /* Generic stuff. */
6285 pcum->aapcs_arg_processed = false;
6286 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6287 pcum->aapcs_reg = NULL_RTX;
6288 pcum->aapcs_partial = 0;
6289 }
6290 else
6291 {
6292 pcum->nargs += 1;
6293 if (arm_vector_mode_supported_p (mode)
6294 && pcum->named_count > pcum->nargs
6295 && TARGET_IWMMXT_ABI)
6296 pcum->iwmmxt_nregs += 1;
6297 else
6298 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6299 }
6300 }
6301
6302 /* Variable sized types are passed by reference. This is a GCC
6303 extension to the ARM ABI. */
6304
6305 static bool
6306 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6307 machine_mode mode ATTRIBUTE_UNUSED,
6308 const_tree type, bool named ATTRIBUTE_UNUSED)
6309 {
6310 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6311 }
6312 \f
6313 /* Encode the current state of the #pragma [no_]long_calls. */
6314 typedef enum
6315 {
6316 OFF, /* No #pragma [no_]long_calls is in effect. */
6317 LONG, /* #pragma long_calls is in effect. */
6318 SHORT /* #pragma no_long_calls is in effect. */
6319 } arm_pragma_enum;
6320
6321 static arm_pragma_enum arm_pragma_long_calls = OFF;
6322
6323 void
6324 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6325 {
6326 arm_pragma_long_calls = LONG;
6327 }
6328
6329 void
6330 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6331 {
6332 arm_pragma_long_calls = SHORT;
6333 }
6334
6335 void
6336 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6337 {
6338 arm_pragma_long_calls = OFF;
6339 }
6340 \f
6341 /* Handle an attribute requiring a FUNCTION_DECL;
6342 arguments as in struct attribute_spec.handler. */
6343 static tree
6344 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6345 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6346 {
6347 if (TREE_CODE (*node) != FUNCTION_DECL)
6348 {
6349 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6350 name);
6351 *no_add_attrs = true;
6352 }
6353
6354 return NULL_TREE;
6355 }
6356
6357 /* Handle an "interrupt" or "isr" attribute;
6358 arguments as in struct attribute_spec.handler. */
6359 static tree
6360 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6361 bool *no_add_attrs)
6362 {
6363 if (DECL_P (*node))
6364 {
6365 if (TREE_CODE (*node) != FUNCTION_DECL)
6366 {
6367 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6368 name);
6369 *no_add_attrs = true;
6370 }
6371 /* FIXME: the argument if any is checked for type attributes;
6372 should it be checked for decl ones? */
6373 }
6374 else
6375 {
6376 if (TREE_CODE (*node) == FUNCTION_TYPE
6377 || TREE_CODE (*node) == METHOD_TYPE)
6378 {
6379 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6380 {
6381 warning (OPT_Wattributes, "%qE attribute ignored",
6382 name);
6383 *no_add_attrs = true;
6384 }
6385 }
6386 else if (TREE_CODE (*node) == POINTER_TYPE
6387 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6388 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6389 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6390 {
6391 *node = build_variant_type_copy (*node);
6392 TREE_TYPE (*node) = build_type_attribute_variant
6393 (TREE_TYPE (*node),
6394 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6395 *no_add_attrs = true;
6396 }
6397 else
6398 {
6399 /* Possibly pass this attribute on from the type to a decl. */
6400 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6401 | (int) ATTR_FLAG_FUNCTION_NEXT
6402 | (int) ATTR_FLAG_ARRAY_NEXT))
6403 {
6404 *no_add_attrs = true;
6405 return tree_cons (name, args, NULL_TREE);
6406 }
6407 else
6408 {
6409 warning (OPT_Wattributes, "%qE attribute ignored",
6410 name);
6411 }
6412 }
6413 }
6414
6415 return NULL_TREE;
6416 }
6417
6418 /* Handle a "pcs" attribute; arguments as in struct
6419 attribute_spec.handler. */
6420 static tree
6421 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6422 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6423 {
6424 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6425 {
6426 warning (OPT_Wattributes, "%qE attribute ignored", name);
6427 *no_add_attrs = true;
6428 }
6429 return NULL_TREE;
6430 }
6431
6432 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6433 /* Handle the "notshared" attribute. This attribute is another way of
6434 requesting hidden visibility. ARM's compiler supports
6435 "__declspec(notshared)"; we support the same thing via an
6436 attribute. */
6437
6438 static tree
6439 arm_handle_notshared_attribute (tree *node,
6440 tree name ATTRIBUTE_UNUSED,
6441 tree args ATTRIBUTE_UNUSED,
6442 int flags ATTRIBUTE_UNUSED,
6443 bool *no_add_attrs)
6444 {
6445 tree decl = TYPE_NAME (*node);
6446
6447 if (decl)
6448 {
6449 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6450 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6451 *no_add_attrs = false;
6452 }
6453 return NULL_TREE;
6454 }
6455 #endif
6456
6457 /* Return 0 if the attributes for two types are incompatible, 1 if they
6458 are compatible, and 2 if they are nearly compatible (which causes a
6459 warning to be generated). */
6460 static int
6461 arm_comp_type_attributes (const_tree type1, const_tree type2)
6462 {
6463 int l1, l2, s1, s2;
6464
6465 /* Check for mismatch of non-default calling convention. */
6466 if (TREE_CODE (type1) != FUNCTION_TYPE)
6467 return 1;
6468
6469 /* Check for mismatched call attributes. */
6470 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6471 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6472 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6473 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6474
6475 /* Only bother to check if an attribute is defined. */
6476 if (l1 | l2 | s1 | s2)
6477 {
6478 /* If one type has an attribute, the other must have the same attribute. */
6479 if ((l1 != l2) || (s1 != s2))
6480 return 0;
6481
6482 /* Disallow mixed attributes. */
6483 if ((l1 & s2) || (l2 & s1))
6484 return 0;
6485 }
6486
6487 /* Check for mismatched ISR attribute. */
6488 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6489 if (! l1)
6490 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6491 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6492 if (! l2)
6493 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6494 if (l1 != l2)
6495 return 0;
6496
6497 return 1;
6498 }
6499
6500 /* Assigns default attributes to newly defined type. This is used to
6501 set short_call/long_call attributes for function types of
6502 functions defined inside corresponding #pragma scopes. */
6503 static void
6504 arm_set_default_type_attributes (tree type)
6505 {
6506 /* Add __attribute__ ((long_call)) to all functions, when
6507 inside #pragma long_calls or __attribute__ ((short_call)),
6508 when inside #pragma no_long_calls. */
6509 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6510 {
6511 tree type_attr_list, attr_name;
6512 type_attr_list = TYPE_ATTRIBUTES (type);
6513
6514 if (arm_pragma_long_calls == LONG)
6515 attr_name = get_identifier ("long_call");
6516 else if (arm_pragma_long_calls == SHORT)
6517 attr_name = get_identifier ("short_call");
6518 else
6519 return;
6520
6521 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6522 TYPE_ATTRIBUTES (type) = type_attr_list;
6523 }
6524 }
6525 \f
6526 /* Return true if DECL is known to be linked into section SECTION. */
6527
6528 static bool
6529 arm_function_in_section_p (tree decl, section *section)
6530 {
6531 /* We can only be certain about the prevailing symbol definition. */
6532 if (!decl_binds_to_current_def_p (decl))
6533 return false;
6534
6535 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6536 if (!DECL_SECTION_NAME (decl))
6537 {
6538 /* Make sure that we will not create a unique section for DECL. */
6539 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6540 return false;
6541 }
6542
6543 return function_section (decl) == section;
6544 }
6545
6546 /* Return nonzero if a 32-bit "long_call" should be generated for
6547 a call from the current function to DECL. We generate a long_call
6548 if the function:
6549
6550 a. has an __attribute__((long call))
6551 or b. is within the scope of a #pragma long_calls
6552 or c. the -mlong-calls command line switch has been specified
6553
6554 However we do not generate a long call if the function:
6555
6556 d. has an __attribute__ ((short_call))
6557 or e. is inside the scope of a #pragma no_long_calls
6558 or f. is defined in the same section as the current function. */
6559
6560 bool
6561 arm_is_long_call_p (tree decl)
6562 {
6563 tree attrs;
6564
6565 if (!decl)
6566 return TARGET_LONG_CALLS;
6567
6568 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6569 if (lookup_attribute ("short_call", attrs))
6570 return false;
6571
6572 /* For "f", be conservative, and only cater for cases in which the
6573 whole of the current function is placed in the same section. */
6574 if (!flag_reorder_blocks_and_partition
6575 && TREE_CODE (decl) == FUNCTION_DECL
6576 && arm_function_in_section_p (decl, current_function_section ()))
6577 return false;
6578
6579 if (lookup_attribute ("long_call", attrs))
6580 return true;
6581
6582 return TARGET_LONG_CALLS;
6583 }
6584
6585 /* Return nonzero if it is ok to make a tail-call to DECL. */
6586 static bool
6587 arm_function_ok_for_sibcall (tree decl, tree exp)
6588 {
6589 unsigned long func_type;
6590
6591 if (cfun->machine->sibcall_blocked)
6592 return false;
6593
6594 /* Never tailcall something if we are generating code for Thumb-1. */
6595 if (TARGET_THUMB1)
6596 return false;
6597
6598 /* The PIC register is live on entry to VxWorks PLT entries, so we
6599 must make the call before restoring the PIC register. */
6600 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6601 return false;
6602
6603 /* If we are interworking and the function is not declared static
6604 then we can't tail-call it unless we know that it exists in this
6605 compilation unit (since it might be a Thumb routine). */
6606 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6607 && !TREE_ASM_WRITTEN (decl))
6608 return false;
6609
6610 func_type = arm_current_func_type ();
6611 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6612 if (IS_INTERRUPT (func_type))
6613 return false;
6614
6615 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6616 {
6617 /* Check that the return value locations are the same. For
6618 example that we aren't returning a value from the sibling in
6619 a VFP register but then need to transfer it to a core
6620 register. */
6621 rtx a, b;
6622
6623 a = arm_function_value (TREE_TYPE (exp), decl, false);
6624 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6625 cfun->decl, false);
6626 if (!rtx_equal_p (a, b))
6627 return false;
6628 }
6629
6630 /* Never tailcall if function may be called with a misaligned SP. */
6631 if (IS_STACKALIGN (func_type))
6632 return false;
6633
6634 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6635 references should become a NOP. Don't convert such calls into
6636 sibling calls. */
6637 if (TARGET_AAPCS_BASED
6638 && arm_abi == ARM_ABI_AAPCS
6639 && decl
6640 && DECL_WEAK (decl))
6641 return false;
6642
6643 /* Everything else is ok. */
6644 return true;
6645 }
6646
6647 \f
6648 /* Addressing mode support functions. */
6649
6650 /* Return nonzero if X is a legitimate immediate operand when compiling
6651 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6652 int
6653 legitimate_pic_operand_p (rtx x)
6654 {
6655 if (GET_CODE (x) == SYMBOL_REF
6656 || (GET_CODE (x) == CONST
6657 && GET_CODE (XEXP (x, 0)) == PLUS
6658 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6659 return 0;
6660
6661 return 1;
6662 }
6663
6664 /* Record that the current function needs a PIC register. Initialize
6665 cfun->machine->pic_reg if we have not already done so. */
6666
6667 static void
6668 require_pic_register (void)
6669 {
6670 /* A lot of the logic here is made obscure by the fact that this
6671 routine gets called as part of the rtx cost estimation process.
6672 We don't want those calls to affect any assumptions about the real
6673 function; and further, we can't call entry_of_function() until we
6674 start the real expansion process. */
6675 if (!crtl->uses_pic_offset_table)
6676 {
6677 gcc_assert (can_create_pseudo_p ());
6678 if (arm_pic_register != INVALID_REGNUM
6679 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6680 {
6681 if (!cfun->machine->pic_reg)
6682 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6683
6684 /* Play games to avoid marking the function as needing pic
6685 if we are being called as part of the cost-estimation
6686 process. */
6687 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6688 crtl->uses_pic_offset_table = 1;
6689 }
6690 else
6691 {
6692 rtx_insn *seq, *insn;
6693
6694 if (!cfun->machine->pic_reg)
6695 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6696
6697 /* Play games to avoid marking the function as needing pic
6698 if we are being called as part of the cost-estimation
6699 process. */
6700 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6701 {
6702 crtl->uses_pic_offset_table = 1;
6703 start_sequence ();
6704
6705 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6706 && arm_pic_register > LAST_LO_REGNUM)
6707 emit_move_insn (cfun->machine->pic_reg,
6708 gen_rtx_REG (Pmode, arm_pic_register));
6709 else
6710 arm_load_pic_register (0UL);
6711
6712 seq = get_insns ();
6713 end_sequence ();
6714
6715 for (insn = seq; insn; insn = NEXT_INSN (insn))
6716 if (INSN_P (insn))
6717 INSN_LOCATION (insn) = prologue_location;
6718
6719 /* We can be called during expansion of PHI nodes, where
6720 we can't yet emit instructions directly in the final
6721 insn stream. Queue the insns on the entry edge, they will
6722 be committed after everything else is expanded. */
6723 insert_insn_on_edge (seq,
6724 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6725 }
6726 }
6727 }
6728 }
6729
6730 rtx
6731 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6732 {
6733 if (GET_CODE (orig) == SYMBOL_REF
6734 || GET_CODE (orig) == LABEL_REF)
6735 {
6736 rtx insn;
6737
6738 if (reg == 0)
6739 {
6740 gcc_assert (can_create_pseudo_p ());
6741 reg = gen_reg_rtx (Pmode);
6742 }
6743
6744 /* VxWorks does not impose a fixed gap between segments; the run-time
6745 gap can be different from the object-file gap. We therefore can't
6746 use GOTOFF unless we are absolutely sure that the symbol is in the
6747 same segment as the GOT. Unfortunately, the flexibility of linker
6748 scripts means that we can't be sure of that in general, so assume
6749 that GOTOFF is never valid on VxWorks. */
6750 if ((GET_CODE (orig) == LABEL_REF
6751 || (GET_CODE (orig) == SYMBOL_REF &&
6752 SYMBOL_REF_LOCAL_P (orig)))
6753 && NEED_GOT_RELOC
6754 && arm_pic_data_is_text_relative)
6755 insn = arm_pic_static_addr (orig, reg);
6756 else
6757 {
6758 rtx pat;
6759 rtx mem;
6760
6761 /* If this function doesn't have a pic register, create one now. */
6762 require_pic_register ();
6763
6764 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6765
6766 /* Make the MEM as close to a constant as possible. */
6767 mem = SET_SRC (pat);
6768 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6769 MEM_READONLY_P (mem) = 1;
6770 MEM_NOTRAP_P (mem) = 1;
6771
6772 insn = emit_insn (pat);
6773 }
6774
6775 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6776 by loop. */
6777 set_unique_reg_note (insn, REG_EQUAL, orig);
6778
6779 return reg;
6780 }
6781 else if (GET_CODE (orig) == CONST)
6782 {
6783 rtx base, offset;
6784
6785 if (GET_CODE (XEXP (orig, 0)) == PLUS
6786 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6787 return orig;
6788
6789 /* Handle the case where we have: const (UNSPEC_TLS). */
6790 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6791 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6792 return orig;
6793
6794 /* Handle the case where we have:
6795 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6796 CONST_INT. */
6797 if (GET_CODE (XEXP (orig, 0)) == PLUS
6798 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6799 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6800 {
6801 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6802 return orig;
6803 }
6804
6805 if (reg == 0)
6806 {
6807 gcc_assert (can_create_pseudo_p ());
6808 reg = gen_reg_rtx (Pmode);
6809 }
6810
6811 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6812
6813 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6814 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6815 base == reg ? 0 : reg);
6816
6817 if (CONST_INT_P (offset))
6818 {
6819 /* The base register doesn't really matter, we only want to
6820 test the index for the appropriate mode. */
6821 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6822 {
6823 gcc_assert (can_create_pseudo_p ());
6824 offset = force_reg (Pmode, offset);
6825 }
6826
6827 if (CONST_INT_P (offset))
6828 return plus_constant (Pmode, base, INTVAL (offset));
6829 }
6830
6831 if (GET_MODE_SIZE (mode) > 4
6832 && (GET_MODE_CLASS (mode) == MODE_INT
6833 || TARGET_SOFT_FLOAT))
6834 {
6835 emit_insn (gen_addsi3 (reg, base, offset));
6836 return reg;
6837 }
6838
6839 return gen_rtx_PLUS (Pmode, base, offset);
6840 }
6841
6842 return orig;
6843 }
6844
6845
6846 /* Find a spare register to use during the prolog of a function. */
6847
6848 static int
6849 thumb_find_work_register (unsigned long pushed_regs_mask)
6850 {
6851 int reg;
6852
6853 /* Check the argument registers first as these are call-used. The
6854 register allocation order means that sometimes r3 might be used
6855 but earlier argument registers might not, so check them all. */
6856 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6857 if (!df_regs_ever_live_p (reg))
6858 return reg;
6859
6860 /* Before going on to check the call-saved registers we can try a couple
6861 more ways of deducing that r3 is available. The first is when we are
6862 pushing anonymous arguments onto the stack and we have less than 4
6863 registers worth of fixed arguments(*). In this case r3 will be part of
6864 the variable argument list and so we can be sure that it will be
6865 pushed right at the start of the function. Hence it will be available
6866 for the rest of the prologue.
6867 (*): ie crtl->args.pretend_args_size is greater than 0. */
6868 if (cfun->machine->uses_anonymous_args
6869 && crtl->args.pretend_args_size > 0)
6870 return LAST_ARG_REGNUM;
6871
6872 /* The other case is when we have fixed arguments but less than 4 registers
6873 worth. In this case r3 might be used in the body of the function, but
6874 it is not being used to convey an argument into the function. In theory
6875 we could just check crtl->args.size to see how many bytes are
6876 being passed in argument registers, but it seems that it is unreliable.
6877 Sometimes it will have the value 0 when in fact arguments are being
6878 passed. (See testcase execute/20021111-1.c for an example). So we also
6879 check the args_info.nregs field as well. The problem with this field is
6880 that it makes no allowances for arguments that are passed to the
6881 function but which are not used. Hence we could miss an opportunity
6882 when a function has an unused argument in r3. But it is better to be
6883 safe than to be sorry. */
6884 if (! cfun->machine->uses_anonymous_args
6885 && crtl->args.size >= 0
6886 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6887 && (TARGET_AAPCS_BASED
6888 ? crtl->args.info.aapcs_ncrn < 4
6889 : crtl->args.info.nregs < 4))
6890 return LAST_ARG_REGNUM;
6891
6892 /* Otherwise look for a call-saved register that is going to be pushed. */
6893 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6894 if (pushed_regs_mask & (1 << reg))
6895 return reg;
6896
6897 if (TARGET_THUMB2)
6898 {
6899 /* Thumb-2 can use high regs. */
6900 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6901 if (pushed_regs_mask & (1 << reg))
6902 return reg;
6903 }
6904 /* Something went wrong - thumb_compute_save_reg_mask()
6905 should have arranged for a suitable register to be pushed. */
6906 gcc_unreachable ();
6907 }
6908
6909 static GTY(()) int pic_labelno;
6910
6911 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6912 low register. */
6913
6914 void
6915 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6916 {
6917 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6918
6919 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6920 return;
6921
6922 gcc_assert (flag_pic);
6923
6924 pic_reg = cfun->machine->pic_reg;
6925 if (TARGET_VXWORKS_RTP)
6926 {
6927 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6928 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6929 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6930
6931 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6932
6933 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6934 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6935 }
6936 else
6937 {
6938 /* We use an UNSPEC rather than a LABEL_REF because this label
6939 never appears in the code stream. */
6940
6941 labelno = GEN_INT (pic_labelno++);
6942 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6943 l1 = gen_rtx_CONST (VOIDmode, l1);
6944
6945 /* On the ARM the PC register contains 'dot + 8' at the time of the
6946 addition, on the Thumb it is 'dot + 4'. */
6947 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6948 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6949 UNSPEC_GOTSYM_OFF);
6950 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6951
6952 if (TARGET_32BIT)
6953 {
6954 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6955 }
6956 else /* TARGET_THUMB1 */
6957 {
6958 if (arm_pic_register != INVALID_REGNUM
6959 && REGNO (pic_reg) > LAST_LO_REGNUM)
6960 {
6961 /* We will have pushed the pic register, so we should always be
6962 able to find a work register. */
6963 pic_tmp = gen_rtx_REG (SImode,
6964 thumb_find_work_register (saved_regs));
6965 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6966 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6967 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6968 }
6969 else if (arm_pic_register != INVALID_REGNUM
6970 && arm_pic_register > LAST_LO_REGNUM
6971 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6972 {
6973 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6974 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6975 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6976 }
6977 else
6978 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6979 }
6980 }
6981
6982 /* Need to emit this whether or not we obey regdecls,
6983 since setjmp/longjmp can cause life info to screw up. */
6984 emit_use (pic_reg);
6985 }
6986
6987 /* Generate code to load the address of a static var when flag_pic is set. */
6988 static rtx
6989 arm_pic_static_addr (rtx orig, rtx reg)
6990 {
6991 rtx l1, labelno, offset_rtx, insn;
6992
6993 gcc_assert (flag_pic);
6994
6995 /* We use an UNSPEC rather than a LABEL_REF because this label
6996 never appears in the code stream. */
6997 labelno = GEN_INT (pic_labelno++);
6998 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6999 l1 = gen_rtx_CONST (VOIDmode, l1);
7000
7001 /* On the ARM the PC register contains 'dot + 8' at the time of the
7002 addition, on the Thumb it is 'dot + 4'. */
7003 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7004 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7005 UNSPEC_SYMBOL_OFFSET);
7006 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7007
7008 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7009 return insn;
7010 }
7011
7012 /* Return nonzero if X is valid as an ARM state addressing register. */
7013 static int
7014 arm_address_register_rtx_p (rtx x, int strict_p)
7015 {
7016 int regno;
7017
7018 if (!REG_P (x))
7019 return 0;
7020
7021 regno = REGNO (x);
7022
7023 if (strict_p)
7024 return ARM_REGNO_OK_FOR_BASE_P (regno);
7025
7026 return (regno <= LAST_ARM_REGNUM
7027 || regno >= FIRST_PSEUDO_REGISTER
7028 || regno == FRAME_POINTER_REGNUM
7029 || regno == ARG_POINTER_REGNUM);
7030 }
7031
7032 /* Return TRUE if this rtx is the difference of a symbol and a label,
7033 and will reduce to a PC-relative relocation in the object file.
7034 Expressions like this can be left alone when generating PIC, rather
7035 than forced through the GOT. */
7036 static int
7037 pcrel_constant_p (rtx x)
7038 {
7039 if (GET_CODE (x) == MINUS)
7040 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7041
7042 return FALSE;
7043 }
7044
7045 /* Return true if X will surely end up in an index register after next
7046 splitting pass. */
7047 static bool
7048 will_be_in_index_register (const_rtx x)
7049 {
7050 /* arm.md: calculate_pic_address will split this into a register. */
7051 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7052 }
7053
7054 /* Return nonzero if X is a valid ARM state address operand. */
7055 int
7056 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7057 int strict_p)
7058 {
7059 bool use_ldrd;
7060 enum rtx_code code = GET_CODE (x);
7061
7062 if (arm_address_register_rtx_p (x, strict_p))
7063 return 1;
7064
7065 use_ldrd = (TARGET_LDRD
7066 && (mode == DImode
7067 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7068
7069 if (code == POST_INC || code == PRE_DEC
7070 || ((code == PRE_INC || code == POST_DEC)
7071 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7072 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7073
7074 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7075 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7076 && GET_CODE (XEXP (x, 1)) == PLUS
7077 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7078 {
7079 rtx addend = XEXP (XEXP (x, 1), 1);
7080
7081 /* Don't allow ldrd post increment by register because it's hard
7082 to fixup invalid register choices. */
7083 if (use_ldrd
7084 && GET_CODE (x) == POST_MODIFY
7085 && REG_P (addend))
7086 return 0;
7087
7088 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7089 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7090 }
7091
7092 /* After reload constants split into minipools will have addresses
7093 from a LABEL_REF. */
7094 else if (reload_completed
7095 && (code == LABEL_REF
7096 || (code == CONST
7097 && GET_CODE (XEXP (x, 0)) == PLUS
7098 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7099 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7100 return 1;
7101
7102 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7103 return 0;
7104
7105 else if (code == PLUS)
7106 {
7107 rtx xop0 = XEXP (x, 0);
7108 rtx xop1 = XEXP (x, 1);
7109
7110 return ((arm_address_register_rtx_p (xop0, strict_p)
7111 && ((CONST_INT_P (xop1)
7112 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7113 || (!strict_p && will_be_in_index_register (xop1))))
7114 || (arm_address_register_rtx_p (xop1, strict_p)
7115 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7116 }
7117
7118 #if 0
7119 /* Reload currently can't handle MINUS, so disable this for now */
7120 else if (GET_CODE (x) == MINUS)
7121 {
7122 rtx xop0 = XEXP (x, 0);
7123 rtx xop1 = XEXP (x, 1);
7124
7125 return (arm_address_register_rtx_p (xop0, strict_p)
7126 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7127 }
7128 #endif
7129
7130 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7131 && code == SYMBOL_REF
7132 && CONSTANT_POOL_ADDRESS_P (x)
7133 && ! (flag_pic
7134 && symbol_mentioned_p (get_pool_constant (x))
7135 && ! pcrel_constant_p (get_pool_constant (x))))
7136 return 1;
7137
7138 return 0;
7139 }
7140
7141 /* Return nonzero if X is a valid Thumb-2 address operand. */
7142 static int
7143 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7144 {
7145 bool use_ldrd;
7146 enum rtx_code code = GET_CODE (x);
7147
7148 if (arm_address_register_rtx_p (x, strict_p))
7149 return 1;
7150
7151 use_ldrd = (TARGET_LDRD
7152 && (mode == DImode
7153 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7154
7155 if (code == POST_INC || code == PRE_DEC
7156 || ((code == PRE_INC || code == POST_DEC)
7157 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7158 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7159
7160 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7161 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7162 && GET_CODE (XEXP (x, 1)) == PLUS
7163 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7164 {
7165 /* Thumb-2 only has autoincrement by constant. */
7166 rtx addend = XEXP (XEXP (x, 1), 1);
7167 HOST_WIDE_INT offset;
7168
7169 if (!CONST_INT_P (addend))
7170 return 0;
7171
7172 offset = INTVAL(addend);
7173 if (GET_MODE_SIZE (mode) <= 4)
7174 return (offset > -256 && offset < 256);
7175
7176 return (use_ldrd && offset > -1024 && offset < 1024
7177 && (offset & 3) == 0);
7178 }
7179
7180 /* After reload constants split into minipools will have addresses
7181 from a LABEL_REF. */
7182 else if (reload_completed
7183 && (code == LABEL_REF
7184 || (code == CONST
7185 && GET_CODE (XEXP (x, 0)) == PLUS
7186 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7187 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7188 return 1;
7189
7190 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7191 return 0;
7192
7193 else if (code == PLUS)
7194 {
7195 rtx xop0 = XEXP (x, 0);
7196 rtx xop1 = XEXP (x, 1);
7197
7198 return ((arm_address_register_rtx_p (xop0, strict_p)
7199 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7200 || (!strict_p && will_be_in_index_register (xop1))))
7201 || (arm_address_register_rtx_p (xop1, strict_p)
7202 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7203 }
7204
7205 /* Normally we can assign constant values to target registers without
7206 the help of constant pool. But there are cases we have to use constant
7207 pool like:
7208 1) assign a label to register.
7209 2) sign-extend a 8bit value to 32bit and then assign to register.
7210
7211 Constant pool access in format:
7212 (set (reg r0) (mem (symbol_ref (".LC0"))))
7213 will cause the use of literal pool (later in function arm_reorg).
7214 So here we mark such format as an invalid format, then the compiler
7215 will adjust it into:
7216 (set (reg r0) (symbol_ref (".LC0")))
7217 (set (reg r0) (mem (reg r0))).
7218 No extra register is required, and (mem (reg r0)) won't cause the use
7219 of literal pools. */
7220 else if (arm_disable_literal_pool && code == SYMBOL_REF
7221 && CONSTANT_POOL_ADDRESS_P (x))
7222 return 0;
7223
7224 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7225 && code == SYMBOL_REF
7226 && CONSTANT_POOL_ADDRESS_P (x)
7227 && ! (flag_pic
7228 && symbol_mentioned_p (get_pool_constant (x))
7229 && ! pcrel_constant_p (get_pool_constant (x))))
7230 return 1;
7231
7232 return 0;
7233 }
7234
7235 /* Return nonzero if INDEX is valid for an address index operand in
7236 ARM state. */
7237 static int
7238 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7239 int strict_p)
7240 {
7241 HOST_WIDE_INT range;
7242 enum rtx_code code = GET_CODE (index);
7243
7244 /* Standard coprocessor addressing modes. */
7245 if (TARGET_HARD_FLOAT
7246 && TARGET_VFP
7247 && (mode == SFmode || mode == DFmode))
7248 return (code == CONST_INT && INTVAL (index) < 1024
7249 && INTVAL (index) > -1024
7250 && (INTVAL (index) & 3) == 0);
7251
7252 /* For quad modes, we restrict the constant offset to be slightly less
7253 than what the instruction format permits. We do this because for
7254 quad mode moves, we will actually decompose them into two separate
7255 double-mode reads or writes. INDEX must therefore be a valid
7256 (double-mode) offset and so should INDEX+8. */
7257 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7258 return (code == CONST_INT
7259 && INTVAL (index) < 1016
7260 && INTVAL (index) > -1024
7261 && (INTVAL (index) & 3) == 0);
7262
7263 /* We have no such constraint on double mode offsets, so we permit the
7264 full range of the instruction format. */
7265 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7266 return (code == CONST_INT
7267 && INTVAL (index) < 1024
7268 && INTVAL (index) > -1024
7269 && (INTVAL (index) & 3) == 0);
7270
7271 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7272 return (code == CONST_INT
7273 && INTVAL (index) < 1024
7274 && INTVAL (index) > -1024
7275 && (INTVAL (index) & 3) == 0);
7276
7277 if (arm_address_register_rtx_p (index, strict_p)
7278 && (GET_MODE_SIZE (mode) <= 4))
7279 return 1;
7280
7281 if (mode == DImode || mode == DFmode)
7282 {
7283 if (code == CONST_INT)
7284 {
7285 HOST_WIDE_INT val = INTVAL (index);
7286
7287 if (TARGET_LDRD)
7288 return val > -256 && val < 256;
7289 else
7290 return val > -4096 && val < 4092;
7291 }
7292
7293 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7294 }
7295
7296 if (GET_MODE_SIZE (mode) <= 4
7297 && ! (arm_arch4
7298 && (mode == HImode
7299 || mode == HFmode
7300 || (mode == QImode && outer == SIGN_EXTEND))))
7301 {
7302 if (code == MULT)
7303 {
7304 rtx xiop0 = XEXP (index, 0);
7305 rtx xiop1 = XEXP (index, 1);
7306
7307 return ((arm_address_register_rtx_p (xiop0, strict_p)
7308 && power_of_two_operand (xiop1, SImode))
7309 || (arm_address_register_rtx_p (xiop1, strict_p)
7310 && power_of_two_operand (xiop0, SImode)));
7311 }
7312 else if (code == LSHIFTRT || code == ASHIFTRT
7313 || code == ASHIFT || code == ROTATERT)
7314 {
7315 rtx op = XEXP (index, 1);
7316
7317 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7318 && CONST_INT_P (op)
7319 && INTVAL (op) > 0
7320 && INTVAL (op) <= 31);
7321 }
7322 }
7323
7324 /* For ARM v4 we may be doing a sign-extend operation during the
7325 load. */
7326 if (arm_arch4)
7327 {
7328 if (mode == HImode
7329 || mode == HFmode
7330 || (outer == SIGN_EXTEND && mode == QImode))
7331 range = 256;
7332 else
7333 range = 4096;
7334 }
7335 else
7336 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7337
7338 return (code == CONST_INT
7339 && INTVAL (index) < range
7340 && INTVAL (index) > -range);
7341 }
7342
7343 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7344 index operand. i.e. 1, 2, 4 or 8. */
7345 static bool
7346 thumb2_index_mul_operand (rtx op)
7347 {
7348 HOST_WIDE_INT val;
7349
7350 if (!CONST_INT_P (op))
7351 return false;
7352
7353 val = INTVAL(op);
7354 return (val == 1 || val == 2 || val == 4 || val == 8);
7355 }
7356
7357 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7358 static int
7359 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7360 {
7361 enum rtx_code code = GET_CODE (index);
7362
7363 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7364 /* Standard coprocessor addressing modes. */
7365 if (TARGET_HARD_FLOAT
7366 && TARGET_VFP
7367 && (mode == SFmode || mode == DFmode))
7368 return (code == CONST_INT && INTVAL (index) < 1024
7369 /* Thumb-2 allows only > -256 index range for it's core register
7370 load/stores. Since we allow SF/DF in core registers, we have
7371 to use the intersection between -256~4096 (core) and -1024~1024
7372 (coprocessor). */
7373 && INTVAL (index) > -256
7374 && (INTVAL (index) & 3) == 0);
7375
7376 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7377 {
7378 /* For DImode assume values will usually live in core regs
7379 and only allow LDRD addressing modes. */
7380 if (!TARGET_LDRD || mode != DImode)
7381 return (code == CONST_INT
7382 && INTVAL (index) < 1024
7383 && INTVAL (index) > -1024
7384 && (INTVAL (index) & 3) == 0);
7385 }
7386
7387 /* For quad modes, we restrict the constant offset to be slightly less
7388 than what the instruction format permits. We do this because for
7389 quad mode moves, we will actually decompose them into two separate
7390 double-mode reads or writes. INDEX must therefore be a valid
7391 (double-mode) offset and so should INDEX+8. */
7392 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7393 return (code == CONST_INT
7394 && INTVAL (index) < 1016
7395 && INTVAL (index) > -1024
7396 && (INTVAL (index) & 3) == 0);
7397
7398 /* We have no such constraint on double mode offsets, so we permit the
7399 full range of the instruction format. */
7400 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7401 return (code == CONST_INT
7402 && INTVAL (index) < 1024
7403 && INTVAL (index) > -1024
7404 && (INTVAL (index) & 3) == 0);
7405
7406 if (arm_address_register_rtx_p (index, strict_p)
7407 && (GET_MODE_SIZE (mode) <= 4))
7408 return 1;
7409
7410 if (mode == DImode || mode == DFmode)
7411 {
7412 if (code == CONST_INT)
7413 {
7414 HOST_WIDE_INT val = INTVAL (index);
7415 /* ??? Can we assume ldrd for thumb2? */
7416 /* Thumb-2 ldrd only has reg+const addressing modes. */
7417 /* ldrd supports offsets of +-1020.
7418 However the ldr fallback does not. */
7419 return val > -256 && val < 256 && (val & 3) == 0;
7420 }
7421 else
7422 return 0;
7423 }
7424
7425 if (code == MULT)
7426 {
7427 rtx xiop0 = XEXP (index, 0);
7428 rtx xiop1 = XEXP (index, 1);
7429
7430 return ((arm_address_register_rtx_p (xiop0, strict_p)
7431 && thumb2_index_mul_operand (xiop1))
7432 || (arm_address_register_rtx_p (xiop1, strict_p)
7433 && thumb2_index_mul_operand (xiop0)));
7434 }
7435 else if (code == ASHIFT)
7436 {
7437 rtx op = XEXP (index, 1);
7438
7439 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7440 && CONST_INT_P (op)
7441 && INTVAL (op) > 0
7442 && INTVAL (op) <= 3);
7443 }
7444
7445 return (code == CONST_INT
7446 && INTVAL (index) < 4096
7447 && INTVAL (index) > -256);
7448 }
7449
7450 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7451 static int
7452 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7453 {
7454 int regno;
7455
7456 if (!REG_P (x))
7457 return 0;
7458
7459 regno = REGNO (x);
7460
7461 if (strict_p)
7462 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7463
7464 return (regno <= LAST_LO_REGNUM
7465 || regno > LAST_VIRTUAL_REGISTER
7466 || regno == FRAME_POINTER_REGNUM
7467 || (GET_MODE_SIZE (mode) >= 4
7468 && (regno == STACK_POINTER_REGNUM
7469 || regno >= FIRST_PSEUDO_REGISTER
7470 || x == hard_frame_pointer_rtx
7471 || x == arg_pointer_rtx)));
7472 }
7473
7474 /* Return nonzero if x is a legitimate index register. This is the case
7475 for any base register that can access a QImode object. */
7476 inline static int
7477 thumb1_index_register_rtx_p (rtx x, int strict_p)
7478 {
7479 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7480 }
7481
7482 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7483
7484 The AP may be eliminated to either the SP or the FP, so we use the
7485 least common denominator, e.g. SImode, and offsets from 0 to 64.
7486
7487 ??? Verify whether the above is the right approach.
7488
7489 ??? Also, the FP may be eliminated to the SP, so perhaps that
7490 needs special handling also.
7491
7492 ??? Look at how the mips16 port solves this problem. It probably uses
7493 better ways to solve some of these problems.
7494
7495 Although it is not incorrect, we don't accept QImode and HImode
7496 addresses based on the frame pointer or arg pointer until the
7497 reload pass starts. This is so that eliminating such addresses
7498 into stack based ones won't produce impossible code. */
7499 int
7500 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7501 {
7502 /* ??? Not clear if this is right. Experiment. */
7503 if (GET_MODE_SIZE (mode) < 4
7504 && !(reload_in_progress || reload_completed)
7505 && (reg_mentioned_p (frame_pointer_rtx, x)
7506 || reg_mentioned_p (arg_pointer_rtx, x)
7507 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7508 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7509 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7510 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7511 return 0;
7512
7513 /* Accept any base register. SP only in SImode or larger. */
7514 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7515 return 1;
7516
7517 /* This is PC relative data before arm_reorg runs. */
7518 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7519 && GET_CODE (x) == SYMBOL_REF
7520 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7521 return 1;
7522
7523 /* This is PC relative data after arm_reorg runs. */
7524 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7525 && reload_completed
7526 && (GET_CODE (x) == LABEL_REF
7527 || (GET_CODE (x) == CONST
7528 && GET_CODE (XEXP (x, 0)) == PLUS
7529 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7530 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7531 return 1;
7532
7533 /* Post-inc indexing only supported for SImode and larger. */
7534 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7535 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7536 return 1;
7537
7538 else if (GET_CODE (x) == PLUS)
7539 {
7540 /* REG+REG address can be any two index registers. */
7541 /* We disallow FRAME+REG addressing since we know that FRAME
7542 will be replaced with STACK, and SP relative addressing only
7543 permits SP+OFFSET. */
7544 if (GET_MODE_SIZE (mode) <= 4
7545 && XEXP (x, 0) != frame_pointer_rtx
7546 && XEXP (x, 1) != frame_pointer_rtx
7547 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7548 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7549 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7550 return 1;
7551
7552 /* REG+const has 5-7 bit offset for non-SP registers. */
7553 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7554 || XEXP (x, 0) == arg_pointer_rtx)
7555 && CONST_INT_P (XEXP (x, 1))
7556 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7557 return 1;
7558
7559 /* REG+const has 10-bit offset for SP, but only SImode and
7560 larger is supported. */
7561 /* ??? Should probably check for DI/DFmode overflow here
7562 just like GO_IF_LEGITIMATE_OFFSET does. */
7563 else if (REG_P (XEXP (x, 0))
7564 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7565 && GET_MODE_SIZE (mode) >= 4
7566 && CONST_INT_P (XEXP (x, 1))
7567 && INTVAL (XEXP (x, 1)) >= 0
7568 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7569 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7570 return 1;
7571
7572 else if (REG_P (XEXP (x, 0))
7573 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7574 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7575 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7576 && REGNO (XEXP (x, 0))
7577 <= LAST_VIRTUAL_POINTER_REGISTER))
7578 && GET_MODE_SIZE (mode) >= 4
7579 && CONST_INT_P (XEXP (x, 1))
7580 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7581 return 1;
7582 }
7583
7584 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7585 && GET_MODE_SIZE (mode) == 4
7586 && GET_CODE (x) == SYMBOL_REF
7587 && CONSTANT_POOL_ADDRESS_P (x)
7588 && ! (flag_pic
7589 && symbol_mentioned_p (get_pool_constant (x))
7590 && ! pcrel_constant_p (get_pool_constant (x))))
7591 return 1;
7592
7593 return 0;
7594 }
7595
7596 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7597 instruction of mode MODE. */
7598 int
7599 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7600 {
7601 switch (GET_MODE_SIZE (mode))
7602 {
7603 case 1:
7604 return val >= 0 && val < 32;
7605
7606 case 2:
7607 return val >= 0 && val < 64 && (val & 1) == 0;
7608
7609 default:
7610 return (val >= 0
7611 && (val + GET_MODE_SIZE (mode)) <= 128
7612 && (val & 3) == 0);
7613 }
7614 }
7615
7616 bool
7617 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7618 {
7619 if (TARGET_ARM)
7620 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7621 else if (TARGET_THUMB2)
7622 return thumb2_legitimate_address_p (mode, x, strict_p);
7623 else /* if (TARGET_THUMB1) */
7624 return thumb1_legitimate_address_p (mode, x, strict_p);
7625 }
7626
7627 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7628
7629 Given an rtx X being reloaded into a reg required to be
7630 in class CLASS, return the class of reg to actually use.
7631 In general this is just CLASS, but for the Thumb core registers and
7632 immediate constants we prefer a LO_REGS class or a subset. */
7633
7634 static reg_class_t
7635 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7636 {
7637 if (TARGET_32BIT)
7638 return rclass;
7639 else
7640 {
7641 if (rclass == GENERAL_REGS)
7642 return LO_REGS;
7643 else
7644 return rclass;
7645 }
7646 }
7647
7648 /* Build the SYMBOL_REF for __tls_get_addr. */
7649
7650 static GTY(()) rtx tls_get_addr_libfunc;
7651
7652 static rtx
7653 get_tls_get_addr (void)
7654 {
7655 if (!tls_get_addr_libfunc)
7656 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7657 return tls_get_addr_libfunc;
7658 }
7659
7660 rtx
7661 arm_load_tp (rtx target)
7662 {
7663 if (!target)
7664 target = gen_reg_rtx (SImode);
7665
7666 if (TARGET_HARD_TP)
7667 {
7668 /* Can return in any reg. */
7669 emit_insn (gen_load_tp_hard (target));
7670 }
7671 else
7672 {
7673 /* Always returned in r0. Immediately copy the result into a pseudo,
7674 otherwise other uses of r0 (e.g. setting up function arguments) may
7675 clobber the value. */
7676
7677 rtx tmp;
7678
7679 emit_insn (gen_load_tp_soft ());
7680
7681 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7682 emit_move_insn (target, tmp);
7683 }
7684 return target;
7685 }
7686
7687 static rtx
7688 load_tls_operand (rtx x, rtx reg)
7689 {
7690 rtx tmp;
7691
7692 if (reg == NULL_RTX)
7693 reg = gen_reg_rtx (SImode);
7694
7695 tmp = gen_rtx_CONST (SImode, x);
7696
7697 emit_move_insn (reg, tmp);
7698
7699 return reg;
7700 }
7701
7702 static rtx
7703 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7704 {
7705 rtx insns, label, labelno, sum;
7706
7707 gcc_assert (reloc != TLS_DESCSEQ);
7708 start_sequence ();
7709
7710 labelno = GEN_INT (pic_labelno++);
7711 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7712 label = gen_rtx_CONST (VOIDmode, label);
7713
7714 sum = gen_rtx_UNSPEC (Pmode,
7715 gen_rtvec (4, x, GEN_INT (reloc), label,
7716 GEN_INT (TARGET_ARM ? 8 : 4)),
7717 UNSPEC_TLS);
7718 reg = load_tls_operand (sum, reg);
7719
7720 if (TARGET_ARM)
7721 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7722 else
7723 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7724
7725 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7726 LCT_PURE, /* LCT_CONST? */
7727 Pmode, 1, reg, Pmode);
7728
7729 insns = get_insns ();
7730 end_sequence ();
7731
7732 return insns;
7733 }
7734
7735 static rtx
7736 arm_tls_descseq_addr (rtx x, rtx reg)
7737 {
7738 rtx labelno = GEN_INT (pic_labelno++);
7739 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7740 rtx sum = gen_rtx_UNSPEC (Pmode,
7741 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7742 gen_rtx_CONST (VOIDmode, label),
7743 GEN_INT (!TARGET_ARM)),
7744 UNSPEC_TLS);
7745 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7746
7747 emit_insn (gen_tlscall (x, labelno));
7748 if (!reg)
7749 reg = gen_reg_rtx (SImode);
7750 else
7751 gcc_assert (REGNO (reg) != R0_REGNUM);
7752
7753 emit_move_insn (reg, reg0);
7754
7755 return reg;
7756 }
7757
7758 rtx
7759 legitimize_tls_address (rtx x, rtx reg)
7760 {
7761 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7762 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7763
7764 switch (model)
7765 {
7766 case TLS_MODEL_GLOBAL_DYNAMIC:
7767 if (TARGET_GNU2_TLS)
7768 {
7769 reg = arm_tls_descseq_addr (x, reg);
7770
7771 tp = arm_load_tp (NULL_RTX);
7772
7773 dest = gen_rtx_PLUS (Pmode, tp, reg);
7774 }
7775 else
7776 {
7777 /* Original scheme */
7778 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7779 dest = gen_reg_rtx (Pmode);
7780 emit_libcall_block (insns, dest, ret, x);
7781 }
7782 return dest;
7783
7784 case TLS_MODEL_LOCAL_DYNAMIC:
7785 if (TARGET_GNU2_TLS)
7786 {
7787 reg = arm_tls_descseq_addr (x, reg);
7788
7789 tp = arm_load_tp (NULL_RTX);
7790
7791 dest = gen_rtx_PLUS (Pmode, tp, reg);
7792 }
7793 else
7794 {
7795 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7796
7797 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7798 share the LDM result with other LD model accesses. */
7799 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7800 UNSPEC_TLS);
7801 dest = gen_reg_rtx (Pmode);
7802 emit_libcall_block (insns, dest, ret, eqv);
7803
7804 /* Load the addend. */
7805 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7806 GEN_INT (TLS_LDO32)),
7807 UNSPEC_TLS);
7808 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7809 dest = gen_rtx_PLUS (Pmode, dest, addend);
7810 }
7811 return dest;
7812
7813 case TLS_MODEL_INITIAL_EXEC:
7814 labelno = GEN_INT (pic_labelno++);
7815 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7816 label = gen_rtx_CONST (VOIDmode, label);
7817 sum = gen_rtx_UNSPEC (Pmode,
7818 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7819 GEN_INT (TARGET_ARM ? 8 : 4)),
7820 UNSPEC_TLS);
7821 reg = load_tls_operand (sum, reg);
7822
7823 if (TARGET_ARM)
7824 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7825 else if (TARGET_THUMB2)
7826 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7827 else
7828 {
7829 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7830 emit_move_insn (reg, gen_const_mem (SImode, reg));
7831 }
7832
7833 tp = arm_load_tp (NULL_RTX);
7834
7835 return gen_rtx_PLUS (Pmode, tp, reg);
7836
7837 case TLS_MODEL_LOCAL_EXEC:
7838 tp = arm_load_tp (NULL_RTX);
7839
7840 reg = gen_rtx_UNSPEC (Pmode,
7841 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7842 UNSPEC_TLS);
7843 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7844
7845 return gen_rtx_PLUS (Pmode, tp, reg);
7846
7847 default:
7848 abort ();
7849 }
7850 }
7851
7852 /* Try machine-dependent ways of modifying an illegitimate address
7853 to be legitimate. If we find one, return the new, valid address. */
7854 rtx
7855 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7856 {
7857 if (arm_tls_referenced_p (x))
7858 {
7859 rtx addend = NULL;
7860
7861 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7862 {
7863 addend = XEXP (XEXP (x, 0), 1);
7864 x = XEXP (XEXP (x, 0), 0);
7865 }
7866
7867 if (GET_CODE (x) != SYMBOL_REF)
7868 return x;
7869
7870 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7871
7872 x = legitimize_tls_address (x, NULL_RTX);
7873
7874 if (addend)
7875 {
7876 x = gen_rtx_PLUS (SImode, x, addend);
7877 orig_x = x;
7878 }
7879 else
7880 return x;
7881 }
7882
7883 if (!TARGET_ARM)
7884 {
7885 /* TODO: legitimize_address for Thumb2. */
7886 if (TARGET_THUMB2)
7887 return x;
7888 return thumb_legitimize_address (x, orig_x, mode);
7889 }
7890
7891 if (GET_CODE (x) == PLUS)
7892 {
7893 rtx xop0 = XEXP (x, 0);
7894 rtx xop1 = XEXP (x, 1);
7895
7896 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7897 xop0 = force_reg (SImode, xop0);
7898
7899 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7900 && !symbol_mentioned_p (xop1))
7901 xop1 = force_reg (SImode, xop1);
7902
7903 if (ARM_BASE_REGISTER_RTX_P (xop0)
7904 && CONST_INT_P (xop1))
7905 {
7906 HOST_WIDE_INT n, low_n;
7907 rtx base_reg, val;
7908 n = INTVAL (xop1);
7909
7910 /* VFP addressing modes actually allow greater offsets, but for
7911 now we just stick with the lowest common denominator. */
7912 if (mode == DImode
7913 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7914 {
7915 low_n = n & 0x0f;
7916 n &= ~0x0f;
7917 if (low_n > 4)
7918 {
7919 n += 16;
7920 low_n -= 16;
7921 }
7922 }
7923 else
7924 {
7925 low_n = ((mode) == TImode ? 0
7926 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7927 n -= low_n;
7928 }
7929
7930 base_reg = gen_reg_rtx (SImode);
7931 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7932 emit_move_insn (base_reg, val);
7933 x = plus_constant (Pmode, base_reg, low_n);
7934 }
7935 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7936 x = gen_rtx_PLUS (SImode, xop0, xop1);
7937 }
7938
7939 /* XXX We don't allow MINUS any more -- see comment in
7940 arm_legitimate_address_outer_p (). */
7941 else if (GET_CODE (x) == MINUS)
7942 {
7943 rtx xop0 = XEXP (x, 0);
7944 rtx xop1 = XEXP (x, 1);
7945
7946 if (CONSTANT_P (xop0))
7947 xop0 = force_reg (SImode, xop0);
7948
7949 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7950 xop1 = force_reg (SImode, xop1);
7951
7952 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7953 x = gen_rtx_MINUS (SImode, xop0, xop1);
7954 }
7955
7956 /* Make sure to take full advantage of the pre-indexed addressing mode
7957 with absolute addresses which often allows for the base register to
7958 be factorized for multiple adjacent memory references, and it might
7959 even allows for the mini pool to be avoided entirely. */
7960 else if (CONST_INT_P (x) && optimize > 0)
7961 {
7962 unsigned int bits;
7963 HOST_WIDE_INT mask, base, index;
7964 rtx base_reg;
7965
7966 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7967 use a 8-bit index. So let's use a 12-bit index for SImode only and
7968 hope that arm_gen_constant will enable ldrb to use more bits. */
7969 bits = (mode == SImode) ? 12 : 8;
7970 mask = (1 << bits) - 1;
7971 base = INTVAL (x) & ~mask;
7972 index = INTVAL (x) & mask;
7973 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7974 {
7975 /* It'll most probably be more efficient to generate the base
7976 with more bits set and use a negative index instead. */
7977 base |= mask;
7978 index -= mask;
7979 }
7980 base_reg = force_reg (SImode, GEN_INT (base));
7981 x = plus_constant (Pmode, base_reg, index);
7982 }
7983
7984 if (flag_pic)
7985 {
7986 /* We need to find and carefully transform any SYMBOL and LABEL
7987 references; so go back to the original address expression. */
7988 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7989
7990 if (new_x != orig_x)
7991 x = new_x;
7992 }
7993
7994 return x;
7995 }
7996
7997
7998 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7999 to be legitimate. If we find one, return the new, valid address. */
8000 rtx
8001 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8002 {
8003 if (GET_CODE (x) == PLUS
8004 && CONST_INT_P (XEXP (x, 1))
8005 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8006 || INTVAL (XEXP (x, 1)) < 0))
8007 {
8008 rtx xop0 = XEXP (x, 0);
8009 rtx xop1 = XEXP (x, 1);
8010 HOST_WIDE_INT offset = INTVAL (xop1);
8011
8012 /* Try and fold the offset into a biasing of the base register and
8013 then offsetting that. Don't do this when optimizing for space
8014 since it can cause too many CSEs. */
8015 if (optimize_size && offset >= 0
8016 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8017 {
8018 HOST_WIDE_INT delta;
8019
8020 if (offset >= 256)
8021 delta = offset - (256 - GET_MODE_SIZE (mode));
8022 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8023 delta = 31 * GET_MODE_SIZE (mode);
8024 else
8025 delta = offset & (~31 * GET_MODE_SIZE (mode));
8026
8027 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8028 NULL_RTX);
8029 x = plus_constant (Pmode, xop0, delta);
8030 }
8031 else if (offset < 0 && offset > -256)
8032 /* Small negative offsets are best done with a subtract before the
8033 dereference, forcing these into a register normally takes two
8034 instructions. */
8035 x = force_operand (x, NULL_RTX);
8036 else
8037 {
8038 /* For the remaining cases, force the constant into a register. */
8039 xop1 = force_reg (SImode, xop1);
8040 x = gen_rtx_PLUS (SImode, xop0, xop1);
8041 }
8042 }
8043 else if (GET_CODE (x) == PLUS
8044 && s_register_operand (XEXP (x, 1), SImode)
8045 && !s_register_operand (XEXP (x, 0), SImode))
8046 {
8047 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8048
8049 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8050 }
8051
8052 if (flag_pic)
8053 {
8054 /* We need to find and carefully transform any SYMBOL and LABEL
8055 references; so go back to the original address expression. */
8056 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8057
8058 if (new_x != orig_x)
8059 x = new_x;
8060 }
8061
8062 return x;
8063 }
8064
8065 /* Return TRUE if X contains any TLS symbol references. */
8066
8067 bool
8068 arm_tls_referenced_p (rtx x)
8069 {
8070 if (! TARGET_HAVE_TLS)
8071 return false;
8072
8073 subrtx_iterator::array_type array;
8074 FOR_EACH_SUBRTX (iter, array, x, ALL)
8075 {
8076 const_rtx x = *iter;
8077 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8078 return true;
8079
8080 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8081 TLS offsets, not real symbol references. */
8082 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8083 iter.skip_subrtxes ();
8084 }
8085 return false;
8086 }
8087
8088 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8089
8090 On the ARM, allow any integer (invalid ones are removed later by insn
8091 patterns), nice doubles and symbol_refs which refer to the function's
8092 constant pool XXX.
8093
8094 When generating pic allow anything. */
8095
8096 static bool
8097 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8098 {
8099 return flag_pic || !label_mentioned_p (x);
8100 }
8101
8102 static bool
8103 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8104 {
8105 return (CONST_INT_P (x)
8106 || CONST_DOUBLE_P (x)
8107 || CONSTANT_ADDRESS_P (x)
8108 || flag_pic);
8109 }
8110
8111 static bool
8112 arm_legitimate_constant_p (machine_mode mode, rtx x)
8113 {
8114 return (!arm_cannot_force_const_mem (mode, x)
8115 && (TARGET_32BIT
8116 ? arm_legitimate_constant_p_1 (mode, x)
8117 : thumb_legitimate_constant_p (mode, x)));
8118 }
8119
8120 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8121
8122 static bool
8123 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8124 {
8125 rtx base, offset;
8126
8127 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8128 {
8129 split_const (x, &base, &offset);
8130 if (GET_CODE (base) == SYMBOL_REF
8131 && !offset_within_block_p (base, INTVAL (offset)))
8132 return true;
8133 }
8134 return arm_tls_referenced_p (x);
8135 }
8136 \f
8137 #define REG_OR_SUBREG_REG(X) \
8138 (REG_P (X) \
8139 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8140
8141 #define REG_OR_SUBREG_RTX(X) \
8142 (REG_P (X) ? (X) : SUBREG_REG (X))
8143
8144 static inline int
8145 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8146 {
8147 machine_mode mode = GET_MODE (x);
8148 int total, words;
8149
8150 switch (code)
8151 {
8152 case ASHIFT:
8153 case ASHIFTRT:
8154 case LSHIFTRT:
8155 case ROTATERT:
8156 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8157
8158 case PLUS:
8159 case MINUS:
8160 case COMPARE:
8161 case NEG:
8162 case NOT:
8163 return COSTS_N_INSNS (1);
8164
8165 case MULT:
8166 if (CONST_INT_P (XEXP (x, 1)))
8167 {
8168 int cycles = 0;
8169 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8170
8171 while (i)
8172 {
8173 i >>= 2;
8174 cycles++;
8175 }
8176 return COSTS_N_INSNS (2) + cycles;
8177 }
8178 return COSTS_N_INSNS (1) + 16;
8179
8180 case SET:
8181 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8182 the mode. */
8183 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8184 return (COSTS_N_INSNS (words)
8185 + 4 * ((MEM_P (SET_SRC (x)))
8186 + MEM_P (SET_DEST (x))));
8187
8188 case CONST_INT:
8189 if (outer == SET)
8190 {
8191 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8192 return 0;
8193 if (thumb_shiftable_const (INTVAL (x)))
8194 return COSTS_N_INSNS (2);
8195 return COSTS_N_INSNS (3);
8196 }
8197 else if ((outer == PLUS || outer == COMPARE)
8198 && INTVAL (x) < 256 && INTVAL (x) > -256)
8199 return 0;
8200 else if ((outer == IOR || outer == XOR || outer == AND)
8201 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8202 return COSTS_N_INSNS (1);
8203 else if (outer == AND)
8204 {
8205 int i;
8206 /* This duplicates the tests in the andsi3 expander. */
8207 for (i = 9; i <= 31; i++)
8208 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8209 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8210 return COSTS_N_INSNS (2);
8211 }
8212 else if (outer == ASHIFT || outer == ASHIFTRT
8213 || outer == LSHIFTRT)
8214 return 0;
8215 return COSTS_N_INSNS (2);
8216
8217 case CONST:
8218 case CONST_DOUBLE:
8219 case LABEL_REF:
8220 case SYMBOL_REF:
8221 return COSTS_N_INSNS (3);
8222
8223 case UDIV:
8224 case UMOD:
8225 case DIV:
8226 case MOD:
8227 return 100;
8228
8229 case TRUNCATE:
8230 return 99;
8231
8232 case AND:
8233 case XOR:
8234 case IOR:
8235 /* XXX guess. */
8236 return 8;
8237
8238 case MEM:
8239 /* XXX another guess. */
8240 /* Memory costs quite a lot for the first word, but subsequent words
8241 load at the equivalent of a single insn each. */
8242 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8243 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8244 ? 4 : 0));
8245
8246 case IF_THEN_ELSE:
8247 /* XXX a guess. */
8248 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8249 return 14;
8250 return 2;
8251
8252 case SIGN_EXTEND:
8253 case ZERO_EXTEND:
8254 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8255 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8256
8257 if (mode == SImode)
8258 return total;
8259
8260 if (arm_arch6)
8261 return total + COSTS_N_INSNS (1);
8262
8263 /* Assume a two-shift sequence. Increase the cost slightly so
8264 we prefer actual shifts over an extend operation. */
8265 return total + 1 + COSTS_N_INSNS (2);
8266
8267 default:
8268 return 99;
8269 }
8270 }
8271
8272 static inline bool
8273 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8274 {
8275 machine_mode mode = GET_MODE (x);
8276 enum rtx_code subcode;
8277 rtx operand;
8278 enum rtx_code code = GET_CODE (x);
8279 *total = 0;
8280
8281 switch (code)
8282 {
8283 case MEM:
8284 /* Memory costs quite a lot for the first word, but subsequent words
8285 load at the equivalent of a single insn each. */
8286 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8287 return true;
8288
8289 case DIV:
8290 case MOD:
8291 case UDIV:
8292 case UMOD:
8293 if (TARGET_HARD_FLOAT && mode == SFmode)
8294 *total = COSTS_N_INSNS (2);
8295 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8296 *total = COSTS_N_INSNS (4);
8297 else
8298 *total = COSTS_N_INSNS (20);
8299 return false;
8300
8301 case ROTATE:
8302 if (REG_P (XEXP (x, 1)))
8303 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8304 else if (!CONST_INT_P (XEXP (x, 1)))
8305 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8306
8307 /* Fall through */
8308 case ROTATERT:
8309 if (mode != SImode)
8310 {
8311 *total += COSTS_N_INSNS (4);
8312 return true;
8313 }
8314
8315 /* Fall through */
8316 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8317 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8318 if (mode == DImode)
8319 {
8320 *total += COSTS_N_INSNS (3);
8321 return true;
8322 }
8323
8324 *total += COSTS_N_INSNS (1);
8325 /* Increase the cost of complex shifts because they aren't any faster,
8326 and reduce dual issue opportunities. */
8327 if (arm_tune_cortex_a9
8328 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8329 ++*total;
8330
8331 return true;
8332
8333 case MINUS:
8334 if (mode == DImode)
8335 {
8336 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8337 if (CONST_INT_P (XEXP (x, 0))
8338 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8339 {
8340 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8341 return true;
8342 }
8343
8344 if (CONST_INT_P (XEXP (x, 1))
8345 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8346 {
8347 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8348 return true;
8349 }
8350
8351 return false;
8352 }
8353
8354 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8355 {
8356 if (TARGET_HARD_FLOAT
8357 && (mode == SFmode
8358 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8359 {
8360 *total = COSTS_N_INSNS (1);
8361 if (CONST_DOUBLE_P (XEXP (x, 0))
8362 && arm_const_double_rtx (XEXP (x, 0)))
8363 {
8364 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8365 return true;
8366 }
8367
8368 if (CONST_DOUBLE_P (XEXP (x, 1))
8369 && arm_const_double_rtx (XEXP (x, 1)))
8370 {
8371 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8372 return true;
8373 }
8374
8375 return false;
8376 }
8377 *total = COSTS_N_INSNS (20);
8378 return false;
8379 }
8380
8381 *total = COSTS_N_INSNS (1);
8382 if (CONST_INT_P (XEXP (x, 0))
8383 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8384 {
8385 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8386 return true;
8387 }
8388
8389 subcode = GET_CODE (XEXP (x, 1));
8390 if (subcode == ASHIFT || subcode == ASHIFTRT
8391 || subcode == LSHIFTRT
8392 || subcode == ROTATE || subcode == ROTATERT)
8393 {
8394 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8395 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8396 return true;
8397 }
8398
8399 /* A shift as a part of RSB costs no more than RSB itself. */
8400 if (GET_CODE (XEXP (x, 0)) == MULT
8401 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8402 {
8403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8404 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8405 return true;
8406 }
8407
8408 if (subcode == MULT
8409 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8410 {
8411 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8412 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8413 return true;
8414 }
8415
8416 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8417 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8418 {
8419 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8420 if (REG_P (XEXP (XEXP (x, 1), 0))
8421 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8422 *total += COSTS_N_INSNS (1);
8423
8424 return true;
8425 }
8426
8427 /* Fall through */
8428
8429 case PLUS:
8430 if (code == PLUS && arm_arch6 && mode == SImode
8431 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8432 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8433 {
8434 *total = COSTS_N_INSNS (1);
8435 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8436 0, speed);
8437 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8438 return true;
8439 }
8440
8441 /* MLA: All arguments must be registers. We filter out
8442 multiplication by a power of two, so that we fall down into
8443 the code below. */
8444 if (GET_CODE (XEXP (x, 0)) == MULT
8445 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8446 {
8447 /* The cost comes from the cost of the multiply. */
8448 return false;
8449 }
8450
8451 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8452 {
8453 if (TARGET_HARD_FLOAT
8454 && (mode == SFmode
8455 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8456 {
8457 *total = COSTS_N_INSNS (1);
8458 if (CONST_DOUBLE_P (XEXP (x, 1))
8459 && arm_const_double_rtx (XEXP (x, 1)))
8460 {
8461 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8462 return true;
8463 }
8464
8465 return false;
8466 }
8467
8468 *total = COSTS_N_INSNS (20);
8469 return false;
8470 }
8471
8472 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8473 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8474 {
8475 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8476 if (REG_P (XEXP (XEXP (x, 0), 0))
8477 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8478 *total += COSTS_N_INSNS (1);
8479 return true;
8480 }
8481
8482 /* Fall through */
8483
8484 case AND: case XOR: case IOR:
8485
8486 /* Normally the frame registers will be spilt into reg+const during
8487 reload, so it is a bad idea to combine them with other instructions,
8488 since then they might not be moved outside of loops. As a compromise
8489 we allow integration with ops that have a constant as their second
8490 operand. */
8491 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8492 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8493 && !CONST_INT_P (XEXP (x, 1)))
8494 *total = COSTS_N_INSNS (1);
8495
8496 if (mode == DImode)
8497 {
8498 *total += COSTS_N_INSNS (2);
8499 if (CONST_INT_P (XEXP (x, 1))
8500 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8501 {
8502 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8503 return true;
8504 }
8505
8506 return false;
8507 }
8508
8509 *total += COSTS_N_INSNS (1);
8510 if (CONST_INT_P (XEXP (x, 1))
8511 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8512 {
8513 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8514 return true;
8515 }
8516 subcode = GET_CODE (XEXP (x, 0));
8517 if (subcode == ASHIFT || subcode == ASHIFTRT
8518 || subcode == LSHIFTRT
8519 || subcode == ROTATE || subcode == ROTATERT)
8520 {
8521 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8522 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8523 return true;
8524 }
8525
8526 if (subcode == MULT
8527 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8528 {
8529 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8530 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8531 return true;
8532 }
8533
8534 if (subcode == UMIN || subcode == UMAX
8535 || subcode == SMIN || subcode == SMAX)
8536 {
8537 *total = COSTS_N_INSNS (3);
8538 return true;
8539 }
8540
8541 return false;
8542
8543 case MULT:
8544 /* This should have been handled by the CPU specific routines. */
8545 gcc_unreachable ();
8546
8547 case TRUNCATE:
8548 if (arm_arch3m && mode == SImode
8549 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8550 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8551 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8552 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8553 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8554 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8555 {
8556 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8557 return true;
8558 }
8559 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8560 return false;
8561
8562 case NEG:
8563 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8564 {
8565 if (TARGET_HARD_FLOAT
8566 && (mode == SFmode
8567 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8568 {
8569 *total = COSTS_N_INSNS (1);
8570 return false;
8571 }
8572 *total = COSTS_N_INSNS (2);
8573 return false;
8574 }
8575
8576 /* Fall through */
8577 case NOT:
8578 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8579 if (mode == SImode && code == NOT)
8580 {
8581 subcode = GET_CODE (XEXP (x, 0));
8582 if (subcode == ASHIFT || subcode == ASHIFTRT
8583 || subcode == LSHIFTRT
8584 || subcode == ROTATE || subcode == ROTATERT
8585 || (subcode == MULT
8586 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8587 {
8588 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8589 /* Register shifts cost an extra cycle. */
8590 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8591 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8592 subcode, 1, speed);
8593 return true;
8594 }
8595 }
8596
8597 return false;
8598
8599 case IF_THEN_ELSE:
8600 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8601 {
8602 *total = COSTS_N_INSNS (4);
8603 return true;
8604 }
8605
8606 operand = XEXP (x, 0);
8607
8608 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8609 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8610 && REG_P (XEXP (operand, 0))
8611 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8612 *total += COSTS_N_INSNS (1);
8613 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8614 + rtx_cost (XEXP (x, 2), code, 2, speed));
8615 return true;
8616
8617 case NE:
8618 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8619 {
8620 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8621 return true;
8622 }
8623 goto scc_insn;
8624
8625 case GE:
8626 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8627 && mode == SImode && XEXP (x, 1) == const0_rtx)
8628 {
8629 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8630 return true;
8631 }
8632 goto scc_insn;
8633
8634 case LT:
8635 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8636 && mode == SImode && XEXP (x, 1) == const0_rtx)
8637 {
8638 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8639 return true;
8640 }
8641 goto scc_insn;
8642
8643 case EQ:
8644 case GT:
8645 case LE:
8646 case GEU:
8647 case LTU:
8648 case GTU:
8649 case LEU:
8650 case UNORDERED:
8651 case ORDERED:
8652 case UNEQ:
8653 case UNGE:
8654 case UNLT:
8655 case UNGT:
8656 case UNLE:
8657 scc_insn:
8658 /* SCC insns. In the case where the comparison has already been
8659 performed, then they cost 2 instructions. Otherwise they need
8660 an additional comparison before them. */
8661 *total = COSTS_N_INSNS (2);
8662 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8663 {
8664 return true;
8665 }
8666
8667 /* Fall through */
8668 case COMPARE:
8669 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8670 {
8671 *total = 0;
8672 return true;
8673 }
8674
8675 *total += COSTS_N_INSNS (1);
8676 if (CONST_INT_P (XEXP (x, 1))
8677 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8678 {
8679 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8680 return true;
8681 }
8682
8683 subcode = GET_CODE (XEXP (x, 0));
8684 if (subcode == ASHIFT || subcode == ASHIFTRT
8685 || subcode == LSHIFTRT
8686 || subcode == ROTATE || subcode == ROTATERT)
8687 {
8688 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8689 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8690 return true;
8691 }
8692
8693 if (subcode == MULT
8694 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8695 {
8696 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8697 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8698 return true;
8699 }
8700
8701 return false;
8702
8703 case UMIN:
8704 case UMAX:
8705 case SMIN:
8706 case SMAX:
8707 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8708 if (!CONST_INT_P (XEXP (x, 1))
8709 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8710 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8711 return true;
8712
8713 case ABS:
8714 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8715 {
8716 if (TARGET_HARD_FLOAT
8717 && (mode == SFmode
8718 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8719 {
8720 *total = COSTS_N_INSNS (1);
8721 return false;
8722 }
8723 *total = COSTS_N_INSNS (20);
8724 return false;
8725 }
8726 *total = COSTS_N_INSNS (1);
8727 if (mode == DImode)
8728 *total += COSTS_N_INSNS (3);
8729 return false;
8730
8731 case SIGN_EXTEND:
8732 case ZERO_EXTEND:
8733 *total = 0;
8734 if (GET_MODE_CLASS (mode) == MODE_INT)
8735 {
8736 rtx op = XEXP (x, 0);
8737 machine_mode opmode = GET_MODE (op);
8738
8739 if (mode == DImode)
8740 *total += COSTS_N_INSNS (1);
8741
8742 if (opmode != SImode)
8743 {
8744 if (MEM_P (op))
8745 {
8746 /* If !arm_arch4, we use one of the extendhisi2_mem
8747 or movhi_bytes patterns for HImode. For a QImode
8748 sign extension, we first zero-extend from memory
8749 and then perform a shift sequence. */
8750 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8751 *total += COSTS_N_INSNS (2);
8752 }
8753 else if (arm_arch6)
8754 *total += COSTS_N_INSNS (1);
8755
8756 /* We don't have the necessary insn, so we need to perform some
8757 other operation. */
8758 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8759 /* An and with constant 255. */
8760 *total += COSTS_N_INSNS (1);
8761 else
8762 /* A shift sequence. Increase costs slightly to avoid
8763 combining two shifts into an extend operation. */
8764 *total += COSTS_N_INSNS (2) + 1;
8765 }
8766
8767 return false;
8768 }
8769
8770 switch (GET_MODE (XEXP (x, 0)))
8771 {
8772 case V8QImode:
8773 case V4HImode:
8774 case V2SImode:
8775 case V4QImode:
8776 case V2HImode:
8777 *total = COSTS_N_INSNS (1);
8778 return false;
8779
8780 default:
8781 gcc_unreachable ();
8782 }
8783 gcc_unreachable ();
8784
8785 case ZERO_EXTRACT:
8786 case SIGN_EXTRACT:
8787 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8788 return true;
8789
8790 case CONST_INT:
8791 if (const_ok_for_arm (INTVAL (x))
8792 || const_ok_for_arm (~INTVAL (x)))
8793 *total = COSTS_N_INSNS (1);
8794 else
8795 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8796 INTVAL (x), NULL_RTX,
8797 NULL_RTX, 0, 0));
8798 return true;
8799
8800 case CONST:
8801 case LABEL_REF:
8802 case SYMBOL_REF:
8803 *total = COSTS_N_INSNS (3);
8804 return true;
8805
8806 case HIGH:
8807 *total = COSTS_N_INSNS (1);
8808 return true;
8809
8810 case LO_SUM:
8811 *total = COSTS_N_INSNS (1);
8812 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8813 return true;
8814
8815 case CONST_DOUBLE:
8816 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8817 && (mode == SFmode || !TARGET_VFP_SINGLE))
8818 *total = COSTS_N_INSNS (1);
8819 else
8820 *total = COSTS_N_INSNS (4);
8821 return true;
8822
8823 case SET:
8824 /* The vec_extract patterns accept memory operands that require an
8825 address reload. Account for the cost of that reload to give the
8826 auto-inc-dec pass an incentive to try to replace them. */
8827 if (TARGET_NEON && MEM_P (SET_DEST (x))
8828 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8829 {
8830 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8831 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8832 *total += COSTS_N_INSNS (1);
8833 return true;
8834 }
8835 /* Likewise for the vec_set patterns. */
8836 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8837 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8838 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8839 {
8840 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8841 *total = rtx_cost (mem, code, 0, speed);
8842 if (!neon_vector_mem_operand (mem, 2, true))
8843 *total += COSTS_N_INSNS (1);
8844 return true;
8845 }
8846 return false;
8847
8848 case UNSPEC:
8849 /* We cost this as high as our memory costs to allow this to
8850 be hoisted from loops. */
8851 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8852 {
8853 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8854 }
8855 return true;
8856
8857 case CONST_VECTOR:
8858 if (TARGET_NEON
8859 && TARGET_HARD_FLOAT
8860 && outer == SET
8861 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8862 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8863 *total = COSTS_N_INSNS (1);
8864 else
8865 *total = COSTS_N_INSNS (4);
8866 return true;
8867
8868 default:
8869 *total = COSTS_N_INSNS (4);
8870 return false;
8871 }
8872 }
8873
8874 /* Estimates the size cost of thumb1 instructions.
8875 For now most of the code is copied from thumb1_rtx_costs. We need more
8876 fine grain tuning when we have more related test cases. */
8877 static inline int
8878 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8879 {
8880 machine_mode mode = GET_MODE (x);
8881 int words;
8882
8883 switch (code)
8884 {
8885 case ASHIFT:
8886 case ASHIFTRT:
8887 case LSHIFTRT:
8888 case ROTATERT:
8889 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8890
8891 case PLUS:
8892 case MINUS:
8893 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8894 defined by RTL expansion, especially for the expansion of
8895 multiplication. */
8896 if ((GET_CODE (XEXP (x, 0)) == MULT
8897 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8898 || (GET_CODE (XEXP (x, 1)) == MULT
8899 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8900 return COSTS_N_INSNS (2);
8901 /* On purpose fall through for normal RTX. */
8902 case COMPARE:
8903 case NEG:
8904 case NOT:
8905 return COSTS_N_INSNS (1);
8906
8907 case MULT:
8908 if (CONST_INT_P (XEXP (x, 1)))
8909 {
8910 /* Thumb1 mul instruction can't operate on const. We must Load it
8911 into a register first. */
8912 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8913 /* For the targets which have a very small and high-latency multiply
8914 unit, we prefer to synthesize the mult with up to 5 instructions,
8915 giving a good balance between size and performance. */
8916 if (arm_arch6m && arm_m_profile_small_mul)
8917 return COSTS_N_INSNS (5);
8918 else
8919 return COSTS_N_INSNS (1) + const_size;
8920 }
8921 return COSTS_N_INSNS (1);
8922
8923 case SET:
8924 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8925 the mode. */
8926 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8927 return COSTS_N_INSNS (words)
8928 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8929 || satisfies_constraint_K (SET_SRC (x))
8930 /* thumb1_movdi_insn. */
8931 || ((words > 1) && MEM_P (SET_SRC (x))));
8932
8933 case CONST_INT:
8934 if (outer == SET)
8935 {
8936 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8937 return COSTS_N_INSNS (1);
8938 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8939 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8940 return COSTS_N_INSNS (2);
8941 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8942 if (thumb_shiftable_const (INTVAL (x)))
8943 return COSTS_N_INSNS (2);
8944 return COSTS_N_INSNS (3);
8945 }
8946 else if ((outer == PLUS || outer == COMPARE)
8947 && INTVAL (x) < 256 && INTVAL (x) > -256)
8948 return 0;
8949 else if ((outer == IOR || outer == XOR || outer == AND)
8950 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8951 return COSTS_N_INSNS (1);
8952 else if (outer == AND)
8953 {
8954 int i;
8955 /* This duplicates the tests in the andsi3 expander. */
8956 for (i = 9; i <= 31; i++)
8957 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8958 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8959 return COSTS_N_INSNS (2);
8960 }
8961 else if (outer == ASHIFT || outer == ASHIFTRT
8962 || outer == LSHIFTRT)
8963 return 0;
8964 return COSTS_N_INSNS (2);
8965
8966 case CONST:
8967 case CONST_DOUBLE:
8968 case LABEL_REF:
8969 case SYMBOL_REF:
8970 return COSTS_N_INSNS (3);
8971
8972 case UDIV:
8973 case UMOD:
8974 case DIV:
8975 case MOD:
8976 return 100;
8977
8978 case TRUNCATE:
8979 return 99;
8980
8981 case AND:
8982 case XOR:
8983 case IOR:
8984 return COSTS_N_INSNS (1);
8985
8986 case MEM:
8987 return (COSTS_N_INSNS (1)
8988 + COSTS_N_INSNS (1)
8989 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8990 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8991 ? COSTS_N_INSNS (1) : 0));
8992
8993 case IF_THEN_ELSE:
8994 /* XXX a guess. */
8995 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8996 return 14;
8997 return 2;
8998
8999 case ZERO_EXTEND:
9000 /* XXX still guessing. */
9001 switch (GET_MODE (XEXP (x, 0)))
9002 {
9003 case QImode:
9004 return (1 + (mode == DImode ? 4 : 0)
9005 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9006
9007 case HImode:
9008 return (4 + (mode == DImode ? 4 : 0)
9009 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9010
9011 case SImode:
9012 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9013
9014 default:
9015 return 99;
9016 }
9017
9018 default:
9019 return 99;
9020 }
9021 }
9022
9023 /* RTX costs when optimizing for size. */
9024 static bool
9025 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9026 int *total)
9027 {
9028 machine_mode mode = GET_MODE (x);
9029 if (TARGET_THUMB1)
9030 {
9031 *total = thumb1_size_rtx_costs (x, code, outer_code);
9032 return true;
9033 }
9034
9035 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9036 switch (code)
9037 {
9038 case MEM:
9039 /* A memory access costs 1 insn if the mode is small, or the address is
9040 a single register, otherwise it costs one insn per word. */
9041 if (REG_P (XEXP (x, 0)))
9042 *total = COSTS_N_INSNS (1);
9043 else if (flag_pic
9044 && GET_CODE (XEXP (x, 0)) == PLUS
9045 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9046 /* This will be split into two instructions.
9047 See arm.md:calculate_pic_address. */
9048 *total = COSTS_N_INSNS (2);
9049 else
9050 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9051 return true;
9052
9053 case DIV:
9054 case MOD:
9055 case UDIV:
9056 case UMOD:
9057 /* Needs a libcall, so it costs about this. */
9058 *total = COSTS_N_INSNS (2);
9059 return false;
9060
9061 case ROTATE:
9062 if (mode == SImode && REG_P (XEXP (x, 1)))
9063 {
9064 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9065 return true;
9066 }
9067 /* Fall through */
9068 case ROTATERT:
9069 case ASHIFT:
9070 case LSHIFTRT:
9071 case ASHIFTRT:
9072 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9073 {
9074 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9075 return true;
9076 }
9077 else if (mode == SImode)
9078 {
9079 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9080 /* Slightly disparage register shifts, but not by much. */
9081 if (!CONST_INT_P (XEXP (x, 1)))
9082 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9083 return true;
9084 }
9085
9086 /* Needs a libcall. */
9087 *total = COSTS_N_INSNS (2);
9088 return false;
9089
9090 case MINUS:
9091 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9092 && (mode == SFmode || !TARGET_VFP_SINGLE))
9093 {
9094 *total = COSTS_N_INSNS (1);
9095 return false;
9096 }
9097
9098 if (mode == SImode)
9099 {
9100 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9101 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9102
9103 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9104 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9105 || subcode1 == ROTATE || subcode1 == ROTATERT
9106 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9107 || subcode1 == ASHIFTRT)
9108 {
9109 /* It's just the cost of the two operands. */
9110 *total = 0;
9111 return false;
9112 }
9113
9114 *total = COSTS_N_INSNS (1);
9115 return false;
9116 }
9117
9118 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9119 return false;
9120
9121 case PLUS:
9122 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9123 && (mode == SFmode || !TARGET_VFP_SINGLE))
9124 {
9125 *total = COSTS_N_INSNS (1);
9126 return false;
9127 }
9128
9129 /* A shift as a part of ADD costs nothing. */
9130 if (GET_CODE (XEXP (x, 0)) == MULT
9131 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9132 {
9133 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9134 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9135 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9136 return true;
9137 }
9138
9139 /* Fall through */
9140 case AND: case XOR: case IOR:
9141 if (mode == SImode)
9142 {
9143 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9144
9145 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9146 || subcode == LSHIFTRT || subcode == ASHIFTRT
9147 || (code == AND && subcode == NOT))
9148 {
9149 /* It's just the cost of the two operands. */
9150 *total = 0;
9151 return false;
9152 }
9153 }
9154
9155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9156 return false;
9157
9158 case MULT:
9159 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9160 return false;
9161
9162 case NEG:
9163 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9164 && (mode == SFmode || !TARGET_VFP_SINGLE))
9165 {
9166 *total = COSTS_N_INSNS (1);
9167 return false;
9168 }
9169
9170 /* Fall through */
9171 case NOT:
9172 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9173
9174 return false;
9175
9176 case IF_THEN_ELSE:
9177 *total = 0;
9178 return false;
9179
9180 case COMPARE:
9181 if (cc_register (XEXP (x, 0), VOIDmode))
9182 * total = 0;
9183 else
9184 *total = COSTS_N_INSNS (1);
9185 return false;
9186
9187 case ABS:
9188 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9189 && (mode == SFmode || !TARGET_VFP_SINGLE))
9190 *total = COSTS_N_INSNS (1);
9191 else
9192 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9193 return false;
9194
9195 case SIGN_EXTEND:
9196 case ZERO_EXTEND:
9197 return arm_rtx_costs_1 (x, outer_code, total, 0);
9198
9199 case CONST_INT:
9200 if (const_ok_for_arm (INTVAL (x)))
9201 /* A multiplication by a constant requires another instruction
9202 to load the constant to a register. */
9203 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9204 ? 1 : 0);
9205 else if (const_ok_for_arm (~INTVAL (x)))
9206 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9207 else if (const_ok_for_arm (-INTVAL (x)))
9208 {
9209 if (outer_code == COMPARE || outer_code == PLUS
9210 || outer_code == MINUS)
9211 *total = 0;
9212 else
9213 *total = COSTS_N_INSNS (1);
9214 }
9215 else
9216 *total = COSTS_N_INSNS (2);
9217 return true;
9218
9219 case CONST:
9220 case LABEL_REF:
9221 case SYMBOL_REF:
9222 *total = COSTS_N_INSNS (2);
9223 return true;
9224
9225 case CONST_DOUBLE:
9226 *total = COSTS_N_INSNS (4);
9227 return true;
9228
9229 case CONST_VECTOR:
9230 if (TARGET_NEON
9231 && TARGET_HARD_FLOAT
9232 && outer_code == SET
9233 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9234 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9235 *total = COSTS_N_INSNS (1);
9236 else
9237 *total = COSTS_N_INSNS (4);
9238 return true;
9239
9240 case HIGH:
9241 case LO_SUM:
9242 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9243 cost of these slightly. */
9244 *total = COSTS_N_INSNS (1) + 1;
9245 return true;
9246
9247 case SET:
9248 return false;
9249
9250 default:
9251 if (mode != VOIDmode)
9252 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9253 else
9254 *total = COSTS_N_INSNS (4); /* How knows? */
9255 return false;
9256 }
9257 }
9258
9259 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9260 operand, then return the operand that is being shifted. If the shift
9261 is not by a constant, then set SHIFT_REG to point to the operand.
9262 Return NULL if OP is not a shifter operand. */
9263 static rtx
9264 shifter_op_p (rtx op, rtx *shift_reg)
9265 {
9266 enum rtx_code code = GET_CODE (op);
9267
9268 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9269 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9270 return XEXP (op, 0);
9271 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9272 return XEXP (op, 0);
9273 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9274 || code == ASHIFTRT)
9275 {
9276 if (!CONST_INT_P (XEXP (op, 1)))
9277 *shift_reg = XEXP (op, 1);
9278 return XEXP (op, 0);
9279 }
9280
9281 return NULL;
9282 }
9283
9284 static bool
9285 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9286 {
9287 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9288 rtx_code code = GET_CODE (x);
9289 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9290
9291 switch (XINT (x, 1))
9292 {
9293 case UNSPEC_UNALIGNED_LOAD:
9294 /* We can only do unaligned loads into the integer unit, and we can't
9295 use LDM or LDRD. */
9296 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9297 if (speed_p)
9298 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9299 + extra_cost->ldst.load_unaligned);
9300
9301 #ifdef NOT_YET
9302 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9303 ADDR_SPACE_GENERIC, speed_p);
9304 #endif
9305 return true;
9306
9307 case UNSPEC_UNALIGNED_STORE:
9308 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9309 if (speed_p)
9310 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9311 + extra_cost->ldst.store_unaligned);
9312
9313 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9314 #ifdef NOT_YET
9315 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9316 ADDR_SPACE_GENERIC, speed_p);
9317 #endif
9318 return true;
9319
9320 case UNSPEC_VRINTZ:
9321 case UNSPEC_VRINTP:
9322 case UNSPEC_VRINTM:
9323 case UNSPEC_VRINTR:
9324 case UNSPEC_VRINTX:
9325 case UNSPEC_VRINTA:
9326 *cost = COSTS_N_INSNS (1);
9327 if (speed_p)
9328 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9329
9330 return true;
9331 default:
9332 *cost = COSTS_N_INSNS (2);
9333 break;
9334 }
9335 return true;
9336 }
9337
9338 /* Cost of a libcall. We assume one insn per argument, an amount for the
9339 call (one insn for -Os) and then one for processing the result. */
9340 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9341
9342 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9343 do \
9344 { \
9345 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9346 if (shift_op != NULL \
9347 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9348 { \
9349 if (shift_reg) \
9350 { \
9351 if (speed_p) \
9352 *cost += extra_cost->alu.arith_shift_reg; \
9353 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9354 } \
9355 else if (speed_p) \
9356 *cost += extra_cost->alu.arith_shift; \
9357 \
9358 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9359 + rtx_cost (XEXP (x, 1 - IDX), \
9360 OP, 1, speed_p)); \
9361 return true; \
9362 } \
9363 } \
9364 while (0);
9365
9366 /* RTX costs. Make an estimate of the cost of executing the operation
9367 X, which is contained with an operation with code OUTER_CODE.
9368 SPEED_P indicates whether the cost desired is the performance cost,
9369 or the size cost. The estimate is stored in COST and the return
9370 value is TRUE if the cost calculation is final, or FALSE if the
9371 caller should recurse through the operands of X to add additional
9372 costs.
9373
9374 We currently make no attempt to model the size savings of Thumb-2
9375 16-bit instructions. At the normal points in compilation where
9376 this code is called we have no measure of whether the condition
9377 flags are live or not, and thus no realistic way to determine what
9378 the size will eventually be. */
9379 static bool
9380 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9381 const struct cpu_cost_table *extra_cost,
9382 int *cost, bool speed_p)
9383 {
9384 machine_mode mode = GET_MODE (x);
9385
9386 if (TARGET_THUMB1)
9387 {
9388 if (speed_p)
9389 *cost = thumb1_rtx_costs (x, code, outer_code);
9390 else
9391 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9392 return true;
9393 }
9394
9395 switch (code)
9396 {
9397 case SET:
9398 *cost = 0;
9399 /* SET RTXs don't have a mode so we get it from the destination. */
9400 mode = GET_MODE (SET_DEST (x));
9401
9402 if (REG_P (SET_SRC (x))
9403 && REG_P (SET_DEST (x)))
9404 {
9405 /* Assume that most copies can be done with a single insn,
9406 unless we don't have HW FP, in which case everything
9407 larger than word mode will require two insns. */
9408 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9409 && GET_MODE_SIZE (mode) > 4)
9410 || mode == DImode)
9411 ? 2 : 1);
9412 /* Conditional register moves can be encoded
9413 in 16 bits in Thumb mode. */
9414 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9415 *cost >>= 1;
9416
9417 return true;
9418 }
9419
9420 if (CONST_INT_P (SET_SRC (x)))
9421 {
9422 /* Handle CONST_INT here, since the value doesn't have a mode
9423 and we would otherwise be unable to work out the true cost. */
9424 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9425 outer_code = SET;
9426 /* Slightly lower the cost of setting a core reg to a constant.
9427 This helps break up chains and allows for better scheduling. */
9428 if (REG_P (SET_DEST (x))
9429 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9430 *cost -= 1;
9431 x = SET_SRC (x);
9432 /* Immediate moves with an immediate in the range [0, 255] can be
9433 encoded in 16 bits in Thumb mode. */
9434 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9435 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9436 *cost >>= 1;
9437 goto const_int_cost;
9438 }
9439
9440 return false;
9441
9442 case MEM:
9443 /* A memory access costs 1 insn if the mode is small, or the address is
9444 a single register, otherwise it costs one insn per word. */
9445 if (REG_P (XEXP (x, 0)))
9446 *cost = COSTS_N_INSNS (1);
9447 else if (flag_pic
9448 && GET_CODE (XEXP (x, 0)) == PLUS
9449 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9450 /* This will be split into two instructions.
9451 See arm.md:calculate_pic_address. */
9452 *cost = COSTS_N_INSNS (2);
9453 else
9454 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9455
9456 /* For speed optimizations, add the costs of the address and
9457 accessing memory. */
9458 if (speed_p)
9459 #ifdef NOT_YET
9460 *cost += (extra_cost->ldst.load
9461 + arm_address_cost (XEXP (x, 0), mode,
9462 ADDR_SPACE_GENERIC, speed_p));
9463 #else
9464 *cost += extra_cost->ldst.load;
9465 #endif
9466 return true;
9467
9468 case PARALLEL:
9469 {
9470 /* Calculations of LDM costs are complex. We assume an initial cost
9471 (ldm_1st) which will load the number of registers mentioned in
9472 ldm_regs_per_insn_1st registers; then each additional
9473 ldm_regs_per_insn_subsequent registers cost one more insn. The
9474 formula for N regs is thus:
9475
9476 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9477 + ldm_regs_per_insn_subsequent - 1)
9478 / ldm_regs_per_insn_subsequent).
9479
9480 Additional costs may also be added for addressing. A similar
9481 formula is used for STM. */
9482
9483 bool is_ldm = load_multiple_operation (x, SImode);
9484 bool is_stm = store_multiple_operation (x, SImode);
9485
9486 *cost = COSTS_N_INSNS (1);
9487
9488 if (is_ldm || is_stm)
9489 {
9490 if (speed_p)
9491 {
9492 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9493 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9494 ? extra_cost->ldst.ldm_regs_per_insn_1st
9495 : extra_cost->ldst.stm_regs_per_insn_1st;
9496 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9497 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9498 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9499
9500 *cost += regs_per_insn_1st
9501 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9502 + regs_per_insn_sub - 1)
9503 / regs_per_insn_sub);
9504 return true;
9505 }
9506
9507 }
9508 return false;
9509 }
9510 case DIV:
9511 case UDIV:
9512 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9513 && (mode == SFmode || !TARGET_VFP_SINGLE))
9514 *cost = COSTS_N_INSNS (speed_p
9515 ? extra_cost->fp[mode != SFmode].div : 1);
9516 else if (mode == SImode && TARGET_IDIV)
9517 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9518 else
9519 *cost = LIBCALL_COST (2);
9520 return false; /* All arguments must be in registers. */
9521
9522 case MOD:
9523 case UMOD:
9524 *cost = LIBCALL_COST (2);
9525 return false; /* All arguments must be in registers. */
9526
9527 case ROTATE:
9528 if (mode == SImode && REG_P (XEXP (x, 1)))
9529 {
9530 *cost = (COSTS_N_INSNS (2)
9531 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9532 if (speed_p)
9533 *cost += extra_cost->alu.shift_reg;
9534 return true;
9535 }
9536 /* Fall through */
9537 case ROTATERT:
9538 case ASHIFT:
9539 case LSHIFTRT:
9540 case ASHIFTRT:
9541 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9542 {
9543 *cost = (COSTS_N_INSNS (3)
9544 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9545 if (speed_p)
9546 *cost += 2 * extra_cost->alu.shift;
9547 return true;
9548 }
9549 else if (mode == SImode)
9550 {
9551 *cost = (COSTS_N_INSNS (1)
9552 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9553 /* Slightly disparage register shifts at -Os, but not by much. */
9554 if (!CONST_INT_P (XEXP (x, 1)))
9555 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9556 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9557 return true;
9558 }
9559 else if (GET_MODE_CLASS (mode) == MODE_INT
9560 && GET_MODE_SIZE (mode) < 4)
9561 {
9562 if (code == ASHIFT)
9563 {
9564 *cost = (COSTS_N_INSNS (1)
9565 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9566 /* Slightly disparage register shifts at -Os, but not by
9567 much. */
9568 if (!CONST_INT_P (XEXP (x, 1)))
9569 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9570 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9571 }
9572 else if (code == LSHIFTRT || code == ASHIFTRT)
9573 {
9574 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9575 {
9576 /* Can use SBFX/UBFX. */
9577 *cost = COSTS_N_INSNS (1);
9578 if (speed_p)
9579 *cost += extra_cost->alu.bfx;
9580 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9581 }
9582 else
9583 {
9584 *cost = COSTS_N_INSNS (2);
9585 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9586 if (speed_p)
9587 {
9588 if (CONST_INT_P (XEXP (x, 1)))
9589 *cost += 2 * extra_cost->alu.shift;
9590 else
9591 *cost += (extra_cost->alu.shift
9592 + extra_cost->alu.shift_reg);
9593 }
9594 else
9595 /* Slightly disparage register shifts. */
9596 *cost += !CONST_INT_P (XEXP (x, 1));
9597 }
9598 }
9599 else /* Rotates. */
9600 {
9601 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9602 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9603 if (speed_p)
9604 {
9605 if (CONST_INT_P (XEXP (x, 1)))
9606 *cost += (2 * extra_cost->alu.shift
9607 + extra_cost->alu.log_shift);
9608 else
9609 *cost += (extra_cost->alu.shift
9610 + extra_cost->alu.shift_reg
9611 + extra_cost->alu.log_shift_reg);
9612 }
9613 }
9614 return true;
9615 }
9616
9617 *cost = LIBCALL_COST (2);
9618 return false;
9619
9620 case BSWAP:
9621 if (arm_arch6)
9622 {
9623 if (mode == SImode)
9624 {
9625 *cost = COSTS_N_INSNS (1);
9626 if (speed_p)
9627 *cost += extra_cost->alu.rev;
9628
9629 return false;
9630 }
9631 }
9632 else
9633 {
9634 /* No rev instruction available. Look at arm_legacy_rev
9635 and thumb_legacy_rev for the form of RTL used then. */
9636 if (TARGET_THUMB)
9637 {
9638 *cost = COSTS_N_INSNS (10);
9639
9640 if (speed_p)
9641 {
9642 *cost += 6 * extra_cost->alu.shift;
9643 *cost += 3 * extra_cost->alu.logical;
9644 }
9645 }
9646 else
9647 {
9648 *cost = COSTS_N_INSNS (5);
9649
9650 if (speed_p)
9651 {
9652 *cost += 2 * extra_cost->alu.shift;
9653 *cost += extra_cost->alu.arith_shift;
9654 *cost += 2 * extra_cost->alu.logical;
9655 }
9656 }
9657 return true;
9658 }
9659 return false;
9660
9661 case MINUS:
9662 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9663 && (mode == SFmode || !TARGET_VFP_SINGLE))
9664 {
9665 *cost = COSTS_N_INSNS (1);
9666 if (GET_CODE (XEXP (x, 0)) == MULT
9667 || GET_CODE (XEXP (x, 1)) == MULT)
9668 {
9669 rtx mul_op0, mul_op1, sub_op;
9670
9671 if (speed_p)
9672 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9673
9674 if (GET_CODE (XEXP (x, 0)) == MULT)
9675 {
9676 mul_op0 = XEXP (XEXP (x, 0), 0);
9677 mul_op1 = XEXP (XEXP (x, 0), 1);
9678 sub_op = XEXP (x, 1);
9679 }
9680 else
9681 {
9682 mul_op0 = XEXP (XEXP (x, 1), 0);
9683 mul_op1 = XEXP (XEXP (x, 1), 1);
9684 sub_op = XEXP (x, 0);
9685 }
9686
9687 /* The first operand of the multiply may be optionally
9688 negated. */
9689 if (GET_CODE (mul_op0) == NEG)
9690 mul_op0 = XEXP (mul_op0, 0);
9691
9692 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9693 + rtx_cost (mul_op1, code, 0, speed_p)
9694 + rtx_cost (sub_op, code, 0, speed_p));
9695
9696 return true;
9697 }
9698
9699 if (speed_p)
9700 *cost += extra_cost->fp[mode != SFmode].addsub;
9701 return false;
9702 }
9703
9704 if (mode == SImode)
9705 {
9706 rtx shift_by_reg = NULL;
9707 rtx shift_op;
9708 rtx non_shift_op;
9709
9710 *cost = COSTS_N_INSNS (1);
9711
9712 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9713 if (shift_op == NULL)
9714 {
9715 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9716 non_shift_op = XEXP (x, 0);
9717 }
9718 else
9719 non_shift_op = XEXP (x, 1);
9720
9721 if (shift_op != NULL)
9722 {
9723 if (shift_by_reg != NULL)
9724 {
9725 if (speed_p)
9726 *cost += extra_cost->alu.arith_shift_reg;
9727 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9728 }
9729 else if (speed_p)
9730 *cost += extra_cost->alu.arith_shift;
9731
9732 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9733 + rtx_cost (non_shift_op, code, 0, speed_p));
9734 return true;
9735 }
9736
9737 if (arm_arch_thumb2
9738 && GET_CODE (XEXP (x, 1)) == MULT)
9739 {
9740 /* MLS. */
9741 if (speed_p)
9742 *cost += extra_cost->mult[0].add;
9743 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9744 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9745 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9746 return true;
9747 }
9748
9749 if (CONST_INT_P (XEXP (x, 0)))
9750 {
9751 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9752 INTVAL (XEXP (x, 0)), NULL_RTX,
9753 NULL_RTX, 1, 0);
9754 *cost = COSTS_N_INSNS (insns);
9755 if (speed_p)
9756 *cost += insns * extra_cost->alu.arith;
9757 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9758 return true;
9759 }
9760 else if (speed_p)
9761 *cost += extra_cost->alu.arith;
9762
9763 return false;
9764 }
9765
9766 if (GET_MODE_CLASS (mode) == MODE_INT
9767 && GET_MODE_SIZE (mode) < 4)
9768 {
9769 rtx shift_op, shift_reg;
9770 shift_reg = NULL;
9771
9772 /* We check both sides of the MINUS for shifter operands since,
9773 unlike PLUS, it's not commutative. */
9774
9775 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9776 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9777
9778 /* Slightly disparage, as we might need to widen the result. */
9779 *cost = 1 + COSTS_N_INSNS (1);
9780 if (speed_p)
9781 *cost += extra_cost->alu.arith;
9782
9783 if (CONST_INT_P (XEXP (x, 0)))
9784 {
9785 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9786 return true;
9787 }
9788
9789 return false;
9790 }
9791
9792 if (mode == DImode)
9793 {
9794 *cost = COSTS_N_INSNS (2);
9795
9796 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9797 {
9798 rtx op1 = XEXP (x, 1);
9799
9800 if (speed_p)
9801 *cost += 2 * extra_cost->alu.arith;
9802
9803 if (GET_CODE (op1) == ZERO_EXTEND)
9804 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9805 else
9806 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9807 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9808 0, speed_p);
9809 return true;
9810 }
9811 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9812 {
9813 if (speed_p)
9814 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9815 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9816 0, speed_p)
9817 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9818 return true;
9819 }
9820 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9821 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9822 {
9823 if (speed_p)
9824 *cost += (extra_cost->alu.arith
9825 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9826 ? extra_cost->alu.arith
9827 : extra_cost->alu.arith_shift));
9828 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (x, 1), 0),
9830 GET_CODE (XEXP (x, 1)), 0, speed_p));
9831 return true;
9832 }
9833
9834 if (speed_p)
9835 *cost += 2 * extra_cost->alu.arith;
9836 return false;
9837 }
9838
9839 /* Vector mode? */
9840
9841 *cost = LIBCALL_COST (2);
9842 return false;
9843
9844 case PLUS:
9845 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9846 && (mode == SFmode || !TARGET_VFP_SINGLE))
9847 {
9848 *cost = COSTS_N_INSNS (1);
9849 if (GET_CODE (XEXP (x, 0)) == MULT)
9850 {
9851 rtx mul_op0, mul_op1, add_op;
9852
9853 if (speed_p)
9854 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9855
9856 mul_op0 = XEXP (XEXP (x, 0), 0);
9857 mul_op1 = XEXP (XEXP (x, 0), 1);
9858 add_op = XEXP (x, 1);
9859
9860 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9861 + rtx_cost (mul_op1, code, 0, speed_p)
9862 + rtx_cost (add_op, code, 0, speed_p));
9863
9864 return true;
9865 }
9866
9867 if (speed_p)
9868 *cost += extra_cost->fp[mode != SFmode].addsub;
9869 return false;
9870 }
9871 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9872 {
9873 *cost = LIBCALL_COST (2);
9874 return false;
9875 }
9876
9877 /* Narrow modes can be synthesized in SImode, but the range
9878 of useful sub-operations is limited. Check for shift operations
9879 on one of the operands. Only left shifts can be used in the
9880 narrow modes. */
9881 if (GET_MODE_CLASS (mode) == MODE_INT
9882 && GET_MODE_SIZE (mode) < 4)
9883 {
9884 rtx shift_op, shift_reg;
9885 shift_reg = NULL;
9886
9887 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9888
9889 if (CONST_INT_P (XEXP (x, 1)))
9890 {
9891 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9892 INTVAL (XEXP (x, 1)), NULL_RTX,
9893 NULL_RTX, 1, 0);
9894 *cost = COSTS_N_INSNS (insns);
9895 if (speed_p)
9896 *cost += insns * extra_cost->alu.arith;
9897 /* Slightly penalize a narrow operation as the result may
9898 need widening. */
9899 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9900 return true;
9901 }
9902
9903 /* Slightly penalize a narrow operation as the result may
9904 need widening. */
9905 *cost = 1 + COSTS_N_INSNS (1);
9906 if (speed_p)
9907 *cost += extra_cost->alu.arith;
9908
9909 return false;
9910 }
9911
9912 if (mode == SImode)
9913 {
9914 rtx shift_op, shift_reg;
9915
9916 *cost = COSTS_N_INSNS (1);
9917 if (TARGET_INT_SIMD
9918 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9919 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9920 {
9921 /* UXTA[BH] or SXTA[BH]. */
9922 if (speed_p)
9923 *cost += extra_cost->alu.extend_arith;
9924 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9925 speed_p)
9926 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9927 return true;
9928 }
9929
9930 shift_reg = NULL;
9931 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9932 if (shift_op != NULL)
9933 {
9934 if (shift_reg)
9935 {
9936 if (speed_p)
9937 *cost += extra_cost->alu.arith_shift_reg;
9938 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9939 }
9940 else if (speed_p)
9941 *cost += extra_cost->alu.arith_shift;
9942
9943 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9944 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9945 return true;
9946 }
9947 if (GET_CODE (XEXP (x, 0)) == MULT)
9948 {
9949 rtx mul_op = XEXP (x, 0);
9950
9951 *cost = COSTS_N_INSNS (1);
9952
9953 if (TARGET_DSP_MULTIPLY
9954 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9955 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9956 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9957 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9958 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9959 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9960 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9961 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9962 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9963 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9964 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9965 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9966 == 16))))))
9967 {
9968 /* SMLA[BT][BT]. */
9969 if (speed_p)
9970 *cost += extra_cost->mult[0].extend_add;
9971 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9972 SIGN_EXTEND, 0, speed_p)
9973 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9976 return true;
9977 }
9978
9979 if (speed_p)
9980 *cost += extra_cost->mult[0].add;
9981 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9982 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9983 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9984 return true;
9985 }
9986 if (CONST_INT_P (XEXP (x, 1)))
9987 {
9988 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9989 INTVAL (XEXP (x, 1)), NULL_RTX,
9990 NULL_RTX, 1, 0);
9991 *cost = COSTS_N_INSNS (insns);
9992 if (speed_p)
9993 *cost += insns * extra_cost->alu.arith;
9994 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9995 return true;
9996 }
9997 else if (speed_p)
9998 *cost += extra_cost->alu.arith;
9999
10000 return false;
10001 }
10002
10003 if (mode == DImode)
10004 {
10005 if (arm_arch3m
10006 && GET_CODE (XEXP (x, 0)) == MULT
10007 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10008 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10009 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10010 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10011 {
10012 *cost = COSTS_N_INSNS (1);
10013 if (speed_p)
10014 *cost += extra_cost->mult[1].extend_add;
10015 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10016 ZERO_EXTEND, 0, speed_p)
10017 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10018 ZERO_EXTEND, 0, speed_p)
10019 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10020 return true;
10021 }
10022
10023 *cost = COSTS_N_INSNS (2);
10024
10025 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10026 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10027 {
10028 if (speed_p)
10029 *cost += (extra_cost->alu.arith
10030 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10031 ? extra_cost->alu.arith
10032 : extra_cost->alu.arith_shift));
10033
10034 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10035 speed_p)
10036 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10037 return true;
10038 }
10039
10040 if (speed_p)
10041 *cost += 2 * extra_cost->alu.arith;
10042 return false;
10043 }
10044
10045 /* Vector mode? */
10046 *cost = LIBCALL_COST (2);
10047 return false;
10048 case IOR:
10049 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10050 {
10051 *cost = COSTS_N_INSNS (1);
10052 if (speed_p)
10053 *cost += extra_cost->alu.rev;
10054
10055 return true;
10056 }
10057 /* Fall through. */
10058 case AND: case XOR:
10059 if (mode == SImode)
10060 {
10061 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10062 rtx op0 = XEXP (x, 0);
10063 rtx shift_op, shift_reg;
10064
10065 *cost = COSTS_N_INSNS (1);
10066
10067 if (subcode == NOT
10068 && (code == AND
10069 || (code == IOR && TARGET_THUMB2)))
10070 op0 = XEXP (op0, 0);
10071
10072 shift_reg = NULL;
10073 shift_op = shifter_op_p (op0, &shift_reg);
10074 if (shift_op != NULL)
10075 {
10076 if (shift_reg)
10077 {
10078 if (speed_p)
10079 *cost += extra_cost->alu.log_shift_reg;
10080 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10081 }
10082 else if (speed_p)
10083 *cost += extra_cost->alu.log_shift;
10084
10085 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10086 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10087 return true;
10088 }
10089
10090 if (CONST_INT_P (XEXP (x, 1)))
10091 {
10092 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10093 INTVAL (XEXP (x, 1)), NULL_RTX,
10094 NULL_RTX, 1, 0);
10095
10096 *cost = COSTS_N_INSNS (insns);
10097 if (speed_p)
10098 *cost += insns * extra_cost->alu.logical;
10099 *cost += rtx_cost (op0, code, 0, speed_p);
10100 return true;
10101 }
10102
10103 if (speed_p)
10104 *cost += extra_cost->alu.logical;
10105 *cost += (rtx_cost (op0, code, 0, speed_p)
10106 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10107 return true;
10108 }
10109
10110 if (mode == DImode)
10111 {
10112 rtx op0 = XEXP (x, 0);
10113 enum rtx_code subcode = GET_CODE (op0);
10114
10115 *cost = COSTS_N_INSNS (2);
10116
10117 if (subcode == NOT
10118 && (code == AND
10119 || (code == IOR && TARGET_THUMB2)))
10120 op0 = XEXP (op0, 0);
10121
10122 if (GET_CODE (op0) == ZERO_EXTEND)
10123 {
10124 if (speed_p)
10125 *cost += 2 * extra_cost->alu.logical;
10126
10127 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10128 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10129 return true;
10130 }
10131 else if (GET_CODE (op0) == SIGN_EXTEND)
10132 {
10133 if (speed_p)
10134 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10135
10136 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10137 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10138 return true;
10139 }
10140
10141 if (speed_p)
10142 *cost += 2 * extra_cost->alu.logical;
10143
10144 return true;
10145 }
10146 /* Vector mode? */
10147
10148 *cost = LIBCALL_COST (2);
10149 return false;
10150
10151 case MULT:
10152 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10153 && (mode == SFmode || !TARGET_VFP_SINGLE))
10154 {
10155 rtx op0 = XEXP (x, 0);
10156
10157 *cost = COSTS_N_INSNS (1);
10158
10159 if (GET_CODE (op0) == NEG)
10160 op0 = XEXP (op0, 0);
10161
10162 if (speed_p)
10163 *cost += extra_cost->fp[mode != SFmode].mult;
10164
10165 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10166 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10167 return true;
10168 }
10169 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10170 {
10171 *cost = LIBCALL_COST (2);
10172 return false;
10173 }
10174
10175 if (mode == SImode)
10176 {
10177 *cost = COSTS_N_INSNS (1);
10178 if (TARGET_DSP_MULTIPLY
10179 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10180 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10181 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10183 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10184 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10186 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10187 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10188 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10189 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10190 && (INTVAL (XEXP (XEXP (x, 1), 1))
10191 == 16))))))
10192 {
10193 /* SMUL[TB][TB]. */
10194 if (speed_p)
10195 *cost += extra_cost->mult[0].extend;
10196 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10197 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10198 return true;
10199 }
10200 if (speed_p)
10201 *cost += extra_cost->mult[0].simple;
10202 return false;
10203 }
10204
10205 if (mode == DImode)
10206 {
10207 if (arm_arch3m
10208 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10209 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10210 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10211 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10212 {
10213 *cost = COSTS_N_INSNS (1);
10214 if (speed_p)
10215 *cost += extra_cost->mult[1].extend;
10216 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10217 ZERO_EXTEND, 0, speed_p)
10218 + rtx_cost (XEXP (XEXP (x, 1), 0),
10219 ZERO_EXTEND, 0, speed_p));
10220 return true;
10221 }
10222
10223 *cost = LIBCALL_COST (2);
10224 return false;
10225 }
10226
10227 /* Vector mode? */
10228 *cost = LIBCALL_COST (2);
10229 return false;
10230
10231 case NEG:
10232 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10233 && (mode == SFmode || !TARGET_VFP_SINGLE))
10234 {
10235 *cost = COSTS_N_INSNS (1);
10236 if (speed_p)
10237 *cost += extra_cost->fp[mode != SFmode].neg;
10238
10239 return false;
10240 }
10241 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10242 {
10243 *cost = LIBCALL_COST (1);
10244 return false;
10245 }
10246
10247 if (mode == SImode)
10248 {
10249 if (GET_CODE (XEXP (x, 0)) == ABS)
10250 {
10251 *cost = COSTS_N_INSNS (2);
10252 /* Assume the non-flag-changing variant. */
10253 if (speed_p)
10254 *cost += (extra_cost->alu.log_shift
10255 + extra_cost->alu.arith_shift);
10256 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10257 return true;
10258 }
10259
10260 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10261 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10262 {
10263 *cost = COSTS_N_INSNS (2);
10264 /* No extra cost for MOV imm and MVN imm. */
10265 /* If the comparison op is using the flags, there's no further
10266 cost, otherwise we need to add the cost of the comparison. */
10267 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10268 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10269 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10270 {
10271 *cost += (COSTS_N_INSNS (1)
10272 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10273 speed_p)
10274 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10275 speed_p));
10276 if (speed_p)
10277 *cost += extra_cost->alu.arith;
10278 }
10279 return true;
10280 }
10281 *cost = COSTS_N_INSNS (1);
10282 if (speed_p)
10283 *cost += extra_cost->alu.arith;
10284 return false;
10285 }
10286
10287 if (GET_MODE_CLASS (mode) == MODE_INT
10288 && GET_MODE_SIZE (mode) < 4)
10289 {
10290 /* Slightly disparage, as we might need an extend operation. */
10291 *cost = 1 + COSTS_N_INSNS (1);
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10294 return false;
10295 }
10296
10297 if (mode == DImode)
10298 {
10299 *cost = COSTS_N_INSNS (2);
10300 if (speed_p)
10301 *cost += 2 * extra_cost->alu.arith;
10302 return false;
10303 }
10304
10305 /* Vector mode? */
10306 *cost = LIBCALL_COST (1);
10307 return false;
10308
10309 case NOT:
10310 if (mode == SImode)
10311 {
10312 rtx shift_op;
10313 rtx shift_reg = NULL;
10314
10315 *cost = COSTS_N_INSNS (1);
10316 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10317
10318 if (shift_op)
10319 {
10320 if (shift_reg != NULL)
10321 {
10322 if (speed_p)
10323 *cost += extra_cost->alu.log_shift_reg;
10324 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10325 }
10326 else if (speed_p)
10327 *cost += extra_cost->alu.log_shift;
10328 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10329 return true;
10330 }
10331
10332 if (speed_p)
10333 *cost += extra_cost->alu.logical;
10334 return false;
10335 }
10336 if (mode == DImode)
10337 {
10338 *cost = COSTS_N_INSNS (2);
10339 return false;
10340 }
10341
10342 /* Vector mode? */
10343
10344 *cost += LIBCALL_COST (1);
10345 return false;
10346
10347 case IF_THEN_ELSE:
10348 {
10349 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10350 {
10351 *cost = COSTS_N_INSNS (4);
10352 return true;
10353 }
10354 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10355 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10356
10357 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10358 /* Assume that if one arm of the if_then_else is a register,
10359 that it will be tied with the result and eliminate the
10360 conditional insn. */
10361 if (REG_P (XEXP (x, 1)))
10362 *cost += op2cost;
10363 else if (REG_P (XEXP (x, 2)))
10364 *cost += op1cost;
10365 else
10366 {
10367 if (speed_p)
10368 {
10369 if (extra_cost->alu.non_exec_costs_exec)
10370 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10371 else
10372 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10373 }
10374 else
10375 *cost += op1cost + op2cost;
10376 }
10377 }
10378 return true;
10379
10380 case COMPARE:
10381 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10382 *cost = 0;
10383 else
10384 {
10385 machine_mode op0mode;
10386 /* We'll mostly assume that the cost of a compare is the cost of the
10387 LHS. However, there are some notable exceptions. */
10388
10389 /* Floating point compares are never done as side-effects. */
10390 op0mode = GET_MODE (XEXP (x, 0));
10391 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10392 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10393 {
10394 *cost = COSTS_N_INSNS (1);
10395 if (speed_p)
10396 *cost += extra_cost->fp[op0mode != SFmode].compare;
10397
10398 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10399 {
10400 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10401 return true;
10402 }
10403
10404 return false;
10405 }
10406 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10407 {
10408 *cost = LIBCALL_COST (2);
10409 return false;
10410 }
10411
10412 /* DImode compares normally take two insns. */
10413 if (op0mode == DImode)
10414 {
10415 *cost = COSTS_N_INSNS (2);
10416 if (speed_p)
10417 *cost += 2 * extra_cost->alu.arith;
10418 return false;
10419 }
10420
10421 if (op0mode == SImode)
10422 {
10423 rtx shift_op;
10424 rtx shift_reg;
10425
10426 if (XEXP (x, 1) == const0_rtx
10427 && !(REG_P (XEXP (x, 0))
10428 || (GET_CODE (XEXP (x, 0)) == SUBREG
10429 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10430 {
10431 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10432
10433 /* Multiply operations that set the flags are often
10434 significantly more expensive. */
10435 if (speed_p
10436 && GET_CODE (XEXP (x, 0)) == MULT
10437 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10438 *cost += extra_cost->mult[0].flag_setting;
10439
10440 if (speed_p
10441 && GET_CODE (XEXP (x, 0)) == PLUS
10442 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10443 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10444 0), 1), mode))
10445 *cost += extra_cost->mult[0].flag_setting;
10446 return true;
10447 }
10448
10449 shift_reg = NULL;
10450 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10451 if (shift_op != NULL)
10452 {
10453 *cost = COSTS_N_INSNS (1);
10454 if (shift_reg != NULL)
10455 {
10456 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10457 if (speed_p)
10458 *cost += extra_cost->alu.arith_shift_reg;
10459 }
10460 else if (speed_p)
10461 *cost += extra_cost->alu.arith_shift;
10462 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10463 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10464 return true;
10465 }
10466
10467 *cost = COSTS_N_INSNS (1);
10468 if (speed_p)
10469 *cost += extra_cost->alu.arith;
10470 if (CONST_INT_P (XEXP (x, 1))
10471 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10472 {
10473 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10474 return true;
10475 }
10476 return false;
10477 }
10478
10479 /* Vector mode? */
10480
10481 *cost = LIBCALL_COST (2);
10482 return false;
10483 }
10484 return true;
10485
10486 case EQ:
10487 case NE:
10488 case LT:
10489 case LE:
10490 case GT:
10491 case GE:
10492 case LTU:
10493 case LEU:
10494 case GEU:
10495 case GTU:
10496 case ORDERED:
10497 case UNORDERED:
10498 case UNEQ:
10499 case UNLE:
10500 case UNLT:
10501 case UNGE:
10502 case UNGT:
10503 case LTGT:
10504 if (outer_code == SET)
10505 {
10506 /* Is it a store-flag operation? */
10507 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10508 && XEXP (x, 1) == const0_rtx)
10509 {
10510 /* Thumb also needs an IT insn. */
10511 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10512 return true;
10513 }
10514 if (XEXP (x, 1) == const0_rtx)
10515 {
10516 switch (code)
10517 {
10518 case LT:
10519 /* LSR Rd, Rn, #31. */
10520 *cost = COSTS_N_INSNS (1);
10521 if (speed_p)
10522 *cost += extra_cost->alu.shift;
10523 break;
10524
10525 case EQ:
10526 /* RSBS T1, Rn, #0
10527 ADC Rd, Rn, T1. */
10528
10529 case NE:
10530 /* SUBS T1, Rn, #1
10531 SBC Rd, Rn, T1. */
10532 *cost = COSTS_N_INSNS (2);
10533 break;
10534
10535 case LE:
10536 /* RSBS T1, Rn, Rn, LSR #31
10537 ADC Rd, Rn, T1. */
10538 *cost = COSTS_N_INSNS (2);
10539 if (speed_p)
10540 *cost += extra_cost->alu.arith_shift;
10541 break;
10542
10543 case GT:
10544 /* RSB Rd, Rn, Rn, ASR #1
10545 LSR Rd, Rd, #31. */
10546 *cost = COSTS_N_INSNS (2);
10547 if (speed_p)
10548 *cost += (extra_cost->alu.arith_shift
10549 + extra_cost->alu.shift);
10550 break;
10551
10552 case GE:
10553 /* ASR Rd, Rn, #31
10554 ADD Rd, Rn, #1. */
10555 *cost = COSTS_N_INSNS (2);
10556 if (speed_p)
10557 *cost += extra_cost->alu.shift;
10558 break;
10559
10560 default:
10561 /* Remaining cases are either meaningless or would take
10562 three insns anyway. */
10563 *cost = COSTS_N_INSNS (3);
10564 break;
10565 }
10566 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10567 return true;
10568 }
10569 else
10570 {
10571 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10572 if (CONST_INT_P (XEXP (x, 1))
10573 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10574 {
10575 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10576 return true;
10577 }
10578
10579 return false;
10580 }
10581 }
10582 /* Not directly inside a set. If it involves the condition code
10583 register it must be the condition for a branch, cond_exec or
10584 I_T_E operation. Since the comparison is performed elsewhere
10585 this is just the control part which has no additional
10586 cost. */
10587 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10588 && XEXP (x, 1) == const0_rtx)
10589 {
10590 *cost = 0;
10591 return true;
10592 }
10593 return false;
10594
10595 case ABS:
10596 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10597 && (mode == SFmode || !TARGET_VFP_SINGLE))
10598 {
10599 *cost = COSTS_N_INSNS (1);
10600 if (speed_p)
10601 *cost += extra_cost->fp[mode != SFmode].neg;
10602
10603 return false;
10604 }
10605 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10606 {
10607 *cost = LIBCALL_COST (1);
10608 return false;
10609 }
10610
10611 if (mode == SImode)
10612 {
10613 *cost = COSTS_N_INSNS (1);
10614 if (speed_p)
10615 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10616 return false;
10617 }
10618 /* Vector mode? */
10619 *cost = LIBCALL_COST (1);
10620 return false;
10621
10622 case SIGN_EXTEND:
10623 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10624 && MEM_P (XEXP (x, 0)))
10625 {
10626 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10627
10628 if (mode == DImode)
10629 *cost += COSTS_N_INSNS (1);
10630
10631 if (!speed_p)
10632 return true;
10633
10634 if (GET_MODE (XEXP (x, 0)) == SImode)
10635 *cost += extra_cost->ldst.load;
10636 else
10637 *cost += extra_cost->ldst.load_sign_extend;
10638
10639 if (mode == DImode)
10640 *cost += extra_cost->alu.shift;
10641
10642 return true;
10643 }
10644
10645 /* Widening from less than 32-bits requires an extend operation. */
10646 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10647 {
10648 /* We have SXTB/SXTH. */
10649 *cost = COSTS_N_INSNS (1);
10650 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10651 if (speed_p)
10652 *cost += extra_cost->alu.extend;
10653 }
10654 else if (GET_MODE (XEXP (x, 0)) != SImode)
10655 {
10656 /* Needs two shifts. */
10657 *cost = COSTS_N_INSNS (2);
10658 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10659 if (speed_p)
10660 *cost += 2 * extra_cost->alu.shift;
10661 }
10662
10663 /* Widening beyond 32-bits requires one more insn. */
10664 if (mode == DImode)
10665 {
10666 *cost += COSTS_N_INSNS (1);
10667 if (speed_p)
10668 *cost += extra_cost->alu.shift;
10669 }
10670
10671 return true;
10672
10673 case ZERO_EXTEND:
10674 if ((arm_arch4
10675 || GET_MODE (XEXP (x, 0)) == SImode
10676 || GET_MODE (XEXP (x, 0)) == QImode)
10677 && MEM_P (XEXP (x, 0)))
10678 {
10679 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10680
10681 if (mode == DImode)
10682 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10683
10684 return true;
10685 }
10686
10687 /* Widening from less than 32-bits requires an extend operation. */
10688 if (GET_MODE (XEXP (x, 0)) == QImode)
10689 {
10690 /* UXTB can be a shorter instruction in Thumb2, but it might
10691 be slower than the AND Rd, Rn, #255 alternative. When
10692 optimizing for speed it should never be slower to use
10693 AND, and we don't really model 16-bit vs 32-bit insns
10694 here. */
10695 *cost = COSTS_N_INSNS (1);
10696 if (speed_p)
10697 *cost += extra_cost->alu.logical;
10698 }
10699 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10700 {
10701 /* We have UXTB/UXTH. */
10702 *cost = COSTS_N_INSNS (1);
10703 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10704 if (speed_p)
10705 *cost += extra_cost->alu.extend;
10706 }
10707 else if (GET_MODE (XEXP (x, 0)) != SImode)
10708 {
10709 /* Needs two shifts. It's marginally preferable to use
10710 shifts rather than two BIC instructions as the second
10711 shift may merge with a subsequent insn as a shifter
10712 op. */
10713 *cost = COSTS_N_INSNS (2);
10714 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10715 if (speed_p)
10716 *cost += 2 * extra_cost->alu.shift;
10717 }
10718 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10719 *cost = COSTS_N_INSNS (1);
10720
10721 /* Widening beyond 32-bits requires one more insn. */
10722 if (mode == DImode)
10723 {
10724 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10725 }
10726
10727 return true;
10728
10729 case CONST_INT:
10730 *cost = 0;
10731 /* CONST_INT has no mode, so we cannot tell for sure how many
10732 insns are really going to be needed. The best we can do is
10733 look at the value passed. If it fits in SImode, then assume
10734 that's the mode it will be used for. Otherwise assume it
10735 will be used in DImode. */
10736 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10737 mode = SImode;
10738 else
10739 mode = DImode;
10740
10741 /* Avoid blowing up in arm_gen_constant (). */
10742 if (!(outer_code == PLUS
10743 || outer_code == AND
10744 || outer_code == IOR
10745 || outer_code == XOR
10746 || outer_code == MINUS))
10747 outer_code = SET;
10748
10749 const_int_cost:
10750 if (mode == SImode)
10751 {
10752 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10753 INTVAL (x), NULL, NULL,
10754 0, 0));
10755 /* Extra costs? */
10756 }
10757 else
10758 {
10759 *cost += COSTS_N_INSNS (arm_gen_constant
10760 (outer_code, SImode, NULL,
10761 trunc_int_for_mode (INTVAL (x), SImode),
10762 NULL, NULL, 0, 0)
10763 + arm_gen_constant (outer_code, SImode, NULL,
10764 INTVAL (x) >> 32, NULL,
10765 NULL, 0, 0));
10766 /* Extra costs? */
10767 }
10768
10769 return true;
10770
10771 case CONST:
10772 case LABEL_REF:
10773 case SYMBOL_REF:
10774 if (speed_p)
10775 {
10776 if (arm_arch_thumb2 && !flag_pic)
10777 *cost = COSTS_N_INSNS (2);
10778 else
10779 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10780 }
10781 else
10782 *cost = COSTS_N_INSNS (2);
10783
10784 if (flag_pic)
10785 {
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->alu.arith;
10789 }
10790
10791 return true;
10792
10793 case CONST_FIXED:
10794 *cost = COSTS_N_INSNS (4);
10795 /* Fixme. */
10796 return true;
10797
10798 case CONST_DOUBLE:
10799 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10800 && (mode == SFmode || !TARGET_VFP_SINGLE))
10801 {
10802 if (vfp3_const_double_rtx (x))
10803 {
10804 *cost = COSTS_N_INSNS (1);
10805 if (speed_p)
10806 *cost += extra_cost->fp[mode == DFmode].fpconst;
10807 return true;
10808 }
10809
10810 if (speed_p)
10811 {
10812 *cost = COSTS_N_INSNS (1);
10813 if (mode == DFmode)
10814 *cost += extra_cost->ldst.loadd;
10815 else
10816 *cost += extra_cost->ldst.loadf;
10817 }
10818 else
10819 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10820
10821 return true;
10822 }
10823 *cost = COSTS_N_INSNS (4);
10824 return true;
10825
10826 case CONST_VECTOR:
10827 /* Fixme. */
10828 if (TARGET_NEON
10829 && TARGET_HARD_FLOAT
10830 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10831 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10832 *cost = COSTS_N_INSNS (1);
10833 else
10834 *cost = COSTS_N_INSNS (4);
10835 return true;
10836
10837 case HIGH:
10838 case LO_SUM:
10839 *cost = COSTS_N_INSNS (1);
10840 /* When optimizing for size, we prefer constant pool entries to
10841 MOVW/MOVT pairs, so bump the cost of these slightly. */
10842 if (!speed_p)
10843 *cost += 1;
10844 return true;
10845
10846 case CLZ:
10847 *cost = COSTS_N_INSNS (1);
10848 if (speed_p)
10849 *cost += extra_cost->alu.clz;
10850 return false;
10851
10852 case SMIN:
10853 if (XEXP (x, 1) == const0_rtx)
10854 {
10855 *cost = COSTS_N_INSNS (1);
10856 if (speed_p)
10857 *cost += extra_cost->alu.log_shift;
10858 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10859 return true;
10860 }
10861 /* Fall through. */
10862 case SMAX:
10863 case UMIN:
10864 case UMAX:
10865 *cost = COSTS_N_INSNS (2);
10866 return false;
10867
10868 case TRUNCATE:
10869 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10870 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10871 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10872 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10873 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10874 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10875 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10876 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10877 == ZERO_EXTEND))))
10878 {
10879 *cost = COSTS_N_INSNS (1);
10880 if (speed_p)
10881 *cost += extra_cost->mult[1].extend;
10882 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10883 speed_p)
10884 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10885 0, speed_p));
10886 return true;
10887 }
10888 *cost = LIBCALL_COST (1);
10889 return false;
10890
10891 case UNSPEC_VOLATILE:
10892 case UNSPEC:
10893 return arm_unspec_cost (x, outer_code, speed_p, cost);
10894
10895 case PC:
10896 /* Reading the PC is like reading any other register. Writing it
10897 is more expensive, but we take that into account elsewhere. */
10898 *cost = 0;
10899 return true;
10900
10901 case ZERO_EXTRACT:
10902 /* TODO: Simple zero_extract of bottom bits using AND. */
10903 /* Fall through. */
10904 case SIGN_EXTRACT:
10905 if (arm_arch6
10906 && mode == SImode
10907 && CONST_INT_P (XEXP (x, 1))
10908 && CONST_INT_P (XEXP (x, 2)))
10909 {
10910 *cost = COSTS_N_INSNS (1);
10911 if (speed_p)
10912 *cost += extra_cost->alu.bfx;
10913 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10914 return true;
10915 }
10916 /* Without UBFX/SBFX, need to resort to shift operations. */
10917 *cost = COSTS_N_INSNS (2);
10918 if (speed_p)
10919 *cost += 2 * extra_cost->alu.shift;
10920 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10921 return true;
10922
10923 case FLOAT_EXTEND:
10924 if (TARGET_HARD_FLOAT)
10925 {
10926 *cost = COSTS_N_INSNS (1);
10927 if (speed_p)
10928 *cost += extra_cost->fp[mode == DFmode].widen;
10929 if (!TARGET_FPU_ARMV8
10930 && GET_MODE (XEXP (x, 0)) == HFmode)
10931 {
10932 /* Pre v8, widening HF->DF is a two-step process, first
10933 widening to SFmode. */
10934 *cost += COSTS_N_INSNS (1);
10935 if (speed_p)
10936 *cost += extra_cost->fp[0].widen;
10937 }
10938 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10939 return true;
10940 }
10941
10942 *cost = LIBCALL_COST (1);
10943 return false;
10944
10945 case FLOAT_TRUNCATE:
10946 if (TARGET_HARD_FLOAT)
10947 {
10948 *cost = COSTS_N_INSNS (1);
10949 if (speed_p)
10950 *cost += extra_cost->fp[mode == DFmode].narrow;
10951 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10952 return true;
10953 /* Vector modes? */
10954 }
10955 *cost = LIBCALL_COST (1);
10956 return false;
10957
10958 case FMA:
10959 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10960 {
10961 rtx op0 = XEXP (x, 0);
10962 rtx op1 = XEXP (x, 1);
10963 rtx op2 = XEXP (x, 2);
10964
10965 *cost = COSTS_N_INSNS (1);
10966
10967 /* vfms or vfnma. */
10968 if (GET_CODE (op0) == NEG)
10969 op0 = XEXP (op0, 0);
10970
10971 /* vfnms or vfnma. */
10972 if (GET_CODE (op2) == NEG)
10973 op2 = XEXP (op2, 0);
10974
10975 *cost += rtx_cost (op0, FMA, 0, speed_p);
10976 *cost += rtx_cost (op1, FMA, 1, speed_p);
10977 *cost += rtx_cost (op2, FMA, 2, speed_p);
10978
10979 if (speed_p)
10980 *cost += extra_cost->fp[mode ==DFmode].fma;
10981
10982 return true;
10983 }
10984
10985 *cost = LIBCALL_COST (3);
10986 return false;
10987
10988 case FIX:
10989 case UNSIGNED_FIX:
10990 if (TARGET_HARD_FLOAT)
10991 {
10992 if (GET_MODE_CLASS (mode) == MODE_INT)
10993 {
10994 *cost = COSTS_N_INSNS (1);
10995 if (speed_p)
10996 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10997 /* Strip of the 'cost' of rounding towards zero. */
10998 if (GET_CODE (XEXP (x, 0)) == FIX)
10999 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11000 else
11001 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11002 /* ??? Increase the cost to deal with transferring from
11003 FP -> CORE registers? */
11004 return true;
11005 }
11006 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11007 && TARGET_FPU_ARMV8)
11008 {
11009 *cost = COSTS_N_INSNS (1);
11010 if (speed_p)
11011 *cost += extra_cost->fp[mode == DFmode].roundint;
11012 return false;
11013 }
11014 /* Vector costs? */
11015 }
11016 *cost = LIBCALL_COST (1);
11017 return false;
11018
11019 case FLOAT:
11020 case UNSIGNED_FLOAT:
11021 if (TARGET_HARD_FLOAT)
11022 {
11023 /* ??? Increase the cost to deal with transferring from CORE
11024 -> FP registers? */
11025 *cost = COSTS_N_INSNS (1);
11026 if (speed_p)
11027 *cost += extra_cost->fp[mode == DFmode].fromint;
11028 return false;
11029 }
11030 *cost = LIBCALL_COST (1);
11031 return false;
11032
11033 case CALL:
11034 *cost = COSTS_N_INSNS (1);
11035 return true;
11036
11037 case ASM_OPERANDS:
11038 {
11039 /* Just a guess. Guess number of instructions in the asm
11040 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11041 though (see PR60663). */
11042 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11043 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11044
11045 *cost = COSTS_N_INSNS (asm_length + num_operands);
11046 return true;
11047 }
11048 default:
11049 if (mode != VOIDmode)
11050 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11051 else
11052 *cost = COSTS_N_INSNS (4); /* Who knows? */
11053 return false;
11054 }
11055 }
11056
11057 #undef HANDLE_NARROW_SHIFT_ARITH
11058
11059 /* RTX costs when optimizing for size. */
11060 static bool
11061 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11062 int *total, bool speed)
11063 {
11064 bool result;
11065
11066 if (TARGET_OLD_RTX_COSTS
11067 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11068 {
11069 /* Old way. (Deprecated.) */
11070 if (!speed)
11071 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11072 (enum rtx_code) outer_code, total);
11073 else
11074 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11075 (enum rtx_code) outer_code, total,
11076 speed);
11077 }
11078 else
11079 {
11080 /* New way. */
11081 if (current_tune->insn_extra_cost)
11082 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11083 (enum rtx_code) outer_code,
11084 current_tune->insn_extra_cost,
11085 total, speed);
11086 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11087 && current_tune->insn_extra_cost != NULL */
11088 else
11089 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11090 (enum rtx_code) outer_code,
11091 &generic_extra_costs, total, speed);
11092 }
11093
11094 if (dump_file && (dump_flags & TDF_DETAILS))
11095 {
11096 print_rtl_single (dump_file, x);
11097 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11098 *total, result ? "final" : "partial");
11099 }
11100 return result;
11101 }
11102
11103 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11104 supported on any "slowmul" cores, so it can be ignored. */
11105
11106 static bool
11107 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11108 int *total, bool speed)
11109 {
11110 machine_mode mode = GET_MODE (x);
11111
11112 if (TARGET_THUMB)
11113 {
11114 *total = thumb1_rtx_costs (x, code, outer_code);
11115 return true;
11116 }
11117
11118 switch (code)
11119 {
11120 case MULT:
11121 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11122 || mode == DImode)
11123 {
11124 *total = COSTS_N_INSNS (20);
11125 return false;
11126 }
11127
11128 if (CONST_INT_P (XEXP (x, 1)))
11129 {
11130 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11131 & (unsigned HOST_WIDE_INT) 0xffffffff);
11132 int cost, const_ok = const_ok_for_arm (i);
11133 int j, booth_unit_size;
11134
11135 /* Tune as appropriate. */
11136 cost = const_ok ? 4 : 8;
11137 booth_unit_size = 2;
11138 for (j = 0; i && j < 32; j += booth_unit_size)
11139 {
11140 i >>= booth_unit_size;
11141 cost++;
11142 }
11143
11144 *total = COSTS_N_INSNS (cost);
11145 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11146 return true;
11147 }
11148
11149 *total = COSTS_N_INSNS (20);
11150 return false;
11151
11152 default:
11153 return arm_rtx_costs_1 (x, outer_code, total, speed);;
11154 }
11155 }
11156
11157
11158 /* RTX cost for cores with a fast multiply unit (M variants). */
11159
11160 static bool
11161 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11162 int *total, bool speed)
11163 {
11164 machine_mode mode = GET_MODE (x);
11165
11166 if (TARGET_THUMB1)
11167 {
11168 *total = thumb1_rtx_costs (x, code, outer_code);
11169 return true;
11170 }
11171
11172 /* ??? should thumb2 use different costs? */
11173 switch (code)
11174 {
11175 case MULT:
11176 /* There is no point basing this on the tuning, since it is always the
11177 fast variant if it exists at all. */
11178 if (mode == DImode
11179 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11180 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11181 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11182 {
11183 *total = COSTS_N_INSNS(2);
11184 return false;
11185 }
11186
11187
11188 if (mode == DImode)
11189 {
11190 *total = COSTS_N_INSNS (5);
11191 return false;
11192 }
11193
11194 if (CONST_INT_P (XEXP (x, 1)))
11195 {
11196 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11197 & (unsigned HOST_WIDE_INT) 0xffffffff);
11198 int cost, const_ok = const_ok_for_arm (i);
11199 int j, booth_unit_size;
11200
11201 /* Tune as appropriate. */
11202 cost = const_ok ? 4 : 8;
11203 booth_unit_size = 8;
11204 for (j = 0; i && j < 32; j += booth_unit_size)
11205 {
11206 i >>= booth_unit_size;
11207 cost++;
11208 }
11209
11210 *total = COSTS_N_INSNS(cost);
11211 return false;
11212 }
11213
11214 if (mode == SImode)
11215 {
11216 *total = COSTS_N_INSNS (4);
11217 return false;
11218 }
11219
11220 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11221 {
11222 if (TARGET_HARD_FLOAT
11223 && (mode == SFmode
11224 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11225 {
11226 *total = COSTS_N_INSNS (1);
11227 return false;
11228 }
11229 }
11230
11231 /* Requires a lib call */
11232 *total = COSTS_N_INSNS (20);
11233 return false;
11234
11235 default:
11236 return arm_rtx_costs_1 (x, outer_code, total, speed);
11237 }
11238 }
11239
11240
11241 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11242 so it can be ignored. */
11243
11244 static bool
11245 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11246 int *total, bool speed)
11247 {
11248 machine_mode mode = GET_MODE (x);
11249
11250 if (TARGET_THUMB)
11251 {
11252 *total = thumb1_rtx_costs (x, code, outer_code);
11253 return true;
11254 }
11255
11256 switch (code)
11257 {
11258 case COMPARE:
11259 if (GET_CODE (XEXP (x, 0)) != MULT)
11260 return arm_rtx_costs_1 (x, outer_code, total, speed);
11261
11262 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11263 will stall until the multiplication is complete. */
11264 *total = COSTS_N_INSNS (3);
11265 return false;
11266
11267 case MULT:
11268 /* There is no point basing this on the tuning, since it is always the
11269 fast variant if it exists at all. */
11270 if (mode == DImode
11271 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11272 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11273 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11274 {
11275 *total = COSTS_N_INSNS (2);
11276 return false;
11277 }
11278
11279
11280 if (mode == DImode)
11281 {
11282 *total = COSTS_N_INSNS (5);
11283 return false;
11284 }
11285
11286 if (CONST_INT_P (XEXP (x, 1)))
11287 {
11288 /* If operand 1 is a constant we can more accurately
11289 calculate the cost of the multiply. The multiplier can
11290 retire 15 bits on the first cycle and a further 12 on the
11291 second. We do, of course, have to load the constant into
11292 a register first. */
11293 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11294 /* There's a general overhead of one cycle. */
11295 int cost = 1;
11296 unsigned HOST_WIDE_INT masked_const;
11297
11298 if (i & 0x80000000)
11299 i = ~i;
11300
11301 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11302
11303 masked_const = i & 0xffff8000;
11304 if (masked_const != 0)
11305 {
11306 cost++;
11307 masked_const = i & 0xf8000000;
11308 if (masked_const != 0)
11309 cost++;
11310 }
11311 *total = COSTS_N_INSNS (cost);
11312 return false;
11313 }
11314
11315 if (mode == SImode)
11316 {
11317 *total = COSTS_N_INSNS (3);
11318 return false;
11319 }
11320
11321 /* Requires a lib call */
11322 *total = COSTS_N_INSNS (20);
11323 return false;
11324
11325 default:
11326 return arm_rtx_costs_1 (x, outer_code, total, speed);
11327 }
11328 }
11329
11330
11331 /* RTX costs for 9e (and later) cores. */
11332
11333 static bool
11334 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11335 int *total, bool speed)
11336 {
11337 machine_mode mode = GET_MODE (x);
11338
11339 if (TARGET_THUMB1)
11340 {
11341 switch (code)
11342 {
11343 case MULT:
11344 /* Small multiply: 32 cycles for an integer multiply inst. */
11345 if (arm_arch6m && arm_m_profile_small_mul)
11346 *total = COSTS_N_INSNS (32);
11347 else
11348 *total = COSTS_N_INSNS (3);
11349 return true;
11350
11351 default:
11352 *total = thumb1_rtx_costs (x, code, outer_code);
11353 return true;
11354 }
11355 }
11356
11357 switch (code)
11358 {
11359 case MULT:
11360 /* There is no point basing this on the tuning, since it is always the
11361 fast variant if it exists at all. */
11362 if (mode == DImode
11363 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11364 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11365 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11366 {
11367 *total = COSTS_N_INSNS (2);
11368 return false;
11369 }
11370
11371
11372 if (mode == DImode)
11373 {
11374 *total = COSTS_N_INSNS (5);
11375 return false;
11376 }
11377
11378 if (mode == SImode)
11379 {
11380 *total = COSTS_N_INSNS (2);
11381 return false;
11382 }
11383
11384 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11385 {
11386 if (TARGET_HARD_FLOAT
11387 && (mode == SFmode
11388 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11389 {
11390 *total = COSTS_N_INSNS (1);
11391 return false;
11392 }
11393 }
11394
11395 *total = COSTS_N_INSNS (20);
11396 return false;
11397
11398 default:
11399 return arm_rtx_costs_1 (x, outer_code, total, speed);
11400 }
11401 }
11402 /* All address computations that can be done are free, but rtx cost returns
11403 the same for practically all of them. So we weight the different types
11404 of address here in the order (most pref first):
11405 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11406 static inline int
11407 arm_arm_address_cost (rtx x)
11408 {
11409 enum rtx_code c = GET_CODE (x);
11410
11411 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11412 return 0;
11413 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11414 return 10;
11415
11416 if (c == PLUS)
11417 {
11418 if (CONST_INT_P (XEXP (x, 1)))
11419 return 2;
11420
11421 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11422 return 3;
11423
11424 return 4;
11425 }
11426
11427 return 6;
11428 }
11429
11430 static inline int
11431 arm_thumb_address_cost (rtx x)
11432 {
11433 enum rtx_code c = GET_CODE (x);
11434
11435 if (c == REG)
11436 return 1;
11437 if (c == PLUS
11438 && REG_P (XEXP (x, 0))
11439 && CONST_INT_P (XEXP (x, 1)))
11440 return 1;
11441
11442 return 2;
11443 }
11444
11445 static int
11446 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11447 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11448 {
11449 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11450 }
11451
11452 /* Adjust cost hook for XScale. */
11453 static bool
11454 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11455 {
11456 /* Some true dependencies can have a higher cost depending
11457 on precisely how certain input operands are used. */
11458 if (REG_NOTE_KIND(link) == 0
11459 && recog_memoized (insn) >= 0
11460 && recog_memoized (dep) >= 0)
11461 {
11462 int shift_opnum = get_attr_shift (insn);
11463 enum attr_type attr_type = get_attr_type (dep);
11464
11465 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11466 operand for INSN. If we have a shifted input operand and the
11467 instruction we depend on is another ALU instruction, then we may
11468 have to account for an additional stall. */
11469 if (shift_opnum != 0
11470 && (attr_type == TYPE_ALU_SHIFT_IMM
11471 || attr_type == TYPE_ALUS_SHIFT_IMM
11472 || attr_type == TYPE_LOGIC_SHIFT_IMM
11473 || attr_type == TYPE_LOGICS_SHIFT_IMM
11474 || attr_type == TYPE_ALU_SHIFT_REG
11475 || attr_type == TYPE_ALUS_SHIFT_REG
11476 || attr_type == TYPE_LOGIC_SHIFT_REG
11477 || attr_type == TYPE_LOGICS_SHIFT_REG
11478 || attr_type == TYPE_MOV_SHIFT
11479 || attr_type == TYPE_MVN_SHIFT
11480 || attr_type == TYPE_MOV_SHIFT_REG
11481 || attr_type == TYPE_MVN_SHIFT_REG))
11482 {
11483 rtx shifted_operand;
11484 int opno;
11485
11486 /* Get the shifted operand. */
11487 extract_insn (insn);
11488 shifted_operand = recog_data.operand[shift_opnum];
11489
11490 /* Iterate over all the operands in DEP. If we write an operand
11491 that overlaps with SHIFTED_OPERAND, then we have increase the
11492 cost of this dependency. */
11493 extract_insn (dep);
11494 preprocess_constraints (dep);
11495 for (opno = 0; opno < recog_data.n_operands; opno++)
11496 {
11497 /* We can ignore strict inputs. */
11498 if (recog_data.operand_type[opno] == OP_IN)
11499 continue;
11500
11501 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11502 shifted_operand))
11503 {
11504 *cost = 2;
11505 return false;
11506 }
11507 }
11508 }
11509 }
11510 return true;
11511 }
11512
11513 /* Adjust cost hook for Cortex A9. */
11514 static bool
11515 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11516 {
11517 switch (REG_NOTE_KIND (link))
11518 {
11519 case REG_DEP_ANTI:
11520 *cost = 0;
11521 return false;
11522
11523 case REG_DEP_TRUE:
11524 case REG_DEP_OUTPUT:
11525 if (recog_memoized (insn) >= 0
11526 && recog_memoized (dep) >= 0)
11527 {
11528 if (GET_CODE (PATTERN (insn)) == SET)
11529 {
11530 if (GET_MODE_CLASS
11531 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11532 || GET_MODE_CLASS
11533 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11534 {
11535 enum attr_type attr_type_insn = get_attr_type (insn);
11536 enum attr_type attr_type_dep = get_attr_type (dep);
11537
11538 /* By default all dependencies of the form
11539 s0 = s0 <op> s1
11540 s0 = s0 <op> s2
11541 have an extra latency of 1 cycle because
11542 of the input and output dependency in this
11543 case. However this gets modeled as an true
11544 dependency and hence all these checks. */
11545 if (REG_P (SET_DEST (PATTERN (insn)))
11546 && REG_P (SET_DEST (PATTERN (dep)))
11547 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11548 SET_DEST (PATTERN (dep))))
11549 {
11550 /* FMACS is a special case where the dependent
11551 instruction can be issued 3 cycles before
11552 the normal latency in case of an output
11553 dependency. */
11554 if ((attr_type_insn == TYPE_FMACS
11555 || attr_type_insn == TYPE_FMACD)
11556 && (attr_type_dep == TYPE_FMACS
11557 || attr_type_dep == TYPE_FMACD))
11558 {
11559 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11560 *cost = insn_default_latency (dep) - 3;
11561 else
11562 *cost = insn_default_latency (dep);
11563 return false;
11564 }
11565 else
11566 {
11567 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11568 *cost = insn_default_latency (dep) + 1;
11569 else
11570 *cost = insn_default_latency (dep);
11571 }
11572 return false;
11573 }
11574 }
11575 }
11576 }
11577 break;
11578
11579 default:
11580 gcc_unreachable ();
11581 }
11582
11583 return true;
11584 }
11585
11586 /* Adjust cost hook for FA726TE. */
11587 static bool
11588 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11589 {
11590 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11591 have penalty of 3. */
11592 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11593 && recog_memoized (insn) >= 0
11594 && recog_memoized (dep) >= 0
11595 && get_attr_conds (dep) == CONDS_SET)
11596 {
11597 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11598 if (get_attr_conds (insn) == CONDS_USE
11599 && get_attr_type (insn) != TYPE_BRANCH)
11600 {
11601 *cost = 3;
11602 return false;
11603 }
11604
11605 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11606 || get_attr_conds (insn) == CONDS_USE)
11607 {
11608 *cost = 0;
11609 return false;
11610 }
11611 }
11612
11613 return true;
11614 }
11615
11616 /* Implement TARGET_REGISTER_MOVE_COST.
11617
11618 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11619 it is typically more expensive than a single memory access. We set
11620 the cost to less than two memory accesses so that floating
11621 point to integer conversion does not go through memory. */
11622
11623 int
11624 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11625 reg_class_t from, reg_class_t to)
11626 {
11627 if (TARGET_32BIT)
11628 {
11629 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11630 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11631 return 15;
11632 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11633 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11634 return 4;
11635 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11636 return 20;
11637 else
11638 return 2;
11639 }
11640 else
11641 {
11642 if (from == HI_REGS || to == HI_REGS)
11643 return 4;
11644 else
11645 return 2;
11646 }
11647 }
11648
11649 /* Implement TARGET_MEMORY_MOVE_COST. */
11650
11651 int
11652 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11653 bool in ATTRIBUTE_UNUSED)
11654 {
11655 if (TARGET_32BIT)
11656 return 10;
11657 else
11658 {
11659 if (GET_MODE_SIZE (mode) < 4)
11660 return 8;
11661 else
11662 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11663 }
11664 }
11665
11666 /* Vectorizer cost model implementation. */
11667
11668 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11669 static int
11670 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11671 tree vectype,
11672 int misalign ATTRIBUTE_UNUSED)
11673 {
11674 unsigned elements;
11675
11676 switch (type_of_cost)
11677 {
11678 case scalar_stmt:
11679 return current_tune->vec_costs->scalar_stmt_cost;
11680
11681 case scalar_load:
11682 return current_tune->vec_costs->scalar_load_cost;
11683
11684 case scalar_store:
11685 return current_tune->vec_costs->scalar_store_cost;
11686
11687 case vector_stmt:
11688 return current_tune->vec_costs->vec_stmt_cost;
11689
11690 case vector_load:
11691 return current_tune->vec_costs->vec_align_load_cost;
11692
11693 case vector_store:
11694 return current_tune->vec_costs->vec_store_cost;
11695
11696 case vec_to_scalar:
11697 return current_tune->vec_costs->vec_to_scalar_cost;
11698
11699 case scalar_to_vec:
11700 return current_tune->vec_costs->scalar_to_vec_cost;
11701
11702 case unaligned_load:
11703 return current_tune->vec_costs->vec_unalign_load_cost;
11704
11705 case unaligned_store:
11706 return current_tune->vec_costs->vec_unalign_store_cost;
11707
11708 case cond_branch_taken:
11709 return current_tune->vec_costs->cond_taken_branch_cost;
11710
11711 case cond_branch_not_taken:
11712 return current_tune->vec_costs->cond_not_taken_branch_cost;
11713
11714 case vec_perm:
11715 case vec_promote_demote:
11716 return current_tune->vec_costs->vec_stmt_cost;
11717
11718 case vec_construct:
11719 elements = TYPE_VECTOR_SUBPARTS (vectype);
11720 return elements / 2 + 1;
11721
11722 default:
11723 gcc_unreachable ();
11724 }
11725 }
11726
11727 /* Implement targetm.vectorize.add_stmt_cost. */
11728
11729 static unsigned
11730 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11731 struct _stmt_vec_info *stmt_info, int misalign,
11732 enum vect_cost_model_location where)
11733 {
11734 unsigned *cost = (unsigned *) data;
11735 unsigned retval = 0;
11736
11737 if (flag_vect_cost_model)
11738 {
11739 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11740 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11741
11742 /* Statements in an inner loop relative to the loop being
11743 vectorized are weighted more heavily. The value here is
11744 arbitrary and could potentially be improved with analysis. */
11745 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11746 count *= 50; /* FIXME. */
11747
11748 retval = (unsigned) (count * stmt_cost);
11749 cost[where] += retval;
11750 }
11751
11752 return retval;
11753 }
11754
11755 /* Return true if and only if this insn can dual-issue only as older. */
11756 static bool
11757 cortexa7_older_only (rtx_insn *insn)
11758 {
11759 if (recog_memoized (insn) < 0)
11760 return false;
11761
11762 switch (get_attr_type (insn))
11763 {
11764 case TYPE_ALU_DSP_REG:
11765 case TYPE_ALU_SREG:
11766 case TYPE_ALUS_SREG:
11767 case TYPE_LOGIC_REG:
11768 case TYPE_LOGICS_REG:
11769 case TYPE_ADC_REG:
11770 case TYPE_ADCS_REG:
11771 case TYPE_ADR:
11772 case TYPE_BFM:
11773 case TYPE_REV:
11774 case TYPE_MVN_REG:
11775 case TYPE_SHIFT_IMM:
11776 case TYPE_SHIFT_REG:
11777 case TYPE_LOAD_BYTE:
11778 case TYPE_LOAD1:
11779 case TYPE_STORE1:
11780 case TYPE_FFARITHS:
11781 case TYPE_FADDS:
11782 case TYPE_FFARITHD:
11783 case TYPE_FADDD:
11784 case TYPE_FMOV:
11785 case TYPE_F_CVT:
11786 case TYPE_FCMPS:
11787 case TYPE_FCMPD:
11788 case TYPE_FCONSTS:
11789 case TYPE_FCONSTD:
11790 case TYPE_FMULS:
11791 case TYPE_FMACS:
11792 case TYPE_FMULD:
11793 case TYPE_FMACD:
11794 case TYPE_FDIVS:
11795 case TYPE_FDIVD:
11796 case TYPE_F_MRC:
11797 case TYPE_F_MRRC:
11798 case TYPE_F_FLAG:
11799 case TYPE_F_LOADS:
11800 case TYPE_F_STORES:
11801 return true;
11802 default:
11803 return false;
11804 }
11805 }
11806
11807 /* Return true if and only if this insn can dual-issue as younger. */
11808 static bool
11809 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11810 {
11811 if (recog_memoized (insn) < 0)
11812 {
11813 if (verbose > 5)
11814 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11815 return false;
11816 }
11817
11818 switch (get_attr_type (insn))
11819 {
11820 case TYPE_ALU_IMM:
11821 case TYPE_ALUS_IMM:
11822 case TYPE_LOGIC_IMM:
11823 case TYPE_LOGICS_IMM:
11824 case TYPE_EXTEND:
11825 case TYPE_MVN_IMM:
11826 case TYPE_MOV_IMM:
11827 case TYPE_MOV_REG:
11828 case TYPE_MOV_SHIFT:
11829 case TYPE_MOV_SHIFT_REG:
11830 case TYPE_BRANCH:
11831 case TYPE_CALL:
11832 return true;
11833 default:
11834 return false;
11835 }
11836 }
11837
11838
11839 /* Look for an instruction that can dual issue only as an older
11840 instruction, and move it in front of any instructions that can
11841 dual-issue as younger, while preserving the relative order of all
11842 other instructions in the ready list. This is a hueuristic to help
11843 dual-issue in later cycles, by postponing issue of more flexible
11844 instructions. This heuristic may affect dual issue opportunities
11845 in the current cycle. */
11846 static void
11847 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11848 int *n_readyp, int clock)
11849 {
11850 int i;
11851 int first_older_only = -1, first_younger = -1;
11852
11853 if (verbose > 5)
11854 fprintf (file,
11855 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11856 clock,
11857 *n_readyp);
11858
11859 /* Traverse the ready list from the head (the instruction to issue
11860 first), and looking for the first instruction that can issue as
11861 younger and the first instruction that can dual-issue only as
11862 older. */
11863 for (i = *n_readyp - 1; i >= 0; i--)
11864 {
11865 rtx_insn *insn = ready[i];
11866 if (cortexa7_older_only (insn))
11867 {
11868 first_older_only = i;
11869 if (verbose > 5)
11870 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11871 break;
11872 }
11873 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11874 first_younger = i;
11875 }
11876
11877 /* Nothing to reorder because either no younger insn found or insn
11878 that can dual-issue only as older appears before any insn that
11879 can dual-issue as younger. */
11880 if (first_younger == -1)
11881 {
11882 if (verbose > 5)
11883 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11884 return;
11885 }
11886
11887 /* Nothing to reorder because no older-only insn in the ready list. */
11888 if (first_older_only == -1)
11889 {
11890 if (verbose > 5)
11891 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11892 return;
11893 }
11894
11895 /* Move first_older_only insn before first_younger. */
11896 if (verbose > 5)
11897 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11898 INSN_UID(ready [first_older_only]),
11899 INSN_UID(ready [first_younger]));
11900 rtx_insn *first_older_only_insn = ready [first_older_only];
11901 for (i = first_older_only; i < first_younger; i++)
11902 {
11903 ready[i] = ready[i+1];
11904 }
11905
11906 ready[i] = first_older_only_insn;
11907 return;
11908 }
11909
11910 /* Implement TARGET_SCHED_REORDER. */
11911 static int
11912 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11913 int clock)
11914 {
11915 switch (arm_tune)
11916 {
11917 case cortexa7:
11918 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11919 break;
11920 default:
11921 /* Do nothing for other cores. */
11922 break;
11923 }
11924
11925 return arm_issue_rate ();
11926 }
11927
11928 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11929 It corrects the value of COST based on the relationship between
11930 INSN and DEP through the dependence LINK. It returns the new
11931 value. There is a per-core adjust_cost hook to adjust scheduler costs
11932 and the per-core hook can choose to completely override the generic
11933 adjust_cost function. Only put bits of code into arm_adjust_cost that
11934 are common across all cores. */
11935 static int
11936 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11937 {
11938 rtx i_pat, d_pat;
11939
11940 /* When generating Thumb-1 code, we want to place flag-setting operations
11941 close to a conditional branch which depends on them, so that we can
11942 omit the comparison. */
11943 if (TARGET_THUMB1
11944 && REG_NOTE_KIND (link) == 0
11945 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11946 && recog_memoized (dep) >= 0
11947 && get_attr_conds (dep) == CONDS_SET)
11948 return 0;
11949
11950 if (current_tune->sched_adjust_cost != NULL)
11951 {
11952 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11953 return cost;
11954 }
11955
11956 /* XXX Is this strictly true? */
11957 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11958 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11959 return 0;
11960
11961 /* Call insns don't incur a stall, even if they follow a load. */
11962 if (REG_NOTE_KIND (link) == 0
11963 && CALL_P (insn))
11964 return 1;
11965
11966 if ((i_pat = single_set (insn)) != NULL
11967 && MEM_P (SET_SRC (i_pat))
11968 && (d_pat = single_set (dep)) != NULL
11969 && MEM_P (SET_DEST (d_pat)))
11970 {
11971 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11972 /* This is a load after a store, there is no conflict if the load reads
11973 from a cached area. Assume that loads from the stack, and from the
11974 constant pool are cached, and that others will miss. This is a
11975 hack. */
11976
11977 if ((GET_CODE (src_mem) == SYMBOL_REF
11978 && CONSTANT_POOL_ADDRESS_P (src_mem))
11979 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11980 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11981 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11982 return 1;
11983 }
11984
11985 return cost;
11986 }
11987
11988 int
11989 arm_max_conditional_execute (void)
11990 {
11991 return max_insns_skipped;
11992 }
11993
11994 static int
11995 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11996 {
11997 if (TARGET_32BIT)
11998 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11999 else
12000 return (optimize > 0) ? 2 : 0;
12001 }
12002
12003 static int
12004 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12005 {
12006 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12007 }
12008
12009 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12010 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12011 sequences of non-executed instructions in IT blocks probably take the same
12012 amount of time as executed instructions (and the IT instruction itself takes
12013 space in icache). This function was experimentally determined to give good
12014 results on a popular embedded benchmark. */
12015
12016 static int
12017 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12018 {
12019 return (TARGET_32BIT && speed_p) ? 1
12020 : arm_default_branch_cost (speed_p, predictable_p);
12021 }
12022
12023 static int
12024 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12025 {
12026 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12027 }
12028
12029 static bool fp_consts_inited = false;
12030
12031 static REAL_VALUE_TYPE value_fp0;
12032
12033 static void
12034 init_fp_table (void)
12035 {
12036 REAL_VALUE_TYPE r;
12037
12038 r = REAL_VALUE_ATOF ("0", DFmode);
12039 value_fp0 = r;
12040 fp_consts_inited = true;
12041 }
12042
12043 /* Return TRUE if rtx X is a valid immediate FP constant. */
12044 int
12045 arm_const_double_rtx (rtx x)
12046 {
12047 REAL_VALUE_TYPE r;
12048
12049 if (!fp_consts_inited)
12050 init_fp_table ();
12051
12052 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12053 if (REAL_VALUE_MINUS_ZERO (r))
12054 return 0;
12055
12056 if (REAL_VALUES_EQUAL (r, value_fp0))
12057 return 1;
12058
12059 return 0;
12060 }
12061
12062 /* VFPv3 has a fairly wide range of representable immediates, formed from
12063 "quarter-precision" floating-point values. These can be evaluated using this
12064 formula (with ^ for exponentiation):
12065
12066 -1^s * n * 2^-r
12067
12068 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12069 16 <= n <= 31 and 0 <= r <= 7.
12070
12071 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12072
12073 - A (most-significant) is the sign bit.
12074 - BCD are the exponent (encoded as r XOR 3).
12075 - EFGH are the mantissa (encoded as n - 16).
12076 */
12077
12078 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12079 fconst[sd] instruction, or -1 if X isn't suitable. */
12080 static int
12081 vfp3_const_double_index (rtx x)
12082 {
12083 REAL_VALUE_TYPE r, m;
12084 int sign, exponent;
12085 unsigned HOST_WIDE_INT mantissa, mant_hi;
12086 unsigned HOST_WIDE_INT mask;
12087 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12088 bool fail;
12089
12090 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12091 return -1;
12092
12093 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12094
12095 /* We can't represent these things, so detect them first. */
12096 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12097 return -1;
12098
12099 /* Extract sign, exponent and mantissa. */
12100 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12101 r = real_value_abs (&r);
12102 exponent = REAL_EXP (&r);
12103 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12104 highest (sign) bit, with a fixed binary point at bit point_pos.
12105 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12106 bits for the mantissa, this may fail (low bits would be lost). */
12107 real_ldexp (&m, &r, point_pos - exponent);
12108 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12109 mantissa = w.elt (0);
12110 mant_hi = w.elt (1);
12111
12112 /* If there are bits set in the low part of the mantissa, we can't
12113 represent this value. */
12114 if (mantissa != 0)
12115 return -1;
12116
12117 /* Now make it so that mantissa contains the most-significant bits, and move
12118 the point_pos to indicate that the least-significant bits have been
12119 discarded. */
12120 point_pos -= HOST_BITS_PER_WIDE_INT;
12121 mantissa = mant_hi;
12122
12123 /* We can permit four significant bits of mantissa only, plus a high bit
12124 which is always 1. */
12125 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12126 if ((mantissa & mask) != 0)
12127 return -1;
12128
12129 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12130 mantissa >>= point_pos - 5;
12131
12132 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12133 floating-point immediate zero with Neon using an integer-zero load, but
12134 that case is handled elsewhere.) */
12135 if (mantissa == 0)
12136 return -1;
12137
12138 gcc_assert (mantissa >= 16 && mantissa <= 31);
12139
12140 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12141 normalized significands are in the range [1, 2). (Our mantissa is shifted
12142 left 4 places at this point relative to normalized IEEE754 values). GCC
12143 internally uses [0.5, 1) (see real.c), so the exponent returned from
12144 REAL_EXP must be altered. */
12145 exponent = 5 - exponent;
12146
12147 if (exponent < 0 || exponent > 7)
12148 return -1;
12149
12150 /* Sign, mantissa and exponent are now in the correct form to plug into the
12151 formula described in the comment above. */
12152 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12153 }
12154
12155 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12156 int
12157 vfp3_const_double_rtx (rtx x)
12158 {
12159 if (!TARGET_VFP3)
12160 return 0;
12161
12162 return vfp3_const_double_index (x) != -1;
12163 }
12164
12165 /* Recognize immediates which can be used in various Neon instructions. Legal
12166 immediates are described by the following table (for VMVN variants, the
12167 bitwise inverse of the constant shown is recognized. In either case, VMOV
12168 is output and the correct instruction to use for a given constant is chosen
12169 by the assembler). The constant shown is replicated across all elements of
12170 the destination vector.
12171
12172 insn elems variant constant (binary)
12173 ---- ----- ------- -----------------
12174 vmov i32 0 00000000 00000000 00000000 abcdefgh
12175 vmov i32 1 00000000 00000000 abcdefgh 00000000
12176 vmov i32 2 00000000 abcdefgh 00000000 00000000
12177 vmov i32 3 abcdefgh 00000000 00000000 00000000
12178 vmov i16 4 00000000 abcdefgh
12179 vmov i16 5 abcdefgh 00000000
12180 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12181 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12182 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12183 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12184 vmvn i16 10 00000000 abcdefgh
12185 vmvn i16 11 abcdefgh 00000000
12186 vmov i32 12 00000000 00000000 abcdefgh 11111111
12187 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12188 vmov i32 14 00000000 abcdefgh 11111111 11111111
12189 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12190 vmov i8 16 abcdefgh
12191 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12192 eeeeeeee ffffffff gggggggg hhhhhhhh
12193 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12194 vmov f32 19 00000000 00000000 00000000 00000000
12195
12196 For case 18, B = !b. Representable values are exactly those accepted by
12197 vfp3_const_double_index, but are output as floating-point numbers rather
12198 than indices.
12199
12200 For case 19, we will change it to vmov.i32 when assembling.
12201
12202 Variants 0-5 (inclusive) may also be used as immediates for the second
12203 operand of VORR/VBIC instructions.
12204
12205 The INVERSE argument causes the bitwise inverse of the given operand to be
12206 recognized instead (used for recognizing legal immediates for the VAND/VORN
12207 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12208 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12209 output, rather than the real insns vbic/vorr).
12210
12211 INVERSE makes no difference to the recognition of float vectors.
12212
12213 The return value is the variant of immediate as shown in the above table, or
12214 -1 if the given value doesn't match any of the listed patterns.
12215 */
12216 static int
12217 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12218 rtx *modconst, int *elementwidth)
12219 {
12220 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12221 matches = 1; \
12222 for (i = 0; i < idx; i += (STRIDE)) \
12223 if (!(TEST)) \
12224 matches = 0; \
12225 if (matches) \
12226 { \
12227 immtype = (CLASS); \
12228 elsize = (ELSIZE); \
12229 break; \
12230 }
12231
12232 unsigned int i, elsize = 0, idx = 0, n_elts;
12233 unsigned int innersize;
12234 unsigned char bytes[16];
12235 int immtype = -1, matches;
12236 unsigned int invmask = inverse ? 0xff : 0;
12237 bool vector = GET_CODE (op) == CONST_VECTOR;
12238
12239 if (vector)
12240 {
12241 n_elts = CONST_VECTOR_NUNITS (op);
12242 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12243 }
12244 else
12245 {
12246 n_elts = 1;
12247 if (mode == VOIDmode)
12248 mode = DImode;
12249 innersize = GET_MODE_SIZE (mode);
12250 }
12251
12252 /* Vectors of float constants. */
12253 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12254 {
12255 rtx el0 = CONST_VECTOR_ELT (op, 0);
12256 REAL_VALUE_TYPE r0;
12257
12258 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12259 return -1;
12260
12261 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12262
12263 for (i = 1; i < n_elts; i++)
12264 {
12265 rtx elt = CONST_VECTOR_ELT (op, i);
12266 REAL_VALUE_TYPE re;
12267
12268 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12269
12270 if (!REAL_VALUES_EQUAL (r0, re))
12271 return -1;
12272 }
12273
12274 if (modconst)
12275 *modconst = CONST_VECTOR_ELT (op, 0);
12276
12277 if (elementwidth)
12278 *elementwidth = 0;
12279
12280 if (el0 == CONST0_RTX (GET_MODE (el0)))
12281 return 19;
12282 else
12283 return 18;
12284 }
12285
12286 /* Splat vector constant out into a byte vector. */
12287 for (i = 0; i < n_elts; i++)
12288 {
12289 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12290 unsigned HOST_WIDE_INT elpart;
12291 unsigned int part, parts;
12292
12293 if (CONST_INT_P (el))
12294 {
12295 elpart = INTVAL (el);
12296 parts = 1;
12297 }
12298 else if (CONST_DOUBLE_P (el))
12299 {
12300 elpart = CONST_DOUBLE_LOW (el);
12301 parts = 2;
12302 }
12303 else
12304 gcc_unreachable ();
12305
12306 for (part = 0; part < parts; part++)
12307 {
12308 unsigned int byte;
12309 for (byte = 0; byte < innersize; byte++)
12310 {
12311 bytes[idx++] = (elpart & 0xff) ^ invmask;
12312 elpart >>= BITS_PER_UNIT;
12313 }
12314 if (CONST_DOUBLE_P (el))
12315 elpart = CONST_DOUBLE_HIGH (el);
12316 }
12317 }
12318
12319 /* Sanity check. */
12320 gcc_assert (idx == GET_MODE_SIZE (mode));
12321
12322 do
12323 {
12324 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12325 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12326
12327 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12328 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12329
12330 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12331 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12332
12333 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12334 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12335
12336 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12337
12338 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12339
12340 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12341 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12342
12343 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12344 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12345
12346 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12347 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12348
12349 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12350 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12351
12352 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12353
12354 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12355
12356 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12357 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12358
12359 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12360 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12361
12362 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12363 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12364
12365 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12366 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12367
12368 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12369
12370 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12371 && bytes[i] == bytes[(i + 8) % idx]);
12372 }
12373 while (0);
12374
12375 if (immtype == -1)
12376 return -1;
12377
12378 if (elementwidth)
12379 *elementwidth = elsize;
12380
12381 if (modconst)
12382 {
12383 unsigned HOST_WIDE_INT imm = 0;
12384
12385 /* Un-invert bytes of recognized vector, if necessary. */
12386 if (invmask != 0)
12387 for (i = 0; i < idx; i++)
12388 bytes[i] ^= invmask;
12389
12390 if (immtype == 17)
12391 {
12392 /* FIXME: Broken on 32-bit H_W_I hosts. */
12393 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12394
12395 for (i = 0; i < 8; i++)
12396 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12397 << (i * BITS_PER_UNIT);
12398
12399 *modconst = GEN_INT (imm);
12400 }
12401 else
12402 {
12403 unsigned HOST_WIDE_INT imm = 0;
12404
12405 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12406 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12407
12408 *modconst = GEN_INT (imm);
12409 }
12410 }
12411
12412 return immtype;
12413 #undef CHECK
12414 }
12415
12416 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12417 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12418 float elements), and a modified constant (whatever should be output for a
12419 VMOV) in *MODCONST. */
12420
12421 int
12422 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12423 rtx *modconst, int *elementwidth)
12424 {
12425 rtx tmpconst;
12426 int tmpwidth;
12427 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12428
12429 if (retval == -1)
12430 return 0;
12431
12432 if (modconst)
12433 *modconst = tmpconst;
12434
12435 if (elementwidth)
12436 *elementwidth = tmpwidth;
12437
12438 return 1;
12439 }
12440
12441 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12442 the immediate is valid, write a constant suitable for using as an operand
12443 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12444 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12445
12446 int
12447 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12448 rtx *modconst, int *elementwidth)
12449 {
12450 rtx tmpconst;
12451 int tmpwidth;
12452 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12453
12454 if (retval < 0 || retval > 5)
12455 return 0;
12456
12457 if (modconst)
12458 *modconst = tmpconst;
12459
12460 if (elementwidth)
12461 *elementwidth = tmpwidth;
12462
12463 return 1;
12464 }
12465
12466 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12467 the immediate is valid, write a constant suitable for using as an operand
12468 to VSHR/VSHL to *MODCONST and the corresponding element width to
12469 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12470 because they have different limitations. */
12471
12472 int
12473 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12474 rtx *modconst, int *elementwidth,
12475 bool isleftshift)
12476 {
12477 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12478 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12479 unsigned HOST_WIDE_INT last_elt = 0;
12480 unsigned HOST_WIDE_INT maxshift;
12481
12482 /* Split vector constant out into a byte vector. */
12483 for (i = 0; i < n_elts; i++)
12484 {
12485 rtx el = CONST_VECTOR_ELT (op, i);
12486 unsigned HOST_WIDE_INT elpart;
12487
12488 if (CONST_INT_P (el))
12489 elpart = INTVAL (el);
12490 else if (CONST_DOUBLE_P (el))
12491 return 0;
12492 else
12493 gcc_unreachable ();
12494
12495 if (i != 0 && elpart != last_elt)
12496 return 0;
12497
12498 last_elt = elpart;
12499 }
12500
12501 /* Shift less than element size. */
12502 maxshift = innersize * 8;
12503
12504 if (isleftshift)
12505 {
12506 /* Left shift immediate value can be from 0 to <size>-1. */
12507 if (last_elt >= maxshift)
12508 return 0;
12509 }
12510 else
12511 {
12512 /* Right shift immediate value can be from 1 to <size>. */
12513 if (last_elt == 0 || last_elt > maxshift)
12514 return 0;
12515 }
12516
12517 if (elementwidth)
12518 *elementwidth = innersize * 8;
12519
12520 if (modconst)
12521 *modconst = CONST_VECTOR_ELT (op, 0);
12522
12523 return 1;
12524 }
12525
12526 /* Return a string suitable for output of Neon immediate logic operation
12527 MNEM. */
12528
12529 char *
12530 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12531 int inverse, int quad)
12532 {
12533 int width, is_valid;
12534 static char templ[40];
12535
12536 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12537
12538 gcc_assert (is_valid != 0);
12539
12540 if (quad)
12541 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12542 else
12543 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12544
12545 return templ;
12546 }
12547
12548 /* Return a string suitable for output of Neon immediate shift operation
12549 (VSHR or VSHL) MNEM. */
12550
12551 char *
12552 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12553 machine_mode mode, int quad,
12554 bool isleftshift)
12555 {
12556 int width, is_valid;
12557 static char templ[40];
12558
12559 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12560 gcc_assert (is_valid != 0);
12561
12562 if (quad)
12563 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12564 else
12565 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12566
12567 return templ;
12568 }
12569
12570 /* Output a sequence of pairwise operations to implement a reduction.
12571 NOTE: We do "too much work" here, because pairwise operations work on two
12572 registers-worth of operands in one go. Unfortunately we can't exploit those
12573 extra calculations to do the full operation in fewer steps, I don't think.
12574 Although all vector elements of the result but the first are ignored, we
12575 actually calculate the same result in each of the elements. An alternative
12576 such as initially loading a vector with zero to use as each of the second
12577 operands would use up an additional register and take an extra instruction,
12578 for no particular gain. */
12579
12580 void
12581 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12582 rtx (*reduc) (rtx, rtx, rtx))
12583 {
12584 machine_mode inner = GET_MODE_INNER (mode);
12585 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12586 rtx tmpsum = op1;
12587
12588 for (i = parts / 2; i >= 1; i /= 2)
12589 {
12590 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12591 emit_insn (reduc (dest, tmpsum, tmpsum));
12592 tmpsum = dest;
12593 }
12594 }
12595
12596 /* If VALS is a vector constant that can be loaded into a register
12597 using VDUP, generate instructions to do so and return an RTX to
12598 assign to the register. Otherwise return NULL_RTX. */
12599
12600 static rtx
12601 neon_vdup_constant (rtx vals)
12602 {
12603 machine_mode mode = GET_MODE (vals);
12604 machine_mode inner_mode = GET_MODE_INNER (mode);
12605 int n_elts = GET_MODE_NUNITS (mode);
12606 bool all_same = true;
12607 rtx x;
12608 int i;
12609
12610 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12611 return NULL_RTX;
12612
12613 for (i = 0; i < n_elts; ++i)
12614 {
12615 x = XVECEXP (vals, 0, i);
12616 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12617 all_same = false;
12618 }
12619
12620 if (!all_same)
12621 /* The elements are not all the same. We could handle repeating
12622 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12623 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12624 vdup.i16). */
12625 return NULL_RTX;
12626
12627 /* We can load this constant by using VDUP and a constant in a
12628 single ARM register. This will be cheaper than a vector
12629 load. */
12630
12631 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12632 return gen_rtx_VEC_DUPLICATE (mode, x);
12633 }
12634
12635 /* Generate code to load VALS, which is a PARALLEL containing only
12636 constants (for vec_init) or CONST_VECTOR, efficiently into a
12637 register. Returns an RTX to copy into the register, or NULL_RTX
12638 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12639
12640 rtx
12641 neon_make_constant (rtx vals)
12642 {
12643 machine_mode mode = GET_MODE (vals);
12644 rtx target;
12645 rtx const_vec = NULL_RTX;
12646 int n_elts = GET_MODE_NUNITS (mode);
12647 int n_const = 0;
12648 int i;
12649
12650 if (GET_CODE (vals) == CONST_VECTOR)
12651 const_vec = vals;
12652 else if (GET_CODE (vals) == PARALLEL)
12653 {
12654 /* A CONST_VECTOR must contain only CONST_INTs and
12655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12656 Only store valid constants in a CONST_VECTOR. */
12657 for (i = 0; i < n_elts; ++i)
12658 {
12659 rtx x = XVECEXP (vals, 0, i);
12660 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12661 n_const++;
12662 }
12663 if (n_const == n_elts)
12664 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12665 }
12666 else
12667 gcc_unreachable ();
12668
12669 if (const_vec != NULL
12670 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12671 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12672 return const_vec;
12673 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12674 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12675 pipeline cycle; creating the constant takes one or two ARM
12676 pipeline cycles. */
12677 return target;
12678 else if (const_vec != NULL_RTX)
12679 /* Load from constant pool. On Cortex-A8 this takes two cycles
12680 (for either double or quad vectors). We can not take advantage
12681 of single-cycle VLD1 because we need a PC-relative addressing
12682 mode. */
12683 return const_vec;
12684 else
12685 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12686 We can not construct an initializer. */
12687 return NULL_RTX;
12688 }
12689
12690 /* Initialize vector TARGET to VALS. */
12691
12692 void
12693 neon_expand_vector_init (rtx target, rtx vals)
12694 {
12695 machine_mode mode = GET_MODE (target);
12696 machine_mode inner_mode = GET_MODE_INNER (mode);
12697 int n_elts = GET_MODE_NUNITS (mode);
12698 int n_var = 0, one_var = -1;
12699 bool all_same = true;
12700 rtx x, mem;
12701 int i;
12702
12703 for (i = 0; i < n_elts; ++i)
12704 {
12705 x = XVECEXP (vals, 0, i);
12706 if (!CONSTANT_P (x))
12707 ++n_var, one_var = i;
12708
12709 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12710 all_same = false;
12711 }
12712
12713 if (n_var == 0)
12714 {
12715 rtx constant = neon_make_constant (vals);
12716 if (constant != NULL_RTX)
12717 {
12718 emit_move_insn (target, constant);
12719 return;
12720 }
12721 }
12722
12723 /* Splat a single non-constant element if we can. */
12724 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12725 {
12726 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12727 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12728 return;
12729 }
12730
12731 /* One field is non-constant. Load constant then overwrite varying
12732 field. This is more efficient than using the stack. */
12733 if (n_var == 1)
12734 {
12735 rtx copy = copy_rtx (vals);
12736 rtx index = GEN_INT (one_var);
12737
12738 /* Load constant part of vector, substitute neighboring value for
12739 varying element. */
12740 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12741 neon_expand_vector_init (target, copy);
12742
12743 /* Insert variable. */
12744 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12745 switch (mode)
12746 {
12747 case V8QImode:
12748 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12749 break;
12750 case V16QImode:
12751 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12752 break;
12753 case V4HImode:
12754 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12755 break;
12756 case V8HImode:
12757 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12758 break;
12759 case V2SImode:
12760 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12761 break;
12762 case V4SImode:
12763 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12764 break;
12765 case V2SFmode:
12766 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12767 break;
12768 case V4SFmode:
12769 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12770 break;
12771 case V2DImode:
12772 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12773 break;
12774 default:
12775 gcc_unreachable ();
12776 }
12777 return;
12778 }
12779
12780 /* Construct the vector in memory one field at a time
12781 and load the whole vector. */
12782 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12783 for (i = 0; i < n_elts; i++)
12784 emit_move_insn (adjust_address_nv (mem, inner_mode,
12785 i * GET_MODE_SIZE (inner_mode)),
12786 XVECEXP (vals, 0, i));
12787 emit_move_insn (target, mem);
12788 }
12789
12790 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12791 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12792 reported source locations are bogus. */
12793
12794 static void
12795 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12796 const char *err)
12797 {
12798 HOST_WIDE_INT lane;
12799
12800 gcc_assert (CONST_INT_P (operand));
12801
12802 lane = INTVAL (operand);
12803
12804 if (lane < low || lane >= high)
12805 error (err);
12806 }
12807
12808 /* Bounds-check lanes. */
12809
12810 void
12811 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12812 {
12813 bounds_check (operand, low, high, "lane out of range");
12814 }
12815
12816 /* Bounds-check constants. */
12817
12818 void
12819 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12820 {
12821 bounds_check (operand, low, high, "constant out of range");
12822 }
12823
12824 HOST_WIDE_INT
12825 neon_element_bits (machine_mode mode)
12826 {
12827 if (mode == DImode)
12828 return GET_MODE_BITSIZE (mode);
12829 else
12830 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12831 }
12832
12833 \f
12834 /* Predicates for `match_operand' and `match_operator'. */
12835
12836 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12837 WB is true if full writeback address modes are allowed and is false
12838 if limited writeback address modes (POST_INC and PRE_DEC) are
12839 allowed. */
12840
12841 int
12842 arm_coproc_mem_operand (rtx op, bool wb)
12843 {
12844 rtx ind;
12845
12846 /* Reject eliminable registers. */
12847 if (! (reload_in_progress || reload_completed || lra_in_progress)
12848 && ( reg_mentioned_p (frame_pointer_rtx, op)
12849 || reg_mentioned_p (arg_pointer_rtx, op)
12850 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12851 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12852 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12853 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12854 return FALSE;
12855
12856 /* Constants are converted into offsets from labels. */
12857 if (!MEM_P (op))
12858 return FALSE;
12859
12860 ind = XEXP (op, 0);
12861
12862 if (reload_completed
12863 && (GET_CODE (ind) == LABEL_REF
12864 || (GET_CODE (ind) == CONST
12865 && GET_CODE (XEXP (ind, 0)) == PLUS
12866 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12867 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12868 return TRUE;
12869
12870 /* Match: (mem (reg)). */
12871 if (REG_P (ind))
12872 return arm_address_register_rtx_p (ind, 0);
12873
12874 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12875 acceptable in any case (subject to verification by
12876 arm_address_register_rtx_p). We need WB to be true to accept
12877 PRE_INC and POST_DEC. */
12878 if (GET_CODE (ind) == POST_INC
12879 || GET_CODE (ind) == PRE_DEC
12880 || (wb
12881 && (GET_CODE (ind) == PRE_INC
12882 || GET_CODE (ind) == POST_DEC)))
12883 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12884
12885 if (wb
12886 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12887 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12888 && GET_CODE (XEXP (ind, 1)) == PLUS
12889 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12890 ind = XEXP (ind, 1);
12891
12892 /* Match:
12893 (plus (reg)
12894 (const)). */
12895 if (GET_CODE (ind) == PLUS
12896 && REG_P (XEXP (ind, 0))
12897 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12898 && CONST_INT_P (XEXP (ind, 1))
12899 && INTVAL (XEXP (ind, 1)) > -1024
12900 && INTVAL (XEXP (ind, 1)) < 1024
12901 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12902 return TRUE;
12903
12904 return FALSE;
12905 }
12906
12907 /* Return TRUE if OP is a memory operand which we can load or store a vector
12908 to/from. TYPE is one of the following values:
12909 0 - Vector load/stor (vldr)
12910 1 - Core registers (ldm)
12911 2 - Element/structure loads (vld1)
12912 */
12913 int
12914 neon_vector_mem_operand (rtx op, int type, bool strict)
12915 {
12916 rtx ind;
12917
12918 /* Reject eliminable registers. */
12919 if (! (reload_in_progress || reload_completed)
12920 && ( reg_mentioned_p (frame_pointer_rtx, op)
12921 || reg_mentioned_p (arg_pointer_rtx, op)
12922 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12923 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12924 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12925 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12926 return !strict;
12927
12928 /* Constants are converted into offsets from labels. */
12929 if (!MEM_P (op))
12930 return FALSE;
12931
12932 ind = XEXP (op, 0);
12933
12934 if (reload_completed
12935 && (GET_CODE (ind) == LABEL_REF
12936 || (GET_CODE (ind) == CONST
12937 && GET_CODE (XEXP (ind, 0)) == PLUS
12938 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12939 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12940 return TRUE;
12941
12942 /* Match: (mem (reg)). */
12943 if (REG_P (ind))
12944 return arm_address_register_rtx_p (ind, 0);
12945
12946 /* Allow post-increment with Neon registers. */
12947 if ((type != 1 && GET_CODE (ind) == POST_INC)
12948 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12949 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12950
12951 /* Allow post-increment by register for VLDn */
12952 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12953 && GET_CODE (XEXP (ind, 1)) == PLUS
12954 && REG_P (XEXP (XEXP (ind, 1), 1)))
12955 return true;
12956
12957 /* Match:
12958 (plus (reg)
12959 (const)). */
12960 if (type == 0
12961 && GET_CODE (ind) == PLUS
12962 && REG_P (XEXP (ind, 0))
12963 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12964 && CONST_INT_P (XEXP (ind, 1))
12965 && INTVAL (XEXP (ind, 1)) > -1024
12966 /* For quad modes, we restrict the constant offset to be slightly less
12967 than what the instruction format permits. We have no such constraint
12968 on double mode offsets. (This must match arm_legitimate_index_p.) */
12969 && (INTVAL (XEXP (ind, 1))
12970 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12971 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12972 return TRUE;
12973
12974 return FALSE;
12975 }
12976
12977 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12978 type. */
12979 int
12980 neon_struct_mem_operand (rtx op)
12981 {
12982 rtx ind;
12983
12984 /* Reject eliminable registers. */
12985 if (! (reload_in_progress || reload_completed)
12986 && ( reg_mentioned_p (frame_pointer_rtx, op)
12987 || reg_mentioned_p (arg_pointer_rtx, op)
12988 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12989 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12990 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12991 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12992 return FALSE;
12993
12994 /* Constants are converted into offsets from labels. */
12995 if (!MEM_P (op))
12996 return FALSE;
12997
12998 ind = XEXP (op, 0);
12999
13000 if (reload_completed
13001 && (GET_CODE (ind) == LABEL_REF
13002 || (GET_CODE (ind) == CONST
13003 && GET_CODE (XEXP (ind, 0)) == PLUS
13004 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13005 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13006 return TRUE;
13007
13008 /* Match: (mem (reg)). */
13009 if (REG_P (ind))
13010 return arm_address_register_rtx_p (ind, 0);
13011
13012 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13013 if (GET_CODE (ind) == POST_INC
13014 || GET_CODE (ind) == PRE_DEC)
13015 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13016
13017 return FALSE;
13018 }
13019
13020 /* Return true if X is a register that will be eliminated later on. */
13021 int
13022 arm_eliminable_register (rtx x)
13023 {
13024 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13025 || REGNO (x) == ARG_POINTER_REGNUM
13026 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13027 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13028 }
13029
13030 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13031 coprocessor registers. Otherwise return NO_REGS. */
13032
13033 enum reg_class
13034 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13035 {
13036 if (mode == HFmode)
13037 {
13038 if (!TARGET_NEON_FP16)
13039 return GENERAL_REGS;
13040 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13041 return NO_REGS;
13042 return GENERAL_REGS;
13043 }
13044
13045 /* The neon move patterns handle all legitimate vector and struct
13046 addresses. */
13047 if (TARGET_NEON
13048 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13049 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13050 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13051 || VALID_NEON_STRUCT_MODE (mode)))
13052 return NO_REGS;
13053
13054 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13055 return NO_REGS;
13056
13057 return GENERAL_REGS;
13058 }
13059
13060 /* Values which must be returned in the most-significant end of the return
13061 register. */
13062
13063 static bool
13064 arm_return_in_msb (const_tree valtype)
13065 {
13066 return (TARGET_AAPCS_BASED
13067 && BYTES_BIG_ENDIAN
13068 && (AGGREGATE_TYPE_P (valtype)
13069 || TREE_CODE (valtype) == COMPLEX_TYPE
13070 || FIXED_POINT_TYPE_P (valtype)));
13071 }
13072
13073 /* Return TRUE if X references a SYMBOL_REF. */
13074 int
13075 symbol_mentioned_p (rtx x)
13076 {
13077 const char * fmt;
13078 int i;
13079
13080 if (GET_CODE (x) == SYMBOL_REF)
13081 return 1;
13082
13083 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13084 are constant offsets, not symbols. */
13085 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13086 return 0;
13087
13088 fmt = GET_RTX_FORMAT (GET_CODE (x));
13089
13090 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13091 {
13092 if (fmt[i] == 'E')
13093 {
13094 int j;
13095
13096 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13097 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13098 return 1;
13099 }
13100 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13101 return 1;
13102 }
13103
13104 return 0;
13105 }
13106
13107 /* Return TRUE if X references a LABEL_REF. */
13108 int
13109 label_mentioned_p (rtx x)
13110 {
13111 const char * fmt;
13112 int i;
13113
13114 if (GET_CODE (x) == LABEL_REF)
13115 return 1;
13116
13117 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13118 instruction, but they are constant offsets, not symbols. */
13119 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13120 return 0;
13121
13122 fmt = GET_RTX_FORMAT (GET_CODE (x));
13123 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13124 {
13125 if (fmt[i] == 'E')
13126 {
13127 int j;
13128
13129 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13130 if (label_mentioned_p (XVECEXP (x, i, j)))
13131 return 1;
13132 }
13133 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13134 return 1;
13135 }
13136
13137 return 0;
13138 }
13139
13140 int
13141 tls_mentioned_p (rtx x)
13142 {
13143 switch (GET_CODE (x))
13144 {
13145 case CONST:
13146 return tls_mentioned_p (XEXP (x, 0));
13147
13148 case UNSPEC:
13149 if (XINT (x, 1) == UNSPEC_TLS)
13150 return 1;
13151
13152 default:
13153 return 0;
13154 }
13155 }
13156
13157 /* Must not copy any rtx that uses a pc-relative address. */
13158
13159 static bool
13160 arm_cannot_copy_insn_p (rtx_insn *insn)
13161 {
13162 /* The tls call insn cannot be copied, as it is paired with a data
13163 word. */
13164 if (recog_memoized (insn) == CODE_FOR_tlscall)
13165 return true;
13166
13167 subrtx_iterator::array_type array;
13168 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13169 {
13170 const_rtx x = *iter;
13171 if (GET_CODE (x) == UNSPEC
13172 && (XINT (x, 1) == UNSPEC_PIC_BASE
13173 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13174 return true;
13175 }
13176 return false;
13177 }
13178
13179 enum rtx_code
13180 minmax_code (rtx x)
13181 {
13182 enum rtx_code code = GET_CODE (x);
13183
13184 switch (code)
13185 {
13186 case SMAX:
13187 return GE;
13188 case SMIN:
13189 return LE;
13190 case UMIN:
13191 return LEU;
13192 case UMAX:
13193 return GEU;
13194 default:
13195 gcc_unreachable ();
13196 }
13197 }
13198
13199 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13200
13201 bool
13202 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13203 int *mask, bool *signed_sat)
13204 {
13205 /* The high bound must be a power of two minus one. */
13206 int log = exact_log2 (INTVAL (hi_bound) + 1);
13207 if (log == -1)
13208 return false;
13209
13210 /* The low bound is either zero (for usat) or one less than the
13211 negation of the high bound (for ssat). */
13212 if (INTVAL (lo_bound) == 0)
13213 {
13214 if (mask)
13215 *mask = log;
13216 if (signed_sat)
13217 *signed_sat = false;
13218
13219 return true;
13220 }
13221
13222 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13223 {
13224 if (mask)
13225 *mask = log + 1;
13226 if (signed_sat)
13227 *signed_sat = true;
13228
13229 return true;
13230 }
13231
13232 return false;
13233 }
13234
13235 /* Return 1 if memory locations are adjacent. */
13236 int
13237 adjacent_mem_locations (rtx a, rtx b)
13238 {
13239 /* We don't guarantee to preserve the order of these memory refs. */
13240 if (volatile_refs_p (a) || volatile_refs_p (b))
13241 return 0;
13242
13243 if ((REG_P (XEXP (a, 0))
13244 || (GET_CODE (XEXP (a, 0)) == PLUS
13245 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13246 && (REG_P (XEXP (b, 0))
13247 || (GET_CODE (XEXP (b, 0)) == PLUS
13248 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13249 {
13250 HOST_WIDE_INT val0 = 0, val1 = 0;
13251 rtx reg0, reg1;
13252 int val_diff;
13253
13254 if (GET_CODE (XEXP (a, 0)) == PLUS)
13255 {
13256 reg0 = XEXP (XEXP (a, 0), 0);
13257 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13258 }
13259 else
13260 reg0 = XEXP (a, 0);
13261
13262 if (GET_CODE (XEXP (b, 0)) == PLUS)
13263 {
13264 reg1 = XEXP (XEXP (b, 0), 0);
13265 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13266 }
13267 else
13268 reg1 = XEXP (b, 0);
13269
13270 /* Don't accept any offset that will require multiple
13271 instructions to handle, since this would cause the
13272 arith_adjacentmem pattern to output an overlong sequence. */
13273 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13274 return 0;
13275
13276 /* Don't allow an eliminable register: register elimination can make
13277 the offset too large. */
13278 if (arm_eliminable_register (reg0))
13279 return 0;
13280
13281 val_diff = val1 - val0;
13282
13283 if (arm_ld_sched)
13284 {
13285 /* If the target has load delay slots, then there's no benefit
13286 to using an ldm instruction unless the offset is zero and
13287 we are optimizing for size. */
13288 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13289 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13290 && (val_diff == 4 || val_diff == -4));
13291 }
13292
13293 return ((REGNO (reg0) == REGNO (reg1))
13294 && (val_diff == 4 || val_diff == -4));
13295 }
13296
13297 return 0;
13298 }
13299
13300 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13301 for load operations, false for store operations. CONSECUTIVE is true
13302 if the register numbers in the operation must be consecutive in the register
13303 bank. RETURN_PC is true if value is to be loaded in PC.
13304 The pattern we are trying to match for load is:
13305 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13306 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13307 :
13308 :
13309 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13310 ]
13311 where
13312 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13313 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13314 3. If consecutive is TRUE, then for kth register being loaded,
13315 REGNO (R_dk) = REGNO (R_d0) + k.
13316 The pattern for store is similar. */
13317 bool
13318 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13319 bool consecutive, bool return_pc)
13320 {
13321 HOST_WIDE_INT count = XVECLEN (op, 0);
13322 rtx reg, mem, addr;
13323 unsigned regno;
13324 unsigned first_regno;
13325 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13326 rtx elt;
13327 bool addr_reg_in_reglist = false;
13328 bool update = false;
13329 int reg_increment;
13330 int offset_adj;
13331 int regs_per_val;
13332
13333 /* If not in SImode, then registers must be consecutive
13334 (e.g., VLDM instructions for DFmode). */
13335 gcc_assert ((mode == SImode) || consecutive);
13336 /* Setting return_pc for stores is illegal. */
13337 gcc_assert (!return_pc || load);
13338
13339 /* Set up the increments and the regs per val based on the mode. */
13340 reg_increment = GET_MODE_SIZE (mode);
13341 regs_per_val = reg_increment / 4;
13342 offset_adj = return_pc ? 1 : 0;
13343
13344 if (count <= 1
13345 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13346 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13347 return false;
13348
13349 /* Check if this is a write-back. */
13350 elt = XVECEXP (op, 0, offset_adj);
13351 if (GET_CODE (SET_SRC (elt)) == PLUS)
13352 {
13353 i++;
13354 base = 1;
13355 update = true;
13356
13357 /* The offset adjustment must be the number of registers being
13358 popped times the size of a single register. */
13359 if (!REG_P (SET_DEST (elt))
13360 || !REG_P (XEXP (SET_SRC (elt), 0))
13361 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13362 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13363 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13364 ((count - 1 - offset_adj) * reg_increment))
13365 return false;
13366 }
13367
13368 i = i + offset_adj;
13369 base = base + offset_adj;
13370 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13371 success depends on the type: VLDM can do just one reg,
13372 LDM must do at least two. */
13373 if ((count <= i) && (mode == SImode))
13374 return false;
13375
13376 elt = XVECEXP (op, 0, i - 1);
13377 if (GET_CODE (elt) != SET)
13378 return false;
13379
13380 if (load)
13381 {
13382 reg = SET_DEST (elt);
13383 mem = SET_SRC (elt);
13384 }
13385 else
13386 {
13387 reg = SET_SRC (elt);
13388 mem = SET_DEST (elt);
13389 }
13390
13391 if (!REG_P (reg) || !MEM_P (mem))
13392 return false;
13393
13394 regno = REGNO (reg);
13395 first_regno = regno;
13396 addr = XEXP (mem, 0);
13397 if (GET_CODE (addr) == PLUS)
13398 {
13399 if (!CONST_INT_P (XEXP (addr, 1)))
13400 return false;
13401
13402 offset = INTVAL (XEXP (addr, 1));
13403 addr = XEXP (addr, 0);
13404 }
13405
13406 if (!REG_P (addr))
13407 return false;
13408
13409 /* Don't allow SP to be loaded unless it is also the base register. It
13410 guarantees that SP is reset correctly when an LDM instruction
13411 is interrupted. Otherwise, we might end up with a corrupt stack. */
13412 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13413 return false;
13414
13415 for (; i < count; i++)
13416 {
13417 elt = XVECEXP (op, 0, i);
13418 if (GET_CODE (elt) != SET)
13419 return false;
13420
13421 if (load)
13422 {
13423 reg = SET_DEST (elt);
13424 mem = SET_SRC (elt);
13425 }
13426 else
13427 {
13428 reg = SET_SRC (elt);
13429 mem = SET_DEST (elt);
13430 }
13431
13432 if (!REG_P (reg)
13433 || GET_MODE (reg) != mode
13434 || REGNO (reg) <= regno
13435 || (consecutive
13436 && (REGNO (reg) !=
13437 (unsigned int) (first_regno + regs_per_val * (i - base))))
13438 /* Don't allow SP to be loaded unless it is also the base register. It
13439 guarantees that SP is reset correctly when an LDM instruction
13440 is interrupted. Otherwise, we might end up with a corrupt stack. */
13441 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13442 || !MEM_P (mem)
13443 || GET_MODE (mem) != mode
13444 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13445 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13446 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13447 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13448 offset + (i - base) * reg_increment))
13449 && (!REG_P (XEXP (mem, 0))
13450 || offset + (i - base) * reg_increment != 0)))
13451 return false;
13452
13453 regno = REGNO (reg);
13454 if (regno == REGNO (addr))
13455 addr_reg_in_reglist = true;
13456 }
13457
13458 if (load)
13459 {
13460 if (update && addr_reg_in_reglist)
13461 return false;
13462
13463 /* For Thumb-1, address register is always modified - either by write-back
13464 or by explicit load. If the pattern does not describe an update,
13465 then the address register must be in the list of loaded registers. */
13466 if (TARGET_THUMB1)
13467 return update || addr_reg_in_reglist;
13468 }
13469
13470 return true;
13471 }
13472
13473 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13474 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13475 instruction. ADD_OFFSET is nonzero if the base address register needs
13476 to be modified with an add instruction before we can use it. */
13477
13478 static bool
13479 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13480 int nops, HOST_WIDE_INT add_offset)
13481 {
13482 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13483 if the offset isn't small enough. The reason 2 ldrs are faster
13484 is because these ARMs are able to do more than one cache access
13485 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13486 whilst the ARM8 has a double bandwidth cache. This means that
13487 these cores can do both an instruction fetch and a data fetch in
13488 a single cycle, so the trick of calculating the address into a
13489 scratch register (one of the result regs) and then doing a load
13490 multiple actually becomes slower (and no smaller in code size).
13491 That is the transformation
13492
13493 ldr rd1, [rbase + offset]
13494 ldr rd2, [rbase + offset + 4]
13495
13496 to
13497
13498 add rd1, rbase, offset
13499 ldmia rd1, {rd1, rd2}
13500
13501 produces worse code -- '3 cycles + any stalls on rd2' instead of
13502 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13503 access per cycle, the first sequence could never complete in less
13504 than 6 cycles, whereas the ldm sequence would only take 5 and
13505 would make better use of sequential accesses if not hitting the
13506 cache.
13507
13508 We cheat here and test 'arm_ld_sched' which we currently know to
13509 only be true for the ARM8, ARM9 and StrongARM. If this ever
13510 changes, then the test below needs to be reworked. */
13511 if (nops == 2 && arm_ld_sched && add_offset != 0)
13512 return false;
13513
13514 /* XScale has load-store double instructions, but they have stricter
13515 alignment requirements than load-store multiple, so we cannot
13516 use them.
13517
13518 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13519 the pipeline until completion.
13520
13521 NREGS CYCLES
13522 1 3
13523 2 4
13524 3 5
13525 4 6
13526
13527 An ldr instruction takes 1-3 cycles, but does not block the
13528 pipeline.
13529
13530 NREGS CYCLES
13531 1 1-3
13532 2 2-6
13533 3 3-9
13534 4 4-12
13535
13536 Best case ldr will always win. However, the more ldr instructions
13537 we issue, the less likely we are to be able to schedule them well.
13538 Using ldr instructions also increases code size.
13539
13540 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13541 for counts of 3 or 4 regs. */
13542 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13543 return false;
13544 return true;
13545 }
13546
13547 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13548 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13549 an array ORDER which describes the sequence to use when accessing the
13550 offsets that produces an ascending order. In this sequence, each
13551 offset must be larger by exactly 4 than the previous one. ORDER[0]
13552 must have been filled in with the lowest offset by the caller.
13553 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13554 we use to verify that ORDER produces an ascending order of registers.
13555 Return true if it was possible to construct such an order, false if
13556 not. */
13557
13558 static bool
13559 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13560 int *unsorted_regs)
13561 {
13562 int i;
13563 for (i = 1; i < nops; i++)
13564 {
13565 int j;
13566
13567 order[i] = order[i - 1];
13568 for (j = 0; j < nops; j++)
13569 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13570 {
13571 /* We must find exactly one offset that is higher than the
13572 previous one by 4. */
13573 if (order[i] != order[i - 1])
13574 return false;
13575 order[i] = j;
13576 }
13577 if (order[i] == order[i - 1])
13578 return false;
13579 /* The register numbers must be ascending. */
13580 if (unsorted_regs != NULL
13581 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13582 return false;
13583 }
13584 return true;
13585 }
13586
13587 /* Used to determine in a peephole whether a sequence of load
13588 instructions can be changed into a load-multiple instruction.
13589 NOPS is the number of separate load instructions we are examining. The
13590 first NOPS entries in OPERANDS are the destination registers, the
13591 next NOPS entries are memory operands. If this function is
13592 successful, *BASE is set to the common base register of the memory
13593 accesses; *LOAD_OFFSET is set to the first memory location's offset
13594 from that base register.
13595 REGS is an array filled in with the destination register numbers.
13596 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13597 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13598 the sequence of registers in REGS matches the loads from ascending memory
13599 locations, and the function verifies that the register numbers are
13600 themselves ascending. If CHECK_REGS is false, the register numbers
13601 are stored in the order they are found in the operands. */
13602 static int
13603 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13604 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13605 {
13606 int unsorted_regs[MAX_LDM_STM_OPS];
13607 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13608 int order[MAX_LDM_STM_OPS];
13609 rtx base_reg_rtx = NULL;
13610 int base_reg = -1;
13611 int i, ldm_case;
13612
13613 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13614 easily extended if required. */
13615 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13616
13617 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13618
13619 /* Loop over the operands and check that the memory references are
13620 suitable (i.e. immediate offsets from the same base register). At
13621 the same time, extract the target register, and the memory
13622 offsets. */
13623 for (i = 0; i < nops; i++)
13624 {
13625 rtx reg;
13626 rtx offset;
13627
13628 /* Convert a subreg of a mem into the mem itself. */
13629 if (GET_CODE (operands[nops + i]) == SUBREG)
13630 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13631
13632 gcc_assert (MEM_P (operands[nops + i]));
13633
13634 /* Don't reorder volatile memory references; it doesn't seem worth
13635 looking for the case where the order is ok anyway. */
13636 if (MEM_VOLATILE_P (operands[nops + i]))
13637 return 0;
13638
13639 offset = const0_rtx;
13640
13641 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13642 || (GET_CODE (reg) == SUBREG
13643 && REG_P (reg = SUBREG_REG (reg))))
13644 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13645 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13646 || (GET_CODE (reg) == SUBREG
13647 && REG_P (reg = SUBREG_REG (reg))))
13648 && (CONST_INT_P (offset
13649 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13650 {
13651 if (i == 0)
13652 {
13653 base_reg = REGNO (reg);
13654 base_reg_rtx = reg;
13655 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13656 return 0;
13657 }
13658 else if (base_reg != (int) REGNO (reg))
13659 /* Not addressed from the same base register. */
13660 return 0;
13661
13662 unsorted_regs[i] = (REG_P (operands[i])
13663 ? REGNO (operands[i])
13664 : REGNO (SUBREG_REG (operands[i])));
13665
13666 /* If it isn't an integer register, or if it overwrites the
13667 base register but isn't the last insn in the list, then
13668 we can't do this. */
13669 if (unsorted_regs[i] < 0
13670 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13671 || unsorted_regs[i] > 14
13672 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13673 return 0;
13674
13675 /* Don't allow SP to be loaded unless it is also the base
13676 register. It guarantees that SP is reset correctly when
13677 an LDM instruction is interrupted. Otherwise, we might
13678 end up with a corrupt stack. */
13679 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13680 return 0;
13681
13682 unsorted_offsets[i] = INTVAL (offset);
13683 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13684 order[0] = i;
13685 }
13686 else
13687 /* Not a suitable memory address. */
13688 return 0;
13689 }
13690
13691 /* All the useful information has now been extracted from the
13692 operands into unsorted_regs and unsorted_offsets; additionally,
13693 order[0] has been set to the lowest offset in the list. Sort
13694 the offsets into order, verifying that they are adjacent, and
13695 check that the register numbers are ascending. */
13696 if (!compute_offset_order (nops, unsorted_offsets, order,
13697 check_regs ? unsorted_regs : NULL))
13698 return 0;
13699
13700 if (saved_order)
13701 memcpy (saved_order, order, sizeof order);
13702
13703 if (base)
13704 {
13705 *base = base_reg;
13706
13707 for (i = 0; i < nops; i++)
13708 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13709
13710 *load_offset = unsorted_offsets[order[0]];
13711 }
13712
13713 if (TARGET_THUMB1
13714 && !peep2_reg_dead_p (nops, base_reg_rtx))
13715 return 0;
13716
13717 if (unsorted_offsets[order[0]] == 0)
13718 ldm_case = 1; /* ldmia */
13719 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13720 ldm_case = 2; /* ldmib */
13721 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13722 ldm_case = 3; /* ldmda */
13723 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13724 ldm_case = 4; /* ldmdb */
13725 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13726 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13727 ldm_case = 5;
13728 else
13729 return 0;
13730
13731 if (!multiple_operation_profitable_p (false, nops,
13732 ldm_case == 5
13733 ? unsorted_offsets[order[0]] : 0))
13734 return 0;
13735
13736 return ldm_case;
13737 }
13738
13739 /* Used to determine in a peephole whether a sequence of store instructions can
13740 be changed into a store-multiple instruction.
13741 NOPS is the number of separate store instructions we are examining.
13742 NOPS_TOTAL is the total number of instructions recognized by the peephole
13743 pattern.
13744 The first NOPS entries in OPERANDS are the source registers, the next
13745 NOPS entries are memory operands. If this function is successful, *BASE is
13746 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13747 to the first memory location's offset from that base register. REGS is an
13748 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13749 likewise filled with the corresponding rtx's.
13750 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13751 numbers to an ascending order of stores.
13752 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13753 from ascending memory locations, and the function verifies that the register
13754 numbers are themselves ascending. If CHECK_REGS is false, the register
13755 numbers are stored in the order they are found in the operands. */
13756 static int
13757 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13758 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13759 HOST_WIDE_INT *load_offset, bool check_regs)
13760 {
13761 int unsorted_regs[MAX_LDM_STM_OPS];
13762 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13763 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13764 int order[MAX_LDM_STM_OPS];
13765 int base_reg = -1;
13766 rtx base_reg_rtx = NULL;
13767 int i, stm_case;
13768
13769 /* Write back of base register is currently only supported for Thumb 1. */
13770 int base_writeback = TARGET_THUMB1;
13771
13772 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13773 easily extended if required. */
13774 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13775
13776 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13777
13778 /* Loop over the operands and check that the memory references are
13779 suitable (i.e. immediate offsets from the same base register). At
13780 the same time, extract the target register, and the memory
13781 offsets. */
13782 for (i = 0; i < nops; i++)
13783 {
13784 rtx reg;
13785 rtx offset;
13786
13787 /* Convert a subreg of a mem into the mem itself. */
13788 if (GET_CODE (operands[nops + i]) == SUBREG)
13789 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13790
13791 gcc_assert (MEM_P (operands[nops + i]));
13792
13793 /* Don't reorder volatile memory references; it doesn't seem worth
13794 looking for the case where the order is ok anyway. */
13795 if (MEM_VOLATILE_P (operands[nops + i]))
13796 return 0;
13797
13798 offset = const0_rtx;
13799
13800 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13801 || (GET_CODE (reg) == SUBREG
13802 && REG_P (reg = SUBREG_REG (reg))))
13803 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13804 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13805 || (GET_CODE (reg) == SUBREG
13806 && REG_P (reg = SUBREG_REG (reg))))
13807 && (CONST_INT_P (offset
13808 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13809 {
13810 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13811 ? operands[i] : SUBREG_REG (operands[i]));
13812 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13813
13814 if (i == 0)
13815 {
13816 base_reg = REGNO (reg);
13817 base_reg_rtx = reg;
13818 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13819 return 0;
13820 }
13821 else if (base_reg != (int) REGNO (reg))
13822 /* Not addressed from the same base register. */
13823 return 0;
13824
13825 /* If it isn't an integer register, then we can't do this. */
13826 if (unsorted_regs[i] < 0
13827 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13828 /* The effects are unpredictable if the base register is
13829 both updated and stored. */
13830 || (base_writeback && unsorted_regs[i] == base_reg)
13831 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13832 || unsorted_regs[i] > 14)
13833 return 0;
13834
13835 unsorted_offsets[i] = INTVAL (offset);
13836 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13837 order[0] = i;
13838 }
13839 else
13840 /* Not a suitable memory address. */
13841 return 0;
13842 }
13843
13844 /* All the useful information has now been extracted from the
13845 operands into unsorted_regs and unsorted_offsets; additionally,
13846 order[0] has been set to the lowest offset in the list. Sort
13847 the offsets into order, verifying that they are adjacent, and
13848 check that the register numbers are ascending. */
13849 if (!compute_offset_order (nops, unsorted_offsets, order,
13850 check_regs ? unsorted_regs : NULL))
13851 return 0;
13852
13853 if (saved_order)
13854 memcpy (saved_order, order, sizeof order);
13855
13856 if (base)
13857 {
13858 *base = base_reg;
13859
13860 for (i = 0; i < nops; i++)
13861 {
13862 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13863 if (reg_rtxs)
13864 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13865 }
13866
13867 *load_offset = unsorted_offsets[order[0]];
13868 }
13869
13870 if (TARGET_THUMB1
13871 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13872 return 0;
13873
13874 if (unsorted_offsets[order[0]] == 0)
13875 stm_case = 1; /* stmia */
13876 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13877 stm_case = 2; /* stmib */
13878 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13879 stm_case = 3; /* stmda */
13880 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13881 stm_case = 4; /* stmdb */
13882 else
13883 return 0;
13884
13885 if (!multiple_operation_profitable_p (false, nops, 0))
13886 return 0;
13887
13888 return stm_case;
13889 }
13890 \f
13891 /* Routines for use in generating RTL. */
13892
13893 /* Generate a load-multiple instruction. COUNT is the number of loads in
13894 the instruction; REGS and MEMS are arrays containing the operands.
13895 BASEREG is the base register to be used in addressing the memory operands.
13896 WBACK_OFFSET is nonzero if the instruction should update the base
13897 register. */
13898
13899 static rtx
13900 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13901 HOST_WIDE_INT wback_offset)
13902 {
13903 int i = 0, j;
13904 rtx result;
13905
13906 if (!multiple_operation_profitable_p (false, count, 0))
13907 {
13908 rtx seq;
13909
13910 start_sequence ();
13911
13912 for (i = 0; i < count; i++)
13913 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13914
13915 if (wback_offset != 0)
13916 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13917
13918 seq = get_insns ();
13919 end_sequence ();
13920
13921 return seq;
13922 }
13923
13924 result = gen_rtx_PARALLEL (VOIDmode,
13925 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13926 if (wback_offset != 0)
13927 {
13928 XVECEXP (result, 0, 0)
13929 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13930 i = 1;
13931 count++;
13932 }
13933
13934 for (j = 0; i < count; i++, j++)
13935 XVECEXP (result, 0, i)
13936 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13937
13938 return result;
13939 }
13940
13941 /* Generate a store-multiple instruction. COUNT is the number of stores in
13942 the instruction; REGS and MEMS are arrays containing the operands.
13943 BASEREG is the base register to be used in addressing the memory operands.
13944 WBACK_OFFSET is nonzero if the instruction should update the base
13945 register. */
13946
13947 static rtx
13948 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13949 HOST_WIDE_INT wback_offset)
13950 {
13951 int i = 0, j;
13952 rtx result;
13953
13954 if (GET_CODE (basereg) == PLUS)
13955 basereg = XEXP (basereg, 0);
13956
13957 if (!multiple_operation_profitable_p (false, count, 0))
13958 {
13959 rtx seq;
13960
13961 start_sequence ();
13962
13963 for (i = 0; i < count; i++)
13964 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13965
13966 if (wback_offset != 0)
13967 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13968
13969 seq = get_insns ();
13970 end_sequence ();
13971
13972 return seq;
13973 }
13974
13975 result = gen_rtx_PARALLEL (VOIDmode,
13976 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13977 if (wback_offset != 0)
13978 {
13979 XVECEXP (result, 0, 0)
13980 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13981 i = 1;
13982 count++;
13983 }
13984
13985 for (j = 0; i < count; i++, j++)
13986 XVECEXP (result, 0, i)
13987 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13988
13989 return result;
13990 }
13991
13992 /* Generate either a load-multiple or a store-multiple instruction. This
13993 function can be used in situations where we can start with a single MEM
13994 rtx and adjust its address upwards.
13995 COUNT is the number of operations in the instruction, not counting a
13996 possible update of the base register. REGS is an array containing the
13997 register operands.
13998 BASEREG is the base register to be used in addressing the memory operands,
13999 which are constructed from BASEMEM.
14000 WRITE_BACK specifies whether the generated instruction should include an
14001 update of the base register.
14002 OFFSETP is used to pass an offset to and from this function; this offset
14003 is not used when constructing the address (instead BASEMEM should have an
14004 appropriate offset in its address), it is used only for setting
14005 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14006
14007 static rtx
14008 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14009 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14010 {
14011 rtx mems[MAX_LDM_STM_OPS];
14012 HOST_WIDE_INT offset = *offsetp;
14013 int i;
14014
14015 gcc_assert (count <= MAX_LDM_STM_OPS);
14016
14017 if (GET_CODE (basereg) == PLUS)
14018 basereg = XEXP (basereg, 0);
14019
14020 for (i = 0; i < count; i++)
14021 {
14022 rtx addr = plus_constant (Pmode, basereg, i * 4);
14023 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14024 offset += 4;
14025 }
14026
14027 if (write_back)
14028 *offsetp = offset;
14029
14030 if (is_load)
14031 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14032 write_back ? 4 * count : 0);
14033 else
14034 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14035 write_back ? 4 * count : 0);
14036 }
14037
14038 rtx
14039 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14040 rtx basemem, HOST_WIDE_INT *offsetp)
14041 {
14042 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14043 offsetp);
14044 }
14045
14046 rtx
14047 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14048 rtx basemem, HOST_WIDE_INT *offsetp)
14049 {
14050 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14051 offsetp);
14052 }
14053
14054 /* Called from a peephole2 expander to turn a sequence of loads into an
14055 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14056 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14057 is true if we can reorder the registers because they are used commutatively
14058 subsequently.
14059 Returns true iff we could generate a new instruction. */
14060
14061 bool
14062 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14063 {
14064 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14065 rtx mems[MAX_LDM_STM_OPS];
14066 int i, j, base_reg;
14067 rtx base_reg_rtx;
14068 HOST_WIDE_INT offset;
14069 int write_back = FALSE;
14070 int ldm_case;
14071 rtx addr;
14072
14073 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14074 &base_reg, &offset, !sort_regs);
14075
14076 if (ldm_case == 0)
14077 return false;
14078
14079 if (sort_regs)
14080 for (i = 0; i < nops - 1; i++)
14081 for (j = i + 1; j < nops; j++)
14082 if (regs[i] > regs[j])
14083 {
14084 int t = regs[i];
14085 regs[i] = regs[j];
14086 regs[j] = t;
14087 }
14088 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14089
14090 if (TARGET_THUMB1)
14091 {
14092 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14093 gcc_assert (ldm_case == 1 || ldm_case == 5);
14094 write_back = TRUE;
14095 }
14096
14097 if (ldm_case == 5)
14098 {
14099 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14100 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14101 offset = 0;
14102 if (!TARGET_THUMB1)
14103 {
14104 base_reg = regs[0];
14105 base_reg_rtx = newbase;
14106 }
14107 }
14108
14109 for (i = 0; i < nops; i++)
14110 {
14111 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14112 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14113 SImode, addr, 0);
14114 }
14115 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14116 write_back ? offset + i * 4 : 0));
14117 return true;
14118 }
14119
14120 /* Called from a peephole2 expander to turn a sequence of stores into an
14121 STM instruction. OPERANDS are the operands found by the peephole matcher;
14122 NOPS indicates how many separate stores we are trying to combine.
14123 Returns true iff we could generate a new instruction. */
14124
14125 bool
14126 gen_stm_seq (rtx *operands, int nops)
14127 {
14128 int i;
14129 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14130 rtx mems[MAX_LDM_STM_OPS];
14131 int base_reg;
14132 rtx base_reg_rtx;
14133 HOST_WIDE_INT offset;
14134 int write_back = FALSE;
14135 int stm_case;
14136 rtx addr;
14137 bool base_reg_dies;
14138
14139 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14140 mem_order, &base_reg, &offset, true);
14141
14142 if (stm_case == 0)
14143 return false;
14144
14145 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14146
14147 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14148 if (TARGET_THUMB1)
14149 {
14150 gcc_assert (base_reg_dies);
14151 write_back = TRUE;
14152 }
14153
14154 if (stm_case == 5)
14155 {
14156 gcc_assert (base_reg_dies);
14157 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14158 offset = 0;
14159 }
14160
14161 addr = plus_constant (Pmode, base_reg_rtx, offset);
14162
14163 for (i = 0; i < nops; i++)
14164 {
14165 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14166 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14167 SImode, addr, 0);
14168 }
14169 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14170 write_back ? offset + i * 4 : 0));
14171 return true;
14172 }
14173
14174 /* Called from a peephole2 expander to turn a sequence of stores that are
14175 preceded by constant loads into an STM instruction. OPERANDS are the
14176 operands found by the peephole matcher; NOPS indicates how many
14177 separate stores we are trying to combine; there are 2 * NOPS
14178 instructions in the peephole.
14179 Returns true iff we could generate a new instruction. */
14180
14181 bool
14182 gen_const_stm_seq (rtx *operands, int nops)
14183 {
14184 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14185 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14186 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14187 rtx mems[MAX_LDM_STM_OPS];
14188 int base_reg;
14189 rtx base_reg_rtx;
14190 HOST_WIDE_INT offset;
14191 int write_back = FALSE;
14192 int stm_case;
14193 rtx addr;
14194 bool base_reg_dies;
14195 int i, j;
14196 HARD_REG_SET allocated;
14197
14198 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14199 mem_order, &base_reg, &offset, false);
14200
14201 if (stm_case == 0)
14202 return false;
14203
14204 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14205
14206 /* If the same register is used more than once, try to find a free
14207 register. */
14208 CLEAR_HARD_REG_SET (allocated);
14209 for (i = 0; i < nops; i++)
14210 {
14211 for (j = i + 1; j < nops; j++)
14212 if (regs[i] == regs[j])
14213 {
14214 rtx t = peep2_find_free_register (0, nops * 2,
14215 TARGET_THUMB1 ? "l" : "r",
14216 SImode, &allocated);
14217 if (t == NULL_RTX)
14218 return false;
14219 reg_rtxs[i] = t;
14220 regs[i] = REGNO (t);
14221 }
14222 }
14223
14224 /* Compute an ordering that maps the register numbers to an ascending
14225 sequence. */
14226 reg_order[0] = 0;
14227 for (i = 0; i < nops; i++)
14228 if (regs[i] < regs[reg_order[0]])
14229 reg_order[0] = i;
14230
14231 for (i = 1; i < nops; i++)
14232 {
14233 int this_order = reg_order[i - 1];
14234 for (j = 0; j < nops; j++)
14235 if (regs[j] > regs[reg_order[i - 1]]
14236 && (this_order == reg_order[i - 1]
14237 || regs[j] < regs[this_order]))
14238 this_order = j;
14239 reg_order[i] = this_order;
14240 }
14241
14242 /* Ensure that registers that must be live after the instruction end
14243 up with the correct value. */
14244 for (i = 0; i < nops; i++)
14245 {
14246 int this_order = reg_order[i];
14247 if ((this_order != mem_order[i]
14248 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14249 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14250 return false;
14251 }
14252
14253 /* Load the constants. */
14254 for (i = 0; i < nops; i++)
14255 {
14256 rtx op = operands[2 * nops + mem_order[i]];
14257 sorted_regs[i] = regs[reg_order[i]];
14258 emit_move_insn (reg_rtxs[reg_order[i]], op);
14259 }
14260
14261 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14262
14263 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14264 if (TARGET_THUMB1)
14265 {
14266 gcc_assert (base_reg_dies);
14267 write_back = TRUE;
14268 }
14269
14270 if (stm_case == 5)
14271 {
14272 gcc_assert (base_reg_dies);
14273 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14274 offset = 0;
14275 }
14276
14277 addr = plus_constant (Pmode, base_reg_rtx, offset);
14278
14279 for (i = 0; i < nops; i++)
14280 {
14281 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14282 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14283 SImode, addr, 0);
14284 }
14285 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14286 write_back ? offset + i * 4 : 0));
14287 return true;
14288 }
14289
14290 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14291 unaligned copies on processors which support unaligned semantics for those
14292 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14293 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14294 An interleave factor of 1 (the minimum) will perform no interleaving.
14295 Load/store multiple are used for aligned addresses where possible. */
14296
14297 static void
14298 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14299 HOST_WIDE_INT length,
14300 unsigned int interleave_factor)
14301 {
14302 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14303 int *regnos = XALLOCAVEC (int, interleave_factor);
14304 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14305 HOST_WIDE_INT i, j;
14306 HOST_WIDE_INT remaining = length, words;
14307 rtx halfword_tmp = NULL, byte_tmp = NULL;
14308 rtx dst, src;
14309 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14310 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14311 HOST_WIDE_INT srcoffset, dstoffset;
14312 HOST_WIDE_INT src_autoinc, dst_autoinc;
14313 rtx mem, addr;
14314
14315 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14316
14317 /* Use hard registers if we have aligned source or destination so we can use
14318 load/store multiple with contiguous registers. */
14319 if (dst_aligned || src_aligned)
14320 for (i = 0; i < interleave_factor; i++)
14321 regs[i] = gen_rtx_REG (SImode, i);
14322 else
14323 for (i = 0; i < interleave_factor; i++)
14324 regs[i] = gen_reg_rtx (SImode);
14325
14326 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14327 src = copy_addr_to_reg (XEXP (srcbase, 0));
14328
14329 srcoffset = dstoffset = 0;
14330
14331 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14332 For copying the last bytes we want to subtract this offset again. */
14333 src_autoinc = dst_autoinc = 0;
14334
14335 for (i = 0; i < interleave_factor; i++)
14336 regnos[i] = i;
14337
14338 /* Copy BLOCK_SIZE_BYTES chunks. */
14339
14340 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14341 {
14342 /* Load words. */
14343 if (src_aligned && interleave_factor > 1)
14344 {
14345 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14346 TRUE, srcbase, &srcoffset));
14347 src_autoinc += UNITS_PER_WORD * interleave_factor;
14348 }
14349 else
14350 {
14351 for (j = 0; j < interleave_factor; j++)
14352 {
14353 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14354 - src_autoinc));
14355 mem = adjust_automodify_address (srcbase, SImode, addr,
14356 srcoffset + j * UNITS_PER_WORD);
14357 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14358 }
14359 srcoffset += block_size_bytes;
14360 }
14361
14362 /* Store words. */
14363 if (dst_aligned && interleave_factor > 1)
14364 {
14365 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14366 TRUE, dstbase, &dstoffset));
14367 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14368 }
14369 else
14370 {
14371 for (j = 0; j < interleave_factor; j++)
14372 {
14373 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14374 - dst_autoinc));
14375 mem = adjust_automodify_address (dstbase, SImode, addr,
14376 dstoffset + j * UNITS_PER_WORD);
14377 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14378 }
14379 dstoffset += block_size_bytes;
14380 }
14381
14382 remaining -= block_size_bytes;
14383 }
14384
14385 /* Copy any whole words left (note these aren't interleaved with any
14386 subsequent halfword/byte load/stores in the interests of simplicity). */
14387
14388 words = remaining / UNITS_PER_WORD;
14389
14390 gcc_assert (words < interleave_factor);
14391
14392 if (src_aligned && words > 1)
14393 {
14394 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14395 &srcoffset));
14396 src_autoinc += UNITS_PER_WORD * words;
14397 }
14398 else
14399 {
14400 for (j = 0; j < words; j++)
14401 {
14402 addr = plus_constant (Pmode, src,
14403 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14404 mem = adjust_automodify_address (srcbase, SImode, addr,
14405 srcoffset + j * UNITS_PER_WORD);
14406 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14407 }
14408 srcoffset += words * UNITS_PER_WORD;
14409 }
14410
14411 if (dst_aligned && words > 1)
14412 {
14413 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14414 &dstoffset));
14415 dst_autoinc += words * UNITS_PER_WORD;
14416 }
14417 else
14418 {
14419 for (j = 0; j < words; j++)
14420 {
14421 addr = plus_constant (Pmode, dst,
14422 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14423 mem = adjust_automodify_address (dstbase, SImode, addr,
14424 dstoffset + j * UNITS_PER_WORD);
14425 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14426 }
14427 dstoffset += words * UNITS_PER_WORD;
14428 }
14429
14430 remaining -= words * UNITS_PER_WORD;
14431
14432 gcc_assert (remaining < 4);
14433
14434 /* Copy a halfword if necessary. */
14435
14436 if (remaining >= 2)
14437 {
14438 halfword_tmp = gen_reg_rtx (SImode);
14439
14440 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14441 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14442 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14443
14444 /* Either write out immediately, or delay until we've loaded the last
14445 byte, depending on interleave factor. */
14446 if (interleave_factor == 1)
14447 {
14448 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14449 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14450 emit_insn (gen_unaligned_storehi (mem,
14451 gen_lowpart (HImode, halfword_tmp)));
14452 halfword_tmp = NULL;
14453 dstoffset += 2;
14454 }
14455
14456 remaining -= 2;
14457 srcoffset += 2;
14458 }
14459
14460 gcc_assert (remaining < 2);
14461
14462 /* Copy last byte. */
14463
14464 if ((remaining & 1) != 0)
14465 {
14466 byte_tmp = gen_reg_rtx (SImode);
14467
14468 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14469 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14470 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14471
14472 if (interleave_factor == 1)
14473 {
14474 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14475 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14476 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14477 byte_tmp = NULL;
14478 dstoffset++;
14479 }
14480
14481 remaining--;
14482 srcoffset++;
14483 }
14484
14485 /* Store last halfword if we haven't done so already. */
14486
14487 if (halfword_tmp)
14488 {
14489 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14490 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14491 emit_insn (gen_unaligned_storehi (mem,
14492 gen_lowpart (HImode, halfword_tmp)));
14493 dstoffset += 2;
14494 }
14495
14496 /* Likewise for last byte. */
14497
14498 if (byte_tmp)
14499 {
14500 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14501 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14502 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14503 dstoffset++;
14504 }
14505
14506 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14507 }
14508
14509 /* From mips_adjust_block_mem:
14510
14511 Helper function for doing a loop-based block operation on memory
14512 reference MEM. Each iteration of the loop will operate on LENGTH
14513 bytes of MEM.
14514
14515 Create a new base register for use within the loop and point it to
14516 the start of MEM. Create a new memory reference that uses this
14517 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14518
14519 static void
14520 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14521 rtx *loop_mem)
14522 {
14523 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14524
14525 /* Although the new mem does not refer to a known location,
14526 it does keep up to LENGTH bytes of alignment. */
14527 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14528 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14529 }
14530
14531 /* From mips_block_move_loop:
14532
14533 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14534 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14535 the memory regions do not overlap. */
14536
14537 static void
14538 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14539 unsigned int interleave_factor,
14540 HOST_WIDE_INT bytes_per_iter)
14541 {
14542 rtx src_reg, dest_reg, final_src, test;
14543 HOST_WIDE_INT leftover;
14544
14545 leftover = length % bytes_per_iter;
14546 length -= leftover;
14547
14548 /* Create registers and memory references for use within the loop. */
14549 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14550 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14551
14552 /* Calculate the value that SRC_REG should have after the last iteration of
14553 the loop. */
14554 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14555 0, 0, OPTAB_WIDEN);
14556
14557 /* Emit the start of the loop. */
14558 rtx_code_label *label = gen_label_rtx ();
14559 emit_label (label);
14560
14561 /* Emit the loop body. */
14562 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14563 interleave_factor);
14564
14565 /* Move on to the next block. */
14566 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14567 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14568
14569 /* Emit the loop condition. */
14570 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14571 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14572
14573 /* Mop up any left-over bytes. */
14574 if (leftover)
14575 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14576 }
14577
14578 /* Emit a block move when either the source or destination is unaligned (not
14579 aligned to a four-byte boundary). This may need further tuning depending on
14580 core type, optimize_size setting, etc. */
14581
14582 static int
14583 arm_movmemqi_unaligned (rtx *operands)
14584 {
14585 HOST_WIDE_INT length = INTVAL (operands[2]);
14586
14587 if (optimize_size)
14588 {
14589 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14590 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14591 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14592 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14593 or dst_aligned though: allow more interleaving in those cases since the
14594 resulting code can be smaller. */
14595 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14596 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14597
14598 if (length > 12)
14599 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14600 interleave_factor, bytes_per_iter);
14601 else
14602 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14603 interleave_factor);
14604 }
14605 else
14606 {
14607 /* Note that the loop created by arm_block_move_unaligned_loop may be
14608 subject to loop unrolling, which makes tuning this condition a little
14609 redundant. */
14610 if (length > 32)
14611 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14612 else
14613 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14614 }
14615
14616 return 1;
14617 }
14618
14619 int
14620 arm_gen_movmemqi (rtx *operands)
14621 {
14622 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14623 HOST_WIDE_INT srcoffset, dstoffset;
14624 int i;
14625 rtx src, dst, srcbase, dstbase;
14626 rtx part_bytes_reg = NULL;
14627 rtx mem;
14628
14629 if (!CONST_INT_P (operands[2])
14630 || !CONST_INT_P (operands[3])
14631 || INTVAL (operands[2]) > 64)
14632 return 0;
14633
14634 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14635 return arm_movmemqi_unaligned (operands);
14636
14637 if (INTVAL (operands[3]) & 3)
14638 return 0;
14639
14640 dstbase = operands[0];
14641 srcbase = operands[1];
14642
14643 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14644 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14645
14646 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14647 out_words_to_go = INTVAL (operands[2]) / 4;
14648 last_bytes = INTVAL (operands[2]) & 3;
14649 dstoffset = srcoffset = 0;
14650
14651 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14652 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14653
14654 for (i = 0; in_words_to_go >= 2; i+=4)
14655 {
14656 if (in_words_to_go > 4)
14657 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14658 TRUE, srcbase, &srcoffset));
14659 else
14660 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14661 src, FALSE, srcbase,
14662 &srcoffset));
14663
14664 if (out_words_to_go)
14665 {
14666 if (out_words_to_go > 4)
14667 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14668 TRUE, dstbase, &dstoffset));
14669 else if (out_words_to_go != 1)
14670 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14671 out_words_to_go, dst,
14672 (last_bytes == 0
14673 ? FALSE : TRUE),
14674 dstbase, &dstoffset));
14675 else
14676 {
14677 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14678 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14679 if (last_bytes != 0)
14680 {
14681 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14682 dstoffset += 4;
14683 }
14684 }
14685 }
14686
14687 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14688 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14689 }
14690
14691 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14692 if (out_words_to_go)
14693 {
14694 rtx sreg;
14695
14696 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14697 sreg = copy_to_reg (mem);
14698
14699 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14700 emit_move_insn (mem, sreg);
14701 in_words_to_go--;
14702
14703 gcc_assert (!in_words_to_go); /* Sanity check */
14704 }
14705
14706 if (in_words_to_go)
14707 {
14708 gcc_assert (in_words_to_go > 0);
14709
14710 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14711 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14712 }
14713
14714 gcc_assert (!last_bytes || part_bytes_reg);
14715
14716 if (BYTES_BIG_ENDIAN && last_bytes)
14717 {
14718 rtx tmp = gen_reg_rtx (SImode);
14719
14720 /* The bytes we want are in the top end of the word. */
14721 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14722 GEN_INT (8 * (4 - last_bytes))));
14723 part_bytes_reg = tmp;
14724
14725 while (last_bytes)
14726 {
14727 mem = adjust_automodify_address (dstbase, QImode,
14728 plus_constant (Pmode, dst,
14729 last_bytes - 1),
14730 dstoffset + last_bytes - 1);
14731 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14732
14733 if (--last_bytes)
14734 {
14735 tmp = gen_reg_rtx (SImode);
14736 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14737 part_bytes_reg = tmp;
14738 }
14739 }
14740
14741 }
14742 else
14743 {
14744 if (last_bytes > 1)
14745 {
14746 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14747 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14748 last_bytes -= 2;
14749 if (last_bytes)
14750 {
14751 rtx tmp = gen_reg_rtx (SImode);
14752 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14753 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14754 part_bytes_reg = tmp;
14755 dstoffset += 2;
14756 }
14757 }
14758
14759 if (last_bytes)
14760 {
14761 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14762 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14763 }
14764 }
14765
14766 return 1;
14767 }
14768
14769 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14770 by mode size. */
14771 inline static rtx
14772 next_consecutive_mem (rtx mem)
14773 {
14774 machine_mode mode = GET_MODE (mem);
14775 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14776 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14777
14778 return adjust_automodify_address (mem, mode, addr, offset);
14779 }
14780
14781 /* Copy using LDRD/STRD instructions whenever possible.
14782 Returns true upon success. */
14783 bool
14784 gen_movmem_ldrd_strd (rtx *operands)
14785 {
14786 unsigned HOST_WIDE_INT len;
14787 HOST_WIDE_INT align;
14788 rtx src, dst, base;
14789 rtx reg0;
14790 bool src_aligned, dst_aligned;
14791 bool src_volatile, dst_volatile;
14792
14793 gcc_assert (CONST_INT_P (operands[2]));
14794 gcc_assert (CONST_INT_P (operands[3]));
14795
14796 len = UINTVAL (operands[2]);
14797 if (len > 64)
14798 return false;
14799
14800 /* Maximum alignment we can assume for both src and dst buffers. */
14801 align = INTVAL (operands[3]);
14802
14803 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14804 return false;
14805
14806 /* Place src and dst addresses in registers
14807 and update the corresponding mem rtx. */
14808 dst = operands[0];
14809 dst_volatile = MEM_VOLATILE_P (dst);
14810 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14811 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14812 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14813
14814 src = operands[1];
14815 src_volatile = MEM_VOLATILE_P (src);
14816 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14817 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14818 src = adjust_automodify_address (src, VOIDmode, base, 0);
14819
14820 if (!unaligned_access && !(src_aligned && dst_aligned))
14821 return false;
14822
14823 if (src_volatile || dst_volatile)
14824 return false;
14825
14826 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14827 if (!(dst_aligned || src_aligned))
14828 return arm_gen_movmemqi (operands);
14829
14830 src = adjust_address (src, DImode, 0);
14831 dst = adjust_address (dst, DImode, 0);
14832 while (len >= 8)
14833 {
14834 len -= 8;
14835 reg0 = gen_reg_rtx (DImode);
14836 if (src_aligned)
14837 emit_move_insn (reg0, src);
14838 else
14839 emit_insn (gen_unaligned_loaddi (reg0, src));
14840
14841 if (dst_aligned)
14842 emit_move_insn (dst, reg0);
14843 else
14844 emit_insn (gen_unaligned_storedi (dst, reg0));
14845
14846 src = next_consecutive_mem (src);
14847 dst = next_consecutive_mem (dst);
14848 }
14849
14850 gcc_assert (len < 8);
14851 if (len >= 4)
14852 {
14853 /* More than a word but less than a double-word to copy. Copy a word. */
14854 reg0 = gen_reg_rtx (SImode);
14855 src = adjust_address (src, SImode, 0);
14856 dst = adjust_address (dst, SImode, 0);
14857 if (src_aligned)
14858 emit_move_insn (reg0, src);
14859 else
14860 emit_insn (gen_unaligned_loadsi (reg0, src));
14861
14862 if (dst_aligned)
14863 emit_move_insn (dst, reg0);
14864 else
14865 emit_insn (gen_unaligned_storesi (dst, reg0));
14866
14867 src = next_consecutive_mem (src);
14868 dst = next_consecutive_mem (dst);
14869 len -= 4;
14870 }
14871
14872 if (len == 0)
14873 return true;
14874
14875 /* Copy the remaining bytes. */
14876 if (len >= 2)
14877 {
14878 dst = adjust_address (dst, HImode, 0);
14879 src = adjust_address (src, HImode, 0);
14880 reg0 = gen_reg_rtx (SImode);
14881 if (src_aligned)
14882 emit_insn (gen_zero_extendhisi2 (reg0, src));
14883 else
14884 emit_insn (gen_unaligned_loadhiu (reg0, src));
14885
14886 if (dst_aligned)
14887 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14888 else
14889 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14890
14891 src = next_consecutive_mem (src);
14892 dst = next_consecutive_mem (dst);
14893 if (len == 2)
14894 return true;
14895 }
14896
14897 dst = adjust_address (dst, QImode, 0);
14898 src = adjust_address (src, QImode, 0);
14899 reg0 = gen_reg_rtx (QImode);
14900 emit_move_insn (reg0, src);
14901 emit_move_insn (dst, reg0);
14902 return true;
14903 }
14904
14905 /* Select a dominance comparison mode if possible for a test of the general
14906 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14907 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14908 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14909 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14910 In all cases OP will be either EQ or NE, but we don't need to know which
14911 here. If we are unable to support a dominance comparison we return
14912 CC mode. This will then fail to match for the RTL expressions that
14913 generate this call. */
14914 machine_mode
14915 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14916 {
14917 enum rtx_code cond1, cond2;
14918 int swapped = 0;
14919
14920 /* Currently we will probably get the wrong result if the individual
14921 comparisons are not simple. This also ensures that it is safe to
14922 reverse a comparison if necessary. */
14923 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14924 != CCmode)
14925 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14926 != CCmode))
14927 return CCmode;
14928
14929 /* The if_then_else variant of this tests the second condition if the
14930 first passes, but is true if the first fails. Reverse the first
14931 condition to get a true "inclusive-or" expression. */
14932 if (cond_or == DOM_CC_NX_OR_Y)
14933 cond1 = reverse_condition (cond1);
14934
14935 /* If the comparisons are not equal, and one doesn't dominate the other,
14936 then we can't do this. */
14937 if (cond1 != cond2
14938 && !comparison_dominates_p (cond1, cond2)
14939 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14940 return CCmode;
14941
14942 if (swapped)
14943 std::swap (cond1, cond2);
14944
14945 switch (cond1)
14946 {
14947 case EQ:
14948 if (cond_or == DOM_CC_X_AND_Y)
14949 return CC_DEQmode;
14950
14951 switch (cond2)
14952 {
14953 case EQ: return CC_DEQmode;
14954 case LE: return CC_DLEmode;
14955 case LEU: return CC_DLEUmode;
14956 case GE: return CC_DGEmode;
14957 case GEU: return CC_DGEUmode;
14958 default: gcc_unreachable ();
14959 }
14960
14961 case LT:
14962 if (cond_or == DOM_CC_X_AND_Y)
14963 return CC_DLTmode;
14964
14965 switch (cond2)
14966 {
14967 case LT:
14968 return CC_DLTmode;
14969 case LE:
14970 return CC_DLEmode;
14971 case NE:
14972 return CC_DNEmode;
14973 default:
14974 gcc_unreachable ();
14975 }
14976
14977 case GT:
14978 if (cond_or == DOM_CC_X_AND_Y)
14979 return CC_DGTmode;
14980
14981 switch (cond2)
14982 {
14983 case GT:
14984 return CC_DGTmode;
14985 case GE:
14986 return CC_DGEmode;
14987 case NE:
14988 return CC_DNEmode;
14989 default:
14990 gcc_unreachable ();
14991 }
14992
14993 case LTU:
14994 if (cond_or == DOM_CC_X_AND_Y)
14995 return CC_DLTUmode;
14996
14997 switch (cond2)
14998 {
14999 case LTU:
15000 return CC_DLTUmode;
15001 case LEU:
15002 return CC_DLEUmode;
15003 case NE:
15004 return CC_DNEmode;
15005 default:
15006 gcc_unreachable ();
15007 }
15008
15009 case GTU:
15010 if (cond_or == DOM_CC_X_AND_Y)
15011 return CC_DGTUmode;
15012
15013 switch (cond2)
15014 {
15015 case GTU:
15016 return CC_DGTUmode;
15017 case GEU:
15018 return CC_DGEUmode;
15019 case NE:
15020 return CC_DNEmode;
15021 default:
15022 gcc_unreachable ();
15023 }
15024
15025 /* The remaining cases only occur when both comparisons are the
15026 same. */
15027 case NE:
15028 gcc_assert (cond1 == cond2);
15029 return CC_DNEmode;
15030
15031 case LE:
15032 gcc_assert (cond1 == cond2);
15033 return CC_DLEmode;
15034
15035 case GE:
15036 gcc_assert (cond1 == cond2);
15037 return CC_DGEmode;
15038
15039 case LEU:
15040 gcc_assert (cond1 == cond2);
15041 return CC_DLEUmode;
15042
15043 case GEU:
15044 gcc_assert (cond1 == cond2);
15045 return CC_DGEUmode;
15046
15047 default:
15048 gcc_unreachable ();
15049 }
15050 }
15051
15052 machine_mode
15053 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15054 {
15055 /* All floating point compares return CCFP if it is an equality
15056 comparison, and CCFPE otherwise. */
15057 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15058 {
15059 switch (op)
15060 {
15061 case EQ:
15062 case NE:
15063 case UNORDERED:
15064 case ORDERED:
15065 case UNLT:
15066 case UNLE:
15067 case UNGT:
15068 case UNGE:
15069 case UNEQ:
15070 case LTGT:
15071 return CCFPmode;
15072
15073 case LT:
15074 case LE:
15075 case GT:
15076 case GE:
15077 return CCFPEmode;
15078
15079 default:
15080 gcc_unreachable ();
15081 }
15082 }
15083
15084 /* A compare with a shifted operand. Because of canonicalization, the
15085 comparison will have to be swapped when we emit the assembler. */
15086 if (GET_MODE (y) == SImode
15087 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15088 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15089 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15090 || GET_CODE (x) == ROTATERT))
15091 return CC_SWPmode;
15092
15093 /* This operation is performed swapped, but since we only rely on the Z
15094 flag we don't need an additional mode. */
15095 if (GET_MODE (y) == SImode
15096 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15097 && GET_CODE (x) == NEG
15098 && (op == EQ || op == NE))
15099 return CC_Zmode;
15100
15101 /* This is a special case that is used by combine to allow a
15102 comparison of a shifted byte load to be split into a zero-extend
15103 followed by a comparison of the shifted integer (only valid for
15104 equalities and unsigned inequalities). */
15105 if (GET_MODE (x) == SImode
15106 && GET_CODE (x) == ASHIFT
15107 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15108 && GET_CODE (XEXP (x, 0)) == SUBREG
15109 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15110 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15111 && (op == EQ || op == NE
15112 || op == GEU || op == GTU || op == LTU || op == LEU)
15113 && CONST_INT_P (y))
15114 return CC_Zmode;
15115
15116 /* A construct for a conditional compare, if the false arm contains
15117 0, then both conditions must be true, otherwise either condition
15118 must be true. Not all conditions are possible, so CCmode is
15119 returned if it can't be done. */
15120 if (GET_CODE (x) == IF_THEN_ELSE
15121 && (XEXP (x, 2) == const0_rtx
15122 || XEXP (x, 2) == const1_rtx)
15123 && COMPARISON_P (XEXP (x, 0))
15124 && COMPARISON_P (XEXP (x, 1)))
15125 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15126 INTVAL (XEXP (x, 2)));
15127
15128 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15129 if (GET_CODE (x) == AND
15130 && (op == EQ || op == NE)
15131 && COMPARISON_P (XEXP (x, 0))
15132 && COMPARISON_P (XEXP (x, 1)))
15133 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15134 DOM_CC_X_AND_Y);
15135
15136 if (GET_CODE (x) == IOR
15137 && (op == EQ || op == NE)
15138 && COMPARISON_P (XEXP (x, 0))
15139 && COMPARISON_P (XEXP (x, 1)))
15140 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15141 DOM_CC_X_OR_Y);
15142
15143 /* An operation (on Thumb) where we want to test for a single bit.
15144 This is done by shifting that bit up into the top bit of a
15145 scratch register; we can then branch on the sign bit. */
15146 if (TARGET_THUMB1
15147 && GET_MODE (x) == SImode
15148 && (op == EQ || op == NE)
15149 && GET_CODE (x) == ZERO_EXTRACT
15150 && XEXP (x, 1) == const1_rtx)
15151 return CC_Nmode;
15152
15153 /* An operation that sets the condition codes as a side-effect, the
15154 V flag is not set correctly, so we can only use comparisons where
15155 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15156 instead.) */
15157 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15158 if (GET_MODE (x) == SImode
15159 && y == const0_rtx
15160 && (op == EQ || op == NE || op == LT || op == GE)
15161 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15162 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15163 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15164 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15165 || GET_CODE (x) == LSHIFTRT
15166 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15167 || GET_CODE (x) == ROTATERT
15168 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15169 return CC_NOOVmode;
15170
15171 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15172 return CC_Zmode;
15173
15174 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15175 && GET_CODE (x) == PLUS
15176 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15177 return CC_Cmode;
15178
15179 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15180 {
15181 switch (op)
15182 {
15183 case EQ:
15184 case NE:
15185 /* A DImode comparison against zero can be implemented by
15186 or'ing the two halves together. */
15187 if (y == const0_rtx)
15188 return CC_Zmode;
15189
15190 /* We can do an equality test in three Thumb instructions. */
15191 if (!TARGET_32BIT)
15192 return CC_Zmode;
15193
15194 /* FALLTHROUGH */
15195
15196 case LTU:
15197 case LEU:
15198 case GTU:
15199 case GEU:
15200 /* DImode unsigned comparisons can be implemented by cmp +
15201 cmpeq without a scratch register. Not worth doing in
15202 Thumb-2. */
15203 if (TARGET_32BIT)
15204 return CC_CZmode;
15205
15206 /* FALLTHROUGH */
15207
15208 case LT:
15209 case LE:
15210 case GT:
15211 case GE:
15212 /* DImode signed and unsigned comparisons can be implemented
15213 by cmp + sbcs with a scratch register, but that does not
15214 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15215 gcc_assert (op != EQ && op != NE);
15216 return CC_NCVmode;
15217
15218 default:
15219 gcc_unreachable ();
15220 }
15221 }
15222
15223 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15224 return GET_MODE (x);
15225
15226 return CCmode;
15227 }
15228
15229 /* X and Y are two things to compare using CODE. Emit the compare insn and
15230 return the rtx for register 0 in the proper mode. FP means this is a
15231 floating point compare: I don't think that it is needed on the arm. */
15232 rtx
15233 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15234 {
15235 machine_mode mode;
15236 rtx cc_reg;
15237 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15238
15239 /* We might have X as a constant, Y as a register because of the predicates
15240 used for cmpdi. If so, force X to a register here. */
15241 if (dimode_comparison && !REG_P (x))
15242 x = force_reg (DImode, x);
15243
15244 mode = SELECT_CC_MODE (code, x, y);
15245 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15246
15247 if (dimode_comparison
15248 && mode != CC_CZmode)
15249 {
15250 rtx clobber, set;
15251
15252 /* To compare two non-zero values for equality, XOR them and
15253 then compare against zero. Not used for ARM mode; there
15254 CC_CZmode is cheaper. */
15255 if (mode == CC_Zmode && y != const0_rtx)
15256 {
15257 gcc_assert (!reload_completed);
15258 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15259 y = const0_rtx;
15260 }
15261
15262 /* A scratch register is required. */
15263 if (reload_completed)
15264 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15265 else
15266 scratch = gen_rtx_SCRATCH (SImode);
15267
15268 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15269 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15270 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15271 }
15272 else
15273 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15274
15275 return cc_reg;
15276 }
15277
15278 /* Generate a sequence of insns that will generate the correct return
15279 address mask depending on the physical architecture that the program
15280 is running on. */
15281 rtx
15282 arm_gen_return_addr_mask (void)
15283 {
15284 rtx reg = gen_reg_rtx (Pmode);
15285
15286 emit_insn (gen_return_addr_mask (reg));
15287 return reg;
15288 }
15289
15290 void
15291 arm_reload_in_hi (rtx *operands)
15292 {
15293 rtx ref = operands[1];
15294 rtx base, scratch;
15295 HOST_WIDE_INT offset = 0;
15296
15297 if (GET_CODE (ref) == SUBREG)
15298 {
15299 offset = SUBREG_BYTE (ref);
15300 ref = SUBREG_REG (ref);
15301 }
15302
15303 if (REG_P (ref))
15304 {
15305 /* We have a pseudo which has been spilt onto the stack; there
15306 are two cases here: the first where there is a simple
15307 stack-slot replacement and a second where the stack-slot is
15308 out of range, or is used as a subreg. */
15309 if (reg_equiv_mem (REGNO (ref)))
15310 {
15311 ref = reg_equiv_mem (REGNO (ref));
15312 base = find_replacement (&XEXP (ref, 0));
15313 }
15314 else
15315 /* The slot is out of range, or was dressed up in a SUBREG. */
15316 base = reg_equiv_address (REGNO (ref));
15317 }
15318 else
15319 base = find_replacement (&XEXP (ref, 0));
15320
15321 /* Handle the case where the address is too complex to be offset by 1. */
15322 if (GET_CODE (base) == MINUS
15323 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15324 {
15325 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15326
15327 emit_set_insn (base_plus, base);
15328 base = base_plus;
15329 }
15330 else if (GET_CODE (base) == PLUS)
15331 {
15332 /* The addend must be CONST_INT, or we would have dealt with it above. */
15333 HOST_WIDE_INT hi, lo;
15334
15335 offset += INTVAL (XEXP (base, 1));
15336 base = XEXP (base, 0);
15337
15338 /* Rework the address into a legal sequence of insns. */
15339 /* Valid range for lo is -4095 -> 4095 */
15340 lo = (offset >= 0
15341 ? (offset & 0xfff)
15342 : -((-offset) & 0xfff));
15343
15344 /* Corner case, if lo is the max offset then we would be out of range
15345 once we have added the additional 1 below, so bump the msb into the
15346 pre-loading insn(s). */
15347 if (lo == 4095)
15348 lo &= 0x7ff;
15349
15350 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15351 ^ (HOST_WIDE_INT) 0x80000000)
15352 - (HOST_WIDE_INT) 0x80000000);
15353
15354 gcc_assert (hi + lo == offset);
15355
15356 if (hi != 0)
15357 {
15358 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15359
15360 /* Get the base address; addsi3 knows how to handle constants
15361 that require more than one insn. */
15362 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15363 base = base_plus;
15364 offset = lo;
15365 }
15366 }
15367
15368 /* Operands[2] may overlap operands[0] (though it won't overlap
15369 operands[1]), that's why we asked for a DImode reg -- so we can
15370 use the bit that does not overlap. */
15371 if (REGNO (operands[2]) == REGNO (operands[0]))
15372 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15373 else
15374 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15375
15376 emit_insn (gen_zero_extendqisi2 (scratch,
15377 gen_rtx_MEM (QImode,
15378 plus_constant (Pmode, base,
15379 offset))));
15380 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15381 gen_rtx_MEM (QImode,
15382 plus_constant (Pmode, base,
15383 offset + 1))));
15384 if (!BYTES_BIG_ENDIAN)
15385 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15386 gen_rtx_IOR (SImode,
15387 gen_rtx_ASHIFT
15388 (SImode,
15389 gen_rtx_SUBREG (SImode, operands[0], 0),
15390 GEN_INT (8)),
15391 scratch));
15392 else
15393 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15394 gen_rtx_IOR (SImode,
15395 gen_rtx_ASHIFT (SImode, scratch,
15396 GEN_INT (8)),
15397 gen_rtx_SUBREG (SImode, operands[0], 0)));
15398 }
15399
15400 /* Handle storing a half-word to memory during reload by synthesizing as two
15401 byte stores. Take care not to clobber the input values until after we
15402 have moved them somewhere safe. This code assumes that if the DImode
15403 scratch in operands[2] overlaps either the input value or output address
15404 in some way, then that value must die in this insn (we absolutely need
15405 two scratch registers for some corner cases). */
15406 void
15407 arm_reload_out_hi (rtx *operands)
15408 {
15409 rtx ref = operands[0];
15410 rtx outval = operands[1];
15411 rtx base, scratch;
15412 HOST_WIDE_INT offset = 0;
15413
15414 if (GET_CODE (ref) == SUBREG)
15415 {
15416 offset = SUBREG_BYTE (ref);
15417 ref = SUBREG_REG (ref);
15418 }
15419
15420 if (REG_P (ref))
15421 {
15422 /* We have a pseudo which has been spilt onto the stack; there
15423 are two cases here: the first where there is a simple
15424 stack-slot replacement and a second where the stack-slot is
15425 out of range, or is used as a subreg. */
15426 if (reg_equiv_mem (REGNO (ref)))
15427 {
15428 ref = reg_equiv_mem (REGNO (ref));
15429 base = find_replacement (&XEXP (ref, 0));
15430 }
15431 else
15432 /* The slot is out of range, or was dressed up in a SUBREG. */
15433 base = reg_equiv_address (REGNO (ref));
15434 }
15435 else
15436 base = find_replacement (&XEXP (ref, 0));
15437
15438 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15439
15440 /* Handle the case where the address is too complex to be offset by 1. */
15441 if (GET_CODE (base) == MINUS
15442 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15443 {
15444 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15445
15446 /* Be careful not to destroy OUTVAL. */
15447 if (reg_overlap_mentioned_p (base_plus, outval))
15448 {
15449 /* Updating base_plus might destroy outval, see if we can
15450 swap the scratch and base_plus. */
15451 if (!reg_overlap_mentioned_p (scratch, outval))
15452 std::swap (scratch, base_plus);
15453 else
15454 {
15455 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15456
15457 /* Be conservative and copy OUTVAL into the scratch now,
15458 this should only be necessary if outval is a subreg
15459 of something larger than a word. */
15460 /* XXX Might this clobber base? I can't see how it can,
15461 since scratch is known to overlap with OUTVAL, and
15462 must be wider than a word. */
15463 emit_insn (gen_movhi (scratch_hi, outval));
15464 outval = scratch_hi;
15465 }
15466 }
15467
15468 emit_set_insn (base_plus, base);
15469 base = base_plus;
15470 }
15471 else if (GET_CODE (base) == PLUS)
15472 {
15473 /* The addend must be CONST_INT, or we would have dealt with it above. */
15474 HOST_WIDE_INT hi, lo;
15475
15476 offset += INTVAL (XEXP (base, 1));
15477 base = XEXP (base, 0);
15478
15479 /* Rework the address into a legal sequence of insns. */
15480 /* Valid range for lo is -4095 -> 4095 */
15481 lo = (offset >= 0
15482 ? (offset & 0xfff)
15483 : -((-offset) & 0xfff));
15484
15485 /* Corner case, if lo is the max offset then we would be out of range
15486 once we have added the additional 1 below, so bump the msb into the
15487 pre-loading insn(s). */
15488 if (lo == 4095)
15489 lo &= 0x7ff;
15490
15491 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15492 ^ (HOST_WIDE_INT) 0x80000000)
15493 - (HOST_WIDE_INT) 0x80000000);
15494
15495 gcc_assert (hi + lo == offset);
15496
15497 if (hi != 0)
15498 {
15499 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15500
15501 /* Be careful not to destroy OUTVAL. */
15502 if (reg_overlap_mentioned_p (base_plus, outval))
15503 {
15504 /* Updating base_plus might destroy outval, see if we
15505 can swap the scratch and base_plus. */
15506 if (!reg_overlap_mentioned_p (scratch, outval))
15507 std::swap (scratch, base_plus);
15508 else
15509 {
15510 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15511
15512 /* Be conservative and copy outval into scratch now,
15513 this should only be necessary if outval is a
15514 subreg of something larger than a word. */
15515 /* XXX Might this clobber base? I can't see how it
15516 can, since scratch is known to overlap with
15517 outval. */
15518 emit_insn (gen_movhi (scratch_hi, outval));
15519 outval = scratch_hi;
15520 }
15521 }
15522
15523 /* Get the base address; addsi3 knows how to handle constants
15524 that require more than one insn. */
15525 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15526 base = base_plus;
15527 offset = lo;
15528 }
15529 }
15530
15531 if (BYTES_BIG_ENDIAN)
15532 {
15533 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15534 plus_constant (Pmode, base,
15535 offset + 1)),
15536 gen_lowpart (QImode, outval)));
15537 emit_insn (gen_lshrsi3 (scratch,
15538 gen_rtx_SUBREG (SImode, outval, 0),
15539 GEN_INT (8)));
15540 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15541 offset)),
15542 gen_lowpart (QImode, scratch)));
15543 }
15544 else
15545 {
15546 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15547 offset)),
15548 gen_lowpart (QImode, outval)));
15549 emit_insn (gen_lshrsi3 (scratch,
15550 gen_rtx_SUBREG (SImode, outval, 0),
15551 GEN_INT (8)));
15552 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15553 plus_constant (Pmode, base,
15554 offset + 1)),
15555 gen_lowpart (QImode, scratch)));
15556 }
15557 }
15558
15559 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15560 (padded to the size of a word) should be passed in a register. */
15561
15562 static bool
15563 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15564 {
15565 if (TARGET_AAPCS_BASED)
15566 return must_pass_in_stack_var_size (mode, type);
15567 else
15568 return must_pass_in_stack_var_size_or_pad (mode, type);
15569 }
15570
15571
15572 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15573 Return true if an argument passed on the stack should be padded upwards,
15574 i.e. if the least-significant byte has useful data.
15575 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15576 aggregate types are placed in the lowest memory address. */
15577
15578 bool
15579 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15580 {
15581 if (!TARGET_AAPCS_BASED)
15582 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15583
15584 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15585 return false;
15586
15587 return true;
15588 }
15589
15590
15591 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15592 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15593 register has useful data, and return the opposite if the most
15594 significant byte does. */
15595
15596 bool
15597 arm_pad_reg_upward (machine_mode mode,
15598 tree type, int first ATTRIBUTE_UNUSED)
15599 {
15600 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15601 {
15602 /* For AAPCS, small aggregates, small fixed-point types,
15603 and small complex types are always padded upwards. */
15604 if (type)
15605 {
15606 if ((AGGREGATE_TYPE_P (type)
15607 || TREE_CODE (type) == COMPLEX_TYPE
15608 || FIXED_POINT_TYPE_P (type))
15609 && int_size_in_bytes (type) <= 4)
15610 return true;
15611 }
15612 else
15613 {
15614 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15615 && GET_MODE_SIZE (mode) <= 4)
15616 return true;
15617 }
15618 }
15619
15620 /* Otherwise, use default padding. */
15621 return !BYTES_BIG_ENDIAN;
15622 }
15623
15624 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15625 assuming that the address in the base register is word aligned. */
15626 bool
15627 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15628 {
15629 HOST_WIDE_INT max_offset;
15630
15631 /* Offset must be a multiple of 4 in Thumb mode. */
15632 if (TARGET_THUMB2 && ((offset & 3) != 0))
15633 return false;
15634
15635 if (TARGET_THUMB2)
15636 max_offset = 1020;
15637 else if (TARGET_ARM)
15638 max_offset = 255;
15639 else
15640 return false;
15641
15642 return ((offset <= max_offset) && (offset >= -max_offset));
15643 }
15644
15645 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15646 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15647 Assumes that the address in the base register RN is word aligned. Pattern
15648 guarantees that both memory accesses use the same base register,
15649 the offsets are constants within the range, and the gap between the offsets is 4.
15650 If preload complete then check that registers are legal. WBACK indicates whether
15651 address is updated. LOAD indicates whether memory access is load or store. */
15652 bool
15653 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15654 bool wback, bool load)
15655 {
15656 unsigned int t, t2, n;
15657
15658 if (!reload_completed)
15659 return true;
15660
15661 if (!offset_ok_for_ldrd_strd (offset))
15662 return false;
15663
15664 t = REGNO (rt);
15665 t2 = REGNO (rt2);
15666 n = REGNO (rn);
15667
15668 if ((TARGET_THUMB2)
15669 && ((wback && (n == t || n == t2))
15670 || (t == SP_REGNUM)
15671 || (t == PC_REGNUM)
15672 || (t2 == SP_REGNUM)
15673 || (t2 == PC_REGNUM)
15674 || (!load && (n == PC_REGNUM))
15675 || (load && (t == t2))
15676 /* Triggers Cortex-M3 LDRD errata. */
15677 || (!wback && load && fix_cm3_ldrd && (n == t))))
15678 return false;
15679
15680 if ((TARGET_ARM)
15681 && ((wback && (n == t || n == t2))
15682 || (t2 == PC_REGNUM)
15683 || (t % 2 != 0) /* First destination register is not even. */
15684 || (t2 != t + 1)
15685 /* PC can be used as base register (for offset addressing only),
15686 but it is depricated. */
15687 || (n == PC_REGNUM)))
15688 return false;
15689
15690 return true;
15691 }
15692
15693 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15694 operand MEM's address contains an immediate offset from the base
15695 register and has no side effects, in which case it sets BASE and
15696 OFFSET accordingly. */
15697 static bool
15698 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15699 {
15700 rtx addr;
15701
15702 gcc_assert (base != NULL && offset != NULL);
15703
15704 /* TODO: Handle more general memory operand patterns, such as
15705 PRE_DEC and PRE_INC. */
15706
15707 if (side_effects_p (mem))
15708 return false;
15709
15710 /* Can't deal with subregs. */
15711 if (GET_CODE (mem) == SUBREG)
15712 return false;
15713
15714 gcc_assert (MEM_P (mem));
15715
15716 *offset = const0_rtx;
15717
15718 addr = XEXP (mem, 0);
15719
15720 /* If addr isn't valid for DImode, then we can't handle it. */
15721 if (!arm_legitimate_address_p (DImode, addr,
15722 reload_in_progress || reload_completed))
15723 return false;
15724
15725 if (REG_P (addr))
15726 {
15727 *base = addr;
15728 return true;
15729 }
15730 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15731 {
15732 *base = XEXP (addr, 0);
15733 *offset = XEXP (addr, 1);
15734 return (REG_P (*base) && CONST_INT_P (*offset));
15735 }
15736
15737 return false;
15738 }
15739
15740 /* Called from a peephole2 to replace two word-size accesses with a
15741 single LDRD/STRD instruction. Returns true iff we can generate a
15742 new instruction sequence. That is, both accesses use the same base
15743 register and the gap between constant offsets is 4. This function
15744 may reorder its operands to match ldrd/strd RTL templates.
15745 OPERANDS are the operands found by the peephole matcher;
15746 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15747 corresponding memory operands. LOAD indicaates whether the access
15748 is load or store. CONST_STORE indicates a store of constant
15749 integer values held in OPERANDS[4,5] and assumes that the pattern
15750 is of length 4 insn, for the purpose of checking dead registers.
15751 COMMUTE indicates that register operands may be reordered. */
15752 bool
15753 gen_operands_ldrd_strd (rtx *operands, bool load,
15754 bool const_store, bool commute)
15755 {
15756 int nops = 2;
15757 HOST_WIDE_INT offsets[2], offset;
15758 rtx base = NULL_RTX;
15759 rtx cur_base, cur_offset, tmp;
15760 int i, gap;
15761 HARD_REG_SET regset;
15762
15763 gcc_assert (!const_store || !load);
15764 /* Check that the memory references are immediate offsets from the
15765 same base register. Extract the base register, the destination
15766 registers, and the corresponding memory offsets. */
15767 for (i = 0; i < nops; i++)
15768 {
15769 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15770 return false;
15771
15772 if (i == 0)
15773 base = cur_base;
15774 else if (REGNO (base) != REGNO (cur_base))
15775 return false;
15776
15777 offsets[i] = INTVAL (cur_offset);
15778 if (GET_CODE (operands[i]) == SUBREG)
15779 {
15780 tmp = SUBREG_REG (operands[i]);
15781 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15782 operands[i] = tmp;
15783 }
15784 }
15785
15786 /* Make sure there is no dependency between the individual loads. */
15787 if (load && REGNO (operands[0]) == REGNO (base))
15788 return false; /* RAW */
15789
15790 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15791 return false; /* WAW */
15792
15793 /* If the same input register is used in both stores
15794 when storing different constants, try to find a free register.
15795 For example, the code
15796 mov r0, 0
15797 str r0, [r2]
15798 mov r0, 1
15799 str r0, [r2, #4]
15800 can be transformed into
15801 mov r1, 0
15802 strd r1, r0, [r2]
15803 in Thumb mode assuming that r1 is free. */
15804 if (const_store
15805 && REGNO (operands[0]) == REGNO (operands[1])
15806 && INTVAL (operands[4]) != INTVAL (operands[5]))
15807 {
15808 if (TARGET_THUMB2)
15809 {
15810 CLEAR_HARD_REG_SET (regset);
15811 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15812 if (tmp == NULL_RTX)
15813 return false;
15814
15815 /* Use the new register in the first load to ensure that
15816 if the original input register is not dead after peephole,
15817 then it will have the correct constant value. */
15818 operands[0] = tmp;
15819 }
15820 else if (TARGET_ARM)
15821 {
15822 return false;
15823 int regno = REGNO (operands[0]);
15824 if (!peep2_reg_dead_p (4, operands[0]))
15825 {
15826 /* When the input register is even and is not dead after the
15827 pattern, it has to hold the second constant but we cannot
15828 form a legal STRD in ARM mode with this register as the second
15829 register. */
15830 if (regno % 2 == 0)
15831 return false;
15832
15833 /* Is regno-1 free? */
15834 SET_HARD_REG_SET (regset);
15835 CLEAR_HARD_REG_BIT(regset, regno - 1);
15836 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15837 if (tmp == NULL_RTX)
15838 return false;
15839
15840 operands[0] = tmp;
15841 }
15842 else
15843 {
15844 /* Find a DImode register. */
15845 CLEAR_HARD_REG_SET (regset);
15846 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15847 if (tmp != NULL_RTX)
15848 {
15849 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15850 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15851 }
15852 else
15853 {
15854 /* Can we use the input register to form a DI register? */
15855 SET_HARD_REG_SET (regset);
15856 CLEAR_HARD_REG_BIT(regset,
15857 regno % 2 == 0 ? regno + 1 : regno - 1);
15858 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15859 if (tmp == NULL_RTX)
15860 return false;
15861 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15862 }
15863 }
15864
15865 gcc_assert (operands[0] != NULL_RTX);
15866 gcc_assert (operands[1] != NULL_RTX);
15867 gcc_assert (REGNO (operands[0]) % 2 == 0);
15868 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15869 }
15870 }
15871
15872 /* Make sure the instructions are ordered with lower memory access first. */
15873 if (offsets[0] > offsets[1])
15874 {
15875 gap = offsets[0] - offsets[1];
15876 offset = offsets[1];
15877
15878 /* Swap the instructions such that lower memory is accessed first. */
15879 std::swap (operands[0], operands[1]);
15880 std::swap (operands[2], operands[3]);
15881 if (const_store)
15882 std::swap (operands[4], operands[5]);
15883 }
15884 else
15885 {
15886 gap = offsets[1] - offsets[0];
15887 offset = offsets[0];
15888 }
15889
15890 /* Make sure accesses are to consecutive memory locations. */
15891 if (gap != 4)
15892 return false;
15893
15894 /* Make sure we generate legal instructions. */
15895 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15896 false, load))
15897 return true;
15898
15899 /* In Thumb state, where registers are almost unconstrained, there
15900 is little hope to fix it. */
15901 if (TARGET_THUMB2)
15902 return false;
15903
15904 if (load && commute)
15905 {
15906 /* Try reordering registers. */
15907 std::swap (operands[0], operands[1]);
15908 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15909 false, load))
15910 return true;
15911 }
15912
15913 if (const_store)
15914 {
15915 /* If input registers are dead after this pattern, they can be
15916 reordered or replaced by other registers that are free in the
15917 current pattern. */
15918 if (!peep2_reg_dead_p (4, operands[0])
15919 || !peep2_reg_dead_p (4, operands[1]))
15920 return false;
15921
15922 /* Try to reorder the input registers. */
15923 /* For example, the code
15924 mov r0, 0
15925 mov r1, 1
15926 str r1, [r2]
15927 str r0, [r2, #4]
15928 can be transformed into
15929 mov r1, 0
15930 mov r0, 1
15931 strd r0, [r2]
15932 */
15933 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15934 false, false))
15935 {
15936 std::swap (operands[0], operands[1]);
15937 return true;
15938 }
15939
15940 /* Try to find a free DI register. */
15941 CLEAR_HARD_REG_SET (regset);
15942 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15943 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15944 while (true)
15945 {
15946 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15947 if (tmp == NULL_RTX)
15948 return false;
15949
15950 /* DREG must be an even-numbered register in DImode.
15951 Split it into SI registers. */
15952 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15953 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15954 gcc_assert (operands[0] != NULL_RTX);
15955 gcc_assert (operands[1] != NULL_RTX);
15956 gcc_assert (REGNO (operands[0]) % 2 == 0);
15957 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15958
15959 return (operands_ok_ldrd_strd (operands[0], operands[1],
15960 base, offset,
15961 false, load));
15962 }
15963 }
15964
15965 return false;
15966 }
15967
15968
15969
15970 \f
15971 /* Print a symbolic form of X to the debug file, F. */
15972 static void
15973 arm_print_value (FILE *f, rtx x)
15974 {
15975 switch (GET_CODE (x))
15976 {
15977 case CONST_INT:
15978 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15979 return;
15980
15981 case CONST_DOUBLE:
15982 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15983 return;
15984
15985 case CONST_VECTOR:
15986 {
15987 int i;
15988
15989 fprintf (f, "<");
15990 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15991 {
15992 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15993 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15994 fputc (',', f);
15995 }
15996 fprintf (f, ">");
15997 }
15998 return;
15999
16000 case CONST_STRING:
16001 fprintf (f, "\"%s\"", XSTR (x, 0));
16002 return;
16003
16004 case SYMBOL_REF:
16005 fprintf (f, "`%s'", XSTR (x, 0));
16006 return;
16007
16008 case LABEL_REF:
16009 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16010 return;
16011
16012 case CONST:
16013 arm_print_value (f, XEXP (x, 0));
16014 return;
16015
16016 case PLUS:
16017 arm_print_value (f, XEXP (x, 0));
16018 fprintf (f, "+");
16019 arm_print_value (f, XEXP (x, 1));
16020 return;
16021
16022 case PC:
16023 fprintf (f, "pc");
16024 return;
16025
16026 default:
16027 fprintf (f, "????");
16028 return;
16029 }
16030 }
16031 \f
16032 /* Routines for manipulation of the constant pool. */
16033
16034 /* Arm instructions cannot load a large constant directly into a
16035 register; they have to come from a pc relative load. The constant
16036 must therefore be placed in the addressable range of the pc
16037 relative load. Depending on the precise pc relative load
16038 instruction the range is somewhere between 256 bytes and 4k. This
16039 means that we often have to dump a constant inside a function, and
16040 generate code to branch around it.
16041
16042 It is important to minimize this, since the branches will slow
16043 things down and make the code larger.
16044
16045 Normally we can hide the table after an existing unconditional
16046 branch so that there is no interruption of the flow, but in the
16047 worst case the code looks like this:
16048
16049 ldr rn, L1
16050 ...
16051 b L2
16052 align
16053 L1: .long value
16054 L2:
16055 ...
16056
16057 ldr rn, L3
16058 ...
16059 b L4
16060 align
16061 L3: .long value
16062 L4:
16063 ...
16064
16065 We fix this by performing a scan after scheduling, which notices
16066 which instructions need to have their operands fetched from the
16067 constant table and builds the table.
16068
16069 The algorithm starts by building a table of all the constants that
16070 need fixing up and all the natural barriers in the function (places
16071 where a constant table can be dropped without breaking the flow).
16072 For each fixup we note how far the pc-relative replacement will be
16073 able to reach and the offset of the instruction into the function.
16074
16075 Having built the table we then group the fixes together to form
16076 tables that are as large as possible (subject to addressing
16077 constraints) and emit each table of constants after the last
16078 barrier that is within range of all the instructions in the group.
16079 If a group does not contain a barrier, then we forcibly create one
16080 by inserting a jump instruction into the flow. Once the table has
16081 been inserted, the insns are then modified to reference the
16082 relevant entry in the pool.
16083
16084 Possible enhancements to the algorithm (not implemented) are:
16085
16086 1) For some processors and object formats, there may be benefit in
16087 aligning the pools to the start of cache lines; this alignment
16088 would need to be taken into account when calculating addressability
16089 of a pool. */
16090
16091 /* These typedefs are located at the start of this file, so that
16092 they can be used in the prototypes there. This comment is to
16093 remind readers of that fact so that the following structures
16094 can be understood more easily.
16095
16096 typedef struct minipool_node Mnode;
16097 typedef struct minipool_fixup Mfix; */
16098
16099 struct minipool_node
16100 {
16101 /* Doubly linked chain of entries. */
16102 Mnode * next;
16103 Mnode * prev;
16104 /* The maximum offset into the code that this entry can be placed. While
16105 pushing fixes for forward references, all entries are sorted in order
16106 of increasing max_address. */
16107 HOST_WIDE_INT max_address;
16108 /* Similarly for an entry inserted for a backwards ref. */
16109 HOST_WIDE_INT min_address;
16110 /* The number of fixes referencing this entry. This can become zero
16111 if we "unpush" an entry. In this case we ignore the entry when we
16112 come to emit the code. */
16113 int refcount;
16114 /* The offset from the start of the minipool. */
16115 HOST_WIDE_INT offset;
16116 /* The value in table. */
16117 rtx value;
16118 /* The mode of value. */
16119 machine_mode mode;
16120 /* The size of the value. With iWMMXt enabled
16121 sizes > 4 also imply an alignment of 8-bytes. */
16122 int fix_size;
16123 };
16124
16125 struct minipool_fixup
16126 {
16127 Mfix * next;
16128 rtx_insn * insn;
16129 HOST_WIDE_INT address;
16130 rtx * loc;
16131 machine_mode mode;
16132 int fix_size;
16133 rtx value;
16134 Mnode * minipool;
16135 HOST_WIDE_INT forwards;
16136 HOST_WIDE_INT backwards;
16137 };
16138
16139 /* Fixes less than a word need padding out to a word boundary. */
16140 #define MINIPOOL_FIX_SIZE(mode) \
16141 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16142
16143 static Mnode * minipool_vector_head;
16144 static Mnode * minipool_vector_tail;
16145 static rtx_code_label *minipool_vector_label;
16146 static int minipool_pad;
16147
16148 /* The linked list of all minipool fixes required for this function. */
16149 Mfix * minipool_fix_head;
16150 Mfix * minipool_fix_tail;
16151 /* The fix entry for the current minipool, once it has been placed. */
16152 Mfix * minipool_barrier;
16153
16154 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16155 #define JUMP_TABLES_IN_TEXT_SECTION 0
16156 #endif
16157
16158 static HOST_WIDE_INT
16159 get_jump_table_size (rtx_jump_table_data *insn)
16160 {
16161 /* ADDR_VECs only take room if read-only data does into the text
16162 section. */
16163 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16164 {
16165 rtx body = PATTERN (insn);
16166 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16167 HOST_WIDE_INT size;
16168 HOST_WIDE_INT modesize;
16169
16170 modesize = GET_MODE_SIZE (GET_MODE (body));
16171 size = modesize * XVECLEN (body, elt);
16172 switch (modesize)
16173 {
16174 case 1:
16175 /* Round up size of TBB table to a halfword boundary. */
16176 size = (size + 1) & ~(HOST_WIDE_INT)1;
16177 break;
16178 case 2:
16179 /* No padding necessary for TBH. */
16180 break;
16181 case 4:
16182 /* Add two bytes for alignment on Thumb. */
16183 if (TARGET_THUMB)
16184 size += 2;
16185 break;
16186 default:
16187 gcc_unreachable ();
16188 }
16189 return size;
16190 }
16191
16192 return 0;
16193 }
16194
16195 /* Return the maximum amount of padding that will be inserted before
16196 label LABEL. */
16197
16198 static HOST_WIDE_INT
16199 get_label_padding (rtx label)
16200 {
16201 HOST_WIDE_INT align, min_insn_size;
16202
16203 align = 1 << label_to_alignment (label);
16204 min_insn_size = TARGET_THUMB ? 2 : 4;
16205 return align > min_insn_size ? align - min_insn_size : 0;
16206 }
16207
16208 /* Move a minipool fix MP from its current location to before MAX_MP.
16209 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16210 constraints may need updating. */
16211 static Mnode *
16212 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16213 HOST_WIDE_INT max_address)
16214 {
16215 /* The code below assumes these are different. */
16216 gcc_assert (mp != max_mp);
16217
16218 if (max_mp == NULL)
16219 {
16220 if (max_address < mp->max_address)
16221 mp->max_address = max_address;
16222 }
16223 else
16224 {
16225 if (max_address > max_mp->max_address - mp->fix_size)
16226 mp->max_address = max_mp->max_address - mp->fix_size;
16227 else
16228 mp->max_address = max_address;
16229
16230 /* Unlink MP from its current position. Since max_mp is non-null,
16231 mp->prev must be non-null. */
16232 mp->prev->next = mp->next;
16233 if (mp->next != NULL)
16234 mp->next->prev = mp->prev;
16235 else
16236 minipool_vector_tail = mp->prev;
16237
16238 /* Re-insert it before MAX_MP. */
16239 mp->next = max_mp;
16240 mp->prev = max_mp->prev;
16241 max_mp->prev = mp;
16242
16243 if (mp->prev != NULL)
16244 mp->prev->next = mp;
16245 else
16246 minipool_vector_head = mp;
16247 }
16248
16249 /* Save the new entry. */
16250 max_mp = mp;
16251
16252 /* Scan over the preceding entries and adjust their addresses as
16253 required. */
16254 while (mp->prev != NULL
16255 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16256 {
16257 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16258 mp = mp->prev;
16259 }
16260
16261 return max_mp;
16262 }
16263
16264 /* Add a constant to the minipool for a forward reference. Returns the
16265 node added or NULL if the constant will not fit in this pool. */
16266 static Mnode *
16267 add_minipool_forward_ref (Mfix *fix)
16268 {
16269 /* If set, max_mp is the first pool_entry that has a lower
16270 constraint than the one we are trying to add. */
16271 Mnode * max_mp = NULL;
16272 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16273 Mnode * mp;
16274
16275 /* If the minipool starts before the end of FIX->INSN then this FIX
16276 can not be placed into the current pool. Furthermore, adding the
16277 new constant pool entry may cause the pool to start FIX_SIZE bytes
16278 earlier. */
16279 if (minipool_vector_head &&
16280 (fix->address + get_attr_length (fix->insn)
16281 >= minipool_vector_head->max_address - fix->fix_size))
16282 return NULL;
16283
16284 /* Scan the pool to see if a constant with the same value has
16285 already been added. While we are doing this, also note the
16286 location where we must insert the constant if it doesn't already
16287 exist. */
16288 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16289 {
16290 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16291 && fix->mode == mp->mode
16292 && (!LABEL_P (fix->value)
16293 || (CODE_LABEL_NUMBER (fix->value)
16294 == CODE_LABEL_NUMBER (mp->value)))
16295 && rtx_equal_p (fix->value, mp->value))
16296 {
16297 /* More than one fix references this entry. */
16298 mp->refcount++;
16299 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16300 }
16301
16302 /* Note the insertion point if necessary. */
16303 if (max_mp == NULL
16304 && mp->max_address > max_address)
16305 max_mp = mp;
16306
16307 /* If we are inserting an 8-bytes aligned quantity and
16308 we have not already found an insertion point, then
16309 make sure that all such 8-byte aligned quantities are
16310 placed at the start of the pool. */
16311 if (ARM_DOUBLEWORD_ALIGN
16312 && max_mp == NULL
16313 && fix->fix_size >= 8
16314 && mp->fix_size < 8)
16315 {
16316 max_mp = mp;
16317 max_address = mp->max_address;
16318 }
16319 }
16320
16321 /* The value is not currently in the minipool, so we need to create
16322 a new entry for it. If MAX_MP is NULL, the entry will be put on
16323 the end of the list since the placement is less constrained than
16324 any existing entry. Otherwise, we insert the new fix before
16325 MAX_MP and, if necessary, adjust the constraints on the other
16326 entries. */
16327 mp = XNEW (Mnode);
16328 mp->fix_size = fix->fix_size;
16329 mp->mode = fix->mode;
16330 mp->value = fix->value;
16331 mp->refcount = 1;
16332 /* Not yet required for a backwards ref. */
16333 mp->min_address = -65536;
16334
16335 if (max_mp == NULL)
16336 {
16337 mp->max_address = max_address;
16338 mp->next = NULL;
16339 mp->prev = minipool_vector_tail;
16340
16341 if (mp->prev == NULL)
16342 {
16343 minipool_vector_head = mp;
16344 minipool_vector_label = gen_label_rtx ();
16345 }
16346 else
16347 mp->prev->next = mp;
16348
16349 minipool_vector_tail = mp;
16350 }
16351 else
16352 {
16353 if (max_address > max_mp->max_address - mp->fix_size)
16354 mp->max_address = max_mp->max_address - mp->fix_size;
16355 else
16356 mp->max_address = max_address;
16357
16358 mp->next = max_mp;
16359 mp->prev = max_mp->prev;
16360 max_mp->prev = mp;
16361 if (mp->prev != NULL)
16362 mp->prev->next = mp;
16363 else
16364 minipool_vector_head = mp;
16365 }
16366
16367 /* Save the new entry. */
16368 max_mp = mp;
16369
16370 /* Scan over the preceding entries and adjust their addresses as
16371 required. */
16372 while (mp->prev != NULL
16373 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16374 {
16375 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16376 mp = mp->prev;
16377 }
16378
16379 return max_mp;
16380 }
16381
16382 static Mnode *
16383 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16384 HOST_WIDE_INT min_address)
16385 {
16386 HOST_WIDE_INT offset;
16387
16388 /* The code below assumes these are different. */
16389 gcc_assert (mp != min_mp);
16390
16391 if (min_mp == NULL)
16392 {
16393 if (min_address > mp->min_address)
16394 mp->min_address = min_address;
16395 }
16396 else
16397 {
16398 /* We will adjust this below if it is too loose. */
16399 mp->min_address = min_address;
16400
16401 /* Unlink MP from its current position. Since min_mp is non-null,
16402 mp->next must be non-null. */
16403 mp->next->prev = mp->prev;
16404 if (mp->prev != NULL)
16405 mp->prev->next = mp->next;
16406 else
16407 minipool_vector_head = mp->next;
16408
16409 /* Reinsert it after MIN_MP. */
16410 mp->prev = min_mp;
16411 mp->next = min_mp->next;
16412 min_mp->next = mp;
16413 if (mp->next != NULL)
16414 mp->next->prev = mp;
16415 else
16416 minipool_vector_tail = mp;
16417 }
16418
16419 min_mp = mp;
16420
16421 offset = 0;
16422 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16423 {
16424 mp->offset = offset;
16425 if (mp->refcount > 0)
16426 offset += mp->fix_size;
16427
16428 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16429 mp->next->min_address = mp->min_address + mp->fix_size;
16430 }
16431
16432 return min_mp;
16433 }
16434
16435 /* Add a constant to the minipool for a backward reference. Returns the
16436 node added or NULL if the constant will not fit in this pool.
16437
16438 Note that the code for insertion for a backwards reference can be
16439 somewhat confusing because the calculated offsets for each fix do
16440 not take into account the size of the pool (which is still under
16441 construction. */
16442 static Mnode *
16443 add_minipool_backward_ref (Mfix *fix)
16444 {
16445 /* If set, min_mp is the last pool_entry that has a lower constraint
16446 than the one we are trying to add. */
16447 Mnode *min_mp = NULL;
16448 /* This can be negative, since it is only a constraint. */
16449 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16450 Mnode *mp;
16451
16452 /* If we can't reach the current pool from this insn, or if we can't
16453 insert this entry at the end of the pool without pushing other
16454 fixes out of range, then we don't try. This ensures that we
16455 can't fail later on. */
16456 if (min_address >= minipool_barrier->address
16457 || (minipool_vector_tail->min_address + fix->fix_size
16458 >= minipool_barrier->address))
16459 return NULL;
16460
16461 /* Scan the pool to see if a constant with the same value has
16462 already been added. While we are doing this, also note the
16463 location where we must insert the constant if it doesn't already
16464 exist. */
16465 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16466 {
16467 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16468 && fix->mode == mp->mode
16469 && (!LABEL_P (fix->value)
16470 || (CODE_LABEL_NUMBER (fix->value)
16471 == CODE_LABEL_NUMBER (mp->value)))
16472 && rtx_equal_p (fix->value, mp->value)
16473 /* Check that there is enough slack to move this entry to the
16474 end of the table (this is conservative). */
16475 && (mp->max_address
16476 > (minipool_barrier->address
16477 + minipool_vector_tail->offset
16478 + minipool_vector_tail->fix_size)))
16479 {
16480 mp->refcount++;
16481 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16482 }
16483
16484 if (min_mp != NULL)
16485 mp->min_address += fix->fix_size;
16486 else
16487 {
16488 /* Note the insertion point if necessary. */
16489 if (mp->min_address < min_address)
16490 {
16491 /* For now, we do not allow the insertion of 8-byte alignment
16492 requiring nodes anywhere but at the start of the pool. */
16493 if (ARM_DOUBLEWORD_ALIGN
16494 && fix->fix_size >= 8 && mp->fix_size < 8)
16495 return NULL;
16496 else
16497 min_mp = mp;
16498 }
16499 else if (mp->max_address
16500 < minipool_barrier->address + mp->offset + fix->fix_size)
16501 {
16502 /* Inserting before this entry would push the fix beyond
16503 its maximum address (which can happen if we have
16504 re-located a forwards fix); force the new fix to come
16505 after it. */
16506 if (ARM_DOUBLEWORD_ALIGN
16507 && fix->fix_size >= 8 && mp->fix_size < 8)
16508 return NULL;
16509 else
16510 {
16511 min_mp = mp;
16512 min_address = mp->min_address + fix->fix_size;
16513 }
16514 }
16515 /* Do not insert a non-8-byte aligned quantity before 8-byte
16516 aligned quantities. */
16517 else if (ARM_DOUBLEWORD_ALIGN
16518 && fix->fix_size < 8
16519 && mp->fix_size >= 8)
16520 {
16521 min_mp = mp;
16522 min_address = mp->min_address + fix->fix_size;
16523 }
16524 }
16525 }
16526
16527 /* We need to create a new entry. */
16528 mp = XNEW (Mnode);
16529 mp->fix_size = fix->fix_size;
16530 mp->mode = fix->mode;
16531 mp->value = fix->value;
16532 mp->refcount = 1;
16533 mp->max_address = minipool_barrier->address + 65536;
16534
16535 mp->min_address = min_address;
16536
16537 if (min_mp == NULL)
16538 {
16539 mp->prev = NULL;
16540 mp->next = minipool_vector_head;
16541
16542 if (mp->next == NULL)
16543 {
16544 minipool_vector_tail = mp;
16545 minipool_vector_label = gen_label_rtx ();
16546 }
16547 else
16548 mp->next->prev = mp;
16549
16550 minipool_vector_head = mp;
16551 }
16552 else
16553 {
16554 mp->next = min_mp->next;
16555 mp->prev = min_mp;
16556 min_mp->next = mp;
16557
16558 if (mp->next != NULL)
16559 mp->next->prev = mp;
16560 else
16561 minipool_vector_tail = mp;
16562 }
16563
16564 /* Save the new entry. */
16565 min_mp = mp;
16566
16567 if (mp->prev)
16568 mp = mp->prev;
16569 else
16570 mp->offset = 0;
16571
16572 /* Scan over the following entries and adjust their offsets. */
16573 while (mp->next != NULL)
16574 {
16575 if (mp->next->min_address < mp->min_address + mp->fix_size)
16576 mp->next->min_address = mp->min_address + mp->fix_size;
16577
16578 if (mp->refcount)
16579 mp->next->offset = mp->offset + mp->fix_size;
16580 else
16581 mp->next->offset = mp->offset;
16582
16583 mp = mp->next;
16584 }
16585
16586 return min_mp;
16587 }
16588
16589 static void
16590 assign_minipool_offsets (Mfix *barrier)
16591 {
16592 HOST_WIDE_INT offset = 0;
16593 Mnode *mp;
16594
16595 minipool_barrier = barrier;
16596
16597 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16598 {
16599 mp->offset = offset;
16600
16601 if (mp->refcount > 0)
16602 offset += mp->fix_size;
16603 }
16604 }
16605
16606 /* Output the literal table */
16607 static void
16608 dump_minipool (rtx_insn *scan)
16609 {
16610 Mnode * mp;
16611 Mnode * nmp;
16612 int align64 = 0;
16613
16614 if (ARM_DOUBLEWORD_ALIGN)
16615 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16616 if (mp->refcount > 0 && mp->fix_size >= 8)
16617 {
16618 align64 = 1;
16619 break;
16620 }
16621
16622 if (dump_file)
16623 fprintf (dump_file,
16624 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16625 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16626
16627 scan = emit_label_after (gen_label_rtx (), scan);
16628 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16629 scan = emit_label_after (minipool_vector_label, scan);
16630
16631 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16632 {
16633 if (mp->refcount > 0)
16634 {
16635 if (dump_file)
16636 {
16637 fprintf (dump_file,
16638 ";; Offset %u, min %ld, max %ld ",
16639 (unsigned) mp->offset, (unsigned long) mp->min_address,
16640 (unsigned long) mp->max_address);
16641 arm_print_value (dump_file, mp->value);
16642 fputc ('\n', dump_file);
16643 }
16644
16645 switch (GET_MODE_SIZE (mp->mode))
16646 {
16647 #ifdef HAVE_consttable_1
16648 case 1:
16649 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16650 break;
16651
16652 #endif
16653 #ifdef HAVE_consttable_2
16654 case 2:
16655 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16656 break;
16657
16658 #endif
16659 #ifdef HAVE_consttable_4
16660 case 4:
16661 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16662 break;
16663
16664 #endif
16665 #ifdef HAVE_consttable_8
16666 case 8:
16667 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16668 break;
16669
16670 #endif
16671 #ifdef HAVE_consttable_16
16672 case 16:
16673 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16674 break;
16675
16676 #endif
16677 default:
16678 gcc_unreachable ();
16679 }
16680 }
16681
16682 nmp = mp->next;
16683 free (mp);
16684 }
16685
16686 minipool_vector_head = minipool_vector_tail = NULL;
16687 scan = emit_insn_after (gen_consttable_end (), scan);
16688 scan = emit_barrier_after (scan);
16689 }
16690
16691 /* Return the cost of forcibly inserting a barrier after INSN. */
16692 static int
16693 arm_barrier_cost (rtx_insn *insn)
16694 {
16695 /* Basing the location of the pool on the loop depth is preferable,
16696 but at the moment, the basic block information seems to be
16697 corrupt by this stage of the compilation. */
16698 int base_cost = 50;
16699 rtx_insn *next = next_nonnote_insn (insn);
16700
16701 if (next != NULL && LABEL_P (next))
16702 base_cost -= 20;
16703
16704 switch (GET_CODE (insn))
16705 {
16706 case CODE_LABEL:
16707 /* It will always be better to place the table before the label, rather
16708 than after it. */
16709 return 50;
16710
16711 case INSN:
16712 case CALL_INSN:
16713 return base_cost;
16714
16715 case JUMP_INSN:
16716 return base_cost - 10;
16717
16718 default:
16719 return base_cost + 10;
16720 }
16721 }
16722
16723 /* Find the best place in the insn stream in the range
16724 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16725 Create the barrier by inserting a jump and add a new fix entry for
16726 it. */
16727 static Mfix *
16728 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16729 {
16730 HOST_WIDE_INT count = 0;
16731 rtx_barrier *barrier;
16732 rtx_insn *from = fix->insn;
16733 /* The instruction after which we will insert the jump. */
16734 rtx_insn *selected = NULL;
16735 int selected_cost;
16736 /* The address at which the jump instruction will be placed. */
16737 HOST_WIDE_INT selected_address;
16738 Mfix * new_fix;
16739 HOST_WIDE_INT max_count = max_address - fix->address;
16740 rtx_code_label *label = gen_label_rtx ();
16741
16742 selected_cost = arm_barrier_cost (from);
16743 selected_address = fix->address;
16744
16745 while (from && count < max_count)
16746 {
16747 rtx_jump_table_data *tmp;
16748 int new_cost;
16749
16750 /* This code shouldn't have been called if there was a natural barrier
16751 within range. */
16752 gcc_assert (!BARRIER_P (from));
16753
16754 /* Count the length of this insn. This must stay in sync with the
16755 code that pushes minipool fixes. */
16756 if (LABEL_P (from))
16757 count += get_label_padding (from);
16758 else
16759 count += get_attr_length (from);
16760
16761 /* If there is a jump table, add its length. */
16762 if (tablejump_p (from, NULL, &tmp))
16763 {
16764 count += get_jump_table_size (tmp);
16765
16766 /* Jump tables aren't in a basic block, so base the cost on
16767 the dispatch insn. If we select this location, we will
16768 still put the pool after the table. */
16769 new_cost = arm_barrier_cost (from);
16770
16771 if (count < max_count
16772 && (!selected || new_cost <= selected_cost))
16773 {
16774 selected = tmp;
16775 selected_cost = new_cost;
16776 selected_address = fix->address + count;
16777 }
16778
16779 /* Continue after the dispatch table. */
16780 from = NEXT_INSN (tmp);
16781 continue;
16782 }
16783
16784 new_cost = arm_barrier_cost (from);
16785
16786 if (count < max_count
16787 && (!selected || new_cost <= selected_cost))
16788 {
16789 selected = from;
16790 selected_cost = new_cost;
16791 selected_address = fix->address + count;
16792 }
16793
16794 from = NEXT_INSN (from);
16795 }
16796
16797 /* Make sure that we found a place to insert the jump. */
16798 gcc_assert (selected);
16799
16800 /* Make sure we do not split a call and its corresponding
16801 CALL_ARG_LOCATION note. */
16802 if (CALL_P (selected))
16803 {
16804 rtx_insn *next = NEXT_INSN (selected);
16805 if (next && NOTE_P (next)
16806 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16807 selected = next;
16808 }
16809
16810 /* Create a new JUMP_INSN that branches around a barrier. */
16811 from = emit_jump_insn_after (gen_jump (label), selected);
16812 JUMP_LABEL (from) = label;
16813 barrier = emit_barrier_after (from);
16814 emit_label_after (label, barrier);
16815
16816 /* Create a minipool barrier entry for the new barrier. */
16817 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16818 new_fix->insn = barrier;
16819 new_fix->address = selected_address;
16820 new_fix->next = fix->next;
16821 fix->next = new_fix;
16822
16823 return new_fix;
16824 }
16825
16826 /* Record that there is a natural barrier in the insn stream at
16827 ADDRESS. */
16828 static void
16829 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16830 {
16831 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16832
16833 fix->insn = insn;
16834 fix->address = address;
16835
16836 fix->next = NULL;
16837 if (minipool_fix_head != NULL)
16838 minipool_fix_tail->next = fix;
16839 else
16840 minipool_fix_head = fix;
16841
16842 minipool_fix_tail = fix;
16843 }
16844
16845 /* Record INSN, which will need fixing up to load a value from the
16846 minipool. ADDRESS is the offset of the insn since the start of the
16847 function; LOC is a pointer to the part of the insn which requires
16848 fixing; VALUE is the constant that must be loaded, which is of type
16849 MODE. */
16850 static void
16851 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16852 machine_mode mode, rtx value)
16853 {
16854 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16855
16856 fix->insn = insn;
16857 fix->address = address;
16858 fix->loc = loc;
16859 fix->mode = mode;
16860 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16861 fix->value = value;
16862 fix->forwards = get_attr_pool_range (insn);
16863 fix->backwards = get_attr_neg_pool_range (insn);
16864 fix->minipool = NULL;
16865
16866 /* If an insn doesn't have a range defined for it, then it isn't
16867 expecting to be reworked by this code. Better to stop now than
16868 to generate duff assembly code. */
16869 gcc_assert (fix->forwards || fix->backwards);
16870
16871 /* If an entry requires 8-byte alignment then assume all constant pools
16872 require 4 bytes of padding. Trying to do this later on a per-pool
16873 basis is awkward because existing pool entries have to be modified. */
16874 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16875 minipool_pad = 4;
16876
16877 if (dump_file)
16878 {
16879 fprintf (dump_file,
16880 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16881 GET_MODE_NAME (mode),
16882 INSN_UID (insn), (unsigned long) address,
16883 -1 * (long)fix->backwards, (long)fix->forwards);
16884 arm_print_value (dump_file, fix->value);
16885 fprintf (dump_file, "\n");
16886 }
16887
16888 /* Add it to the chain of fixes. */
16889 fix->next = NULL;
16890
16891 if (minipool_fix_head != NULL)
16892 minipool_fix_tail->next = fix;
16893 else
16894 minipool_fix_head = fix;
16895
16896 minipool_fix_tail = fix;
16897 }
16898
16899 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16900 Returns the number of insns needed, or 99 if we always want to synthesize
16901 the value. */
16902 int
16903 arm_max_const_double_inline_cost ()
16904 {
16905 /* Let the value get synthesized to avoid the use of literal pools. */
16906 if (arm_disable_literal_pool)
16907 return 99;
16908
16909 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16910 }
16911
16912 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16913 Returns the number of insns needed, or 99 if we don't know how to
16914 do it. */
16915 int
16916 arm_const_double_inline_cost (rtx val)
16917 {
16918 rtx lowpart, highpart;
16919 machine_mode mode;
16920
16921 mode = GET_MODE (val);
16922
16923 if (mode == VOIDmode)
16924 mode = DImode;
16925
16926 gcc_assert (GET_MODE_SIZE (mode) == 8);
16927
16928 lowpart = gen_lowpart (SImode, val);
16929 highpart = gen_highpart_mode (SImode, mode, val);
16930
16931 gcc_assert (CONST_INT_P (lowpart));
16932 gcc_assert (CONST_INT_P (highpart));
16933
16934 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16935 NULL_RTX, NULL_RTX, 0, 0)
16936 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16937 NULL_RTX, NULL_RTX, 0, 0));
16938 }
16939
16940 /* Cost of loading a SImode constant. */
16941 static inline int
16942 arm_const_inline_cost (enum rtx_code code, rtx val)
16943 {
16944 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16945 NULL_RTX, NULL_RTX, 1, 0);
16946 }
16947
16948 /* Return true if it is worthwhile to split a 64-bit constant into two
16949 32-bit operations. This is the case if optimizing for size, or
16950 if we have load delay slots, or if one 32-bit part can be done with
16951 a single data operation. */
16952 bool
16953 arm_const_double_by_parts (rtx val)
16954 {
16955 machine_mode mode = GET_MODE (val);
16956 rtx part;
16957
16958 if (optimize_size || arm_ld_sched)
16959 return true;
16960
16961 if (mode == VOIDmode)
16962 mode = DImode;
16963
16964 part = gen_highpart_mode (SImode, mode, val);
16965
16966 gcc_assert (CONST_INT_P (part));
16967
16968 if (const_ok_for_arm (INTVAL (part))
16969 || const_ok_for_arm (~INTVAL (part)))
16970 return true;
16971
16972 part = gen_lowpart (SImode, val);
16973
16974 gcc_assert (CONST_INT_P (part));
16975
16976 if (const_ok_for_arm (INTVAL (part))
16977 || const_ok_for_arm (~INTVAL (part)))
16978 return true;
16979
16980 return false;
16981 }
16982
16983 /* Return true if it is possible to inline both the high and low parts
16984 of a 64-bit constant into 32-bit data processing instructions. */
16985 bool
16986 arm_const_double_by_immediates (rtx val)
16987 {
16988 machine_mode mode = GET_MODE (val);
16989 rtx part;
16990
16991 if (mode == VOIDmode)
16992 mode = DImode;
16993
16994 part = gen_highpart_mode (SImode, mode, val);
16995
16996 gcc_assert (CONST_INT_P (part));
16997
16998 if (!const_ok_for_arm (INTVAL (part)))
16999 return false;
17000
17001 part = gen_lowpart (SImode, val);
17002
17003 gcc_assert (CONST_INT_P (part));
17004
17005 if (!const_ok_for_arm (INTVAL (part)))
17006 return false;
17007
17008 return true;
17009 }
17010
17011 /* Scan INSN and note any of its operands that need fixing.
17012 If DO_PUSHES is false we do not actually push any of the fixups
17013 needed. */
17014 static void
17015 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17016 {
17017 int opno;
17018
17019 extract_constrain_insn (insn);
17020
17021 if (recog_data.n_alternatives == 0)
17022 return;
17023
17024 /* Fill in recog_op_alt with information about the constraints of
17025 this insn. */
17026 preprocess_constraints (insn);
17027
17028 const operand_alternative *op_alt = which_op_alt ();
17029 for (opno = 0; opno < recog_data.n_operands; opno++)
17030 {
17031 /* Things we need to fix can only occur in inputs. */
17032 if (recog_data.operand_type[opno] != OP_IN)
17033 continue;
17034
17035 /* If this alternative is a memory reference, then any mention
17036 of constants in this alternative is really to fool reload
17037 into allowing us to accept one there. We need to fix them up
17038 now so that we output the right code. */
17039 if (op_alt[opno].memory_ok)
17040 {
17041 rtx op = recog_data.operand[opno];
17042
17043 if (CONSTANT_P (op))
17044 {
17045 if (do_pushes)
17046 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17047 recog_data.operand_mode[opno], op);
17048 }
17049 else if (MEM_P (op)
17050 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17051 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17052 {
17053 if (do_pushes)
17054 {
17055 rtx cop = avoid_constant_pool_reference (op);
17056
17057 /* Casting the address of something to a mode narrower
17058 than a word can cause avoid_constant_pool_reference()
17059 to return the pool reference itself. That's no good to
17060 us here. Lets just hope that we can use the
17061 constant pool value directly. */
17062 if (op == cop)
17063 cop = get_pool_constant (XEXP (op, 0));
17064
17065 push_minipool_fix (insn, address,
17066 recog_data.operand_loc[opno],
17067 recog_data.operand_mode[opno], cop);
17068 }
17069
17070 }
17071 }
17072 }
17073
17074 return;
17075 }
17076
17077 /* Rewrite move insn into subtract of 0 if the condition codes will
17078 be useful in next conditional jump insn. */
17079
17080 static void
17081 thumb1_reorg (void)
17082 {
17083 basic_block bb;
17084
17085 FOR_EACH_BB_FN (bb, cfun)
17086 {
17087 rtx dest, src;
17088 rtx pat, op0, set = NULL;
17089 rtx_insn *prev, *insn = BB_END (bb);
17090 bool insn_clobbered = false;
17091
17092 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17093 insn = PREV_INSN (insn);
17094
17095 /* Find the last cbranchsi4_insn in basic block BB. */
17096 if (insn == BB_HEAD (bb)
17097 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17098 continue;
17099
17100 /* Get the register with which we are comparing. */
17101 pat = PATTERN (insn);
17102 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17103
17104 /* Find the first flag setting insn before INSN in basic block BB. */
17105 gcc_assert (insn != BB_HEAD (bb));
17106 for (prev = PREV_INSN (insn);
17107 (!insn_clobbered
17108 && prev != BB_HEAD (bb)
17109 && (NOTE_P (prev)
17110 || DEBUG_INSN_P (prev)
17111 || ((set = single_set (prev)) != NULL
17112 && get_attr_conds (prev) == CONDS_NOCOND)));
17113 prev = PREV_INSN (prev))
17114 {
17115 if (reg_set_p (op0, prev))
17116 insn_clobbered = true;
17117 }
17118
17119 /* Skip if op0 is clobbered by insn other than prev. */
17120 if (insn_clobbered)
17121 continue;
17122
17123 if (!set)
17124 continue;
17125
17126 dest = SET_DEST (set);
17127 src = SET_SRC (set);
17128 if (!low_register_operand (dest, SImode)
17129 || !low_register_operand (src, SImode))
17130 continue;
17131
17132 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17133 in INSN. Both src and dest of the move insn are checked. */
17134 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17135 {
17136 dest = copy_rtx (dest);
17137 src = copy_rtx (src);
17138 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17139 PATTERN (prev) = gen_rtx_SET (dest, src);
17140 INSN_CODE (prev) = -1;
17141 /* Set test register in INSN to dest. */
17142 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17143 INSN_CODE (insn) = -1;
17144 }
17145 }
17146 }
17147
17148 /* Convert instructions to their cc-clobbering variant if possible, since
17149 that allows us to use smaller encodings. */
17150
17151 static void
17152 thumb2_reorg (void)
17153 {
17154 basic_block bb;
17155 regset_head live;
17156
17157 INIT_REG_SET (&live);
17158
17159 /* We are freeing block_for_insn in the toplev to keep compatibility
17160 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17161 compute_bb_for_insn ();
17162 df_analyze ();
17163
17164 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17165
17166 FOR_EACH_BB_FN (bb, cfun)
17167 {
17168 if ((current_tune->disparage_flag_setting_t16_encodings
17169 == tune_params::DISPARAGE_FLAGS_ALL)
17170 && optimize_bb_for_speed_p (bb))
17171 continue;
17172
17173 rtx_insn *insn;
17174 Convert_Action action = SKIP;
17175 Convert_Action action_for_partial_flag_setting
17176 = ((current_tune->disparage_flag_setting_t16_encodings
17177 != tune_params::DISPARAGE_FLAGS_NEITHER)
17178 && optimize_bb_for_speed_p (bb))
17179 ? SKIP : CONV;
17180
17181 COPY_REG_SET (&live, DF_LR_OUT (bb));
17182 df_simulate_initialize_backwards (bb, &live);
17183 FOR_BB_INSNS_REVERSE (bb, insn)
17184 {
17185 if (NONJUMP_INSN_P (insn)
17186 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17187 && GET_CODE (PATTERN (insn)) == SET)
17188 {
17189 action = SKIP;
17190 rtx pat = PATTERN (insn);
17191 rtx dst = XEXP (pat, 0);
17192 rtx src = XEXP (pat, 1);
17193 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17194
17195 if (UNARY_P (src) || BINARY_P (src))
17196 op0 = XEXP (src, 0);
17197
17198 if (BINARY_P (src))
17199 op1 = XEXP (src, 1);
17200
17201 if (low_register_operand (dst, SImode))
17202 {
17203 switch (GET_CODE (src))
17204 {
17205 case PLUS:
17206 /* Adding two registers and storing the result
17207 in the first source is already a 16-bit
17208 operation. */
17209 if (rtx_equal_p (dst, op0)
17210 && register_operand (op1, SImode))
17211 break;
17212
17213 if (low_register_operand (op0, SImode))
17214 {
17215 /* ADDS <Rd>,<Rn>,<Rm> */
17216 if (low_register_operand (op1, SImode))
17217 action = CONV;
17218 /* ADDS <Rdn>,#<imm8> */
17219 /* SUBS <Rdn>,#<imm8> */
17220 else if (rtx_equal_p (dst, op0)
17221 && CONST_INT_P (op1)
17222 && IN_RANGE (INTVAL (op1), -255, 255))
17223 action = CONV;
17224 /* ADDS <Rd>,<Rn>,#<imm3> */
17225 /* SUBS <Rd>,<Rn>,#<imm3> */
17226 else if (CONST_INT_P (op1)
17227 && IN_RANGE (INTVAL (op1), -7, 7))
17228 action = CONV;
17229 }
17230 /* ADCS <Rd>, <Rn> */
17231 else if (GET_CODE (XEXP (src, 0)) == PLUS
17232 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17233 && low_register_operand (XEXP (XEXP (src, 0), 1),
17234 SImode)
17235 && COMPARISON_P (op1)
17236 && cc_register (XEXP (op1, 0), VOIDmode)
17237 && maybe_get_arm_condition_code (op1) == ARM_CS
17238 && XEXP (op1, 1) == const0_rtx)
17239 action = CONV;
17240 break;
17241
17242 case MINUS:
17243 /* RSBS <Rd>,<Rn>,#0
17244 Not handled here: see NEG below. */
17245 /* SUBS <Rd>,<Rn>,#<imm3>
17246 SUBS <Rdn>,#<imm8>
17247 Not handled here: see PLUS above. */
17248 /* SUBS <Rd>,<Rn>,<Rm> */
17249 if (low_register_operand (op0, SImode)
17250 && low_register_operand (op1, SImode))
17251 action = CONV;
17252 break;
17253
17254 case MULT:
17255 /* MULS <Rdm>,<Rn>,<Rdm>
17256 As an exception to the rule, this is only used
17257 when optimizing for size since MULS is slow on all
17258 known implementations. We do not even want to use
17259 MULS in cold code, if optimizing for speed, so we
17260 test the global flag here. */
17261 if (!optimize_size)
17262 break;
17263 /* else fall through. */
17264 case AND:
17265 case IOR:
17266 case XOR:
17267 /* ANDS <Rdn>,<Rm> */
17268 if (rtx_equal_p (dst, op0)
17269 && low_register_operand (op1, SImode))
17270 action = action_for_partial_flag_setting;
17271 else if (rtx_equal_p (dst, op1)
17272 && low_register_operand (op0, SImode))
17273 action = action_for_partial_flag_setting == SKIP
17274 ? SKIP : SWAP_CONV;
17275 break;
17276
17277 case ASHIFTRT:
17278 case ASHIFT:
17279 case LSHIFTRT:
17280 /* ASRS <Rdn>,<Rm> */
17281 /* LSRS <Rdn>,<Rm> */
17282 /* LSLS <Rdn>,<Rm> */
17283 if (rtx_equal_p (dst, op0)
17284 && low_register_operand (op1, SImode))
17285 action = action_for_partial_flag_setting;
17286 /* ASRS <Rd>,<Rm>,#<imm5> */
17287 /* LSRS <Rd>,<Rm>,#<imm5> */
17288 /* LSLS <Rd>,<Rm>,#<imm5> */
17289 else if (low_register_operand (op0, SImode)
17290 && CONST_INT_P (op1)
17291 && IN_RANGE (INTVAL (op1), 0, 31))
17292 action = action_for_partial_flag_setting;
17293 break;
17294
17295 case ROTATERT:
17296 /* RORS <Rdn>,<Rm> */
17297 if (rtx_equal_p (dst, op0)
17298 && low_register_operand (op1, SImode))
17299 action = action_for_partial_flag_setting;
17300 break;
17301
17302 case NOT:
17303 /* MVNS <Rd>,<Rm> */
17304 if (low_register_operand (op0, SImode))
17305 action = action_for_partial_flag_setting;
17306 break;
17307
17308 case NEG:
17309 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17310 if (low_register_operand (op0, SImode))
17311 action = CONV;
17312 break;
17313
17314 case CONST_INT:
17315 /* MOVS <Rd>,#<imm8> */
17316 if (CONST_INT_P (src)
17317 && IN_RANGE (INTVAL (src), 0, 255))
17318 action = action_for_partial_flag_setting;
17319 break;
17320
17321 case REG:
17322 /* MOVS and MOV<c> with registers have different
17323 encodings, so are not relevant here. */
17324 break;
17325
17326 default:
17327 break;
17328 }
17329 }
17330
17331 if (action != SKIP)
17332 {
17333 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17334 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17335 rtvec vec;
17336
17337 if (action == SWAP_CONV)
17338 {
17339 src = copy_rtx (src);
17340 XEXP (src, 0) = op1;
17341 XEXP (src, 1) = op0;
17342 pat = gen_rtx_SET (dst, src);
17343 vec = gen_rtvec (2, pat, clobber);
17344 }
17345 else /* action == CONV */
17346 vec = gen_rtvec (2, pat, clobber);
17347
17348 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17349 INSN_CODE (insn) = -1;
17350 }
17351 }
17352
17353 if (NONDEBUG_INSN_P (insn))
17354 df_simulate_one_insn_backwards (bb, insn, &live);
17355 }
17356 }
17357
17358 CLEAR_REG_SET (&live);
17359 }
17360
17361 /* Gcc puts the pool in the wrong place for ARM, since we can only
17362 load addresses a limited distance around the pc. We do some
17363 special munging to move the constant pool values to the correct
17364 point in the code. */
17365 static void
17366 arm_reorg (void)
17367 {
17368 rtx_insn *insn;
17369 HOST_WIDE_INT address = 0;
17370 Mfix * fix;
17371
17372 if (TARGET_THUMB1)
17373 thumb1_reorg ();
17374 else if (TARGET_THUMB2)
17375 thumb2_reorg ();
17376
17377 /* Ensure all insns that must be split have been split at this point.
17378 Otherwise, the pool placement code below may compute incorrect
17379 insn lengths. Note that when optimizing, all insns have already
17380 been split at this point. */
17381 if (!optimize)
17382 split_all_insns_noflow ();
17383
17384 minipool_fix_head = minipool_fix_tail = NULL;
17385
17386 /* The first insn must always be a note, or the code below won't
17387 scan it properly. */
17388 insn = get_insns ();
17389 gcc_assert (NOTE_P (insn));
17390 minipool_pad = 0;
17391
17392 /* Scan all the insns and record the operands that will need fixing. */
17393 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17394 {
17395 if (BARRIER_P (insn))
17396 push_minipool_barrier (insn, address);
17397 else if (INSN_P (insn))
17398 {
17399 rtx_jump_table_data *table;
17400
17401 note_invalid_constants (insn, address, true);
17402 address += get_attr_length (insn);
17403
17404 /* If the insn is a vector jump, add the size of the table
17405 and skip the table. */
17406 if (tablejump_p (insn, NULL, &table))
17407 {
17408 address += get_jump_table_size (table);
17409 insn = table;
17410 }
17411 }
17412 else if (LABEL_P (insn))
17413 /* Add the worst-case padding due to alignment. We don't add
17414 the _current_ padding because the minipool insertions
17415 themselves might change it. */
17416 address += get_label_padding (insn);
17417 }
17418
17419 fix = minipool_fix_head;
17420
17421 /* Now scan the fixups and perform the required changes. */
17422 while (fix)
17423 {
17424 Mfix * ftmp;
17425 Mfix * fdel;
17426 Mfix * last_added_fix;
17427 Mfix * last_barrier = NULL;
17428 Mfix * this_fix;
17429
17430 /* Skip any further barriers before the next fix. */
17431 while (fix && BARRIER_P (fix->insn))
17432 fix = fix->next;
17433
17434 /* No more fixes. */
17435 if (fix == NULL)
17436 break;
17437
17438 last_added_fix = NULL;
17439
17440 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17441 {
17442 if (BARRIER_P (ftmp->insn))
17443 {
17444 if (ftmp->address >= minipool_vector_head->max_address)
17445 break;
17446
17447 last_barrier = ftmp;
17448 }
17449 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17450 break;
17451
17452 last_added_fix = ftmp; /* Keep track of the last fix added. */
17453 }
17454
17455 /* If we found a barrier, drop back to that; any fixes that we
17456 could have reached but come after the barrier will now go in
17457 the next mini-pool. */
17458 if (last_barrier != NULL)
17459 {
17460 /* Reduce the refcount for those fixes that won't go into this
17461 pool after all. */
17462 for (fdel = last_barrier->next;
17463 fdel && fdel != ftmp;
17464 fdel = fdel->next)
17465 {
17466 fdel->minipool->refcount--;
17467 fdel->minipool = NULL;
17468 }
17469
17470 ftmp = last_barrier;
17471 }
17472 else
17473 {
17474 /* ftmp is first fix that we can't fit into this pool and
17475 there no natural barriers that we could use. Insert a
17476 new barrier in the code somewhere between the previous
17477 fix and this one, and arrange to jump around it. */
17478 HOST_WIDE_INT max_address;
17479
17480 /* The last item on the list of fixes must be a barrier, so
17481 we can never run off the end of the list of fixes without
17482 last_barrier being set. */
17483 gcc_assert (ftmp);
17484
17485 max_address = minipool_vector_head->max_address;
17486 /* Check that there isn't another fix that is in range that
17487 we couldn't fit into this pool because the pool was
17488 already too large: we need to put the pool before such an
17489 instruction. The pool itself may come just after the
17490 fix because create_fix_barrier also allows space for a
17491 jump instruction. */
17492 if (ftmp->address < max_address)
17493 max_address = ftmp->address + 1;
17494
17495 last_barrier = create_fix_barrier (last_added_fix, max_address);
17496 }
17497
17498 assign_minipool_offsets (last_barrier);
17499
17500 while (ftmp)
17501 {
17502 if (!BARRIER_P (ftmp->insn)
17503 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17504 == NULL))
17505 break;
17506
17507 ftmp = ftmp->next;
17508 }
17509
17510 /* Scan over the fixes we have identified for this pool, fixing them
17511 up and adding the constants to the pool itself. */
17512 for (this_fix = fix; this_fix && ftmp != this_fix;
17513 this_fix = this_fix->next)
17514 if (!BARRIER_P (this_fix->insn))
17515 {
17516 rtx addr
17517 = plus_constant (Pmode,
17518 gen_rtx_LABEL_REF (VOIDmode,
17519 minipool_vector_label),
17520 this_fix->minipool->offset);
17521 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17522 }
17523
17524 dump_minipool (last_barrier->insn);
17525 fix = ftmp;
17526 }
17527
17528 /* From now on we must synthesize any constants that we can't handle
17529 directly. This can happen if the RTL gets split during final
17530 instruction generation. */
17531 cfun->machine->after_arm_reorg = 1;
17532
17533 /* Free the minipool memory. */
17534 obstack_free (&minipool_obstack, minipool_startobj);
17535 }
17536 \f
17537 /* Routines to output assembly language. */
17538
17539 /* Return string representation of passed in real value. */
17540 static const char *
17541 fp_const_from_val (REAL_VALUE_TYPE *r)
17542 {
17543 if (!fp_consts_inited)
17544 init_fp_table ();
17545
17546 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17547 return "0";
17548 }
17549
17550 /* OPERANDS[0] is the entire list of insns that constitute pop,
17551 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17552 is in the list, UPDATE is true iff the list contains explicit
17553 update of base register. */
17554 void
17555 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17556 bool update)
17557 {
17558 int i;
17559 char pattern[100];
17560 int offset;
17561 const char *conditional;
17562 int num_saves = XVECLEN (operands[0], 0);
17563 unsigned int regno;
17564 unsigned int regno_base = REGNO (operands[1]);
17565
17566 offset = 0;
17567 offset += update ? 1 : 0;
17568 offset += return_pc ? 1 : 0;
17569
17570 /* Is the base register in the list? */
17571 for (i = offset; i < num_saves; i++)
17572 {
17573 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17574 /* If SP is in the list, then the base register must be SP. */
17575 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17576 /* If base register is in the list, there must be no explicit update. */
17577 if (regno == regno_base)
17578 gcc_assert (!update);
17579 }
17580
17581 conditional = reverse ? "%?%D0" : "%?%d0";
17582 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17583 {
17584 /* Output pop (not stmfd) because it has a shorter encoding. */
17585 gcc_assert (update);
17586 sprintf (pattern, "pop%s\t{", conditional);
17587 }
17588 else
17589 {
17590 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17591 It's just a convention, their semantics are identical. */
17592 if (regno_base == SP_REGNUM)
17593 sprintf (pattern, "ldm%sfd\t", conditional);
17594 else if (TARGET_UNIFIED_ASM)
17595 sprintf (pattern, "ldmia%s\t", conditional);
17596 else
17597 sprintf (pattern, "ldm%sia\t", conditional);
17598
17599 strcat (pattern, reg_names[regno_base]);
17600 if (update)
17601 strcat (pattern, "!, {");
17602 else
17603 strcat (pattern, ", {");
17604 }
17605
17606 /* Output the first destination register. */
17607 strcat (pattern,
17608 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17609
17610 /* Output the rest of the destination registers. */
17611 for (i = offset + 1; i < num_saves; i++)
17612 {
17613 strcat (pattern, ", ");
17614 strcat (pattern,
17615 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17616 }
17617
17618 strcat (pattern, "}");
17619
17620 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17621 strcat (pattern, "^");
17622
17623 output_asm_insn (pattern, &cond);
17624 }
17625
17626
17627 /* Output the assembly for a store multiple. */
17628
17629 const char *
17630 vfp_output_vstmd (rtx * operands)
17631 {
17632 char pattern[100];
17633 int p;
17634 int base;
17635 int i;
17636 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17637 ? XEXP (operands[0], 0)
17638 : XEXP (XEXP (operands[0], 0), 0);
17639 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17640
17641 if (push_p)
17642 strcpy (pattern, "vpush%?.64\t{%P1");
17643 else
17644 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17645
17646 p = strlen (pattern);
17647
17648 gcc_assert (REG_P (operands[1]));
17649
17650 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17651 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17652 {
17653 p += sprintf (&pattern[p], ", d%d", base + i);
17654 }
17655 strcpy (&pattern[p], "}");
17656
17657 output_asm_insn (pattern, operands);
17658 return "";
17659 }
17660
17661
17662 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17663 number of bytes pushed. */
17664
17665 static int
17666 vfp_emit_fstmd (int base_reg, int count)
17667 {
17668 rtx par;
17669 rtx dwarf;
17670 rtx tmp, reg;
17671 int i;
17672
17673 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17674 register pairs are stored by a store multiple insn. We avoid this
17675 by pushing an extra pair. */
17676 if (count == 2 && !arm_arch6)
17677 {
17678 if (base_reg == LAST_VFP_REGNUM - 3)
17679 base_reg -= 2;
17680 count++;
17681 }
17682
17683 /* FSTMD may not store more than 16 doubleword registers at once. Split
17684 larger stores into multiple parts (up to a maximum of two, in
17685 practice). */
17686 if (count > 16)
17687 {
17688 int saved;
17689 /* NOTE: base_reg is an internal register number, so each D register
17690 counts as 2. */
17691 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17692 saved += vfp_emit_fstmd (base_reg, 16);
17693 return saved;
17694 }
17695
17696 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17697 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17698
17699 reg = gen_rtx_REG (DFmode, base_reg);
17700 base_reg += 2;
17701
17702 XVECEXP (par, 0, 0)
17703 = gen_rtx_SET (gen_frame_mem
17704 (BLKmode,
17705 gen_rtx_PRE_MODIFY (Pmode,
17706 stack_pointer_rtx,
17707 plus_constant
17708 (Pmode, stack_pointer_rtx,
17709 - (count * 8)))
17710 ),
17711 gen_rtx_UNSPEC (BLKmode,
17712 gen_rtvec (1, reg),
17713 UNSPEC_PUSH_MULT));
17714
17715 tmp = gen_rtx_SET (stack_pointer_rtx,
17716 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17717 RTX_FRAME_RELATED_P (tmp) = 1;
17718 XVECEXP (dwarf, 0, 0) = tmp;
17719
17720 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17721 RTX_FRAME_RELATED_P (tmp) = 1;
17722 XVECEXP (dwarf, 0, 1) = tmp;
17723
17724 for (i = 1; i < count; i++)
17725 {
17726 reg = gen_rtx_REG (DFmode, base_reg);
17727 base_reg += 2;
17728 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17729
17730 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17731 plus_constant (Pmode,
17732 stack_pointer_rtx,
17733 i * 8)),
17734 reg);
17735 RTX_FRAME_RELATED_P (tmp) = 1;
17736 XVECEXP (dwarf, 0, i + 1) = tmp;
17737 }
17738
17739 par = emit_insn (par);
17740 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17741 RTX_FRAME_RELATED_P (par) = 1;
17742
17743 return count * 8;
17744 }
17745
17746 /* Emit a call instruction with pattern PAT. ADDR is the address of
17747 the call target. */
17748
17749 void
17750 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17751 {
17752 rtx insn;
17753
17754 insn = emit_call_insn (pat);
17755
17756 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17757 If the call might use such an entry, add a use of the PIC register
17758 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17759 if (TARGET_VXWORKS_RTP
17760 && flag_pic
17761 && !sibcall
17762 && GET_CODE (addr) == SYMBOL_REF
17763 && (SYMBOL_REF_DECL (addr)
17764 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17765 : !SYMBOL_REF_LOCAL_P (addr)))
17766 {
17767 require_pic_register ();
17768 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17769 }
17770
17771 if (TARGET_AAPCS_BASED)
17772 {
17773 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17774 linker. We need to add an IP clobber to allow setting
17775 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17776 is not needed since it's a fixed register. */
17777 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17778 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17779 }
17780 }
17781
17782 /* Output a 'call' insn. */
17783 const char *
17784 output_call (rtx *operands)
17785 {
17786 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17787
17788 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17789 if (REGNO (operands[0]) == LR_REGNUM)
17790 {
17791 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17792 output_asm_insn ("mov%?\t%0, %|lr", operands);
17793 }
17794
17795 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17796
17797 if (TARGET_INTERWORK || arm_arch4t)
17798 output_asm_insn ("bx%?\t%0", operands);
17799 else
17800 output_asm_insn ("mov%?\t%|pc, %0", operands);
17801
17802 return "";
17803 }
17804
17805 /* Output a 'call' insn that is a reference in memory. This is
17806 disabled for ARMv5 and we prefer a blx instead because otherwise
17807 there's a significant performance overhead. */
17808 const char *
17809 output_call_mem (rtx *operands)
17810 {
17811 gcc_assert (!arm_arch5);
17812 if (TARGET_INTERWORK)
17813 {
17814 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17815 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17816 output_asm_insn ("bx%?\t%|ip", operands);
17817 }
17818 else if (regno_use_in (LR_REGNUM, operands[0]))
17819 {
17820 /* LR is used in the memory address. We load the address in the
17821 first instruction. It's safe to use IP as the target of the
17822 load since the call will kill it anyway. */
17823 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17824 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17825 if (arm_arch4t)
17826 output_asm_insn ("bx%?\t%|ip", operands);
17827 else
17828 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17829 }
17830 else
17831 {
17832 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17833 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17834 }
17835
17836 return "";
17837 }
17838
17839
17840 /* Output a move from arm registers to arm registers of a long double
17841 OPERANDS[0] is the destination.
17842 OPERANDS[1] is the source. */
17843 const char *
17844 output_mov_long_double_arm_from_arm (rtx *operands)
17845 {
17846 /* We have to be careful here because the two might overlap. */
17847 int dest_start = REGNO (operands[0]);
17848 int src_start = REGNO (operands[1]);
17849 rtx ops[2];
17850 int i;
17851
17852 if (dest_start < src_start)
17853 {
17854 for (i = 0; i < 3; i++)
17855 {
17856 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17857 ops[1] = gen_rtx_REG (SImode, src_start + i);
17858 output_asm_insn ("mov%?\t%0, %1", ops);
17859 }
17860 }
17861 else
17862 {
17863 for (i = 2; i >= 0; i--)
17864 {
17865 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17866 ops[1] = gen_rtx_REG (SImode, src_start + i);
17867 output_asm_insn ("mov%?\t%0, %1", ops);
17868 }
17869 }
17870
17871 return "";
17872 }
17873
17874 void
17875 arm_emit_movpair (rtx dest, rtx src)
17876 {
17877 /* If the src is an immediate, simplify it. */
17878 if (CONST_INT_P (src))
17879 {
17880 HOST_WIDE_INT val = INTVAL (src);
17881 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17882 if ((val >> 16) & 0x0000ffff)
17883 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17884 GEN_INT (16)),
17885 GEN_INT ((val >> 16) & 0x0000ffff));
17886 return;
17887 }
17888 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17889 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17890 }
17891
17892 /* Output a move between double words. It must be REG<-MEM
17893 or MEM<-REG. */
17894 const char *
17895 output_move_double (rtx *operands, bool emit, int *count)
17896 {
17897 enum rtx_code code0 = GET_CODE (operands[0]);
17898 enum rtx_code code1 = GET_CODE (operands[1]);
17899 rtx otherops[3];
17900 if (count)
17901 *count = 1;
17902
17903 /* The only case when this might happen is when
17904 you are looking at the length of a DImode instruction
17905 that has an invalid constant in it. */
17906 if (code0 == REG && code1 != MEM)
17907 {
17908 gcc_assert (!emit);
17909 *count = 2;
17910 return "";
17911 }
17912
17913 if (code0 == REG)
17914 {
17915 unsigned int reg0 = REGNO (operands[0]);
17916
17917 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17918
17919 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17920
17921 switch (GET_CODE (XEXP (operands[1], 0)))
17922 {
17923 case REG:
17924
17925 if (emit)
17926 {
17927 if (TARGET_LDRD
17928 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17929 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17930 else
17931 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17932 }
17933 break;
17934
17935 case PRE_INC:
17936 gcc_assert (TARGET_LDRD);
17937 if (emit)
17938 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17939 break;
17940
17941 case PRE_DEC:
17942 if (emit)
17943 {
17944 if (TARGET_LDRD)
17945 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17946 else
17947 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17948 }
17949 break;
17950
17951 case POST_INC:
17952 if (emit)
17953 {
17954 if (TARGET_LDRD)
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17956 else
17957 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17958 }
17959 break;
17960
17961 case POST_DEC:
17962 gcc_assert (TARGET_LDRD);
17963 if (emit)
17964 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17965 break;
17966
17967 case PRE_MODIFY:
17968 case POST_MODIFY:
17969 /* Autoicrement addressing modes should never have overlapping
17970 base and destination registers, and overlapping index registers
17971 are already prohibited, so this doesn't need to worry about
17972 fix_cm3_ldrd. */
17973 otherops[0] = operands[0];
17974 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17975 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17976
17977 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17978 {
17979 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17980 {
17981 /* Registers overlap so split out the increment. */
17982 if (emit)
17983 {
17984 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17985 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17986 }
17987 if (count)
17988 *count = 2;
17989 }
17990 else
17991 {
17992 /* Use a single insn if we can.
17993 FIXME: IWMMXT allows offsets larger than ldrd can
17994 handle, fix these up with a pair of ldr. */
17995 if (TARGET_THUMB2
17996 || !CONST_INT_P (otherops[2])
17997 || (INTVAL (otherops[2]) > -256
17998 && INTVAL (otherops[2]) < 256))
17999 {
18000 if (emit)
18001 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18002 }
18003 else
18004 {
18005 if (emit)
18006 {
18007 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18008 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18009 }
18010 if (count)
18011 *count = 2;
18012
18013 }
18014 }
18015 }
18016 else
18017 {
18018 /* Use a single insn if we can.
18019 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18020 fix these up with a pair of ldr. */
18021 if (TARGET_THUMB2
18022 || !CONST_INT_P (otherops[2])
18023 || (INTVAL (otherops[2]) > -256
18024 && INTVAL (otherops[2]) < 256))
18025 {
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18028 }
18029 else
18030 {
18031 if (emit)
18032 {
18033 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18034 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18035 }
18036 if (count)
18037 *count = 2;
18038 }
18039 }
18040 break;
18041
18042 case LABEL_REF:
18043 case CONST:
18044 /* We might be able to use ldrd %0, %1 here. However the range is
18045 different to ldr/adr, and it is broken on some ARMv7-M
18046 implementations. */
18047 /* Use the second register of the pair to avoid problematic
18048 overlap. */
18049 otherops[1] = operands[1];
18050 if (emit)
18051 output_asm_insn ("adr%?\t%0, %1", otherops);
18052 operands[1] = otherops[0];
18053 if (emit)
18054 {
18055 if (TARGET_LDRD)
18056 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18057 else
18058 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18059 }
18060
18061 if (count)
18062 *count = 2;
18063 break;
18064
18065 /* ??? This needs checking for thumb2. */
18066 default:
18067 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18068 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18069 {
18070 otherops[0] = operands[0];
18071 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18072 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18073
18074 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18075 {
18076 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18077 {
18078 switch ((int) INTVAL (otherops[2]))
18079 {
18080 case -8:
18081 if (emit)
18082 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18083 return "";
18084 case -4:
18085 if (TARGET_THUMB2)
18086 break;
18087 if (emit)
18088 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18089 return "";
18090 case 4:
18091 if (TARGET_THUMB2)
18092 break;
18093 if (emit)
18094 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18095 return "";
18096 }
18097 }
18098 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18099 operands[1] = otherops[0];
18100 if (TARGET_LDRD
18101 && (REG_P (otherops[2])
18102 || TARGET_THUMB2
18103 || (CONST_INT_P (otherops[2])
18104 && INTVAL (otherops[2]) > -256
18105 && INTVAL (otherops[2]) < 256)))
18106 {
18107 if (reg_overlap_mentioned_p (operands[0],
18108 otherops[2]))
18109 {
18110 /* Swap base and index registers over to
18111 avoid a conflict. */
18112 std::swap (otherops[1], otherops[2]);
18113 }
18114 /* If both registers conflict, it will usually
18115 have been fixed by a splitter. */
18116 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18117 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18118 {
18119 if (emit)
18120 {
18121 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18122 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18123 }
18124 if (count)
18125 *count = 2;
18126 }
18127 else
18128 {
18129 otherops[0] = operands[0];
18130 if (emit)
18131 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18132 }
18133 return "";
18134 }
18135
18136 if (CONST_INT_P (otherops[2]))
18137 {
18138 if (emit)
18139 {
18140 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18141 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18142 else
18143 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18144 }
18145 }
18146 else
18147 {
18148 if (emit)
18149 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18150 }
18151 }
18152 else
18153 {
18154 if (emit)
18155 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18156 }
18157
18158 if (count)
18159 *count = 2;
18160
18161 if (TARGET_LDRD)
18162 return "ldr%(d%)\t%0, [%1]";
18163
18164 return "ldm%(ia%)\t%1, %M0";
18165 }
18166 else
18167 {
18168 otherops[1] = adjust_address (operands[1], SImode, 4);
18169 /* Take care of overlapping base/data reg. */
18170 if (reg_mentioned_p (operands[0], operands[1]))
18171 {
18172 if (emit)
18173 {
18174 output_asm_insn ("ldr%?\t%0, %1", otherops);
18175 output_asm_insn ("ldr%?\t%0, %1", operands);
18176 }
18177 if (count)
18178 *count = 2;
18179
18180 }
18181 else
18182 {
18183 if (emit)
18184 {
18185 output_asm_insn ("ldr%?\t%0, %1", operands);
18186 output_asm_insn ("ldr%?\t%0, %1", otherops);
18187 }
18188 if (count)
18189 *count = 2;
18190 }
18191 }
18192 }
18193 }
18194 else
18195 {
18196 /* Constraints should ensure this. */
18197 gcc_assert (code0 == MEM && code1 == REG);
18198 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18199 || (TARGET_ARM && TARGET_LDRD));
18200
18201 switch (GET_CODE (XEXP (operands[0], 0)))
18202 {
18203 case REG:
18204 if (emit)
18205 {
18206 if (TARGET_LDRD)
18207 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18208 else
18209 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18210 }
18211 break;
18212
18213 case PRE_INC:
18214 gcc_assert (TARGET_LDRD);
18215 if (emit)
18216 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18217 break;
18218
18219 case PRE_DEC:
18220 if (emit)
18221 {
18222 if (TARGET_LDRD)
18223 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18224 else
18225 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18226 }
18227 break;
18228
18229 case POST_INC:
18230 if (emit)
18231 {
18232 if (TARGET_LDRD)
18233 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18234 else
18235 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18236 }
18237 break;
18238
18239 case POST_DEC:
18240 gcc_assert (TARGET_LDRD);
18241 if (emit)
18242 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18243 break;
18244
18245 case PRE_MODIFY:
18246 case POST_MODIFY:
18247 otherops[0] = operands[1];
18248 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18249 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18250
18251 /* IWMMXT allows offsets larger than ldrd can handle,
18252 fix these up with a pair of ldr. */
18253 if (!TARGET_THUMB2
18254 && CONST_INT_P (otherops[2])
18255 && (INTVAL(otherops[2]) <= -256
18256 || INTVAL(otherops[2]) >= 256))
18257 {
18258 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18259 {
18260 if (emit)
18261 {
18262 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18263 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18264 }
18265 if (count)
18266 *count = 2;
18267 }
18268 else
18269 {
18270 if (emit)
18271 {
18272 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18273 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18274 }
18275 if (count)
18276 *count = 2;
18277 }
18278 }
18279 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18280 {
18281 if (emit)
18282 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18283 }
18284 else
18285 {
18286 if (emit)
18287 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18288 }
18289 break;
18290
18291 case PLUS:
18292 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18293 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18294 {
18295 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18296 {
18297 case -8:
18298 if (emit)
18299 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18300 return "";
18301
18302 case -4:
18303 if (TARGET_THUMB2)
18304 break;
18305 if (emit)
18306 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18307 return "";
18308
18309 case 4:
18310 if (TARGET_THUMB2)
18311 break;
18312 if (emit)
18313 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18314 return "";
18315 }
18316 }
18317 if (TARGET_LDRD
18318 && (REG_P (otherops[2])
18319 || TARGET_THUMB2
18320 || (CONST_INT_P (otherops[2])
18321 && INTVAL (otherops[2]) > -256
18322 && INTVAL (otherops[2]) < 256)))
18323 {
18324 otherops[0] = operands[1];
18325 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18326 if (emit)
18327 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18328 return "";
18329 }
18330 /* Fall through */
18331
18332 default:
18333 otherops[0] = adjust_address (operands[0], SImode, 4);
18334 otherops[1] = operands[1];
18335 if (emit)
18336 {
18337 output_asm_insn ("str%?\t%1, %0", operands);
18338 output_asm_insn ("str%?\t%H1, %0", otherops);
18339 }
18340 if (count)
18341 *count = 2;
18342 }
18343 }
18344
18345 return "";
18346 }
18347
18348 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18349 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18350
18351 const char *
18352 output_move_quad (rtx *operands)
18353 {
18354 if (REG_P (operands[0]))
18355 {
18356 /* Load, or reg->reg move. */
18357
18358 if (MEM_P (operands[1]))
18359 {
18360 switch (GET_CODE (XEXP (operands[1], 0)))
18361 {
18362 case REG:
18363 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18364 break;
18365
18366 case LABEL_REF:
18367 case CONST:
18368 output_asm_insn ("adr%?\t%0, %1", operands);
18369 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18370 break;
18371
18372 default:
18373 gcc_unreachable ();
18374 }
18375 }
18376 else
18377 {
18378 rtx ops[2];
18379 int dest, src, i;
18380
18381 gcc_assert (REG_P (operands[1]));
18382
18383 dest = REGNO (operands[0]);
18384 src = REGNO (operands[1]);
18385
18386 /* This seems pretty dumb, but hopefully GCC won't try to do it
18387 very often. */
18388 if (dest < src)
18389 for (i = 0; i < 4; i++)
18390 {
18391 ops[0] = gen_rtx_REG (SImode, dest + i);
18392 ops[1] = gen_rtx_REG (SImode, src + i);
18393 output_asm_insn ("mov%?\t%0, %1", ops);
18394 }
18395 else
18396 for (i = 3; i >= 0; i--)
18397 {
18398 ops[0] = gen_rtx_REG (SImode, dest + i);
18399 ops[1] = gen_rtx_REG (SImode, src + i);
18400 output_asm_insn ("mov%?\t%0, %1", ops);
18401 }
18402 }
18403 }
18404 else
18405 {
18406 gcc_assert (MEM_P (operands[0]));
18407 gcc_assert (REG_P (operands[1]));
18408 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18409
18410 switch (GET_CODE (XEXP (operands[0], 0)))
18411 {
18412 case REG:
18413 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18414 break;
18415
18416 default:
18417 gcc_unreachable ();
18418 }
18419 }
18420
18421 return "";
18422 }
18423
18424 /* Output a VFP load or store instruction. */
18425
18426 const char *
18427 output_move_vfp (rtx *operands)
18428 {
18429 rtx reg, mem, addr, ops[2];
18430 int load = REG_P (operands[0]);
18431 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18432 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18433 const char *templ;
18434 char buff[50];
18435 machine_mode mode;
18436
18437 reg = operands[!load];
18438 mem = operands[load];
18439
18440 mode = GET_MODE (reg);
18441
18442 gcc_assert (REG_P (reg));
18443 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18444 gcc_assert (mode == SFmode
18445 || mode == DFmode
18446 || mode == SImode
18447 || mode == DImode
18448 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18449 gcc_assert (MEM_P (mem));
18450
18451 addr = XEXP (mem, 0);
18452
18453 switch (GET_CODE (addr))
18454 {
18455 case PRE_DEC:
18456 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18457 ops[0] = XEXP (addr, 0);
18458 ops[1] = reg;
18459 break;
18460
18461 case POST_INC:
18462 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18463 ops[0] = XEXP (addr, 0);
18464 ops[1] = reg;
18465 break;
18466
18467 default:
18468 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18469 ops[0] = reg;
18470 ops[1] = mem;
18471 break;
18472 }
18473
18474 sprintf (buff, templ,
18475 load ? "ld" : "st",
18476 dp ? "64" : "32",
18477 dp ? "P" : "",
18478 integer_p ? "\t%@ int" : "");
18479 output_asm_insn (buff, ops);
18480
18481 return "";
18482 }
18483
18484 /* Output a Neon double-word or quad-word load or store, or a load
18485 or store for larger structure modes.
18486
18487 WARNING: The ordering of elements is weird in big-endian mode,
18488 because the EABI requires that vectors stored in memory appear
18489 as though they were stored by a VSTM, as required by the EABI.
18490 GCC RTL defines element ordering based on in-memory order.
18491 This can be different from the architectural ordering of elements
18492 within a NEON register. The intrinsics defined in arm_neon.h use the
18493 NEON register element ordering, not the GCC RTL element ordering.
18494
18495 For example, the in-memory ordering of a big-endian a quadword
18496 vector with 16-bit elements when stored from register pair {d0,d1}
18497 will be (lowest address first, d0[N] is NEON register element N):
18498
18499 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18500
18501 When necessary, quadword registers (dN, dN+1) are moved to ARM
18502 registers from rN in the order:
18503
18504 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18505
18506 So that STM/LDM can be used on vectors in ARM registers, and the
18507 same memory layout will result as if VSTM/VLDM were used.
18508
18509 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18510 possible, which allows use of appropriate alignment tags.
18511 Note that the choice of "64" is independent of the actual vector
18512 element size; this size simply ensures that the behavior is
18513 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18514
18515 Due to limitations of those instructions, use of VST1.64/VLD1.64
18516 is not possible if:
18517 - the address contains PRE_DEC, or
18518 - the mode refers to more than 4 double-word registers
18519
18520 In those cases, it would be possible to replace VSTM/VLDM by a
18521 sequence of instructions; this is not currently implemented since
18522 this is not certain to actually improve performance. */
18523
18524 const char *
18525 output_move_neon (rtx *operands)
18526 {
18527 rtx reg, mem, addr, ops[2];
18528 int regno, nregs, load = REG_P (operands[0]);
18529 const char *templ;
18530 char buff[50];
18531 machine_mode mode;
18532
18533 reg = operands[!load];
18534 mem = operands[load];
18535
18536 mode = GET_MODE (reg);
18537
18538 gcc_assert (REG_P (reg));
18539 regno = REGNO (reg);
18540 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18541 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18542 || NEON_REGNO_OK_FOR_QUAD (regno));
18543 gcc_assert (VALID_NEON_DREG_MODE (mode)
18544 || VALID_NEON_QREG_MODE (mode)
18545 || VALID_NEON_STRUCT_MODE (mode));
18546 gcc_assert (MEM_P (mem));
18547
18548 addr = XEXP (mem, 0);
18549
18550 /* Strip off const from addresses like (const (plus (...))). */
18551 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18552 addr = XEXP (addr, 0);
18553
18554 switch (GET_CODE (addr))
18555 {
18556 case POST_INC:
18557 /* We have to use vldm / vstm for too-large modes. */
18558 if (nregs > 4)
18559 {
18560 templ = "v%smia%%?\t%%0!, %%h1";
18561 ops[0] = XEXP (addr, 0);
18562 }
18563 else
18564 {
18565 templ = "v%s1.64\t%%h1, %%A0";
18566 ops[0] = mem;
18567 }
18568 ops[1] = reg;
18569 break;
18570
18571 case PRE_DEC:
18572 /* We have to use vldm / vstm in this case, since there is no
18573 pre-decrement form of the vld1 / vst1 instructions. */
18574 templ = "v%smdb%%?\t%%0!, %%h1";
18575 ops[0] = XEXP (addr, 0);
18576 ops[1] = reg;
18577 break;
18578
18579 case POST_MODIFY:
18580 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18581 gcc_unreachable ();
18582
18583 case REG:
18584 /* We have to use vldm / vstm for too-large modes. */
18585 if (nregs > 1)
18586 {
18587 if (nregs > 4)
18588 templ = "v%smia%%?\t%%m0, %%h1";
18589 else
18590 templ = "v%s1.64\t%%h1, %%A0";
18591
18592 ops[0] = mem;
18593 ops[1] = reg;
18594 break;
18595 }
18596 /* Fall through. */
18597 case LABEL_REF:
18598 case PLUS:
18599 {
18600 int i;
18601 int overlap = -1;
18602 for (i = 0; i < nregs; i++)
18603 {
18604 /* We're only using DImode here because it's a convenient size. */
18605 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18606 ops[1] = adjust_address (mem, DImode, 8 * i);
18607 if (reg_overlap_mentioned_p (ops[0], mem))
18608 {
18609 gcc_assert (overlap == -1);
18610 overlap = i;
18611 }
18612 else
18613 {
18614 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18615 output_asm_insn (buff, ops);
18616 }
18617 }
18618 if (overlap != -1)
18619 {
18620 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18621 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18622 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18623 output_asm_insn (buff, ops);
18624 }
18625
18626 return "";
18627 }
18628
18629 default:
18630 gcc_unreachable ();
18631 }
18632
18633 sprintf (buff, templ, load ? "ld" : "st");
18634 output_asm_insn (buff, ops);
18635
18636 return "";
18637 }
18638
18639 /* Compute and return the length of neon_mov<mode>, where <mode> is
18640 one of VSTRUCT modes: EI, OI, CI or XI. */
18641 int
18642 arm_attr_length_move_neon (rtx_insn *insn)
18643 {
18644 rtx reg, mem, addr;
18645 int load;
18646 machine_mode mode;
18647
18648 extract_insn_cached (insn);
18649
18650 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18651 {
18652 mode = GET_MODE (recog_data.operand[0]);
18653 switch (mode)
18654 {
18655 case EImode:
18656 case OImode:
18657 return 8;
18658 case CImode:
18659 return 12;
18660 case XImode:
18661 return 16;
18662 default:
18663 gcc_unreachable ();
18664 }
18665 }
18666
18667 load = REG_P (recog_data.operand[0]);
18668 reg = recog_data.operand[!load];
18669 mem = recog_data.operand[load];
18670
18671 gcc_assert (MEM_P (mem));
18672
18673 mode = GET_MODE (reg);
18674 addr = XEXP (mem, 0);
18675
18676 /* Strip off const from addresses like (const (plus (...))). */
18677 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18678 addr = XEXP (addr, 0);
18679
18680 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18681 {
18682 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18683 return insns * 4;
18684 }
18685 else
18686 return 4;
18687 }
18688
18689 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18690 return zero. */
18691
18692 int
18693 arm_address_offset_is_imm (rtx_insn *insn)
18694 {
18695 rtx mem, addr;
18696
18697 extract_insn_cached (insn);
18698
18699 if (REG_P (recog_data.operand[0]))
18700 return 0;
18701
18702 mem = recog_data.operand[0];
18703
18704 gcc_assert (MEM_P (mem));
18705
18706 addr = XEXP (mem, 0);
18707
18708 if (REG_P (addr)
18709 || (GET_CODE (addr) == PLUS
18710 && REG_P (XEXP (addr, 0))
18711 && CONST_INT_P (XEXP (addr, 1))))
18712 return 1;
18713 else
18714 return 0;
18715 }
18716
18717 /* Output an ADD r, s, #n where n may be too big for one instruction.
18718 If adding zero to one register, output nothing. */
18719 const char *
18720 output_add_immediate (rtx *operands)
18721 {
18722 HOST_WIDE_INT n = INTVAL (operands[2]);
18723
18724 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18725 {
18726 if (n < 0)
18727 output_multi_immediate (operands,
18728 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18729 -n);
18730 else
18731 output_multi_immediate (operands,
18732 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18733 n);
18734 }
18735
18736 return "";
18737 }
18738
18739 /* Output a multiple immediate operation.
18740 OPERANDS is the vector of operands referred to in the output patterns.
18741 INSTR1 is the output pattern to use for the first constant.
18742 INSTR2 is the output pattern to use for subsequent constants.
18743 IMMED_OP is the index of the constant slot in OPERANDS.
18744 N is the constant value. */
18745 static const char *
18746 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18747 int immed_op, HOST_WIDE_INT n)
18748 {
18749 #if HOST_BITS_PER_WIDE_INT > 32
18750 n &= 0xffffffff;
18751 #endif
18752
18753 if (n == 0)
18754 {
18755 /* Quick and easy output. */
18756 operands[immed_op] = const0_rtx;
18757 output_asm_insn (instr1, operands);
18758 }
18759 else
18760 {
18761 int i;
18762 const char * instr = instr1;
18763
18764 /* Note that n is never zero here (which would give no output). */
18765 for (i = 0; i < 32; i += 2)
18766 {
18767 if (n & (3 << i))
18768 {
18769 operands[immed_op] = GEN_INT (n & (255 << i));
18770 output_asm_insn (instr, operands);
18771 instr = instr2;
18772 i += 6;
18773 }
18774 }
18775 }
18776
18777 return "";
18778 }
18779
18780 /* Return the name of a shifter operation. */
18781 static const char *
18782 arm_shift_nmem(enum rtx_code code)
18783 {
18784 switch (code)
18785 {
18786 case ASHIFT:
18787 return ARM_LSL_NAME;
18788
18789 case ASHIFTRT:
18790 return "asr";
18791
18792 case LSHIFTRT:
18793 return "lsr";
18794
18795 case ROTATERT:
18796 return "ror";
18797
18798 default:
18799 abort();
18800 }
18801 }
18802
18803 /* Return the appropriate ARM instruction for the operation code.
18804 The returned result should not be overwritten. OP is the rtx of the
18805 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18806 was shifted. */
18807 const char *
18808 arithmetic_instr (rtx op, int shift_first_arg)
18809 {
18810 switch (GET_CODE (op))
18811 {
18812 case PLUS:
18813 return "add";
18814
18815 case MINUS:
18816 return shift_first_arg ? "rsb" : "sub";
18817
18818 case IOR:
18819 return "orr";
18820
18821 case XOR:
18822 return "eor";
18823
18824 case AND:
18825 return "and";
18826
18827 case ASHIFT:
18828 case ASHIFTRT:
18829 case LSHIFTRT:
18830 case ROTATERT:
18831 return arm_shift_nmem(GET_CODE(op));
18832
18833 default:
18834 gcc_unreachable ();
18835 }
18836 }
18837
18838 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18839 for the operation code. The returned result should not be overwritten.
18840 OP is the rtx code of the shift.
18841 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18842 shift. */
18843 static const char *
18844 shift_op (rtx op, HOST_WIDE_INT *amountp)
18845 {
18846 const char * mnem;
18847 enum rtx_code code = GET_CODE (op);
18848
18849 switch (code)
18850 {
18851 case ROTATE:
18852 if (!CONST_INT_P (XEXP (op, 1)))
18853 {
18854 output_operand_lossage ("invalid shift operand");
18855 return NULL;
18856 }
18857
18858 code = ROTATERT;
18859 *amountp = 32 - INTVAL (XEXP (op, 1));
18860 mnem = "ror";
18861 break;
18862
18863 case ASHIFT:
18864 case ASHIFTRT:
18865 case LSHIFTRT:
18866 case ROTATERT:
18867 mnem = arm_shift_nmem(code);
18868 if (CONST_INT_P (XEXP (op, 1)))
18869 {
18870 *amountp = INTVAL (XEXP (op, 1));
18871 }
18872 else if (REG_P (XEXP (op, 1)))
18873 {
18874 *amountp = -1;
18875 return mnem;
18876 }
18877 else
18878 {
18879 output_operand_lossage ("invalid shift operand");
18880 return NULL;
18881 }
18882 break;
18883
18884 case MULT:
18885 /* We never have to worry about the amount being other than a
18886 power of 2, since this case can never be reloaded from a reg. */
18887 if (!CONST_INT_P (XEXP (op, 1)))
18888 {
18889 output_operand_lossage ("invalid shift operand");
18890 return NULL;
18891 }
18892
18893 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18894
18895 /* Amount must be a power of two. */
18896 if (*amountp & (*amountp - 1))
18897 {
18898 output_operand_lossage ("invalid shift operand");
18899 return NULL;
18900 }
18901
18902 *amountp = int_log2 (*amountp);
18903 return ARM_LSL_NAME;
18904
18905 default:
18906 output_operand_lossage ("invalid shift operand");
18907 return NULL;
18908 }
18909
18910 /* This is not 100% correct, but follows from the desire to merge
18911 multiplication by a power of 2 with the recognizer for a
18912 shift. >=32 is not a valid shift for "lsl", so we must try and
18913 output a shift that produces the correct arithmetical result.
18914 Using lsr #32 is identical except for the fact that the carry bit
18915 is not set correctly if we set the flags; but we never use the
18916 carry bit from such an operation, so we can ignore that. */
18917 if (code == ROTATERT)
18918 /* Rotate is just modulo 32. */
18919 *amountp &= 31;
18920 else if (*amountp != (*amountp & 31))
18921 {
18922 if (code == ASHIFT)
18923 mnem = "lsr";
18924 *amountp = 32;
18925 }
18926
18927 /* Shifts of 0 are no-ops. */
18928 if (*amountp == 0)
18929 return NULL;
18930
18931 return mnem;
18932 }
18933
18934 /* Obtain the shift from the POWER of two. */
18935
18936 static HOST_WIDE_INT
18937 int_log2 (HOST_WIDE_INT power)
18938 {
18939 HOST_WIDE_INT shift = 0;
18940
18941 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18942 {
18943 gcc_assert (shift <= 31);
18944 shift++;
18945 }
18946
18947 return shift;
18948 }
18949
18950 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18951 because /bin/as is horribly restrictive. The judgement about
18952 whether or not each character is 'printable' (and can be output as
18953 is) or not (and must be printed with an octal escape) must be made
18954 with reference to the *host* character set -- the situation is
18955 similar to that discussed in the comments above pp_c_char in
18956 c-pretty-print.c. */
18957
18958 #define MAX_ASCII_LEN 51
18959
18960 void
18961 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18962 {
18963 int i;
18964 int len_so_far = 0;
18965
18966 fputs ("\t.ascii\t\"", stream);
18967
18968 for (i = 0; i < len; i++)
18969 {
18970 int c = p[i];
18971
18972 if (len_so_far >= MAX_ASCII_LEN)
18973 {
18974 fputs ("\"\n\t.ascii\t\"", stream);
18975 len_so_far = 0;
18976 }
18977
18978 if (ISPRINT (c))
18979 {
18980 if (c == '\\' || c == '\"')
18981 {
18982 putc ('\\', stream);
18983 len_so_far++;
18984 }
18985 putc (c, stream);
18986 len_so_far++;
18987 }
18988 else
18989 {
18990 fprintf (stream, "\\%03o", c);
18991 len_so_far += 4;
18992 }
18993 }
18994
18995 fputs ("\"\n", stream);
18996 }
18997 \f
18998 /* Whether a register is callee saved or not. This is necessary because high
18999 registers are marked as caller saved when optimizing for size on Thumb-1
19000 targets despite being callee saved in order to avoid using them. */
19001 #define callee_saved_reg_p(reg) \
19002 (!call_used_regs[reg] \
19003 || (TARGET_THUMB1 && optimize_size \
19004 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19005
19006 /* Compute the register save mask for registers 0 through 12
19007 inclusive. This code is used by arm_compute_save_reg_mask. */
19008
19009 static unsigned long
19010 arm_compute_save_reg0_reg12_mask (void)
19011 {
19012 unsigned long func_type = arm_current_func_type ();
19013 unsigned long save_reg_mask = 0;
19014 unsigned int reg;
19015
19016 if (IS_INTERRUPT (func_type))
19017 {
19018 unsigned int max_reg;
19019 /* Interrupt functions must not corrupt any registers,
19020 even call clobbered ones. If this is a leaf function
19021 we can just examine the registers used by the RTL, but
19022 otherwise we have to assume that whatever function is
19023 called might clobber anything, and so we have to save
19024 all the call-clobbered registers as well. */
19025 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19026 /* FIQ handlers have registers r8 - r12 banked, so
19027 we only need to check r0 - r7, Normal ISRs only
19028 bank r14 and r15, so we must check up to r12.
19029 r13 is the stack pointer which is always preserved,
19030 so we do not need to consider it here. */
19031 max_reg = 7;
19032 else
19033 max_reg = 12;
19034
19035 for (reg = 0; reg <= max_reg; reg++)
19036 if (df_regs_ever_live_p (reg)
19037 || (! crtl->is_leaf && call_used_regs[reg]))
19038 save_reg_mask |= (1 << reg);
19039
19040 /* Also save the pic base register if necessary. */
19041 if (flag_pic
19042 && !TARGET_SINGLE_PIC_BASE
19043 && arm_pic_register != INVALID_REGNUM
19044 && crtl->uses_pic_offset_table)
19045 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19046 }
19047 else if (IS_VOLATILE(func_type))
19048 {
19049 /* For noreturn functions we historically omitted register saves
19050 altogether. However this really messes up debugging. As a
19051 compromise save just the frame pointers. Combined with the link
19052 register saved elsewhere this should be sufficient to get
19053 a backtrace. */
19054 if (frame_pointer_needed)
19055 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19056 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19057 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19058 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19059 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19060 }
19061 else
19062 {
19063 /* In the normal case we only need to save those registers
19064 which are call saved and which are used by this function. */
19065 for (reg = 0; reg <= 11; reg++)
19066 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19067 save_reg_mask |= (1 << reg);
19068
19069 /* Handle the frame pointer as a special case. */
19070 if (frame_pointer_needed)
19071 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19072
19073 /* If we aren't loading the PIC register,
19074 don't stack it even though it may be live. */
19075 if (flag_pic
19076 && !TARGET_SINGLE_PIC_BASE
19077 && arm_pic_register != INVALID_REGNUM
19078 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19079 || crtl->uses_pic_offset_table))
19080 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19081
19082 /* The prologue will copy SP into R0, so save it. */
19083 if (IS_STACKALIGN (func_type))
19084 save_reg_mask |= 1;
19085 }
19086
19087 /* Save registers so the exception handler can modify them. */
19088 if (crtl->calls_eh_return)
19089 {
19090 unsigned int i;
19091
19092 for (i = 0; ; i++)
19093 {
19094 reg = EH_RETURN_DATA_REGNO (i);
19095 if (reg == INVALID_REGNUM)
19096 break;
19097 save_reg_mask |= 1 << reg;
19098 }
19099 }
19100
19101 return save_reg_mask;
19102 }
19103
19104 /* Return true if r3 is live at the start of the function. */
19105
19106 static bool
19107 arm_r3_live_at_start_p (void)
19108 {
19109 /* Just look at cfg info, which is still close enough to correct at this
19110 point. This gives false positives for broken functions that might use
19111 uninitialized data that happens to be allocated in r3, but who cares? */
19112 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19113 }
19114
19115 /* Compute the number of bytes used to store the static chain register on the
19116 stack, above the stack frame. We need to know this accurately to get the
19117 alignment of the rest of the stack frame correct. */
19118
19119 static int
19120 arm_compute_static_chain_stack_bytes (void)
19121 {
19122 /* See the defining assertion in arm_expand_prologue. */
19123 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19124 && IS_NESTED (arm_current_func_type ())
19125 && arm_r3_live_at_start_p ()
19126 && crtl->args.pretend_args_size == 0)
19127 return 4;
19128
19129 return 0;
19130 }
19131
19132 /* Compute a bit mask of which registers need to be
19133 saved on the stack for the current function.
19134 This is used by arm_get_frame_offsets, which may add extra registers. */
19135
19136 static unsigned long
19137 arm_compute_save_reg_mask (void)
19138 {
19139 unsigned int save_reg_mask = 0;
19140 unsigned long func_type = arm_current_func_type ();
19141 unsigned int reg;
19142
19143 if (IS_NAKED (func_type))
19144 /* This should never really happen. */
19145 return 0;
19146
19147 /* If we are creating a stack frame, then we must save the frame pointer,
19148 IP (which will hold the old stack pointer), LR and the PC. */
19149 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19150 save_reg_mask |=
19151 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19152 | (1 << IP_REGNUM)
19153 | (1 << LR_REGNUM)
19154 | (1 << PC_REGNUM);
19155
19156 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19157
19158 /* Decide if we need to save the link register.
19159 Interrupt routines have their own banked link register,
19160 so they never need to save it.
19161 Otherwise if we do not use the link register we do not need to save
19162 it. If we are pushing other registers onto the stack however, we
19163 can save an instruction in the epilogue by pushing the link register
19164 now and then popping it back into the PC. This incurs extra memory
19165 accesses though, so we only do it when optimizing for size, and only
19166 if we know that we will not need a fancy return sequence. */
19167 if (df_regs_ever_live_p (LR_REGNUM)
19168 || (save_reg_mask
19169 && optimize_size
19170 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19171 && !crtl->tail_call_emit
19172 && !crtl->calls_eh_return))
19173 save_reg_mask |= 1 << LR_REGNUM;
19174
19175 if (cfun->machine->lr_save_eliminated)
19176 save_reg_mask &= ~ (1 << LR_REGNUM);
19177
19178 if (TARGET_REALLY_IWMMXT
19179 && ((bit_count (save_reg_mask)
19180 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19181 arm_compute_static_chain_stack_bytes())
19182 ) % 2) != 0)
19183 {
19184 /* The total number of registers that are going to be pushed
19185 onto the stack is odd. We need to ensure that the stack
19186 is 64-bit aligned before we start to save iWMMXt registers,
19187 and also before we start to create locals. (A local variable
19188 might be a double or long long which we will load/store using
19189 an iWMMXt instruction). Therefore we need to push another
19190 ARM register, so that the stack will be 64-bit aligned. We
19191 try to avoid using the arg registers (r0 -r3) as they might be
19192 used to pass values in a tail call. */
19193 for (reg = 4; reg <= 12; reg++)
19194 if ((save_reg_mask & (1 << reg)) == 0)
19195 break;
19196
19197 if (reg <= 12)
19198 save_reg_mask |= (1 << reg);
19199 else
19200 {
19201 cfun->machine->sibcall_blocked = 1;
19202 save_reg_mask |= (1 << 3);
19203 }
19204 }
19205
19206 /* We may need to push an additional register for use initializing the
19207 PIC base register. */
19208 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19209 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19210 {
19211 reg = thumb_find_work_register (1 << 4);
19212 if (!call_used_regs[reg])
19213 save_reg_mask |= (1 << reg);
19214 }
19215
19216 return save_reg_mask;
19217 }
19218
19219
19220 /* Compute a bit mask of which registers need to be
19221 saved on the stack for the current function. */
19222 static unsigned long
19223 thumb1_compute_save_reg_mask (void)
19224 {
19225 unsigned long mask;
19226 unsigned reg;
19227
19228 mask = 0;
19229 for (reg = 0; reg < 12; reg ++)
19230 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19231 mask |= 1 << reg;
19232
19233 if (flag_pic
19234 && !TARGET_SINGLE_PIC_BASE
19235 && arm_pic_register != INVALID_REGNUM
19236 && crtl->uses_pic_offset_table)
19237 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19238
19239 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19240 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19241 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19242
19243 /* LR will also be pushed if any lo regs are pushed. */
19244 if (mask & 0xff || thumb_force_lr_save ())
19245 mask |= (1 << LR_REGNUM);
19246
19247 /* Make sure we have a low work register if we need one.
19248 We will need one if we are going to push a high register,
19249 but we are not currently intending to push a low register. */
19250 if ((mask & 0xff) == 0
19251 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19252 {
19253 /* Use thumb_find_work_register to choose which register
19254 we will use. If the register is live then we will
19255 have to push it. Use LAST_LO_REGNUM as our fallback
19256 choice for the register to select. */
19257 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19258 /* Make sure the register returned by thumb_find_work_register is
19259 not part of the return value. */
19260 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19261 reg = LAST_LO_REGNUM;
19262
19263 if (callee_saved_reg_p (reg))
19264 mask |= 1 << reg;
19265 }
19266
19267 /* The 504 below is 8 bytes less than 512 because there are two possible
19268 alignment words. We can't tell here if they will be present or not so we
19269 have to play it safe and assume that they are. */
19270 if ((CALLER_INTERWORKING_SLOT_SIZE +
19271 ROUND_UP_WORD (get_frame_size ()) +
19272 crtl->outgoing_args_size) >= 504)
19273 {
19274 /* This is the same as the code in thumb1_expand_prologue() which
19275 determines which register to use for stack decrement. */
19276 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19277 if (mask & (1 << reg))
19278 break;
19279
19280 if (reg > LAST_LO_REGNUM)
19281 {
19282 /* Make sure we have a register available for stack decrement. */
19283 mask |= 1 << LAST_LO_REGNUM;
19284 }
19285 }
19286
19287 return mask;
19288 }
19289
19290
19291 /* Return the number of bytes required to save VFP registers. */
19292 static int
19293 arm_get_vfp_saved_size (void)
19294 {
19295 unsigned int regno;
19296 int count;
19297 int saved;
19298
19299 saved = 0;
19300 /* Space for saved VFP registers. */
19301 if (TARGET_HARD_FLOAT && TARGET_VFP)
19302 {
19303 count = 0;
19304 for (regno = FIRST_VFP_REGNUM;
19305 regno < LAST_VFP_REGNUM;
19306 regno += 2)
19307 {
19308 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19309 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19310 {
19311 if (count > 0)
19312 {
19313 /* Workaround ARM10 VFPr1 bug. */
19314 if (count == 2 && !arm_arch6)
19315 count++;
19316 saved += count * 8;
19317 }
19318 count = 0;
19319 }
19320 else
19321 count++;
19322 }
19323 if (count > 0)
19324 {
19325 if (count == 2 && !arm_arch6)
19326 count++;
19327 saved += count * 8;
19328 }
19329 }
19330 return saved;
19331 }
19332
19333
19334 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19335 everything bar the final return instruction. If simple_return is true,
19336 then do not output epilogue, because it has already been emitted in RTL. */
19337 const char *
19338 output_return_instruction (rtx operand, bool really_return, bool reverse,
19339 bool simple_return)
19340 {
19341 char conditional[10];
19342 char instr[100];
19343 unsigned reg;
19344 unsigned long live_regs_mask;
19345 unsigned long func_type;
19346 arm_stack_offsets *offsets;
19347
19348 func_type = arm_current_func_type ();
19349
19350 if (IS_NAKED (func_type))
19351 return "";
19352
19353 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19354 {
19355 /* If this function was declared non-returning, and we have
19356 found a tail call, then we have to trust that the called
19357 function won't return. */
19358 if (really_return)
19359 {
19360 rtx ops[2];
19361
19362 /* Otherwise, trap an attempted return by aborting. */
19363 ops[0] = operand;
19364 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19365 : "abort");
19366 assemble_external_libcall (ops[1]);
19367 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19368 }
19369
19370 return "";
19371 }
19372
19373 gcc_assert (!cfun->calls_alloca || really_return);
19374
19375 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19376
19377 cfun->machine->return_used_this_function = 1;
19378
19379 offsets = arm_get_frame_offsets ();
19380 live_regs_mask = offsets->saved_regs_mask;
19381
19382 if (!simple_return && live_regs_mask)
19383 {
19384 const char * return_reg;
19385
19386 /* If we do not have any special requirements for function exit
19387 (e.g. interworking) then we can load the return address
19388 directly into the PC. Otherwise we must load it into LR. */
19389 if (really_return
19390 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19391 return_reg = reg_names[PC_REGNUM];
19392 else
19393 return_reg = reg_names[LR_REGNUM];
19394
19395 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19396 {
19397 /* There are three possible reasons for the IP register
19398 being saved. 1) a stack frame was created, in which case
19399 IP contains the old stack pointer, or 2) an ISR routine
19400 corrupted it, or 3) it was saved to align the stack on
19401 iWMMXt. In case 1, restore IP into SP, otherwise just
19402 restore IP. */
19403 if (frame_pointer_needed)
19404 {
19405 live_regs_mask &= ~ (1 << IP_REGNUM);
19406 live_regs_mask |= (1 << SP_REGNUM);
19407 }
19408 else
19409 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19410 }
19411
19412 /* On some ARM architectures it is faster to use LDR rather than
19413 LDM to load a single register. On other architectures, the
19414 cost is the same. In 26 bit mode, or for exception handlers,
19415 we have to use LDM to load the PC so that the CPSR is also
19416 restored. */
19417 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19418 if (live_regs_mask == (1U << reg))
19419 break;
19420
19421 if (reg <= LAST_ARM_REGNUM
19422 && (reg != LR_REGNUM
19423 || ! really_return
19424 || ! IS_INTERRUPT (func_type)))
19425 {
19426 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19427 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19428 }
19429 else
19430 {
19431 char *p;
19432 int first = 1;
19433
19434 /* Generate the load multiple instruction to restore the
19435 registers. Note we can get here, even if
19436 frame_pointer_needed is true, but only if sp already
19437 points to the base of the saved core registers. */
19438 if (live_regs_mask & (1 << SP_REGNUM))
19439 {
19440 unsigned HOST_WIDE_INT stack_adjust;
19441
19442 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19443 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19444
19445 if (stack_adjust && arm_arch5 && TARGET_ARM)
19446 if (TARGET_UNIFIED_ASM)
19447 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19448 else
19449 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19450 else
19451 {
19452 /* If we can't use ldmib (SA110 bug),
19453 then try to pop r3 instead. */
19454 if (stack_adjust)
19455 live_regs_mask |= 1 << 3;
19456
19457 if (TARGET_UNIFIED_ASM)
19458 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19459 else
19460 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19461 }
19462 }
19463 else
19464 if (TARGET_UNIFIED_ASM)
19465 sprintf (instr, "pop%s\t{", conditional);
19466 else
19467 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19468
19469 p = instr + strlen (instr);
19470
19471 for (reg = 0; reg <= SP_REGNUM; reg++)
19472 if (live_regs_mask & (1 << reg))
19473 {
19474 int l = strlen (reg_names[reg]);
19475
19476 if (first)
19477 first = 0;
19478 else
19479 {
19480 memcpy (p, ", ", 2);
19481 p += 2;
19482 }
19483
19484 memcpy (p, "%|", 2);
19485 memcpy (p + 2, reg_names[reg], l);
19486 p += l + 2;
19487 }
19488
19489 if (live_regs_mask & (1 << LR_REGNUM))
19490 {
19491 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19492 /* If returning from an interrupt, restore the CPSR. */
19493 if (IS_INTERRUPT (func_type))
19494 strcat (p, "^");
19495 }
19496 else
19497 strcpy (p, "}");
19498 }
19499
19500 output_asm_insn (instr, & operand);
19501
19502 /* See if we need to generate an extra instruction to
19503 perform the actual function return. */
19504 if (really_return
19505 && func_type != ARM_FT_INTERWORKED
19506 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19507 {
19508 /* The return has already been handled
19509 by loading the LR into the PC. */
19510 return "";
19511 }
19512 }
19513
19514 if (really_return)
19515 {
19516 switch ((int) ARM_FUNC_TYPE (func_type))
19517 {
19518 case ARM_FT_ISR:
19519 case ARM_FT_FIQ:
19520 /* ??? This is wrong for unified assembly syntax. */
19521 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19522 break;
19523
19524 case ARM_FT_INTERWORKED:
19525 sprintf (instr, "bx%s\t%%|lr", conditional);
19526 break;
19527
19528 case ARM_FT_EXCEPTION:
19529 /* ??? This is wrong for unified assembly syntax. */
19530 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19531 break;
19532
19533 default:
19534 /* Use bx if it's available. */
19535 if (arm_arch5 || arm_arch4t)
19536 sprintf (instr, "bx%s\t%%|lr", conditional);
19537 else
19538 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19539 break;
19540 }
19541
19542 output_asm_insn (instr, & operand);
19543 }
19544
19545 return "";
19546 }
19547
19548 /* Write the function name into the code section, directly preceding
19549 the function prologue.
19550
19551 Code will be output similar to this:
19552 t0
19553 .ascii "arm_poke_function_name", 0
19554 .align
19555 t1
19556 .word 0xff000000 + (t1 - t0)
19557 arm_poke_function_name
19558 mov ip, sp
19559 stmfd sp!, {fp, ip, lr, pc}
19560 sub fp, ip, #4
19561
19562 When performing a stack backtrace, code can inspect the value
19563 of 'pc' stored at 'fp' + 0. If the trace function then looks
19564 at location pc - 12 and the top 8 bits are set, then we know
19565 that there is a function name embedded immediately preceding this
19566 location and has length ((pc[-3]) & 0xff000000).
19567
19568 We assume that pc is declared as a pointer to an unsigned long.
19569
19570 It is of no benefit to output the function name if we are assembling
19571 a leaf function. These function types will not contain a stack
19572 backtrace structure, therefore it is not possible to determine the
19573 function name. */
19574 void
19575 arm_poke_function_name (FILE *stream, const char *name)
19576 {
19577 unsigned long alignlength;
19578 unsigned long length;
19579 rtx x;
19580
19581 length = strlen (name) + 1;
19582 alignlength = ROUND_UP_WORD (length);
19583
19584 ASM_OUTPUT_ASCII (stream, name, length);
19585 ASM_OUTPUT_ALIGN (stream, 2);
19586 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19587 assemble_aligned_integer (UNITS_PER_WORD, x);
19588 }
19589
19590 /* Place some comments into the assembler stream
19591 describing the current function. */
19592 static void
19593 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19594 {
19595 unsigned long func_type;
19596
19597 /* ??? Do we want to print some of the below anyway? */
19598 if (TARGET_THUMB1)
19599 return;
19600
19601 /* Sanity check. */
19602 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19603
19604 func_type = arm_current_func_type ();
19605
19606 switch ((int) ARM_FUNC_TYPE (func_type))
19607 {
19608 default:
19609 case ARM_FT_NORMAL:
19610 break;
19611 case ARM_FT_INTERWORKED:
19612 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19613 break;
19614 case ARM_FT_ISR:
19615 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19616 break;
19617 case ARM_FT_FIQ:
19618 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19619 break;
19620 case ARM_FT_EXCEPTION:
19621 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19622 break;
19623 }
19624
19625 if (IS_NAKED (func_type))
19626 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19627
19628 if (IS_VOLATILE (func_type))
19629 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19630
19631 if (IS_NESTED (func_type))
19632 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19633 if (IS_STACKALIGN (func_type))
19634 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19635
19636 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19637 crtl->args.size,
19638 crtl->args.pretend_args_size, frame_size);
19639
19640 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19641 frame_pointer_needed,
19642 cfun->machine->uses_anonymous_args);
19643
19644 if (cfun->machine->lr_save_eliminated)
19645 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19646
19647 if (crtl->calls_eh_return)
19648 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19649
19650 }
19651
19652 static void
19653 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19654 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19655 {
19656 arm_stack_offsets *offsets;
19657
19658 if (TARGET_THUMB1)
19659 {
19660 int regno;
19661
19662 /* Emit any call-via-reg trampolines that are needed for v4t support
19663 of call_reg and call_value_reg type insns. */
19664 for (regno = 0; regno < LR_REGNUM; regno++)
19665 {
19666 rtx label = cfun->machine->call_via[regno];
19667
19668 if (label != NULL)
19669 {
19670 switch_to_section (function_section (current_function_decl));
19671 targetm.asm_out.internal_label (asm_out_file, "L",
19672 CODE_LABEL_NUMBER (label));
19673 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19674 }
19675 }
19676
19677 /* ??? Probably not safe to set this here, since it assumes that a
19678 function will be emitted as assembly immediately after we generate
19679 RTL for it. This does not happen for inline functions. */
19680 cfun->machine->return_used_this_function = 0;
19681 }
19682 else /* TARGET_32BIT */
19683 {
19684 /* We need to take into account any stack-frame rounding. */
19685 offsets = arm_get_frame_offsets ();
19686
19687 gcc_assert (!use_return_insn (FALSE, NULL)
19688 || (cfun->machine->return_used_this_function != 0)
19689 || offsets->saved_regs == offsets->outgoing_args
19690 || frame_pointer_needed);
19691 }
19692 }
19693
19694 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19695 STR and STRD. If an even number of registers are being pushed, one
19696 or more STRD patterns are created for each register pair. If an
19697 odd number of registers are pushed, emit an initial STR followed by
19698 as many STRD instructions as are needed. This works best when the
19699 stack is initially 64-bit aligned (the normal case), since it
19700 ensures that each STRD is also 64-bit aligned. */
19701 static void
19702 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19703 {
19704 int num_regs = 0;
19705 int i;
19706 int regno;
19707 rtx par = NULL_RTX;
19708 rtx dwarf = NULL_RTX;
19709 rtx tmp;
19710 bool first = true;
19711
19712 num_regs = bit_count (saved_regs_mask);
19713
19714 /* Must be at least one register to save, and can't save SP or PC. */
19715 gcc_assert (num_regs > 0 && num_regs <= 14);
19716 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19717 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19718
19719 /* Create sequence for DWARF info. All the frame-related data for
19720 debugging is held in this wrapper. */
19721 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19722
19723 /* Describe the stack adjustment. */
19724 tmp = gen_rtx_SET (stack_pointer_rtx,
19725 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19726 RTX_FRAME_RELATED_P (tmp) = 1;
19727 XVECEXP (dwarf, 0, 0) = tmp;
19728
19729 /* Find the first register. */
19730 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19731 ;
19732
19733 i = 0;
19734
19735 /* If there's an odd number of registers to push. Start off by
19736 pushing a single register. This ensures that subsequent strd
19737 operations are dword aligned (assuming that SP was originally
19738 64-bit aligned). */
19739 if ((num_regs & 1) != 0)
19740 {
19741 rtx reg, mem, insn;
19742
19743 reg = gen_rtx_REG (SImode, regno);
19744 if (num_regs == 1)
19745 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19746 stack_pointer_rtx));
19747 else
19748 mem = gen_frame_mem (Pmode,
19749 gen_rtx_PRE_MODIFY
19750 (Pmode, stack_pointer_rtx,
19751 plus_constant (Pmode, stack_pointer_rtx,
19752 -4 * num_regs)));
19753
19754 tmp = gen_rtx_SET (mem, reg);
19755 RTX_FRAME_RELATED_P (tmp) = 1;
19756 insn = emit_insn (tmp);
19757 RTX_FRAME_RELATED_P (insn) = 1;
19758 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19759 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19760 RTX_FRAME_RELATED_P (tmp) = 1;
19761 i++;
19762 regno++;
19763 XVECEXP (dwarf, 0, i) = tmp;
19764 first = false;
19765 }
19766
19767 while (i < num_regs)
19768 if (saved_regs_mask & (1 << regno))
19769 {
19770 rtx reg1, reg2, mem1, mem2;
19771 rtx tmp0, tmp1, tmp2;
19772 int regno2;
19773
19774 /* Find the register to pair with this one. */
19775 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19776 regno2++)
19777 ;
19778
19779 reg1 = gen_rtx_REG (SImode, regno);
19780 reg2 = gen_rtx_REG (SImode, regno2);
19781
19782 if (first)
19783 {
19784 rtx insn;
19785
19786 first = false;
19787 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19788 stack_pointer_rtx,
19789 -4 * num_regs));
19790 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19791 stack_pointer_rtx,
19792 -4 * (num_regs - 1)));
19793 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19794 plus_constant (Pmode, stack_pointer_rtx,
19795 -4 * (num_regs)));
19796 tmp1 = gen_rtx_SET (mem1, reg1);
19797 tmp2 = gen_rtx_SET (mem2, reg2);
19798 RTX_FRAME_RELATED_P (tmp0) = 1;
19799 RTX_FRAME_RELATED_P (tmp1) = 1;
19800 RTX_FRAME_RELATED_P (tmp2) = 1;
19801 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19802 XVECEXP (par, 0, 0) = tmp0;
19803 XVECEXP (par, 0, 1) = tmp1;
19804 XVECEXP (par, 0, 2) = tmp2;
19805 insn = emit_insn (par);
19806 RTX_FRAME_RELATED_P (insn) = 1;
19807 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19808 }
19809 else
19810 {
19811 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19812 stack_pointer_rtx,
19813 4 * i));
19814 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19815 stack_pointer_rtx,
19816 4 * (i + 1)));
19817 tmp1 = gen_rtx_SET (mem1, reg1);
19818 tmp2 = gen_rtx_SET (mem2, reg2);
19819 RTX_FRAME_RELATED_P (tmp1) = 1;
19820 RTX_FRAME_RELATED_P (tmp2) = 1;
19821 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19822 XVECEXP (par, 0, 0) = tmp1;
19823 XVECEXP (par, 0, 1) = tmp2;
19824 emit_insn (par);
19825 }
19826
19827 /* Create unwind information. This is an approximation. */
19828 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19829 plus_constant (Pmode,
19830 stack_pointer_rtx,
19831 4 * i)),
19832 reg1);
19833 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19834 plus_constant (Pmode,
19835 stack_pointer_rtx,
19836 4 * (i + 1))),
19837 reg2);
19838
19839 RTX_FRAME_RELATED_P (tmp1) = 1;
19840 RTX_FRAME_RELATED_P (tmp2) = 1;
19841 XVECEXP (dwarf, 0, i + 1) = tmp1;
19842 XVECEXP (dwarf, 0, i + 2) = tmp2;
19843 i += 2;
19844 regno = regno2 + 1;
19845 }
19846 else
19847 regno++;
19848
19849 return;
19850 }
19851
19852 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19853 whenever possible, otherwise it emits single-word stores. The first store
19854 also allocates stack space for all saved registers, using writeback with
19855 post-addressing mode. All other stores use offset addressing. If no STRD
19856 can be emitted, this function emits a sequence of single-word stores,
19857 and not an STM as before, because single-word stores provide more freedom
19858 scheduling and can be turned into an STM by peephole optimizations. */
19859 static void
19860 arm_emit_strd_push (unsigned long saved_regs_mask)
19861 {
19862 int num_regs = 0;
19863 int i, j, dwarf_index = 0;
19864 int offset = 0;
19865 rtx dwarf = NULL_RTX;
19866 rtx insn = NULL_RTX;
19867 rtx tmp, mem;
19868
19869 /* TODO: A more efficient code can be emitted by changing the
19870 layout, e.g., first push all pairs that can use STRD to keep the
19871 stack aligned, and then push all other registers. */
19872 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19873 if (saved_regs_mask & (1 << i))
19874 num_regs++;
19875
19876 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19877 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19878 gcc_assert (num_regs > 0);
19879
19880 /* Create sequence for DWARF info. */
19881 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19882
19883 /* For dwarf info, we generate explicit stack update. */
19884 tmp = gen_rtx_SET (stack_pointer_rtx,
19885 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19886 RTX_FRAME_RELATED_P (tmp) = 1;
19887 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19888
19889 /* Save registers. */
19890 offset = - 4 * num_regs;
19891 j = 0;
19892 while (j <= LAST_ARM_REGNUM)
19893 if (saved_regs_mask & (1 << j))
19894 {
19895 if ((j % 2 == 0)
19896 && (saved_regs_mask & (1 << (j + 1))))
19897 {
19898 /* Current register and previous register form register pair for
19899 which STRD can be generated. */
19900 if (offset < 0)
19901 {
19902 /* Allocate stack space for all saved registers. */
19903 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19904 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19905 mem = gen_frame_mem (DImode, tmp);
19906 offset = 0;
19907 }
19908 else if (offset > 0)
19909 mem = gen_frame_mem (DImode,
19910 plus_constant (Pmode,
19911 stack_pointer_rtx,
19912 offset));
19913 else
19914 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19915
19916 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19917 RTX_FRAME_RELATED_P (tmp) = 1;
19918 tmp = emit_insn (tmp);
19919
19920 /* Record the first store insn. */
19921 if (dwarf_index == 1)
19922 insn = tmp;
19923
19924 /* Generate dwarf info. */
19925 mem = gen_frame_mem (SImode,
19926 plus_constant (Pmode,
19927 stack_pointer_rtx,
19928 offset));
19929 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19930 RTX_FRAME_RELATED_P (tmp) = 1;
19931 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19932
19933 mem = gen_frame_mem (SImode,
19934 plus_constant (Pmode,
19935 stack_pointer_rtx,
19936 offset + 4));
19937 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19938 RTX_FRAME_RELATED_P (tmp) = 1;
19939 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19940
19941 offset += 8;
19942 j += 2;
19943 }
19944 else
19945 {
19946 /* Emit a single word store. */
19947 if (offset < 0)
19948 {
19949 /* Allocate stack space for all saved registers. */
19950 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19951 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19952 mem = gen_frame_mem (SImode, tmp);
19953 offset = 0;
19954 }
19955 else if (offset > 0)
19956 mem = gen_frame_mem (SImode,
19957 plus_constant (Pmode,
19958 stack_pointer_rtx,
19959 offset));
19960 else
19961 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19962
19963 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19964 RTX_FRAME_RELATED_P (tmp) = 1;
19965 tmp = emit_insn (tmp);
19966
19967 /* Record the first store insn. */
19968 if (dwarf_index == 1)
19969 insn = tmp;
19970
19971 /* Generate dwarf info. */
19972 mem = gen_frame_mem (SImode,
19973 plus_constant(Pmode,
19974 stack_pointer_rtx,
19975 offset));
19976 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19977 RTX_FRAME_RELATED_P (tmp) = 1;
19978 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19979
19980 offset += 4;
19981 j += 1;
19982 }
19983 }
19984 else
19985 j++;
19986
19987 /* Attach dwarf info to the first insn we generate. */
19988 gcc_assert (insn != NULL_RTX);
19989 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19990 RTX_FRAME_RELATED_P (insn) = 1;
19991 }
19992
19993 /* Generate and emit an insn that we will recognize as a push_multi.
19994 Unfortunately, since this insn does not reflect very well the actual
19995 semantics of the operation, we need to annotate the insn for the benefit
19996 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19997 MASK for registers that should be annotated for DWARF2 frame unwind
19998 information. */
19999 static rtx
20000 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20001 {
20002 int num_regs = 0;
20003 int num_dwarf_regs = 0;
20004 int i, j;
20005 rtx par;
20006 rtx dwarf;
20007 int dwarf_par_index;
20008 rtx tmp, reg;
20009
20010 /* We don't record the PC in the dwarf frame information. */
20011 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20012
20013 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20014 {
20015 if (mask & (1 << i))
20016 num_regs++;
20017 if (dwarf_regs_mask & (1 << i))
20018 num_dwarf_regs++;
20019 }
20020
20021 gcc_assert (num_regs && num_regs <= 16);
20022 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20023
20024 /* For the body of the insn we are going to generate an UNSPEC in
20025 parallel with several USEs. This allows the insn to be recognized
20026 by the push_multi pattern in the arm.md file.
20027
20028 The body of the insn looks something like this:
20029
20030 (parallel [
20031 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20032 (const_int:SI <num>)))
20033 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20034 (use (reg:SI XX))
20035 (use (reg:SI YY))
20036 ...
20037 ])
20038
20039 For the frame note however, we try to be more explicit and actually
20040 show each register being stored into the stack frame, plus a (single)
20041 decrement of the stack pointer. We do it this way in order to be
20042 friendly to the stack unwinding code, which only wants to see a single
20043 stack decrement per instruction. The RTL we generate for the note looks
20044 something like this:
20045
20046 (sequence [
20047 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20048 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20049 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20050 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20051 ...
20052 ])
20053
20054 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20055 instead we'd have a parallel expression detailing all
20056 the stores to the various memory addresses so that debug
20057 information is more up-to-date. Remember however while writing
20058 this to take care of the constraints with the push instruction.
20059
20060 Note also that this has to be taken care of for the VFP registers.
20061
20062 For more see PR43399. */
20063
20064 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20065 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20066 dwarf_par_index = 1;
20067
20068 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20069 {
20070 if (mask & (1 << i))
20071 {
20072 reg = gen_rtx_REG (SImode, i);
20073
20074 XVECEXP (par, 0, 0)
20075 = gen_rtx_SET (gen_frame_mem
20076 (BLKmode,
20077 gen_rtx_PRE_MODIFY (Pmode,
20078 stack_pointer_rtx,
20079 plus_constant
20080 (Pmode, stack_pointer_rtx,
20081 -4 * num_regs))
20082 ),
20083 gen_rtx_UNSPEC (BLKmode,
20084 gen_rtvec (1, reg),
20085 UNSPEC_PUSH_MULT));
20086
20087 if (dwarf_regs_mask & (1 << i))
20088 {
20089 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20090 reg);
20091 RTX_FRAME_RELATED_P (tmp) = 1;
20092 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20093 }
20094
20095 break;
20096 }
20097 }
20098
20099 for (j = 1, i++; j < num_regs; i++)
20100 {
20101 if (mask & (1 << i))
20102 {
20103 reg = gen_rtx_REG (SImode, i);
20104
20105 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20106
20107 if (dwarf_regs_mask & (1 << i))
20108 {
20109 tmp
20110 = gen_rtx_SET (gen_frame_mem
20111 (SImode,
20112 plus_constant (Pmode, stack_pointer_rtx,
20113 4 * j)),
20114 reg);
20115 RTX_FRAME_RELATED_P (tmp) = 1;
20116 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20117 }
20118
20119 j++;
20120 }
20121 }
20122
20123 par = emit_insn (par);
20124
20125 tmp = gen_rtx_SET (stack_pointer_rtx,
20126 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20127 RTX_FRAME_RELATED_P (tmp) = 1;
20128 XVECEXP (dwarf, 0, 0) = tmp;
20129
20130 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20131
20132 return par;
20133 }
20134
20135 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20136 SIZE is the offset to be adjusted.
20137 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20138 static void
20139 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20140 {
20141 rtx dwarf;
20142
20143 RTX_FRAME_RELATED_P (insn) = 1;
20144 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20145 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20146 }
20147
20148 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20149 SAVED_REGS_MASK shows which registers need to be restored.
20150
20151 Unfortunately, since this insn does not reflect very well the actual
20152 semantics of the operation, we need to annotate the insn for the benefit
20153 of DWARF2 frame unwind information. */
20154 static void
20155 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20156 {
20157 int num_regs = 0;
20158 int i, j;
20159 rtx par;
20160 rtx dwarf = NULL_RTX;
20161 rtx tmp, reg;
20162 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20163 int offset_adj;
20164 int emit_update;
20165
20166 offset_adj = return_in_pc ? 1 : 0;
20167 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20168 if (saved_regs_mask & (1 << i))
20169 num_regs++;
20170
20171 gcc_assert (num_regs && num_regs <= 16);
20172
20173 /* If SP is in reglist, then we don't emit SP update insn. */
20174 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20175
20176 /* The parallel needs to hold num_regs SETs
20177 and one SET for the stack update. */
20178 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20179
20180 if (return_in_pc)
20181 XVECEXP (par, 0, 0) = ret_rtx;
20182
20183 if (emit_update)
20184 {
20185 /* Increment the stack pointer, based on there being
20186 num_regs 4-byte registers to restore. */
20187 tmp = gen_rtx_SET (stack_pointer_rtx,
20188 plus_constant (Pmode,
20189 stack_pointer_rtx,
20190 4 * num_regs));
20191 RTX_FRAME_RELATED_P (tmp) = 1;
20192 XVECEXP (par, 0, offset_adj) = tmp;
20193 }
20194
20195 /* Now restore every reg, which may include PC. */
20196 for (j = 0, i = 0; j < num_regs; i++)
20197 if (saved_regs_mask & (1 << i))
20198 {
20199 reg = gen_rtx_REG (SImode, i);
20200 if ((num_regs == 1) && emit_update && !return_in_pc)
20201 {
20202 /* Emit single load with writeback. */
20203 tmp = gen_frame_mem (SImode,
20204 gen_rtx_POST_INC (Pmode,
20205 stack_pointer_rtx));
20206 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20207 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20208 return;
20209 }
20210
20211 tmp = gen_rtx_SET (reg,
20212 gen_frame_mem
20213 (SImode,
20214 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20215 RTX_FRAME_RELATED_P (tmp) = 1;
20216 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20217
20218 /* We need to maintain a sequence for DWARF info too. As dwarf info
20219 should not have PC, skip PC. */
20220 if (i != PC_REGNUM)
20221 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20222
20223 j++;
20224 }
20225
20226 if (return_in_pc)
20227 par = emit_jump_insn (par);
20228 else
20229 par = emit_insn (par);
20230
20231 REG_NOTES (par) = dwarf;
20232 if (!return_in_pc)
20233 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20234 stack_pointer_rtx, stack_pointer_rtx);
20235 }
20236
20237 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20238 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20239
20240 Unfortunately, since this insn does not reflect very well the actual
20241 semantics of the operation, we need to annotate the insn for the benefit
20242 of DWARF2 frame unwind information. */
20243 static void
20244 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20245 {
20246 int i, j;
20247 rtx par;
20248 rtx dwarf = NULL_RTX;
20249 rtx tmp, reg;
20250
20251 gcc_assert (num_regs && num_regs <= 32);
20252
20253 /* Workaround ARM10 VFPr1 bug. */
20254 if (num_regs == 2 && !arm_arch6)
20255 {
20256 if (first_reg == 15)
20257 first_reg--;
20258
20259 num_regs++;
20260 }
20261
20262 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20263 there could be up to 32 D-registers to restore.
20264 If there are more than 16 D-registers, make two recursive calls,
20265 each of which emits one pop_multi instruction. */
20266 if (num_regs > 16)
20267 {
20268 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20269 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20270 return;
20271 }
20272
20273 /* The parallel needs to hold num_regs SETs
20274 and one SET for the stack update. */
20275 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20276
20277 /* Increment the stack pointer, based on there being
20278 num_regs 8-byte registers to restore. */
20279 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20280 RTX_FRAME_RELATED_P (tmp) = 1;
20281 XVECEXP (par, 0, 0) = tmp;
20282
20283 /* Now show every reg that will be restored, using a SET for each. */
20284 for (j = 0, i=first_reg; j < num_regs; i += 2)
20285 {
20286 reg = gen_rtx_REG (DFmode, i);
20287
20288 tmp = gen_rtx_SET (reg,
20289 gen_frame_mem
20290 (DFmode,
20291 plus_constant (Pmode, base_reg, 8 * j)));
20292 RTX_FRAME_RELATED_P (tmp) = 1;
20293 XVECEXP (par, 0, j + 1) = tmp;
20294
20295 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20296
20297 j++;
20298 }
20299
20300 par = emit_insn (par);
20301 REG_NOTES (par) = dwarf;
20302
20303 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20304 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20305 {
20306 RTX_FRAME_RELATED_P (par) = 1;
20307 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20308 }
20309 else
20310 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20311 base_reg, base_reg);
20312 }
20313
20314 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20315 number of registers are being popped, multiple LDRD patterns are created for
20316 all register pairs. If odd number of registers are popped, last register is
20317 loaded by using LDR pattern. */
20318 static void
20319 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20320 {
20321 int num_regs = 0;
20322 int i, j;
20323 rtx par = NULL_RTX;
20324 rtx dwarf = NULL_RTX;
20325 rtx tmp, reg, tmp1;
20326 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20327
20328 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20329 if (saved_regs_mask & (1 << i))
20330 num_regs++;
20331
20332 gcc_assert (num_regs && num_regs <= 16);
20333
20334 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20335 to be popped. So, if num_regs is even, now it will become odd,
20336 and we can generate pop with PC. If num_regs is odd, it will be
20337 even now, and ldr with return can be generated for PC. */
20338 if (return_in_pc)
20339 num_regs--;
20340
20341 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20342
20343 /* Var j iterates over all the registers to gather all the registers in
20344 saved_regs_mask. Var i gives index of saved registers in stack frame.
20345 A PARALLEL RTX of register-pair is created here, so that pattern for
20346 LDRD can be matched. As PC is always last register to be popped, and
20347 we have already decremented num_regs if PC, we don't have to worry
20348 about PC in this loop. */
20349 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20350 if (saved_regs_mask & (1 << j))
20351 {
20352 /* Create RTX for memory load. */
20353 reg = gen_rtx_REG (SImode, j);
20354 tmp = gen_rtx_SET (reg,
20355 gen_frame_mem (SImode,
20356 plus_constant (Pmode,
20357 stack_pointer_rtx, 4 * i)));
20358 RTX_FRAME_RELATED_P (tmp) = 1;
20359
20360 if (i % 2 == 0)
20361 {
20362 /* When saved-register index (i) is even, the RTX to be emitted is
20363 yet to be created. Hence create it first. The LDRD pattern we
20364 are generating is :
20365 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20366 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20367 where target registers need not be consecutive. */
20368 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20369 dwarf = NULL_RTX;
20370 }
20371
20372 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20373 added as 0th element and if i is odd, reg_i is added as 1st element
20374 of LDRD pattern shown above. */
20375 XVECEXP (par, 0, (i % 2)) = tmp;
20376 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20377
20378 if ((i % 2) == 1)
20379 {
20380 /* When saved-register index (i) is odd, RTXs for both the registers
20381 to be loaded are generated in above given LDRD pattern, and the
20382 pattern can be emitted now. */
20383 par = emit_insn (par);
20384 REG_NOTES (par) = dwarf;
20385 RTX_FRAME_RELATED_P (par) = 1;
20386 }
20387
20388 i++;
20389 }
20390
20391 /* If the number of registers pushed is odd AND return_in_pc is false OR
20392 number of registers are even AND return_in_pc is true, last register is
20393 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20394 then LDR with post increment. */
20395
20396 /* Increment the stack pointer, based on there being
20397 num_regs 4-byte registers to restore. */
20398 tmp = gen_rtx_SET (stack_pointer_rtx,
20399 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20400 RTX_FRAME_RELATED_P (tmp) = 1;
20401 tmp = emit_insn (tmp);
20402 if (!return_in_pc)
20403 {
20404 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20405 stack_pointer_rtx, stack_pointer_rtx);
20406 }
20407
20408 dwarf = NULL_RTX;
20409
20410 if (((num_regs % 2) == 1 && !return_in_pc)
20411 || ((num_regs % 2) == 0 && return_in_pc))
20412 {
20413 /* Scan for the single register to be popped. Skip until the saved
20414 register is found. */
20415 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20416
20417 /* Gen LDR with post increment here. */
20418 tmp1 = gen_rtx_MEM (SImode,
20419 gen_rtx_POST_INC (SImode,
20420 stack_pointer_rtx));
20421 set_mem_alias_set (tmp1, get_frame_alias_set ());
20422
20423 reg = gen_rtx_REG (SImode, j);
20424 tmp = gen_rtx_SET (reg, tmp1);
20425 RTX_FRAME_RELATED_P (tmp) = 1;
20426 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20427
20428 if (return_in_pc)
20429 {
20430 /* If return_in_pc, j must be PC_REGNUM. */
20431 gcc_assert (j == PC_REGNUM);
20432 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20433 XVECEXP (par, 0, 0) = ret_rtx;
20434 XVECEXP (par, 0, 1) = tmp;
20435 par = emit_jump_insn (par);
20436 }
20437 else
20438 {
20439 par = emit_insn (tmp);
20440 REG_NOTES (par) = dwarf;
20441 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20442 stack_pointer_rtx, stack_pointer_rtx);
20443 }
20444
20445 }
20446 else if ((num_regs % 2) == 1 && return_in_pc)
20447 {
20448 /* There are 2 registers to be popped. So, generate the pattern
20449 pop_multiple_with_stack_update_and_return to pop in PC. */
20450 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20451 }
20452
20453 return;
20454 }
20455
20456 /* LDRD in ARM mode needs consecutive registers as operands. This function
20457 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20458 offset addressing and then generates one separate stack udpate. This provides
20459 more scheduling freedom, compared to writeback on every load. However,
20460 if the function returns using load into PC directly
20461 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20462 before the last load. TODO: Add a peephole optimization to recognize
20463 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20464 peephole optimization to merge the load at stack-offset zero
20465 with the stack update instruction using load with writeback
20466 in post-index addressing mode. */
20467 static void
20468 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20469 {
20470 int j = 0;
20471 int offset = 0;
20472 rtx par = NULL_RTX;
20473 rtx dwarf = NULL_RTX;
20474 rtx tmp, mem;
20475
20476 /* Restore saved registers. */
20477 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20478 j = 0;
20479 while (j <= LAST_ARM_REGNUM)
20480 if (saved_regs_mask & (1 << j))
20481 {
20482 if ((j % 2) == 0
20483 && (saved_regs_mask & (1 << (j + 1)))
20484 && (j + 1) != PC_REGNUM)
20485 {
20486 /* Current register and next register form register pair for which
20487 LDRD can be generated. PC is always the last register popped, and
20488 we handle it separately. */
20489 if (offset > 0)
20490 mem = gen_frame_mem (DImode,
20491 plus_constant (Pmode,
20492 stack_pointer_rtx,
20493 offset));
20494 else
20495 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20496
20497 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20498 tmp = emit_insn (tmp);
20499 RTX_FRAME_RELATED_P (tmp) = 1;
20500
20501 /* Generate dwarf info. */
20502
20503 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20504 gen_rtx_REG (SImode, j),
20505 NULL_RTX);
20506 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20507 gen_rtx_REG (SImode, j + 1),
20508 dwarf);
20509
20510 REG_NOTES (tmp) = dwarf;
20511
20512 offset += 8;
20513 j += 2;
20514 }
20515 else if (j != PC_REGNUM)
20516 {
20517 /* Emit a single word load. */
20518 if (offset > 0)
20519 mem = gen_frame_mem (SImode,
20520 plus_constant (Pmode,
20521 stack_pointer_rtx,
20522 offset));
20523 else
20524 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20525
20526 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20527 tmp = emit_insn (tmp);
20528 RTX_FRAME_RELATED_P (tmp) = 1;
20529
20530 /* Generate dwarf info. */
20531 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20532 gen_rtx_REG (SImode, j),
20533 NULL_RTX);
20534
20535 offset += 4;
20536 j += 1;
20537 }
20538 else /* j == PC_REGNUM */
20539 j++;
20540 }
20541 else
20542 j++;
20543
20544 /* Update the stack. */
20545 if (offset > 0)
20546 {
20547 tmp = gen_rtx_SET (stack_pointer_rtx,
20548 plus_constant (Pmode,
20549 stack_pointer_rtx,
20550 offset));
20551 tmp = emit_insn (tmp);
20552 arm_add_cfa_adjust_cfa_note (tmp, offset,
20553 stack_pointer_rtx, stack_pointer_rtx);
20554 offset = 0;
20555 }
20556
20557 if (saved_regs_mask & (1 << PC_REGNUM))
20558 {
20559 /* Only PC is to be popped. */
20560 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20561 XVECEXP (par, 0, 0) = ret_rtx;
20562 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20563 gen_frame_mem (SImode,
20564 gen_rtx_POST_INC (SImode,
20565 stack_pointer_rtx)));
20566 RTX_FRAME_RELATED_P (tmp) = 1;
20567 XVECEXP (par, 0, 1) = tmp;
20568 par = emit_jump_insn (par);
20569
20570 /* Generate dwarf info. */
20571 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20572 gen_rtx_REG (SImode, PC_REGNUM),
20573 NULL_RTX);
20574 REG_NOTES (par) = dwarf;
20575 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20576 stack_pointer_rtx, stack_pointer_rtx);
20577 }
20578 }
20579
20580 /* Calculate the size of the return value that is passed in registers. */
20581 static unsigned
20582 arm_size_return_regs (void)
20583 {
20584 machine_mode mode;
20585
20586 if (crtl->return_rtx != 0)
20587 mode = GET_MODE (crtl->return_rtx);
20588 else
20589 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20590
20591 return GET_MODE_SIZE (mode);
20592 }
20593
20594 /* Return true if the current function needs to save/restore LR. */
20595 static bool
20596 thumb_force_lr_save (void)
20597 {
20598 return !cfun->machine->lr_save_eliminated
20599 && (!leaf_function_p ()
20600 || thumb_far_jump_used_p ()
20601 || df_regs_ever_live_p (LR_REGNUM));
20602 }
20603
20604 /* We do not know if r3 will be available because
20605 we do have an indirect tailcall happening in this
20606 particular case. */
20607 static bool
20608 is_indirect_tailcall_p (rtx call)
20609 {
20610 rtx pat = PATTERN (call);
20611
20612 /* Indirect tail call. */
20613 pat = XVECEXP (pat, 0, 0);
20614 if (GET_CODE (pat) == SET)
20615 pat = SET_SRC (pat);
20616
20617 pat = XEXP (XEXP (pat, 0), 0);
20618 return REG_P (pat);
20619 }
20620
20621 /* Return true if r3 is used by any of the tail call insns in the
20622 current function. */
20623 static bool
20624 any_sibcall_could_use_r3 (void)
20625 {
20626 edge_iterator ei;
20627 edge e;
20628
20629 if (!crtl->tail_call_emit)
20630 return false;
20631 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20632 if (e->flags & EDGE_SIBCALL)
20633 {
20634 rtx call = BB_END (e->src);
20635 if (!CALL_P (call))
20636 call = prev_nonnote_nondebug_insn (call);
20637 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20638 if (find_regno_fusage (call, USE, 3)
20639 || is_indirect_tailcall_p (call))
20640 return true;
20641 }
20642 return false;
20643 }
20644
20645
20646 /* Compute the distance from register FROM to register TO.
20647 These can be the arg pointer (26), the soft frame pointer (25),
20648 the stack pointer (13) or the hard frame pointer (11).
20649 In thumb mode r7 is used as the soft frame pointer, if needed.
20650 Typical stack layout looks like this:
20651
20652 old stack pointer -> | |
20653 ----
20654 | | \
20655 | | saved arguments for
20656 | | vararg functions
20657 | | /
20658 --
20659 hard FP & arg pointer -> | | \
20660 | | stack
20661 | | frame
20662 | | /
20663 --
20664 | | \
20665 | | call saved
20666 | | registers
20667 soft frame pointer -> | | /
20668 --
20669 | | \
20670 | | local
20671 | | variables
20672 locals base pointer -> | | /
20673 --
20674 | | \
20675 | | outgoing
20676 | | arguments
20677 current stack pointer -> | | /
20678 --
20679
20680 For a given function some or all of these stack components
20681 may not be needed, giving rise to the possibility of
20682 eliminating some of the registers.
20683
20684 The values returned by this function must reflect the behavior
20685 of arm_expand_prologue() and arm_compute_save_reg_mask().
20686
20687 The sign of the number returned reflects the direction of stack
20688 growth, so the values are positive for all eliminations except
20689 from the soft frame pointer to the hard frame pointer.
20690
20691 SFP may point just inside the local variables block to ensure correct
20692 alignment. */
20693
20694
20695 /* Calculate stack offsets. These are used to calculate register elimination
20696 offsets and in prologue/epilogue code. Also calculates which registers
20697 should be saved. */
20698
20699 static arm_stack_offsets *
20700 arm_get_frame_offsets (void)
20701 {
20702 struct arm_stack_offsets *offsets;
20703 unsigned long func_type;
20704 int leaf;
20705 int saved;
20706 int core_saved;
20707 HOST_WIDE_INT frame_size;
20708 int i;
20709
20710 offsets = &cfun->machine->stack_offsets;
20711
20712 /* We need to know if we are a leaf function. Unfortunately, it
20713 is possible to be called after start_sequence has been called,
20714 which causes get_insns to return the insns for the sequence,
20715 not the function, which will cause leaf_function_p to return
20716 the incorrect result.
20717
20718 to know about leaf functions once reload has completed, and the
20719 frame size cannot be changed after that time, so we can safely
20720 use the cached value. */
20721
20722 if (reload_completed)
20723 return offsets;
20724
20725 /* Initially this is the size of the local variables. It will translated
20726 into an offset once we have determined the size of preceding data. */
20727 frame_size = ROUND_UP_WORD (get_frame_size ());
20728
20729 leaf = leaf_function_p ();
20730
20731 /* Space for variadic functions. */
20732 offsets->saved_args = crtl->args.pretend_args_size;
20733
20734 /* In Thumb mode this is incorrect, but never used. */
20735 offsets->frame
20736 = (offsets->saved_args
20737 + arm_compute_static_chain_stack_bytes ()
20738 + (frame_pointer_needed ? 4 : 0));
20739
20740 if (TARGET_32BIT)
20741 {
20742 unsigned int regno;
20743
20744 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20745 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20746 saved = core_saved;
20747
20748 /* We know that SP will be doubleword aligned on entry, and we must
20749 preserve that condition at any subroutine call. We also require the
20750 soft frame pointer to be doubleword aligned. */
20751
20752 if (TARGET_REALLY_IWMMXT)
20753 {
20754 /* Check for the call-saved iWMMXt registers. */
20755 for (regno = FIRST_IWMMXT_REGNUM;
20756 regno <= LAST_IWMMXT_REGNUM;
20757 regno++)
20758 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20759 saved += 8;
20760 }
20761
20762 func_type = arm_current_func_type ();
20763 /* Space for saved VFP registers. */
20764 if (! IS_VOLATILE (func_type)
20765 && TARGET_HARD_FLOAT && TARGET_VFP)
20766 saved += arm_get_vfp_saved_size ();
20767 }
20768 else /* TARGET_THUMB1 */
20769 {
20770 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20771 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20772 saved = core_saved;
20773 if (TARGET_BACKTRACE)
20774 saved += 16;
20775 }
20776
20777 /* Saved registers include the stack frame. */
20778 offsets->saved_regs
20779 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20780 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20781
20782 /* A leaf function does not need any stack alignment if it has nothing
20783 on the stack. */
20784 if (leaf && frame_size == 0
20785 /* However if it calls alloca(), we have a dynamically allocated
20786 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20787 && ! cfun->calls_alloca)
20788 {
20789 offsets->outgoing_args = offsets->soft_frame;
20790 offsets->locals_base = offsets->soft_frame;
20791 return offsets;
20792 }
20793
20794 /* Ensure SFP has the correct alignment. */
20795 if (ARM_DOUBLEWORD_ALIGN
20796 && (offsets->soft_frame & 7))
20797 {
20798 offsets->soft_frame += 4;
20799 /* Try to align stack by pushing an extra reg. Don't bother doing this
20800 when there is a stack frame as the alignment will be rolled into
20801 the normal stack adjustment. */
20802 if (frame_size + crtl->outgoing_args_size == 0)
20803 {
20804 int reg = -1;
20805
20806 /* Register r3 is caller-saved. Normally it does not need to be
20807 saved on entry by the prologue. However if we choose to save
20808 it for padding then we may confuse the compiler into thinking
20809 a prologue sequence is required when in fact it is not. This
20810 will occur when shrink-wrapping if r3 is used as a scratch
20811 register and there are no other callee-saved writes.
20812
20813 This situation can be avoided when other callee-saved registers
20814 are available and r3 is not mandatory if we choose a callee-saved
20815 register for padding. */
20816 bool prefer_callee_reg_p = false;
20817
20818 /* If it is safe to use r3, then do so. This sometimes
20819 generates better code on Thumb-2 by avoiding the need to
20820 use 32-bit push/pop instructions. */
20821 if (! any_sibcall_could_use_r3 ()
20822 && arm_size_return_regs () <= 12
20823 && (offsets->saved_regs_mask & (1 << 3)) == 0
20824 && (TARGET_THUMB2
20825 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20826 {
20827 reg = 3;
20828 if (!TARGET_THUMB2)
20829 prefer_callee_reg_p = true;
20830 }
20831 if (reg == -1
20832 || prefer_callee_reg_p)
20833 {
20834 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20835 {
20836 /* Avoid fixed registers; they may be changed at
20837 arbitrary times so it's unsafe to restore them
20838 during the epilogue. */
20839 if (!fixed_regs[i]
20840 && (offsets->saved_regs_mask & (1 << i)) == 0)
20841 {
20842 reg = i;
20843 break;
20844 }
20845 }
20846 }
20847
20848 if (reg != -1)
20849 {
20850 offsets->saved_regs += 4;
20851 offsets->saved_regs_mask |= (1 << reg);
20852 }
20853 }
20854 }
20855
20856 offsets->locals_base = offsets->soft_frame + frame_size;
20857 offsets->outgoing_args = (offsets->locals_base
20858 + crtl->outgoing_args_size);
20859
20860 if (ARM_DOUBLEWORD_ALIGN)
20861 {
20862 /* Ensure SP remains doubleword aligned. */
20863 if (offsets->outgoing_args & 7)
20864 offsets->outgoing_args += 4;
20865 gcc_assert (!(offsets->outgoing_args & 7));
20866 }
20867
20868 return offsets;
20869 }
20870
20871
20872 /* Calculate the relative offsets for the different stack pointers. Positive
20873 offsets are in the direction of stack growth. */
20874
20875 HOST_WIDE_INT
20876 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20877 {
20878 arm_stack_offsets *offsets;
20879
20880 offsets = arm_get_frame_offsets ();
20881
20882 /* OK, now we have enough information to compute the distances.
20883 There must be an entry in these switch tables for each pair
20884 of registers in ELIMINABLE_REGS, even if some of the entries
20885 seem to be redundant or useless. */
20886 switch (from)
20887 {
20888 case ARG_POINTER_REGNUM:
20889 switch (to)
20890 {
20891 case THUMB_HARD_FRAME_POINTER_REGNUM:
20892 return 0;
20893
20894 case FRAME_POINTER_REGNUM:
20895 /* This is the reverse of the soft frame pointer
20896 to hard frame pointer elimination below. */
20897 return offsets->soft_frame - offsets->saved_args;
20898
20899 case ARM_HARD_FRAME_POINTER_REGNUM:
20900 /* This is only non-zero in the case where the static chain register
20901 is stored above the frame. */
20902 return offsets->frame - offsets->saved_args - 4;
20903
20904 case STACK_POINTER_REGNUM:
20905 /* If nothing has been pushed on the stack at all
20906 then this will return -4. This *is* correct! */
20907 return offsets->outgoing_args - (offsets->saved_args + 4);
20908
20909 default:
20910 gcc_unreachable ();
20911 }
20912 gcc_unreachable ();
20913
20914 case FRAME_POINTER_REGNUM:
20915 switch (to)
20916 {
20917 case THUMB_HARD_FRAME_POINTER_REGNUM:
20918 return 0;
20919
20920 case ARM_HARD_FRAME_POINTER_REGNUM:
20921 /* The hard frame pointer points to the top entry in the
20922 stack frame. The soft frame pointer to the bottom entry
20923 in the stack frame. If there is no stack frame at all,
20924 then they are identical. */
20925
20926 return offsets->frame - offsets->soft_frame;
20927
20928 case STACK_POINTER_REGNUM:
20929 return offsets->outgoing_args - offsets->soft_frame;
20930
20931 default:
20932 gcc_unreachable ();
20933 }
20934 gcc_unreachable ();
20935
20936 default:
20937 /* You cannot eliminate from the stack pointer.
20938 In theory you could eliminate from the hard frame
20939 pointer to the stack pointer, but this will never
20940 happen, since if a stack frame is not needed the
20941 hard frame pointer will never be used. */
20942 gcc_unreachable ();
20943 }
20944 }
20945
20946 /* Given FROM and TO register numbers, say whether this elimination is
20947 allowed. Frame pointer elimination is automatically handled.
20948
20949 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20950 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20951 pointer, we must eliminate FRAME_POINTER_REGNUM into
20952 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20953 ARG_POINTER_REGNUM. */
20954
20955 bool
20956 arm_can_eliminate (const int from, const int to)
20957 {
20958 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20959 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20960 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20961 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20962 true);
20963 }
20964
20965 /* Emit RTL to save coprocessor registers on function entry. Returns the
20966 number of bytes pushed. */
20967
20968 static int
20969 arm_save_coproc_regs(void)
20970 {
20971 int saved_size = 0;
20972 unsigned reg;
20973 unsigned start_reg;
20974 rtx insn;
20975
20976 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20977 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20978 {
20979 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20980 insn = gen_rtx_MEM (V2SImode, insn);
20981 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20982 RTX_FRAME_RELATED_P (insn) = 1;
20983 saved_size += 8;
20984 }
20985
20986 if (TARGET_HARD_FLOAT && TARGET_VFP)
20987 {
20988 start_reg = FIRST_VFP_REGNUM;
20989
20990 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20991 {
20992 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20993 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20994 {
20995 if (start_reg != reg)
20996 saved_size += vfp_emit_fstmd (start_reg,
20997 (reg - start_reg) / 2);
20998 start_reg = reg + 2;
20999 }
21000 }
21001 if (start_reg != reg)
21002 saved_size += vfp_emit_fstmd (start_reg,
21003 (reg - start_reg) / 2);
21004 }
21005 return saved_size;
21006 }
21007
21008
21009 /* Set the Thumb frame pointer from the stack pointer. */
21010
21011 static void
21012 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21013 {
21014 HOST_WIDE_INT amount;
21015 rtx insn, dwarf;
21016
21017 amount = offsets->outgoing_args - offsets->locals_base;
21018 if (amount < 1024)
21019 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21020 stack_pointer_rtx, GEN_INT (amount)));
21021 else
21022 {
21023 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21024 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21025 expects the first two operands to be the same. */
21026 if (TARGET_THUMB2)
21027 {
21028 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21029 stack_pointer_rtx,
21030 hard_frame_pointer_rtx));
21031 }
21032 else
21033 {
21034 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21035 hard_frame_pointer_rtx,
21036 stack_pointer_rtx));
21037 }
21038 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21039 plus_constant (Pmode, stack_pointer_rtx, amount));
21040 RTX_FRAME_RELATED_P (dwarf) = 1;
21041 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21042 }
21043
21044 RTX_FRAME_RELATED_P (insn) = 1;
21045 }
21046
21047 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21048 function. */
21049 void
21050 arm_expand_prologue (void)
21051 {
21052 rtx amount;
21053 rtx insn;
21054 rtx ip_rtx;
21055 unsigned long live_regs_mask;
21056 unsigned long func_type;
21057 int fp_offset = 0;
21058 int saved_pretend_args = 0;
21059 int saved_regs = 0;
21060 unsigned HOST_WIDE_INT args_to_push;
21061 arm_stack_offsets *offsets;
21062
21063 func_type = arm_current_func_type ();
21064
21065 /* Naked functions don't have prologues. */
21066 if (IS_NAKED (func_type))
21067 return;
21068
21069 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21070 args_to_push = crtl->args.pretend_args_size;
21071
21072 /* Compute which register we will have to save onto the stack. */
21073 offsets = arm_get_frame_offsets ();
21074 live_regs_mask = offsets->saved_regs_mask;
21075
21076 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21077
21078 if (IS_STACKALIGN (func_type))
21079 {
21080 rtx r0, r1;
21081
21082 /* Handle a word-aligned stack pointer. We generate the following:
21083
21084 mov r0, sp
21085 bic r1, r0, #7
21086 mov sp, r1
21087 <save and restore r0 in normal prologue/epilogue>
21088 mov sp, r0
21089 bx lr
21090
21091 The unwinder doesn't need to know about the stack realignment.
21092 Just tell it we saved SP in r0. */
21093 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21094
21095 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21096 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21097
21098 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21099 RTX_FRAME_RELATED_P (insn) = 1;
21100 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21101
21102 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21103
21104 /* ??? The CFA changes here, which may cause GDB to conclude that it
21105 has entered a different function. That said, the unwind info is
21106 correct, individually, before and after this instruction because
21107 we've described the save of SP, which will override the default
21108 handling of SP as restoring from the CFA. */
21109 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21110 }
21111
21112 /* For APCS frames, if IP register is clobbered
21113 when creating frame, save that register in a special
21114 way. */
21115 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21116 {
21117 if (IS_INTERRUPT (func_type))
21118 {
21119 /* Interrupt functions must not corrupt any registers.
21120 Creating a frame pointer however, corrupts the IP
21121 register, so we must push it first. */
21122 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21123
21124 /* Do not set RTX_FRAME_RELATED_P on this insn.
21125 The dwarf stack unwinding code only wants to see one
21126 stack decrement per function, and this is not it. If
21127 this instruction is labeled as being part of the frame
21128 creation sequence then dwarf2out_frame_debug_expr will
21129 die when it encounters the assignment of IP to FP
21130 later on, since the use of SP here establishes SP as
21131 the CFA register and not IP.
21132
21133 Anyway this instruction is not really part of the stack
21134 frame creation although it is part of the prologue. */
21135 }
21136 else if (IS_NESTED (func_type))
21137 {
21138 /* The static chain register is the same as the IP register
21139 used as a scratch register during stack frame creation.
21140 To get around this need to find somewhere to store IP
21141 whilst the frame is being created. We try the following
21142 places in order:
21143
21144 1. The last argument register r3 if it is available.
21145 2. A slot on the stack above the frame if there are no
21146 arguments to push onto the stack.
21147 3. Register r3 again, after pushing the argument registers
21148 onto the stack, if this is a varargs function.
21149 4. The last slot on the stack created for the arguments to
21150 push, if this isn't a varargs function.
21151
21152 Note - we only need to tell the dwarf2 backend about the SP
21153 adjustment in the second variant; the static chain register
21154 doesn't need to be unwound, as it doesn't contain a value
21155 inherited from the caller. */
21156
21157 if (!arm_r3_live_at_start_p ())
21158 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21159 else if (args_to_push == 0)
21160 {
21161 rtx addr, dwarf;
21162
21163 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21164 saved_regs += 4;
21165
21166 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21167 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21168 fp_offset = 4;
21169
21170 /* Just tell the dwarf backend that we adjusted SP. */
21171 dwarf = gen_rtx_SET (stack_pointer_rtx,
21172 plus_constant (Pmode, stack_pointer_rtx,
21173 -fp_offset));
21174 RTX_FRAME_RELATED_P (insn) = 1;
21175 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21176 }
21177 else
21178 {
21179 /* Store the args on the stack. */
21180 if (cfun->machine->uses_anonymous_args)
21181 {
21182 insn
21183 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21184 (0xf0 >> (args_to_push / 4)) & 0xf);
21185 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21186 saved_pretend_args = 1;
21187 }
21188 else
21189 {
21190 rtx addr, dwarf;
21191
21192 if (args_to_push == 4)
21193 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21194 else
21195 addr
21196 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21197 plus_constant (Pmode,
21198 stack_pointer_rtx,
21199 -args_to_push));
21200
21201 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21202
21203 /* Just tell the dwarf backend that we adjusted SP. */
21204 dwarf
21205 = gen_rtx_SET (stack_pointer_rtx,
21206 plus_constant (Pmode, stack_pointer_rtx,
21207 -args_to_push));
21208 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21209 }
21210
21211 RTX_FRAME_RELATED_P (insn) = 1;
21212 fp_offset = args_to_push;
21213 args_to_push = 0;
21214 }
21215 }
21216
21217 insn = emit_set_insn (ip_rtx,
21218 plus_constant (Pmode, stack_pointer_rtx,
21219 fp_offset));
21220 RTX_FRAME_RELATED_P (insn) = 1;
21221 }
21222
21223 if (args_to_push)
21224 {
21225 /* Push the argument registers, or reserve space for them. */
21226 if (cfun->machine->uses_anonymous_args)
21227 insn = emit_multi_reg_push
21228 ((0xf0 >> (args_to_push / 4)) & 0xf,
21229 (0xf0 >> (args_to_push / 4)) & 0xf);
21230 else
21231 insn = emit_insn
21232 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21233 GEN_INT (- args_to_push)));
21234 RTX_FRAME_RELATED_P (insn) = 1;
21235 }
21236
21237 /* If this is an interrupt service routine, and the link register
21238 is going to be pushed, and we're not generating extra
21239 push of IP (needed when frame is needed and frame layout if apcs),
21240 subtracting four from LR now will mean that the function return
21241 can be done with a single instruction. */
21242 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21243 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21244 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21245 && TARGET_ARM)
21246 {
21247 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21248
21249 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21250 }
21251
21252 if (live_regs_mask)
21253 {
21254 unsigned long dwarf_regs_mask = live_regs_mask;
21255
21256 saved_regs += bit_count (live_regs_mask) * 4;
21257 if (optimize_size && !frame_pointer_needed
21258 && saved_regs == offsets->saved_regs - offsets->saved_args)
21259 {
21260 /* If no coprocessor registers are being pushed and we don't have
21261 to worry about a frame pointer then push extra registers to
21262 create the stack frame. This is done is a way that does not
21263 alter the frame layout, so is independent of the epilogue. */
21264 int n;
21265 int frame;
21266 n = 0;
21267 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21268 n++;
21269 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21270 if (frame && n * 4 >= frame)
21271 {
21272 n = frame / 4;
21273 live_regs_mask |= (1 << n) - 1;
21274 saved_regs += frame;
21275 }
21276 }
21277
21278 if (TARGET_LDRD
21279 && current_tune->prefer_ldrd_strd
21280 && !optimize_function_for_size_p (cfun))
21281 {
21282 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21283 if (TARGET_THUMB2)
21284 thumb2_emit_strd_push (live_regs_mask);
21285 else if (TARGET_ARM
21286 && !TARGET_APCS_FRAME
21287 && !IS_INTERRUPT (func_type))
21288 arm_emit_strd_push (live_regs_mask);
21289 else
21290 {
21291 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21292 RTX_FRAME_RELATED_P (insn) = 1;
21293 }
21294 }
21295 else
21296 {
21297 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21298 RTX_FRAME_RELATED_P (insn) = 1;
21299 }
21300 }
21301
21302 if (! IS_VOLATILE (func_type))
21303 saved_regs += arm_save_coproc_regs ();
21304
21305 if (frame_pointer_needed && TARGET_ARM)
21306 {
21307 /* Create the new frame pointer. */
21308 if (TARGET_APCS_FRAME)
21309 {
21310 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21311 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21312 RTX_FRAME_RELATED_P (insn) = 1;
21313
21314 if (IS_NESTED (func_type))
21315 {
21316 /* Recover the static chain register. */
21317 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21318 insn = gen_rtx_REG (SImode, 3);
21319 else
21320 {
21321 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21322 insn = gen_frame_mem (SImode, insn);
21323 }
21324 emit_set_insn (ip_rtx, insn);
21325 /* Add a USE to stop propagate_one_insn() from barfing. */
21326 emit_insn (gen_force_register_use (ip_rtx));
21327 }
21328 }
21329 else
21330 {
21331 insn = GEN_INT (saved_regs - 4);
21332 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21333 stack_pointer_rtx, insn));
21334 RTX_FRAME_RELATED_P (insn) = 1;
21335 }
21336 }
21337
21338 if (flag_stack_usage_info)
21339 current_function_static_stack_size
21340 = offsets->outgoing_args - offsets->saved_args;
21341
21342 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21343 {
21344 /* This add can produce multiple insns for a large constant, so we
21345 need to get tricky. */
21346 rtx_insn *last = get_last_insn ();
21347
21348 amount = GEN_INT (offsets->saved_args + saved_regs
21349 - offsets->outgoing_args);
21350
21351 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21352 amount));
21353 do
21354 {
21355 last = last ? NEXT_INSN (last) : get_insns ();
21356 RTX_FRAME_RELATED_P (last) = 1;
21357 }
21358 while (last != insn);
21359
21360 /* If the frame pointer is needed, emit a special barrier that
21361 will prevent the scheduler from moving stores to the frame
21362 before the stack adjustment. */
21363 if (frame_pointer_needed)
21364 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21365 hard_frame_pointer_rtx));
21366 }
21367
21368
21369 if (frame_pointer_needed && TARGET_THUMB2)
21370 thumb_set_frame_pointer (offsets);
21371
21372 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21373 {
21374 unsigned long mask;
21375
21376 mask = live_regs_mask;
21377 mask &= THUMB2_WORK_REGS;
21378 if (!IS_NESTED (func_type))
21379 mask |= (1 << IP_REGNUM);
21380 arm_load_pic_register (mask);
21381 }
21382
21383 /* If we are profiling, make sure no instructions are scheduled before
21384 the call to mcount. Similarly if the user has requested no
21385 scheduling in the prolog. Similarly if we want non-call exceptions
21386 using the EABI unwinder, to prevent faulting instructions from being
21387 swapped with a stack adjustment. */
21388 if (crtl->profile || !TARGET_SCHED_PROLOG
21389 || (arm_except_unwind_info (&global_options) == UI_TARGET
21390 && cfun->can_throw_non_call_exceptions))
21391 emit_insn (gen_blockage ());
21392
21393 /* If the link register is being kept alive, with the return address in it,
21394 then make sure that it does not get reused by the ce2 pass. */
21395 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21396 cfun->machine->lr_save_eliminated = 1;
21397 }
21398 \f
21399 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21400 static void
21401 arm_print_condition (FILE *stream)
21402 {
21403 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21404 {
21405 /* Branch conversion is not implemented for Thumb-2. */
21406 if (TARGET_THUMB)
21407 {
21408 output_operand_lossage ("predicated Thumb instruction");
21409 return;
21410 }
21411 if (current_insn_predicate != NULL)
21412 {
21413 output_operand_lossage
21414 ("predicated instruction in conditional sequence");
21415 return;
21416 }
21417
21418 fputs (arm_condition_codes[arm_current_cc], stream);
21419 }
21420 else if (current_insn_predicate)
21421 {
21422 enum arm_cond_code code;
21423
21424 if (TARGET_THUMB1)
21425 {
21426 output_operand_lossage ("predicated Thumb instruction");
21427 return;
21428 }
21429
21430 code = get_arm_condition_code (current_insn_predicate);
21431 fputs (arm_condition_codes[code], stream);
21432 }
21433 }
21434
21435
21436 /* Globally reserved letters: acln
21437 Puncutation letters currently used: @_|?().!#
21438 Lower case letters currently used: bcdefhimpqtvwxyz
21439 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21440 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21441
21442 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21443
21444 If CODE is 'd', then the X is a condition operand and the instruction
21445 should only be executed if the condition is true.
21446 if CODE is 'D', then the X is a condition operand and the instruction
21447 should only be executed if the condition is false: however, if the mode
21448 of the comparison is CCFPEmode, then always execute the instruction -- we
21449 do this because in these circumstances !GE does not necessarily imply LT;
21450 in these cases the instruction pattern will take care to make sure that
21451 an instruction containing %d will follow, thereby undoing the effects of
21452 doing this instruction unconditionally.
21453 If CODE is 'N' then X is a floating point operand that must be negated
21454 before output.
21455 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21456 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21457 static void
21458 arm_print_operand (FILE *stream, rtx x, int code)
21459 {
21460 switch (code)
21461 {
21462 case '@':
21463 fputs (ASM_COMMENT_START, stream);
21464 return;
21465
21466 case '_':
21467 fputs (user_label_prefix, stream);
21468 return;
21469
21470 case '|':
21471 fputs (REGISTER_PREFIX, stream);
21472 return;
21473
21474 case '?':
21475 arm_print_condition (stream);
21476 return;
21477
21478 case '(':
21479 /* Nothing in unified syntax, otherwise the current condition code. */
21480 if (!TARGET_UNIFIED_ASM)
21481 arm_print_condition (stream);
21482 break;
21483
21484 case ')':
21485 /* The current condition code in unified syntax, otherwise nothing. */
21486 if (TARGET_UNIFIED_ASM)
21487 arm_print_condition (stream);
21488 break;
21489
21490 case '.':
21491 /* The current condition code for a condition code setting instruction.
21492 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21493 if (TARGET_UNIFIED_ASM)
21494 {
21495 fputc('s', stream);
21496 arm_print_condition (stream);
21497 }
21498 else
21499 {
21500 arm_print_condition (stream);
21501 fputc('s', stream);
21502 }
21503 return;
21504
21505 case '!':
21506 /* If the instruction is conditionally executed then print
21507 the current condition code, otherwise print 's'. */
21508 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21509 if (current_insn_predicate)
21510 arm_print_condition (stream);
21511 else
21512 fputc('s', stream);
21513 break;
21514
21515 /* %# is a "break" sequence. It doesn't output anything, but is used to
21516 separate e.g. operand numbers from following text, if that text consists
21517 of further digits which we don't want to be part of the operand
21518 number. */
21519 case '#':
21520 return;
21521
21522 case 'N':
21523 {
21524 REAL_VALUE_TYPE r;
21525 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21526 r = real_value_negate (&r);
21527 fprintf (stream, "%s", fp_const_from_val (&r));
21528 }
21529 return;
21530
21531 /* An integer or symbol address without a preceding # sign. */
21532 case 'c':
21533 switch (GET_CODE (x))
21534 {
21535 case CONST_INT:
21536 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21537 break;
21538
21539 case SYMBOL_REF:
21540 output_addr_const (stream, x);
21541 break;
21542
21543 case CONST:
21544 if (GET_CODE (XEXP (x, 0)) == PLUS
21545 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21546 {
21547 output_addr_const (stream, x);
21548 break;
21549 }
21550 /* Fall through. */
21551
21552 default:
21553 output_operand_lossage ("Unsupported operand for code '%c'", code);
21554 }
21555 return;
21556
21557 /* An integer that we want to print in HEX. */
21558 case 'x':
21559 switch (GET_CODE (x))
21560 {
21561 case CONST_INT:
21562 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21563 break;
21564
21565 default:
21566 output_operand_lossage ("Unsupported operand for code '%c'", code);
21567 }
21568 return;
21569
21570 case 'B':
21571 if (CONST_INT_P (x))
21572 {
21573 HOST_WIDE_INT val;
21574 val = ARM_SIGN_EXTEND (~INTVAL (x));
21575 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21576 }
21577 else
21578 {
21579 putc ('~', stream);
21580 output_addr_const (stream, x);
21581 }
21582 return;
21583
21584 case 'b':
21585 /* Print the log2 of a CONST_INT. */
21586 {
21587 HOST_WIDE_INT val;
21588
21589 if (!CONST_INT_P (x)
21590 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21591 output_operand_lossage ("Unsupported operand for code '%c'", code);
21592 else
21593 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21594 }
21595 return;
21596
21597 case 'L':
21598 /* The low 16 bits of an immediate constant. */
21599 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21600 return;
21601
21602 case 'i':
21603 fprintf (stream, "%s", arithmetic_instr (x, 1));
21604 return;
21605
21606 case 'I':
21607 fprintf (stream, "%s", arithmetic_instr (x, 0));
21608 return;
21609
21610 case 'S':
21611 {
21612 HOST_WIDE_INT val;
21613 const char *shift;
21614
21615 shift = shift_op (x, &val);
21616
21617 if (shift)
21618 {
21619 fprintf (stream, ", %s ", shift);
21620 if (val == -1)
21621 arm_print_operand (stream, XEXP (x, 1), 0);
21622 else
21623 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21624 }
21625 }
21626 return;
21627
21628 /* An explanation of the 'Q', 'R' and 'H' register operands:
21629
21630 In a pair of registers containing a DI or DF value the 'Q'
21631 operand returns the register number of the register containing
21632 the least significant part of the value. The 'R' operand returns
21633 the register number of the register containing the most
21634 significant part of the value.
21635
21636 The 'H' operand returns the higher of the two register numbers.
21637 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21638 same as the 'Q' operand, since the most significant part of the
21639 value is held in the lower number register. The reverse is true
21640 on systems where WORDS_BIG_ENDIAN is false.
21641
21642 The purpose of these operands is to distinguish between cases
21643 where the endian-ness of the values is important (for example
21644 when they are added together), and cases where the endian-ness
21645 is irrelevant, but the order of register operations is important.
21646 For example when loading a value from memory into a register
21647 pair, the endian-ness does not matter. Provided that the value
21648 from the lower memory address is put into the lower numbered
21649 register, and the value from the higher address is put into the
21650 higher numbered register, the load will work regardless of whether
21651 the value being loaded is big-wordian or little-wordian. The
21652 order of the two register loads can matter however, if the address
21653 of the memory location is actually held in one of the registers
21654 being overwritten by the load.
21655
21656 The 'Q' and 'R' constraints are also available for 64-bit
21657 constants. */
21658 case 'Q':
21659 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21660 {
21661 rtx part = gen_lowpart (SImode, x);
21662 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21663 return;
21664 }
21665
21666 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21667 {
21668 output_operand_lossage ("invalid operand for code '%c'", code);
21669 return;
21670 }
21671
21672 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21673 return;
21674
21675 case 'R':
21676 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21677 {
21678 machine_mode mode = GET_MODE (x);
21679 rtx part;
21680
21681 if (mode == VOIDmode)
21682 mode = DImode;
21683 part = gen_highpart_mode (SImode, mode, x);
21684 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21685 return;
21686 }
21687
21688 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21689 {
21690 output_operand_lossage ("invalid operand for code '%c'", code);
21691 return;
21692 }
21693
21694 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21695 return;
21696
21697 case 'H':
21698 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21699 {
21700 output_operand_lossage ("invalid operand for code '%c'", code);
21701 return;
21702 }
21703
21704 asm_fprintf (stream, "%r", REGNO (x) + 1);
21705 return;
21706
21707 case 'J':
21708 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21709 {
21710 output_operand_lossage ("invalid operand for code '%c'", code);
21711 return;
21712 }
21713
21714 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21715 return;
21716
21717 case 'K':
21718 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21719 {
21720 output_operand_lossage ("invalid operand for code '%c'", code);
21721 return;
21722 }
21723
21724 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21725 return;
21726
21727 case 'm':
21728 asm_fprintf (stream, "%r",
21729 REG_P (XEXP (x, 0))
21730 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21731 return;
21732
21733 case 'M':
21734 asm_fprintf (stream, "{%r-%r}",
21735 REGNO (x),
21736 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21737 return;
21738
21739 /* Like 'M', but writing doubleword vector registers, for use by Neon
21740 insns. */
21741 case 'h':
21742 {
21743 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21744 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21745 if (numregs == 1)
21746 asm_fprintf (stream, "{d%d}", regno);
21747 else
21748 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21749 }
21750 return;
21751
21752 case 'd':
21753 /* CONST_TRUE_RTX means always -- that's the default. */
21754 if (x == const_true_rtx)
21755 return;
21756
21757 if (!COMPARISON_P (x))
21758 {
21759 output_operand_lossage ("invalid operand for code '%c'", code);
21760 return;
21761 }
21762
21763 fputs (arm_condition_codes[get_arm_condition_code (x)],
21764 stream);
21765 return;
21766
21767 case 'D':
21768 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21769 want to do that. */
21770 if (x == const_true_rtx)
21771 {
21772 output_operand_lossage ("instruction never executed");
21773 return;
21774 }
21775 if (!COMPARISON_P (x))
21776 {
21777 output_operand_lossage ("invalid operand for code '%c'", code);
21778 return;
21779 }
21780
21781 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21782 (get_arm_condition_code (x))],
21783 stream);
21784 return;
21785
21786 case 's':
21787 case 'V':
21788 case 'W':
21789 case 'X':
21790 case 'Y':
21791 case 'Z':
21792 /* Former Maverick support, removed after GCC-4.7. */
21793 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21794 return;
21795
21796 case 'U':
21797 if (!REG_P (x)
21798 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21799 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21800 /* Bad value for wCG register number. */
21801 {
21802 output_operand_lossage ("invalid operand for code '%c'", code);
21803 return;
21804 }
21805
21806 else
21807 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21808 return;
21809
21810 /* Print an iWMMXt control register name. */
21811 case 'w':
21812 if (!CONST_INT_P (x)
21813 || INTVAL (x) < 0
21814 || INTVAL (x) >= 16)
21815 /* Bad value for wC register number. */
21816 {
21817 output_operand_lossage ("invalid operand for code '%c'", code);
21818 return;
21819 }
21820
21821 else
21822 {
21823 static const char * wc_reg_names [16] =
21824 {
21825 "wCID", "wCon", "wCSSF", "wCASF",
21826 "wC4", "wC5", "wC6", "wC7",
21827 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21828 "wC12", "wC13", "wC14", "wC15"
21829 };
21830
21831 fputs (wc_reg_names [INTVAL (x)], stream);
21832 }
21833 return;
21834
21835 /* Print the high single-precision register of a VFP double-precision
21836 register. */
21837 case 'p':
21838 {
21839 machine_mode mode = GET_MODE (x);
21840 int regno;
21841
21842 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21843 {
21844 output_operand_lossage ("invalid operand for code '%c'", code);
21845 return;
21846 }
21847
21848 regno = REGNO (x);
21849 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21850 {
21851 output_operand_lossage ("invalid operand for code '%c'", code);
21852 return;
21853 }
21854
21855 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21856 }
21857 return;
21858
21859 /* Print a VFP/Neon double precision or quad precision register name. */
21860 case 'P':
21861 case 'q':
21862 {
21863 machine_mode mode = GET_MODE (x);
21864 int is_quad = (code == 'q');
21865 int regno;
21866
21867 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21868 {
21869 output_operand_lossage ("invalid operand for code '%c'", code);
21870 return;
21871 }
21872
21873 if (!REG_P (x)
21874 || !IS_VFP_REGNUM (REGNO (x)))
21875 {
21876 output_operand_lossage ("invalid operand for code '%c'", code);
21877 return;
21878 }
21879
21880 regno = REGNO (x);
21881 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21882 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21883 {
21884 output_operand_lossage ("invalid operand for code '%c'", code);
21885 return;
21886 }
21887
21888 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21889 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21890 }
21891 return;
21892
21893 /* These two codes print the low/high doubleword register of a Neon quad
21894 register, respectively. For pair-structure types, can also print
21895 low/high quadword registers. */
21896 case 'e':
21897 case 'f':
21898 {
21899 machine_mode mode = GET_MODE (x);
21900 int regno;
21901
21902 if ((GET_MODE_SIZE (mode) != 16
21903 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21904 {
21905 output_operand_lossage ("invalid operand for code '%c'", code);
21906 return;
21907 }
21908
21909 regno = REGNO (x);
21910 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21911 {
21912 output_operand_lossage ("invalid operand for code '%c'", code);
21913 return;
21914 }
21915
21916 if (GET_MODE_SIZE (mode) == 16)
21917 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21918 + (code == 'f' ? 1 : 0));
21919 else
21920 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21921 + (code == 'f' ? 1 : 0));
21922 }
21923 return;
21924
21925 /* Print a VFPv3 floating-point constant, represented as an integer
21926 index. */
21927 case 'G':
21928 {
21929 int index = vfp3_const_double_index (x);
21930 gcc_assert (index != -1);
21931 fprintf (stream, "%d", index);
21932 }
21933 return;
21934
21935 /* Print bits representing opcode features for Neon.
21936
21937 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21938 and polynomials as unsigned.
21939
21940 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21941
21942 Bit 2 is 1 for rounding functions, 0 otherwise. */
21943
21944 /* Identify the type as 's', 'u', 'p' or 'f'. */
21945 case 'T':
21946 {
21947 HOST_WIDE_INT bits = INTVAL (x);
21948 fputc ("uspf"[bits & 3], stream);
21949 }
21950 return;
21951
21952 /* Likewise, but signed and unsigned integers are both 'i'. */
21953 case 'F':
21954 {
21955 HOST_WIDE_INT bits = INTVAL (x);
21956 fputc ("iipf"[bits & 3], stream);
21957 }
21958 return;
21959
21960 /* As for 'T', but emit 'u' instead of 'p'. */
21961 case 't':
21962 {
21963 HOST_WIDE_INT bits = INTVAL (x);
21964 fputc ("usuf"[bits & 3], stream);
21965 }
21966 return;
21967
21968 /* Bit 2: rounding (vs none). */
21969 case 'O':
21970 {
21971 HOST_WIDE_INT bits = INTVAL (x);
21972 fputs ((bits & 4) != 0 ? "r" : "", stream);
21973 }
21974 return;
21975
21976 /* Memory operand for vld1/vst1 instruction. */
21977 case 'A':
21978 {
21979 rtx addr;
21980 bool postinc = FALSE;
21981 rtx postinc_reg = NULL;
21982 unsigned align, memsize, align_bits;
21983
21984 gcc_assert (MEM_P (x));
21985 addr = XEXP (x, 0);
21986 if (GET_CODE (addr) == POST_INC)
21987 {
21988 postinc = 1;
21989 addr = XEXP (addr, 0);
21990 }
21991 if (GET_CODE (addr) == POST_MODIFY)
21992 {
21993 postinc_reg = XEXP( XEXP (addr, 1), 1);
21994 addr = XEXP (addr, 0);
21995 }
21996 asm_fprintf (stream, "[%r", REGNO (addr));
21997
21998 /* We know the alignment of this access, so we can emit a hint in the
21999 instruction (for some alignments) as an aid to the memory subsystem
22000 of the target. */
22001 align = MEM_ALIGN (x) >> 3;
22002 memsize = MEM_SIZE (x);
22003
22004 /* Only certain alignment specifiers are supported by the hardware. */
22005 if (memsize == 32 && (align % 32) == 0)
22006 align_bits = 256;
22007 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22008 align_bits = 128;
22009 else if (memsize >= 8 && (align % 8) == 0)
22010 align_bits = 64;
22011 else
22012 align_bits = 0;
22013
22014 if (align_bits != 0)
22015 asm_fprintf (stream, ":%d", align_bits);
22016
22017 asm_fprintf (stream, "]");
22018
22019 if (postinc)
22020 fputs("!", stream);
22021 if (postinc_reg)
22022 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22023 }
22024 return;
22025
22026 case 'C':
22027 {
22028 rtx addr;
22029
22030 gcc_assert (MEM_P (x));
22031 addr = XEXP (x, 0);
22032 gcc_assert (REG_P (addr));
22033 asm_fprintf (stream, "[%r]", REGNO (addr));
22034 }
22035 return;
22036
22037 /* Translate an S register number into a D register number and element index. */
22038 case 'y':
22039 {
22040 machine_mode mode = GET_MODE (x);
22041 int regno;
22042
22043 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22044 {
22045 output_operand_lossage ("invalid operand for code '%c'", code);
22046 return;
22047 }
22048
22049 regno = REGNO (x);
22050 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22051 {
22052 output_operand_lossage ("invalid operand for code '%c'", code);
22053 return;
22054 }
22055
22056 regno = regno - FIRST_VFP_REGNUM;
22057 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22058 }
22059 return;
22060
22061 case 'v':
22062 gcc_assert (CONST_DOUBLE_P (x));
22063 int result;
22064 result = vfp3_const_double_for_fract_bits (x);
22065 if (result == 0)
22066 result = vfp3_const_double_for_bits (x);
22067 fprintf (stream, "#%d", result);
22068 return;
22069
22070 /* Register specifier for vld1.16/vst1.16. Translate the S register
22071 number into a D register number and element index. */
22072 case 'z':
22073 {
22074 machine_mode mode = GET_MODE (x);
22075 int regno;
22076
22077 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22078 {
22079 output_operand_lossage ("invalid operand for code '%c'", code);
22080 return;
22081 }
22082
22083 regno = REGNO (x);
22084 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22085 {
22086 output_operand_lossage ("invalid operand for code '%c'", code);
22087 return;
22088 }
22089
22090 regno = regno - FIRST_VFP_REGNUM;
22091 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22092 }
22093 return;
22094
22095 default:
22096 if (x == 0)
22097 {
22098 output_operand_lossage ("missing operand");
22099 return;
22100 }
22101
22102 switch (GET_CODE (x))
22103 {
22104 case REG:
22105 asm_fprintf (stream, "%r", REGNO (x));
22106 break;
22107
22108 case MEM:
22109 output_memory_reference_mode = GET_MODE (x);
22110 output_address (XEXP (x, 0));
22111 break;
22112
22113 case CONST_DOUBLE:
22114 {
22115 char fpstr[20];
22116 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22117 sizeof (fpstr), 0, 1);
22118 fprintf (stream, "#%s", fpstr);
22119 }
22120 break;
22121
22122 default:
22123 gcc_assert (GET_CODE (x) != NEG);
22124 fputc ('#', stream);
22125 if (GET_CODE (x) == HIGH)
22126 {
22127 fputs (":lower16:", stream);
22128 x = XEXP (x, 0);
22129 }
22130
22131 output_addr_const (stream, x);
22132 break;
22133 }
22134 }
22135 }
22136 \f
22137 /* Target hook for printing a memory address. */
22138 static void
22139 arm_print_operand_address (FILE *stream, rtx x)
22140 {
22141 if (TARGET_32BIT)
22142 {
22143 int is_minus = GET_CODE (x) == MINUS;
22144
22145 if (REG_P (x))
22146 asm_fprintf (stream, "[%r]", REGNO (x));
22147 else if (GET_CODE (x) == PLUS || is_minus)
22148 {
22149 rtx base = XEXP (x, 0);
22150 rtx index = XEXP (x, 1);
22151 HOST_WIDE_INT offset = 0;
22152 if (!REG_P (base)
22153 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22154 {
22155 /* Ensure that BASE is a register. */
22156 /* (one of them must be). */
22157 /* Also ensure the SP is not used as in index register. */
22158 std::swap (base, index);
22159 }
22160 switch (GET_CODE (index))
22161 {
22162 case CONST_INT:
22163 offset = INTVAL (index);
22164 if (is_minus)
22165 offset = -offset;
22166 asm_fprintf (stream, "[%r, #%wd]",
22167 REGNO (base), offset);
22168 break;
22169
22170 case REG:
22171 asm_fprintf (stream, "[%r, %s%r]",
22172 REGNO (base), is_minus ? "-" : "",
22173 REGNO (index));
22174 break;
22175
22176 case MULT:
22177 case ASHIFTRT:
22178 case LSHIFTRT:
22179 case ASHIFT:
22180 case ROTATERT:
22181 {
22182 asm_fprintf (stream, "[%r, %s%r",
22183 REGNO (base), is_minus ? "-" : "",
22184 REGNO (XEXP (index, 0)));
22185 arm_print_operand (stream, index, 'S');
22186 fputs ("]", stream);
22187 break;
22188 }
22189
22190 default:
22191 gcc_unreachable ();
22192 }
22193 }
22194 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22195 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22196 {
22197 extern machine_mode output_memory_reference_mode;
22198
22199 gcc_assert (REG_P (XEXP (x, 0)));
22200
22201 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22202 asm_fprintf (stream, "[%r, #%s%d]!",
22203 REGNO (XEXP (x, 0)),
22204 GET_CODE (x) == PRE_DEC ? "-" : "",
22205 GET_MODE_SIZE (output_memory_reference_mode));
22206 else
22207 asm_fprintf (stream, "[%r], #%s%d",
22208 REGNO (XEXP (x, 0)),
22209 GET_CODE (x) == POST_DEC ? "-" : "",
22210 GET_MODE_SIZE (output_memory_reference_mode));
22211 }
22212 else if (GET_CODE (x) == PRE_MODIFY)
22213 {
22214 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22215 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22216 asm_fprintf (stream, "#%wd]!",
22217 INTVAL (XEXP (XEXP (x, 1), 1)));
22218 else
22219 asm_fprintf (stream, "%r]!",
22220 REGNO (XEXP (XEXP (x, 1), 1)));
22221 }
22222 else if (GET_CODE (x) == POST_MODIFY)
22223 {
22224 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22225 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22226 asm_fprintf (stream, "#%wd",
22227 INTVAL (XEXP (XEXP (x, 1), 1)));
22228 else
22229 asm_fprintf (stream, "%r",
22230 REGNO (XEXP (XEXP (x, 1), 1)));
22231 }
22232 else output_addr_const (stream, x);
22233 }
22234 else
22235 {
22236 if (REG_P (x))
22237 asm_fprintf (stream, "[%r]", REGNO (x));
22238 else if (GET_CODE (x) == POST_INC)
22239 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22240 else if (GET_CODE (x) == PLUS)
22241 {
22242 gcc_assert (REG_P (XEXP (x, 0)));
22243 if (CONST_INT_P (XEXP (x, 1)))
22244 asm_fprintf (stream, "[%r, #%wd]",
22245 REGNO (XEXP (x, 0)),
22246 INTVAL (XEXP (x, 1)));
22247 else
22248 asm_fprintf (stream, "[%r, %r]",
22249 REGNO (XEXP (x, 0)),
22250 REGNO (XEXP (x, 1)));
22251 }
22252 else
22253 output_addr_const (stream, x);
22254 }
22255 }
22256 \f
22257 /* Target hook for indicating whether a punctuation character for
22258 TARGET_PRINT_OPERAND is valid. */
22259 static bool
22260 arm_print_operand_punct_valid_p (unsigned char code)
22261 {
22262 return (code == '@' || code == '|' || code == '.'
22263 || code == '(' || code == ')' || code == '#'
22264 || (TARGET_32BIT && (code == '?'))
22265 || (TARGET_THUMB2 && (code == '!'))
22266 || (TARGET_THUMB && (code == '_')));
22267 }
22268 \f
22269 /* Target hook for assembling integer objects. The ARM version needs to
22270 handle word-sized values specially. */
22271 static bool
22272 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22273 {
22274 machine_mode mode;
22275
22276 if (size == UNITS_PER_WORD && aligned_p)
22277 {
22278 fputs ("\t.word\t", asm_out_file);
22279 output_addr_const (asm_out_file, x);
22280
22281 /* Mark symbols as position independent. We only do this in the
22282 .text segment, not in the .data segment. */
22283 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22284 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22285 {
22286 /* See legitimize_pic_address for an explanation of the
22287 TARGET_VXWORKS_RTP check. */
22288 if (!arm_pic_data_is_text_relative
22289 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22290 fputs ("(GOT)", asm_out_file);
22291 else
22292 fputs ("(GOTOFF)", asm_out_file);
22293 }
22294 fputc ('\n', asm_out_file);
22295 return true;
22296 }
22297
22298 mode = GET_MODE (x);
22299
22300 if (arm_vector_mode_supported_p (mode))
22301 {
22302 int i, units;
22303
22304 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22305
22306 units = CONST_VECTOR_NUNITS (x);
22307 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22308
22309 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22310 for (i = 0; i < units; i++)
22311 {
22312 rtx elt = CONST_VECTOR_ELT (x, i);
22313 assemble_integer
22314 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22315 }
22316 else
22317 for (i = 0; i < units; i++)
22318 {
22319 rtx elt = CONST_VECTOR_ELT (x, i);
22320 REAL_VALUE_TYPE rval;
22321
22322 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22323
22324 assemble_real
22325 (rval, GET_MODE_INNER (mode),
22326 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22327 }
22328
22329 return true;
22330 }
22331
22332 return default_assemble_integer (x, size, aligned_p);
22333 }
22334
22335 static void
22336 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22337 {
22338 section *s;
22339
22340 if (!TARGET_AAPCS_BASED)
22341 {
22342 (is_ctor ?
22343 default_named_section_asm_out_constructor
22344 : default_named_section_asm_out_destructor) (symbol, priority);
22345 return;
22346 }
22347
22348 /* Put these in the .init_array section, using a special relocation. */
22349 if (priority != DEFAULT_INIT_PRIORITY)
22350 {
22351 char buf[18];
22352 sprintf (buf, "%s.%.5u",
22353 is_ctor ? ".init_array" : ".fini_array",
22354 priority);
22355 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22356 }
22357 else if (is_ctor)
22358 s = ctors_section;
22359 else
22360 s = dtors_section;
22361
22362 switch_to_section (s);
22363 assemble_align (POINTER_SIZE);
22364 fputs ("\t.word\t", asm_out_file);
22365 output_addr_const (asm_out_file, symbol);
22366 fputs ("(target1)\n", asm_out_file);
22367 }
22368
22369 /* Add a function to the list of static constructors. */
22370
22371 static void
22372 arm_elf_asm_constructor (rtx symbol, int priority)
22373 {
22374 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22375 }
22376
22377 /* Add a function to the list of static destructors. */
22378
22379 static void
22380 arm_elf_asm_destructor (rtx symbol, int priority)
22381 {
22382 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22383 }
22384 \f
22385 /* A finite state machine takes care of noticing whether or not instructions
22386 can be conditionally executed, and thus decrease execution time and code
22387 size by deleting branch instructions. The fsm is controlled by
22388 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22389
22390 /* The state of the fsm controlling condition codes are:
22391 0: normal, do nothing special
22392 1: make ASM_OUTPUT_OPCODE not output this instruction
22393 2: make ASM_OUTPUT_OPCODE not output this instruction
22394 3: make instructions conditional
22395 4: make instructions conditional
22396
22397 State transitions (state->state by whom under condition):
22398 0 -> 1 final_prescan_insn if the `target' is a label
22399 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22400 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22401 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22402 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22403 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22404 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22405 (the target insn is arm_target_insn).
22406
22407 If the jump clobbers the conditions then we use states 2 and 4.
22408
22409 A similar thing can be done with conditional return insns.
22410
22411 XXX In case the `target' is an unconditional branch, this conditionalising
22412 of the instructions always reduces code size, but not always execution
22413 time. But then, I want to reduce the code size to somewhere near what
22414 /bin/cc produces. */
22415
22416 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22417 instructions. When a COND_EXEC instruction is seen the subsequent
22418 instructions are scanned so that multiple conditional instructions can be
22419 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22420 specify the length and true/false mask for the IT block. These will be
22421 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22422
22423 /* Returns the index of the ARM condition code string in
22424 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22425 COMPARISON should be an rtx like `(eq (...) (...))'. */
22426
22427 enum arm_cond_code
22428 maybe_get_arm_condition_code (rtx comparison)
22429 {
22430 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22431 enum arm_cond_code code;
22432 enum rtx_code comp_code = GET_CODE (comparison);
22433
22434 if (GET_MODE_CLASS (mode) != MODE_CC)
22435 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22436 XEXP (comparison, 1));
22437
22438 switch (mode)
22439 {
22440 case CC_DNEmode: code = ARM_NE; goto dominance;
22441 case CC_DEQmode: code = ARM_EQ; goto dominance;
22442 case CC_DGEmode: code = ARM_GE; goto dominance;
22443 case CC_DGTmode: code = ARM_GT; goto dominance;
22444 case CC_DLEmode: code = ARM_LE; goto dominance;
22445 case CC_DLTmode: code = ARM_LT; goto dominance;
22446 case CC_DGEUmode: code = ARM_CS; goto dominance;
22447 case CC_DGTUmode: code = ARM_HI; goto dominance;
22448 case CC_DLEUmode: code = ARM_LS; goto dominance;
22449 case CC_DLTUmode: code = ARM_CC;
22450
22451 dominance:
22452 if (comp_code == EQ)
22453 return ARM_INVERSE_CONDITION_CODE (code);
22454 if (comp_code == NE)
22455 return code;
22456 return ARM_NV;
22457
22458 case CC_NOOVmode:
22459 switch (comp_code)
22460 {
22461 case NE: return ARM_NE;
22462 case EQ: return ARM_EQ;
22463 case GE: return ARM_PL;
22464 case LT: return ARM_MI;
22465 default: return ARM_NV;
22466 }
22467
22468 case CC_Zmode:
22469 switch (comp_code)
22470 {
22471 case NE: return ARM_NE;
22472 case EQ: return ARM_EQ;
22473 default: return ARM_NV;
22474 }
22475
22476 case CC_Nmode:
22477 switch (comp_code)
22478 {
22479 case NE: return ARM_MI;
22480 case EQ: return ARM_PL;
22481 default: return ARM_NV;
22482 }
22483
22484 case CCFPEmode:
22485 case CCFPmode:
22486 /* We can handle all cases except UNEQ and LTGT. */
22487 switch (comp_code)
22488 {
22489 case GE: return ARM_GE;
22490 case GT: return ARM_GT;
22491 case LE: return ARM_LS;
22492 case LT: return ARM_MI;
22493 case NE: return ARM_NE;
22494 case EQ: return ARM_EQ;
22495 case ORDERED: return ARM_VC;
22496 case UNORDERED: return ARM_VS;
22497 case UNLT: return ARM_LT;
22498 case UNLE: return ARM_LE;
22499 case UNGT: return ARM_HI;
22500 case UNGE: return ARM_PL;
22501 /* UNEQ and LTGT do not have a representation. */
22502 case UNEQ: /* Fall through. */
22503 case LTGT: /* Fall through. */
22504 default: return ARM_NV;
22505 }
22506
22507 case CC_SWPmode:
22508 switch (comp_code)
22509 {
22510 case NE: return ARM_NE;
22511 case EQ: return ARM_EQ;
22512 case GE: return ARM_LE;
22513 case GT: return ARM_LT;
22514 case LE: return ARM_GE;
22515 case LT: return ARM_GT;
22516 case GEU: return ARM_LS;
22517 case GTU: return ARM_CC;
22518 case LEU: return ARM_CS;
22519 case LTU: return ARM_HI;
22520 default: return ARM_NV;
22521 }
22522
22523 case CC_Cmode:
22524 switch (comp_code)
22525 {
22526 case LTU: return ARM_CS;
22527 case GEU: return ARM_CC;
22528 default: return ARM_NV;
22529 }
22530
22531 case CC_CZmode:
22532 switch (comp_code)
22533 {
22534 case NE: return ARM_NE;
22535 case EQ: return ARM_EQ;
22536 case GEU: return ARM_CS;
22537 case GTU: return ARM_HI;
22538 case LEU: return ARM_LS;
22539 case LTU: return ARM_CC;
22540 default: return ARM_NV;
22541 }
22542
22543 case CC_NCVmode:
22544 switch (comp_code)
22545 {
22546 case GE: return ARM_GE;
22547 case LT: return ARM_LT;
22548 case GEU: return ARM_CS;
22549 case LTU: return ARM_CC;
22550 default: return ARM_NV;
22551 }
22552
22553 case CCmode:
22554 switch (comp_code)
22555 {
22556 case NE: return ARM_NE;
22557 case EQ: return ARM_EQ;
22558 case GE: return ARM_GE;
22559 case GT: return ARM_GT;
22560 case LE: return ARM_LE;
22561 case LT: return ARM_LT;
22562 case GEU: return ARM_CS;
22563 case GTU: return ARM_HI;
22564 case LEU: return ARM_LS;
22565 case LTU: return ARM_CC;
22566 default: return ARM_NV;
22567 }
22568
22569 default: gcc_unreachable ();
22570 }
22571 }
22572
22573 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22574 static enum arm_cond_code
22575 get_arm_condition_code (rtx comparison)
22576 {
22577 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22578 gcc_assert (code != ARM_NV);
22579 return code;
22580 }
22581
22582 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22583 instructions. */
22584 void
22585 thumb2_final_prescan_insn (rtx_insn *insn)
22586 {
22587 rtx_insn *first_insn = insn;
22588 rtx body = PATTERN (insn);
22589 rtx predicate;
22590 enum arm_cond_code code;
22591 int n;
22592 int mask;
22593 int max;
22594
22595 /* max_insns_skipped in the tune was already taken into account in the
22596 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22597 just emit the IT blocks as we can. It does not make sense to split
22598 the IT blocks. */
22599 max = MAX_INSN_PER_IT_BLOCK;
22600
22601 /* Remove the previous insn from the count of insns to be output. */
22602 if (arm_condexec_count)
22603 arm_condexec_count--;
22604
22605 /* Nothing to do if we are already inside a conditional block. */
22606 if (arm_condexec_count)
22607 return;
22608
22609 if (GET_CODE (body) != COND_EXEC)
22610 return;
22611
22612 /* Conditional jumps are implemented directly. */
22613 if (JUMP_P (insn))
22614 return;
22615
22616 predicate = COND_EXEC_TEST (body);
22617 arm_current_cc = get_arm_condition_code (predicate);
22618
22619 n = get_attr_ce_count (insn);
22620 arm_condexec_count = 1;
22621 arm_condexec_mask = (1 << n) - 1;
22622 arm_condexec_masklen = n;
22623 /* See if subsequent instructions can be combined into the same block. */
22624 for (;;)
22625 {
22626 insn = next_nonnote_insn (insn);
22627
22628 /* Jumping into the middle of an IT block is illegal, so a label or
22629 barrier terminates the block. */
22630 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22631 break;
22632
22633 body = PATTERN (insn);
22634 /* USE and CLOBBER aren't really insns, so just skip them. */
22635 if (GET_CODE (body) == USE
22636 || GET_CODE (body) == CLOBBER)
22637 continue;
22638
22639 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22640 if (GET_CODE (body) != COND_EXEC)
22641 break;
22642 /* Maximum number of conditionally executed instructions in a block. */
22643 n = get_attr_ce_count (insn);
22644 if (arm_condexec_masklen + n > max)
22645 break;
22646
22647 predicate = COND_EXEC_TEST (body);
22648 code = get_arm_condition_code (predicate);
22649 mask = (1 << n) - 1;
22650 if (arm_current_cc == code)
22651 arm_condexec_mask |= (mask << arm_condexec_masklen);
22652 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22653 break;
22654
22655 arm_condexec_count++;
22656 arm_condexec_masklen += n;
22657
22658 /* A jump must be the last instruction in a conditional block. */
22659 if (JUMP_P (insn))
22660 break;
22661 }
22662 /* Restore recog_data (getting the attributes of other insns can
22663 destroy this array, but final.c assumes that it remains intact
22664 across this call). */
22665 extract_constrain_insn_cached (first_insn);
22666 }
22667
22668 void
22669 arm_final_prescan_insn (rtx_insn *insn)
22670 {
22671 /* BODY will hold the body of INSN. */
22672 rtx body = PATTERN (insn);
22673
22674 /* This will be 1 if trying to repeat the trick, and things need to be
22675 reversed if it appears to fail. */
22676 int reverse = 0;
22677
22678 /* If we start with a return insn, we only succeed if we find another one. */
22679 int seeking_return = 0;
22680 enum rtx_code return_code = UNKNOWN;
22681
22682 /* START_INSN will hold the insn from where we start looking. This is the
22683 first insn after the following code_label if REVERSE is true. */
22684 rtx_insn *start_insn = insn;
22685
22686 /* If in state 4, check if the target branch is reached, in order to
22687 change back to state 0. */
22688 if (arm_ccfsm_state == 4)
22689 {
22690 if (insn == arm_target_insn)
22691 {
22692 arm_target_insn = NULL;
22693 arm_ccfsm_state = 0;
22694 }
22695 return;
22696 }
22697
22698 /* If in state 3, it is possible to repeat the trick, if this insn is an
22699 unconditional branch to a label, and immediately following this branch
22700 is the previous target label which is only used once, and the label this
22701 branch jumps to is not too far off. */
22702 if (arm_ccfsm_state == 3)
22703 {
22704 if (simplejump_p (insn))
22705 {
22706 start_insn = next_nonnote_insn (start_insn);
22707 if (BARRIER_P (start_insn))
22708 {
22709 /* XXX Isn't this always a barrier? */
22710 start_insn = next_nonnote_insn (start_insn);
22711 }
22712 if (LABEL_P (start_insn)
22713 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22714 && LABEL_NUSES (start_insn) == 1)
22715 reverse = TRUE;
22716 else
22717 return;
22718 }
22719 else if (ANY_RETURN_P (body))
22720 {
22721 start_insn = next_nonnote_insn (start_insn);
22722 if (BARRIER_P (start_insn))
22723 start_insn = next_nonnote_insn (start_insn);
22724 if (LABEL_P (start_insn)
22725 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22726 && LABEL_NUSES (start_insn) == 1)
22727 {
22728 reverse = TRUE;
22729 seeking_return = 1;
22730 return_code = GET_CODE (body);
22731 }
22732 else
22733 return;
22734 }
22735 else
22736 return;
22737 }
22738
22739 gcc_assert (!arm_ccfsm_state || reverse);
22740 if (!JUMP_P (insn))
22741 return;
22742
22743 /* This jump might be paralleled with a clobber of the condition codes
22744 the jump should always come first */
22745 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22746 body = XVECEXP (body, 0, 0);
22747
22748 if (reverse
22749 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22750 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22751 {
22752 int insns_skipped;
22753 int fail = FALSE, succeed = FALSE;
22754 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22755 int then_not_else = TRUE;
22756 rtx_insn *this_insn = start_insn;
22757 rtx label = 0;
22758
22759 /* Register the insn jumped to. */
22760 if (reverse)
22761 {
22762 if (!seeking_return)
22763 label = XEXP (SET_SRC (body), 0);
22764 }
22765 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22766 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22767 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22768 {
22769 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22770 then_not_else = FALSE;
22771 }
22772 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22773 {
22774 seeking_return = 1;
22775 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22776 }
22777 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22778 {
22779 seeking_return = 1;
22780 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22781 then_not_else = FALSE;
22782 }
22783 else
22784 gcc_unreachable ();
22785
22786 /* See how many insns this branch skips, and what kind of insns. If all
22787 insns are okay, and the label or unconditional branch to the same
22788 label is not too far away, succeed. */
22789 for (insns_skipped = 0;
22790 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22791 {
22792 rtx scanbody;
22793
22794 this_insn = next_nonnote_insn (this_insn);
22795 if (!this_insn)
22796 break;
22797
22798 switch (GET_CODE (this_insn))
22799 {
22800 case CODE_LABEL:
22801 /* Succeed if it is the target label, otherwise fail since
22802 control falls in from somewhere else. */
22803 if (this_insn == label)
22804 {
22805 arm_ccfsm_state = 1;
22806 succeed = TRUE;
22807 }
22808 else
22809 fail = TRUE;
22810 break;
22811
22812 case BARRIER:
22813 /* Succeed if the following insn is the target label.
22814 Otherwise fail.
22815 If return insns are used then the last insn in a function
22816 will be a barrier. */
22817 this_insn = next_nonnote_insn (this_insn);
22818 if (this_insn && this_insn == label)
22819 {
22820 arm_ccfsm_state = 1;
22821 succeed = TRUE;
22822 }
22823 else
22824 fail = TRUE;
22825 break;
22826
22827 case CALL_INSN:
22828 /* The AAPCS says that conditional calls should not be
22829 used since they make interworking inefficient (the
22830 linker can't transform BL<cond> into BLX). That's
22831 only a problem if the machine has BLX. */
22832 if (arm_arch5)
22833 {
22834 fail = TRUE;
22835 break;
22836 }
22837
22838 /* Succeed if the following insn is the target label, or
22839 if the following two insns are a barrier and the
22840 target label. */
22841 this_insn = next_nonnote_insn (this_insn);
22842 if (this_insn && BARRIER_P (this_insn))
22843 this_insn = next_nonnote_insn (this_insn);
22844
22845 if (this_insn && this_insn == label
22846 && insns_skipped < max_insns_skipped)
22847 {
22848 arm_ccfsm_state = 1;
22849 succeed = TRUE;
22850 }
22851 else
22852 fail = TRUE;
22853 break;
22854
22855 case JUMP_INSN:
22856 /* If this is an unconditional branch to the same label, succeed.
22857 If it is to another label, do nothing. If it is conditional,
22858 fail. */
22859 /* XXX Probably, the tests for SET and the PC are
22860 unnecessary. */
22861
22862 scanbody = PATTERN (this_insn);
22863 if (GET_CODE (scanbody) == SET
22864 && GET_CODE (SET_DEST (scanbody)) == PC)
22865 {
22866 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22867 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22868 {
22869 arm_ccfsm_state = 2;
22870 succeed = TRUE;
22871 }
22872 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22873 fail = TRUE;
22874 }
22875 /* Fail if a conditional return is undesirable (e.g. on a
22876 StrongARM), but still allow this if optimizing for size. */
22877 else if (GET_CODE (scanbody) == return_code
22878 && !use_return_insn (TRUE, NULL)
22879 && !optimize_size)
22880 fail = TRUE;
22881 else if (GET_CODE (scanbody) == return_code)
22882 {
22883 arm_ccfsm_state = 2;
22884 succeed = TRUE;
22885 }
22886 else if (GET_CODE (scanbody) == PARALLEL)
22887 {
22888 switch (get_attr_conds (this_insn))
22889 {
22890 case CONDS_NOCOND:
22891 break;
22892 default:
22893 fail = TRUE;
22894 break;
22895 }
22896 }
22897 else
22898 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22899
22900 break;
22901
22902 case INSN:
22903 /* Instructions using or affecting the condition codes make it
22904 fail. */
22905 scanbody = PATTERN (this_insn);
22906 if (!(GET_CODE (scanbody) == SET
22907 || GET_CODE (scanbody) == PARALLEL)
22908 || get_attr_conds (this_insn) != CONDS_NOCOND)
22909 fail = TRUE;
22910 break;
22911
22912 default:
22913 break;
22914 }
22915 }
22916 if (succeed)
22917 {
22918 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22919 arm_target_label = CODE_LABEL_NUMBER (label);
22920 else
22921 {
22922 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22923
22924 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22925 {
22926 this_insn = next_nonnote_insn (this_insn);
22927 gcc_assert (!this_insn
22928 || (!BARRIER_P (this_insn)
22929 && !LABEL_P (this_insn)));
22930 }
22931 if (!this_insn)
22932 {
22933 /* Oh, dear! we ran off the end.. give up. */
22934 extract_constrain_insn_cached (insn);
22935 arm_ccfsm_state = 0;
22936 arm_target_insn = NULL;
22937 return;
22938 }
22939 arm_target_insn = this_insn;
22940 }
22941
22942 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22943 what it was. */
22944 if (!reverse)
22945 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22946
22947 if (reverse || then_not_else)
22948 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22949 }
22950
22951 /* Restore recog_data (getting the attributes of other insns can
22952 destroy this array, but final.c assumes that it remains intact
22953 across this call. */
22954 extract_constrain_insn_cached (insn);
22955 }
22956 }
22957
22958 /* Output IT instructions. */
22959 void
22960 thumb2_asm_output_opcode (FILE * stream)
22961 {
22962 char buff[5];
22963 int n;
22964
22965 if (arm_condexec_mask)
22966 {
22967 for (n = 0; n < arm_condexec_masklen; n++)
22968 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22969 buff[n] = 0;
22970 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22971 arm_condition_codes[arm_current_cc]);
22972 arm_condexec_mask = 0;
22973 }
22974 }
22975
22976 /* Returns true if REGNO is a valid register
22977 for holding a quantity of type MODE. */
22978 int
22979 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22980 {
22981 if (GET_MODE_CLASS (mode) == MODE_CC)
22982 return (regno == CC_REGNUM
22983 || (TARGET_HARD_FLOAT && TARGET_VFP
22984 && regno == VFPCC_REGNUM));
22985
22986 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22987 return false;
22988
22989 if (TARGET_THUMB1)
22990 /* For the Thumb we only allow values bigger than SImode in
22991 registers 0 - 6, so that there is always a second low
22992 register available to hold the upper part of the value.
22993 We probably we ought to ensure that the register is the
22994 start of an even numbered register pair. */
22995 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22996
22997 if (TARGET_HARD_FLOAT && TARGET_VFP
22998 && IS_VFP_REGNUM (regno))
22999 {
23000 if (mode == SFmode || mode == SImode)
23001 return VFP_REGNO_OK_FOR_SINGLE (regno);
23002
23003 if (mode == DFmode)
23004 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23005
23006 /* VFP registers can hold HFmode values, but there is no point in
23007 putting them there unless we have hardware conversion insns. */
23008 if (mode == HFmode)
23009 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23010
23011 if (TARGET_NEON)
23012 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23013 || (VALID_NEON_QREG_MODE (mode)
23014 && NEON_REGNO_OK_FOR_QUAD (regno))
23015 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23016 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23017 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23018 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23019 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23020
23021 return FALSE;
23022 }
23023
23024 if (TARGET_REALLY_IWMMXT)
23025 {
23026 if (IS_IWMMXT_GR_REGNUM (regno))
23027 return mode == SImode;
23028
23029 if (IS_IWMMXT_REGNUM (regno))
23030 return VALID_IWMMXT_REG_MODE (mode);
23031 }
23032
23033 /* We allow almost any value to be stored in the general registers.
23034 Restrict doubleword quantities to even register pairs in ARM state
23035 so that we can use ldrd. Do not allow very large Neon structure
23036 opaque modes in general registers; they would use too many. */
23037 if (regno <= LAST_ARM_REGNUM)
23038 {
23039 if (ARM_NUM_REGS (mode) > 4)
23040 return FALSE;
23041
23042 if (TARGET_THUMB2)
23043 return TRUE;
23044
23045 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23046 }
23047
23048 if (regno == FRAME_POINTER_REGNUM
23049 || regno == ARG_POINTER_REGNUM)
23050 /* We only allow integers in the fake hard registers. */
23051 return GET_MODE_CLASS (mode) == MODE_INT;
23052
23053 return FALSE;
23054 }
23055
23056 /* Implement MODES_TIEABLE_P. */
23057
23058 bool
23059 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23060 {
23061 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23062 return true;
23063
23064 /* We specifically want to allow elements of "structure" modes to
23065 be tieable to the structure. This more general condition allows
23066 other rarer situations too. */
23067 if (TARGET_NEON
23068 && (VALID_NEON_DREG_MODE (mode1)
23069 || VALID_NEON_QREG_MODE (mode1)
23070 || VALID_NEON_STRUCT_MODE (mode1))
23071 && (VALID_NEON_DREG_MODE (mode2)
23072 || VALID_NEON_QREG_MODE (mode2)
23073 || VALID_NEON_STRUCT_MODE (mode2)))
23074 return true;
23075
23076 return false;
23077 }
23078
23079 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23080 not used in arm mode. */
23081
23082 enum reg_class
23083 arm_regno_class (int regno)
23084 {
23085 if (regno == PC_REGNUM)
23086 return NO_REGS;
23087
23088 if (TARGET_THUMB1)
23089 {
23090 if (regno == STACK_POINTER_REGNUM)
23091 return STACK_REG;
23092 if (regno == CC_REGNUM)
23093 return CC_REG;
23094 if (regno < 8)
23095 return LO_REGS;
23096 return HI_REGS;
23097 }
23098
23099 if (TARGET_THUMB2 && regno < 8)
23100 return LO_REGS;
23101
23102 if ( regno <= LAST_ARM_REGNUM
23103 || regno == FRAME_POINTER_REGNUM
23104 || regno == ARG_POINTER_REGNUM)
23105 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23106
23107 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23108 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23109
23110 if (IS_VFP_REGNUM (regno))
23111 {
23112 if (regno <= D7_VFP_REGNUM)
23113 return VFP_D0_D7_REGS;
23114 else if (regno <= LAST_LO_VFP_REGNUM)
23115 return VFP_LO_REGS;
23116 else
23117 return VFP_HI_REGS;
23118 }
23119
23120 if (IS_IWMMXT_REGNUM (regno))
23121 return IWMMXT_REGS;
23122
23123 if (IS_IWMMXT_GR_REGNUM (regno))
23124 return IWMMXT_GR_REGS;
23125
23126 return NO_REGS;
23127 }
23128
23129 /* Handle a special case when computing the offset
23130 of an argument from the frame pointer. */
23131 int
23132 arm_debugger_arg_offset (int value, rtx addr)
23133 {
23134 rtx_insn *insn;
23135
23136 /* We are only interested if dbxout_parms() failed to compute the offset. */
23137 if (value != 0)
23138 return 0;
23139
23140 /* We can only cope with the case where the address is held in a register. */
23141 if (!REG_P (addr))
23142 return 0;
23143
23144 /* If we are using the frame pointer to point at the argument, then
23145 an offset of 0 is correct. */
23146 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23147 return 0;
23148
23149 /* If we are using the stack pointer to point at the
23150 argument, then an offset of 0 is correct. */
23151 /* ??? Check this is consistent with thumb2 frame layout. */
23152 if ((TARGET_THUMB || !frame_pointer_needed)
23153 && REGNO (addr) == SP_REGNUM)
23154 return 0;
23155
23156 /* Oh dear. The argument is pointed to by a register rather
23157 than being held in a register, or being stored at a known
23158 offset from the frame pointer. Since GDB only understands
23159 those two kinds of argument we must translate the address
23160 held in the register into an offset from the frame pointer.
23161 We do this by searching through the insns for the function
23162 looking to see where this register gets its value. If the
23163 register is initialized from the frame pointer plus an offset
23164 then we are in luck and we can continue, otherwise we give up.
23165
23166 This code is exercised by producing debugging information
23167 for a function with arguments like this:
23168
23169 double func (double a, double b, int c, double d) {return d;}
23170
23171 Without this code the stab for parameter 'd' will be set to
23172 an offset of 0 from the frame pointer, rather than 8. */
23173
23174 /* The if() statement says:
23175
23176 If the insn is a normal instruction
23177 and if the insn is setting the value in a register
23178 and if the register being set is the register holding the address of the argument
23179 and if the address is computing by an addition
23180 that involves adding to a register
23181 which is the frame pointer
23182 a constant integer
23183
23184 then... */
23185
23186 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23187 {
23188 if ( NONJUMP_INSN_P (insn)
23189 && GET_CODE (PATTERN (insn)) == SET
23190 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23191 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23192 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23193 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23194 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23195 )
23196 {
23197 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23198
23199 break;
23200 }
23201 }
23202
23203 if (value == 0)
23204 {
23205 debug_rtx (addr);
23206 warning (0, "unable to compute real location of stacked parameter");
23207 value = 8; /* XXX magic hack */
23208 }
23209
23210 return value;
23211 }
23212 \f
23213 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23214
23215 static const char *
23216 arm_invalid_parameter_type (const_tree t)
23217 {
23218 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23219 return N_("function parameters cannot have __fp16 type");
23220 return NULL;
23221 }
23222
23223 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23224
23225 static const char *
23226 arm_invalid_return_type (const_tree t)
23227 {
23228 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23229 return N_("functions cannot return __fp16 type");
23230 return NULL;
23231 }
23232
23233 /* Implement TARGET_PROMOTED_TYPE. */
23234
23235 static tree
23236 arm_promoted_type (const_tree t)
23237 {
23238 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23239 return float_type_node;
23240 return NULL_TREE;
23241 }
23242
23243 /* Implement TARGET_CONVERT_TO_TYPE.
23244 Specifically, this hook implements the peculiarity of the ARM
23245 half-precision floating-point C semantics that requires conversions between
23246 __fp16 to or from double to do an intermediate conversion to float. */
23247
23248 static tree
23249 arm_convert_to_type (tree type, tree expr)
23250 {
23251 tree fromtype = TREE_TYPE (expr);
23252 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23253 return NULL_TREE;
23254 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23255 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23256 return convert (type, convert (float_type_node, expr));
23257 return NULL_TREE;
23258 }
23259
23260 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23261 This simply adds HFmode as a supported mode; even though we don't
23262 implement arithmetic on this type directly, it's supported by
23263 optabs conversions, much the way the double-word arithmetic is
23264 special-cased in the default hook. */
23265
23266 static bool
23267 arm_scalar_mode_supported_p (machine_mode mode)
23268 {
23269 if (mode == HFmode)
23270 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23271 else if (ALL_FIXED_POINT_MODE_P (mode))
23272 return true;
23273 else
23274 return default_scalar_mode_supported_p (mode);
23275 }
23276
23277 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23278 void
23279 neon_reinterpret (rtx dest, rtx src)
23280 {
23281 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23282 }
23283
23284 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23285 not to early-clobber SRC registers in the process.
23286
23287 We assume that the operands described by SRC and DEST represent a
23288 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23289 number of components into which the copy has been decomposed. */
23290 void
23291 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23292 {
23293 unsigned int i;
23294
23295 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23296 || REGNO (operands[0]) < REGNO (operands[1]))
23297 {
23298 for (i = 0; i < count; i++)
23299 {
23300 operands[2 * i] = dest[i];
23301 operands[2 * i + 1] = src[i];
23302 }
23303 }
23304 else
23305 {
23306 for (i = 0; i < count; i++)
23307 {
23308 operands[2 * i] = dest[count - i - 1];
23309 operands[2 * i + 1] = src[count - i - 1];
23310 }
23311 }
23312 }
23313
23314 /* Split operands into moves from op[1] + op[2] into op[0]. */
23315
23316 void
23317 neon_split_vcombine (rtx operands[3])
23318 {
23319 unsigned int dest = REGNO (operands[0]);
23320 unsigned int src1 = REGNO (operands[1]);
23321 unsigned int src2 = REGNO (operands[2]);
23322 machine_mode halfmode = GET_MODE (operands[1]);
23323 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23324 rtx destlo, desthi;
23325
23326 if (src1 == dest && src2 == dest + halfregs)
23327 {
23328 /* No-op move. Can't split to nothing; emit something. */
23329 emit_note (NOTE_INSN_DELETED);
23330 return;
23331 }
23332
23333 /* Preserve register attributes for variable tracking. */
23334 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23335 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23336 GET_MODE_SIZE (halfmode));
23337
23338 /* Special case of reversed high/low parts. Use VSWP. */
23339 if (src2 == dest && src1 == dest + halfregs)
23340 {
23341 rtx x = gen_rtx_SET (destlo, operands[1]);
23342 rtx y = gen_rtx_SET (desthi, operands[2]);
23343 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23344 return;
23345 }
23346
23347 if (!reg_overlap_mentioned_p (operands[2], destlo))
23348 {
23349 /* Try to avoid unnecessary moves if part of the result
23350 is in the right place already. */
23351 if (src1 != dest)
23352 emit_move_insn (destlo, operands[1]);
23353 if (src2 != dest + halfregs)
23354 emit_move_insn (desthi, operands[2]);
23355 }
23356 else
23357 {
23358 if (src2 != dest + halfregs)
23359 emit_move_insn (desthi, operands[2]);
23360 if (src1 != dest)
23361 emit_move_insn (destlo, operands[1]);
23362 }
23363 }
23364 \f
23365 /* Return the number (counting from 0) of
23366 the least significant set bit in MASK. */
23367
23368 inline static int
23369 number_of_first_bit_set (unsigned mask)
23370 {
23371 return ctz_hwi (mask);
23372 }
23373
23374 /* Like emit_multi_reg_push, but allowing for a different set of
23375 registers to be described as saved. MASK is the set of registers
23376 to be saved; REAL_REGS is the set of registers to be described as
23377 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23378
23379 static rtx_insn *
23380 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23381 {
23382 unsigned long regno;
23383 rtx par[10], tmp, reg;
23384 rtx_insn *insn;
23385 int i, j;
23386
23387 /* Build the parallel of the registers actually being stored. */
23388 for (i = 0; mask; ++i, mask &= mask - 1)
23389 {
23390 regno = ctz_hwi (mask);
23391 reg = gen_rtx_REG (SImode, regno);
23392
23393 if (i == 0)
23394 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23395 else
23396 tmp = gen_rtx_USE (VOIDmode, reg);
23397
23398 par[i] = tmp;
23399 }
23400
23401 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23402 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23403 tmp = gen_frame_mem (BLKmode, tmp);
23404 tmp = gen_rtx_SET (tmp, par[0]);
23405 par[0] = tmp;
23406
23407 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23408 insn = emit_insn (tmp);
23409
23410 /* Always build the stack adjustment note for unwind info. */
23411 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23412 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23413 par[0] = tmp;
23414
23415 /* Build the parallel of the registers recorded as saved for unwind. */
23416 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23417 {
23418 regno = ctz_hwi (real_regs);
23419 reg = gen_rtx_REG (SImode, regno);
23420
23421 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23422 tmp = gen_frame_mem (SImode, tmp);
23423 tmp = gen_rtx_SET (tmp, reg);
23424 RTX_FRAME_RELATED_P (tmp) = 1;
23425 par[j + 1] = tmp;
23426 }
23427
23428 if (j == 0)
23429 tmp = par[0];
23430 else
23431 {
23432 RTX_FRAME_RELATED_P (par[0]) = 1;
23433 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23434 }
23435
23436 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23437
23438 return insn;
23439 }
23440
23441 /* Emit code to push or pop registers to or from the stack. F is the
23442 assembly file. MASK is the registers to pop. */
23443 static void
23444 thumb_pop (FILE *f, unsigned long mask)
23445 {
23446 int regno;
23447 int lo_mask = mask & 0xFF;
23448 int pushed_words = 0;
23449
23450 gcc_assert (mask);
23451
23452 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23453 {
23454 /* Special case. Do not generate a POP PC statement here, do it in
23455 thumb_exit() */
23456 thumb_exit (f, -1);
23457 return;
23458 }
23459
23460 fprintf (f, "\tpop\t{");
23461
23462 /* Look at the low registers first. */
23463 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23464 {
23465 if (lo_mask & 1)
23466 {
23467 asm_fprintf (f, "%r", regno);
23468
23469 if ((lo_mask & ~1) != 0)
23470 fprintf (f, ", ");
23471
23472 pushed_words++;
23473 }
23474 }
23475
23476 if (mask & (1 << PC_REGNUM))
23477 {
23478 /* Catch popping the PC. */
23479 if (TARGET_INTERWORK || TARGET_BACKTRACE
23480 || crtl->calls_eh_return)
23481 {
23482 /* The PC is never poped directly, instead
23483 it is popped into r3 and then BX is used. */
23484 fprintf (f, "}\n");
23485
23486 thumb_exit (f, -1);
23487
23488 return;
23489 }
23490 else
23491 {
23492 if (mask & 0xFF)
23493 fprintf (f, ", ");
23494
23495 asm_fprintf (f, "%r", PC_REGNUM);
23496 }
23497 }
23498
23499 fprintf (f, "}\n");
23500 }
23501
23502 /* Generate code to return from a thumb function.
23503 If 'reg_containing_return_addr' is -1, then the return address is
23504 actually on the stack, at the stack pointer. */
23505 static void
23506 thumb_exit (FILE *f, int reg_containing_return_addr)
23507 {
23508 unsigned regs_available_for_popping;
23509 unsigned regs_to_pop;
23510 int pops_needed;
23511 unsigned available;
23512 unsigned required;
23513 machine_mode mode;
23514 int size;
23515 int restore_a4 = FALSE;
23516
23517 /* Compute the registers we need to pop. */
23518 regs_to_pop = 0;
23519 pops_needed = 0;
23520
23521 if (reg_containing_return_addr == -1)
23522 {
23523 regs_to_pop |= 1 << LR_REGNUM;
23524 ++pops_needed;
23525 }
23526
23527 if (TARGET_BACKTRACE)
23528 {
23529 /* Restore the (ARM) frame pointer and stack pointer. */
23530 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23531 pops_needed += 2;
23532 }
23533
23534 /* If there is nothing to pop then just emit the BX instruction and
23535 return. */
23536 if (pops_needed == 0)
23537 {
23538 if (crtl->calls_eh_return)
23539 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23540
23541 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23542 return;
23543 }
23544 /* Otherwise if we are not supporting interworking and we have not created
23545 a backtrace structure and the function was not entered in ARM mode then
23546 just pop the return address straight into the PC. */
23547 else if (!TARGET_INTERWORK
23548 && !TARGET_BACKTRACE
23549 && !is_called_in_ARM_mode (current_function_decl)
23550 && !crtl->calls_eh_return)
23551 {
23552 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23553 return;
23554 }
23555
23556 /* Find out how many of the (return) argument registers we can corrupt. */
23557 regs_available_for_popping = 0;
23558
23559 /* If returning via __builtin_eh_return, the bottom three registers
23560 all contain information needed for the return. */
23561 if (crtl->calls_eh_return)
23562 size = 12;
23563 else
23564 {
23565 /* If we can deduce the registers used from the function's
23566 return value. This is more reliable that examining
23567 df_regs_ever_live_p () because that will be set if the register is
23568 ever used in the function, not just if the register is used
23569 to hold a return value. */
23570
23571 if (crtl->return_rtx != 0)
23572 mode = GET_MODE (crtl->return_rtx);
23573 else
23574 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23575
23576 size = GET_MODE_SIZE (mode);
23577
23578 if (size == 0)
23579 {
23580 /* In a void function we can use any argument register.
23581 In a function that returns a structure on the stack
23582 we can use the second and third argument registers. */
23583 if (mode == VOIDmode)
23584 regs_available_for_popping =
23585 (1 << ARG_REGISTER (1))
23586 | (1 << ARG_REGISTER (2))
23587 | (1 << ARG_REGISTER (3));
23588 else
23589 regs_available_for_popping =
23590 (1 << ARG_REGISTER (2))
23591 | (1 << ARG_REGISTER (3));
23592 }
23593 else if (size <= 4)
23594 regs_available_for_popping =
23595 (1 << ARG_REGISTER (2))
23596 | (1 << ARG_REGISTER (3));
23597 else if (size <= 8)
23598 regs_available_for_popping =
23599 (1 << ARG_REGISTER (3));
23600 }
23601
23602 /* Match registers to be popped with registers into which we pop them. */
23603 for (available = regs_available_for_popping,
23604 required = regs_to_pop;
23605 required != 0 && available != 0;
23606 available &= ~(available & - available),
23607 required &= ~(required & - required))
23608 -- pops_needed;
23609
23610 /* If we have any popping registers left over, remove them. */
23611 if (available > 0)
23612 regs_available_for_popping &= ~available;
23613
23614 /* Otherwise if we need another popping register we can use
23615 the fourth argument register. */
23616 else if (pops_needed)
23617 {
23618 /* If we have not found any free argument registers and
23619 reg a4 contains the return address, we must move it. */
23620 if (regs_available_for_popping == 0
23621 && reg_containing_return_addr == LAST_ARG_REGNUM)
23622 {
23623 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23624 reg_containing_return_addr = LR_REGNUM;
23625 }
23626 else if (size > 12)
23627 {
23628 /* Register a4 is being used to hold part of the return value,
23629 but we have dire need of a free, low register. */
23630 restore_a4 = TRUE;
23631
23632 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23633 }
23634
23635 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23636 {
23637 /* The fourth argument register is available. */
23638 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23639
23640 --pops_needed;
23641 }
23642 }
23643
23644 /* Pop as many registers as we can. */
23645 thumb_pop (f, regs_available_for_popping);
23646
23647 /* Process the registers we popped. */
23648 if (reg_containing_return_addr == -1)
23649 {
23650 /* The return address was popped into the lowest numbered register. */
23651 regs_to_pop &= ~(1 << LR_REGNUM);
23652
23653 reg_containing_return_addr =
23654 number_of_first_bit_set (regs_available_for_popping);
23655
23656 /* Remove this register for the mask of available registers, so that
23657 the return address will not be corrupted by further pops. */
23658 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23659 }
23660
23661 /* If we popped other registers then handle them here. */
23662 if (regs_available_for_popping)
23663 {
23664 int frame_pointer;
23665
23666 /* Work out which register currently contains the frame pointer. */
23667 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23668
23669 /* Move it into the correct place. */
23670 asm_fprintf (f, "\tmov\t%r, %r\n",
23671 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23672
23673 /* (Temporarily) remove it from the mask of popped registers. */
23674 regs_available_for_popping &= ~(1 << frame_pointer);
23675 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23676
23677 if (regs_available_for_popping)
23678 {
23679 int stack_pointer;
23680
23681 /* We popped the stack pointer as well,
23682 find the register that contains it. */
23683 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23684
23685 /* Move it into the stack register. */
23686 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23687
23688 /* At this point we have popped all necessary registers, so
23689 do not worry about restoring regs_available_for_popping
23690 to its correct value:
23691
23692 assert (pops_needed == 0)
23693 assert (regs_available_for_popping == (1 << frame_pointer))
23694 assert (regs_to_pop == (1 << STACK_POINTER)) */
23695 }
23696 else
23697 {
23698 /* Since we have just move the popped value into the frame
23699 pointer, the popping register is available for reuse, and
23700 we know that we still have the stack pointer left to pop. */
23701 regs_available_for_popping |= (1 << frame_pointer);
23702 }
23703 }
23704
23705 /* If we still have registers left on the stack, but we no longer have
23706 any registers into which we can pop them, then we must move the return
23707 address into the link register and make available the register that
23708 contained it. */
23709 if (regs_available_for_popping == 0 && pops_needed > 0)
23710 {
23711 regs_available_for_popping |= 1 << reg_containing_return_addr;
23712
23713 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23714 reg_containing_return_addr);
23715
23716 reg_containing_return_addr = LR_REGNUM;
23717 }
23718
23719 /* If we have registers left on the stack then pop some more.
23720 We know that at most we will want to pop FP and SP. */
23721 if (pops_needed > 0)
23722 {
23723 int popped_into;
23724 int move_to;
23725
23726 thumb_pop (f, regs_available_for_popping);
23727
23728 /* We have popped either FP or SP.
23729 Move whichever one it is into the correct register. */
23730 popped_into = number_of_first_bit_set (regs_available_for_popping);
23731 move_to = number_of_first_bit_set (regs_to_pop);
23732
23733 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23734
23735 regs_to_pop &= ~(1 << move_to);
23736
23737 --pops_needed;
23738 }
23739
23740 /* If we still have not popped everything then we must have only
23741 had one register available to us and we are now popping the SP. */
23742 if (pops_needed > 0)
23743 {
23744 int popped_into;
23745
23746 thumb_pop (f, regs_available_for_popping);
23747
23748 popped_into = number_of_first_bit_set (regs_available_for_popping);
23749
23750 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23751 /*
23752 assert (regs_to_pop == (1 << STACK_POINTER))
23753 assert (pops_needed == 1)
23754 */
23755 }
23756
23757 /* If necessary restore the a4 register. */
23758 if (restore_a4)
23759 {
23760 if (reg_containing_return_addr != LR_REGNUM)
23761 {
23762 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23763 reg_containing_return_addr = LR_REGNUM;
23764 }
23765
23766 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23767 }
23768
23769 if (crtl->calls_eh_return)
23770 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23771
23772 /* Return to caller. */
23773 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23774 }
23775 \f
23776 /* Scan INSN just before assembler is output for it.
23777 For Thumb-1, we track the status of the condition codes; this
23778 information is used in the cbranchsi4_insn pattern. */
23779 void
23780 thumb1_final_prescan_insn (rtx_insn *insn)
23781 {
23782 if (flag_print_asm_name)
23783 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23784 INSN_ADDRESSES (INSN_UID (insn)));
23785 /* Don't overwrite the previous setter when we get to a cbranch. */
23786 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23787 {
23788 enum attr_conds conds;
23789
23790 if (cfun->machine->thumb1_cc_insn)
23791 {
23792 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23793 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23794 CC_STATUS_INIT;
23795 }
23796 conds = get_attr_conds (insn);
23797 if (conds == CONDS_SET)
23798 {
23799 rtx set = single_set (insn);
23800 cfun->machine->thumb1_cc_insn = insn;
23801 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23802 cfun->machine->thumb1_cc_op1 = const0_rtx;
23803 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23804 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23805 {
23806 rtx src1 = XEXP (SET_SRC (set), 1);
23807 if (src1 == const0_rtx)
23808 cfun->machine->thumb1_cc_mode = CCmode;
23809 }
23810 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23811 {
23812 /* Record the src register operand instead of dest because
23813 cprop_hardreg pass propagates src. */
23814 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23815 }
23816 }
23817 else if (conds != CONDS_NOCOND)
23818 cfun->machine->thumb1_cc_insn = NULL_RTX;
23819 }
23820
23821 /* Check if unexpected far jump is used. */
23822 if (cfun->machine->lr_save_eliminated
23823 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23824 internal_error("Unexpected thumb1 far jump");
23825 }
23826
23827 int
23828 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23829 {
23830 unsigned HOST_WIDE_INT mask = 0xff;
23831 int i;
23832
23833 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23834 if (val == 0) /* XXX */
23835 return 0;
23836
23837 for (i = 0; i < 25; i++)
23838 if ((val & (mask << i)) == val)
23839 return 1;
23840
23841 return 0;
23842 }
23843
23844 /* Returns nonzero if the current function contains,
23845 or might contain a far jump. */
23846 static int
23847 thumb_far_jump_used_p (void)
23848 {
23849 rtx_insn *insn;
23850 bool far_jump = false;
23851 unsigned int func_size = 0;
23852
23853 /* This test is only important for leaf functions. */
23854 /* assert (!leaf_function_p ()); */
23855
23856 /* If we have already decided that far jumps may be used,
23857 do not bother checking again, and always return true even if
23858 it turns out that they are not being used. Once we have made
23859 the decision that far jumps are present (and that hence the link
23860 register will be pushed onto the stack) we cannot go back on it. */
23861 if (cfun->machine->far_jump_used)
23862 return 1;
23863
23864 /* If this function is not being called from the prologue/epilogue
23865 generation code then it must be being called from the
23866 INITIAL_ELIMINATION_OFFSET macro. */
23867 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23868 {
23869 /* In this case we know that we are being asked about the elimination
23870 of the arg pointer register. If that register is not being used,
23871 then there are no arguments on the stack, and we do not have to
23872 worry that a far jump might force the prologue to push the link
23873 register, changing the stack offsets. In this case we can just
23874 return false, since the presence of far jumps in the function will
23875 not affect stack offsets.
23876
23877 If the arg pointer is live (or if it was live, but has now been
23878 eliminated and so set to dead) then we do have to test to see if
23879 the function might contain a far jump. This test can lead to some
23880 false negatives, since before reload is completed, then length of
23881 branch instructions is not known, so gcc defaults to returning their
23882 longest length, which in turn sets the far jump attribute to true.
23883
23884 A false negative will not result in bad code being generated, but it
23885 will result in a needless push and pop of the link register. We
23886 hope that this does not occur too often.
23887
23888 If we need doubleword stack alignment this could affect the other
23889 elimination offsets so we can't risk getting it wrong. */
23890 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23891 cfun->machine->arg_pointer_live = 1;
23892 else if (!cfun->machine->arg_pointer_live)
23893 return 0;
23894 }
23895
23896 /* We should not change far_jump_used during or after reload, as there is
23897 no chance to change stack frame layout. */
23898 if (reload_in_progress || reload_completed)
23899 return 0;
23900
23901 /* Check to see if the function contains a branch
23902 insn with the far jump attribute set. */
23903 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23904 {
23905 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23906 {
23907 far_jump = true;
23908 }
23909 func_size += get_attr_length (insn);
23910 }
23911
23912 /* Attribute far_jump will always be true for thumb1 before
23913 shorten_branch pass. So checking far_jump attribute before
23914 shorten_branch isn't much useful.
23915
23916 Following heuristic tries to estimate more accurately if a far jump
23917 may finally be used. The heuristic is very conservative as there is
23918 no chance to roll-back the decision of not to use far jump.
23919
23920 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23921 2-byte insn is associated with a 4 byte constant pool. Using
23922 function size 2048/3 as the threshold is conservative enough. */
23923 if (far_jump)
23924 {
23925 if ((func_size * 3) >= 2048)
23926 {
23927 /* Record the fact that we have decided that
23928 the function does use far jumps. */
23929 cfun->machine->far_jump_used = 1;
23930 return 1;
23931 }
23932 }
23933
23934 return 0;
23935 }
23936
23937 /* Return nonzero if FUNC must be entered in ARM mode. */
23938 static bool
23939 is_called_in_ARM_mode (tree func)
23940 {
23941 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23942
23943 /* Ignore the problem about functions whose address is taken. */
23944 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23945 return true;
23946
23947 #ifdef ARM_PE
23948 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23949 #else
23950 return false;
23951 #endif
23952 }
23953
23954 /* Given the stack offsets and register mask in OFFSETS, decide how
23955 many additional registers to push instead of subtracting a constant
23956 from SP. For epilogues the principle is the same except we use pop.
23957 FOR_PROLOGUE indicates which we're generating. */
23958 static int
23959 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23960 {
23961 HOST_WIDE_INT amount;
23962 unsigned long live_regs_mask = offsets->saved_regs_mask;
23963 /* Extract a mask of the ones we can give to the Thumb's push/pop
23964 instruction. */
23965 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23966 /* Then count how many other high registers will need to be pushed. */
23967 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23968 int n_free, reg_base, size;
23969
23970 if (!for_prologue && frame_pointer_needed)
23971 amount = offsets->locals_base - offsets->saved_regs;
23972 else
23973 amount = offsets->outgoing_args - offsets->saved_regs;
23974
23975 /* If the stack frame size is 512 exactly, we can save one load
23976 instruction, which should make this a win even when optimizing
23977 for speed. */
23978 if (!optimize_size && amount != 512)
23979 return 0;
23980
23981 /* Can't do this if there are high registers to push. */
23982 if (high_regs_pushed != 0)
23983 return 0;
23984
23985 /* Shouldn't do it in the prologue if no registers would normally
23986 be pushed at all. In the epilogue, also allow it if we'll have
23987 a pop insn for the PC. */
23988 if (l_mask == 0
23989 && (for_prologue
23990 || TARGET_BACKTRACE
23991 || (live_regs_mask & 1 << LR_REGNUM) == 0
23992 || TARGET_INTERWORK
23993 || crtl->args.pretend_args_size != 0))
23994 return 0;
23995
23996 /* Don't do this if thumb_expand_prologue wants to emit instructions
23997 between the push and the stack frame allocation. */
23998 if (for_prologue
23999 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24000 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24001 return 0;
24002
24003 reg_base = 0;
24004 n_free = 0;
24005 if (!for_prologue)
24006 {
24007 size = arm_size_return_regs ();
24008 reg_base = ARM_NUM_INTS (size);
24009 live_regs_mask >>= reg_base;
24010 }
24011
24012 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24013 && (for_prologue || call_used_regs[reg_base + n_free]))
24014 {
24015 live_regs_mask >>= 1;
24016 n_free++;
24017 }
24018
24019 if (n_free == 0)
24020 return 0;
24021 gcc_assert (amount / 4 * 4 == amount);
24022
24023 if (amount >= 512 && (amount - n_free * 4) < 512)
24024 return (amount - 508) / 4;
24025 if (amount <= n_free * 4)
24026 return amount / 4;
24027 return 0;
24028 }
24029
24030 /* The bits which aren't usefully expanded as rtl. */
24031 const char *
24032 thumb1_unexpanded_epilogue (void)
24033 {
24034 arm_stack_offsets *offsets;
24035 int regno;
24036 unsigned long live_regs_mask = 0;
24037 int high_regs_pushed = 0;
24038 int extra_pop;
24039 int had_to_push_lr;
24040 int size;
24041
24042 if (cfun->machine->return_used_this_function != 0)
24043 return "";
24044
24045 if (IS_NAKED (arm_current_func_type ()))
24046 return "";
24047
24048 offsets = arm_get_frame_offsets ();
24049 live_regs_mask = offsets->saved_regs_mask;
24050 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24051
24052 /* If we can deduce the registers used from the function's return value.
24053 This is more reliable that examining df_regs_ever_live_p () because that
24054 will be set if the register is ever used in the function, not just if
24055 the register is used to hold a return value. */
24056 size = arm_size_return_regs ();
24057
24058 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24059 if (extra_pop > 0)
24060 {
24061 unsigned long extra_mask = (1 << extra_pop) - 1;
24062 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24063 }
24064
24065 /* The prolog may have pushed some high registers to use as
24066 work registers. e.g. the testsuite file:
24067 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24068 compiles to produce:
24069 push {r4, r5, r6, r7, lr}
24070 mov r7, r9
24071 mov r6, r8
24072 push {r6, r7}
24073 as part of the prolog. We have to undo that pushing here. */
24074
24075 if (high_regs_pushed)
24076 {
24077 unsigned long mask = live_regs_mask & 0xff;
24078 int next_hi_reg;
24079
24080 /* The available low registers depend on the size of the value we are
24081 returning. */
24082 if (size <= 12)
24083 mask |= 1 << 3;
24084 if (size <= 8)
24085 mask |= 1 << 2;
24086
24087 if (mask == 0)
24088 /* Oh dear! We have no low registers into which we can pop
24089 high registers! */
24090 internal_error
24091 ("no low registers available for popping high registers");
24092
24093 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24094 if (live_regs_mask & (1 << next_hi_reg))
24095 break;
24096
24097 while (high_regs_pushed)
24098 {
24099 /* Find lo register(s) into which the high register(s) can
24100 be popped. */
24101 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24102 {
24103 if (mask & (1 << regno))
24104 high_regs_pushed--;
24105 if (high_regs_pushed == 0)
24106 break;
24107 }
24108
24109 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24110
24111 /* Pop the values into the low register(s). */
24112 thumb_pop (asm_out_file, mask);
24113
24114 /* Move the value(s) into the high registers. */
24115 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24116 {
24117 if (mask & (1 << regno))
24118 {
24119 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24120 regno);
24121
24122 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24123 if (live_regs_mask & (1 << next_hi_reg))
24124 break;
24125 }
24126 }
24127 }
24128 live_regs_mask &= ~0x0f00;
24129 }
24130
24131 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24132 live_regs_mask &= 0xff;
24133
24134 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24135 {
24136 /* Pop the return address into the PC. */
24137 if (had_to_push_lr)
24138 live_regs_mask |= 1 << PC_REGNUM;
24139
24140 /* Either no argument registers were pushed or a backtrace
24141 structure was created which includes an adjusted stack
24142 pointer, so just pop everything. */
24143 if (live_regs_mask)
24144 thumb_pop (asm_out_file, live_regs_mask);
24145
24146 /* We have either just popped the return address into the
24147 PC or it is was kept in LR for the entire function.
24148 Note that thumb_pop has already called thumb_exit if the
24149 PC was in the list. */
24150 if (!had_to_push_lr)
24151 thumb_exit (asm_out_file, LR_REGNUM);
24152 }
24153 else
24154 {
24155 /* Pop everything but the return address. */
24156 if (live_regs_mask)
24157 thumb_pop (asm_out_file, live_regs_mask);
24158
24159 if (had_to_push_lr)
24160 {
24161 if (size > 12)
24162 {
24163 /* We have no free low regs, so save one. */
24164 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24165 LAST_ARG_REGNUM);
24166 }
24167
24168 /* Get the return address into a temporary register. */
24169 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24170
24171 if (size > 12)
24172 {
24173 /* Move the return address to lr. */
24174 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24175 LAST_ARG_REGNUM);
24176 /* Restore the low register. */
24177 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24178 IP_REGNUM);
24179 regno = LR_REGNUM;
24180 }
24181 else
24182 regno = LAST_ARG_REGNUM;
24183 }
24184 else
24185 regno = LR_REGNUM;
24186
24187 /* Remove the argument registers that were pushed onto the stack. */
24188 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24189 SP_REGNUM, SP_REGNUM,
24190 crtl->args.pretend_args_size);
24191
24192 thumb_exit (asm_out_file, regno);
24193 }
24194
24195 return "";
24196 }
24197
24198 /* Functions to save and restore machine-specific function data. */
24199 static struct machine_function *
24200 arm_init_machine_status (void)
24201 {
24202 struct machine_function *machine;
24203 machine = ggc_cleared_alloc<machine_function> ();
24204
24205 #if ARM_FT_UNKNOWN != 0
24206 machine->func_type = ARM_FT_UNKNOWN;
24207 #endif
24208 return machine;
24209 }
24210
24211 /* Return an RTX indicating where the return address to the
24212 calling function can be found. */
24213 rtx
24214 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24215 {
24216 if (count != 0)
24217 return NULL_RTX;
24218
24219 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24220 }
24221
24222 /* Do anything needed before RTL is emitted for each function. */
24223 void
24224 arm_init_expanders (void)
24225 {
24226 /* Arrange to initialize and mark the machine per-function status. */
24227 init_machine_status = arm_init_machine_status;
24228
24229 /* This is to stop the combine pass optimizing away the alignment
24230 adjustment of va_arg. */
24231 /* ??? It is claimed that this should not be necessary. */
24232 if (cfun)
24233 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24234 }
24235
24236 /* Check that FUNC is called with a different mode. */
24237
24238 bool
24239 arm_change_mode_p (tree func)
24240 {
24241 if (TREE_CODE (func) != FUNCTION_DECL)
24242 return false;
24243
24244 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24245
24246 if (!callee_tree)
24247 callee_tree = target_option_default_node;
24248
24249 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24250 int flags = callee_opts->x_target_flags;
24251
24252 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24253 }
24254
24255 /* Like arm_compute_initial_elimination offset. Simpler because there
24256 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24257 to point at the base of the local variables after static stack
24258 space for a function has been allocated. */
24259
24260 HOST_WIDE_INT
24261 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24262 {
24263 arm_stack_offsets *offsets;
24264
24265 offsets = arm_get_frame_offsets ();
24266
24267 switch (from)
24268 {
24269 case ARG_POINTER_REGNUM:
24270 switch (to)
24271 {
24272 case STACK_POINTER_REGNUM:
24273 return offsets->outgoing_args - offsets->saved_args;
24274
24275 case FRAME_POINTER_REGNUM:
24276 return offsets->soft_frame - offsets->saved_args;
24277
24278 case ARM_HARD_FRAME_POINTER_REGNUM:
24279 return offsets->saved_regs - offsets->saved_args;
24280
24281 case THUMB_HARD_FRAME_POINTER_REGNUM:
24282 return offsets->locals_base - offsets->saved_args;
24283
24284 default:
24285 gcc_unreachable ();
24286 }
24287 break;
24288
24289 case FRAME_POINTER_REGNUM:
24290 switch (to)
24291 {
24292 case STACK_POINTER_REGNUM:
24293 return offsets->outgoing_args - offsets->soft_frame;
24294
24295 case ARM_HARD_FRAME_POINTER_REGNUM:
24296 return offsets->saved_regs - offsets->soft_frame;
24297
24298 case THUMB_HARD_FRAME_POINTER_REGNUM:
24299 return offsets->locals_base - offsets->soft_frame;
24300
24301 default:
24302 gcc_unreachable ();
24303 }
24304 break;
24305
24306 default:
24307 gcc_unreachable ();
24308 }
24309 }
24310
24311 /* Generate the function's prologue. */
24312
24313 void
24314 thumb1_expand_prologue (void)
24315 {
24316 rtx_insn *insn;
24317
24318 HOST_WIDE_INT amount;
24319 arm_stack_offsets *offsets;
24320 unsigned long func_type;
24321 int regno;
24322 unsigned long live_regs_mask;
24323 unsigned long l_mask;
24324 unsigned high_regs_pushed = 0;
24325
24326 func_type = arm_current_func_type ();
24327
24328 /* Naked functions don't have prologues. */
24329 if (IS_NAKED (func_type))
24330 return;
24331
24332 if (IS_INTERRUPT (func_type))
24333 {
24334 error ("interrupt Service Routines cannot be coded in Thumb mode");
24335 return;
24336 }
24337
24338 if (is_called_in_ARM_mode (current_function_decl))
24339 emit_insn (gen_prologue_thumb1_interwork ());
24340
24341 offsets = arm_get_frame_offsets ();
24342 live_regs_mask = offsets->saved_regs_mask;
24343
24344 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24345 l_mask = live_regs_mask & 0x40ff;
24346 /* Then count how many other high registers will need to be pushed. */
24347 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24348
24349 if (crtl->args.pretend_args_size)
24350 {
24351 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24352
24353 if (cfun->machine->uses_anonymous_args)
24354 {
24355 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24356 unsigned long mask;
24357
24358 mask = 1ul << (LAST_ARG_REGNUM + 1);
24359 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24360
24361 insn = thumb1_emit_multi_reg_push (mask, 0);
24362 }
24363 else
24364 {
24365 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24366 stack_pointer_rtx, x));
24367 }
24368 RTX_FRAME_RELATED_P (insn) = 1;
24369 }
24370
24371 if (TARGET_BACKTRACE)
24372 {
24373 HOST_WIDE_INT offset = 0;
24374 unsigned work_register;
24375 rtx work_reg, x, arm_hfp_rtx;
24376
24377 /* We have been asked to create a stack backtrace structure.
24378 The code looks like this:
24379
24380 0 .align 2
24381 0 func:
24382 0 sub SP, #16 Reserve space for 4 registers.
24383 2 push {R7} Push low registers.
24384 4 add R7, SP, #20 Get the stack pointer before the push.
24385 6 str R7, [SP, #8] Store the stack pointer
24386 (before reserving the space).
24387 8 mov R7, PC Get hold of the start of this code + 12.
24388 10 str R7, [SP, #16] Store it.
24389 12 mov R7, FP Get hold of the current frame pointer.
24390 14 str R7, [SP, #4] Store it.
24391 16 mov R7, LR Get hold of the current return address.
24392 18 str R7, [SP, #12] Store it.
24393 20 add R7, SP, #16 Point at the start of the
24394 backtrace structure.
24395 22 mov FP, R7 Put this value into the frame pointer. */
24396
24397 work_register = thumb_find_work_register (live_regs_mask);
24398 work_reg = gen_rtx_REG (SImode, work_register);
24399 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24400
24401 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24402 stack_pointer_rtx, GEN_INT (-16)));
24403 RTX_FRAME_RELATED_P (insn) = 1;
24404
24405 if (l_mask)
24406 {
24407 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24408 RTX_FRAME_RELATED_P (insn) = 1;
24409
24410 offset = bit_count (l_mask) * UNITS_PER_WORD;
24411 }
24412
24413 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24414 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24415
24416 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24417 x = gen_frame_mem (SImode, x);
24418 emit_move_insn (x, work_reg);
24419
24420 /* Make sure that the instruction fetching the PC is in the right place
24421 to calculate "start of backtrace creation code + 12". */
24422 /* ??? The stores using the common WORK_REG ought to be enough to
24423 prevent the scheduler from doing anything weird. Failing that
24424 we could always move all of the following into an UNSPEC_VOLATILE. */
24425 if (l_mask)
24426 {
24427 x = gen_rtx_REG (SImode, PC_REGNUM);
24428 emit_move_insn (work_reg, x);
24429
24430 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24431 x = gen_frame_mem (SImode, x);
24432 emit_move_insn (x, work_reg);
24433
24434 emit_move_insn (work_reg, arm_hfp_rtx);
24435
24436 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24437 x = gen_frame_mem (SImode, x);
24438 emit_move_insn (x, work_reg);
24439 }
24440 else
24441 {
24442 emit_move_insn (work_reg, arm_hfp_rtx);
24443
24444 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24445 x = gen_frame_mem (SImode, x);
24446 emit_move_insn (x, work_reg);
24447
24448 x = gen_rtx_REG (SImode, PC_REGNUM);
24449 emit_move_insn (work_reg, x);
24450
24451 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24452 x = gen_frame_mem (SImode, x);
24453 emit_move_insn (x, work_reg);
24454 }
24455
24456 x = gen_rtx_REG (SImode, LR_REGNUM);
24457 emit_move_insn (work_reg, x);
24458
24459 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24460 x = gen_frame_mem (SImode, x);
24461 emit_move_insn (x, work_reg);
24462
24463 x = GEN_INT (offset + 12);
24464 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24465
24466 emit_move_insn (arm_hfp_rtx, work_reg);
24467 }
24468 /* Optimization: If we are not pushing any low registers but we are going
24469 to push some high registers then delay our first push. This will just
24470 be a push of LR and we can combine it with the push of the first high
24471 register. */
24472 else if ((l_mask & 0xff) != 0
24473 || (high_regs_pushed == 0 && l_mask))
24474 {
24475 unsigned long mask = l_mask;
24476 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24477 insn = thumb1_emit_multi_reg_push (mask, mask);
24478 RTX_FRAME_RELATED_P (insn) = 1;
24479 }
24480
24481 if (high_regs_pushed)
24482 {
24483 unsigned pushable_regs;
24484 unsigned next_hi_reg;
24485 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24486 : crtl->args.info.nregs;
24487 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24488
24489 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24490 if (live_regs_mask & (1 << next_hi_reg))
24491 break;
24492
24493 /* Here we need to mask out registers used for passing arguments
24494 even if they can be pushed. This is to avoid using them to stash the high
24495 registers. Such kind of stash may clobber the use of arguments. */
24496 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24497
24498 if (pushable_regs == 0)
24499 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24500
24501 while (high_regs_pushed > 0)
24502 {
24503 unsigned long real_regs_mask = 0;
24504
24505 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24506 {
24507 if (pushable_regs & (1 << regno))
24508 {
24509 emit_move_insn (gen_rtx_REG (SImode, regno),
24510 gen_rtx_REG (SImode, next_hi_reg));
24511
24512 high_regs_pushed --;
24513 real_regs_mask |= (1 << next_hi_reg);
24514
24515 if (high_regs_pushed)
24516 {
24517 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24518 next_hi_reg --)
24519 if (live_regs_mask & (1 << next_hi_reg))
24520 break;
24521 }
24522 else
24523 {
24524 pushable_regs &= ~((1 << regno) - 1);
24525 break;
24526 }
24527 }
24528 }
24529
24530 /* If we had to find a work register and we have not yet
24531 saved the LR then add it to the list of regs to push. */
24532 if (l_mask == (1 << LR_REGNUM))
24533 {
24534 pushable_regs |= l_mask;
24535 real_regs_mask |= l_mask;
24536 l_mask = 0;
24537 }
24538
24539 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24540 RTX_FRAME_RELATED_P (insn) = 1;
24541 }
24542 }
24543
24544 /* Load the pic register before setting the frame pointer,
24545 so we can use r7 as a temporary work register. */
24546 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24547 arm_load_pic_register (live_regs_mask);
24548
24549 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24550 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24551 stack_pointer_rtx);
24552
24553 if (flag_stack_usage_info)
24554 current_function_static_stack_size
24555 = offsets->outgoing_args - offsets->saved_args;
24556
24557 amount = offsets->outgoing_args - offsets->saved_regs;
24558 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24559 if (amount)
24560 {
24561 if (amount < 512)
24562 {
24563 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24564 GEN_INT (- amount)));
24565 RTX_FRAME_RELATED_P (insn) = 1;
24566 }
24567 else
24568 {
24569 rtx reg, dwarf;
24570
24571 /* The stack decrement is too big for an immediate value in a single
24572 insn. In theory we could issue multiple subtracts, but after
24573 three of them it becomes more space efficient to place the full
24574 value in the constant pool and load into a register. (Also the
24575 ARM debugger really likes to see only one stack decrement per
24576 function). So instead we look for a scratch register into which
24577 we can load the decrement, and then we subtract this from the
24578 stack pointer. Unfortunately on the thumb the only available
24579 scratch registers are the argument registers, and we cannot use
24580 these as they may hold arguments to the function. Instead we
24581 attempt to locate a call preserved register which is used by this
24582 function. If we can find one, then we know that it will have
24583 been pushed at the start of the prologue and so we can corrupt
24584 it now. */
24585 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24586 if (live_regs_mask & (1 << regno))
24587 break;
24588
24589 gcc_assert(regno <= LAST_LO_REGNUM);
24590
24591 reg = gen_rtx_REG (SImode, regno);
24592
24593 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24594
24595 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24596 stack_pointer_rtx, reg));
24597
24598 dwarf = gen_rtx_SET (stack_pointer_rtx,
24599 plus_constant (Pmode, stack_pointer_rtx,
24600 -amount));
24601 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24602 RTX_FRAME_RELATED_P (insn) = 1;
24603 }
24604 }
24605
24606 if (frame_pointer_needed)
24607 thumb_set_frame_pointer (offsets);
24608
24609 /* If we are profiling, make sure no instructions are scheduled before
24610 the call to mcount. Similarly if the user has requested no
24611 scheduling in the prolog. Similarly if we want non-call exceptions
24612 using the EABI unwinder, to prevent faulting instructions from being
24613 swapped with a stack adjustment. */
24614 if (crtl->profile || !TARGET_SCHED_PROLOG
24615 || (arm_except_unwind_info (&global_options) == UI_TARGET
24616 && cfun->can_throw_non_call_exceptions))
24617 emit_insn (gen_blockage ());
24618
24619 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24620 if (live_regs_mask & 0xff)
24621 cfun->machine->lr_save_eliminated = 0;
24622 }
24623
24624 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24625 POP instruction can be generated. LR should be replaced by PC. All
24626 the checks required are already done by USE_RETURN_INSN (). Hence,
24627 all we really need to check here is if single register is to be
24628 returned, or multiple register return. */
24629 void
24630 thumb2_expand_return (bool simple_return)
24631 {
24632 int i, num_regs;
24633 unsigned long saved_regs_mask;
24634 arm_stack_offsets *offsets;
24635
24636 offsets = arm_get_frame_offsets ();
24637 saved_regs_mask = offsets->saved_regs_mask;
24638
24639 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24640 if (saved_regs_mask & (1 << i))
24641 num_regs++;
24642
24643 if (!simple_return && saved_regs_mask)
24644 {
24645 if (num_regs == 1)
24646 {
24647 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24648 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24649 rtx addr = gen_rtx_MEM (SImode,
24650 gen_rtx_POST_INC (SImode,
24651 stack_pointer_rtx));
24652 set_mem_alias_set (addr, get_frame_alias_set ());
24653 XVECEXP (par, 0, 0) = ret_rtx;
24654 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24655 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24656 emit_jump_insn (par);
24657 }
24658 else
24659 {
24660 saved_regs_mask &= ~ (1 << LR_REGNUM);
24661 saved_regs_mask |= (1 << PC_REGNUM);
24662 arm_emit_multi_reg_pop (saved_regs_mask);
24663 }
24664 }
24665 else
24666 {
24667 emit_jump_insn (simple_return_rtx);
24668 }
24669 }
24670
24671 void
24672 thumb1_expand_epilogue (void)
24673 {
24674 HOST_WIDE_INT amount;
24675 arm_stack_offsets *offsets;
24676 int regno;
24677
24678 /* Naked functions don't have prologues. */
24679 if (IS_NAKED (arm_current_func_type ()))
24680 return;
24681
24682 offsets = arm_get_frame_offsets ();
24683 amount = offsets->outgoing_args - offsets->saved_regs;
24684
24685 if (frame_pointer_needed)
24686 {
24687 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24688 amount = offsets->locals_base - offsets->saved_regs;
24689 }
24690 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24691
24692 gcc_assert (amount >= 0);
24693 if (amount)
24694 {
24695 emit_insn (gen_blockage ());
24696
24697 if (amount < 512)
24698 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24699 GEN_INT (amount)));
24700 else
24701 {
24702 /* r3 is always free in the epilogue. */
24703 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24704
24705 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24706 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24707 }
24708 }
24709
24710 /* Emit a USE (stack_pointer_rtx), so that
24711 the stack adjustment will not be deleted. */
24712 emit_insn (gen_force_register_use (stack_pointer_rtx));
24713
24714 if (crtl->profile || !TARGET_SCHED_PROLOG)
24715 emit_insn (gen_blockage ());
24716
24717 /* Emit a clobber for each insn that will be restored in the epilogue,
24718 so that flow2 will get register lifetimes correct. */
24719 for (regno = 0; regno < 13; regno++)
24720 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24721 emit_clobber (gen_rtx_REG (SImode, regno));
24722
24723 if (! df_regs_ever_live_p (LR_REGNUM))
24724 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24725 }
24726
24727 /* Epilogue code for APCS frame. */
24728 static void
24729 arm_expand_epilogue_apcs_frame (bool really_return)
24730 {
24731 unsigned long func_type;
24732 unsigned long saved_regs_mask;
24733 int num_regs = 0;
24734 int i;
24735 int floats_from_frame = 0;
24736 arm_stack_offsets *offsets;
24737
24738 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24739 func_type = arm_current_func_type ();
24740
24741 /* Get frame offsets for ARM. */
24742 offsets = arm_get_frame_offsets ();
24743 saved_regs_mask = offsets->saved_regs_mask;
24744
24745 /* Find the offset of the floating-point save area in the frame. */
24746 floats_from_frame
24747 = (offsets->saved_args
24748 + arm_compute_static_chain_stack_bytes ()
24749 - offsets->frame);
24750
24751 /* Compute how many core registers saved and how far away the floats are. */
24752 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24753 if (saved_regs_mask & (1 << i))
24754 {
24755 num_regs++;
24756 floats_from_frame += 4;
24757 }
24758
24759 if (TARGET_HARD_FLOAT && TARGET_VFP)
24760 {
24761 int start_reg;
24762 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24763
24764 /* The offset is from IP_REGNUM. */
24765 int saved_size = arm_get_vfp_saved_size ();
24766 if (saved_size > 0)
24767 {
24768 rtx_insn *insn;
24769 floats_from_frame += saved_size;
24770 insn = emit_insn (gen_addsi3 (ip_rtx,
24771 hard_frame_pointer_rtx,
24772 GEN_INT (-floats_from_frame)));
24773 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24774 ip_rtx, hard_frame_pointer_rtx);
24775 }
24776
24777 /* Generate VFP register multi-pop. */
24778 start_reg = FIRST_VFP_REGNUM;
24779
24780 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24781 /* Look for a case where a reg does not need restoring. */
24782 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24783 && (!df_regs_ever_live_p (i + 1)
24784 || call_used_regs[i + 1]))
24785 {
24786 if (start_reg != i)
24787 arm_emit_vfp_multi_reg_pop (start_reg,
24788 (i - start_reg) / 2,
24789 gen_rtx_REG (SImode,
24790 IP_REGNUM));
24791 start_reg = i + 2;
24792 }
24793
24794 /* Restore the remaining regs that we have discovered (or possibly
24795 even all of them, if the conditional in the for loop never
24796 fired). */
24797 if (start_reg != i)
24798 arm_emit_vfp_multi_reg_pop (start_reg,
24799 (i - start_reg) / 2,
24800 gen_rtx_REG (SImode, IP_REGNUM));
24801 }
24802
24803 if (TARGET_IWMMXT)
24804 {
24805 /* The frame pointer is guaranteed to be non-double-word aligned, as
24806 it is set to double-word-aligned old_stack_pointer - 4. */
24807 rtx_insn *insn;
24808 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24809
24810 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24811 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24812 {
24813 rtx addr = gen_frame_mem (V2SImode,
24814 plus_constant (Pmode, hard_frame_pointer_rtx,
24815 - lrm_count * 4));
24816 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24817 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24818 gen_rtx_REG (V2SImode, i),
24819 NULL_RTX);
24820 lrm_count += 2;
24821 }
24822 }
24823
24824 /* saved_regs_mask should contain IP which contains old stack pointer
24825 at the time of activation creation. Since SP and IP are adjacent registers,
24826 we can restore the value directly into SP. */
24827 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24828 saved_regs_mask &= ~(1 << IP_REGNUM);
24829 saved_regs_mask |= (1 << SP_REGNUM);
24830
24831 /* There are two registers left in saved_regs_mask - LR and PC. We
24832 only need to restore LR (the return address), but to
24833 save time we can load it directly into PC, unless we need a
24834 special function exit sequence, or we are not really returning. */
24835 if (really_return
24836 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24837 && !crtl->calls_eh_return)
24838 /* Delete LR from the register mask, so that LR on
24839 the stack is loaded into the PC in the register mask. */
24840 saved_regs_mask &= ~(1 << LR_REGNUM);
24841 else
24842 saved_regs_mask &= ~(1 << PC_REGNUM);
24843
24844 num_regs = bit_count (saved_regs_mask);
24845 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24846 {
24847 rtx_insn *insn;
24848 emit_insn (gen_blockage ());
24849 /* Unwind the stack to just below the saved registers. */
24850 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24851 hard_frame_pointer_rtx,
24852 GEN_INT (- 4 * num_regs)));
24853
24854 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24855 stack_pointer_rtx, hard_frame_pointer_rtx);
24856 }
24857
24858 arm_emit_multi_reg_pop (saved_regs_mask);
24859
24860 if (IS_INTERRUPT (func_type))
24861 {
24862 /* Interrupt handlers will have pushed the
24863 IP onto the stack, so restore it now. */
24864 rtx_insn *insn;
24865 rtx addr = gen_rtx_MEM (SImode,
24866 gen_rtx_POST_INC (SImode,
24867 stack_pointer_rtx));
24868 set_mem_alias_set (addr, get_frame_alias_set ());
24869 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24870 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24871 gen_rtx_REG (SImode, IP_REGNUM),
24872 NULL_RTX);
24873 }
24874
24875 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24876 return;
24877
24878 if (crtl->calls_eh_return)
24879 emit_insn (gen_addsi3 (stack_pointer_rtx,
24880 stack_pointer_rtx,
24881 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24882
24883 if (IS_STACKALIGN (func_type))
24884 /* Restore the original stack pointer. Before prologue, the stack was
24885 realigned and the original stack pointer saved in r0. For details,
24886 see comment in arm_expand_prologue. */
24887 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24888
24889 emit_jump_insn (simple_return_rtx);
24890 }
24891
24892 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24893 function is not a sibcall. */
24894 void
24895 arm_expand_epilogue (bool really_return)
24896 {
24897 unsigned long func_type;
24898 unsigned long saved_regs_mask;
24899 int num_regs = 0;
24900 int i;
24901 int amount;
24902 arm_stack_offsets *offsets;
24903
24904 func_type = arm_current_func_type ();
24905
24906 /* Naked functions don't have epilogue. Hence, generate return pattern, and
24907 let output_return_instruction take care of instruction emission if any. */
24908 if (IS_NAKED (func_type)
24909 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24910 {
24911 if (really_return)
24912 emit_jump_insn (simple_return_rtx);
24913 return;
24914 }
24915
24916 /* If we are throwing an exception, then we really must be doing a
24917 return, so we can't tail-call. */
24918 gcc_assert (!crtl->calls_eh_return || really_return);
24919
24920 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24921 {
24922 arm_expand_epilogue_apcs_frame (really_return);
24923 return;
24924 }
24925
24926 /* Get frame offsets for ARM. */
24927 offsets = arm_get_frame_offsets ();
24928 saved_regs_mask = offsets->saved_regs_mask;
24929 num_regs = bit_count (saved_regs_mask);
24930
24931 if (frame_pointer_needed)
24932 {
24933 rtx_insn *insn;
24934 /* Restore stack pointer if necessary. */
24935 if (TARGET_ARM)
24936 {
24937 /* In ARM mode, frame pointer points to first saved register.
24938 Restore stack pointer to last saved register. */
24939 amount = offsets->frame - offsets->saved_regs;
24940
24941 /* Force out any pending memory operations that reference stacked data
24942 before stack de-allocation occurs. */
24943 emit_insn (gen_blockage ());
24944 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24945 hard_frame_pointer_rtx,
24946 GEN_INT (amount)));
24947 arm_add_cfa_adjust_cfa_note (insn, amount,
24948 stack_pointer_rtx,
24949 hard_frame_pointer_rtx);
24950
24951 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24952 deleted. */
24953 emit_insn (gen_force_register_use (stack_pointer_rtx));
24954 }
24955 else
24956 {
24957 /* In Thumb-2 mode, the frame pointer points to the last saved
24958 register. */
24959 amount = offsets->locals_base - offsets->saved_regs;
24960 if (amount)
24961 {
24962 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24963 hard_frame_pointer_rtx,
24964 GEN_INT (amount)));
24965 arm_add_cfa_adjust_cfa_note (insn, amount,
24966 hard_frame_pointer_rtx,
24967 hard_frame_pointer_rtx);
24968 }
24969
24970 /* Force out any pending memory operations that reference stacked data
24971 before stack de-allocation occurs. */
24972 emit_insn (gen_blockage ());
24973 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24974 hard_frame_pointer_rtx));
24975 arm_add_cfa_adjust_cfa_note (insn, 0,
24976 stack_pointer_rtx,
24977 hard_frame_pointer_rtx);
24978 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24979 deleted. */
24980 emit_insn (gen_force_register_use (stack_pointer_rtx));
24981 }
24982 }
24983 else
24984 {
24985 /* Pop off outgoing args and local frame to adjust stack pointer to
24986 last saved register. */
24987 amount = offsets->outgoing_args - offsets->saved_regs;
24988 if (amount)
24989 {
24990 rtx_insn *tmp;
24991 /* Force out any pending memory operations that reference stacked data
24992 before stack de-allocation occurs. */
24993 emit_insn (gen_blockage ());
24994 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24995 stack_pointer_rtx,
24996 GEN_INT (amount)));
24997 arm_add_cfa_adjust_cfa_note (tmp, amount,
24998 stack_pointer_rtx, stack_pointer_rtx);
24999 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25000 not deleted. */
25001 emit_insn (gen_force_register_use (stack_pointer_rtx));
25002 }
25003 }
25004
25005 if (TARGET_HARD_FLOAT && TARGET_VFP)
25006 {
25007 /* Generate VFP register multi-pop. */
25008 int end_reg = LAST_VFP_REGNUM + 1;
25009
25010 /* Scan the registers in reverse order. We need to match
25011 any groupings made in the prologue and generate matching
25012 vldm operations. The need to match groups is because,
25013 unlike pop, vldm can only do consecutive regs. */
25014 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25015 /* Look for a case where a reg does not need restoring. */
25016 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25017 && (!df_regs_ever_live_p (i + 1)
25018 || call_used_regs[i + 1]))
25019 {
25020 /* Restore the regs discovered so far (from reg+2 to
25021 end_reg). */
25022 if (end_reg > i + 2)
25023 arm_emit_vfp_multi_reg_pop (i + 2,
25024 (end_reg - (i + 2)) / 2,
25025 stack_pointer_rtx);
25026 end_reg = i;
25027 }
25028
25029 /* Restore the remaining regs that we have discovered (or possibly
25030 even all of them, if the conditional in the for loop never
25031 fired). */
25032 if (end_reg > i + 2)
25033 arm_emit_vfp_multi_reg_pop (i + 2,
25034 (end_reg - (i + 2)) / 2,
25035 stack_pointer_rtx);
25036 }
25037
25038 if (TARGET_IWMMXT)
25039 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25040 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25041 {
25042 rtx_insn *insn;
25043 rtx addr = gen_rtx_MEM (V2SImode,
25044 gen_rtx_POST_INC (SImode,
25045 stack_pointer_rtx));
25046 set_mem_alias_set (addr, get_frame_alias_set ());
25047 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25048 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25049 gen_rtx_REG (V2SImode, i),
25050 NULL_RTX);
25051 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25052 stack_pointer_rtx, stack_pointer_rtx);
25053 }
25054
25055 if (saved_regs_mask)
25056 {
25057 rtx insn;
25058 bool return_in_pc = false;
25059
25060 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25061 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25062 && !IS_STACKALIGN (func_type)
25063 && really_return
25064 && crtl->args.pretend_args_size == 0
25065 && saved_regs_mask & (1 << LR_REGNUM)
25066 && !crtl->calls_eh_return)
25067 {
25068 saved_regs_mask &= ~(1 << LR_REGNUM);
25069 saved_regs_mask |= (1 << PC_REGNUM);
25070 return_in_pc = true;
25071 }
25072
25073 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25074 {
25075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25076 if (saved_regs_mask & (1 << i))
25077 {
25078 rtx addr = gen_rtx_MEM (SImode,
25079 gen_rtx_POST_INC (SImode,
25080 stack_pointer_rtx));
25081 set_mem_alias_set (addr, get_frame_alias_set ());
25082
25083 if (i == PC_REGNUM)
25084 {
25085 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25086 XVECEXP (insn, 0, 0) = ret_rtx;
25087 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25088 addr);
25089 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25090 insn = emit_jump_insn (insn);
25091 }
25092 else
25093 {
25094 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25095 addr));
25096 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25097 gen_rtx_REG (SImode, i),
25098 NULL_RTX);
25099 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25100 stack_pointer_rtx,
25101 stack_pointer_rtx);
25102 }
25103 }
25104 }
25105 else
25106 {
25107 if (TARGET_LDRD
25108 && current_tune->prefer_ldrd_strd
25109 && !optimize_function_for_size_p (cfun))
25110 {
25111 if (TARGET_THUMB2)
25112 thumb2_emit_ldrd_pop (saved_regs_mask);
25113 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25114 arm_emit_ldrd_pop (saved_regs_mask);
25115 else
25116 arm_emit_multi_reg_pop (saved_regs_mask);
25117 }
25118 else
25119 arm_emit_multi_reg_pop (saved_regs_mask);
25120 }
25121
25122 if (return_in_pc)
25123 return;
25124 }
25125
25126 if (crtl->args.pretend_args_size)
25127 {
25128 int i, j;
25129 rtx dwarf = NULL_RTX;
25130 rtx_insn *tmp =
25131 emit_insn (gen_addsi3 (stack_pointer_rtx,
25132 stack_pointer_rtx,
25133 GEN_INT (crtl->args.pretend_args_size)));
25134
25135 RTX_FRAME_RELATED_P (tmp) = 1;
25136
25137 if (cfun->machine->uses_anonymous_args)
25138 {
25139 /* Restore pretend args. Refer arm_expand_prologue on how to save
25140 pretend_args in stack. */
25141 int num_regs = crtl->args.pretend_args_size / 4;
25142 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25143 for (j = 0, i = 0; j < num_regs; i++)
25144 if (saved_regs_mask & (1 << i))
25145 {
25146 rtx reg = gen_rtx_REG (SImode, i);
25147 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25148 j++;
25149 }
25150 REG_NOTES (tmp) = dwarf;
25151 }
25152 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25153 stack_pointer_rtx, stack_pointer_rtx);
25154 }
25155
25156 if (!really_return)
25157 return;
25158
25159 if (crtl->calls_eh_return)
25160 emit_insn (gen_addsi3 (stack_pointer_rtx,
25161 stack_pointer_rtx,
25162 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25163
25164 if (IS_STACKALIGN (func_type))
25165 /* Restore the original stack pointer. Before prologue, the stack was
25166 realigned and the original stack pointer saved in r0. For details,
25167 see comment in arm_expand_prologue. */
25168 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25169
25170 emit_jump_insn (simple_return_rtx);
25171 }
25172
25173 /* Implementation of insn prologue_thumb1_interwork. This is the first
25174 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25175
25176 const char *
25177 thumb1_output_interwork (void)
25178 {
25179 const char * name;
25180 FILE *f = asm_out_file;
25181
25182 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25183 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25184 == SYMBOL_REF);
25185 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25186
25187 /* Generate code sequence to switch us into Thumb mode. */
25188 /* The .code 32 directive has already been emitted by
25189 ASM_DECLARE_FUNCTION_NAME. */
25190 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25191 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25192
25193 /* Generate a label, so that the debugger will notice the
25194 change in instruction sets. This label is also used by
25195 the assembler to bypass the ARM code when this function
25196 is called from a Thumb encoded function elsewhere in the
25197 same file. Hence the definition of STUB_NAME here must
25198 agree with the definition in gas/config/tc-arm.c. */
25199
25200 #define STUB_NAME ".real_start_of"
25201
25202 fprintf (f, "\t.code\t16\n");
25203 #ifdef ARM_PE
25204 if (arm_dllexport_name_p (name))
25205 name = arm_strip_name_encoding (name);
25206 #endif
25207 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25208 fprintf (f, "\t.thumb_func\n");
25209 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25210
25211 return "";
25212 }
25213
25214 /* Handle the case of a double word load into a low register from
25215 a computed memory address. The computed address may involve a
25216 register which is overwritten by the load. */
25217 const char *
25218 thumb_load_double_from_address (rtx *operands)
25219 {
25220 rtx addr;
25221 rtx base;
25222 rtx offset;
25223 rtx arg1;
25224 rtx arg2;
25225
25226 gcc_assert (REG_P (operands[0]));
25227 gcc_assert (MEM_P (operands[1]));
25228
25229 /* Get the memory address. */
25230 addr = XEXP (operands[1], 0);
25231
25232 /* Work out how the memory address is computed. */
25233 switch (GET_CODE (addr))
25234 {
25235 case REG:
25236 operands[2] = adjust_address (operands[1], SImode, 4);
25237
25238 if (REGNO (operands[0]) == REGNO (addr))
25239 {
25240 output_asm_insn ("ldr\t%H0, %2", operands);
25241 output_asm_insn ("ldr\t%0, %1", operands);
25242 }
25243 else
25244 {
25245 output_asm_insn ("ldr\t%0, %1", operands);
25246 output_asm_insn ("ldr\t%H0, %2", operands);
25247 }
25248 break;
25249
25250 case CONST:
25251 /* Compute <address> + 4 for the high order load. */
25252 operands[2] = adjust_address (operands[1], SImode, 4);
25253
25254 output_asm_insn ("ldr\t%0, %1", operands);
25255 output_asm_insn ("ldr\t%H0, %2", operands);
25256 break;
25257
25258 case PLUS:
25259 arg1 = XEXP (addr, 0);
25260 arg2 = XEXP (addr, 1);
25261
25262 if (CONSTANT_P (arg1))
25263 base = arg2, offset = arg1;
25264 else
25265 base = arg1, offset = arg2;
25266
25267 gcc_assert (REG_P (base));
25268
25269 /* Catch the case of <address> = <reg> + <reg> */
25270 if (REG_P (offset))
25271 {
25272 int reg_offset = REGNO (offset);
25273 int reg_base = REGNO (base);
25274 int reg_dest = REGNO (operands[0]);
25275
25276 /* Add the base and offset registers together into the
25277 higher destination register. */
25278 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25279 reg_dest + 1, reg_base, reg_offset);
25280
25281 /* Load the lower destination register from the address in
25282 the higher destination register. */
25283 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25284 reg_dest, reg_dest + 1);
25285
25286 /* Load the higher destination register from its own address
25287 plus 4. */
25288 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25289 reg_dest + 1, reg_dest + 1);
25290 }
25291 else
25292 {
25293 /* Compute <address> + 4 for the high order load. */
25294 operands[2] = adjust_address (operands[1], SImode, 4);
25295
25296 /* If the computed address is held in the low order register
25297 then load the high order register first, otherwise always
25298 load the low order register first. */
25299 if (REGNO (operands[0]) == REGNO (base))
25300 {
25301 output_asm_insn ("ldr\t%H0, %2", operands);
25302 output_asm_insn ("ldr\t%0, %1", operands);
25303 }
25304 else
25305 {
25306 output_asm_insn ("ldr\t%0, %1", operands);
25307 output_asm_insn ("ldr\t%H0, %2", operands);
25308 }
25309 }
25310 break;
25311
25312 case LABEL_REF:
25313 /* With no registers to worry about we can just load the value
25314 directly. */
25315 operands[2] = adjust_address (operands[1], SImode, 4);
25316
25317 output_asm_insn ("ldr\t%H0, %2", operands);
25318 output_asm_insn ("ldr\t%0, %1", operands);
25319 break;
25320
25321 default:
25322 gcc_unreachable ();
25323 }
25324
25325 return "";
25326 }
25327
25328 const char *
25329 thumb_output_move_mem_multiple (int n, rtx *operands)
25330 {
25331 rtx tmp;
25332
25333 switch (n)
25334 {
25335 case 2:
25336 if (REGNO (operands[4]) > REGNO (operands[5]))
25337 {
25338 tmp = operands[4];
25339 operands[4] = operands[5];
25340 operands[5] = tmp;
25341 }
25342 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25343 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25344 break;
25345
25346 case 3:
25347 if (REGNO (operands[4]) > REGNO (operands[5]))
25348 std::swap (operands[4], operands[5]);
25349 if (REGNO (operands[5]) > REGNO (operands[6]))
25350 std::swap (operands[5], operands[6]);
25351 if (REGNO (operands[4]) > REGNO (operands[5]))
25352 std::swap (operands[4], operands[5]);
25353
25354 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25355 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25356 break;
25357
25358 default:
25359 gcc_unreachable ();
25360 }
25361
25362 return "";
25363 }
25364
25365 /* Output a call-via instruction for thumb state. */
25366 const char *
25367 thumb_call_via_reg (rtx reg)
25368 {
25369 int regno = REGNO (reg);
25370 rtx *labelp;
25371
25372 gcc_assert (regno < LR_REGNUM);
25373
25374 /* If we are in the normal text section we can use a single instance
25375 per compilation unit. If we are doing function sections, then we need
25376 an entry per section, since we can't rely on reachability. */
25377 if (in_section == text_section)
25378 {
25379 thumb_call_reg_needed = 1;
25380
25381 if (thumb_call_via_label[regno] == NULL)
25382 thumb_call_via_label[regno] = gen_label_rtx ();
25383 labelp = thumb_call_via_label + regno;
25384 }
25385 else
25386 {
25387 if (cfun->machine->call_via[regno] == NULL)
25388 cfun->machine->call_via[regno] = gen_label_rtx ();
25389 labelp = cfun->machine->call_via + regno;
25390 }
25391
25392 output_asm_insn ("bl\t%a0", labelp);
25393 return "";
25394 }
25395
25396 /* Routines for generating rtl. */
25397 void
25398 thumb_expand_movmemqi (rtx *operands)
25399 {
25400 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25401 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25402 HOST_WIDE_INT len = INTVAL (operands[2]);
25403 HOST_WIDE_INT offset = 0;
25404
25405 while (len >= 12)
25406 {
25407 emit_insn (gen_movmem12b (out, in, out, in));
25408 len -= 12;
25409 }
25410
25411 if (len >= 8)
25412 {
25413 emit_insn (gen_movmem8b (out, in, out, in));
25414 len -= 8;
25415 }
25416
25417 if (len >= 4)
25418 {
25419 rtx reg = gen_reg_rtx (SImode);
25420 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25421 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25422 len -= 4;
25423 offset += 4;
25424 }
25425
25426 if (len >= 2)
25427 {
25428 rtx reg = gen_reg_rtx (HImode);
25429 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25430 plus_constant (Pmode, in,
25431 offset))));
25432 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25433 offset)),
25434 reg));
25435 len -= 2;
25436 offset += 2;
25437 }
25438
25439 if (len)
25440 {
25441 rtx reg = gen_reg_rtx (QImode);
25442 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25443 plus_constant (Pmode, in,
25444 offset))));
25445 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25446 offset)),
25447 reg));
25448 }
25449 }
25450
25451 void
25452 thumb_reload_out_hi (rtx *operands)
25453 {
25454 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25455 }
25456
25457 /* Handle reading a half-word from memory during reload. */
25458 void
25459 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25460 {
25461 gcc_unreachable ();
25462 }
25463
25464 /* Return the length of a function name prefix
25465 that starts with the character 'c'. */
25466 static int
25467 arm_get_strip_length (int c)
25468 {
25469 switch (c)
25470 {
25471 ARM_NAME_ENCODING_LENGTHS
25472 default: return 0;
25473 }
25474 }
25475
25476 /* Return a pointer to a function's name with any
25477 and all prefix encodings stripped from it. */
25478 const char *
25479 arm_strip_name_encoding (const char *name)
25480 {
25481 int skip;
25482
25483 while ((skip = arm_get_strip_length (* name)))
25484 name += skip;
25485
25486 return name;
25487 }
25488
25489 /* If there is a '*' anywhere in the name's prefix, then
25490 emit the stripped name verbatim, otherwise prepend an
25491 underscore if leading underscores are being used. */
25492 void
25493 arm_asm_output_labelref (FILE *stream, const char *name)
25494 {
25495 int skip;
25496 int verbatim = 0;
25497
25498 while ((skip = arm_get_strip_length (* name)))
25499 {
25500 verbatim |= (*name == '*');
25501 name += skip;
25502 }
25503
25504 if (verbatim)
25505 fputs (name, stream);
25506 else
25507 asm_fprintf (stream, "%U%s", name);
25508 }
25509
25510 /* This function is used to emit an EABI tag and its associated value.
25511 We emit the numerical value of the tag in case the assembler does not
25512 support textual tags. (Eg gas prior to 2.20). If requested we include
25513 the tag name in a comment so that anyone reading the assembler output
25514 will know which tag is being set.
25515
25516 This function is not static because arm-c.c needs it too. */
25517
25518 void
25519 arm_emit_eabi_attribute (const char *name, int num, int val)
25520 {
25521 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25522 if (flag_verbose_asm || flag_debug_asm)
25523 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25524 asm_fprintf (asm_out_file, "\n");
25525 }
25526
25527 /* This function is used to print CPU tuning information as comment
25528 in assembler file. Pointers are not printed for now. */
25529
25530 void
25531 arm_print_tune_info (void)
25532 {
25533 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25534 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25535 current_tune->constant_limit);
25536 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25537 current_tune->max_insns_skipped);
25538 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25539 current_tune->prefetch.num_slots);
25540 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25541 current_tune->prefetch.l1_cache_size);
25542 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25543 current_tune->prefetch.l1_cache_line_size);
25544 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25545 (int) current_tune->prefer_constant_pool);
25546 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25547 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25548 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25549 current_tune->branch_cost (false, false));
25550 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25551 current_tune->branch_cost (false, true));
25552 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25553 current_tune->branch_cost (true, false));
25554 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25555 current_tune->branch_cost (true, true));
25556 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25557 (int) current_tune->prefer_ldrd_strd);
25558 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25559 (int) current_tune->logical_op_non_short_circuit_thumb,
25560 (int) current_tune->logical_op_non_short_circuit_arm);
25561 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25562 (int) current_tune->prefer_neon_for_64bits);
25563 asm_fprintf (asm_out_file,
25564 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25565 (int) current_tune->disparage_flag_setting_t16_encodings);
25566 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25567 (int) current_tune->string_ops_prefer_neon);
25568 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25569 current_tune->max_insns_inline_memset);
25570 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25571 current_tune->fusible_ops);
25572 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25573 (int) current_tune->sched_autopref);
25574 }
25575
25576 static void
25577 arm_file_start (void)
25578 {
25579 int val;
25580
25581 if (TARGET_BPABI)
25582 {
25583 const char *fpu_name;
25584 if (arm_selected_arch)
25585 {
25586 /* armv7ve doesn't support any extensions. */
25587 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25588 {
25589 /* Keep backward compatability for assemblers
25590 which don't support armv7ve. */
25591 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25592 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25593 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25594 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25595 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25596 }
25597 else
25598 {
25599 const char* pos = strchr (arm_selected_arch->name, '+');
25600 if (pos)
25601 {
25602 char buf[15];
25603 gcc_assert (strlen (arm_selected_arch->name)
25604 <= sizeof (buf) / sizeof (*pos));
25605 strncpy (buf, arm_selected_arch->name,
25606 (pos - arm_selected_arch->name) * sizeof (*pos));
25607 buf[pos - arm_selected_arch->name] = '\0';
25608 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25609 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25610 }
25611 else
25612 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25613 }
25614 }
25615 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25616 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25617 else
25618 {
25619 const char* truncated_name
25620 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25621 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25622 }
25623
25624 if (print_tune_info)
25625 arm_print_tune_info ();
25626
25627 if (TARGET_SOFT_FLOAT)
25628 {
25629 fpu_name = "softvfp";
25630 }
25631 else
25632 {
25633 fpu_name = arm_fpu_desc->name;
25634 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25635 {
25636 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25637 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25638
25639 if (TARGET_HARD_FLOAT_ABI)
25640 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25641 }
25642 }
25643 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25644
25645 /* Some of these attributes only apply when the corresponding features
25646 are used. However we don't have any easy way of figuring this out.
25647 Conservatively record the setting that would have been used. */
25648
25649 if (flag_rounding_math)
25650 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25651
25652 if (!flag_unsafe_math_optimizations)
25653 {
25654 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25655 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25656 }
25657 if (flag_signaling_nans)
25658 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25659
25660 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25661 flag_finite_math_only ? 1 : 3);
25662
25663 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25664 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25665 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25666 flag_short_enums ? 1 : 2);
25667
25668 /* Tag_ABI_optimization_goals. */
25669 if (optimize_size)
25670 val = 4;
25671 else if (optimize >= 2)
25672 val = 2;
25673 else if (optimize)
25674 val = 1;
25675 else
25676 val = 6;
25677 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25678
25679 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25680 unaligned_access);
25681
25682 if (arm_fp16_format)
25683 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25684 (int) arm_fp16_format);
25685
25686 if (arm_lang_output_object_attributes_hook)
25687 arm_lang_output_object_attributes_hook();
25688 }
25689
25690 default_file_start ();
25691 }
25692
25693 static void
25694 arm_file_end (void)
25695 {
25696 int regno;
25697
25698 if (NEED_INDICATE_EXEC_STACK)
25699 /* Add .note.GNU-stack. */
25700 file_end_indicate_exec_stack ();
25701
25702 if (! thumb_call_reg_needed)
25703 return;
25704
25705 switch_to_section (text_section);
25706 asm_fprintf (asm_out_file, "\t.code 16\n");
25707 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25708
25709 for (regno = 0; regno < LR_REGNUM; regno++)
25710 {
25711 rtx label = thumb_call_via_label[regno];
25712
25713 if (label != 0)
25714 {
25715 targetm.asm_out.internal_label (asm_out_file, "L",
25716 CODE_LABEL_NUMBER (label));
25717 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25718 }
25719 }
25720 }
25721
25722 #ifndef ARM_PE
25723 /* Symbols in the text segment can be accessed without indirecting via the
25724 constant pool; it may take an extra binary operation, but this is still
25725 faster than indirecting via memory. Don't do this when not optimizing,
25726 since we won't be calculating al of the offsets necessary to do this
25727 simplification. */
25728
25729 static void
25730 arm_encode_section_info (tree decl, rtx rtl, int first)
25731 {
25732 if (optimize > 0 && TREE_CONSTANT (decl))
25733 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25734
25735 default_encode_section_info (decl, rtl, first);
25736 }
25737 #endif /* !ARM_PE */
25738
25739 static void
25740 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25741 {
25742 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25743 && !strcmp (prefix, "L"))
25744 {
25745 arm_ccfsm_state = 0;
25746 arm_target_insn = NULL;
25747 }
25748 default_internal_label (stream, prefix, labelno);
25749 }
25750
25751 /* Output code to add DELTA to the first argument, and then jump
25752 to FUNCTION. Used for C++ multiple inheritance. */
25753 static void
25754 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25755 HOST_WIDE_INT delta,
25756 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25757 tree function)
25758 {
25759 static int thunk_label = 0;
25760 char label[256];
25761 char labelpc[256];
25762 int mi_delta = delta;
25763 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25764 int shift = 0;
25765 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25766 ? 1 : 0);
25767 if (mi_delta < 0)
25768 mi_delta = - mi_delta;
25769
25770 final_start_function (emit_barrier (), file, 1);
25771
25772 if (TARGET_THUMB1)
25773 {
25774 int labelno = thunk_label++;
25775 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25776 /* Thunks are entered in arm mode when avaiable. */
25777 if (TARGET_THUMB1_ONLY)
25778 {
25779 /* push r3 so we can use it as a temporary. */
25780 /* TODO: Omit this save if r3 is not used. */
25781 fputs ("\tpush {r3}\n", file);
25782 fputs ("\tldr\tr3, ", file);
25783 }
25784 else
25785 {
25786 fputs ("\tldr\tr12, ", file);
25787 }
25788 assemble_name (file, label);
25789 fputc ('\n', file);
25790 if (flag_pic)
25791 {
25792 /* If we are generating PIC, the ldr instruction below loads
25793 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25794 the address of the add + 8, so we have:
25795
25796 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25797 = target + 1.
25798
25799 Note that we have "+ 1" because some versions of GNU ld
25800 don't set the low bit of the result for R_ARM_REL32
25801 relocations against thumb function symbols.
25802 On ARMv6M this is +4, not +8. */
25803 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25804 assemble_name (file, labelpc);
25805 fputs (":\n", file);
25806 if (TARGET_THUMB1_ONLY)
25807 {
25808 /* This is 2 insns after the start of the thunk, so we know it
25809 is 4-byte aligned. */
25810 fputs ("\tadd\tr3, pc, r3\n", file);
25811 fputs ("\tmov r12, r3\n", file);
25812 }
25813 else
25814 fputs ("\tadd\tr12, pc, r12\n", file);
25815 }
25816 else if (TARGET_THUMB1_ONLY)
25817 fputs ("\tmov r12, r3\n", file);
25818 }
25819 if (TARGET_THUMB1_ONLY)
25820 {
25821 if (mi_delta > 255)
25822 {
25823 fputs ("\tldr\tr3, ", file);
25824 assemble_name (file, label);
25825 fputs ("+4\n", file);
25826 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25827 mi_op, this_regno, this_regno);
25828 }
25829 else if (mi_delta != 0)
25830 {
25831 /* Thumb1 unified syntax requires s suffix in instruction name when
25832 one of the operands is immediate. */
25833 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25834 mi_op, this_regno, this_regno,
25835 mi_delta);
25836 }
25837 }
25838 else
25839 {
25840 /* TODO: Use movw/movt for large constants when available. */
25841 while (mi_delta != 0)
25842 {
25843 if ((mi_delta & (3 << shift)) == 0)
25844 shift += 2;
25845 else
25846 {
25847 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25848 mi_op, this_regno, this_regno,
25849 mi_delta & (0xff << shift));
25850 mi_delta &= ~(0xff << shift);
25851 shift += 8;
25852 }
25853 }
25854 }
25855 if (TARGET_THUMB1)
25856 {
25857 if (TARGET_THUMB1_ONLY)
25858 fputs ("\tpop\t{r3}\n", file);
25859
25860 fprintf (file, "\tbx\tr12\n");
25861 ASM_OUTPUT_ALIGN (file, 2);
25862 assemble_name (file, label);
25863 fputs (":\n", file);
25864 if (flag_pic)
25865 {
25866 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25867 rtx tem = XEXP (DECL_RTL (function), 0);
25868 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25869 pipeline offset is four rather than eight. Adjust the offset
25870 accordingly. */
25871 tem = plus_constant (GET_MODE (tem), tem,
25872 TARGET_THUMB1_ONLY ? -3 : -7);
25873 tem = gen_rtx_MINUS (GET_MODE (tem),
25874 tem,
25875 gen_rtx_SYMBOL_REF (Pmode,
25876 ggc_strdup (labelpc)));
25877 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25878 }
25879 else
25880 /* Output ".word .LTHUNKn". */
25881 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25882
25883 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25884 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25885 }
25886 else
25887 {
25888 fputs ("\tb\t", file);
25889 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25890 if (NEED_PLT_RELOC)
25891 fputs ("(PLT)", file);
25892 fputc ('\n', file);
25893 }
25894
25895 final_end_function ();
25896 }
25897
25898 int
25899 arm_emit_vector_const (FILE *file, rtx x)
25900 {
25901 int i;
25902 const char * pattern;
25903
25904 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25905
25906 switch (GET_MODE (x))
25907 {
25908 case V2SImode: pattern = "%08x"; break;
25909 case V4HImode: pattern = "%04x"; break;
25910 case V8QImode: pattern = "%02x"; break;
25911 default: gcc_unreachable ();
25912 }
25913
25914 fprintf (file, "0x");
25915 for (i = CONST_VECTOR_NUNITS (x); i--;)
25916 {
25917 rtx element;
25918
25919 element = CONST_VECTOR_ELT (x, i);
25920 fprintf (file, pattern, INTVAL (element));
25921 }
25922
25923 return 1;
25924 }
25925
25926 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25927 HFmode constant pool entries are actually loaded with ldr. */
25928 void
25929 arm_emit_fp16_const (rtx c)
25930 {
25931 REAL_VALUE_TYPE r;
25932 long bits;
25933
25934 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25935 bits = real_to_target (NULL, &r, HFmode);
25936 if (WORDS_BIG_ENDIAN)
25937 assemble_zeros (2);
25938 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25939 if (!WORDS_BIG_ENDIAN)
25940 assemble_zeros (2);
25941 }
25942
25943 const char *
25944 arm_output_load_gr (rtx *operands)
25945 {
25946 rtx reg;
25947 rtx offset;
25948 rtx wcgr;
25949 rtx sum;
25950
25951 if (!MEM_P (operands [1])
25952 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25953 || !REG_P (reg = XEXP (sum, 0))
25954 || !CONST_INT_P (offset = XEXP (sum, 1))
25955 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25956 return "wldrw%?\t%0, %1";
25957
25958 /* Fix up an out-of-range load of a GR register. */
25959 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25960 wcgr = operands[0];
25961 operands[0] = reg;
25962 output_asm_insn ("ldr%?\t%0, %1", operands);
25963
25964 operands[0] = wcgr;
25965 operands[1] = reg;
25966 output_asm_insn ("tmcr%?\t%0, %1", operands);
25967 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25968
25969 return "";
25970 }
25971
25972 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25973
25974 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25975 named arg and all anonymous args onto the stack.
25976 XXX I know the prologue shouldn't be pushing registers, but it is faster
25977 that way. */
25978
25979 static void
25980 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25981 machine_mode mode,
25982 tree type,
25983 int *pretend_size,
25984 int second_time ATTRIBUTE_UNUSED)
25985 {
25986 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25987 int nregs;
25988
25989 cfun->machine->uses_anonymous_args = 1;
25990 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25991 {
25992 nregs = pcum->aapcs_ncrn;
25993 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25994 nregs++;
25995 }
25996 else
25997 nregs = pcum->nregs;
25998
25999 if (nregs < NUM_ARG_REGS)
26000 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26001 }
26002
26003 /* We can't rely on the caller doing the proper promotion when
26004 using APCS or ATPCS. */
26005
26006 static bool
26007 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26008 {
26009 return !TARGET_AAPCS_BASED;
26010 }
26011
26012 static machine_mode
26013 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26014 machine_mode mode,
26015 int *punsignedp ATTRIBUTE_UNUSED,
26016 const_tree fntype ATTRIBUTE_UNUSED,
26017 int for_return ATTRIBUTE_UNUSED)
26018 {
26019 if (GET_MODE_CLASS (mode) == MODE_INT
26020 && GET_MODE_SIZE (mode) < 4)
26021 return SImode;
26022
26023 return mode;
26024 }
26025
26026 /* AAPCS based ABIs use short enums by default. */
26027
26028 static bool
26029 arm_default_short_enums (void)
26030 {
26031 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26032 }
26033
26034
26035 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26036
26037 static bool
26038 arm_align_anon_bitfield (void)
26039 {
26040 return TARGET_AAPCS_BASED;
26041 }
26042
26043
26044 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26045
26046 static tree
26047 arm_cxx_guard_type (void)
26048 {
26049 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26050 }
26051
26052
26053 /* The EABI says test the least significant bit of a guard variable. */
26054
26055 static bool
26056 arm_cxx_guard_mask_bit (void)
26057 {
26058 return TARGET_AAPCS_BASED;
26059 }
26060
26061
26062 /* The EABI specifies that all array cookies are 8 bytes long. */
26063
26064 static tree
26065 arm_get_cookie_size (tree type)
26066 {
26067 tree size;
26068
26069 if (!TARGET_AAPCS_BASED)
26070 return default_cxx_get_cookie_size (type);
26071
26072 size = build_int_cst (sizetype, 8);
26073 return size;
26074 }
26075
26076
26077 /* The EABI says that array cookies should also contain the element size. */
26078
26079 static bool
26080 arm_cookie_has_size (void)
26081 {
26082 return TARGET_AAPCS_BASED;
26083 }
26084
26085
26086 /* The EABI says constructors and destructors should return a pointer to
26087 the object constructed/destroyed. */
26088
26089 static bool
26090 arm_cxx_cdtor_returns_this (void)
26091 {
26092 return TARGET_AAPCS_BASED;
26093 }
26094
26095 /* The EABI says that an inline function may never be the key
26096 method. */
26097
26098 static bool
26099 arm_cxx_key_method_may_be_inline (void)
26100 {
26101 return !TARGET_AAPCS_BASED;
26102 }
26103
26104 static void
26105 arm_cxx_determine_class_data_visibility (tree decl)
26106 {
26107 if (!TARGET_AAPCS_BASED
26108 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26109 return;
26110
26111 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26112 is exported. However, on systems without dynamic vague linkage,
26113 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26114 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26115 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26116 else
26117 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26118 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26119 }
26120
26121 static bool
26122 arm_cxx_class_data_always_comdat (void)
26123 {
26124 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26125 vague linkage if the class has no key function. */
26126 return !TARGET_AAPCS_BASED;
26127 }
26128
26129
26130 /* The EABI says __aeabi_atexit should be used to register static
26131 destructors. */
26132
26133 static bool
26134 arm_cxx_use_aeabi_atexit (void)
26135 {
26136 return TARGET_AAPCS_BASED;
26137 }
26138
26139
26140 void
26141 arm_set_return_address (rtx source, rtx scratch)
26142 {
26143 arm_stack_offsets *offsets;
26144 HOST_WIDE_INT delta;
26145 rtx addr;
26146 unsigned long saved_regs;
26147
26148 offsets = arm_get_frame_offsets ();
26149 saved_regs = offsets->saved_regs_mask;
26150
26151 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26152 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26153 else
26154 {
26155 if (frame_pointer_needed)
26156 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26157 else
26158 {
26159 /* LR will be the first saved register. */
26160 delta = offsets->outgoing_args - (offsets->frame + 4);
26161
26162
26163 if (delta >= 4096)
26164 {
26165 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26166 GEN_INT (delta & ~4095)));
26167 addr = scratch;
26168 delta &= 4095;
26169 }
26170 else
26171 addr = stack_pointer_rtx;
26172
26173 addr = plus_constant (Pmode, addr, delta);
26174 }
26175 /* The store needs to be marked as frame related in order to prevent
26176 DSE from deleting it as dead if it is based on fp. */
26177 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26178 RTX_FRAME_RELATED_P (insn) = 1;
26179 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26180 }
26181 }
26182
26183
26184 void
26185 thumb_set_return_address (rtx source, rtx scratch)
26186 {
26187 arm_stack_offsets *offsets;
26188 HOST_WIDE_INT delta;
26189 HOST_WIDE_INT limit;
26190 int reg;
26191 rtx addr;
26192 unsigned long mask;
26193
26194 emit_use (source);
26195
26196 offsets = arm_get_frame_offsets ();
26197 mask = offsets->saved_regs_mask;
26198 if (mask & (1 << LR_REGNUM))
26199 {
26200 limit = 1024;
26201 /* Find the saved regs. */
26202 if (frame_pointer_needed)
26203 {
26204 delta = offsets->soft_frame - offsets->saved_args;
26205 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26206 if (TARGET_THUMB1)
26207 limit = 128;
26208 }
26209 else
26210 {
26211 delta = offsets->outgoing_args - offsets->saved_args;
26212 reg = SP_REGNUM;
26213 }
26214 /* Allow for the stack frame. */
26215 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26216 delta -= 16;
26217 /* The link register is always the first saved register. */
26218 delta -= 4;
26219
26220 /* Construct the address. */
26221 addr = gen_rtx_REG (SImode, reg);
26222 if (delta > limit)
26223 {
26224 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26225 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26226 addr = scratch;
26227 }
26228 else
26229 addr = plus_constant (Pmode, addr, delta);
26230
26231 /* The store needs to be marked as frame related in order to prevent
26232 DSE from deleting it as dead if it is based on fp. */
26233 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26234 RTX_FRAME_RELATED_P (insn) = 1;
26235 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26236 }
26237 else
26238 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26239 }
26240
26241 /* Implements target hook vector_mode_supported_p. */
26242 bool
26243 arm_vector_mode_supported_p (machine_mode mode)
26244 {
26245 /* Neon also supports V2SImode, etc. listed in the clause below. */
26246 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26247 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26248 return true;
26249
26250 if ((TARGET_NEON || TARGET_IWMMXT)
26251 && ((mode == V2SImode)
26252 || (mode == V4HImode)
26253 || (mode == V8QImode)))
26254 return true;
26255
26256 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26257 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26258 || mode == V2HAmode))
26259 return true;
26260
26261 return false;
26262 }
26263
26264 /* Implements target hook array_mode_supported_p. */
26265
26266 static bool
26267 arm_array_mode_supported_p (machine_mode mode,
26268 unsigned HOST_WIDE_INT nelems)
26269 {
26270 if (TARGET_NEON
26271 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26272 && (nelems >= 2 && nelems <= 4))
26273 return true;
26274
26275 return false;
26276 }
26277
26278 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26279 registers when autovectorizing for Neon, at least until multiple vector
26280 widths are supported properly by the middle-end. */
26281
26282 static machine_mode
26283 arm_preferred_simd_mode (machine_mode mode)
26284 {
26285 if (TARGET_NEON)
26286 switch (mode)
26287 {
26288 case SFmode:
26289 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26290 case SImode:
26291 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26292 case HImode:
26293 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26294 case QImode:
26295 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26296 case DImode:
26297 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26298 return V2DImode;
26299 break;
26300
26301 default:;
26302 }
26303
26304 if (TARGET_REALLY_IWMMXT)
26305 switch (mode)
26306 {
26307 case SImode:
26308 return V2SImode;
26309 case HImode:
26310 return V4HImode;
26311 case QImode:
26312 return V8QImode;
26313
26314 default:;
26315 }
26316
26317 return word_mode;
26318 }
26319
26320 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26321
26322 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26323 using r0-r4 for function arguments, r7 for the stack frame and don't have
26324 enough left over to do doubleword arithmetic. For Thumb-2 all the
26325 potentially problematic instructions accept high registers so this is not
26326 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26327 that require many low registers. */
26328 static bool
26329 arm_class_likely_spilled_p (reg_class_t rclass)
26330 {
26331 if ((TARGET_THUMB1 && rclass == LO_REGS)
26332 || rclass == CC_REG)
26333 return true;
26334
26335 return false;
26336 }
26337
26338 /* Implements target hook small_register_classes_for_mode_p. */
26339 bool
26340 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26341 {
26342 return TARGET_THUMB1;
26343 }
26344
26345 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26346 ARM insns and therefore guarantee that the shift count is modulo 256.
26347 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26348 guarantee no particular behavior for out-of-range counts. */
26349
26350 static unsigned HOST_WIDE_INT
26351 arm_shift_truncation_mask (machine_mode mode)
26352 {
26353 return mode == SImode ? 255 : 0;
26354 }
26355
26356
26357 /* Map internal gcc register numbers to DWARF2 register numbers. */
26358
26359 unsigned int
26360 arm_dbx_register_number (unsigned int regno)
26361 {
26362 if (regno < 16)
26363 return regno;
26364
26365 if (IS_VFP_REGNUM (regno))
26366 {
26367 /* See comment in arm_dwarf_register_span. */
26368 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26369 return 64 + regno - FIRST_VFP_REGNUM;
26370 else
26371 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26372 }
26373
26374 if (IS_IWMMXT_GR_REGNUM (regno))
26375 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26376
26377 if (IS_IWMMXT_REGNUM (regno))
26378 return 112 + regno - FIRST_IWMMXT_REGNUM;
26379
26380 gcc_unreachable ();
26381 }
26382
26383 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26384 GCC models tham as 64 32-bit registers, so we need to describe this to
26385 the DWARF generation code. Other registers can use the default. */
26386 static rtx
26387 arm_dwarf_register_span (rtx rtl)
26388 {
26389 machine_mode mode;
26390 unsigned regno;
26391 rtx parts[16];
26392 int nregs;
26393 int i;
26394
26395 regno = REGNO (rtl);
26396 if (!IS_VFP_REGNUM (regno))
26397 return NULL_RTX;
26398
26399 /* XXX FIXME: The EABI defines two VFP register ranges:
26400 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26401 256-287: D0-D31
26402 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26403 corresponding D register. Until GDB supports this, we shall use the
26404 legacy encodings. We also use these encodings for D0-D15 for
26405 compatibility with older debuggers. */
26406 mode = GET_MODE (rtl);
26407 if (GET_MODE_SIZE (mode) < 8)
26408 return NULL_RTX;
26409
26410 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26411 {
26412 nregs = GET_MODE_SIZE (mode) / 4;
26413 for (i = 0; i < nregs; i += 2)
26414 if (TARGET_BIG_END)
26415 {
26416 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26417 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26418 }
26419 else
26420 {
26421 parts[i] = gen_rtx_REG (SImode, regno + i);
26422 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26423 }
26424 }
26425 else
26426 {
26427 nregs = GET_MODE_SIZE (mode) / 8;
26428 for (i = 0; i < nregs; i++)
26429 parts[i] = gen_rtx_REG (DImode, regno + i);
26430 }
26431
26432 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26433 }
26434
26435 #if ARM_UNWIND_INFO
26436 /* Emit unwind directives for a store-multiple instruction or stack pointer
26437 push during alignment.
26438 These should only ever be generated by the function prologue code, so
26439 expect them to have a particular form.
26440 The store-multiple instruction sometimes pushes pc as the last register,
26441 although it should not be tracked into unwind information, or for -Os
26442 sometimes pushes some dummy registers before first register that needs
26443 to be tracked in unwind information; such dummy registers are there just
26444 to avoid separate stack adjustment, and will not be restored in the
26445 epilogue. */
26446
26447 static void
26448 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26449 {
26450 int i;
26451 HOST_WIDE_INT offset;
26452 HOST_WIDE_INT nregs;
26453 int reg_size;
26454 unsigned reg;
26455 unsigned lastreg;
26456 unsigned padfirst = 0, padlast = 0;
26457 rtx e;
26458
26459 e = XVECEXP (p, 0, 0);
26460 gcc_assert (GET_CODE (e) == SET);
26461
26462 /* First insn will adjust the stack pointer. */
26463 gcc_assert (GET_CODE (e) == SET
26464 && REG_P (SET_DEST (e))
26465 && REGNO (SET_DEST (e)) == SP_REGNUM
26466 && GET_CODE (SET_SRC (e)) == PLUS);
26467
26468 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26469 nregs = XVECLEN (p, 0) - 1;
26470 gcc_assert (nregs);
26471
26472 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26473 if (reg < 16)
26474 {
26475 /* For -Os dummy registers can be pushed at the beginning to
26476 avoid separate stack pointer adjustment. */
26477 e = XVECEXP (p, 0, 1);
26478 e = XEXP (SET_DEST (e), 0);
26479 if (GET_CODE (e) == PLUS)
26480 padfirst = INTVAL (XEXP (e, 1));
26481 gcc_assert (padfirst == 0 || optimize_size);
26482 /* The function prologue may also push pc, but not annotate it as it is
26483 never restored. We turn this into a stack pointer adjustment. */
26484 e = XVECEXP (p, 0, nregs);
26485 e = XEXP (SET_DEST (e), 0);
26486 if (GET_CODE (e) == PLUS)
26487 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26488 else
26489 padlast = offset - 4;
26490 gcc_assert (padlast == 0 || padlast == 4);
26491 if (padlast == 4)
26492 fprintf (asm_out_file, "\t.pad #4\n");
26493 reg_size = 4;
26494 fprintf (asm_out_file, "\t.save {");
26495 }
26496 else if (IS_VFP_REGNUM (reg))
26497 {
26498 reg_size = 8;
26499 fprintf (asm_out_file, "\t.vsave {");
26500 }
26501 else
26502 /* Unknown register type. */
26503 gcc_unreachable ();
26504
26505 /* If the stack increment doesn't match the size of the saved registers,
26506 something has gone horribly wrong. */
26507 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26508
26509 offset = padfirst;
26510 lastreg = 0;
26511 /* The remaining insns will describe the stores. */
26512 for (i = 1; i <= nregs; i++)
26513 {
26514 /* Expect (set (mem <addr>) (reg)).
26515 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26516 e = XVECEXP (p, 0, i);
26517 gcc_assert (GET_CODE (e) == SET
26518 && MEM_P (SET_DEST (e))
26519 && REG_P (SET_SRC (e)));
26520
26521 reg = REGNO (SET_SRC (e));
26522 gcc_assert (reg >= lastreg);
26523
26524 if (i != 1)
26525 fprintf (asm_out_file, ", ");
26526 /* We can't use %r for vfp because we need to use the
26527 double precision register names. */
26528 if (IS_VFP_REGNUM (reg))
26529 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26530 else
26531 asm_fprintf (asm_out_file, "%r", reg);
26532
26533 #ifdef ENABLE_CHECKING
26534 /* Check that the addresses are consecutive. */
26535 e = XEXP (SET_DEST (e), 0);
26536 if (GET_CODE (e) == PLUS)
26537 gcc_assert (REG_P (XEXP (e, 0))
26538 && REGNO (XEXP (e, 0)) == SP_REGNUM
26539 && CONST_INT_P (XEXP (e, 1))
26540 && offset == INTVAL (XEXP (e, 1)));
26541 else
26542 gcc_assert (i == 1
26543 && REG_P (e)
26544 && REGNO (e) == SP_REGNUM);
26545 offset += reg_size;
26546 #endif
26547 }
26548 fprintf (asm_out_file, "}\n");
26549 if (padfirst)
26550 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26551 }
26552
26553 /* Emit unwind directives for a SET. */
26554
26555 static void
26556 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26557 {
26558 rtx e0;
26559 rtx e1;
26560 unsigned reg;
26561
26562 e0 = XEXP (p, 0);
26563 e1 = XEXP (p, 1);
26564 switch (GET_CODE (e0))
26565 {
26566 case MEM:
26567 /* Pushing a single register. */
26568 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26569 || !REG_P (XEXP (XEXP (e0, 0), 0))
26570 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26571 abort ();
26572
26573 asm_fprintf (asm_out_file, "\t.save ");
26574 if (IS_VFP_REGNUM (REGNO (e1)))
26575 asm_fprintf(asm_out_file, "{d%d}\n",
26576 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26577 else
26578 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26579 break;
26580
26581 case REG:
26582 if (REGNO (e0) == SP_REGNUM)
26583 {
26584 /* A stack increment. */
26585 if (GET_CODE (e1) != PLUS
26586 || !REG_P (XEXP (e1, 0))
26587 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26588 || !CONST_INT_P (XEXP (e1, 1)))
26589 abort ();
26590
26591 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26592 -INTVAL (XEXP (e1, 1)));
26593 }
26594 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26595 {
26596 HOST_WIDE_INT offset;
26597
26598 if (GET_CODE (e1) == PLUS)
26599 {
26600 if (!REG_P (XEXP (e1, 0))
26601 || !CONST_INT_P (XEXP (e1, 1)))
26602 abort ();
26603 reg = REGNO (XEXP (e1, 0));
26604 offset = INTVAL (XEXP (e1, 1));
26605 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26606 HARD_FRAME_POINTER_REGNUM, reg,
26607 offset);
26608 }
26609 else if (REG_P (e1))
26610 {
26611 reg = REGNO (e1);
26612 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26613 HARD_FRAME_POINTER_REGNUM, reg);
26614 }
26615 else
26616 abort ();
26617 }
26618 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26619 {
26620 /* Move from sp to reg. */
26621 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26622 }
26623 else if (GET_CODE (e1) == PLUS
26624 && REG_P (XEXP (e1, 0))
26625 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26626 && CONST_INT_P (XEXP (e1, 1)))
26627 {
26628 /* Set reg to offset from sp. */
26629 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26630 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26631 }
26632 else
26633 abort ();
26634 break;
26635
26636 default:
26637 abort ();
26638 }
26639 }
26640
26641
26642 /* Emit unwind directives for the given insn. */
26643
26644 static void
26645 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26646 {
26647 rtx note, pat;
26648 bool handled_one = false;
26649
26650 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26651 return;
26652
26653 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26654 && (TREE_NOTHROW (current_function_decl)
26655 || crtl->all_throwers_are_sibcalls))
26656 return;
26657
26658 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26659 return;
26660
26661 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26662 {
26663 switch (REG_NOTE_KIND (note))
26664 {
26665 case REG_FRAME_RELATED_EXPR:
26666 pat = XEXP (note, 0);
26667 goto found;
26668
26669 case REG_CFA_REGISTER:
26670 pat = XEXP (note, 0);
26671 if (pat == NULL)
26672 {
26673 pat = PATTERN (insn);
26674 if (GET_CODE (pat) == PARALLEL)
26675 pat = XVECEXP (pat, 0, 0);
26676 }
26677
26678 /* Only emitted for IS_STACKALIGN re-alignment. */
26679 {
26680 rtx dest, src;
26681 unsigned reg;
26682
26683 src = SET_SRC (pat);
26684 dest = SET_DEST (pat);
26685
26686 gcc_assert (src == stack_pointer_rtx);
26687 reg = REGNO (dest);
26688 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26689 reg + 0x90, reg);
26690 }
26691 handled_one = true;
26692 break;
26693
26694 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26695 to get correct dwarf information for shrink-wrap. We should not
26696 emit unwind information for it because these are used either for
26697 pretend arguments or notes to adjust sp and restore registers from
26698 stack. */
26699 case REG_CFA_DEF_CFA:
26700 case REG_CFA_ADJUST_CFA:
26701 case REG_CFA_RESTORE:
26702 return;
26703
26704 case REG_CFA_EXPRESSION:
26705 case REG_CFA_OFFSET:
26706 /* ??? Only handling here what we actually emit. */
26707 gcc_unreachable ();
26708
26709 default:
26710 break;
26711 }
26712 }
26713 if (handled_one)
26714 return;
26715 pat = PATTERN (insn);
26716 found:
26717
26718 switch (GET_CODE (pat))
26719 {
26720 case SET:
26721 arm_unwind_emit_set (asm_out_file, pat);
26722 break;
26723
26724 case SEQUENCE:
26725 /* Store multiple. */
26726 arm_unwind_emit_sequence (asm_out_file, pat);
26727 break;
26728
26729 default:
26730 abort();
26731 }
26732 }
26733
26734
26735 /* Output a reference from a function exception table to the type_info
26736 object X. The EABI specifies that the symbol should be relocated by
26737 an R_ARM_TARGET2 relocation. */
26738
26739 static bool
26740 arm_output_ttype (rtx x)
26741 {
26742 fputs ("\t.word\t", asm_out_file);
26743 output_addr_const (asm_out_file, x);
26744 /* Use special relocations for symbol references. */
26745 if (!CONST_INT_P (x))
26746 fputs ("(TARGET2)", asm_out_file);
26747 fputc ('\n', asm_out_file);
26748
26749 return TRUE;
26750 }
26751
26752 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26753
26754 static void
26755 arm_asm_emit_except_personality (rtx personality)
26756 {
26757 fputs ("\t.personality\t", asm_out_file);
26758 output_addr_const (asm_out_file, personality);
26759 fputc ('\n', asm_out_file);
26760 }
26761
26762 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26763
26764 static void
26765 arm_asm_init_sections (void)
26766 {
26767 exception_section = get_unnamed_section (0, output_section_asm_op,
26768 "\t.handlerdata");
26769 }
26770 #endif /* ARM_UNWIND_INFO */
26771
26772 /* Output unwind directives for the start/end of a function. */
26773
26774 void
26775 arm_output_fn_unwind (FILE * f, bool prologue)
26776 {
26777 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26778 return;
26779
26780 if (prologue)
26781 fputs ("\t.fnstart\n", f);
26782 else
26783 {
26784 /* If this function will never be unwound, then mark it as such.
26785 The came condition is used in arm_unwind_emit to suppress
26786 the frame annotations. */
26787 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26788 && (TREE_NOTHROW (current_function_decl)
26789 || crtl->all_throwers_are_sibcalls))
26790 fputs("\t.cantunwind\n", f);
26791
26792 fputs ("\t.fnend\n", f);
26793 }
26794 }
26795
26796 static bool
26797 arm_emit_tls_decoration (FILE *fp, rtx x)
26798 {
26799 enum tls_reloc reloc;
26800 rtx val;
26801
26802 val = XVECEXP (x, 0, 0);
26803 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26804
26805 output_addr_const (fp, val);
26806
26807 switch (reloc)
26808 {
26809 case TLS_GD32:
26810 fputs ("(tlsgd)", fp);
26811 break;
26812 case TLS_LDM32:
26813 fputs ("(tlsldm)", fp);
26814 break;
26815 case TLS_LDO32:
26816 fputs ("(tlsldo)", fp);
26817 break;
26818 case TLS_IE32:
26819 fputs ("(gottpoff)", fp);
26820 break;
26821 case TLS_LE32:
26822 fputs ("(tpoff)", fp);
26823 break;
26824 case TLS_DESCSEQ:
26825 fputs ("(tlsdesc)", fp);
26826 break;
26827 default:
26828 gcc_unreachable ();
26829 }
26830
26831 switch (reloc)
26832 {
26833 case TLS_GD32:
26834 case TLS_LDM32:
26835 case TLS_IE32:
26836 case TLS_DESCSEQ:
26837 fputs (" + (. - ", fp);
26838 output_addr_const (fp, XVECEXP (x, 0, 2));
26839 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26840 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26841 output_addr_const (fp, XVECEXP (x, 0, 3));
26842 fputc (')', fp);
26843 break;
26844 default:
26845 break;
26846 }
26847
26848 return TRUE;
26849 }
26850
26851 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26852
26853 static void
26854 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26855 {
26856 gcc_assert (size == 4);
26857 fputs ("\t.word\t", file);
26858 output_addr_const (file, x);
26859 fputs ("(tlsldo)", file);
26860 }
26861
26862 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26863
26864 static bool
26865 arm_output_addr_const_extra (FILE *fp, rtx x)
26866 {
26867 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26868 return arm_emit_tls_decoration (fp, x);
26869 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26870 {
26871 char label[256];
26872 int labelno = INTVAL (XVECEXP (x, 0, 0));
26873
26874 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26875 assemble_name_raw (fp, label);
26876
26877 return TRUE;
26878 }
26879 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26880 {
26881 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26882 if (GOT_PCREL)
26883 fputs ("+.", fp);
26884 fputs ("-(", fp);
26885 output_addr_const (fp, XVECEXP (x, 0, 0));
26886 fputc (')', fp);
26887 return TRUE;
26888 }
26889 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26890 {
26891 output_addr_const (fp, XVECEXP (x, 0, 0));
26892 if (GOT_PCREL)
26893 fputs ("+.", fp);
26894 fputs ("-(", fp);
26895 output_addr_const (fp, XVECEXP (x, 0, 1));
26896 fputc (')', fp);
26897 return TRUE;
26898 }
26899 else if (GET_CODE (x) == CONST_VECTOR)
26900 return arm_emit_vector_const (fp, x);
26901
26902 return FALSE;
26903 }
26904
26905 /* Output assembly for a shift instruction.
26906 SET_FLAGS determines how the instruction modifies the condition codes.
26907 0 - Do not set condition codes.
26908 1 - Set condition codes.
26909 2 - Use smallest instruction. */
26910 const char *
26911 arm_output_shift(rtx * operands, int set_flags)
26912 {
26913 char pattern[100];
26914 static const char flag_chars[3] = {'?', '.', '!'};
26915 const char *shift;
26916 HOST_WIDE_INT val;
26917 char c;
26918
26919 c = flag_chars[set_flags];
26920 if (TARGET_UNIFIED_ASM)
26921 {
26922 shift = shift_op(operands[3], &val);
26923 if (shift)
26924 {
26925 if (val != -1)
26926 operands[2] = GEN_INT(val);
26927 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26928 }
26929 else
26930 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26931 }
26932 else
26933 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26934 output_asm_insn (pattern, operands);
26935 return "";
26936 }
26937
26938 /* Output assembly for a WMMX immediate shift instruction. */
26939 const char *
26940 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26941 {
26942 int shift = INTVAL (operands[2]);
26943 char templ[50];
26944 machine_mode opmode = GET_MODE (operands[0]);
26945
26946 gcc_assert (shift >= 0);
26947
26948 /* If the shift value in the register versions is > 63 (for D qualifier),
26949 31 (for W qualifier) or 15 (for H qualifier). */
26950 if (((opmode == V4HImode) && (shift > 15))
26951 || ((opmode == V2SImode) && (shift > 31))
26952 || ((opmode == DImode) && (shift > 63)))
26953 {
26954 if (wror_or_wsra)
26955 {
26956 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26957 output_asm_insn (templ, operands);
26958 if (opmode == DImode)
26959 {
26960 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26961 output_asm_insn (templ, operands);
26962 }
26963 }
26964 else
26965 {
26966 /* The destination register will contain all zeros. */
26967 sprintf (templ, "wzero\t%%0");
26968 output_asm_insn (templ, operands);
26969 }
26970 return "";
26971 }
26972
26973 if ((opmode == DImode) && (shift > 32))
26974 {
26975 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26976 output_asm_insn (templ, operands);
26977 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26978 output_asm_insn (templ, operands);
26979 }
26980 else
26981 {
26982 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26983 output_asm_insn (templ, operands);
26984 }
26985 return "";
26986 }
26987
26988 /* Output assembly for a WMMX tinsr instruction. */
26989 const char *
26990 arm_output_iwmmxt_tinsr (rtx *operands)
26991 {
26992 int mask = INTVAL (operands[3]);
26993 int i;
26994 char templ[50];
26995 int units = mode_nunits[GET_MODE (operands[0])];
26996 gcc_assert ((mask & (mask - 1)) == 0);
26997 for (i = 0; i < units; ++i)
26998 {
26999 if ((mask & 0x01) == 1)
27000 {
27001 break;
27002 }
27003 mask >>= 1;
27004 }
27005 gcc_assert (i < units);
27006 {
27007 switch (GET_MODE (operands[0]))
27008 {
27009 case V8QImode:
27010 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27011 break;
27012 case V4HImode:
27013 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27014 break;
27015 case V2SImode:
27016 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27017 break;
27018 default:
27019 gcc_unreachable ();
27020 break;
27021 }
27022 output_asm_insn (templ, operands);
27023 }
27024 return "";
27025 }
27026
27027 /* Output a Thumb-1 casesi dispatch sequence. */
27028 const char *
27029 thumb1_output_casesi (rtx *operands)
27030 {
27031 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27032
27033 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27034
27035 switch (GET_MODE(diff_vec))
27036 {
27037 case QImode:
27038 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27039 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27040 case HImode:
27041 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27042 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27043 case SImode:
27044 return "bl\t%___gnu_thumb1_case_si";
27045 default:
27046 gcc_unreachable ();
27047 }
27048 }
27049
27050 /* Output a Thumb-2 casesi instruction. */
27051 const char *
27052 thumb2_output_casesi (rtx *operands)
27053 {
27054 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27055
27056 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27057
27058 output_asm_insn ("cmp\t%0, %1", operands);
27059 output_asm_insn ("bhi\t%l3", operands);
27060 switch (GET_MODE(diff_vec))
27061 {
27062 case QImode:
27063 return "tbb\t[%|pc, %0]";
27064 case HImode:
27065 return "tbh\t[%|pc, %0, lsl #1]";
27066 case SImode:
27067 if (flag_pic)
27068 {
27069 output_asm_insn ("adr\t%4, %l2", operands);
27070 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27071 output_asm_insn ("add\t%4, %4, %5", operands);
27072 return "bx\t%4";
27073 }
27074 else
27075 {
27076 output_asm_insn ("adr\t%4, %l2", operands);
27077 return "ldr\t%|pc, [%4, %0, lsl #2]";
27078 }
27079 default:
27080 gcc_unreachable ();
27081 }
27082 }
27083
27084 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27085 per-core tuning structs. */
27086 static int
27087 arm_issue_rate (void)
27088 {
27089 return current_tune->issue_rate;
27090 }
27091
27092 /* Return how many instructions should scheduler lookahead to choose the
27093 best one. */
27094 static int
27095 arm_first_cycle_multipass_dfa_lookahead (void)
27096 {
27097 int issue_rate = arm_issue_rate ();
27098
27099 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27100 }
27101
27102 /* Enable modeling of L2 auto-prefetcher. */
27103 static int
27104 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27105 {
27106 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27107 }
27108
27109 const char *
27110 arm_mangle_type (const_tree type)
27111 {
27112 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27113 has to be managled as if it is in the "std" namespace. */
27114 if (TARGET_AAPCS_BASED
27115 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27116 return "St9__va_list";
27117
27118 /* Half-precision float. */
27119 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27120 return "Dh";
27121
27122 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27123 builtin type. */
27124 if (TYPE_NAME (type) != NULL)
27125 return arm_mangle_builtin_type (type);
27126
27127 /* Use the default mangling. */
27128 return NULL;
27129 }
27130
27131 /* Order of allocation of core registers for Thumb: this allocation is
27132 written over the corresponding initial entries of the array
27133 initialized with REG_ALLOC_ORDER. We allocate all low registers
27134 first. Saving and restoring a low register is usually cheaper than
27135 using a call-clobbered high register. */
27136
27137 static const int thumb_core_reg_alloc_order[] =
27138 {
27139 3, 2, 1, 0, 4, 5, 6, 7,
27140 14, 12, 8, 9, 10, 11
27141 };
27142
27143 /* Adjust register allocation order when compiling for Thumb. */
27144
27145 void
27146 arm_order_regs_for_local_alloc (void)
27147 {
27148 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27149 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27150 if (TARGET_THUMB)
27151 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27152 sizeof (thumb_core_reg_alloc_order));
27153 }
27154
27155 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27156
27157 bool
27158 arm_frame_pointer_required (void)
27159 {
27160 return (cfun->has_nonlocal_label
27161 || SUBTARGET_FRAME_POINTER_REQUIRED
27162 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27163 }
27164
27165 /* Only thumb1 can't support conditional execution, so return true if
27166 the target is not thumb1. */
27167 static bool
27168 arm_have_conditional_execution (void)
27169 {
27170 return !TARGET_THUMB1;
27171 }
27172
27173 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27174 static HOST_WIDE_INT
27175 arm_vector_alignment (const_tree type)
27176 {
27177 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27178
27179 if (TARGET_AAPCS_BASED)
27180 align = MIN (align, 64);
27181
27182 return align;
27183 }
27184
27185 static unsigned int
27186 arm_autovectorize_vector_sizes (void)
27187 {
27188 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27189 }
27190
27191 static bool
27192 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27193 {
27194 /* Vectors which aren't in packed structures will not be less aligned than
27195 the natural alignment of their element type, so this is safe. */
27196 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27197 return !is_packed;
27198
27199 return default_builtin_vector_alignment_reachable (type, is_packed);
27200 }
27201
27202 static bool
27203 arm_builtin_support_vector_misalignment (machine_mode mode,
27204 const_tree type, int misalignment,
27205 bool is_packed)
27206 {
27207 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27208 {
27209 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27210
27211 if (is_packed)
27212 return align == 1;
27213
27214 /* If the misalignment is unknown, we should be able to handle the access
27215 so long as it is not to a member of a packed data structure. */
27216 if (misalignment == -1)
27217 return true;
27218
27219 /* Return true if the misalignment is a multiple of the natural alignment
27220 of the vector's element type. This is probably always going to be
27221 true in practice, since we've already established that this isn't a
27222 packed access. */
27223 return ((misalignment % align) == 0);
27224 }
27225
27226 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27227 is_packed);
27228 }
27229
27230 static void
27231 arm_conditional_register_usage (void)
27232 {
27233 int regno;
27234
27235 if (TARGET_THUMB1 && optimize_size)
27236 {
27237 /* When optimizing for size on Thumb-1, it's better not
27238 to use the HI regs, because of the overhead of
27239 stacking them. */
27240 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27241 fixed_regs[regno] = call_used_regs[regno] = 1;
27242 }
27243
27244 /* The link register can be clobbered by any branch insn,
27245 but we have no way to track that at present, so mark
27246 it as unavailable. */
27247 if (TARGET_THUMB1)
27248 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27249
27250 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27251 {
27252 /* VFPv3 registers are disabled when earlier VFP
27253 versions are selected due to the definition of
27254 LAST_VFP_REGNUM. */
27255 for (regno = FIRST_VFP_REGNUM;
27256 regno <= LAST_VFP_REGNUM; ++ regno)
27257 {
27258 fixed_regs[regno] = 0;
27259 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27260 || regno >= FIRST_VFP_REGNUM + 32;
27261 }
27262 }
27263
27264 if (TARGET_REALLY_IWMMXT)
27265 {
27266 regno = FIRST_IWMMXT_GR_REGNUM;
27267 /* The 2002/10/09 revision of the XScale ABI has wCG0
27268 and wCG1 as call-preserved registers. The 2002/11/21
27269 revision changed this so that all wCG registers are
27270 scratch registers. */
27271 for (regno = FIRST_IWMMXT_GR_REGNUM;
27272 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27273 fixed_regs[regno] = 0;
27274 /* The XScale ABI has wR0 - wR9 as scratch registers,
27275 the rest as call-preserved registers. */
27276 for (regno = FIRST_IWMMXT_REGNUM;
27277 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27278 {
27279 fixed_regs[regno] = 0;
27280 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27281 }
27282 }
27283
27284 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27285 {
27286 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27287 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27288 }
27289 else if (TARGET_APCS_STACK)
27290 {
27291 fixed_regs[10] = 1;
27292 call_used_regs[10] = 1;
27293 }
27294 /* -mcaller-super-interworking reserves r11 for calls to
27295 _interwork_r11_call_via_rN(). Making the register global
27296 is an easy way of ensuring that it remains valid for all
27297 calls. */
27298 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27299 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27300 {
27301 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27302 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27303 if (TARGET_CALLER_INTERWORKING)
27304 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27305 }
27306 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27307 }
27308
27309 static reg_class_t
27310 arm_preferred_rename_class (reg_class_t rclass)
27311 {
27312 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27313 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27314 and code size can be reduced. */
27315 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27316 return LO_REGS;
27317 else
27318 return NO_REGS;
27319 }
27320
27321 /* Compute the atrribute "length" of insn "*push_multi".
27322 So this function MUST be kept in sync with that insn pattern. */
27323 int
27324 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27325 {
27326 int i, regno, hi_reg;
27327 int num_saves = XVECLEN (parallel_op, 0);
27328
27329 /* ARM mode. */
27330 if (TARGET_ARM)
27331 return 4;
27332 /* Thumb1 mode. */
27333 if (TARGET_THUMB1)
27334 return 2;
27335
27336 /* Thumb2 mode. */
27337 regno = REGNO (first_op);
27338 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27339 for (i = 1; i < num_saves && !hi_reg; i++)
27340 {
27341 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27342 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27343 }
27344
27345 if (!hi_reg)
27346 return 2;
27347 return 4;
27348 }
27349
27350 /* Compute the number of instructions emitted by output_move_double. */
27351 int
27352 arm_count_output_move_double_insns (rtx *operands)
27353 {
27354 int count;
27355 rtx ops[2];
27356 /* output_move_double may modify the operands array, so call it
27357 here on a copy of the array. */
27358 ops[0] = operands[0];
27359 ops[1] = operands[1];
27360 output_move_double (ops, false, &count);
27361 return count;
27362 }
27363
27364 int
27365 vfp3_const_double_for_fract_bits (rtx operand)
27366 {
27367 REAL_VALUE_TYPE r0;
27368
27369 if (!CONST_DOUBLE_P (operand))
27370 return 0;
27371
27372 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27373 if (exact_real_inverse (DFmode, &r0))
27374 {
27375 if (exact_real_truncate (DFmode, &r0))
27376 {
27377 HOST_WIDE_INT value = real_to_integer (&r0);
27378 value = value & 0xffffffff;
27379 if ((value != 0) && ( (value & (value - 1)) == 0))
27380 return int_log2 (value);
27381 }
27382 }
27383 return 0;
27384 }
27385
27386 int
27387 vfp3_const_double_for_bits (rtx operand)
27388 {
27389 REAL_VALUE_TYPE r0;
27390
27391 if (!CONST_DOUBLE_P (operand))
27392 return 0;
27393
27394 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27395 if (exact_real_truncate (DFmode, &r0))
27396 {
27397 HOST_WIDE_INT value = real_to_integer (&r0);
27398 value = value & 0xffffffff;
27399 if ((value != 0) && ( (value & (value - 1)) == 0))
27400 return int_log2 (value);
27401 }
27402
27403 return 0;
27404 }
27405 \f
27406 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27407
27408 static void
27409 arm_pre_atomic_barrier (enum memmodel model)
27410 {
27411 if (need_atomic_barrier_p (model, true))
27412 emit_insn (gen_memory_barrier ());
27413 }
27414
27415 static void
27416 arm_post_atomic_barrier (enum memmodel model)
27417 {
27418 if (need_atomic_barrier_p (model, false))
27419 emit_insn (gen_memory_barrier ());
27420 }
27421
27422 /* Emit the load-exclusive and store-exclusive instructions.
27423 Use acquire and release versions if necessary. */
27424
27425 static void
27426 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27427 {
27428 rtx (*gen) (rtx, rtx);
27429
27430 if (acq)
27431 {
27432 switch (mode)
27433 {
27434 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27435 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27436 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27437 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27438 default:
27439 gcc_unreachable ();
27440 }
27441 }
27442 else
27443 {
27444 switch (mode)
27445 {
27446 case QImode: gen = gen_arm_load_exclusiveqi; break;
27447 case HImode: gen = gen_arm_load_exclusivehi; break;
27448 case SImode: gen = gen_arm_load_exclusivesi; break;
27449 case DImode: gen = gen_arm_load_exclusivedi; break;
27450 default:
27451 gcc_unreachable ();
27452 }
27453 }
27454
27455 emit_insn (gen (rval, mem));
27456 }
27457
27458 static void
27459 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27460 rtx mem, bool rel)
27461 {
27462 rtx (*gen) (rtx, rtx, rtx);
27463
27464 if (rel)
27465 {
27466 switch (mode)
27467 {
27468 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27469 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27470 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27471 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27472 default:
27473 gcc_unreachable ();
27474 }
27475 }
27476 else
27477 {
27478 switch (mode)
27479 {
27480 case QImode: gen = gen_arm_store_exclusiveqi; break;
27481 case HImode: gen = gen_arm_store_exclusivehi; break;
27482 case SImode: gen = gen_arm_store_exclusivesi; break;
27483 case DImode: gen = gen_arm_store_exclusivedi; break;
27484 default:
27485 gcc_unreachable ();
27486 }
27487 }
27488
27489 emit_insn (gen (bval, rval, mem));
27490 }
27491
27492 /* Mark the previous jump instruction as unlikely. */
27493
27494 static void
27495 emit_unlikely_jump (rtx insn)
27496 {
27497 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27498
27499 insn = emit_jump_insn (insn);
27500 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27501 }
27502
27503 /* Expand a compare and swap pattern. */
27504
27505 void
27506 arm_expand_compare_and_swap (rtx operands[])
27507 {
27508 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27509 machine_mode mode;
27510 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27511
27512 bval = operands[0];
27513 rval = operands[1];
27514 mem = operands[2];
27515 oldval = operands[3];
27516 newval = operands[4];
27517 is_weak = operands[5];
27518 mod_s = operands[6];
27519 mod_f = operands[7];
27520 mode = GET_MODE (mem);
27521
27522 /* Normally the succ memory model must be stronger than fail, but in the
27523 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27524 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27525
27526 if (TARGET_HAVE_LDACQ
27527 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27528 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27529 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27530
27531 switch (mode)
27532 {
27533 case QImode:
27534 case HImode:
27535 /* For narrow modes, we're going to perform the comparison in SImode,
27536 so do the zero-extension now. */
27537 rval = gen_reg_rtx (SImode);
27538 oldval = convert_modes (SImode, mode, oldval, true);
27539 /* FALLTHRU */
27540
27541 case SImode:
27542 /* Force the value into a register if needed. We waited until after
27543 the zero-extension above to do this properly. */
27544 if (!arm_add_operand (oldval, SImode))
27545 oldval = force_reg (SImode, oldval);
27546 break;
27547
27548 case DImode:
27549 if (!cmpdi_operand (oldval, mode))
27550 oldval = force_reg (mode, oldval);
27551 break;
27552
27553 default:
27554 gcc_unreachable ();
27555 }
27556
27557 switch (mode)
27558 {
27559 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27560 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27561 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27562 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27563 default:
27564 gcc_unreachable ();
27565 }
27566
27567 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27568
27569 if (mode == QImode || mode == HImode)
27570 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27571
27572 /* In all cases, we arrange for success to be signaled by Z set.
27573 This arrangement allows for the boolean result to be used directly
27574 in a subsequent branch, post optimization. */
27575 x = gen_rtx_REG (CCmode, CC_REGNUM);
27576 x = gen_rtx_EQ (SImode, x, const0_rtx);
27577 emit_insn (gen_rtx_SET (bval, x));
27578 }
27579
27580 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27581 another memory store between the load-exclusive and store-exclusive can
27582 reset the monitor from Exclusive to Open state. This means we must wait
27583 until after reload to split the pattern, lest we get a register spill in
27584 the middle of the atomic sequence. */
27585
27586 void
27587 arm_split_compare_and_swap (rtx operands[])
27588 {
27589 rtx rval, mem, oldval, newval, scratch;
27590 machine_mode mode;
27591 enum memmodel mod_s, mod_f;
27592 bool is_weak;
27593 rtx_code_label *label1, *label2;
27594 rtx x, cond;
27595
27596 rval = operands[0];
27597 mem = operands[1];
27598 oldval = operands[2];
27599 newval = operands[3];
27600 is_weak = (operands[4] != const0_rtx);
27601 mod_s = memmodel_from_int (INTVAL (operands[5]));
27602 mod_f = memmodel_from_int (INTVAL (operands[6]));
27603 scratch = operands[7];
27604 mode = GET_MODE (mem);
27605
27606 bool use_acquire = TARGET_HAVE_LDACQ
27607 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27608 || is_mm_release (mod_s));
27609
27610 bool use_release = TARGET_HAVE_LDACQ
27611 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27612 || is_mm_acquire (mod_s));
27613
27614 /* Checks whether a barrier is needed and emits one accordingly. */
27615 if (!(use_acquire || use_release))
27616 arm_pre_atomic_barrier (mod_s);
27617
27618 label1 = NULL;
27619 if (!is_weak)
27620 {
27621 label1 = gen_label_rtx ();
27622 emit_label (label1);
27623 }
27624 label2 = gen_label_rtx ();
27625
27626 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27627
27628 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27629 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27630 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27631 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27632 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27633
27634 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27635
27636 /* Weak or strong, we want EQ to be true for success, so that we
27637 match the flags that we got from the compare above. */
27638 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27639 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27640 emit_insn (gen_rtx_SET (cond, x));
27641
27642 if (!is_weak)
27643 {
27644 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27645 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27646 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27647 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27648 }
27649
27650 if (!is_mm_relaxed (mod_f))
27651 emit_label (label2);
27652
27653 /* Checks whether a barrier is needed and emits one accordingly. */
27654 if (!(use_acquire || use_release))
27655 arm_post_atomic_barrier (mod_s);
27656
27657 if (is_mm_relaxed (mod_f))
27658 emit_label (label2);
27659 }
27660
27661 void
27662 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27663 rtx value, rtx model_rtx, rtx cond)
27664 {
27665 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
27666 machine_mode mode = GET_MODE (mem);
27667 machine_mode wmode = (mode == DImode ? DImode : SImode);
27668 rtx_code_label *label;
27669 rtx x;
27670
27671 bool use_acquire = TARGET_HAVE_LDACQ
27672 && !(is_mm_relaxed (model) || is_mm_consume (model)
27673 || is_mm_release (model));
27674
27675 bool use_release = TARGET_HAVE_LDACQ
27676 && !(is_mm_relaxed (model) || is_mm_consume (model)
27677 || is_mm_acquire (model));
27678
27679 /* Checks whether a barrier is needed and emits one accordingly. */
27680 if (!(use_acquire || use_release))
27681 arm_pre_atomic_barrier (model);
27682
27683 label = gen_label_rtx ();
27684 emit_label (label);
27685
27686 if (new_out)
27687 new_out = gen_lowpart (wmode, new_out);
27688 if (old_out)
27689 old_out = gen_lowpart (wmode, old_out);
27690 else
27691 old_out = new_out;
27692 value = simplify_gen_subreg (wmode, value, mode, 0);
27693
27694 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27695
27696 switch (code)
27697 {
27698 case SET:
27699 new_out = value;
27700 break;
27701
27702 case NOT:
27703 x = gen_rtx_AND (wmode, old_out, value);
27704 emit_insn (gen_rtx_SET (new_out, x));
27705 x = gen_rtx_NOT (wmode, new_out);
27706 emit_insn (gen_rtx_SET (new_out, x));
27707 break;
27708
27709 case MINUS:
27710 if (CONST_INT_P (value))
27711 {
27712 value = GEN_INT (-INTVAL (value));
27713 code = PLUS;
27714 }
27715 /* FALLTHRU */
27716
27717 case PLUS:
27718 if (mode == DImode)
27719 {
27720 /* DImode plus/minus need to clobber flags. */
27721 /* The adddi3 and subdi3 patterns are incorrectly written so that
27722 they require matching operands, even when we could easily support
27723 three operands. Thankfully, this can be fixed up post-splitting,
27724 as the individual add+adc patterns do accept three operands and
27725 post-reload cprop can make these moves go away. */
27726 emit_move_insn (new_out, old_out);
27727 if (code == PLUS)
27728 x = gen_adddi3 (new_out, new_out, value);
27729 else
27730 x = gen_subdi3 (new_out, new_out, value);
27731 emit_insn (x);
27732 break;
27733 }
27734 /* FALLTHRU */
27735
27736 default:
27737 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27738 emit_insn (gen_rtx_SET (new_out, x));
27739 break;
27740 }
27741
27742 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27743 use_release);
27744
27745 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27746 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27747
27748 /* Checks whether a barrier is needed and emits one accordingly. */
27749 if (!(use_acquire || use_release))
27750 arm_post_atomic_barrier (model);
27751 }
27752 \f
27753 #define MAX_VECT_LEN 16
27754
27755 struct expand_vec_perm_d
27756 {
27757 rtx target, op0, op1;
27758 unsigned char perm[MAX_VECT_LEN];
27759 machine_mode vmode;
27760 unsigned char nelt;
27761 bool one_vector_p;
27762 bool testing_p;
27763 };
27764
27765 /* Generate a variable permutation. */
27766
27767 static void
27768 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27769 {
27770 machine_mode vmode = GET_MODE (target);
27771 bool one_vector_p = rtx_equal_p (op0, op1);
27772
27773 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27774 gcc_checking_assert (GET_MODE (op0) == vmode);
27775 gcc_checking_assert (GET_MODE (op1) == vmode);
27776 gcc_checking_assert (GET_MODE (sel) == vmode);
27777 gcc_checking_assert (TARGET_NEON);
27778
27779 if (one_vector_p)
27780 {
27781 if (vmode == V8QImode)
27782 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27783 else
27784 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27785 }
27786 else
27787 {
27788 rtx pair;
27789
27790 if (vmode == V8QImode)
27791 {
27792 pair = gen_reg_rtx (V16QImode);
27793 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27794 pair = gen_lowpart (TImode, pair);
27795 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27796 }
27797 else
27798 {
27799 pair = gen_reg_rtx (OImode);
27800 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27801 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27802 }
27803 }
27804 }
27805
27806 void
27807 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27808 {
27809 machine_mode vmode = GET_MODE (target);
27810 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27811 bool one_vector_p = rtx_equal_p (op0, op1);
27812 rtx rmask[MAX_VECT_LEN], mask;
27813
27814 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27815 numbering of elements for big-endian, we must reverse the order. */
27816 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27817
27818 /* The VTBL instruction does not use a modulo index, so we must take care
27819 of that ourselves. */
27820 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27821 for (i = 0; i < nelt; ++i)
27822 rmask[i] = mask;
27823 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27824 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27825
27826 arm_expand_vec_perm_1 (target, op0, op1, sel);
27827 }
27828
27829 /* Generate or test for an insn that supports a constant permutation. */
27830
27831 /* Recognize patterns for the VUZP insns. */
27832
27833 static bool
27834 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27835 {
27836 unsigned int i, odd, mask, nelt = d->nelt;
27837 rtx out0, out1, in0, in1, x;
27838 rtx (*gen)(rtx, rtx, rtx, rtx);
27839
27840 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27841 return false;
27842
27843 /* Note that these are little-endian tests. Adjust for big-endian later. */
27844 if (d->perm[0] == 0)
27845 odd = 0;
27846 else if (d->perm[0] == 1)
27847 odd = 1;
27848 else
27849 return false;
27850 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27851
27852 for (i = 0; i < nelt; i++)
27853 {
27854 unsigned elt = (i * 2 + odd) & mask;
27855 if (d->perm[i] != elt)
27856 return false;
27857 }
27858
27859 /* Success! */
27860 if (d->testing_p)
27861 return true;
27862
27863 switch (d->vmode)
27864 {
27865 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27866 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27867 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27868 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27869 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27870 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27871 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27872 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27873 default:
27874 gcc_unreachable ();
27875 }
27876
27877 in0 = d->op0;
27878 in1 = d->op1;
27879 if (BYTES_BIG_ENDIAN)
27880 {
27881 x = in0, in0 = in1, in1 = x;
27882 odd = !odd;
27883 }
27884
27885 out0 = d->target;
27886 out1 = gen_reg_rtx (d->vmode);
27887 if (odd)
27888 x = out0, out0 = out1, out1 = x;
27889
27890 emit_insn (gen (out0, in0, in1, out1));
27891 return true;
27892 }
27893
27894 /* Recognize patterns for the VZIP insns. */
27895
27896 static bool
27897 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27898 {
27899 unsigned int i, high, mask, nelt = d->nelt;
27900 rtx out0, out1, in0, in1, x;
27901 rtx (*gen)(rtx, rtx, rtx, rtx);
27902
27903 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27904 return false;
27905
27906 /* Note that these are little-endian tests. Adjust for big-endian later. */
27907 high = nelt / 2;
27908 if (d->perm[0] == high)
27909 ;
27910 else if (d->perm[0] == 0)
27911 high = 0;
27912 else
27913 return false;
27914 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27915
27916 for (i = 0; i < nelt / 2; i++)
27917 {
27918 unsigned elt = (i + high) & mask;
27919 if (d->perm[i * 2] != elt)
27920 return false;
27921 elt = (elt + nelt) & mask;
27922 if (d->perm[i * 2 + 1] != elt)
27923 return false;
27924 }
27925
27926 /* Success! */
27927 if (d->testing_p)
27928 return true;
27929
27930 switch (d->vmode)
27931 {
27932 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27933 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27934 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27935 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27936 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27937 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27938 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27939 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27940 default:
27941 gcc_unreachable ();
27942 }
27943
27944 in0 = d->op0;
27945 in1 = d->op1;
27946 if (BYTES_BIG_ENDIAN)
27947 {
27948 x = in0, in0 = in1, in1 = x;
27949 high = !high;
27950 }
27951
27952 out0 = d->target;
27953 out1 = gen_reg_rtx (d->vmode);
27954 if (high)
27955 x = out0, out0 = out1, out1 = x;
27956
27957 emit_insn (gen (out0, in0, in1, out1));
27958 return true;
27959 }
27960
27961 /* Recognize patterns for the VREV insns. */
27962
27963 static bool
27964 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27965 {
27966 unsigned int i, j, diff, nelt = d->nelt;
27967 rtx (*gen)(rtx, rtx);
27968
27969 if (!d->one_vector_p)
27970 return false;
27971
27972 diff = d->perm[0];
27973 switch (diff)
27974 {
27975 case 7:
27976 switch (d->vmode)
27977 {
27978 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27979 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27980 default:
27981 return false;
27982 }
27983 break;
27984 case 3:
27985 switch (d->vmode)
27986 {
27987 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27988 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27989 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27990 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27991 default:
27992 return false;
27993 }
27994 break;
27995 case 1:
27996 switch (d->vmode)
27997 {
27998 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27999 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28000 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28001 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28002 case V4SImode: gen = gen_neon_vrev64v4si; break;
28003 case V2SImode: gen = gen_neon_vrev64v2si; break;
28004 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28005 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28006 default:
28007 return false;
28008 }
28009 break;
28010 default:
28011 return false;
28012 }
28013
28014 for (i = 0; i < nelt ; i += diff + 1)
28015 for (j = 0; j <= diff; j += 1)
28016 {
28017 /* This is guaranteed to be true as the value of diff
28018 is 7, 3, 1 and we should have enough elements in the
28019 queue to generate this. Getting a vector mask with a
28020 value of diff other than these values implies that
28021 something is wrong by the time we get here. */
28022 gcc_assert (i + j < nelt);
28023 if (d->perm[i + j] != i + diff - j)
28024 return false;
28025 }
28026
28027 /* Success! */
28028 if (d->testing_p)
28029 return true;
28030
28031 emit_insn (gen (d->target, d->op0));
28032 return true;
28033 }
28034
28035 /* Recognize patterns for the VTRN insns. */
28036
28037 static bool
28038 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28039 {
28040 unsigned int i, odd, mask, nelt = d->nelt;
28041 rtx out0, out1, in0, in1, x;
28042 rtx (*gen)(rtx, rtx, rtx, rtx);
28043
28044 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28045 return false;
28046
28047 /* Note that these are little-endian tests. Adjust for big-endian later. */
28048 if (d->perm[0] == 0)
28049 odd = 0;
28050 else if (d->perm[0] == 1)
28051 odd = 1;
28052 else
28053 return false;
28054 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28055
28056 for (i = 0; i < nelt; i += 2)
28057 {
28058 if (d->perm[i] != i + odd)
28059 return false;
28060 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28061 return false;
28062 }
28063
28064 /* Success! */
28065 if (d->testing_p)
28066 return true;
28067
28068 switch (d->vmode)
28069 {
28070 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28071 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28072 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28073 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28074 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28075 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28076 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28077 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28078 default:
28079 gcc_unreachable ();
28080 }
28081
28082 in0 = d->op0;
28083 in1 = d->op1;
28084 if (BYTES_BIG_ENDIAN)
28085 {
28086 x = in0, in0 = in1, in1 = x;
28087 odd = !odd;
28088 }
28089
28090 out0 = d->target;
28091 out1 = gen_reg_rtx (d->vmode);
28092 if (odd)
28093 x = out0, out0 = out1, out1 = x;
28094
28095 emit_insn (gen (out0, in0, in1, out1));
28096 return true;
28097 }
28098
28099 /* Recognize patterns for the VEXT insns. */
28100
28101 static bool
28102 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28103 {
28104 unsigned int i, nelt = d->nelt;
28105 rtx (*gen) (rtx, rtx, rtx, rtx);
28106 rtx offset;
28107
28108 unsigned int location;
28109
28110 unsigned int next = d->perm[0] + 1;
28111
28112 /* TODO: Handle GCC's numbering of elements for big-endian. */
28113 if (BYTES_BIG_ENDIAN)
28114 return false;
28115
28116 /* Check if the extracted indexes are increasing by one. */
28117 for (i = 1; i < nelt; next++, i++)
28118 {
28119 /* If we hit the most significant element of the 2nd vector in
28120 the previous iteration, no need to test further. */
28121 if (next == 2 * nelt)
28122 return false;
28123
28124 /* If we are operating on only one vector: it could be a
28125 rotation. If there are only two elements of size < 64, let
28126 arm_evpc_neon_vrev catch it. */
28127 if (d->one_vector_p && (next == nelt))
28128 {
28129 if ((nelt == 2) && (d->vmode != V2DImode))
28130 return false;
28131 else
28132 next = 0;
28133 }
28134
28135 if (d->perm[i] != next)
28136 return false;
28137 }
28138
28139 location = d->perm[0];
28140
28141 switch (d->vmode)
28142 {
28143 case V16QImode: gen = gen_neon_vextv16qi; break;
28144 case V8QImode: gen = gen_neon_vextv8qi; break;
28145 case V4HImode: gen = gen_neon_vextv4hi; break;
28146 case V8HImode: gen = gen_neon_vextv8hi; break;
28147 case V2SImode: gen = gen_neon_vextv2si; break;
28148 case V4SImode: gen = gen_neon_vextv4si; break;
28149 case V2SFmode: gen = gen_neon_vextv2sf; break;
28150 case V4SFmode: gen = gen_neon_vextv4sf; break;
28151 case V2DImode: gen = gen_neon_vextv2di; break;
28152 default:
28153 return false;
28154 }
28155
28156 /* Success! */
28157 if (d->testing_p)
28158 return true;
28159
28160 offset = GEN_INT (location);
28161 emit_insn (gen (d->target, d->op0, d->op1, offset));
28162 return true;
28163 }
28164
28165 /* The NEON VTBL instruction is a fully variable permuation that's even
28166 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28167 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28168 can do slightly better by expanding this as a constant where we don't
28169 have to apply a mask. */
28170
28171 static bool
28172 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28173 {
28174 rtx rperm[MAX_VECT_LEN], sel;
28175 machine_mode vmode = d->vmode;
28176 unsigned int i, nelt = d->nelt;
28177
28178 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28179 numbering of elements for big-endian, we must reverse the order. */
28180 if (BYTES_BIG_ENDIAN)
28181 return false;
28182
28183 if (d->testing_p)
28184 return true;
28185
28186 /* Generic code will try constant permutation twice. Once with the
28187 original mode and again with the elements lowered to QImode.
28188 So wait and don't do the selector expansion ourselves. */
28189 if (vmode != V8QImode && vmode != V16QImode)
28190 return false;
28191
28192 for (i = 0; i < nelt; ++i)
28193 rperm[i] = GEN_INT (d->perm[i]);
28194 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28195 sel = force_reg (vmode, sel);
28196
28197 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28198 return true;
28199 }
28200
28201 static bool
28202 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28203 {
28204 /* Check if the input mask matches vext before reordering the
28205 operands. */
28206 if (TARGET_NEON)
28207 if (arm_evpc_neon_vext (d))
28208 return true;
28209
28210 /* The pattern matching functions above are written to look for a small
28211 number to begin the sequence (0, 1, N/2). If we begin with an index
28212 from the second operand, we can swap the operands. */
28213 if (d->perm[0] >= d->nelt)
28214 {
28215 unsigned i, nelt = d->nelt;
28216 rtx x;
28217
28218 for (i = 0; i < nelt; ++i)
28219 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28220
28221 x = d->op0;
28222 d->op0 = d->op1;
28223 d->op1 = x;
28224 }
28225
28226 if (TARGET_NEON)
28227 {
28228 if (arm_evpc_neon_vuzp (d))
28229 return true;
28230 if (arm_evpc_neon_vzip (d))
28231 return true;
28232 if (arm_evpc_neon_vrev (d))
28233 return true;
28234 if (arm_evpc_neon_vtrn (d))
28235 return true;
28236 return arm_evpc_neon_vtbl (d);
28237 }
28238 return false;
28239 }
28240
28241 /* Expand a vec_perm_const pattern. */
28242
28243 bool
28244 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28245 {
28246 struct expand_vec_perm_d d;
28247 int i, nelt, which;
28248
28249 d.target = target;
28250 d.op0 = op0;
28251 d.op1 = op1;
28252
28253 d.vmode = GET_MODE (target);
28254 gcc_assert (VECTOR_MODE_P (d.vmode));
28255 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28256 d.testing_p = false;
28257
28258 for (i = which = 0; i < nelt; ++i)
28259 {
28260 rtx e = XVECEXP (sel, 0, i);
28261 int ei = INTVAL (e) & (2 * nelt - 1);
28262 which |= (ei < nelt ? 1 : 2);
28263 d.perm[i] = ei;
28264 }
28265
28266 switch (which)
28267 {
28268 default:
28269 gcc_unreachable();
28270
28271 case 3:
28272 d.one_vector_p = false;
28273 if (!rtx_equal_p (op0, op1))
28274 break;
28275
28276 /* The elements of PERM do not suggest that only the first operand
28277 is used, but both operands are identical. Allow easier matching
28278 of the permutation by folding the permutation into the single
28279 input vector. */
28280 /* FALLTHRU */
28281 case 2:
28282 for (i = 0; i < nelt; ++i)
28283 d.perm[i] &= nelt - 1;
28284 d.op0 = op1;
28285 d.one_vector_p = true;
28286 break;
28287
28288 case 1:
28289 d.op1 = op0;
28290 d.one_vector_p = true;
28291 break;
28292 }
28293
28294 return arm_expand_vec_perm_const_1 (&d);
28295 }
28296
28297 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28298
28299 static bool
28300 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28301 const unsigned char *sel)
28302 {
28303 struct expand_vec_perm_d d;
28304 unsigned int i, nelt, which;
28305 bool ret;
28306
28307 d.vmode = vmode;
28308 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28309 d.testing_p = true;
28310 memcpy (d.perm, sel, nelt);
28311
28312 /* Categorize the set of elements in the selector. */
28313 for (i = which = 0; i < nelt; ++i)
28314 {
28315 unsigned char e = d.perm[i];
28316 gcc_assert (e < 2 * nelt);
28317 which |= (e < nelt ? 1 : 2);
28318 }
28319
28320 /* For all elements from second vector, fold the elements to first. */
28321 if (which == 2)
28322 for (i = 0; i < nelt; ++i)
28323 d.perm[i] -= nelt;
28324
28325 /* Check whether the mask can be applied to the vector type. */
28326 d.one_vector_p = (which != 3);
28327
28328 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28329 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28330 if (!d.one_vector_p)
28331 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28332
28333 start_sequence ();
28334 ret = arm_expand_vec_perm_const_1 (&d);
28335 end_sequence ();
28336
28337 return ret;
28338 }
28339
28340 bool
28341 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28342 {
28343 /* If we are soft float and we do not have ldrd
28344 then all auto increment forms are ok. */
28345 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28346 return true;
28347
28348 switch (code)
28349 {
28350 /* Post increment and Pre Decrement are supported for all
28351 instruction forms except for vector forms. */
28352 case ARM_POST_INC:
28353 case ARM_PRE_DEC:
28354 if (VECTOR_MODE_P (mode))
28355 {
28356 if (code != ARM_PRE_DEC)
28357 return true;
28358 else
28359 return false;
28360 }
28361
28362 return true;
28363
28364 case ARM_POST_DEC:
28365 case ARM_PRE_INC:
28366 /* Without LDRD and mode size greater than
28367 word size, there is no point in auto-incrementing
28368 because ldm and stm will not have these forms. */
28369 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28370 return false;
28371
28372 /* Vector and floating point modes do not support
28373 these auto increment forms. */
28374 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28375 return false;
28376
28377 return true;
28378
28379 default:
28380 return false;
28381
28382 }
28383
28384 return false;
28385 }
28386
28387 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28388 on ARM, since we know that shifts by negative amounts are no-ops.
28389 Additionally, the default expansion code is not available or suitable
28390 for post-reload insn splits (this can occur when the register allocator
28391 chooses not to do a shift in NEON).
28392
28393 This function is used in both initial expand and post-reload splits, and
28394 handles all kinds of 64-bit shifts.
28395
28396 Input requirements:
28397 - It is safe for the input and output to be the same register, but
28398 early-clobber rules apply for the shift amount and scratch registers.
28399 - Shift by register requires both scratch registers. In all other cases
28400 the scratch registers may be NULL.
28401 - Ashiftrt by a register also clobbers the CC register. */
28402 void
28403 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28404 rtx amount, rtx scratch1, rtx scratch2)
28405 {
28406 rtx out_high = gen_highpart (SImode, out);
28407 rtx out_low = gen_lowpart (SImode, out);
28408 rtx in_high = gen_highpart (SImode, in);
28409 rtx in_low = gen_lowpart (SImode, in);
28410
28411 /* Terminology:
28412 in = the register pair containing the input value.
28413 out = the destination register pair.
28414 up = the high- or low-part of each pair.
28415 down = the opposite part to "up".
28416 In a shift, we can consider bits to shift from "up"-stream to
28417 "down"-stream, so in a left-shift "up" is the low-part and "down"
28418 is the high-part of each register pair. */
28419
28420 rtx out_up = code == ASHIFT ? out_low : out_high;
28421 rtx out_down = code == ASHIFT ? out_high : out_low;
28422 rtx in_up = code == ASHIFT ? in_low : in_high;
28423 rtx in_down = code == ASHIFT ? in_high : in_low;
28424
28425 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28426 gcc_assert (out
28427 && (REG_P (out) || GET_CODE (out) == SUBREG)
28428 && GET_MODE (out) == DImode);
28429 gcc_assert (in
28430 && (REG_P (in) || GET_CODE (in) == SUBREG)
28431 && GET_MODE (in) == DImode);
28432 gcc_assert (amount
28433 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28434 && GET_MODE (amount) == SImode)
28435 || CONST_INT_P (amount)));
28436 gcc_assert (scratch1 == NULL
28437 || (GET_CODE (scratch1) == SCRATCH)
28438 || (GET_MODE (scratch1) == SImode
28439 && REG_P (scratch1)));
28440 gcc_assert (scratch2 == NULL
28441 || (GET_CODE (scratch2) == SCRATCH)
28442 || (GET_MODE (scratch2) == SImode
28443 && REG_P (scratch2)));
28444 gcc_assert (!REG_P (out) || !REG_P (amount)
28445 || !HARD_REGISTER_P (out)
28446 || (REGNO (out) != REGNO (amount)
28447 && REGNO (out) + 1 != REGNO (amount)));
28448
28449 /* Macros to make following code more readable. */
28450 #define SUB_32(DEST,SRC) \
28451 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28452 #define RSB_32(DEST,SRC) \
28453 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28454 #define SUB_S_32(DEST,SRC) \
28455 gen_addsi3_compare0 ((DEST), (SRC), \
28456 GEN_INT (-32))
28457 #define SET(DEST,SRC) \
28458 gen_rtx_SET ((DEST), (SRC))
28459 #define SHIFT(CODE,SRC,AMOUNT) \
28460 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28461 #define LSHIFT(CODE,SRC,AMOUNT) \
28462 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28463 SImode, (SRC), (AMOUNT))
28464 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28465 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28466 SImode, (SRC), (AMOUNT))
28467 #define ORR(A,B) \
28468 gen_rtx_IOR (SImode, (A), (B))
28469 #define BRANCH(COND,LABEL) \
28470 gen_arm_cond_branch ((LABEL), \
28471 gen_rtx_ ## COND (CCmode, cc_reg, \
28472 const0_rtx), \
28473 cc_reg)
28474
28475 /* Shifts by register and shifts by constant are handled separately. */
28476 if (CONST_INT_P (amount))
28477 {
28478 /* We have a shift-by-constant. */
28479
28480 /* First, handle out-of-range shift amounts.
28481 In both cases we try to match the result an ARM instruction in a
28482 shift-by-register would give. This helps reduce execution
28483 differences between optimization levels, but it won't stop other
28484 parts of the compiler doing different things. This is "undefined
28485 behaviour, in any case. */
28486 if (INTVAL (amount) <= 0)
28487 emit_insn (gen_movdi (out, in));
28488 else if (INTVAL (amount) >= 64)
28489 {
28490 if (code == ASHIFTRT)
28491 {
28492 rtx const31_rtx = GEN_INT (31);
28493 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28494 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28495 }
28496 else
28497 emit_insn (gen_movdi (out, const0_rtx));
28498 }
28499
28500 /* Now handle valid shifts. */
28501 else if (INTVAL (amount) < 32)
28502 {
28503 /* Shifts by a constant less than 32. */
28504 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28505
28506 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28507 emit_insn (SET (out_down,
28508 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28509 out_down)));
28510 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28511 }
28512 else
28513 {
28514 /* Shifts by a constant greater than 31. */
28515 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28516
28517 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28518 if (code == ASHIFTRT)
28519 emit_insn (gen_ashrsi3 (out_up, in_up,
28520 GEN_INT (31)));
28521 else
28522 emit_insn (SET (out_up, const0_rtx));
28523 }
28524 }
28525 else
28526 {
28527 /* We have a shift-by-register. */
28528 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28529
28530 /* This alternative requires the scratch registers. */
28531 gcc_assert (scratch1 && REG_P (scratch1));
28532 gcc_assert (scratch2 && REG_P (scratch2));
28533
28534 /* We will need the values "amount-32" and "32-amount" later.
28535 Swapping them around now allows the later code to be more general. */
28536 switch (code)
28537 {
28538 case ASHIFT:
28539 emit_insn (SUB_32 (scratch1, amount));
28540 emit_insn (RSB_32 (scratch2, amount));
28541 break;
28542 case ASHIFTRT:
28543 emit_insn (RSB_32 (scratch1, amount));
28544 /* Also set CC = amount > 32. */
28545 emit_insn (SUB_S_32 (scratch2, amount));
28546 break;
28547 case LSHIFTRT:
28548 emit_insn (RSB_32 (scratch1, amount));
28549 emit_insn (SUB_32 (scratch2, amount));
28550 break;
28551 default:
28552 gcc_unreachable ();
28553 }
28554
28555 /* Emit code like this:
28556
28557 arithmetic-left:
28558 out_down = in_down << amount;
28559 out_down = (in_up << (amount - 32)) | out_down;
28560 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28561 out_up = in_up << amount;
28562
28563 arithmetic-right:
28564 out_down = in_down >> amount;
28565 out_down = (in_up << (32 - amount)) | out_down;
28566 if (amount < 32)
28567 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28568 out_up = in_up << amount;
28569
28570 logical-right:
28571 out_down = in_down >> amount;
28572 out_down = (in_up << (32 - amount)) | out_down;
28573 if (amount < 32)
28574 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28575 out_up = in_up << amount;
28576
28577 The ARM and Thumb2 variants are the same but implemented slightly
28578 differently. If this were only called during expand we could just
28579 use the Thumb2 case and let combine do the right thing, but this
28580 can also be called from post-reload splitters. */
28581
28582 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28583
28584 if (!TARGET_THUMB2)
28585 {
28586 /* Emit code for ARM mode. */
28587 emit_insn (SET (out_down,
28588 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28589 if (code == ASHIFTRT)
28590 {
28591 rtx_code_label *done_label = gen_label_rtx ();
28592 emit_jump_insn (BRANCH (LT, done_label));
28593 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28594 out_down)));
28595 emit_label (done_label);
28596 }
28597 else
28598 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28599 out_down)));
28600 }
28601 else
28602 {
28603 /* Emit code for Thumb2 mode.
28604 Thumb2 can't do shift and or in one insn. */
28605 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28606 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28607
28608 if (code == ASHIFTRT)
28609 {
28610 rtx_code_label *done_label = gen_label_rtx ();
28611 emit_jump_insn (BRANCH (LT, done_label));
28612 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28613 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28614 emit_label (done_label);
28615 }
28616 else
28617 {
28618 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28619 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28620 }
28621 }
28622
28623 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28624 }
28625
28626 #undef SUB_32
28627 #undef RSB_32
28628 #undef SUB_S_32
28629 #undef SET
28630 #undef SHIFT
28631 #undef LSHIFT
28632 #undef REV_LSHIFT
28633 #undef ORR
28634 #undef BRANCH
28635 }
28636
28637
28638 /* Returns true if a valid comparison operation and makes
28639 the operands in a form that is valid. */
28640 bool
28641 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28642 {
28643 enum rtx_code code = GET_CODE (*comparison);
28644 int code_int;
28645 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28646 ? GET_MODE (*op2) : GET_MODE (*op1);
28647
28648 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28649
28650 if (code == UNEQ || code == LTGT)
28651 return false;
28652
28653 code_int = (int)code;
28654 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28655 PUT_CODE (*comparison, (enum rtx_code)code_int);
28656
28657 switch (mode)
28658 {
28659 case SImode:
28660 if (!arm_add_operand (*op1, mode))
28661 *op1 = force_reg (mode, *op1);
28662 if (!arm_add_operand (*op2, mode))
28663 *op2 = force_reg (mode, *op2);
28664 return true;
28665
28666 case DImode:
28667 if (!cmpdi_operand (*op1, mode))
28668 *op1 = force_reg (mode, *op1);
28669 if (!cmpdi_operand (*op2, mode))
28670 *op2 = force_reg (mode, *op2);
28671 return true;
28672
28673 case SFmode:
28674 case DFmode:
28675 if (!arm_float_compare_operand (*op1, mode))
28676 *op1 = force_reg (mode, *op1);
28677 if (!arm_float_compare_operand (*op2, mode))
28678 *op2 = force_reg (mode, *op2);
28679 return true;
28680 default:
28681 break;
28682 }
28683
28684 return false;
28685
28686 }
28687
28688 /* Maximum number of instructions to set block of memory. */
28689 static int
28690 arm_block_set_max_insns (void)
28691 {
28692 if (optimize_function_for_size_p (cfun))
28693 return 4;
28694 else
28695 return current_tune->max_insns_inline_memset;
28696 }
28697
28698 /* Return TRUE if it's profitable to set block of memory for
28699 non-vectorized case. VAL is the value to set the memory
28700 with. LENGTH is the number of bytes to set. ALIGN is the
28701 alignment of the destination memory in bytes. UNALIGNED_P
28702 is TRUE if we can only set the memory with instructions
28703 meeting alignment requirements. USE_STRD_P is TRUE if we
28704 can use strd to set the memory. */
28705 static bool
28706 arm_block_set_non_vect_profit_p (rtx val,
28707 unsigned HOST_WIDE_INT length,
28708 unsigned HOST_WIDE_INT align,
28709 bool unaligned_p, bool use_strd_p)
28710 {
28711 int num = 0;
28712 /* For leftovers in bytes of 0-7, we can set the memory block using
28713 strb/strh/str with minimum instruction number. */
28714 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28715
28716 if (unaligned_p)
28717 {
28718 num = arm_const_inline_cost (SET, val);
28719 num += length / align + length % align;
28720 }
28721 else if (use_strd_p)
28722 {
28723 num = arm_const_double_inline_cost (val);
28724 num += (length >> 3) + leftover[length & 7];
28725 }
28726 else
28727 {
28728 num = arm_const_inline_cost (SET, val);
28729 num += (length >> 2) + leftover[length & 3];
28730 }
28731
28732 /* We may be able to combine last pair STRH/STRB into a single STR
28733 by shifting one byte back. */
28734 if (unaligned_access && length > 3 && (length & 3) == 3)
28735 num--;
28736
28737 return (num <= arm_block_set_max_insns ());
28738 }
28739
28740 /* Return TRUE if it's profitable to set block of memory for
28741 vectorized case. LENGTH is the number of bytes to set.
28742 ALIGN is the alignment of destination memory in bytes.
28743 MODE is the vector mode used to set the memory. */
28744 static bool
28745 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28746 unsigned HOST_WIDE_INT align,
28747 machine_mode mode)
28748 {
28749 int num;
28750 bool unaligned_p = ((align & 3) != 0);
28751 unsigned int nelt = GET_MODE_NUNITS (mode);
28752
28753 /* Instruction loading constant value. */
28754 num = 1;
28755 /* Instructions storing the memory. */
28756 num += (length + nelt - 1) / nelt;
28757 /* Instructions adjusting the address expression. Only need to
28758 adjust address expression if it's 4 bytes aligned and bytes
28759 leftover can only be stored by mis-aligned store instruction. */
28760 if (!unaligned_p && (length & 3) != 0)
28761 num++;
28762
28763 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28764 if (!unaligned_p && mode == V16QImode)
28765 num--;
28766
28767 return (num <= arm_block_set_max_insns ());
28768 }
28769
28770 /* Set a block of memory using vectorization instructions for the
28771 unaligned case. We fill the first LENGTH bytes of the memory
28772 area starting from DSTBASE with byte constant VALUE. ALIGN is
28773 the alignment requirement of memory. Return TRUE if succeeded. */
28774 static bool
28775 arm_block_set_unaligned_vect (rtx dstbase,
28776 unsigned HOST_WIDE_INT length,
28777 unsigned HOST_WIDE_INT value,
28778 unsigned HOST_WIDE_INT align)
28779 {
28780 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28781 rtx dst, mem;
28782 rtx val_elt, val_vec, reg;
28783 rtx rval[MAX_VECT_LEN];
28784 rtx (*gen_func) (rtx, rtx);
28785 machine_mode mode;
28786 unsigned HOST_WIDE_INT v = value;
28787
28788 gcc_assert ((align & 0x3) != 0);
28789 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28790 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28791 if (length >= nelt_v16)
28792 {
28793 mode = V16QImode;
28794 gen_func = gen_movmisalignv16qi;
28795 }
28796 else
28797 {
28798 mode = V8QImode;
28799 gen_func = gen_movmisalignv8qi;
28800 }
28801 nelt_mode = GET_MODE_NUNITS (mode);
28802 gcc_assert (length >= nelt_mode);
28803 /* Skip if it isn't profitable. */
28804 if (!arm_block_set_vect_profit_p (length, align, mode))
28805 return false;
28806
28807 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28808 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28809
28810 v = sext_hwi (v, BITS_PER_WORD);
28811 val_elt = GEN_INT (v);
28812 for (j = 0; j < nelt_mode; j++)
28813 rval[j] = val_elt;
28814
28815 reg = gen_reg_rtx (mode);
28816 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28817 /* Emit instruction loading the constant value. */
28818 emit_move_insn (reg, val_vec);
28819
28820 /* Handle nelt_mode bytes in a vector. */
28821 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28822 {
28823 emit_insn ((*gen_func) (mem, reg));
28824 if (i + 2 * nelt_mode <= length)
28825 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28826 }
28827
28828 /* If there are not less than nelt_v8 bytes leftover, we must be in
28829 V16QI mode. */
28830 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28831
28832 /* Handle (8, 16) bytes leftover. */
28833 if (i + nelt_v8 < length)
28834 {
28835 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28836 /* We are shifting bytes back, set the alignment accordingly. */
28837 if ((length & 1) != 0 && align >= 2)
28838 set_mem_align (mem, BITS_PER_UNIT);
28839
28840 emit_insn (gen_movmisalignv16qi (mem, reg));
28841 }
28842 /* Handle (0, 8] bytes leftover. */
28843 else if (i < length && i + nelt_v8 >= length)
28844 {
28845 if (mode == V16QImode)
28846 {
28847 reg = gen_lowpart (V8QImode, reg);
28848 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28849 }
28850 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28851 + (nelt_mode - nelt_v8))));
28852 /* We are shifting bytes back, set the alignment accordingly. */
28853 if ((length & 1) != 0 && align >= 2)
28854 set_mem_align (mem, BITS_PER_UNIT);
28855
28856 emit_insn (gen_movmisalignv8qi (mem, reg));
28857 }
28858
28859 return true;
28860 }
28861
28862 /* Set a block of memory using vectorization instructions for the
28863 aligned case. We fill the first LENGTH bytes of the memory area
28864 starting from DSTBASE with byte constant VALUE. ALIGN is the
28865 alignment requirement of memory. Return TRUE if succeeded. */
28866 static bool
28867 arm_block_set_aligned_vect (rtx dstbase,
28868 unsigned HOST_WIDE_INT length,
28869 unsigned HOST_WIDE_INT value,
28870 unsigned HOST_WIDE_INT align)
28871 {
28872 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28873 rtx dst, addr, mem;
28874 rtx val_elt, val_vec, reg;
28875 rtx rval[MAX_VECT_LEN];
28876 machine_mode mode;
28877 unsigned HOST_WIDE_INT v = value;
28878
28879 gcc_assert ((align & 0x3) == 0);
28880 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28881 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28882 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28883 mode = V16QImode;
28884 else
28885 mode = V8QImode;
28886
28887 nelt_mode = GET_MODE_NUNITS (mode);
28888 gcc_assert (length >= nelt_mode);
28889 /* Skip if it isn't profitable. */
28890 if (!arm_block_set_vect_profit_p (length, align, mode))
28891 return false;
28892
28893 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28894
28895 v = sext_hwi (v, BITS_PER_WORD);
28896 val_elt = GEN_INT (v);
28897 for (j = 0; j < nelt_mode; j++)
28898 rval[j] = val_elt;
28899
28900 reg = gen_reg_rtx (mode);
28901 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28902 /* Emit instruction loading the constant value. */
28903 emit_move_insn (reg, val_vec);
28904
28905 i = 0;
28906 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28907 if (mode == V16QImode)
28908 {
28909 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28910 emit_insn (gen_movmisalignv16qi (mem, reg));
28911 i += nelt_mode;
28912 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28913 if (i + nelt_v8 < length && i + nelt_v16 > length)
28914 {
28915 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28916 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28917 /* We are shifting bytes back, set the alignment accordingly. */
28918 if ((length & 0x3) == 0)
28919 set_mem_align (mem, BITS_PER_UNIT * 4);
28920 else if ((length & 0x1) == 0)
28921 set_mem_align (mem, BITS_PER_UNIT * 2);
28922 else
28923 set_mem_align (mem, BITS_PER_UNIT);
28924
28925 emit_insn (gen_movmisalignv16qi (mem, reg));
28926 return true;
28927 }
28928 /* Fall through for bytes leftover. */
28929 mode = V8QImode;
28930 nelt_mode = GET_MODE_NUNITS (mode);
28931 reg = gen_lowpart (V8QImode, reg);
28932 }
28933
28934 /* Handle 8 bytes in a vector. */
28935 for (; (i + nelt_mode <= length); i += nelt_mode)
28936 {
28937 addr = plus_constant (Pmode, dst, i);
28938 mem = adjust_automodify_address (dstbase, mode, addr, i);
28939 emit_move_insn (mem, reg);
28940 }
28941
28942 /* Handle single word leftover by shifting 4 bytes back. We can
28943 use aligned access for this case. */
28944 if (i + UNITS_PER_WORD == length)
28945 {
28946 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28947 mem = adjust_automodify_address (dstbase, mode,
28948 addr, i - UNITS_PER_WORD);
28949 /* We are shifting 4 bytes back, set the alignment accordingly. */
28950 if (align > UNITS_PER_WORD)
28951 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28952
28953 emit_move_insn (mem, reg);
28954 }
28955 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28956 We have to use unaligned access for this case. */
28957 else if (i < length)
28958 {
28959 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28960 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28961 /* We are shifting bytes back, set the alignment accordingly. */
28962 if ((length & 1) == 0)
28963 set_mem_align (mem, BITS_PER_UNIT * 2);
28964 else
28965 set_mem_align (mem, BITS_PER_UNIT);
28966
28967 emit_insn (gen_movmisalignv8qi (mem, reg));
28968 }
28969
28970 return true;
28971 }
28972
28973 /* Set a block of memory using plain strh/strb instructions, only
28974 using instructions allowed by ALIGN on processor. We fill the
28975 first LENGTH bytes of the memory area starting from DSTBASE
28976 with byte constant VALUE. ALIGN is the alignment requirement
28977 of memory. */
28978 static bool
28979 arm_block_set_unaligned_non_vect (rtx dstbase,
28980 unsigned HOST_WIDE_INT length,
28981 unsigned HOST_WIDE_INT value,
28982 unsigned HOST_WIDE_INT align)
28983 {
28984 unsigned int i;
28985 rtx dst, addr, mem;
28986 rtx val_exp, val_reg, reg;
28987 machine_mode mode;
28988 HOST_WIDE_INT v = value;
28989
28990 gcc_assert (align == 1 || align == 2);
28991
28992 if (align == 2)
28993 v |= (value << BITS_PER_UNIT);
28994
28995 v = sext_hwi (v, BITS_PER_WORD);
28996 val_exp = GEN_INT (v);
28997 /* Skip if it isn't profitable. */
28998 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28999 align, true, false))
29000 return false;
29001
29002 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29003 mode = (align == 2 ? HImode : QImode);
29004 val_reg = force_reg (SImode, val_exp);
29005 reg = gen_lowpart (mode, val_reg);
29006
29007 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29008 {
29009 addr = plus_constant (Pmode, dst, i);
29010 mem = adjust_automodify_address (dstbase, mode, addr, i);
29011 emit_move_insn (mem, reg);
29012 }
29013
29014 /* Handle single byte leftover. */
29015 if (i + 1 == length)
29016 {
29017 reg = gen_lowpart (QImode, val_reg);
29018 addr = plus_constant (Pmode, dst, i);
29019 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29020 emit_move_insn (mem, reg);
29021 i++;
29022 }
29023
29024 gcc_assert (i == length);
29025 return true;
29026 }
29027
29028 /* Set a block of memory using plain strd/str/strh/strb instructions,
29029 to permit unaligned copies on processors which support unaligned
29030 semantics for those instructions. We fill the first LENGTH bytes
29031 of the memory area starting from DSTBASE with byte constant VALUE.
29032 ALIGN is the alignment requirement of memory. */
29033 static bool
29034 arm_block_set_aligned_non_vect (rtx dstbase,
29035 unsigned HOST_WIDE_INT length,
29036 unsigned HOST_WIDE_INT value,
29037 unsigned HOST_WIDE_INT align)
29038 {
29039 unsigned int i;
29040 rtx dst, addr, mem;
29041 rtx val_exp, val_reg, reg;
29042 unsigned HOST_WIDE_INT v;
29043 bool use_strd_p;
29044
29045 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29046 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29047
29048 v = (value | (value << 8) | (value << 16) | (value << 24));
29049 if (length < UNITS_PER_WORD)
29050 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29051
29052 if (use_strd_p)
29053 v |= (v << BITS_PER_WORD);
29054 else
29055 v = sext_hwi (v, BITS_PER_WORD);
29056
29057 val_exp = GEN_INT (v);
29058 /* Skip if it isn't profitable. */
29059 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29060 align, false, use_strd_p))
29061 {
29062 if (!use_strd_p)
29063 return false;
29064
29065 /* Try without strd. */
29066 v = (v >> BITS_PER_WORD);
29067 v = sext_hwi (v, BITS_PER_WORD);
29068 val_exp = GEN_INT (v);
29069 use_strd_p = false;
29070 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29071 align, false, use_strd_p))
29072 return false;
29073 }
29074
29075 i = 0;
29076 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29077 /* Handle double words using strd if possible. */
29078 if (use_strd_p)
29079 {
29080 val_reg = force_reg (DImode, val_exp);
29081 reg = val_reg;
29082 for (; (i + 8 <= length); i += 8)
29083 {
29084 addr = plus_constant (Pmode, dst, i);
29085 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29086 emit_move_insn (mem, reg);
29087 }
29088 }
29089 else
29090 val_reg = force_reg (SImode, val_exp);
29091
29092 /* Handle words. */
29093 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29094 for (; (i + 4 <= length); i += 4)
29095 {
29096 addr = plus_constant (Pmode, dst, i);
29097 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29098 if ((align & 3) == 0)
29099 emit_move_insn (mem, reg);
29100 else
29101 emit_insn (gen_unaligned_storesi (mem, reg));
29102 }
29103
29104 /* Merge last pair of STRH and STRB into a STR if possible. */
29105 if (unaligned_access && i > 0 && (i + 3) == length)
29106 {
29107 addr = plus_constant (Pmode, dst, i - 1);
29108 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29109 /* We are shifting one byte back, set the alignment accordingly. */
29110 if ((align & 1) == 0)
29111 set_mem_align (mem, BITS_PER_UNIT);
29112
29113 /* Most likely this is an unaligned access, and we can't tell at
29114 compilation time. */
29115 emit_insn (gen_unaligned_storesi (mem, reg));
29116 return true;
29117 }
29118
29119 /* Handle half word leftover. */
29120 if (i + 2 <= length)
29121 {
29122 reg = gen_lowpart (HImode, val_reg);
29123 addr = plus_constant (Pmode, dst, i);
29124 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29125 if ((align & 1) == 0)
29126 emit_move_insn (mem, reg);
29127 else
29128 emit_insn (gen_unaligned_storehi (mem, reg));
29129
29130 i += 2;
29131 }
29132
29133 /* Handle single byte leftover. */
29134 if (i + 1 == length)
29135 {
29136 reg = gen_lowpart (QImode, val_reg);
29137 addr = plus_constant (Pmode, dst, i);
29138 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29139 emit_move_insn (mem, reg);
29140 }
29141
29142 return true;
29143 }
29144
29145 /* Set a block of memory using vectorization instructions for both
29146 aligned and unaligned cases. We fill the first LENGTH bytes of
29147 the memory area starting from DSTBASE with byte constant VALUE.
29148 ALIGN is the alignment requirement of memory. */
29149 static bool
29150 arm_block_set_vect (rtx dstbase,
29151 unsigned HOST_WIDE_INT length,
29152 unsigned HOST_WIDE_INT value,
29153 unsigned HOST_WIDE_INT align)
29154 {
29155 /* Check whether we need to use unaligned store instruction. */
29156 if (((align & 3) != 0 || (length & 3) != 0)
29157 /* Check whether unaligned store instruction is available. */
29158 && (!unaligned_access || BYTES_BIG_ENDIAN))
29159 return false;
29160
29161 if ((align & 3) == 0)
29162 return arm_block_set_aligned_vect (dstbase, length, value, align);
29163 else
29164 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29165 }
29166
29167 /* Expand string store operation. Firstly we try to do that by using
29168 vectorization instructions, then try with ARM unaligned access and
29169 double-word store if profitable. OPERANDS[0] is the destination,
29170 OPERANDS[1] is the number of bytes, operands[2] is the value to
29171 initialize the memory, OPERANDS[3] is the known alignment of the
29172 destination. */
29173 bool
29174 arm_gen_setmem (rtx *operands)
29175 {
29176 rtx dstbase = operands[0];
29177 unsigned HOST_WIDE_INT length;
29178 unsigned HOST_WIDE_INT value;
29179 unsigned HOST_WIDE_INT align;
29180
29181 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29182 return false;
29183
29184 length = UINTVAL (operands[1]);
29185 if (length > 64)
29186 return false;
29187
29188 value = (UINTVAL (operands[2]) & 0xFF);
29189 align = UINTVAL (operands[3]);
29190 if (TARGET_NEON && length >= 8
29191 && current_tune->string_ops_prefer_neon
29192 && arm_block_set_vect (dstbase, length, value, align))
29193 return true;
29194
29195 if (!unaligned_access && (align & 3) != 0)
29196 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29197
29198 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29199 }
29200
29201
29202 static bool
29203 arm_macro_fusion_p (void)
29204 {
29205 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29206 }
29207
29208
29209 static bool
29210 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29211 {
29212 rtx set_dest;
29213 rtx prev_set = single_set (prev);
29214 rtx curr_set = single_set (curr);
29215
29216 if (!prev_set
29217 || !curr_set)
29218 return false;
29219
29220 if (any_condjump_p (curr))
29221 return false;
29222
29223 if (!arm_macro_fusion_p ())
29224 return false;
29225
29226 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29227 {
29228 /* We are trying to fuse
29229 movw imm / movt imm
29230 instructions as a group that gets scheduled together. */
29231
29232 set_dest = SET_DEST (curr_set);
29233
29234 if (GET_MODE (set_dest) != SImode)
29235 return false;
29236
29237 /* We are trying to match:
29238 prev (movw) == (set (reg r0) (const_int imm16))
29239 curr (movt) == (set (zero_extract (reg r0)
29240 (const_int 16)
29241 (const_int 16))
29242 (const_int imm16_1))
29243 or
29244 prev (movw) == (set (reg r1)
29245 (high (symbol_ref ("SYM"))))
29246 curr (movt) == (set (reg r0)
29247 (lo_sum (reg r1)
29248 (symbol_ref ("SYM")))) */
29249 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29250 {
29251 if (CONST_INT_P (SET_SRC (curr_set))
29252 && CONST_INT_P (SET_SRC (prev_set))
29253 && REG_P (XEXP (set_dest, 0))
29254 && REG_P (SET_DEST (prev_set))
29255 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29256 return true;
29257 }
29258 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29259 && REG_P (SET_DEST (curr_set))
29260 && REG_P (SET_DEST (prev_set))
29261 && GET_CODE (SET_SRC (prev_set)) == HIGH
29262 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29263 return true;
29264 }
29265 return false;
29266 }
29267
29268 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29269
29270 static unsigned HOST_WIDE_INT
29271 arm_asan_shadow_offset (void)
29272 {
29273 return (unsigned HOST_WIDE_INT) 1 << 29;
29274 }
29275
29276
29277 /* This is a temporary fix for PR60655. Ideally we need
29278 to handle most of these cases in the generic part but
29279 currently we reject minus (..) (sym_ref). We try to
29280 ameliorate the case with minus (sym_ref1) (sym_ref2)
29281 where they are in the same section. */
29282
29283 static bool
29284 arm_const_not_ok_for_debug_p (rtx p)
29285 {
29286 tree decl_op0 = NULL;
29287 tree decl_op1 = NULL;
29288
29289 if (GET_CODE (p) == MINUS)
29290 {
29291 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29292 {
29293 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29294 if (decl_op1
29295 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29296 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29297 {
29298 if ((TREE_CODE (decl_op1) == VAR_DECL
29299 || TREE_CODE (decl_op1) == CONST_DECL)
29300 && (TREE_CODE (decl_op0) == VAR_DECL
29301 || TREE_CODE (decl_op0) == CONST_DECL))
29302 return (get_variable_section (decl_op1, false)
29303 != get_variable_section (decl_op0, false));
29304
29305 if (TREE_CODE (decl_op1) == LABEL_DECL
29306 && TREE_CODE (decl_op0) == LABEL_DECL)
29307 return (DECL_CONTEXT (decl_op1)
29308 != DECL_CONTEXT (decl_op0));
29309 }
29310
29311 return true;
29312 }
29313 }
29314
29315 return false;
29316 }
29317
29318 /* return TRUE if x is a reference to a value in a constant pool */
29319 extern bool
29320 arm_is_constant_pool_ref (rtx x)
29321 {
29322 return (MEM_P (x)
29323 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29324 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29325 }
29326
29327 /* Remember the last target of arm_set_current_function. */
29328 static GTY(()) tree arm_previous_fndecl;
29329
29330 /* Invalidate arm_previous_fndecl. */
29331 void
29332 arm_reset_previous_fndecl (void)
29333 {
29334 arm_previous_fndecl = NULL_TREE;
29335 }
29336
29337 /* Establish appropriate back-end context for processing the function
29338 FNDECL. The argument might be NULL to indicate processing at top
29339 level, outside of any function scope. */
29340 static void
29341 arm_set_current_function (tree fndecl)
29342 {
29343 if (!fndecl || fndecl == arm_previous_fndecl)
29344 return;
29345
29346 tree old_tree = (arm_previous_fndecl
29347 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29348 : NULL_TREE);
29349
29350 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29351
29352 arm_previous_fndecl = fndecl;
29353 if (old_tree == new_tree)
29354 ;
29355
29356 else if (new_tree)
29357 {
29358 cl_target_option_restore (&global_options,
29359 TREE_TARGET_OPTION (new_tree));
29360
29361 if (TREE_TARGET_GLOBALS (new_tree))
29362 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29363 else
29364 TREE_TARGET_GLOBALS (new_tree)
29365 = save_target_globals_default_opts ();
29366 }
29367
29368 else if (old_tree)
29369 {
29370 new_tree = target_option_current_node;
29371
29372 cl_target_option_restore (&global_options,
29373 TREE_TARGET_OPTION (new_tree));
29374 if (TREE_TARGET_GLOBALS (new_tree))
29375 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29376 else if (new_tree == target_option_default_node)
29377 restore_target_globals (&default_target_globals);
29378 else
29379 TREE_TARGET_GLOBALS (new_tree)
29380 = save_target_globals_default_opts ();
29381 }
29382
29383 arm_option_params_internal (&global_options);
29384 }
29385
29386 /* Hook to determine if one function can safely inline another. */
29387
29388 static bool
29389 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
29390 {
29391 /* Overidde default hook: Always OK to inline between different modes.
29392 Function with mode specific instructions, e.g using asm, must be explicitely
29393 protected with noinline. */
29394 return true;
29395 }
29396
29397 /* Inner function to process the attribute((target(...))), take an argument and
29398 set the current options from the argument. If we have a list, recursively
29399 go over the list. */
29400
29401 static bool
29402 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29403 {
29404 if (TREE_CODE (args) == TREE_LIST)
29405 {
29406 bool ret = true;
29407 for (; args; args = TREE_CHAIN (args))
29408 if (TREE_VALUE (args)
29409 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29410 ret = false;
29411 return ret;
29412 }
29413
29414 else if (TREE_CODE (args) != STRING_CST)
29415 {
29416 error ("attribute %<target%> argument not a string");
29417 return false;
29418 }
29419
29420 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29421 while (argstr && *argstr != '\0')
29422 {
29423 while (ISSPACE (*argstr))
29424 argstr++;
29425
29426 if (!strcmp (argstr, "thumb"))
29427 {
29428 opts->x_target_flags |= MASK_THUMB;
29429 arm_option_check_internal (opts);
29430 return true;
29431 }
29432
29433 if (!strcmp (argstr, "arm"))
29434 {
29435 opts->x_target_flags &= ~MASK_THUMB;
29436 arm_option_check_internal (opts);
29437 return true;
29438 }
29439
29440 warning (0, "attribute(target(\"%s\")) is unknown", argstr);
29441 return false;
29442 }
29443
29444 return false;
29445 }
29446
29447 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29448
29449 tree
29450 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29451 struct gcc_options *opts_set)
29452 {
29453 if (!arm_valid_target_attribute_rec (args, opts))
29454 return NULL_TREE;
29455
29456 /* Do any overrides, such as global options arch=xxx. */
29457 arm_option_override_internal (opts, opts_set);
29458
29459 return build_target_option_node (opts);
29460 }
29461
29462 /* Hook to validate attribute((target("string"))). */
29463
29464 static bool
29465 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29466 tree args, int ARG_UNUSED (flags))
29467 {
29468 bool ret = true;
29469 struct gcc_options func_options;
29470 tree cur_tree, new_optimize;
29471 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29472
29473 /* Get the optimization options of the current function. */
29474 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29475
29476 /* If the function changed the optimization levels as well as setting target
29477 options, start with the optimizations specified. */
29478 if (!func_optimize)
29479 func_optimize = optimization_default_node;
29480
29481 /* Init func_options. */
29482 memset (&func_options, 0, sizeof (func_options));
29483 init_options_struct (&func_options, NULL);
29484 lang_hooks.init_options_struct (&func_options);
29485
29486 /* Initialize func_options to the defaults. */
29487 cl_optimization_restore (&func_options,
29488 TREE_OPTIMIZATION (func_optimize));
29489
29490 cl_target_option_restore (&func_options,
29491 TREE_TARGET_OPTION (target_option_default_node));
29492
29493 /* Set func_options flags with new target mode. */
29494 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29495 &global_options_set);
29496
29497 if (cur_tree == NULL_TREE)
29498 ret = false;
29499
29500 new_optimize = build_optimization_node (&func_options);
29501
29502 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
29503
29504 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
29505
29506 return ret;
29507 }
29508
29509 void
29510 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29511 {
29512 if (TARGET_UNIFIED_ASM)
29513 fprintf (stream, "\t.syntax unified\n");
29514 else
29515 fprintf (stream, "\t.syntax divided\n");
29516
29517 if (TARGET_THUMB)
29518 {
29519 if (is_called_in_ARM_mode (decl)
29520 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29521 && cfun->is_thunk))
29522 fprintf (stream, "\t.code 32\n");
29523 else if (TARGET_THUMB1)
29524 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29525 else
29526 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29527 }
29528 else
29529 fprintf (stream, "\t.arm\n");
29530
29531 if (TARGET_POKE_FUNCTION_NAME)
29532 arm_poke_function_name (stream, (const char *) name);
29533 }
29534
29535 /* If MEM is in the form of [base+offset], extract the two parts
29536 of address and set to BASE and OFFSET, otherwise return false
29537 after clearing BASE and OFFSET. */
29538
29539 static bool
29540 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29541 {
29542 rtx addr;
29543
29544 gcc_assert (MEM_P (mem));
29545
29546 addr = XEXP (mem, 0);
29547
29548 /* Strip off const from addresses like (const (addr)). */
29549 if (GET_CODE (addr) == CONST)
29550 addr = XEXP (addr, 0);
29551
29552 if (GET_CODE (addr) == REG)
29553 {
29554 *base = addr;
29555 *offset = const0_rtx;
29556 return true;
29557 }
29558
29559 if (GET_CODE (addr) == PLUS
29560 && GET_CODE (XEXP (addr, 0)) == REG
29561 && CONST_INT_P (XEXP (addr, 1)))
29562 {
29563 *base = XEXP (addr, 0);
29564 *offset = XEXP (addr, 1);
29565 return true;
29566 }
29567
29568 *base = NULL_RTX;
29569 *offset = NULL_RTX;
29570
29571 return false;
29572 }
29573
29574 /* If INSN is a load or store of address in the form of [base+offset],
29575 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
29576 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29577 otherwise return FALSE. */
29578
29579 static bool
29580 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29581 {
29582 rtx x, dest, src;
29583
29584 gcc_assert (INSN_P (insn));
29585 x = PATTERN (insn);
29586 if (GET_CODE (x) != SET)
29587 return false;
29588
29589 src = SET_SRC (x);
29590 dest = SET_DEST (x);
29591 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29592 {
29593 *is_load = false;
29594 extract_base_offset_in_addr (dest, base, offset);
29595 }
29596 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29597 {
29598 *is_load = true;
29599 extract_base_offset_in_addr (src, base, offset);
29600 }
29601 else
29602 return false;
29603
29604 return (*base != NULL_RTX && *offset != NULL_RTX);
29605 }
29606
29607 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29608
29609 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
29610 and PRI are only calculated for these instructions. For other instruction,
29611 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
29612 instruction fusion can be supported by returning different priorities.
29613
29614 It's important that irrelevant instructions get the largest FUSION_PRI. */
29615
29616 static void
29617 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29618 int *fusion_pri, int *pri)
29619 {
29620 int tmp, off_val;
29621 bool is_load;
29622 rtx base, offset;
29623
29624 gcc_assert (INSN_P (insn));
29625
29626 tmp = max_pri - 1;
29627 if (!fusion_load_store (insn, &base, &offset, &is_load))
29628 {
29629 *pri = tmp;
29630 *fusion_pri = tmp;
29631 return;
29632 }
29633
29634 /* Load goes first. */
29635 if (is_load)
29636 *fusion_pri = tmp - 1;
29637 else
29638 *fusion_pri = tmp - 2;
29639
29640 tmp /= 2;
29641
29642 /* INSN with smaller base register goes first. */
29643 tmp -= ((REGNO (base) & 0xff) << 20);
29644
29645 /* INSN with smaller offset goes first. */
29646 off_val = (int)(INTVAL (offset));
29647 if (off_val >= 0)
29648 tmp -= (off_val & 0xfffff);
29649 else
29650 tmp += ((- off_val) & 0xfffff);
29651
29652 *pri = tmp;
29653 return;
29654 }
29655 #include "gt-arm.h"