1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
86
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
90
91 void (*arm_lang_output_object_attributes_hook)(void);
92
93 struct four_ints
94 {
95 int i[4];
96 };
97
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_print_operand (FILE *, rtx, int);
121 static void arm_print_operand_address (FILE *, rtx);
122 static bool arm_print_operand_punct_valid_p (unsigned char code);
123 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
124 static arm_cc get_arm_condition_code (rtx);
125 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
126 static const char *output_multi_immediate (rtx *, const char *, const char *,
127 int, HOST_WIDE_INT);
128 static const char *shift_op (rtx, HOST_WIDE_INT *);
129 static struct machine_function *arm_init_machine_status (void);
130 static void thumb_exit (FILE *, int);
131 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
132 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
133 static Mnode *add_minipool_forward_ref (Mfix *);
134 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
135 static Mnode *add_minipool_backward_ref (Mfix *);
136 static void assign_minipool_offsets (Mfix *);
137 static void arm_print_value (FILE *, rtx);
138 static void dump_minipool (rtx_insn *);
139 static int arm_barrier_cost (rtx);
140 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
141 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
142 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
143 machine_mode, rtx);
144 static void arm_reorg (void);
145 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
146 static unsigned long arm_compute_save_reg0_reg12_mask (void);
147 static unsigned long arm_compute_save_reg_mask (void);
148 static unsigned long arm_isr_value (tree);
149 static unsigned long arm_compute_func_type (void);
150 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
153 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
154 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
155 #endif
156 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
157 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
158 static int arm_comp_type_attributes (const_tree, const_tree);
159 static void arm_set_default_type_attributes (tree);
160 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
161 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
162 static int optimal_immediate_sequence (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence);
165 static int optimal_immediate_sequence_1 (enum rtx_code code,
166 unsigned HOST_WIDE_INT val,
167 struct four_ints *return_sequence,
168 int i);
169 static int arm_get_strip_length (int);
170 static bool arm_function_ok_for_sibcall (tree, tree);
171 static machine_mode arm_promote_function_mode (const_tree,
172 machine_mode, int *,
173 const_tree, int);
174 static bool arm_return_in_memory (const_tree, const_tree);
175 static rtx arm_function_value (const_tree, const_tree, bool);
176 static rtx arm_libcall_value_1 (machine_mode);
177 static rtx arm_libcall_value (machine_mode, const_rtx);
178 static bool arm_function_value_regno_p (const unsigned int);
179 static void arm_internal_label (FILE *, const char *, unsigned long);
180 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
181 tree);
182 static bool arm_have_conditional_execution (void);
183 static bool arm_cannot_force_const_mem (machine_mode, rtx);
184 static bool arm_legitimate_constant_p (machine_mode, rtx);
185 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
186 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
187 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
188 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
192 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
193 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
194 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
195 static void emit_constant_insn (rtx cond, rtx pattern);
196 static rtx_insn *emit_set_insn (rtx, rtx);
197 static rtx emit_multi_reg_push (unsigned long, unsigned long);
198 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
199 tree, bool);
200 static rtx arm_function_arg (cumulative_args_t, machine_mode,
201 const_tree, bool);
202 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
203 const_tree, bool);
204 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
205 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
206 const_tree);
207 static rtx aapcs_libcall_value (machine_mode);
208 static int aapcs_select_return_coproc (const_tree, const_tree);
209
210 #ifdef OBJECT_FORMAT_ELF
211 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
212 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
213 #endif
214 #ifndef ARM_PE
215 static void arm_encode_section_info (tree, rtx, int);
216 #endif
217
218 static void arm_file_end (void);
219 static void arm_file_start (void);
220
221 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
222 tree, int *, int);
223 static bool arm_pass_by_reference (cumulative_args_t,
224 machine_mode, const_tree, bool);
225 static bool arm_promote_prototypes (const_tree);
226 static bool arm_default_short_enums (void);
227 static bool arm_align_anon_bitfield (void);
228 static bool arm_return_in_msb (const_tree);
229 static bool arm_must_pass_in_stack (machine_mode, const_tree);
230 static bool arm_return_in_memory (const_tree, const_tree);
231 #if ARM_UNWIND_INFO
232 static void arm_unwind_emit (FILE *, rtx_insn *);
233 static bool arm_output_ttype (rtx);
234 static void arm_asm_emit_except_personality (rtx);
235 static void arm_asm_init_sections (void);
236 #endif
237 static rtx arm_dwarf_register_span (rtx);
238
239 static tree arm_cxx_guard_type (void);
240 static bool arm_cxx_guard_mask_bit (void);
241 static tree arm_get_cookie_size (tree);
242 static bool arm_cookie_has_size (void);
243 static bool arm_cxx_cdtor_returns_this (void);
244 static bool arm_cxx_key_method_may_be_inline (void);
245 static void arm_cxx_determine_class_data_visibility (tree);
246 static bool arm_cxx_class_data_always_comdat (void);
247 static bool arm_cxx_use_aeabi_atexit (void);
248 static void arm_init_libfuncs (void);
249 static tree arm_build_builtin_va_list (void);
250 static void arm_expand_builtin_va_start (tree, rtx);
251 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
252 static void arm_option_override (void);
253 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
254 static bool arm_cannot_copy_insn_p (rtx_insn *);
255 static int arm_issue_rate (void);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static const char *arm_invalid_parameter_type (const_tree t);
261 static const char *arm_invalid_return_type (const_tree t);
262 static tree arm_promoted_type (const_tree t);
263 static tree arm_convert_to_type (tree type, tree expr);
264 static bool arm_scalar_mode_supported_p (machine_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (machine_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
286 static unsigned int arm_autovectorize_vector_sizes (void);
287 static int arm_default_branch_cost (bool, bool);
288 static int arm_cortex_a5_branch_cost (bool, bool);
289 static int arm_cortex_m_branch_cost (bool, bool);
290
291 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
292 const unsigned char *sel);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 \f
309 /* Table of machine attributes. */
310 static const struct attribute_spec arm_attribute_table[] =
311 {
312 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
313 affects_type_identity } */
314 /* Function calls made to this symbol must be done indirectly, because
315 it may lie outside of the 26 bit addressing range of a normal function
316 call. */
317 { "long_call", 0, 0, false, true, true, NULL, false },
318 /* Whereas these functions are always known to reside within the 26 bit
319 addressing range. */
320 { "short_call", 0, 0, false, true, true, NULL, false },
321 /* Specify the procedure call conventions for a function. */
322 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
323 false },
324 /* Interrupt Service Routines have special prologue and epilogue requirements. */
325 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
326 false },
327 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
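/* For example (illustrative), an interrupt handler can be declared as

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional argument names the exception kind ("IRQ", "FIQ", "SWI",
   "ABORT" or "UNDEF"); the prologue and epilogue are then adjusted to
   save the extra state and use the matching exception-return sequence.  */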
329 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
330 false },
331 #ifdef ARM_PE
332 /* ARM/PE has three new attributes:
333 interfacearm - ?
334 dllexport - for exporting a function/variable that will live in a dll
335 dllimport - for importing a function/variable from a dll
336
337 Microsoft allows multiple declspecs in one __declspec, separating
338 them with spaces. We do NOT support this. Instead, use __declspec
339 multiple times.
340 */
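/* For example (illustrative; the second attribute name is arbitrary):

     __declspec(dllimport) __declspec(naked) int f (void);

   rather than the space-separated Microsoft form
   "__declspec(dllimport naked)".  */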
341 { "dllimport", 0, 0, true, false, false, NULL, false },
342 { "dllexport", 0, 0, true, false, false, NULL, false },
343 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
344 false },
345 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
346 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
347 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
348 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
349 false },
350 #endif
351 { NULL, 0, 0, false, false, false, NULL, false }
352 };
353 \f
354 /* Initialize the GCC target structure. */
355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 #undef TARGET_MERGE_DECL_ATTRIBUTES
357 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
358 #endif
359
360 #undef TARGET_LEGITIMIZE_ADDRESS
361 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
362
363 #undef TARGET_LRA_P
364 #define TARGET_LRA_P arm_lra_p
365
366 #undef TARGET_ATTRIBUTE_TABLE
367 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
368
369 #undef TARGET_ASM_FILE_START
370 #define TARGET_ASM_FILE_START arm_file_start
371 #undef TARGET_ASM_FILE_END
372 #define TARGET_ASM_FILE_END arm_file_end
373
374 #undef TARGET_ASM_ALIGNED_SI_OP
375 #define TARGET_ASM_ALIGNED_SI_OP NULL
376 #undef TARGET_ASM_INTEGER
377 #define TARGET_ASM_INTEGER arm_assemble_integer
378
379 #undef TARGET_PRINT_OPERAND
380 #define TARGET_PRINT_OPERAND arm_print_operand
381 #undef TARGET_PRINT_OPERAND_ADDRESS
382 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
383 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
384 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
385
386 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
387 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
388
389 #undef TARGET_ASM_FUNCTION_PROLOGUE
390 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
391
392 #undef TARGET_ASM_FUNCTION_EPILOGUE
393 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
394
395 #undef TARGET_OPTION_OVERRIDE
396 #define TARGET_OPTION_OVERRIDE arm_option_override
397
398 #undef TARGET_COMP_TYPE_ATTRIBUTES
399 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
400
401 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
402 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
403
404 #undef TARGET_SCHED_ADJUST_COST
405 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
406
407 #undef TARGET_SCHED_REORDER
408 #define TARGET_SCHED_REORDER arm_sched_reorder
409
410 #undef TARGET_REGISTER_MOVE_COST
411 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
412
413 #undef TARGET_MEMORY_MOVE_COST
414 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
415
416 #undef TARGET_ENCODE_SECTION_INFO
417 #ifdef ARM_PE
418 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
419 #else
420 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
421 #endif
422
423 #undef TARGET_STRIP_NAME_ENCODING
424 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
425
426 #undef TARGET_ASM_INTERNAL_LABEL
427 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
428
429 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
430 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
431
432 #undef TARGET_FUNCTION_VALUE
433 #define TARGET_FUNCTION_VALUE arm_function_value
434
435 #undef TARGET_LIBCALL_VALUE
436 #define TARGET_LIBCALL_VALUE arm_libcall_value
437
438 #undef TARGET_FUNCTION_VALUE_REGNO_P
439 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
440
441 #undef TARGET_ASM_OUTPUT_MI_THUNK
442 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
443 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
444 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
445
446 #undef TARGET_RTX_COSTS
447 #define TARGET_RTX_COSTS arm_rtx_costs
448 #undef TARGET_ADDRESS_COST
449 #define TARGET_ADDRESS_COST arm_address_cost
450
451 #undef TARGET_SHIFT_TRUNCATION_MASK
452 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
455 #undef TARGET_ARRAY_MODE_SUPPORTED_P
456 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
457 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
458 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
459 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
460 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
461 arm_autovectorize_vector_sizes
462
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
465
466 #undef TARGET_INIT_BUILTINS
467 #define TARGET_INIT_BUILTINS arm_init_builtins
468 #undef TARGET_EXPAND_BUILTIN
469 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
470 #undef TARGET_BUILTIN_DECL
471 #define TARGET_BUILTIN_DECL arm_builtin_decl
472
473 #undef TARGET_INIT_LIBFUNCS
474 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
475
476 #undef TARGET_PROMOTE_FUNCTION_MODE
477 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
478 #undef TARGET_PROMOTE_PROTOTYPES
479 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
480 #undef TARGET_PASS_BY_REFERENCE
481 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
482 #undef TARGET_ARG_PARTIAL_BYTES
483 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
484 #undef TARGET_FUNCTION_ARG
485 #define TARGET_FUNCTION_ARG arm_function_arg
486 #undef TARGET_FUNCTION_ARG_ADVANCE
487 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
488 #undef TARGET_FUNCTION_ARG_BOUNDARY
489 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
490
491 #undef TARGET_SETUP_INCOMING_VARARGS
492 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
493
494 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
495 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
496
497 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
498 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
499 #undef TARGET_TRAMPOLINE_INIT
500 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
501 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
502 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
503
504 #undef TARGET_WARN_FUNC_RETURN
505 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
506
507 #undef TARGET_DEFAULT_SHORT_ENUMS
508 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
509
510 #undef TARGET_ALIGN_ANON_BITFIELD
511 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
512
513 #undef TARGET_NARROW_VOLATILE_BITFIELD
514 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
515
516 #undef TARGET_CXX_GUARD_TYPE
517 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
518
519 #undef TARGET_CXX_GUARD_MASK_BIT
520 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
521
522 #undef TARGET_CXX_GET_COOKIE_SIZE
523 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
524
525 #undef TARGET_CXX_COOKIE_HAS_SIZE
526 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
527
528 #undef TARGET_CXX_CDTOR_RETURNS_THIS
529 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
530
531 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
532 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
533
534 #undef TARGET_CXX_USE_AEABI_ATEXIT
535 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
536
537 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
538 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
539 arm_cxx_determine_class_data_visibility
540
541 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
542 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
543
544 #undef TARGET_RETURN_IN_MSB
545 #define TARGET_RETURN_IN_MSB arm_return_in_msb
546
547 #undef TARGET_RETURN_IN_MEMORY
548 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
549
550 #undef TARGET_MUST_PASS_IN_STACK
551 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
552
553 #if ARM_UNWIND_INFO
554 #undef TARGET_ASM_UNWIND_EMIT
555 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
556
557 /* EABI unwinding tables use a different format for the typeinfo tables. */
558 #undef TARGET_ASM_TTYPE
559 #define TARGET_ASM_TTYPE arm_output_ttype
560
561 #undef TARGET_ARM_EABI_UNWINDER
562 #define TARGET_ARM_EABI_UNWINDER true
563
564 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
565 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
566
567 #undef TARGET_ASM_INIT_SECTIONS
568 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
569 #endif /* ARM_UNWIND_INFO */
570
571 #undef TARGET_DWARF_REGISTER_SPAN
572 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
573
574 #undef TARGET_CANNOT_COPY_INSN_P
575 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
576
577 #ifdef HAVE_AS_TLS
578 #undef TARGET_HAVE_TLS
579 #define TARGET_HAVE_TLS true
580 #endif
581
582 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
583 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
584
585 #undef TARGET_LEGITIMATE_CONSTANT_P
586 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
587
588 #undef TARGET_CANNOT_FORCE_CONST_MEM
589 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
590
591 #undef TARGET_MAX_ANCHOR_OFFSET
592 #define TARGET_MAX_ANCHOR_OFFSET 4095
593
594 /* The minimum is set such that the total size of the block
595 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
596 divisible by eight, ensuring natural spacing of anchors. */
597 #undef TARGET_MIN_ANCHOR_OFFSET
598 #define TARGET_MIN_ANCHOR_OFFSET -4088
599
600 #undef TARGET_SCHED_ISSUE_RATE
601 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
602
603 #undef TARGET_MANGLE_TYPE
604 #define TARGET_MANGLE_TYPE arm_mangle_type
605
606 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
607 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
608
609 #undef TARGET_BUILD_BUILTIN_VA_LIST
610 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
611 #undef TARGET_EXPAND_BUILTIN_VA_START
612 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
613 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
614 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
615
616 #ifdef HAVE_AS_TLS
617 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
618 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
619 #endif
620
621 #undef TARGET_LEGITIMATE_ADDRESS_P
622 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
623
624 #undef TARGET_PREFERRED_RELOAD_CLASS
625 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
626
627 #undef TARGET_INVALID_PARAMETER_TYPE
628 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
629
630 #undef TARGET_INVALID_RETURN_TYPE
631 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
632
633 #undef TARGET_PROMOTED_TYPE
634 #define TARGET_PROMOTED_TYPE arm_promoted_type
635
636 #undef TARGET_CONVERT_TO_TYPE
637 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
638
639 #undef TARGET_SCALAR_MODE_SUPPORTED_P
640 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
641
642 #undef TARGET_FRAME_POINTER_REQUIRED
643 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
644
645 #undef TARGET_CAN_ELIMINATE
646 #define TARGET_CAN_ELIMINATE arm_can_eliminate
647
648 #undef TARGET_CONDITIONAL_REGISTER_USAGE
649 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
650
651 #undef TARGET_CLASS_LIKELY_SPILLED_P
652 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
653
654 #undef TARGET_VECTORIZE_BUILTINS
655 #define TARGET_VECTORIZE_BUILTINS
656
657 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
658 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
659 arm_builtin_vectorized_function
660
661 #undef TARGET_VECTOR_ALIGNMENT
662 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
663
664 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
665 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
666 arm_vector_alignment_reachable
667
668 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
669 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
670 arm_builtin_support_vector_misalignment
671
672 #undef TARGET_PREFERRED_RENAME_CLASS
673 #define TARGET_PREFERRED_RENAME_CLASS \
674 arm_preferred_rename_class
675
676 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
677 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
678 arm_vectorize_vec_perm_const_ok
679
680 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
681 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
682 arm_builtin_vectorization_cost
683 #undef TARGET_VECTORIZE_ADD_STMT_COST
684 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
685
686 #undef TARGET_CANONICALIZE_COMPARISON
687 #define TARGET_CANONICALIZE_COMPARISON \
688 arm_canonicalize_comparison
689
690 #undef TARGET_ASAN_SHADOW_OFFSET
691 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
692
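/* arm_restrict_it corresponds to -mrestrict-it: ARMv8 deprecates IT blocks
   that cover more than one instruction, so in that mode only a single
   conditionally executed instruction is allowed per IT block, while the
   classic Thumb-2 encoding permits up to four.  */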
693 #undef MAX_INSN_PER_IT_BLOCK
694 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
695
696 #undef TARGET_CAN_USE_DOLOOP_P
697 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
698
699 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
700 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
701
702 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
703 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
704
705 #undef TARGET_SCHED_FUSION_PRIORITY
706 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
707
708 struct gcc_target targetm = TARGET_INITIALIZER;
709 \f
710 /* Obstack for minipool constant handling. */
711 static struct obstack minipool_obstack;
712 static char * minipool_startobj;
713
714 /* The maximum number of insns skipped which
715 will be conditionalised if possible. */
716 static int max_insns_skipped = 5;
717
718 extern FILE * asm_out_file;
719
720 /* True if we are currently building a constant table. */
721 int making_const_table;
722
723 /* The processor for which instructions should be scheduled. */
724 enum processor_type arm_tune = arm_none;
725
726 /* The current tuning set. */
727 const struct tune_params *current_tune;
728
729 /* Which floating point hardware to schedule for. */
730 int arm_fpu_attr;
731
732 /* Which floating point hardware to use. */
733 const struct arm_fpu_desc *arm_fpu_desc;
734
735 /* Used for Thumb call_via trampolines. */
736 rtx thumb_call_via_label[14];
737 static int thumb_call_reg_needed;
738
739 /* The bits in this mask specify which
740 instructions we are allowed to generate. */
741 unsigned long insn_flags = 0;
742
743 /* The bits in this mask specify which instruction scheduling options should
744 be used. */
745 unsigned long tune_flags = 0;
746
747 /* The highest ARM architecture version supported by the
748 target. */
749 enum base_architecture arm_base_arch = BASE_ARCH_0;
750
751 /* The following are used in the arm.md file as equivalents to bits
752 in the above two flag variables. */
753
754 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
755 int arm_arch3m = 0;
756
757 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
758 int arm_arch4 = 0;
759
760 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
761 int arm_arch4t = 0;
762
763 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
764 int arm_arch5 = 0;
765
766 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
767 int arm_arch5e = 0;
768
769 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
770 int arm_arch6 = 0;
771
772 /* Nonzero if this chip supports the ARM 6K extensions. */
773 int arm_arch6k = 0;
774
775 /* Nonzero if instructions present in ARMv6-M can be used. */
776 int arm_arch6m = 0;
777
778 /* Nonzero if this chip supports the ARM 7 extensions. */
779 int arm_arch7 = 0;
780
781 /* Nonzero if instructions not present in the 'M' profile can be used. */
782 int arm_arch_notm = 0;
783
784 /* Nonzero if instructions present in ARMv7E-M can be used. */
785 int arm_arch7em = 0;
786
787 /* Nonzero if instructions present in ARMv8 can be used. */
788 int arm_arch8 = 0;
789
790 /* Nonzero if this chip can benefit from load scheduling. */
791 int arm_ld_sched = 0;
792
793 /* Nonzero if this chip is a StrongARM. */
794 int arm_tune_strongarm = 0;
795
796 /* Nonzero if this chip supports Intel Wireless MMX technology. */
797 int arm_arch_iwmmxt = 0;
798
799 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
800 int arm_arch_iwmmxt2 = 0;
801
802 /* Nonzero if this chip is an XScale. */
803 int arm_arch_xscale = 0;
804
805 /* Nonzero if tuning for XScale. */
806 int arm_tune_xscale = 0;
807
808 /* Nonzero if we want to tune for stores that access the write-buffer.
809 This typically means an ARM6 or ARM7 with MMU or MPU. */
810 int arm_tune_wbuf = 0;
811
812 /* Nonzero if tuning for Cortex-A9. */
813 int arm_tune_cortex_a9 = 0;
814
815 /* Nonzero if generating Thumb instructions. */
816 int thumb_code = 0;
817
818 /* Nonzero if generating Thumb-1 instructions. */
819 int thumb1_code = 0;
820
821 /* Nonzero if we should define __THUMB_INTERWORK__ in the
822 preprocessor.
823 XXX This is a bit of a hack; it's intended to help work around
824 problems in GLD which doesn't understand that armv5t code is
825 interworking clean. */
826 int arm_cpp_interwork = 0;
827
828 /* Nonzero if chip supports Thumb 2. */
829 int arm_arch_thumb2;
830
831 /* Nonzero if chip supports integer division instruction. */
832 int arm_arch_arm_hwdiv;
833 int arm_arch_thumb_hwdiv;
834
835 /* Nonzero if we should use Neon to handle 64-bit operations rather
836 than core registers. */
837 int prefer_neon_for_64bits = 0;
838
839 /* Nonzero if we shouldn't use literal pools. */
840 bool arm_disable_literal_pool = false;
841
842 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
843 we must report the mode of the memory reference from
844 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
845 machine_mode output_memory_reference_mode;
846
847 /* The register number to be used for the PIC offset register. */
848 unsigned arm_pic_register = INVALID_REGNUM;
849
850 enum arm_pcs arm_pcs_default;
851
852 /* For an explanation of these variables, see final_prescan_insn below. */
853 int arm_ccfsm_state;
854 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
855 enum arm_cond_code arm_current_cc;
856
857 rtx arm_target_insn;
858 int arm_target_label;
859 /* The number of conditionally executed insns, including the current insn. */
860 int arm_condexec_count = 0;
861 /* A bitmask specifying the patterns for the IT block.
862 Zero means do not output an IT block before this insn. */
863 int arm_condexec_mask = 0;
864 /* The number of bits used in arm_condexec_mask. */
865 int arm_condexec_masklen = 0;
866
867 /* Nonzero if chip supports the ARMv8 CRC instructions. */
868 int arm_arch_crc = 0;
869
870 /* Nonzero if the core has a very small, high-latency, multiply unit. */
871 int arm_m_profile_small_mul = 0;
872
873 /* The condition codes of the ARM, and the inverse function. */
874 static const char * const arm_condition_codes[] =
875 {
876 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
877 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
878 };
879
880 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
881 int arm_regs_in_sequence[] =
882 {
883 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
884 };
885
886 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
887 #define streq(string1, string2) (strcmp (string1, string2) == 0)
888
889 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
890 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
891 | (1 << PIC_OFFSET_TABLE_REGNUM)))
892 \f
893 /* Initialization code. */
894
895 struct processors
896 {
897 const char *const name;
898 enum processor_type core;
899 const char *arch;
900 enum base_architecture base_arch;
901 const unsigned long flags;
902 const struct tune_params *const tune;
903 };
904
905
906 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
907 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
908 prefetch_slots, \
909 l1_size, \
910 l1_line_size
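/* For example (illustrative values), ARM_PREFETCH_BENEFICIAL (4, 32, 32)
   simply expands to the three comma-separated initializers 4, 32, 32 that
   fill the prefetch-slot, L1-cache-size and L1-line-size fields of a
   tune_params entry.  */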
911
912 /* arm generic vectorizer costs. */
913 static const
914 struct cpu_vec_costs arm_default_vec_cost = {
915 1, /* scalar_stmt_cost. */
916 1, /* scalar_load_cost. */
917 1, /* scalar_store_cost. */
918 1, /* vec_stmt_cost. */
919 1, /* vec_to_scalar_cost. */
920 1, /* scalar_to_vec_cost. */
921 1, /* vec_align_load_cost. */
922 1, /* vec_unalign_load_cost. */
923 1, /* vec_unalign_store_cost. */
924 1, /* vec_store_cost. */
925 3, /* cond_taken_branch_cost. */
926 1, /* cond_not_taken_branch_cost. */
927 };
928
929 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
930 #include "aarch-cost-tables.h"
931
932
933
934 const struct cpu_cost_table cortexa9_extra_costs =
935 {
936 /* ALU */
937 {
938 0, /* arith. */
939 0, /* logical. */
940 0, /* shift. */
941 COSTS_N_INSNS (1), /* shift_reg. */
942 COSTS_N_INSNS (1), /* arith_shift. */
943 COSTS_N_INSNS (2), /* arith_shift_reg. */
944 0, /* log_shift. */
945 COSTS_N_INSNS (1), /* log_shift_reg. */
946 COSTS_N_INSNS (1), /* extend. */
947 COSTS_N_INSNS (2), /* extend_arith. */
948 COSTS_N_INSNS (1), /* bfi. */
949 COSTS_N_INSNS (1), /* bfx. */
950 0, /* clz. */
951 0, /* rev. */
952 0, /* non_exec. */
953 true /* non_exec_costs_exec. */
954 },
955 {
956 /* MULT SImode */
957 {
958 COSTS_N_INSNS (3), /* simple. */
959 COSTS_N_INSNS (3), /* flag_setting. */
960 COSTS_N_INSNS (2), /* extend. */
961 COSTS_N_INSNS (3), /* add. */
962 COSTS_N_INSNS (2), /* extend_add. */
963 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
964 },
965 /* MULT DImode */
966 {
967 0, /* simple (N/A). */
968 0, /* flag_setting (N/A). */
969 COSTS_N_INSNS (4), /* extend. */
970 0, /* add (N/A). */
971 COSTS_N_INSNS (4), /* extend_add. */
972 0 /* idiv (N/A). */
973 }
974 },
975 /* LD/ST */
976 {
977 COSTS_N_INSNS (2), /* load. */
978 COSTS_N_INSNS (2), /* load_sign_extend. */
979 COSTS_N_INSNS (2), /* ldrd. */
980 COSTS_N_INSNS (2), /* ldm_1st. */
981 1, /* ldm_regs_per_insn_1st. */
982 2, /* ldm_regs_per_insn_subsequent. */
983 COSTS_N_INSNS (5), /* loadf. */
984 COSTS_N_INSNS (5), /* loadd. */
985 COSTS_N_INSNS (1), /* load_unaligned. */
986 COSTS_N_INSNS (2), /* store. */
987 COSTS_N_INSNS (2), /* strd. */
988 COSTS_N_INSNS (2), /* stm_1st. */
989 1, /* stm_regs_per_insn_1st. */
990 2, /* stm_regs_per_insn_subsequent. */
991 COSTS_N_INSNS (1), /* storef. */
992 COSTS_N_INSNS (1), /* stored. */
993 COSTS_N_INSNS (1) /* store_unaligned. */
994 },
995 {
996 /* FP SFmode */
997 {
998 COSTS_N_INSNS (14), /* div. */
999 COSTS_N_INSNS (4), /* mult. */
1000 COSTS_N_INSNS (7), /* mult_addsub. */
1001 COSTS_N_INSNS (30), /* fma. */
1002 COSTS_N_INSNS (3), /* addsub. */
1003 COSTS_N_INSNS (1), /* fpconst. */
1004 COSTS_N_INSNS (1), /* neg. */
1005 COSTS_N_INSNS (3), /* compare. */
1006 COSTS_N_INSNS (3), /* widen. */
1007 COSTS_N_INSNS (3), /* narrow. */
1008 COSTS_N_INSNS (3), /* toint. */
1009 COSTS_N_INSNS (3), /* fromint. */
1010 COSTS_N_INSNS (3) /* roundint. */
1011 },
1012 /* FP DFmode */
1013 {
1014 COSTS_N_INSNS (24), /* div. */
1015 COSTS_N_INSNS (5), /* mult. */
1016 COSTS_N_INSNS (8), /* mult_addsub. */
1017 COSTS_N_INSNS (30), /* fma. */
1018 COSTS_N_INSNS (3), /* addsub. */
1019 COSTS_N_INSNS (1), /* fpconst. */
1020 COSTS_N_INSNS (1), /* neg. */
1021 COSTS_N_INSNS (3), /* compare. */
1022 COSTS_N_INSNS (3), /* widen. */
1023 COSTS_N_INSNS (3), /* narrow. */
1024 COSTS_N_INSNS (3), /* toint. */
1025 COSTS_N_INSNS (3), /* fromint. */
1026 COSTS_N_INSNS (3) /* roundint. */
1027 }
1028 },
1029 /* Vector */
1030 {
1031 COSTS_N_INSNS (1) /* alu. */
1032 }
1033 };
1034
1035 const struct cpu_cost_table cortexa8_extra_costs =
1036 {
1037 /* ALU */
1038 {
1039 0, /* arith. */
1040 0, /* logical. */
1041 COSTS_N_INSNS (1), /* shift. */
1042 0, /* shift_reg. */
1043 COSTS_N_INSNS (1), /* arith_shift. */
1044 0, /* arith_shift_reg. */
1045 COSTS_N_INSNS (1), /* log_shift. */
1046 0, /* log_shift_reg. */
1047 0, /* extend. */
1048 0, /* extend_arith. */
1049 0, /* bfi. */
1050 0, /* bfx. */
1051 0, /* clz. */
1052 0, /* rev. */
1053 0, /* non_exec. */
1054 true /* non_exec_costs_exec. */
1055 },
1056 {
1057 /* MULT SImode */
1058 {
1059 COSTS_N_INSNS (1), /* simple. */
1060 COSTS_N_INSNS (1), /* flag_setting. */
1061 COSTS_N_INSNS (1), /* extend. */
1062 COSTS_N_INSNS (1), /* add. */
1063 COSTS_N_INSNS (1), /* extend_add. */
1064 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1065 },
1066 /* MULT DImode */
1067 {
1068 0, /* simple (N/A). */
1069 0, /* flag_setting (N/A). */
1070 COSTS_N_INSNS (2), /* extend. */
1071 0, /* add (N/A). */
1072 COSTS_N_INSNS (2), /* extend_add. */
1073 0 /* idiv (N/A). */
1074 }
1075 },
1076 /* LD/ST */
1077 {
1078 COSTS_N_INSNS (1), /* load. */
1079 COSTS_N_INSNS (1), /* load_sign_extend. */
1080 COSTS_N_INSNS (1), /* ldrd. */
1081 COSTS_N_INSNS (1), /* ldm_1st. */
1082 1, /* ldm_regs_per_insn_1st. */
1083 2, /* ldm_regs_per_insn_subsequent. */
1084 COSTS_N_INSNS (1), /* loadf. */
1085 COSTS_N_INSNS (1), /* loadd. */
1086 COSTS_N_INSNS (1), /* load_unaligned. */
1087 COSTS_N_INSNS (1), /* store. */
1088 COSTS_N_INSNS (1), /* strd. */
1089 COSTS_N_INSNS (1), /* stm_1st. */
1090 1, /* stm_regs_per_insn_1st. */
1091 2, /* stm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* storef. */
1093 COSTS_N_INSNS (1), /* stored. */
1094 COSTS_N_INSNS (1) /* store_unaligned. */
1095 },
1096 {
1097 /* FP SFmode */
1098 {
1099 COSTS_N_INSNS (36), /* div. */
1100 COSTS_N_INSNS (11), /* mult. */
1101 COSTS_N_INSNS (20), /* mult_addsub. */
1102 COSTS_N_INSNS (30), /* fma. */
1103 COSTS_N_INSNS (9), /* addsub. */
1104 COSTS_N_INSNS (3), /* fpconst. */
1105 COSTS_N_INSNS (3), /* neg. */
1106 COSTS_N_INSNS (6), /* compare. */
1107 COSTS_N_INSNS (4), /* widen. */
1108 COSTS_N_INSNS (4), /* narrow. */
1109 COSTS_N_INSNS (8), /* toint. */
1110 COSTS_N_INSNS (8), /* fromint. */
1111 COSTS_N_INSNS (8) /* roundint. */
1112 },
1113 /* FP DFmode */
1114 {
1115 COSTS_N_INSNS (64), /* div. */
1116 COSTS_N_INSNS (16), /* mult. */
1117 COSTS_N_INSNS (25), /* mult_addsub. */
1118 COSTS_N_INSNS (30), /* fma. */
1119 COSTS_N_INSNS (9), /* addsub. */
1120 COSTS_N_INSNS (3), /* fpconst. */
1121 COSTS_N_INSNS (3), /* neg. */
1122 COSTS_N_INSNS (6), /* compare. */
1123 COSTS_N_INSNS (6), /* widen. */
1124 COSTS_N_INSNS (6), /* narrow. */
1125 COSTS_N_INSNS (8), /* toint. */
1126 COSTS_N_INSNS (8), /* fromint. */
1127 COSTS_N_INSNS (8) /* roundint. */
1128 }
1129 },
1130 /* Vector */
1131 {
1132 COSTS_N_INSNS (1) /* alu. */
1133 }
1134 };
1135
1136 const struct cpu_cost_table cortexa5_extra_costs =
1137 {
1138 /* ALU */
1139 {
1140 0, /* arith. */
1141 0, /* logical. */
1142 COSTS_N_INSNS (1), /* shift. */
1143 COSTS_N_INSNS (1), /* shift_reg. */
1144 COSTS_N_INSNS (1), /* arith_shift. */
1145 COSTS_N_INSNS (1), /* arith_shift_reg. */
1146 COSTS_N_INSNS (1), /* log_shift. */
1147 COSTS_N_INSNS (1), /* log_shift_reg. */
1148 COSTS_N_INSNS (1), /* extend. */
1149 COSTS_N_INSNS (1), /* extend_arith. */
1150 COSTS_N_INSNS (1), /* bfi. */
1151 COSTS_N_INSNS (1), /* bfx. */
1152 COSTS_N_INSNS (1), /* clz. */
1153 COSTS_N_INSNS (1), /* rev. */
1154 0, /* non_exec. */
1155 true /* non_exec_costs_exec. */
1156 },
1157
1158 {
1159 /* MULT SImode */
1160 {
1161 0, /* simple. */
1162 COSTS_N_INSNS (1), /* flag_setting. */
1163 COSTS_N_INSNS (1), /* extend. */
1164 COSTS_N_INSNS (1), /* add. */
1165 COSTS_N_INSNS (1), /* extend_add. */
1166 COSTS_N_INSNS (7) /* idiv. */
1167 },
1168 /* MULT DImode */
1169 {
1170 0, /* simple (N/A). */
1171 0, /* flag_setting (N/A). */
1172 COSTS_N_INSNS (1), /* extend. */
1173 0, /* add. */
1174 COSTS_N_INSNS (2), /* extend_add. */
1175 0 /* idiv (N/A). */
1176 }
1177 },
1178 /* LD/ST */
1179 {
1180 COSTS_N_INSNS (1), /* load. */
1181 COSTS_N_INSNS (1), /* load_sign_extend. */
1182 COSTS_N_INSNS (6), /* ldrd. */
1183 COSTS_N_INSNS (1), /* ldm_1st. */
1184 1, /* ldm_regs_per_insn_1st. */
1185 2, /* ldm_regs_per_insn_subsequent. */
1186 COSTS_N_INSNS (2), /* loadf. */
1187 COSTS_N_INSNS (4), /* loadd. */
1188 COSTS_N_INSNS (1), /* load_unaligned. */
1189 COSTS_N_INSNS (1), /* store. */
1190 COSTS_N_INSNS (3), /* strd. */
1191 COSTS_N_INSNS (1), /* stm_1st. */
1192 1, /* stm_regs_per_insn_1st. */
1193 2, /* stm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (2), /* storef. */
1195 COSTS_N_INSNS (2), /* stored. */
1196 COSTS_N_INSNS (1) /* store_unaligned. */
1197 },
1198 {
1199 /* FP SFmode */
1200 {
1201 COSTS_N_INSNS (15), /* div. */
1202 COSTS_N_INSNS (3), /* mult. */
1203 COSTS_N_INSNS (7), /* mult_addsub. */
1204 COSTS_N_INSNS (7), /* fma. */
1205 COSTS_N_INSNS (3), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (3), /* compare. */
1209 COSTS_N_INSNS (3), /* widen. */
1210 COSTS_N_INSNS (3), /* narrow. */
1211 COSTS_N_INSNS (3), /* toint. */
1212 COSTS_N_INSNS (3), /* fromint. */
1213 COSTS_N_INSNS (3) /* roundint. */
1214 },
1215 /* FP DFmode */
1216 {
1217 COSTS_N_INSNS (30), /* div. */
1218 COSTS_N_INSNS (6), /* mult. */
1219 COSTS_N_INSNS (10), /* mult_addsub. */
1220 COSTS_N_INSNS (7), /* fma. */
1221 COSTS_N_INSNS (3), /* addsub. */
1222 COSTS_N_INSNS (3), /* fpconst. */
1223 COSTS_N_INSNS (3), /* neg. */
1224 COSTS_N_INSNS (3), /* compare. */
1225 COSTS_N_INSNS (3), /* widen. */
1226 COSTS_N_INSNS (3), /* narrow. */
1227 COSTS_N_INSNS (3), /* toint. */
1228 COSTS_N_INSNS (3), /* fromint. */
1229 COSTS_N_INSNS (3) /* roundint. */
1230 }
1231 },
1232 /* Vector */
1233 {
1234 COSTS_N_INSNS (1) /* alu. */
1235 }
1236 };
1237
1238
1239 const struct cpu_cost_table cortexa7_extra_costs =
1240 {
1241 /* ALU */
1242 {
1243 0, /* arith. */
1244 0, /* logical. */
1245 COSTS_N_INSNS (1), /* shift. */
1246 COSTS_N_INSNS (1), /* shift_reg. */
1247 COSTS_N_INSNS (1), /* arith_shift. */
1248 COSTS_N_INSNS (1), /* arith_shift_reg. */
1249 COSTS_N_INSNS (1), /* log_shift. */
1250 COSTS_N_INSNS (1), /* log_shift_reg. */
1251 COSTS_N_INSNS (1), /* extend. */
1252 COSTS_N_INSNS (1), /* extend_arith. */
1253 COSTS_N_INSNS (1), /* bfi. */
1254 COSTS_N_INSNS (1), /* bfx. */
1255 COSTS_N_INSNS (1), /* clz. */
1256 COSTS_N_INSNS (1), /* rev. */
1257 0, /* non_exec. */
1258 true /* non_exec_costs_exec. */
1259 },
1260
1261 {
1262 /* MULT SImode */
1263 {
1264 0, /* simple. */
1265 COSTS_N_INSNS (1), /* flag_setting. */
1266 COSTS_N_INSNS (1), /* extend. */
1267 COSTS_N_INSNS (1), /* add. */
1268 COSTS_N_INSNS (1), /* extend_add. */
1269 COSTS_N_INSNS (7) /* idiv. */
1270 },
1271 /* MULT DImode */
1272 {
1273 0, /* simple (N/A). */
1274 0, /* flag_setting (N/A). */
1275 COSTS_N_INSNS (1), /* extend. */
1276 0, /* add. */
1277 COSTS_N_INSNS (2), /* extend_add. */
1278 0 /* idiv (N/A). */
1279 }
1280 },
1281 /* LD/ST */
1282 {
1283 COSTS_N_INSNS (1), /* load. */
1284 COSTS_N_INSNS (1), /* load_sign_extend. */
1285 COSTS_N_INSNS (3), /* ldrd. */
1286 COSTS_N_INSNS (1), /* ldm_1st. */
1287 1, /* ldm_regs_per_insn_1st. */
1288 2, /* ldm_regs_per_insn_subsequent. */
1289 COSTS_N_INSNS (2), /* loadf. */
1290 COSTS_N_INSNS (2), /* loadd. */
1291 COSTS_N_INSNS (1), /* load_unaligned. */
1292 COSTS_N_INSNS (1), /* store. */
1293 COSTS_N_INSNS (3), /* strd. */
1294 COSTS_N_INSNS (1), /* stm_1st. */
1295 1, /* stm_regs_per_insn_1st. */
1296 2, /* stm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* storef. */
1298 COSTS_N_INSNS (2), /* stored. */
1299 COSTS_N_INSNS (1) /* store_unaligned. */
1300 },
1301 {
1302 /* FP SFmode */
1303 {
1304 COSTS_N_INSNS (15), /* div. */
1305 COSTS_N_INSNS (3), /* mult. */
1306 COSTS_N_INSNS (7), /* mult_addsub. */
1307 COSTS_N_INSNS (7), /* fma. */
1308 COSTS_N_INSNS (3), /* addsub. */
1309 COSTS_N_INSNS (3), /* fpconst. */
1310 COSTS_N_INSNS (3), /* neg. */
1311 COSTS_N_INSNS (3), /* compare. */
1312 COSTS_N_INSNS (3), /* widen. */
1313 COSTS_N_INSNS (3), /* narrow. */
1314 COSTS_N_INSNS (3), /* toint. */
1315 COSTS_N_INSNS (3), /* fromint. */
1316 COSTS_N_INSNS (3) /* roundint. */
1317 },
1318 /* FP DFmode */
1319 {
1320 COSTS_N_INSNS (30), /* div. */
1321 COSTS_N_INSNS (6), /* mult. */
1322 COSTS_N_INSNS (10), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1333 }
1334 },
1335 /* Vector */
1336 {
1337 COSTS_N_INSNS (1) /* alu. */
1338 }
1339 };
1340
1341 const struct cpu_cost_table cortexa12_extra_costs =
1342 {
1343 /* ALU */
1344 {
1345 0, /* arith. */
1346 0, /* logical. */
1347 0, /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 0, /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 0, /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1361 },
1362 /* MULT SImode */
1363 {
1364 {
1365 COSTS_N_INSNS (2), /* simple. */
1366 COSTS_N_INSNS (3), /* flag_setting. */
1367 COSTS_N_INSNS (2), /* extend. */
1368 COSTS_N_INSNS (3), /* add. */
1369 COSTS_N_INSNS (2), /* extend_add. */
1370 COSTS_N_INSNS (18) /* idiv. */
1371 },
1372 /* MULT DImode */
1373 {
1374 0, /* simple (N/A). */
1375 0, /* flag_setting (N/A). */
1376 COSTS_N_INSNS (3), /* extend. */
1377 0, /* add (N/A). */
1378 COSTS_N_INSNS (3), /* extend_add. */
1379 0 /* idiv (N/A). */
1380 }
1381 },
1382 /* LD/ST */
1383 {
1384 COSTS_N_INSNS (3), /* load. */
1385 COSTS_N_INSNS (3), /* load_sign_extend. */
1386 COSTS_N_INSNS (3), /* ldrd. */
1387 COSTS_N_INSNS (3), /* ldm_1st. */
1388 1, /* ldm_regs_per_insn_1st. */
1389 2, /* ldm_regs_per_insn_subsequent. */
1390 COSTS_N_INSNS (3), /* loadf. */
1391 COSTS_N_INSNS (3), /* loadd. */
1392 0, /* load_unaligned. */
1393 0, /* store. */
1394 0, /* strd. */
1395 0, /* stm_1st. */
1396 1, /* stm_regs_per_insn_1st. */
1397 2, /* stm_regs_per_insn_subsequent. */
1398 COSTS_N_INSNS (2), /* storef. */
1399 COSTS_N_INSNS (2), /* stored. */
1400 0 /* store_unaligned. */
1401 },
1402 {
1403 /* FP SFmode */
1404 {
1405 COSTS_N_INSNS (17), /* div. */
1406 COSTS_N_INSNS (4), /* mult. */
1407 COSTS_N_INSNS (8), /* mult_addsub. */
1408 COSTS_N_INSNS (8), /* fma. */
1409 COSTS_N_INSNS (4), /* addsub. */
1410 COSTS_N_INSNS (2), /* fpconst. */
1411 COSTS_N_INSNS (2), /* neg. */
1412 COSTS_N_INSNS (2), /* compare. */
1413 COSTS_N_INSNS (4), /* widen. */
1414 COSTS_N_INSNS (4), /* narrow. */
1415 COSTS_N_INSNS (4), /* toint. */
1416 COSTS_N_INSNS (4), /* fromint. */
1417 COSTS_N_INSNS (4) /* roundint. */
1418 },
1419 /* FP DFmode */
1420 {
1421 COSTS_N_INSNS (31), /* div. */
1422 COSTS_N_INSNS (4), /* mult. */
1423 COSTS_N_INSNS (8), /* mult_addsub. */
1424 COSTS_N_INSNS (8), /* fma. */
1425 COSTS_N_INSNS (4), /* addsub. */
1426 COSTS_N_INSNS (2), /* fpconst. */
1427 COSTS_N_INSNS (2), /* neg. */
1428 COSTS_N_INSNS (2), /* compare. */
1429 COSTS_N_INSNS (4), /* widen. */
1430 COSTS_N_INSNS (4), /* narrow. */
1431 COSTS_N_INSNS (4), /* toint. */
1432 COSTS_N_INSNS (4), /* fromint. */
1433 COSTS_N_INSNS (4) /* roundint. */
1434 }
1435 },
1436 /* Vector */
1437 {
1438 COSTS_N_INSNS (1) /* alu. */
1439 }
1440 };
1441
1442 const struct cpu_cost_table cortexa15_extra_costs =
1443 {
1444 /* ALU */
1445 {
1446 0, /* arith. */
1447 0, /* logical. */
1448 0, /* shift. */
1449 0, /* shift_reg. */
1450 COSTS_N_INSNS (1), /* arith_shift. */
1451 COSTS_N_INSNS (1), /* arith_shift_reg. */
1452 COSTS_N_INSNS (1), /* log_shift. */
1453 COSTS_N_INSNS (1), /* log_shift_reg. */
1454 0, /* extend. */
1455 COSTS_N_INSNS (1), /* extend_arith. */
1456 COSTS_N_INSNS (1), /* bfi. */
1457 0, /* bfx. */
1458 0, /* clz. */
1459 0, /* rev. */
1460 0, /* non_exec. */
1461 true /* non_exec_costs_exec. */
1462 },
1463 /* MULT SImode */
1464 {
1465 {
1466 COSTS_N_INSNS (2), /* simple. */
1467 COSTS_N_INSNS (3), /* flag_setting. */
1468 COSTS_N_INSNS (2), /* extend. */
1469 COSTS_N_INSNS (2), /* add. */
1470 COSTS_N_INSNS (2), /* extend_add. */
1471 COSTS_N_INSNS (18) /* idiv. */
1472 },
1473 /* MULT DImode */
1474 {
1475 0, /* simple (N/A). */
1476 0, /* flag_setting (N/A). */
1477 COSTS_N_INSNS (3), /* extend. */
1478 0, /* add (N/A). */
1479 COSTS_N_INSNS (3), /* extend_add. */
1480 0 /* idiv (N/A). */
1481 }
1482 },
1483 /* LD/ST */
1484 {
1485 COSTS_N_INSNS (3), /* load. */
1486 COSTS_N_INSNS (3), /* load_sign_extend. */
1487 COSTS_N_INSNS (3), /* ldrd. */
1488 COSTS_N_INSNS (4), /* ldm_1st. */
1489 1, /* ldm_regs_per_insn_1st. */
1490 2, /* ldm_regs_per_insn_subsequent. */
1491 COSTS_N_INSNS (4), /* loadf. */
1492 COSTS_N_INSNS (4), /* loadd. */
1493 0, /* load_unaligned. */
1494 0, /* store. */
1495 0, /* strd. */
1496 COSTS_N_INSNS (1), /* stm_1st. */
1497 1, /* stm_regs_per_insn_1st. */
1498 2, /* stm_regs_per_insn_subsequent. */
1499 0, /* storef. */
1500 0, /* stored. */
1501 0 /* store_unaligned. */
1502 },
1503 {
1504 /* FP SFmode */
1505 {
1506 COSTS_N_INSNS (17), /* div. */
1507 COSTS_N_INSNS (4), /* mult. */
1508 COSTS_N_INSNS (8), /* mult_addsub. */
1509 COSTS_N_INSNS (8), /* fma. */
1510 COSTS_N_INSNS (4), /* addsub. */
1511 COSTS_N_INSNS (2), /* fpconst. */
1512 COSTS_N_INSNS (2), /* neg. */
1513 COSTS_N_INSNS (5), /* compare. */
1514 COSTS_N_INSNS (4), /* widen. */
1515 COSTS_N_INSNS (4), /* narrow. */
1516 COSTS_N_INSNS (4), /* toint. */
1517 COSTS_N_INSNS (4), /* fromint. */
1518 COSTS_N_INSNS (4) /* roundint. */
1519 },
1520 /* FP DFmode */
1521 {
1522 COSTS_N_INSNS (31), /* div. */
1523 COSTS_N_INSNS (4), /* mult. */
1524 COSTS_N_INSNS (8), /* mult_addsub. */
1525 COSTS_N_INSNS (8), /* fma. */
1526 COSTS_N_INSNS (4), /* addsub. */
1527 COSTS_N_INSNS (2), /* fpconst. */
1528 COSTS_N_INSNS (2), /* neg. */
1529 COSTS_N_INSNS (2), /* compare. */
1530 COSTS_N_INSNS (4), /* widen. */
1531 COSTS_N_INSNS (4), /* narrow. */
1532 COSTS_N_INSNS (4), /* toint. */
1533 COSTS_N_INSNS (4), /* fromint. */
1534 COSTS_N_INSNS (4) /* roundint. */
1535 }
1536 },
1537 /* Vector */
1538 {
1539 COSTS_N_INSNS (1) /* alu. */
1540 }
1541 };
1542
1543 const struct cpu_cost_table v7m_extra_costs =
1544 {
1545 /* ALU */
1546 {
1547 0, /* arith. */
1548 0, /* logical. */
1549 0, /* shift. */
1550 0, /* shift_reg. */
1551 0, /* arith_shift. */
1552 COSTS_N_INSNS (1), /* arith_shift_reg. */
1553 0, /* log_shift. */
1554 COSTS_N_INSNS (1), /* log_shift_reg. */
1555 0, /* extend. */
1556 COSTS_N_INSNS (1), /* extend_arith. */
1557 0, /* bfi. */
1558 0, /* bfx. */
1559 0, /* clz. */
1560 0, /* rev. */
1561 COSTS_N_INSNS (1), /* non_exec. */
1562 false /* non_exec_costs_exec. */
1563 },
1564 {
1565 /* MULT SImode */
1566 {
1567 COSTS_N_INSNS (1), /* simple. */
1568 COSTS_N_INSNS (1), /* flag_setting. */
1569 COSTS_N_INSNS (2), /* extend. */
1570 COSTS_N_INSNS (1), /* add. */
1571 COSTS_N_INSNS (3), /* extend_add. */
1572 COSTS_N_INSNS (8) /* idiv. */
1573 },
1574 /* MULT DImode */
1575 {
1576 0, /* simple (N/A). */
1577 0, /* flag_setting (N/A). */
1578 COSTS_N_INSNS (2), /* extend. */
1579 0, /* add (N/A). */
1580 COSTS_N_INSNS (3), /* extend_add. */
1581 0 /* idiv (N/A). */
1582 }
1583 },
1584 /* LD/ST */
1585 {
1586 COSTS_N_INSNS (2), /* load. */
1587 0, /* load_sign_extend. */
1588 COSTS_N_INSNS (3), /* ldrd. */
1589 COSTS_N_INSNS (2), /* ldm_1st. */
1590 1, /* ldm_regs_per_insn_1st. */
1591 1, /* ldm_regs_per_insn_subsequent. */
1592 COSTS_N_INSNS (2), /* loadf. */
1593 COSTS_N_INSNS (3), /* loadd. */
1594 COSTS_N_INSNS (1), /* load_unaligned. */
1595 COSTS_N_INSNS (2), /* store. */
1596 COSTS_N_INSNS (3), /* strd. */
1597 COSTS_N_INSNS (2), /* stm_1st. */
1598 1, /* stm_regs_per_insn_1st. */
1599 1, /* stm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (2), /* storef. */
1601 COSTS_N_INSNS (3), /* stored. */
1602 COSTS_N_INSNS (1) /* store_unaligned. */
1603 },
1604 {
1605 /* FP SFmode */
1606 {
1607 COSTS_N_INSNS (7), /* div. */
1608 COSTS_N_INSNS (2), /* mult. */
1609 COSTS_N_INSNS (5), /* mult_addsub. */
1610 COSTS_N_INSNS (3), /* fma. */
1611 COSTS_N_INSNS (1), /* addsub. */
1612 0, /* fpconst. */
1613 0, /* neg. */
1614 0, /* compare. */
1615 0, /* widen. */
1616 0, /* narrow. */
1617 0, /* toint. */
1618 0, /* fromint. */
1619 0 /* roundint. */
1620 },
1621 /* FP DFmode */
1622 {
1623 COSTS_N_INSNS (15), /* div. */
1624 COSTS_N_INSNS (5), /* mult. */
1625 COSTS_N_INSNS (7), /* mult_addsub. */
1626 COSTS_N_INSNS (7), /* fma. */
1627 COSTS_N_INSNS (3), /* addsub. */
1628 0, /* fpconst. */
1629 0, /* neg. */
1630 0, /* compare. */
1631 0, /* widen. */
1632 0, /* narrow. */
1633 0, /* toint. */
1634 0, /* fromint. */
1635 0 /* roundint. */
1636 }
1637 },
1638 /* Vector */
1639 {
1640 COSTS_N_INSNS (1) /* alu. */
1641 }
1642 };
1643
1644 const struct tune_params arm_slowmul_tune =
1645 {
1646 arm_slowmul_rtx_costs,
1647 NULL,
1648 NULL, /* Sched adj cost. */
1649 3, /* Constant limit. */
1650 5, /* Max cond insns. */
1651 ARM_PREFETCH_NOT_BENEFICIAL,
1652 true, /* Prefer constant pool. */
1653 arm_default_branch_cost,
1654 false, /* Prefer LDRD/STRD. */
1655 {true, true}, /* Prefer non short circuit. */
1656 &arm_default_vec_cost, /* Vectorizer costs. */
1657 false, /* Prefer Neon for 64-bits bitops. */
1658 false, false, /* Prefer 32-bit encodings. */
1659 false, /* Prefer Neon for stringops. */
1660 8 /* Maximum insns to inline memset. */
1661 };
1662
1663 const struct tune_params arm_fastmul_tune =
1664 {
1665 arm_fastmul_rtx_costs,
1666 NULL,
1667 NULL, /* Sched adj cost. */
1668 1, /* Constant limit. */
1669 5, /* Max cond insns. */
1670 ARM_PREFETCH_NOT_BENEFICIAL,
1671 true, /* Prefer constant pool. */
1672 arm_default_branch_cost,
1673 false, /* Prefer LDRD/STRD. */
1674 {true, true}, /* Prefer non short circuit. */
1675 &arm_default_vec_cost, /* Vectorizer costs. */
1676 false, /* Prefer Neon for 64-bits bitops. */
1677 false, false, /* Prefer 32-bit encodings. */
1678 false, /* Prefer Neon for stringops. */
1679 8 /* Maximum insns to inline memset. */
1680 };
1681
1682 /* StrongARM has early execution of branches, so a sequence that is worth
1683 skipping is shorter. Set max_insns_skipped to a lower value. */
1684
1685 const struct tune_params arm_strongarm_tune =
1686 {
1687 arm_fastmul_rtx_costs,
1688 NULL,
1689 NULL, /* Sched adj cost. */
1690 1, /* Constant limit. */
1691 3, /* Max cond insns. */
1692 ARM_PREFETCH_NOT_BENEFICIAL,
1693 true, /* Prefer constant pool. */
1694 arm_default_branch_cost,
1695 false, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost, /* Vectorizer costs. */
1698 false, /* Prefer Neon for 64-bits bitops. */
1699 false, false, /* Prefer 32-bit encodings. */
1700 false, /* Prefer Neon for stringops. */
1701 8 /* Maximum insns to inline memset. */
1702 };
1703
1704 const struct tune_params arm_xscale_tune =
1705 {
1706 arm_xscale_rtx_costs,
1707 NULL,
1708 xscale_sched_adjust_cost,
1709 2, /* Constant limit. */
1710 3, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false, /* Prefer 32-bit encodings. */
1719 false, /* Prefer Neon for stringops. */
1720 8 /* Maximum insns to inline memset. */
1721 };
1722
1723 const struct tune_params arm_9e_tune =
1724 {
1725 arm_9e_rtx_costs,
1726 NULL,
1727 NULL, /* Sched adj cost. */
1728 1, /* Constant limit. */
1729 5, /* Max cond insns. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 true, /* Prefer constant pool. */
1732 arm_default_branch_cost,
1733 false, /* Prefer LDRD/STRD. */
1734 {true, true}, /* Prefer non short circuit. */
1735 &arm_default_vec_cost, /* Vectorizer costs. */
1736 false, /* Prefer Neon for 64-bits bitops. */
1737 false, false, /* Prefer 32-bit encodings. */
1738 false, /* Prefer Neon for stringops. */
1739 8 /* Maximum insns to inline memset. */
1740 };
1741
1742 const struct tune_params arm_v6t2_tune =
1743 {
1744 arm_9e_rtx_costs,
1745 NULL,
1746 NULL, /* Sched adj cost. */
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL,
1750 false, /* Prefer constant pool. */
1751 arm_default_branch_cost,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost, /* Vectorizer costs. */
1755 false, /* Prefer Neon for 64-bits bitops. */
1756 false, false, /* Prefer 32-bit encodings. */
1757 false, /* Prefer Neon for stringops. */
1758 8 /* Maximum insns to inline memset. */
1759 };
1760
1761 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1762 const struct tune_params arm_cortex_tune =
1763 {
1764 arm_9e_rtx_costs,
1765 &generic_extra_costs,
1766 NULL, /* Sched adj cost. */
1767 1, /* Constant limit. */
1768 5, /* Max cond insns. */
1769 ARM_PREFETCH_NOT_BENEFICIAL,
1770 false, /* Prefer constant pool. */
1771 arm_default_branch_cost,
1772 false, /* Prefer LDRD/STRD. */
1773 {true, true}, /* Prefer non short circuit. */
1774 &arm_default_vec_cost, /* Vectorizer costs. */
1775 false, /* Prefer Neon for 64-bits bitops. */
1776 false, false, /* Prefer 32-bit encodings. */
1777 false, /* Prefer Neon for stringops. */
1778 8 /* Maximum insns to inline memset. */
1779 };
1780
1781 const struct tune_params arm_cortex_a8_tune =
1782 {
1783 arm_9e_rtx_costs,
1784 &cortexa8_extra_costs,
1785 NULL, /* Sched adj cost. */
1786 1, /* Constant limit. */
1787 5, /* Max cond insns. */
1788 ARM_PREFETCH_NOT_BENEFICIAL,
1789 false, /* Prefer constant pool. */
1790 arm_default_branch_cost,
1791 false, /* Prefer LDRD/STRD. */
1792 {true, true}, /* Prefer non short circuit. */
1793 &arm_default_vec_cost, /* Vectorizer costs. */
1794 false, /* Prefer Neon for 64-bits bitops. */
1795 false, false, /* Prefer 32-bit encodings. */
1796 true, /* Prefer Neon for stringops. */
1797 8 /* Maximum insns to inline memset. */
1798 };
1799
1800 const struct tune_params arm_cortex_a7_tune =
1801 {
1802 arm_9e_rtx_costs,
1803 &cortexa7_extra_costs,
1804 NULL, /* Sched adj cost. */
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 ARM_PREFETCH_NOT_BENEFICIAL,
1808 false, /* Prefer constant pool. */
1809 arm_default_branch_cost,
1810 false, /* Prefer LDRD/STRD. */
1811 {true, true}, /* Prefer non short circuit. */
1812 &arm_default_vec_cost, /* Vectorizer costs. */
1813 false, /* Prefer Neon for 64-bits bitops. */
1814 false, false, /* Prefer 32-bit encodings. */
1815 true, /* Prefer Neon for stringops. */
1816 8 /* Maximum insns to inline memset. */
1817 };
1818
1819 const struct tune_params arm_cortex_a15_tune =
1820 {
1821 arm_9e_rtx_costs,
1822 &cortexa15_extra_costs,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 2, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 true, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 true, true, /* Prefer 32-bit encodings. */
1834 true, /* Prefer Neon for stringops. */
1835 8 /* Maximum insns to inline memset. */
1836 };
1837
1838 const struct tune_params arm_cortex_a53_tune =
1839 {
1840 arm_9e_rtx_costs,
1841 &cortexa53_extra_costs,
1842 NULL, /* Scheduler cost adjustment. */
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 false, /* Prefer constant pool. */
1847 arm_default_branch_cost,
1848 false, /* Prefer LDRD/STRD. */
1849 {true, true}, /* Prefer non short circuit. */
1850 &arm_default_vec_cost, /* Vectorizer costs. */
1851 false, /* Prefer Neon for 64-bits bitops. */
1852 false, false, /* Prefer 32-bit encodings. */
1853 false, /* Prefer Neon for stringops. */
1854 8 /* Maximum insns to inline memset. */
1855 };
1856
1857 const struct tune_params arm_cortex_a57_tune =
1858 {
1859 arm_9e_rtx_costs,
1860 &cortexa57_extra_costs,
1861 NULL, /* Scheduler cost adjustment. */
1862 1, /* Constant limit. */
1863 2, /* Max cond insns. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 false, /* Prefer constant pool. */
1866 arm_default_branch_cost,
1867 true, /* Prefer LDRD/STRD. */
1868 {true, true}, /* Prefer non short circuit. */
1869 &arm_default_vec_cost, /* Vectorizer costs. */
1870 false, /* Prefer Neon for 64-bits bitops. */
1871 true, true, /* Prefer 32-bit encodings. */
1872 false, /* Prefer Neon for stringops. */
1873 8 /* Maximum insns to inline memset. */
1874 };
1875
1876 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1877 less appealing. Set max_insns_skipped to a low value. */
1878
1879 const struct tune_params arm_cortex_a5_tune =
1880 {
1881 arm_9e_rtx_costs,
1882 &cortexa5_extra_costs,
1883 NULL, /* Sched adj cost. */
1884 1, /* Constant limit. */
1885 1, /* Max cond insns. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 false, /* Prefer constant pool. */
1888 arm_cortex_a5_branch_cost,
1889 false, /* Prefer LDRD/STRD. */
1890 {false, false}, /* Prefer non short circuit. */
1891 &arm_default_vec_cost, /* Vectorizer costs. */
1892 false, /* Prefer Neon for 64-bits bitops. */
1893 false, false, /* Prefer 32-bit encodings. */
1894 true, /* Prefer Neon for stringops. */
1895 8 /* Maximum insns to inline memset. */
1896 };
1897
1898 const struct tune_params arm_cortex_a9_tune =
1899 {
1900 arm_9e_rtx_costs,
1901 &cortexa9_extra_costs,
1902 cortex_a9_sched_adjust_cost,
1903 1, /* Constant limit. */
1904 5, /* Max cond insns. */
1905 ARM_PREFETCH_BENEFICIAL(4,32,32),
1906 false, /* Prefer constant pool. */
1907 arm_default_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {true, true}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false, /* Prefer 32-bit encodings. */
1913 false, /* Prefer Neon for stringops. */
1914 8 /* Maximum insns to inline memset. */
1915 };
1916
1917 const struct tune_params arm_cortex_a12_tune =
1918 {
1919 arm_9e_rtx_costs,
1920 &cortexa12_extra_costs,
1921 NULL, /* Sched adj cost. */
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 ARM_PREFETCH_BENEFICIAL(4,32,32),
1925 false, /* Prefer constant pool. */
1926 arm_default_branch_cost,
1927 true, /* Prefer LDRD/STRD. */
1928 {true, true}, /* Prefer non short circuit. */
1929 &arm_default_vec_cost, /* Vectorizer costs. */
1930 false, /* Prefer Neon for 64-bits bitops. */
1931 false, false, /* Prefer 32-bit encodings. */
1932 true, /* Prefer Neon for stringops. */
1933 8 /* Maximum insns to inline memset. */
1934 };
1935
1936 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
1937    single cycle, so materialising a constant with a MOVW/MOVT pair costs two
1938    cycles.  An LDR from the constant pool also takes two cycles to execute,
1939    but mildly increases pipelining opportunity (consecutive loads/stores can
1940    be pipelined together, saving one cycle), and may also improve icache
1941    utilisation.  Hence we prefer the constant pool for such processors.  */
1942
1943 const struct tune_params arm_v7m_tune =
1944 {
1945 arm_9e_rtx_costs,
1946 &v7m_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 1, /* Constant limit. */
1949 2, /* Max cond insns. */
1950 ARM_PREFETCH_NOT_BENEFICIAL,
1951 true, /* Prefer constant pool. */
1952 arm_cortex_m_branch_cost,
1953 false, /* Prefer LDRD/STRD. */
1954 {false, false}, /* Prefer non short circuit. */
1955 &arm_default_vec_cost, /* Vectorizer costs. */
1956 false, /* Prefer Neon for 64-bits bitops. */
1957 false, false, /* Prefer 32-bit encodings. */
1958 false, /* Prefer Neon for stringops. */
1959 8 /* Maximum insns to inline memset. */
1960 };
1961
1962 /* Cortex-M7 tuning. */
1963
1964 const struct tune_params arm_cortex_m7_tune =
1965 {
1966 arm_9e_rtx_costs,
1967 &v7m_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 0, /* Constant limit. */
1970 0, /* Max cond insns. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 true, /* Prefer constant pool. */
1973 arm_cortex_m_branch_cost,
1974 false, /* Prefer LDRD/STRD. */
1975 {true, true}, /* Prefer non short circuit. */
1976 &arm_default_vec_cost, /* Vectorizer costs. */
1977 false, /* Prefer Neon for 64-bits bitops. */
1978 false, false, /* Prefer 32-bit encodings. */
1979 false, /* Prefer Neon for stringops. */
1980 8 /* Maximum insns to inline memset. */
1981 };
1982
1983 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1984 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1985 const struct tune_params arm_v6m_tune =
1986 {
1987 arm_9e_rtx_costs,
1988 NULL,
1989 NULL, /* Sched adj cost. */
1990 1, /* Constant limit. */
1991 5, /* Max cond insns. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 false, /* Prefer constant pool. */
1994 arm_default_branch_cost,
1995 false, /* Prefer LDRD/STRD. */
1996 {false, false}, /* Prefer non short circuit. */
1997 &arm_default_vec_cost, /* Vectorizer costs. */
1998 false, /* Prefer Neon for 64-bits bitops. */
1999 false, false, /* Prefer 32-bit encodings. */
2000 false, /* Prefer Neon for stringops. */
2001 8 /* Maximum insns to inline memset. */
2002 };
2003
2004 const struct tune_params arm_fa726te_tune =
2005 {
2006 arm_9e_rtx_costs,
2007 NULL,
2008 fa726te_sched_adjust_cost,
2009 1, /* Constant limit. */
2010 5, /* Max cond insns. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 true, /* Prefer constant pool. */
2013 arm_default_branch_cost,
2014 false, /* Prefer LDRD/STRD. */
2015 {true, true}, /* Prefer non short circuit. */
2016 &arm_default_vec_cost, /* Vectorizer costs. */
2017 false, /* Prefer Neon for 64-bits bitops. */
2018 false, false, /* Prefer 32-bit encodings. */
2019 false, /* Prefer Neon for stringops. */
2020 8 /* Maximum insns to inline memset. */
2021 };
2022
2023
2024 /* Not all of these give usefully different compilation alternatives,
2025 but there is no simple way of generalizing them. */
2026 static const struct processors all_cores[] =
2027 {
2028 /* ARM Cores */
2029 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2030 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2031 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2032 #include "arm-cores.def"
2033 #undef ARM_CORE
2034 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2035 };
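/* To illustrate how the table above is built (the entry shown is only
   illustrative; the real names and flags come from arm-cores.def): an
   arm-cores.def line such as
     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, FLAGS, cortex_a8)
   expands via the ARM_CORE macro above to
     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
      FLAGS | FL_FOR_ARCH7A, &arm_cortex_a8_tune},  */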
2036
2037 static const struct processors all_architectures[] =
2038 {
2039 /* ARM Architectures */
2040 /* We don't specify tuning costs here as they will be figured out
2041 from the core. */
2042
2043 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2044 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2045 #include "arm-arches.def"
2046 #undef ARM_ARCH
2047 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2048 };
2049
2050
2051 /* These are populated as command-line arguments are processed, or NULL
2052 if not specified. */
2053 static const struct processors *arm_selected_arch;
2054 static const struct processors *arm_selected_cpu;
2055 static const struct processors *arm_selected_tune;
2056
2057 /* The name of the preprocessor macro to define for this architecture. */
2058
2059 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2060
2061 /* Available values for -mfpu=. */
2062
2063 static const struct arm_fpu_desc all_fpus[] =
2064 {
2065 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2066 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2067 #include "arm-fpus.def"
2068 #undef ARM_FPU
2069 };
2070
2071
2072 /* Supported TLS relocations. */
2073
2074 enum tls_reloc {
2075 TLS_GD32,
2076 TLS_LDM32,
2077 TLS_LDO32,
2078 TLS_IE32,
2079 TLS_LE32,
2080 TLS_DESCSEQ /* GNU scheme */
2081 };
2082
2083 /* The maximum number of insns to be used when loading a constant. */
2084 inline static int
2085 arm_constant_limit (bool size_p)
2086 {
2087 return size_p ? 1 : current_tune->constant_limit;
2088 }
2089
2090 /* Emit an insn that's a simple single-set. Both the operands must be known
2091 to be valid. */
2092 inline static rtx_insn *
2093 emit_set_insn (rtx x, rtx y)
2094 {
2095 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2096 }
2097
2098 /* Return the number of bits set in VALUE. */
2099 static unsigned
2100 bit_count (unsigned long value)
2101 {
2102 unsigned long count = 0;
2103
2104 while (value)
2105 {
2106 count++;
2107 value &= value - 1; /* Clear the least-significant set bit. */
2108 }
2109
2110 return count;
2111 }
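/* For example, bit_count (0x2C) returns 3: the loop above clears bit 2,
   then bit 3, then bit 5 (ANDing VALUE with VALUE - 1 removes the
   least-significant set bit on each iteration).  */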
2112
2113 typedef struct
2114 {
2115 machine_mode mode;
2116 const char *name;
2117 } arm_fixed_mode_set;
2118
2119 /* A small helper for setting fixed-point libfuncs. */
2120
2121 static void
2122 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2123 const char *funcname, const char *modename,
2124 int num_suffix)
2125 {
2126 char buffer[50];
2127
2128 if (num_suffix == 0)
2129 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2130 else
2131 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2132
2133 set_optab_libfunc (optable, mode, buffer);
2134 }
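/* As an illustration of the naming scheme above (derived purely from the
   sprintf format, so the names are just what it produces for these
   arguments): arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
   registers the libcall name "__gnu_addsq3", while a NUM_SUFFIX of zero
   would drop the trailing digit and give "__gnu_addsq".  */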
2135
2136 static void
2137 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2138 machine_mode from, const char *funcname,
2139 const char *toname, const char *fromname)
2140 {
2141 char buffer[50];
2142 const char *maybe_suffix_2 = "";
2143
2144 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2145 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2146 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2147 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2148 maybe_suffix_2 = "2";
2149
2150 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2151 maybe_suffix_2);
2152
2153 set_conv_libfunc (optable, to, from, buffer);
2154 }
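/* For example, with the suffix rule above, a conversion between two signed
   fract modes (funcname "fract", fromname "sq", toname "dq") yields
   "__gnu_fractsqdq2", whereas a conversion whose destination is not a
   fixed-point mode (e.g. toname "si") takes no suffix and yields
   "__gnu_fractsqsi".  (These names are simply what the sprintf above
   produces for those arguments.)  */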
2155
2156 /* Set up library functions unique to ARM. */
2157
2158 static void
2159 arm_init_libfuncs (void)
2160 {
2161 /* For Linux, we have access to kernel support for atomic operations. */
2162 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2163 init_sync_libfuncs (2 * UNITS_PER_WORD);
2164
2165 /* There are no special library functions unless we are using the
2166 ARM BPABI. */
2167 if (!TARGET_BPABI)
2168 return;
2169
2170 /* The functions below are described in Section 4 of the "Run-Time
2171 ABI for the ARM architecture", Version 1.0. */
2172
2173 /* Double-precision floating-point arithmetic. Table 2. */
2174 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2175 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2176 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2177 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2178 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2179
2180 /* Double-precision comparisons. Table 3. */
2181 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2182 set_optab_libfunc (ne_optab, DFmode, NULL);
2183 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2184 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2185 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2186 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2187 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2188
2189 /* Single-precision floating-point arithmetic. Table 4. */
2190 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2191 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2192 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2193 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2194 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2195
2196 /* Single-precision comparisons. Table 5. */
2197 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2198 set_optab_libfunc (ne_optab, SFmode, NULL);
2199 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2200 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2201 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2202 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2203 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2204
2205 /* Floating-point to integer conversions. Table 6. */
2206 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2207 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2208 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2209 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2210 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2211 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2212 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2213 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2214
2215 /* Conversions between floating types. Table 7. */
2216 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2217 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2218
2219 /* Integer to floating-point conversions. Table 8. */
2220 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2221 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2222 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2223 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2224 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2225 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2226 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2227 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2228
2229 /* Long long. Table 9. */
2230 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2231 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2232 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2233 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2234 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2235 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2236 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2237 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2238
2239 /* Integer (32/32->32) division. \S 4.3.1. */
2240 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2241 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2242
2243 /* The divmod functions are designed so that they can be used for
2244 plain division, even though they return both the quotient and the
2245 remainder. The quotient is returned in the usual location (i.e.,
2246 r0 for SImode, {r0, r1} for DImode), just as would be expected
2247 for an ordinary division routine. Because the AAPCS calling
2248 conventions specify that all of { r0, r1, r2, r3 } are
2249 call-clobbered registers, there is no need to tell the compiler
2250 explicitly that those registers are clobbered by these
2251 routines. */
2252 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2253 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2254
2255 /* For SImode division the ABI provides div-without-mod routines,
2256 which are faster. */
2257 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2258 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2259
2260 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2261 divmod libcalls instead. */
2262 set_optab_libfunc (smod_optab, DImode, NULL);
2263 set_optab_libfunc (umod_optab, DImode, NULL);
2264 set_optab_libfunc (smod_optab, SImode, NULL);
2265 set_optab_libfunc (umod_optab, SImode, NULL);
2266
2267 /* Half-precision float operations. The compiler handles all operations
2268 with NULL libfuncs by converting to SFmode. */
2269 switch (arm_fp16_format)
2270 {
2271 case ARM_FP16_FORMAT_IEEE:
2272 case ARM_FP16_FORMAT_ALTERNATIVE:
2273
2274 /* Conversions. */
2275 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2276 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2277 ? "__gnu_f2h_ieee"
2278 : "__gnu_f2h_alternative"));
2279 set_conv_libfunc (sext_optab, SFmode, HFmode,
2280 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2281 ? "__gnu_h2f_ieee"
2282 : "__gnu_h2f_alternative"));
2283
2284 /* Arithmetic. */
2285 set_optab_libfunc (add_optab, HFmode, NULL);
2286 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2287 set_optab_libfunc (smul_optab, HFmode, NULL);
2288 set_optab_libfunc (neg_optab, HFmode, NULL);
2289 set_optab_libfunc (sub_optab, HFmode, NULL);
2290
2291 /* Comparisons. */
2292 set_optab_libfunc (eq_optab, HFmode, NULL);
2293 set_optab_libfunc (ne_optab, HFmode, NULL);
2294 set_optab_libfunc (lt_optab, HFmode, NULL);
2295 set_optab_libfunc (le_optab, HFmode, NULL);
2296 set_optab_libfunc (ge_optab, HFmode, NULL);
2297 set_optab_libfunc (gt_optab, HFmode, NULL);
2298 set_optab_libfunc (unord_optab, HFmode, NULL);
2299 break;
2300
2301 default:
2302 break;
2303 }
2304
2305 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2306 {
2307 const arm_fixed_mode_set fixed_arith_modes[] =
2308 {
2309 { QQmode, "qq" },
2310 { UQQmode, "uqq" },
2311 { HQmode, "hq" },
2312 { UHQmode, "uhq" },
2313 { SQmode, "sq" },
2314 { USQmode, "usq" },
2315 { DQmode, "dq" },
2316 { UDQmode, "udq" },
2317 { TQmode, "tq" },
2318 { UTQmode, "utq" },
2319 { HAmode, "ha" },
2320 { UHAmode, "uha" },
2321 { SAmode, "sa" },
2322 { USAmode, "usa" },
2323 { DAmode, "da" },
2324 { UDAmode, "uda" },
2325 { TAmode, "ta" },
2326 { UTAmode, "uta" }
2327 };
2328 const arm_fixed_mode_set fixed_conv_modes[] =
2329 {
2330 { QQmode, "qq" },
2331 { UQQmode, "uqq" },
2332 { HQmode, "hq" },
2333 { UHQmode, "uhq" },
2334 { SQmode, "sq" },
2335 { USQmode, "usq" },
2336 { DQmode, "dq" },
2337 { UDQmode, "udq" },
2338 { TQmode, "tq" },
2339 { UTQmode, "utq" },
2340 { HAmode, "ha" },
2341 { UHAmode, "uha" },
2342 { SAmode, "sa" },
2343 { USAmode, "usa" },
2344 { DAmode, "da" },
2345 { UDAmode, "uda" },
2346 { TAmode, "ta" },
2347 { UTAmode, "uta" },
2348 { QImode, "qi" },
2349 { HImode, "hi" },
2350 { SImode, "si" },
2351 { DImode, "di" },
2352 { TImode, "ti" },
2353 { SFmode, "sf" },
2354 { DFmode, "df" }
2355 };
2356 unsigned int i, j;
2357
2358 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2359 {
2360 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2361 "add", fixed_arith_modes[i].name, 3);
2362 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2363 "ssadd", fixed_arith_modes[i].name, 3);
2364 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2365 "usadd", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2367 "sub", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2369 "sssub", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2371 "ussub", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2373 "mul", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2375 "ssmul", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2377 "usmul", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2379 "div", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2381 "udiv", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2383 "ssdiv", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2385 "usdiv", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2387 "neg", fixed_arith_modes[i].name, 2);
2388 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2389 "ssneg", fixed_arith_modes[i].name, 2);
2390 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2391 "usneg", fixed_arith_modes[i].name, 2);
2392 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2393 "ashl", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2395 "ashr", fixed_arith_modes[i].name, 3);
2396 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2397 "lshr", fixed_arith_modes[i].name, 3);
2398 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2399 "ssashl", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2401 "usashl", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2403 "cmp", fixed_arith_modes[i].name, 2);
2404 }
2405
2406 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2407 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2408 {
2409 if (i == j
2410 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2411 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2412 continue;
2413
2414 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2415 fixed_conv_modes[j].mode, "fract",
2416 fixed_conv_modes[i].name,
2417 fixed_conv_modes[j].name);
2418 arm_set_fixed_conv_libfunc (satfract_optab,
2419 fixed_conv_modes[i].mode,
2420 fixed_conv_modes[j].mode, "satfract",
2421 fixed_conv_modes[i].name,
2422 fixed_conv_modes[j].name);
2423 arm_set_fixed_conv_libfunc (fractuns_optab,
2424 fixed_conv_modes[i].mode,
2425 fixed_conv_modes[j].mode, "fractuns",
2426 fixed_conv_modes[i].name,
2427 fixed_conv_modes[j].name);
2428 arm_set_fixed_conv_libfunc (satfractuns_optab,
2429 fixed_conv_modes[i].mode,
2430 fixed_conv_modes[j].mode, "satfractuns",
2431 fixed_conv_modes[i].name,
2432 fixed_conv_modes[j].name);
2433 }
2434 }
2435
2436 if (TARGET_AAPCS_BASED)
2437 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2438 }
2439
2440 /* On AAPCS systems, this is the "struct __va_list". */
2441 static GTY(()) tree va_list_type;
2442
2443 /* Return the type to use as __builtin_va_list. */
2444 static tree
2445 arm_build_builtin_va_list (void)
2446 {
2447 tree va_list_name;
2448 tree ap_field;
2449
2450 if (!TARGET_AAPCS_BASED)
2451 return std_build_builtin_va_list ();
2452
2453 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2454 defined as:
2455
2456 struct __va_list
2457 {
2458 void *__ap;
2459 };
2460
2461 The C Library ABI further reinforces this definition in \S
2462 4.1.
2463
2464 We must follow this definition exactly. The structure tag
2465 name is visible in C++ mangled names, and thus forms a part
2466 of the ABI. The field name may be used by people who
2467 #include <stdarg.h>. */
2468 /* Create the type. */
2469 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2470 /* Give it the required name. */
2471 va_list_name = build_decl (BUILTINS_LOCATION,
2472 TYPE_DECL,
2473 get_identifier ("__va_list"),
2474 va_list_type);
2475 DECL_ARTIFICIAL (va_list_name) = 1;
2476 TYPE_NAME (va_list_type) = va_list_name;
2477 TYPE_STUB_DECL (va_list_type) = va_list_name;
2478 /* Create the __ap field. */
2479 ap_field = build_decl (BUILTINS_LOCATION,
2480 FIELD_DECL,
2481 get_identifier ("__ap"),
2482 ptr_type_node);
2483 DECL_ARTIFICIAL (ap_field) = 1;
2484 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2485 TYPE_FIELDS (va_list_type) = ap_field;
2486 /* Compute its layout. */
2487 layout_type (va_list_type);
2488
2489 return va_list_type;
2490 }
2491
2492 /* Return an expression of type "void *" pointing to the next
2493 available argument in a variable-argument list. VALIST is the
2494 user-level va_list object, of type __builtin_va_list. */
2495 static tree
2496 arm_extract_valist_ptr (tree valist)
2497 {
2498 if (TREE_TYPE (valist) == error_mark_node)
2499 return error_mark_node;
2500
2501 /* On an AAPCS target, the pointer is stored within "struct
2502 va_list". */
2503 if (TARGET_AAPCS_BASED)
2504 {
2505 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2506 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2507 valist, ap_field, NULL_TREE);
2508 }
2509
2510 return valist;
2511 }
2512
2513 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2514 static void
2515 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2516 {
2517 valist = arm_extract_valist_ptr (valist);
2518 std_expand_builtin_va_start (valist, nextarg);
2519 }
2520
2521 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2522 static tree
2523 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2524 gimple_seq *post_p)
2525 {
2526 valist = arm_extract_valist_ptr (valist);
2527 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2528 }
2529
2530 /* Fix up any incompatible options that the user has specified. */
2531 static void
2532 arm_option_override (void)
2533 {
2534 if (global_options_set.x_arm_arch_option)
2535 arm_selected_arch = &all_architectures[arm_arch_option];
2536
2537 if (global_options_set.x_arm_cpu_option)
2538 {
2539 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2540 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2541 }
2542
2543 if (global_options_set.x_arm_tune_option)
2544 arm_selected_tune = &all_cores[(int) arm_tune_option];
2545
2546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2547 SUBTARGET_OVERRIDE_OPTIONS;
2548 #endif
2549
2550 if (arm_selected_arch)
2551 {
2552 if (arm_selected_cpu)
2553 {
2554 /* Check for conflict between mcpu and march. */
2555 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2556 {
2557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2558 arm_selected_cpu->name, arm_selected_arch->name);
2559 /* -march wins for code generation.
2560 -mcpu wins for default tuning. */
2561 if (!arm_selected_tune)
2562 arm_selected_tune = arm_selected_cpu;
2563
2564 arm_selected_cpu = arm_selected_arch;
2565 }
2566 else
2567 /* -mcpu wins. */
2568 arm_selected_arch = NULL;
2569 }
2570 else
2571 /* Pick a CPU based on the architecture. */
2572 arm_selected_cpu = arm_selected_arch;
2573 }
2574
2575 /* If the user did not specify a processor, choose one for them. */
2576 if (!arm_selected_cpu)
2577 {
2578 const struct processors * sel;
2579 unsigned int sought;
2580
2581 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2582 if (!arm_selected_cpu->name)
2583 {
2584 #ifdef SUBTARGET_CPU_DEFAULT
2585 /* Use the subtarget default CPU if none was specified by
2586 configure. */
2587 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2588 #endif
2589 /* Default to ARM6. */
2590 if (!arm_selected_cpu->name)
2591 arm_selected_cpu = &all_cores[arm6];
2592 }
2593
2594 sel = arm_selected_cpu;
2595 insn_flags = sel->flags;
2596
2597 /* Now check to see if the user has specified any command-line
2598 switches that require certain abilities from the CPU. */
2599 sought = 0;
2600
2601 if (TARGET_INTERWORK || TARGET_THUMB)
2602 {
2603 sought |= (FL_THUMB | FL_MODE32);
2604
2605 /* There are no ARM processors that support both APCS-26 and
2606 interworking. Therefore we force FL_MODE26 to be removed
2607 from insn_flags here (if it was set), so that the search
2608 below will always be able to find a compatible processor. */
2609 insn_flags &= ~FL_MODE26;
2610 }
2611
2612 if (sought != 0 && ((sought & insn_flags) != sought))
2613 {
2614 /* Try to locate a CPU type that supports all of the abilities
2615 of the default CPU, plus the extra abilities requested by
2616 the user. */
2617 for (sel = all_cores; sel->name != NULL; sel++)
2618 if ((sel->flags & (sought | insn_flags)) == (sought | insn_flags))
2619 break;
2620
2621 if (sel->name == NULL)
2622 {
2623 unsigned current_bit_count = 0;
2624 const struct processors * best_fit = NULL;
2625
2626 /* Ideally we would like to issue an error message here
2627 saying that it was not possible to find a CPU compatible
2628 with the default CPU, but which also supports the command
2629 line options specified by the programmer, and so they
2630 ought to use the -mcpu=<name> command line option to
2631 override the default CPU type.
2632
2633 If we cannot find a cpu that has both the
2634 characteristics of the default cpu and the given
2635 command line options we scan the array again looking
2636 for a best match. */
2637 for (sel = all_cores; sel->name != NULL; sel++)
2638 if ((sel->flags & sought) == sought)
2639 {
2640 unsigned count;
2641
2642 count = bit_count (sel->flags & insn_flags);
2643
2644 if (count >= current_bit_count)
2645 {
2646 best_fit = sel;
2647 current_bit_count = count;
2648 }
2649 }
2650
2651 gcc_assert (best_fit);
2652 sel = best_fit;
2653 }
2654
2655 arm_selected_cpu = sel;
2656 }
2657 }
2658
2659 gcc_assert (arm_selected_cpu);
2660 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2661 if (!arm_selected_tune)
2662 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2663
2664 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2665 insn_flags = arm_selected_cpu->flags;
2666 arm_base_arch = arm_selected_cpu->base_arch;
2667
2668 arm_tune = arm_selected_tune->core;
2669 tune_flags = arm_selected_tune->flags;
2670 current_tune = arm_selected_tune->tune;
2671
2672 /* Make sure that the processor choice does not conflict with any of the
2673 other command line choices. */
2674 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2675 error ("target CPU does not support ARM mode");
2676
2677 /* BPABI targets use linker tricks to allow interworking on cores
2678 without thumb support. */
2679 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2680 {
2681 warning (0, "target CPU does not support interworking" );
2682 target_flags &= ~MASK_INTERWORK;
2683 }
2684
2685 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2686 {
2687 warning (0, "target CPU does not support THUMB instructions");
2688 target_flags &= ~MASK_THUMB;
2689 }
2690
2691 if (TARGET_APCS_FRAME && TARGET_THUMB)
2692 {
2693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2694 target_flags &= ~MASK_APCS_FRAME;
2695 }
2696
2697 /* Callee super interworking implies thumb interworking. Adding
2698 this to the flags here simplifies the logic elsewhere. */
2699 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2700 target_flags |= MASK_INTERWORK;
2701
2702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2703 from here where no function is being compiled currently. */
2704 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2706
2707 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2709
2710 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2711 {
2712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2713 target_flags |= MASK_APCS_FRAME;
2714 }
2715
2716 if (TARGET_POKE_FUNCTION_NAME)
2717 target_flags |= MASK_APCS_FRAME;
2718
2719 if (TARGET_APCS_REENT && flag_pic)
2720 error ("-fpic and -mapcs-reent are incompatible");
2721
2722 if (TARGET_APCS_REENT)
2723 warning (0, "APCS reentrant code not supported. Ignored");
2724
2725 /* If this target is normally configured to use APCS frames, warn if they
2726 are turned off and debugging is turned on. */
2727 if (TARGET_ARM
2728 && write_symbols != NO_DEBUG
2729 && !TARGET_APCS_FRAME
2730 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2732
2733 if (TARGET_APCS_FLOAT)
2734 warning (0, "passing floating point arguments in fp regs not yet supported");
2735
2736 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2737 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2738 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2739 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2740 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2741 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2742 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2743 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2744 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2745 arm_arch6m = arm_arch6 && !arm_arch_notm;
2746 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2747 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2748 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2749 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2750 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2751
2752 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2753 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2754 thumb_code = TARGET_ARM == 0;
2755 thumb1_code = TARGET_THUMB1 != 0;
2756 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2757 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2758 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2759 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2760 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2761 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2762 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2763 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2764 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2765 if (arm_restrict_it == 2)
2766 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2767
2768 if (!TARGET_THUMB2)
2769 arm_restrict_it = 0;
2770
2771 /* If we are not using the default (ARM mode) section anchor offset
2772 ranges, then set the correct ranges now. */
2773 if (TARGET_THUMB1)
2774 {
2775 /* Thumb-1 LDR instructions cannot have negative offsets.
2776 Permissible positive offset ranges are 5-bit (for byte loads),
2777 6-bit (for halfword loads), or 7-bit (for word loads).
2778 Empirical results suggest a 7-bit anchor range gives the best
2779 overall code size. */
2780 targetm.min_anchor_offset = 0;
2781 targetm.max_anchor_offset = 127;
2782 }
2783 else if (TARGET_THUMB2)
2784 {
2785 /* The minimum is set such that the total size of the block
2786 for a particular anchor is 248 + 1 + 4095 bytes, which is
2787 divisible by eight, ensuring natural spacing of anchors. */
2788 targetm.min_anchor_offset = -248;
2789 targetm.max_anchor_offset = 4095;
2790 }
2791
2792 /* V5 code we generate is completely interworking capable, so we turn off
2793 TARGET_INTERWORK here to avoid many tests later on. */
2794
2795 /* XXX However, we must pass the right pre-processor defines to CPP
2796 or GLD can get confused. This is a hack. */
2797 if (TARGET_INTERWORK)
2798 arm_cpp_interwork = 1;
2799
2800 if (arm_arch5)
2801 target_flags &= ~MASK_INTERWORK;
2802
2803 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2804 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2805
2806 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2807 error ("iwmmxt abi requires an iwmmxt capable cpu");
2808
2809 if (!global_options_set.x_arm_fpu_index)
2810 {
2811 const char *target_fpu_name;
2812 bool ok;
2813
2814 #ifdef FPUTYPE_DEFAULT
2815 target_fpu_name = FPUTYPE_DEFAULT;
2816 #else
2817 target_fpu_name = "vfp";
2818 #endif
2819
2820 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2821 CL_TARGET);
2822 gcc_assert (ok);
2823 }
2824
2825 arm_fpu_desc = &all_fpus[arm_fpu_index];
2826
2827 switch (arm_fpu_desc->model)
2828 {
2829 case ARM_FP_MODEL_VFP:
2830 arm_fpu_attr = FPU_VFP;
2831 break;
2832
2833 default:
2834 gcc_unreachable();
2835 }
2836
2837 if (TARGET_AAPCS_BASED)
2838 {
2839 if (TARGET_CALLER_INTERWORKING)
2840 error ("AAPCS does not support -mcaller-super-interworking");
2841 else
2842 if (TARGET_CALLEE_INTERWORKING)
2843 error ("AAPCS does not support -mcallee-super-interworking");
2844 }
2845
2846 /* iWMMXt and NEON are incompatible. */
2847 if (TARGET_IWMMXT && TARGET_NEON)
2848 error ("iWMMXt and NEON are incompatible");
2849
2850 /* iWMMXt unsupported under Thumb mode. */
2851 if (TARGET_THUMB && TARGET_IWMMXT)
2852 error ("iWMMXt unsupported under Thumb mode");
2853
2854 /* __fp16 support currently assumes the core has ldrh. */
2855 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2856 sorry ("__fp16 and no ldrh");
2857
2858 /* If soft-float is specified then don't use FPU. */
2859 if (TARGET_SOFT_FLOAT)
2860 arm_fpu_attr = FPU_NONE;
2861
2862 if (TARGET_AAPCS_BASED)
2863 {
2864 if (arm_abi == ARM_ABI_IWMMXT)
2865 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2866 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2867 && TARGET_HARD_FLOAT
2868 && TARGET_VFP)
2869 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2870 else
2871 arm_pcs_default = ARM_PCS_AAPCS;
2872 }
2873 else
2874 {
2875 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2876 sorry ("-mfloat-abi=hard and VFP");
2877
2878 if (arm_abi == ARM_ABI_APCS)
2879 arm_pcs_default = ARM_PCS_APCS;
2880 else
2881 arm_pcs_default = ARM_PCS_ATPCS;
2882 }
2883
2884 /* For arm2/3 there is no need to do any scheduling if we are doing
2885 software floating-point. */
2886 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2887 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2888
2889 /* Use the cp15 method if it is available. */
2890 if (target_thread_pointer == TP_AUTO)
2891 {
2892 if (arm_arch6k && !TARGET_THUMB1)
2893 target_thread_pointer = TP_CP15;
2894 else
2895 target_thread_pointer = TP_SOFT;
2896 }
2897
2898 if (TARGET_HARD_TP && TARGET_THUMB1)
2899 error ("can not use -mtp=cp15 with 16-bit Thumb");
2900
2901 /* Override the default structure alignment for AAPCS ABI. */
2902 if (!global_options_set.x_arm_structure_size_boundary)
2903 {
2904 if (TARGET_AAPCS_BASED)
2905 arm_structure_size_boundary = 8;
2906 }
2907 else
2908 {
2909 if (arm_structure_size_boundary != 8
2910 && arm_structure_size_boundary != 32
2911 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2912 {
2913 if (ARM_DOUBLEWORD_ALIGN)
2914 warning (0,
2915 "structure size boundary can only be set to 8, 32 or 64");
2916 else
2917 warning (0, "structure size boundary can only be set to 8 or 32");
2918 arm_structure_size_boundary
2919 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2920 }
2921 }
2922
2923 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2924 {
2925 error ("RTP PIC is incompatible with Thumb");
2926 flag_pic = 0;
2927 }
2928
2929 /* If stack checking is disabled, we can use r10 as the PIC register,
2930 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2931 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2932 {
2933 if (TARGET_VXWORKS_RTP)
2934 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2935 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2936 }
2937
2938 if (flag_pic && TARGET_VXWORKS_RTP)
2939 arm_pic_register = 9;
2940
2941 if (arm_pic_register_string != NULL)
2942 {
2943 int pic_register = decode_reg_name (arm_pic_register_string);
2944
2945 if (!flag_pic)
2946 warning (0, "-mpic-register= is useless without -fpic");
2947
2948 /* Prevent the user from choosing an obviously stupid PIC register. */
2949 else if (pic_register < 0 || call_used_regs[pic_register]
2950 || pic_register == HARD_FRAME_POINTER_REGNUM
2951 || pic_register == STACK_POINTER_REGNUM
2952 || pic_register >= PC_REGNUM
2953 || (TARGET_VXWORKS_RTP
2954 && (unsigned int) pic_register != arm_pic_register))
2955 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2956 else
2957 arm_pic_register = pic_register;
2958 }
2959
2960 if (TARGET_VXWORKS_RTP
2961 && !global_options_set.x_arm_pic_data_is_text_relative)
2962 arm_pic_data_is_text_relative = 0;
2963
2964 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2965 if (fix_cm3_ldrd == 2)
2966 {
2967 if (arm_selected_cpu->core == cortexm3)
2968 fix_cm3_ldrd = 1;
2969 else
2970 fix_cm3_ldrd = 0;
2971 }
2972
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors
2975 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2976 - ARMv8 architecture-based processors.
2977
2978 Disable -munaligned-access by default for
2979 - all pre-ARMv6 architecture-based processors
2980 - ARMv6-M architecture-based processors. */
2981
2982 if (unaligned_access == 2)
2983 {
2984 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2985 unaligned_access = 1;
2986 else
2987 unaligned_access = 0;
2988 }
2989 else if (unaligned_access == 1
2990 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2991 {
2992 warning (0, "target CPU does not support unaligned accesses");
2993 unaligned_access = 0;
2994 }
2995
2996 if (TARGET_THUMB1 && flag_schedule_insns)
2997 {
2998 /* Don't warn since it's on by default in -O2. */
2999 flag_schedule_insns = 0;
3000 }
3001
3002 if (optimize_size)
3003 {
3004 /* If optimizing for size, bump the number of instructions that we
3005 are prepared to conditionally execute (even on a StrongARM). */
3006 max_insns_skipped = 6;
3007
3008 /* For THUMB2, we limit the conditional sequence to one IT block. */
3009 if (TARGET_THUMB2)
3010 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3011 }
3012 else
3013 max_insns_skipped = current_tune->max_insns_skipped;
3014
3015 /* Hot/Cold partitioning is not currently supported, since we can't
3016 handle literal pool placement in that case. */
3017 if (flag_reorder_blocks_and_partition)
3018 {
3019 inform (input_location,
3020 "-freorder-blocks-and-partition not supported on this architecture");
3021 flag_reorder_blocks_and_partition = 0;
3022 flag_reorder_blocks = 1;
3023 }
3024
3025 if (flag_pic)
3026 /* Hoisting PIC address calculations more aggressively provides a small,
3027 but measurable, size reduction for PIC code. Therefore, we decrease
3028 the bar for unrestricted expression hoisting to the cost of PIC address
3029 calculation, which is 2 instructions. */
3030 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3031 global_options.x_param_values,
3032 global_options_set.x_param_values);
3033
3034 /* ARM EABI defaults to strict volatile bitfields. */
3035 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3036 && abi_version_at_least(2))
3037 flag_strict_volatile_bitfields = 1;
3038
3039 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3040 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3041 if (flag_prefetch_loop_arrays < 0
3042 && HAVE_prefetch
3043 && optimize >= 3
3044 && current_tune->num_prefetch_slots > 0)
3045 flag_prefetch_loop_arrays = 1;
3046
3047 /* Set up the parameters to be used in the prefetching algorithm. Do not
3048 override the defaults unless we are tuning for a core whose values we have researched. */
3049 if (current_tune->num_prefetch_slots > 0)
3050 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3051 current_tune->num_prefetch_slots,
3052 global_options.x_param_values,
3053 global_options_set.x_param_values);
3054 if (current_tune->l1_cache_line_size >= 0)
3055 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3056 current_tune->l1_cache_line_size,
3057 global_options.x_param_values,
3058 global_options_set.x_param_values);
3059 if (current_tune->l1_cache_size >= 0)
3060 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3061 current_tune->l1_cache_size,
3062 global_options.x_param_values,
3063 global_options_set.x_param_values);
3064
3065 /* Use Neon rather than core registers to perform 64-bit
3066 operations. */
3067 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3068 if (use_neon_for_64bits == 1)
3069 prefer_neon_for_64bits = true;
3070
3071 /* Use the alternative scheduling-pressure algorithm by default. */
3072 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3073 global_options.x_param_values,
3074 global_options_set.x_param_values);
3075
3076 /* Disable shrink-wrap when optimizing function for size, since it tends to
3077 generate additional returns. */
3078 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3079 flag_shrink_wrap = false;
3080 /* TBD: Dwarf info for apcs frame is not handled yet. */
3081 if (TARGET_APCS_FRAME)
3082 flag_shrink_wrap = false;
3083
3084 /* We only support -mslow-flash-data on armv7-m targets. */
3085 if (target_slow_flash_data
3086 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3087 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3088 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3089
3090 /* Currently, for slow flash data, we just disable literal pools. */
3091 if (target_slow_flash_data)
3092 arm_disable_literal_pool = true;
3093
3094 /* Thumb2 inline assembly code should always use unified syntax.
3095 This will apply to ARM and Thumb1 eventually. */
3096 if (TARGET_THUMB2)
3097 inline_asm_unified = 1;
3098
3099 /* Disable scheduling fusion by default unless the target is an armv7
3100 processor and the tuning prefers ldrd/strd. */
3101 if (flag_schedule_fusion == 2
3102 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3103 flag_schedule_fusion = 0;
3104
3105 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3106 - epilogue_insns - does not accurately model the corresponding insns
3107 emitted in the asm file. In particular, see the comment in thumb_exit
3108 'Find out how many of the (return) argument registers we can corrupt'.
3109 As a consequence, the epilogue may clobber registers without fipa-ra
3110 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3111 TODO: Accurately model clobbers for epilogue_insns and reenable
3112 fipa-ra. */
3113 if (TARGET_THUMB1)
3114 flag_ipa_ra = 0;
3115
3116 /* Register global variables with the garbage collector. */
3117 arm_add_gc_roots ();
3118 }
3119
3120 static void
3121 arm_add_gc_roots (void)
3122 {
3123 gcc_obstack_init(&minipool_obstack);
3124 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3125 }
3126 \f
3127 /* A table of known ARM exception types.
3128 For use with the interrupt function attribute. */
3129
3130 typedef struct
3131 {
3132 const char *const arg;
3133 const unsigned long return_value;
3134 }
3135 isr_attribute_arg;
3136
3137 static const isr_attribute_arg isr_attribute_args [] =
3138 {
3139 { "IRQ", ARM_FT_ISR },
3140 { "irq", ARM_FT_ISR },
3141 { "FIQ", ARM_FT_FIQ },
3142 { "fiq", ARM_FT_FIQ },
3143 { "ABORT", ARM_FT_ISR },
3144 { "abort", ARM_FT_ISR },
3145 { "ABORT", ARM_FT_ISR },
3146 { "abort", ARM_FT_ISR },
3147 { "UNDEF", ARM_FT_EXCEPTION },
3148 { "undef", ARM_FT_EXCEPTION },
3149 { "SWI", ARM_FT_EXCEPTION },
3150 { "swi", ARM_FT_EXCEPTION },
3151 { NULL, ARM_FT_NORMAL }
3152 };
3153
3154 /* Returns the (interrupt) function type of the current
3155 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3156
3157 static unsigned long
3158 arm_isr_value (tree argument)
3159 {
3160 const isr_attribute_arg * ptr;
3161 const char * arg;
3162
3163 if (!arm_arch_notm)
3164 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3165
3166 /* No argument - default to IRQ. */
3167 if (argument == NULL_TREE)
3168 return ARM_FT_ISR;
3169
3170 /* Get the value of the argument. */
3171 if (TREE_VALUE (argument) == NULL_TREE
3172 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3173 return ARM_FT_UNKNOWN;
3174
3175 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3176
3177 /* Check it against the list of known arguments. */
3178 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3179 if (streq (arg, ptr->arg))
3180 return ptr->return_value;
3181
3182 /* An unrecognized interrupt type. */
3183 return ARM_FT_UNKNOWN;
3184 }
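/* For example, a handler declared as
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
   reaches this function with the string "FIQ" and is classified as
   ARM_FT_FIQ; an unrecognized string yields ARM_FT_UNKNOWN.  (The
   declaration above is only an illustrative use of the attribute
   handled here.)  */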
3185
3186 /* Computes the type of the current function. */
3187
3188 static unsigned long
3189 arm_compute_func_type (void)
3190 {
3191 unsigned long type = ARM_FT_UNKNOWN;
3192 tree a;
3193 tree attr;
3194
3195 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3196
3197 /* Decide if the current function is volatile. Such functions
3198 never return, and many memory cycles can be saved by not storing
3199 register values that will never be needed again. This optimization
3200 was added to speed up context switching in a kernel application. */
3201 if (optimize > 0
3202 && (TREE_NOTHROW (current_function_decl)
3203 || !(flag_unwind_tables
3204 || (flag_exceptions
3205 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3206 && TREE_THIS_VOLATILE (current_function_decl))
3207 type |= ARM_FT_VOLATILE;
3208
3209 if (cfun->static_chain_decl != NULL)
3210 type |= ARM_FT_NESTED;
3211
3212 attr = DECL_ATTRIBUTES (current_function_decl);
3213
3214 a = lookup_attribute ("naked", attr);
3215 if (a != NULL_TREE)
3216 type |= ARM_FT_NAKED;
3217
3218 a = lookup_attribute ("isr", attr);
3219 if (a == NULL_TREE)
3220 a = lookup_attribute ("interrupt", attr);
3221
3222 if (a == NULL_TREE)
3223 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3224 else
3225 type |= arm_isr_value (TREE_VALUE (a));
3226
3227 return type;
3228 }
3229
3230 /* Returns the type of the current function. */
3231
3232 unsigned long
3233 arm_current_func_type (void)
3234 {
3235 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3236 cfun->machine->func_type = arm_compute_func_type ();
3237
3238 return cfun->machine->func_type;
3239 }
3240
3241 bool
3242 arm_allocate_stack_slots_for_args (void)
3243 {
3244 /* Naked functions should not allocate stack slots for arguments. */
3245 return !IS_NAKED (arm_current_func_type ());
3246 }
3247
3248 static bool
3249 arm_warn_func_return (tree decl)
3250 {
3251 /* Naked functions are implemented entirely in assembly, including the
3252 return sequence, so suppress warnings about this. */
3253 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3254 }
3255
3256 \f
3257 /* Output assembler code for a block containing the constant parts
3258 of a trampoline, leaving space for the variable parts.
3259
3260 On the ARM, (if r8 is the static chain regnum, and remembering that
3261 referencing pc adds an offset of 8) the trampoline looks like:
3262 ldr r8, [pc, #0]
3263 ldr pc, [pc]
3264 .word static chain value
3265 .word function's address
3266 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3267
3268 static void
3269 arm_asm_trampoline_template (FILE *f)
3270 {
3271 if (TARGET_ARM)
3272 {
3273 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3274 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3275 }
3276 else if (TARGET_THUMB2)
3277 {
3278 /* The Thumb-2 trampoline is similar to the ARM implementation.
3279 Unlike 16-bit Thumb, we enter the stub in Thumb mode.  */
3280 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3281 STATIC_CHAIN_REGNUM, PC_REGNUM);
3282 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3283 }
3284 else
3285 {
3286 ASM_OUTPUT_ALIGN (f, 2);
3287 fprintf (f, "\t.code\t16\n");
3288 fprintf (f, ".Ltrampoline_start:\n");
3289 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3290 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3291 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3292 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3293 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3294 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3295 }
3296 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3297 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3298 }
3299
3300 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3301
3302 static void
3303 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3304 {
3305 rtx fnaddr, mem, a_tramp;
3306
3307 emit_block_move (m_tramp, assemble_trampoline_template (),
3308 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3309
3310 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3311 emit_move_insn (mem, chain_value);
3312
3313 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3314 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3315 emit_move_insn (mem, fnaddr);
3316
3317 a_tramp = XEXP (m_tramp, 0);
3318 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3319 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3320 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3321 }
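
/* An illustrative sketch of the resulting layout for the ARM-state
   template on a 32-bit target (rCHAIN stands for STATIC_CHAIN_REGNUM;
   the values are filled in at run time):

     offset  0:  ldr   rCHAIN, [pc, #0]   @ pc reads as .+8, so this loads
     offset  4:  ldr   pc, [pc, #0]       @ offset 8; this one loads offset 12
     offset  8:  .word <static chain value>       @ stored by the code above
     offset 12:  .word <nested function address>

   followed by the __clear_cache call so that the instruction fetch sees
   the freshly written words.  */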
3322
3323 /* Thumb trampolines should be entered in Thumb mode, so set
3324 the bottom bit of the address. */
3325
3326 static rtx
3327 arm_trampoline_adjust_address (rtx addr)
3328 {
3329 if (TARGET_THUMB)
3330 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3331 NULL, 0, OPTAB_LIB_WIDEN);
3332 return addr;
3333 }
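
/* For illustration: on a Thumb target a trampoline placed at, say,
   0x20001000 (a hypothetical address) is handed out as 0x20001001; the
   set low bit makes an indirect call such as blx enter the stub in Thumb
   state.  */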
3334 \f
3335 /* Return 1 if it is possible to return using a single instruction.
3336 If SIBLING is non-null, this is a test for a return before a sibling
3337 call. SIBLING is the call insn, so we can examine its register usage. */
3338
3339 int
3340 use_return_insn (int iscond, rtx sibling)
3341 {
3342 int regno;
3343 unsigned int func_type;
3344 unsigned long saved_int_regs;
3345 unsigned HOST_WIDE_INT stack_adjust;
3346 arm_stack_offsets *offsets;
3347
3348 /* Never use a return instruction before reload has run. */
3349 if (!reload_completed)
3350 return 0;
3351
3352 func_type = arm_current_func_type ();
3353
3354 /* Naked, volatile and stack alignment functions need special
3355 consideration. */
3356 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3357 return 0;
3358
3359 /* So do interrupt functions that use the frame pointer and Thumb
3360 interrupt functions. */
3361 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3362 return 0;
3363
3364 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3365 && !optimize_function_for_size_p (cfun))
3366 return 0;
3367
3368 offsets = arm_get_frame_offsets ();
3369 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3370
3371 /* As do variadic functions. */
3372 if (crtl->args.pretend_args_size
3373 || cfun->machine->uses_anonymous_args
3374 /* Or if the function calls __builtin_eh_return () */
3375 || crtl->calls_eh_return
3376 /* Or if the function calls alloca */
3377 || cfun->calls_alloca
3378 /* Or if there is a stack adjustment. However, if the stack pointer
3379 is saved on the stack, we can use a pre-incrementing stack load. */
3380 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3381 && stack_adjust == 4)))
3382 return 0;
3383
3384 saved_int_regs = offsets->saved_regs_mask;
3385
3386 /* Unfortunately, the insn
3387
3388 ldmib sp, {..., sp, ...}
3389
3390 triggers a bug on most SA-110 based devices, such that the stack
3391 pointer won't be correctly restored if the instruction takes a
3392 page fault. We work around this problem by popping r3 along with
3393 the other registers, since that is never slower than executing
3394 another instruction.
3395
3396 We test for !arm_arch5 here, because code for any architecture
3397 less than this could potentially be run on one of the buggy
3398 chips. */
3399 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3400 {
3401 /* Validate that r3 is a call-clobbered register (always true in
3402 the default ABI) ... */
3403 if (!call_used_regs[3])
3404 return 0;
3405
3406 /* ... that it isn't being used for a return value ... */
3407 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3408 return 0;
3409
3410 /* ... or for a tail-call argument ... */
3411 if (sibling)
3412 {
3413 gcc_assert (CALL_P (sibling));
3414
3415 if (find_regno_fusage (sibling, USE, 3))
3416 return 0;
3417 }
3418
3419 /* ... and that there are no call-saved registers in r0-r2
3420 (always true in the default ABI). */
3421 if (saved_int_regs & 0x7)
3422 return 0;
3423 }
3424
3425 /* Can't be done if interworking with Thumb, and any registers have been
3426 stacked. */
3427 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3428 return 0;
3429
3430 /* On StrongARM, conditional returns are expensive if they aren't
3431 taken and multiple registers have been stacked. */
3432 if (iscond && arm_tune_strongarm)
3433 {
3434 /* Conditional return when just the LR is stored is a simple
3435 conditional-load instruction, that's not expensive. */
3436 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3437 return 0;
3438
3439 if (flag_pic
3440 && arm_pic_register != INVALID_REGNUM
3441 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3442 return 0;
3443 }
3444
3445 /* If there are saved registers but the LR isn't saved, then we need
3446 two instructions for the return. */
3447 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3448 return 0;
3449
3450 /* Can't be done if any of the VFP regs are pushed,
3451 since this also requires an insn. */
3452 if (TARGET_HARD_FLOAT && TARGET_VFP)
3453 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3454 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3455 return 0;
3456
3457 if (TARGET_REALLY_IWMMXT)
3458 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3459 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3460 return 0;
3461
3462 return 1;
3463 }
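
/* An illustrative note: when use_return_insn accepts, the epilogue can be
   a single instruction that restores the saved registers and returns in
   one go, for example something like

     ldmfd  sp!, {r4, r5, r6, pc}

   (the exact register list depends on what the function saved).  Any of
   the rejections above instead forces a multi-instruction epilogue.  */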
3464
3465 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3466 shrink-wrapping if possible. This is the case if we need to emit a
3467 prologue, which we can test by looking at the offsets. */
3468 bool
3469 use_simple_return_p (void)
3470 {
3471 arm_stack_offsets *offsets;
3472
3473 offsets = arm_get_frame_offsets ();
3474 return offsets->outgoing_args != 0;
3475 }
3476
3477 /* Return TRUE if int I is a valid immediate ARM constant. */
3478
3479 int
3480 const_ok_for_arm (HOST_WIDE_INT i)
3481 {
3482 int lowbit;
3483
3484 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3485 be all zero, or all one. */
3486 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3487 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3488 != ((~(unsigned HOST_WIDE_INT) 0)
3489 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3490 return FALSE;
3491
3492 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3493
3494 /* Fast return for 0 and small values. We must do this for zero, since
3495 the code below can't handle that one case. */
3496 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3497 return TRUE;
3498
3499 /* Get the number of trailing zeros. */
3500 lowbit = ffs((int) i) - 1;
3501
3502 /* Only even shifts are allowed in ARM mode so round down to the
3503 nearest even number. */
3504 if (TARGET_ARM)
3505 lowbit &= ~1;
3506
3507 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3508 return TRUE;
3509
3510 if (TARGET_ARM)
3511 {
3512 /* Allow rotated constants in ARM mode. */
3513 if (lowbit <= 4
3514 && ((i & ~0xc000003f) == 0
3515 || (i & ~0xf000000f) == 0
3516 || (i & ~0xfc000003) == 0))
3517 return TRUE;
3518 }
3519 else
3520 {
3521 HOST_WIDE_INT v;
3522
3523 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3524 v = i & 0xff;
3525 v |= v << 16;
3526 if (i == v || i == (v | (v << 8)))
3527 return TRUE;
3528
3529 /* Allow repeated pattern 0xXY00XY00. */
3530 v = i & 0xff00;
3531 v |= v << 16;
3532 if (i == v)
3533 return TRUE;
3534 }
3535
3536 return FALSE;
3537 }
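
/* Some illustrative classifications under the rules above (worked by
   hand, so treat them as a sketch rather than an exhaustive table):

     0x000000ff   valid everywhere (plain 8-bit value)
     0x00ff0000   valid in ARM mode (0xff rotated by an even amount) and
                  in Thumb-2 (8-bit value shifted to an arbitrary position)
     0x000001fe   invalid in ARM mode (0xff << 1 would need an odd
                  rotation) but valid in Thumb-2
     0x00f000f0   invalid in ARM mode, valid in Thumb-2 as the repeated
                  pattern 0x00XY00XY
     0x0001ff00   invalid in both, so it must be synthesized from several
                  instructions or loaded from memory.  */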
3538
3539 /* Return true if I is a valid constant for the operation CODE. */
3540 int
3541 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3542 {
3543 if (const_ok_for_arm (i))
3544 return 1;
3545
3546 switch (code)
3547 {
3548 case SET:
3549 /* See if we can use movw. */
3550 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3551 return 1;
3552 else
3553 /* Otherwise, try mvn. */
3554 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3555
3556 case PLUS:
3557 /* See if we can use addw or subw. */
3558 if (TARGET_THUMB2
3559 && ((i & 0xfffff000) == 0
3560 || ((-i) & 0xfffff000) == 0))
3561 return 1;
3562 /* else fall through. */
3563
3564 case COMPARE:
3565 case EQ:
3566 case NE:
3567 case GT:
3568 case LE:
3569 case LT:
3570 case GE:
3571 case GEU:
3572 case LTU:
3573 case GTU:
3574 case LEU:
3575 case UNORDERED:
3576 case ORDERED:
3577 case UNEQ:
3578 case UNGE:
3579 case UNLT:
3580 case UNGT:
3581 case UNLE:
3582 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3583
3584 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3585 case XOR:
3586 return 0;
3587
3588 case IOR:
3589 if (TARGET_THUMB2)
3590 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3591 return 0;
3592
3593 case AND:
3594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3595
3596 default:
3597 gcc_unreachable ();
3598 }
3599 }
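
/* For illustration: a constant that is awkward in one direction may be
   fine in the other.  Adding 0xffffff00 (i.e. -256) is accepted for PLUS
   because const_ok_for_arm (256) holds, so the addition can be emitted as
   a subtraction of 0x100; similarly an AND with 0xffffff00 is accepted
   through the inverted form and ends up as a BIC of 0xff.  */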
3600
3601 /* Return true if I is a valid DImode constant for the operation CODE. */
3602 int
3603 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3604 {
3605 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3606 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3607 rtx hi = GEN_INT (hi_val);
3608 rtx lo = GEN_INT (lo_val);
3609
3610 if (TARGET_THUMB1)
3611 return 0;
3612
3613 switch (code)
3614 {
3615 case AND:
3616 case IOR:
3617 case XOR:
3618 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3619 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3620 case PLUS:
3621 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3622
3623 default:
3624 return 0;
3625 }
3626 }
3627
3628 /* Emit a sequence of insns to handle a large constant.
3629 CODE is the code of the operation required, it can be any of SET, PLUS,
3630 IOR, AND, XOR, MINUS;
3631 MODE is the mode in which the operation is being performed;
3632 VAL is the integer to operate on;
3633 SOURCE is the other operand (a register, or a null-pointer for SET);
3634 SUBTARGETS means it is safe to create scratch registers if that will
3635 either produce a simpler sequence, or we will want to cse the values.
3636 Return value is the number of insns emitted. */
3637
3638 /* ??? Tweak this for thumb2. */
3639 int
3640 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3641 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3642 {
3643 rtx cond;
3644
3645 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3646 cond = COND_EXEC_TEST (PATTERN (insn));
3647 else
3648 cond = NULL_RTX;
3649
3650 if (subtargets || code == SET
3651 || (REG_P (target) && REG_P (source)
3652 && REGNO (target) != REGNO (source)))
3653 {
3654 /* After arm_reorg has been called, we can't fix up expensive
3655 constants by pushing them into memory so we must synthesize
3656 them in-line, regardless of the cost. This is only likely to
3657 be more costly on chips that have load delay slots and we are
3658 compiling without running the scheduler (so no splitting
3659 occurred before the final instruction emission).
3660
3661 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3662 */
3663 if (!cfun->machine->after_arm_reorg
3664 && !cond
3665 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3666 1, 0)
3667 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3668 + (code != SET))))
3669 {
3670 if (code == SET)
3671 {
3672 /* Currently SET is the only monadic value for CODE; all
3673 the rest are dyadic. */
3674 if (TARGET_USE_MOVT)
3675 arm_emit_movpair (target, GEN_INT (val));
3676 else
3677 emit_set_insn (target, GEN_INT (val));
3678
3679 return 1;
3680 }
3681 else
3682 {
3683 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3684
3685 if (TARGET_USE_MOVT)
3686 arm_emit_movpair (temp, GEN_INT (val));
3687 else
3688 emit_set_insn (temp, GEN_INT (val));
3689
3690 /* For MINUS, the constant is the minuend (SOURCE is subtracted
3691 from it), since we never have subtraction of a constant. */
3692 if (code == MINUS)
3693 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3694 else
3695 emit_set_insn (target,
3696 gen_rtx_fmt_ee (code, mode, source, temp));
3697 return 2;
3698 }
3699 }
3700 }
3701
3702 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3703 1);
3704 }
3705
3706 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3707 ARM/Thumb-2 immediates and add up to VAL.
3708 The function return value gives the number of insns required. */
3709 static int
3710 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3711 struct four_ints *return_sequence)
3712 {
3713 int best_consecutive_zeros = 0;
3714 int i;
3715 int best_start = 0;
3716 int insns1, insns2;
3717 struct four_ints tmp_sequence;
3718
3719 /* If we aren't targeting ARM, the best place to start is always at
3720 the bottom, otherwise look more closely. */
3721 if (TARGET_ARM)
3722 {
3723 for (i = 0; i < 32; i += 2)
3724 {
3725 int consecutive_zeros = 0;
3726
3727 if (!(val & (3 << i)))
3728 {
3729 while ((i < 32) && !(val & (3 << i)))
3730 {
3731 consecutive_zeros += 2;
3732 i += 2;
3733 }
3734 if (consecutive_zeros > best_consecutive_zeros)
3735 {
3736 best_consecutive_zeros = consecutive_zeros;
3737 best_start = i - consecutive_zeros;
3738 }
3739 i -= 2;
3740 }
3741 }
3742 }
3743
3744 /* So long as it won't require any more insns to do so, it's
3745 desirable to emit a small constant (in bits 0...9) in the last
3746 insn. This way there is more chance that it can be combined with
3747 a later addressing insn to form a pre-indexed load or store
3748 operation. Consider:
3749
3750 *((volatile int *)0xe0000100) = 1;
3751 *((volatile int *)0xe0000110) = 2;
3752
3753 We want this to wind up as:
3754
3755 mov rA, #0xe0000000
3756 mov rB, #1
3757 str rB, [rA, #0x100]
3758 mov rB, #2
3759 str rB, [rA, #0x110]
3760
3761 rather than having to synthesize both large constants from scratch.
3762
3763 Therefore, we calculate how many insns would be required to emit
3764 the constant starting from `best_start', and also starting from
3765 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3766 yield a shorter sequence, we may as well use zero. */
3767 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3768 if (best_start != 0
3769 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3770 {
3771 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3772 if (insns2 <= insns1)
3773 {
3774 *return_sequence = tmp_sequence;
3775 insns1 = insns2;
3776 }
3777 }
3778
3779 return insns1;
3780 }
3781
3782 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3783 static int
3784 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3785 struct four_ints *return_sequence, int i)
3786 {
3787 int remainder = val & 0xffffffff;
3788 int insns = 0;
3789
3790 /* Try and find a way of doing the job in either two or three
3791 instructions.
3792
3793 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3794 location. We start at position I. This may be the MSB, or
3795 optimal_immediate_sequence may have positioned it at the largest block
3796 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3797 wrapping around to the top of the word when we drop off the bottom.
3798 In the worst case this code should produce no more than four insns.
3799
3800 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3801 constants, shifted to any arbitrary location. We should always start
3802 at the MSB. */
3803 do
3804 {
3805 int end;
3806 unsigned int b1, b2, b3, b4;
3807 unsigned HOST_WIDE_INT result;
3808 int loc;
3809
3810 gcc_assert (insns < 4);
3811
3812 if (i <= 0)
3813 i += 32;
3814
3815 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3816 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3817 {
3818 loc = i;
3819 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3820 /* We can use addw/subw for the last 12 bits. */
3821 result = remainder;
3822 else
3823 {
3824 /* Use an 8-bit shifted/rotated immediate. */
3825 end = i - 8;
3826 if (end < 0)
3827 end += 32;
3828 result = remainder & ((0x0ff << end)
3829 | ((i < end) ? (0xff >> (32 - end))
3830 : 0));
3831 i -= 8;
3832 }
3833 }
3834 else
3835 {
3836 /* ARM allows rotates by a multiple of two. Thumb-2 allows
3837 arbitrary shifts. */
3838 i -= TARGET_ARM ? 2 : 1;
3839 continue;
3840 }
3841
3842 /* Next, see if we can do a better job with a thumb2 replicated
3843 constant.
3844
3845 We do it this way around to catch the cases like 0x01F001E0 where
3846 two 8-bit immediates would work, but a replicated constant would
3847 make it worse.
3848
3849 TODO: 16-bit constants that don't clear all the bits, but still win.
3850 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3851 if (TARGET_THUMB2)
3852 {
3853 b1 = (remainder & 0xff000000) >> 24;
3854 b2 = (remainder & 0x00ff0000) >> 16;
3855 b3 = (remainder & 0x0000ff00) >> 8;
3856 b4 = remainder & 0xff;
3857
3858 if (loc > 24)
3859 {
3860 /* The 8-bit immediate already found clears b1 (and maybe b2),
3861 but must leave b3 and b4 alone. */
3862
3863 /* First try to find a 32-bit replicated constant that clears
3864 almost everything. We can assume that we can't do it in one,
3865 or else we wouldn't be here. */
3866 unsigned int tmp = b1 & b2 & b3 & b4;
3867 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3868 + (tmp << 24);
3869 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3870 + (tmp == b3) + (tmp == b4);
3871 if (tmp
3872 && (matching_bytes >= 3
3873 || (matching_bytes == 2
3874 && const_ok_for_op (remainder & ~tmp2, code))))
3875 {
3876 /* At least 3 of the bytes match, and the fourth has at
3877 least as many bits set, or two of the bytes match
3878 and it will only require one more insn to finish. */
3879 result = tmp2;
3880 i = tmp != b1 ? 32
3881 : tmp != b2 ? 24
3882 : tmp != b3 ? 16
3883 : 8;
3884 }
3885
3886 /* Second, try to find a 16-bit replicated constant that can
3887 leave three of the bytes clear. If b2 or b4 is already
3888 zero, then we can. If the 8-bit from above would not
3889 clear b2 anyway, then we still win. */
3890 else if (b1 == b3 && (!b2 || !b4
3891 || (remainder & 0x00ff0000 & ~result)))
3892 {
3893 result = remainder & 0xff00ff00;
3894 i = 24;
3895 }
3896 }
3897 else if (loc > 16)
3898 {
3899 /* The 8-bit immediate already found clears b2 (and maybe b3)
3900 and we don't get here unless b1 is already clear, but it will
3901 leave b4 unchanged. */
3902
3903 /* If we can clear b2 and b4 at once, then we win, since the
3904 8-bits couldn't possibly reach that far. */
3905 if (b2 == b4)
3906 {
3907 result = remainder & 0x00ff00ff;
3908 i = 16;
3909 }
3910 }
3911 }
3912
3913 return_sequence->i[insns++] = result;
3914 remainder &= ~result;
3915
3916 if (code == SET || code == MINUS)
3917 code = PLUS;
3918 }
3919 while (remainder);
3920
3921 return insns;
3922 }
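
/* A worked example (a sketch; rD stands for whichever destination
   register is chosen, and the MOVW/MOVT path in arm_split_constant is
   ignored): synthesizing 0x00f000f0 with code == SET in ARM mode yields a
   two-element sequence such as { 0x00f00000, 0x000000f0 }, which
   arm_gen_constant later emits as

     mov  rD, #0x00f00000
     add  rD, rD, #0x000000f0

   (the second element is applied with PLUS because SET degrades to PLUS
   after the first insn).  In Thumb-2 mode the same value is already a
   valid modified immediate, the replicated 0x00XY00XY form, so no
   splitting is needed at all.  */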
3923
3924 /* Emit an instruction with the indicated PATTERN. If COND is
3925 non-NULL, conditionalize the execution of the instruction on COND
3926 being true. */
3927
3928 static void
3929 emit_constant_insn (rtx cond, rtx pattern)
3930 {
3931 if (cond)
3932 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3933 emit_insn (pattern);
3934 }
3935
3936 /* As above, but extra parameter GENERATE which, if clear, suppresses
3937 RTL generation. */
3938
3939 static int
3940 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3941 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3942 int generate)
3943 {
3944 int can_invert = 0;
3945 int can_negate = 0;
3946 int final_invert = 0;
3947 int i;
3948 int set_sign_bit_copies = 0;
3949 int clear_sign_bit_copies = 0;
3950 int clear_zero_bit_copies = 0;
3951 int set_zero_bit_copies = 0;
3952 int insns = 0, neg_insns, inv_insns;
3953 unsigned HOST_WIDE_INT temp1, temp2;
3954 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3955 struct four_ints *immediates;
3956 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3957
3958 /* Find out which operations are safe for a given CODE. Also do a quick
3959 check for degenerate cases; these can occur when DImode operations
3960 are split. */
3961 switch (code)
3962 {
3963 case SET:
3964 can_invert = 1;
3965 break;
3966
3967 case PLUS:
3968 can_negate = 1;
3969 break;
3970
3971 case IOR:
3972 if (remainder == 0xffffffff)
3973 {
3974 if (generate)
3975 emit_constant_insn (cond,
3976 gen_rtx_SET (VOIDmode, target,
3977 GEN_INT (ARM_SIGN_EXTEND (val))));
3978 return 1;
3979 }
3980
3981 if (remainder == 0)
3982 {
3983 if (reload_completed && rtx_equal_p (target, source))
3984 return 0;
3985
3986 if (generate)
3987 emit_constant_insn (cond,
3988 gen_rtx_SET (VOIDmode, target, source));
3989 return 1;
3990 }
3991 break;
3992
3993 case AND:
3994 if (remainder == 0)
3995 {
3996 if (generate)
3997 emit_constant_insn (cond,
3998 gen_rtx_SET (VOIDmode, target, const0_rtx));
3999 return 1;
4000 }
4001 if (remainder == 0xffffffff)
4002 {
4003 if (reload_completed && rtx_equal_p (target, source))
4004 return 0;
4005 if (generate)
4006 emit_constant_insn (cond,
4007 gen_rtx_SET (VOIDmode, target, source));
4008 return 1;
4009 }
4010 can_invert = 1;
4011 break;
4012
4013 case XOR:
4014 if (remainder == 0)
4015 {
4016 if (reload_completed && rtx_equal_p (target, source))
4017 return 0;
4018 if (generate)
4019 emit_constant_insn (cond,
4020 gen_rtx_SET (VOIDmode, target, source));
4021 return 1;
4022 }
4023
4024 if (remainder == 0xffffffff)
4025 {
4026 if (generate)
4027 emit_constant_insn (cond,
4028 gen_rtx_SET (VOIDmode, target,
4029 gen_rtx_NOT (mode, source)));
4030 return 1;
4031 }
4032 final_invert = 1;
4033 break;
4034
4035 case MINUS:
4036 /* We treat MINUS as (val - source), since (source - val) is always
4037 passed as (source + (-val)). */
4038 if (remainder == 0)
4039 {
4040 if (generate)
4041 emit_constant_insn (cond,
4042 gen_rtx_SET (VOIDmode, target,
4043 gen_rtx_NEG (mode, source)));
4044 return 1;
4045 }
4046 if (const_ok_for_arm (val))
4047 {
4048 if (generate)
4049 emit_constant_insn (cond,
4050 gen_rtx_SET (VOIDmode, target,
4051 gen_rtx_MINUS (mode, GEN_INT (val),
4052 source)));
4053 return 1;
4054 }
4055
4056 break;
4057
4058 default:
4059 gcc_unreachable ();
4060 }
4061
4062 /* If we can do it in one insn get out quickly. */
4063 if (const_ok_for_op (val, code))
4064 {
4065 if (generate)
4066 emit_constant_insn (cond,
4067 gen_rtx_SET (VOIDmode, target,
4068 (source
4069 ? gen_rtx_fmt_ee (code, mode, source,
4070 GEN_INT (val))
4071 : GEN_INT (val))));
4072 return 1;
4073 }
4074
4075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4076 insn. */
4077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4079 {
4080 if (generate)
4081 {
4082 if (mode == SImode && i == 16)
4083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4084 smaller insn. */
4085 emit_constant_insn (cond,
4086 gen_zero_extendhisi2
4087 (target, gen_lowpart (HImode, source)));
4088 else
4089 /* Extz only supports SImode, but we can coerce the operands
4090 into that mode. */
4091 emit_constant_insn (cond,
4092 gen_extzv_t2 (gen_lowpart (SImode, target),
4093 gen_lowpart (SImode, source),
4094 GEN_INT (i), const0_rtx));
4095 }
4096
4097 return 1;
4098 }
4099
4100 /* Calculate a few attributes that may be useful for specific
4101 optimizations. */
4102 /* Count number of leading zeros. */
4103 for (i = 31; i >= 0; i--)
4104 {
4105 if ((remainder & (1 << i)) == 0)
4106 clear_sign_bit_copies++;
4107 else
4108 break;
4109 }
4110
4111 /* Count number of leading 1's. */
4112 for (i = 31; i >= 0; i--)
4113 {
4114 if ((remainder & (1 << i)) != 0)
4115 set_sign_bit_copies++;
4116 else
4117 break;
4118 }
4119
4120 /* Count number of trailing zeros. */
4121 for (i = 0; i <= 31; i++)
4122 {
4123 if ((remainder & (1 << i)) == 0)
4124 clear_zero_bit_copies++;
4125 else
4126 break;
4127 }
4128
4129 /* Count number of trailing 1's. */
4130 for (i = 0; i <= 31; i++)
4131 {
4132 if ((remainder & (1 << i)) != 0)
4133 set_zero_bit_copies++;
4134 else
4135 break;
4136 }
4137
4138 switch (code)
4139 {
4140 case SET:
4141 /* See if we can do this by sign_extending a constant that is known
4142 to be negative. This is a good way of doing it, since the shift
4143 may well merge into a subsequent insn. */
4144 if (set_sign_bit_copies > 1)
4145 {
4146 if (const_ok_for_arm
4147 (temp1 = ARM_SIGN_EXTEND (remainder
4148 << (set_sign_bit_copies - 1))))
4149 {
4150 if (generate)
4151 {
4152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4153 emit_constant_insn (cond,
4154 gen_rtx_SET (VOIDmode, new_src,
4155 GEN_INT (temp1)));
4156 emit_constant_insn (cond,
4157 gen_ashrsi3 (target, new_src,
4158 GEN_INT (set_sign_bit_copies - 1)));
4159 }
4160 return 2;
4161 }
4162 /* For an inverted constant, we will need to set the low bits,
4163 these will be shifted out of harm's way. */
4164 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4165 if (const_ok_for_arm (~temp1))
4166 {
4167 if (generate)
4168 {
4169 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4170 emit_constant_insn (cond,
4171 gen_rtx_SET (VOIDmode, new_src,
4172 GEN_INT (temp1)));
4173 emit_constant_insn (cond,
4174 gen_ashrsi3 (target, new_src,
4175 GEN_INT (set_sign_bit_copies - 1)));
4176 }
4177 return 2;
4178 }
4179 }
4180
4181 /* See if we can calculate the value as the difference between two
4182 valid immediates. */
4183 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4184 {
4185 int topshift = clear_sign_bit_copies & ~1;
4186
4187 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4188 & (0xff000000 >> topshift));
4189
4190 /* If temp1 is zero, then that means the 9 most significant
4191 bits of remainder were 1 and we've caused it to overflow.
4192 When topshift is 0 we don't need to do anything since we
4193 can borrow from 'bit 32'. */
4194 if (temp1 == 0 && topshift != 0)
4195 temp1 = 0x80000000 >> (topshift - 1);
4196
4197 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4198
4199 if (const_ok_for_arm (temp2))
4200 {
4201 if (generate)
4202 {
4203 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4204 emit_constant_insn (cond,
4205 gen_rtx_SET (VOIDmode, new_src,
4206 GEN_INT (temp1)));
4207 emit_constant_insn (cond,
4208 gen_addsi3 (target, new_src,
4209 GEN_INT (-temp2)));
4210 }
4211
4212 return 2;
4213 }
4214 }
4215
4216 /* See if we can generate this by setting the bottom (or the top)
4217 16 bits, and then shifting these into the other half of the
4218 word. We only look for the simplest cases; to do more would cost
4219 too much. Be careful, however, not to generate this when the
4220 alternative would take fewer insns. */
4221 if (val & 0xffff0000)
4222 {
4223 temp1 = remainder & 0xffff0000;
4224 temp2 = remainder & 0x0000ffff;
4225
4226 /* Overlaps outside this range are best done using other methods. */
4227 for (i = 9; i < 24; i++)
4228 {
4229 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4230 && !const_ok_for_arm (temp2))
4231 {
4232 rtx new_src = (subtargets
4233 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4234 : target);
4235 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4236 source, subtargets, generate);
4237 source = new_src;
4238 if (generate)
4239 emit_constant_insn
4240 (cond,
4241 gen_rtx_SET
4242 (VOIDmode, target,
4243 gen_rtx_IOR (mode,
4244 gen_rtx_ASHIFT (mode, source,
4245 GEN_INT (i)),
4246 source)));
4247 return insns + 1;
4248 }
4249 }
4250
4251 /* Don't duplicate cases already considered. */
4252 for (i = 17; i < 24; i++)
4253 {
4254 if (((temp1 | (temp1 >> i)) == remainder)
4255 && !const_ok_for_arm (temp1))
4256 {
4257 rtx new_src = (subtargets
4258 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4259 : target);
4260 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4261 source, subtargets, generate);
4262 source = new_src;
4263 if (generate)
4264 emit_constant_insn
4265 (cond,
4266 gen_rtx_SET (VOIDmode, target,
4267 gen_rtx_IOR
4268 (mode,
4269 gen_rtx_LSHIFTRT (mode, source,
4270 GEN_INT (i)),
4271 source)));
4272 return insns + 1;
4273 }
4274 }
4275 }
4276 break;
4277
4278 case IOR:
4279 case XOR:
4280 /* If we have IOR or XOR, and the constant can be loaded in a
4281 single instruction, and we can find a temporary to put it in,
4282 then this can be done in two instructions instead of 3-4. */
4283 if (subtargets
4284 /* TARGET can't be NULL if SUBTARGETS is 0 */
4285 || (reload_completed && !reg_mentioned_p (target, source)))
4286 {
4287 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4288 {
4289 if (generate)
4290 {
4291 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4292
4293 emit_constant_insn (cond,
4294 gen_rtx_SET (VOIDmode, sub,
4295 GEN_INT (val)));
4296 emit_constant_insn (cond,
4297 gen_rtx_SET (VOIDmode, target,
4298 gen_rtx_fmt_ee (code, mode,
4299 source, sub)));
4300 }
4301 return 2;
4302 }
4303 }
4304
4305 if (code == XOR)
4306 break;
4307
4308 /* Convert
4309 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4310 followed by 0s, e.g. 0xfff00000) to
4311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4312
4313 This can be done in 2 instructions by using shifts with mov or mvn.
4314 E.g. for
4315 x = x | 0xfff00000;
4316 we generate:
4317 mvn r0, r0, asl #12
4318 mvn r0, r0, lsr #12 */
4319 if (set_sign_bit_copies > 8
4320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4321 {
4322 if (generate)
4323 {
4324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4325 rtx shift = GEN_INT (set_sign_bit_copies);
4326
4327 emit_constant_insn
4328 (cond,
4329 gen_rtx_SET (VOIDmode, sub,
4330 gen_rtx_NOT (mode,
4331 gen_rtx_ASHIFT (mode,
4332 source,
4333 shift))));
4334 emit_constant_insn
4335 (cond,
4336 gen_rtx_SET (VOIDmode, target,
4337 gen_rtx_NOT (mode,
4338 gen_rtx_LSHIFTRT (mode, sub,
4339 shift))));
4340 }
4341 return 2;
4342 }
4343
4344 /* Convert
4345 x = y | constant (which has set_zero_bit_copies trailing 1s)
4346 to
4347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4348
4349 E.g. for
4350 r0 = r0 | 0xfff
4351 we generate:
4352 mvn r0, r0, lsr #12
4353 mvn r0, r0, asl #12  */
4354 if (set_zero_bit_copies > 8
4355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4356 {
4357 if (generate)
4358 {
4359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4360 rtx shift = GEN_INT (set_zero_bit_copies);
4361
4362 emit_constant_insn
4363 (cond,
4364 gen_rtx_SET (VOIDmode, sub,
4365 gen_rtx_NOT (mode,
4366 gen_rtx_LSHIFTRT (mode,
4367 source,
4368 shift))));
4369 emit_constant_insn
4370 (cond,
4371 gen_rtx_SET (VOIDmode, target,
4372 gen_rtx_NOT (mode,
4373 gen_rtx_ASHIFT (mode, sub,
4374 shift))));
4375 }
4376 return 2;
4377 }
4378
4379 /* This will never be reached for Thumb2 because orn is a valid
4380 instruction. This is for Thumb1 and the ARM 32 bit cases.
4381
4382 x = y | constant (such that ~constant is a valid constant)
4383 Transform this to
4384 x = ~(~y & ~constant).
4385 */
4386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4387 {
4388 if (generate)
4389 {
4390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4391 emit_constant_insn (cond,
4392 gen_rtx_SET (VOIDmode, sub,
4393 gen_rtx_NOT (mode, source)));
4394 source = sub;
4395 if (subtargets)
4396 sub = gen_reg_rtx (mode);
4397 emit_constant_insn (cond,
4398 gen_rtx_SET (VOIDmode, sub,
4399 gen_rtx_AND (mode, source,
4400 GEN_INT (temp1))));
4401 emit_constant_insn (cond,
4402 gen_rtx_SET (VOIDmode, target,
4403 gen_rtx_NOT (mode, sub)));
4404 }
4405 return 3;
4406 }
4407 break;
4408
4409 case AND:
4410 /* See if two shifts will do 2 or more insns' worth of work. */
4411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4412 {
4413 HOST_WIDE_INT shift_mask = ((0xffffffff
4414 << (32 - clear_sign_bit_copies))
4415 & 0xffffffff);
4416
4417 if ((remainder | shift_mask) != 0xffffffff)
4418 {
4419 if (generate)
4420 {
4421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4422 insns = arm_gen_constant (AND, mode, cond,
4423 remainder | shift_mask,
4424 new_src, source, subtargets, 1);
4425 source = new_src;
4426 }
4427 else
4428 {
4429 rtx targ = subtargets ? NULL_RTX : target;
4430 insns = arm_gen_constant (AND, mode, cond,
4431 remainder | shift_mask,
4432 targ, source, subtargets, 0);
4433 }
4434 }
4435
4436 if (generate)
4437 {
4438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4439 rtx shift = GEN_INT (clear_sign_bit_copies);
4440
4441 emit_insn (gen_ashlsi3 (new_src, source, shift));
4442 emit_insn (gen_lshrsi3 (target, new_src, shift));
4443 }
4444
4445 return insns + 2;
4446 }
4447
4448 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4449 {
4450 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4451
4452 if ((remainder | shift_mask) != 0xffffffff)
4453 {
4454 if (generate)
4455 {
4456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4457
4458 insns = arm_gen_constant (AND, mode, cond,
4459 remainder | shift_mask,
4460 new_src, source, subtargets, 1);
4461 source = new_src;
4462 }
4463 else
4464 {
4465 rtx targ = subtargets ? NULL_RTX : target;
4466
4467 insns = arm_gen_constant (AND, mode, cond,
4468 remainder | shift_mask,
4469 targ, source, subtargets, 0);
4470 }
4471 }
4472
4473 if (generate)
4474 {
4475 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4476 rtx shift = GEN_INT (clear_zero_bit_copies);
4477
4478 emit_insn (gen_lshrsi3 (new_src, source, shift));
4479 emit_insn (gen_ashlsi3 (target, new_src, shift));
4480 }
4481
4482 return insns + 2;
4483 }
4484
4485 break;
4486
4487 default:
4488 break;
4489 }
4490
4491 /* Calculate what the instruction sequences would be if we generated it
4492 normally, negated, or inverted. */
4493 if (code == AND)
4494 /* AND cannot be split into multiple insns, so invert and use BIC. */
4495 insns = 99;
4496 else
4497 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4498
4499 if (can_negate)
4500 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4501 &neg_immediates);
4502 else
4503 neg_insns = 99;
4504
4505 if (can_invert || final_invert)
4506 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4507 &inv_immediates);
4508 else
4509 inv_insns = 99;
4510
4511 immediates = &pos_immediates;
4512
4513 /* Is the negated immediate sequence more efficient? */
4514 if (neg_insns < insns && neg_insns <= inv_insns)
4515 {
4516 insns = neg_insns;
4517 immediates = &neg_immediates;
4518 }
4519 else
4520 can_negate = 0;
4521
4522 /* Is the inverted immediate sequence more efficient?
4523 We must allow for an extra NOT instruction for XOR operations, although
4524 there is some chance that the final 'mvn' will get optimized later. */
4525 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4526 {
4527 insns = inv_insns;
4528 immediates = &inv_immediates;
4529 }
4530 else
4531 {
4532 can_invert = 0;
4533 final_invert = 0;
4534 }
4535
4536 /* Now output the chosen sequence as instructions. */
4537 if (generate)
4538 {
4539 for (i = 0; i < insns; i++)
4540 {
4541 rtx new_src, temp1_rtx;
4542
4543 temp1 = immediates->i[i];
4544
4545 if (code == SET || code == MINUS)
4546 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4547 else if ((final_invert || i < (insns - 1)) && subtargets)
4548 new_src = gen_reg_rtx (mode);
4549 else
4550 new_src = target;
4551
4552 if (can_invert)
4553 temp1 = ~temp1;
4554 else if (can_negate)
4555 temp1 = -temp1;
4556
4557 temp1 = trunc_int_for_mode (temp1, mode);
4558 temp1_rtx = GEN_INT (temp1);
4559
4560 if (code == SET)
4561 ;
4562 else if (code == MINUS)
4563 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4564 else
4565 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4566
4567 emit_constant_insn (cond,
4568 gen_rtx_SET (VOIDmode, new_src,
4569 temp1_rtx));
4570 source = new_src;
4571
4572 if (code == SET)
4573 {
4574 can_negate = can_invert;
4575 can_invert = 0;
4576 code = PLUS;
4577 }
4578 else if (code == MINUS)
4579 code = PLUS;
4580 }
4581 }
4582
4583 if (final_invert)
4584 {
4585 if (generate)
4586 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4587 gen_rtx_NOT (mode, source)));
4588 insns++;
4589 }
4590
4591 return insns;
4592 }
4593
4594 /* Canonicalize a comparison so that we are more likely to recognize it.
4595 This can be done for a few constant compares, where we can make the
4596 immediate value easier to load. */
4597
4598 static void
4599 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4600 bool op0_preserve_value)
4601 {
4602 machine_mode mode;
4603 unsigned HOST_WIDE_INT i, maxval;
4604
4605 mode = GET_MODE (*op0);
4606 if (mode == VOIDmode)
4607 mode = GET_MODE (*op1);
4608
4609 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4610
4611 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4612 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4613 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4614 for GTU/LEU in Thumb mode. */
4615 if (mode == DImode)
4616 {
4617
4618 if (*code == GT || *code == LE
4619 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4620 {
4621 /* Missing comparison. First try to use an available
4622 comparison. */
4623 if (CONST_INT_P (*op1))
4624 {
4625 i = INTVAL (*op1);
4626 switch (*code)
4627 {
4628 case GT:
4629 case LE:
4630 if (i != maxval
4631 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4632 {
4633 *op1 = GEN_INT (i + 1);
4634 *code = *code == GT ? GE : LT;
4635 return;
4636 }
4637 break;
4638 case GTU:
4639 case LEU:
4640 if (i != ~((unsigned HOST_WIDE_INT) 0)
4641 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4642 {
4643 *op1 = GEN_INT (i + 1);
4644 *code = *code == GTU ? GEU : LTU;
4645 return;
4646 }
4647 break;
4648 default:
4649 gcc_unreachable ();
4650 }
4651 }
4652
4653 /* If that did not work, reverse the condition. */
4654 if (!op0_preserve_value)
4655 {
4656 std::swap (*op0, *op1);
4657 *code = (int)swap_condition ((enum rtx_code)*code);
4658 }
4659 }
4660 return;
4661 }
4662
4663 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4664 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4665 to facilitate possible combining with a cmp into 'ands'. */
4666 if (mode == SImode
4667 && GET_CODE (*op0) == ZERO_EXTEND
4668 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4669 && GET_MODE (XEXP (*op0, 0)) == QImode
4670 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4671 && subreg_lowpart_p (XEXP (*op0, 0))
4672 && *op1 == const0_rtx)
4673 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4674 GEN_INT (255));
4675
4676 /* Comparisons smaller than DImode. Only adjust comparisons against
4677 an out-of-range constant. */
4678 if (!CONST_INT_P (*op1)
4679 || const_ok_for_arm (INTVAL (*op1))
4680 || const_ok_for_arm (- INTVAL (*op1)))
4681 return;
4682
4683 i = INTVAL (*op1);
4684
4685 switch (*code)
4686 {
4687 case EQ:
4688 case NE:
4689 return;
4690
4691 case GT:
4692 case LE:
4693 if (i != maxval
4694 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4695 {
4696 *op1 = GEN_INT (i + 1);
4697 *code = *code == GT ? GE : LT;
4698 return;
4699 }
4700 break;
4701
4702 case GE:
4703 case LT:
4704 if (i != ~maxval
4705 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4706 {
4707 *op1 = GEN_INT (i - 1);
4708 *code = *code == GE ? GT : LE;
4709 return;
4710 }
4711 break;
4712
4713 case GTU:
4714 case LEU:
4715 if (i != ~((unsigned HOST_WIDE_INT) 0)
4716 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4717 {
4718 *op1 = GEN_INT (i + 1);
4719 *code = *code == GTU ? GEU : LTU;
4720 return;
4721 }
4722 break;
4723
4724 case GEU:
4725 case LTU:
4726 if (i != 0
4727 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4728 {
4729 *op1 = GEN_INT (i - 1);
4730 *code = *code == GEU ? GTU : LEU;
4731 return;
4732 }
4733 break;
4734
4735 default:
4736 gcc_unreachable ();
4737 }
4738 }
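
/* A worked example of the adjustment above: for an SImode test
   (x > 0x00ffffff), neither 0x00ffffff nor its negation is a valid
   immediate, but 0x01000000 is, so the GT case rewrites the test as
   (x >= 0x01000000); the comparison is equivalent and the new constant
   can be encoded directly in the cmp.  */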
4739
4740
4741 /* Define how to find the value returned by a function. */
4742
4743 static rtx
4744 arm_function_value(const_tree type, const_tree func,
4745 bool outgoing ATTRIBUTE_UNUSED)
4746 {
4747 machine_mode mode;
4748 int unsignedp ATTRIBUTE_UNUSED;
4749 rtx r ATTRIBUTE_UNUSED;
4750
4751 mode = TYPE_MODE (type);
4752
4753 if (TARGET_AAPCS_BASED)
4754 return aapcs_allocate_return_reg (mode, type, func);
4755
4756 /* Promote integer types. */
4757 if (INTEGRAL_TYPE_P (type))
4758 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4759
4760 /* Promote small structs returned in a register to full-word size
4761 for big-endian AAPCS. */
4762 if (arm_return_in_msb (type))
4763 {
4764 HOST_WIDE_INT size = int_size_in_bytes (type);
4765 if (size % UNITS_PER_WORD != 0)
4766 {
4767 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4768 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4769 }
4770 }
4771
4772 return arm_libcall_value_1 (mode);
4773 }
4774
4775 /* libcall hashtable helpers. */
4776
4777 struct libcall_hasher : typed_noop_remove <rtx_def>
4778 {
4779 typedef rtx_def value_type;
4780 typedef rtx_def compare_type;
4781 static inline hashval_t hash (const value_type *);
4782 static inline bool equal (const value_type *, const compare_type *);
4783 static inline void remove (value_type *);
4784 };
4785
4786 inline bool
4787 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4788 {
4789 return rtx_equal_p (p1, p2);
4790 }
4791
4792 inline hashval_t
4793 libcall_hasher::hash (const value_type *p1)
4794 {
4795 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4796 }
4797
4798 typedef hash_table<libcall_hasher> libcall_table_type;
4799
4800 static void
4801 add_libcall (libcall_table_type *htab, rtx libcall)
4802 {
4803 *htab->find_slot (libcall, INSERT) = libcall;
4804 }
4805
4806 static bool
4807 arm_libcall_uses_aapcs_base (const_rtx libcall)
4808 {
4809 static bool init_done = false;
4810 static libcall_table_type *libcall_htab = NULL;
4811
4812 if (!init_done)
4813 {
4814 init_done = true;
4815
4816 libcall_htab = new libcall_table_type (31);
4817 add_libcall (libcall_htab,
4818 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4819 add_libcall (libcall_htab,
4820 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4821 add_libcall (libcall_htab,
4822 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4825
4826 add_libcall (libcall_htab,
4827 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4828 add_libcall (libcall_htab,
4829 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4830 add_libcall (libcall_htab,
4831 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4834
4835 add_libcall (libcall_htab,
4836 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4837 add_libcall (libcall_htab,
4838 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4839 add_libcall (libcall_htab,
4840 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4851
4852 /* Values from double-precision helper functions are returned in core
4853 registers if the selected core only supports single-precision
4854 arithmetic, even if we are using the hard-float ABI. The same is
4855 true for single-precision helpers, but we will never be using the
4856 hard-float ABI on a CPU which doesn't support single-precision
4857 operations in hardware. */
4858 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4859 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4860 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4861 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4862 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4869 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4870 SFmode));
4871 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4872 DFmode));
4873 }
4874
4875 return libcall && libcall_htab->find (libcall) != NULL;
4876 }
4877
4878 static rtx
4879 arm_libcall_value_1 (machine_mode mode)
4880 {
4881 if (TARGET_AAPCS_BASED)
4882 return aapcs_libcall_value (mode);
4883 else if (TARGET_IWMMXT_ABI
4884 && arm_vector_mode_supported_p (mode))
4885 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4886 else
4887 return gen_rtx_REG (mode, ARG_REGISTER (1));
4888 }
4889
4890 /* Define how to find the value returned by a library function
4891 assuming the value has mode MODE. */
4892
4893 static rtx
4894 arm_libcall_value (machine_mode mode, const_rtx libcall)
4895 {
4896 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4897 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4898 {
4899 /* The following libcalls return their result in integer registers,
4900 even though they return a floating point value. */
4901 if (arm_libcall_uses_aapcs_base (libcall))
4902 return gen_rtx_REG (mode, ARG_REGISTER(1));
4903
4904 }
4905
4906 return arm_libcall_value_1 (mode);
4907 }
4908
4909 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4910
4911 static bool
4912 arm_function_value_regno_p (const unsigned int regno)
4913 {
4914 if (regno == ARG_REGISTER (1)
4915 || (TARGET_32BIT
4916 && TARGET_AAPCS_BASED
4917 && TARGET_VFP
4918 && TARGET_HARD_FLOAT
4919 && regno == FIRST_VFP_REGNUM)
4920 || (TARGET_IWMMXT_ABI
4921 && regno == FIRST_IWMMXT_REGNUM))
4922 return true;
4923
4924 return false;
4925 }
4926
4927 /* Determine the amount of memory needed to store the possible return
4928 registers of an untyped call. */
4929 int
4930 arm_apply_result_size (void)
4931 {
4932 int size = 16;
4933
4934 if (TARGET_32BIT)
4935 {
4936 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4937 size += 32;
4938 if (TARGET_IWMMXT_ABI)
4939 size += 8;
4940 }
4941
4942 return size;
4943 }
4944
4945 /* Decide whether TYPE should be returned in memory (true)
4946 or in a register (false). FNTYPE is the type of the function making
4947 the call. */
4948 static bool
4949 arm_return_in_memory (const_tree type, const_tree fntype)
4950 {
4951 HOST_WIDE_INT size;
4952
4953 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4954
4955 if (TARGET_AAPCS_BASED)
4956 {
4957 /* Simple, non-aggregate types (i.e. not including vectors and
4958 complex) are always returned in a register (or registers).
4959 We don't care about which register here, so we can short-cut
4960 some of the detail. */
4961 if (!AGGREGATE_TYPE_P (type)
4962 && TREE_CODE (type) != VECTOR_TYPE
4963 && TREE_CODE (type) != COMPLEX_TYPE)
4964 return false;
4965
4966 /* Any return value that is no larger than one word can be
4967 returned in r0. */
4968 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4969 return false;
4970
4971 /* Check any available co-processors to see if they accept the
4972 type as a register candidate (VFP, for example, can return
4973 some aggregates in consecutive registers). These aren't
4974 available if the call is variadic. */
4975 if (aapcs_select_return_coproc (type, fntype) >= 0)
4976 return false;
4977
4978 /* Vector values should be returned using ARM registers, not
4979 memory (unless they're over 16 bytes, which will break since
4980 we only have four call-clobbered registers to play with). */
4981 if (TREE_CODE (type) == VECTOR_TYPE)
4982 return (size < 0 || size > (4 * UNITS_PER_WORD));
4983
4984 /* The rest go in memory. */
4985 return true;
4986 }
4987
4988 if (TREE_CODE (type) == VECTOR_TYPE)
4989 return (size < 0 || size > (4 * UNITS_PER_WORD));
4990
4991 if (!AGGREGATE_TYPE_P (type) &&
4992 (TREE_CODE (type) != VECTOR_TYPE))
4993 /* All simple types are returned in registers. */
4994 return false;
4995
4996 if (arm_abi != ARM_ABI_APCS)
4997 {
4998 /* ATPCS and later return aggregate types in memory only if they are
4999 larger than a word (or are variable size). */
5000 return (size < 0 || size > UNITS_PER_WORD);
5001 }
5002
5003 /* For the arm-wince targets we choose to be compatible with Microsoft's
5004 ARM and Thumb compilers, which always return aggregates in memory. */
5005 #ifndef ARM_WINCE
5006 /* All structures/unions bigger than one word are returned in memory.
5007 Also catch the case where int_size_in_bytes returns -1. In this case
5008 the aggregate is either huge or of variable size, and in either case
5009 we will want to return it via memory and not in a register. */
5010 if (size < 0 || size > UNITS_PER_WORD)
5011 return true;
5012
5013 if (TREE_CODE (type) == RECORD_TYPE)
5014 {
5015 tree field;
5016
5017 /* For a struct the APCS says that we only return in a register
5018 if the type is 'integer like' and every addressable element
5019 has an offset of zero. For practical purposes this means
5020 that the structure can have at most one non bit-field element
5021 and that this element must be the first one in the structure. */
5022
5023 /* Find the first field, ignoring non FIELD_DECL things which will
5024 have been created by C++. */
5025 for (field = TYPE_FIELDS (type);
5026 field && TREE_CODE (field) != FIELD_DECL;
5027 field = DECL_CHAIN (field))
5028 continue;
5029
5030 if (field == NULL)
5031 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5032
5033 /* Check that the first field is valid for returning in a register. */
5034
5035 /* ... Floats are not allowed */
5036 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5037 return true;
5038
5039 /* ... Aggregates that are not themselves valid for returning in
5040 a register are not allowed. */
5041 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5042 return true;
5043
5044 /* Now check the remaining fields, if any. Only bitfields are allowed,
5045 since they are not addressable. */
5046 for (field = DECL_CHAIN (field);
5047 field;
5048 field = DECL_CHAIN (field))
5049 {
5050 if (TREE_CODE (field) != FIELD_DECL)
5051 continue;
5052
5053 if (!DECL_BIT_FIELD_TYPE (field))
5054 return true;
5055 }
5056
5057 return false;
5058 }
5059
5060 if (TREE_CODE (type) == UNION_TYPE)
5061 {
5062 tree field;
5063
5064 /* Unions can be returned in registers if every element is
5065 integral, or can be returned in an integer register. */
5066 for (field = TYPE_FIELDS (type);
5067 field;
5068 field = DECL_CHAIN (field))
5069 {
5070 if (TREE_CODE (field) != FIELD_DECL)
5071 continue;
5072
5073 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5074 return true;
5075
5076 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5077 return true;
5078 }
5079
5080 return false;
5081 }
5082 #endif /* not ARM_WINCE */
5083
5084 /* Return all other types in memory. */
5085 return true;
5086 }
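
/* Some illustrative outcomes of the AAPCS branch above (hypothetical
   struct tags):

     struct s1 { char c[4]; };    returned in r0 (no larger than one word)
     struct s2 { int a, b; };     returned in memory (two words, and not a
                                  co-processor candidate)
     struct s3 { double x, y; };  returned in VFP registers when the
                                  hard-float VFP PCS accepts it as a
                                  homogeneous aggregate, otherwise in
                                  memory.  */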
5087
5088 const struct pcs_attribute_arg
5089 {
5090 const char *arg;
5091 enum arm_pcs value;
5092 } pcs_attribute_args[] =
5093 {
5094 {"aapcs", ARM_PCS_AAPCS},
5095 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5096 #if 0
5097 /* We could recognize these, but changes would be needed elsewhere
5098 * to implement them. */
5099 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5100 {"atpcs", ARM_PCS_ATPCS},
5101 {"apcs", ARM_PCS_APCS},
5102 #endif
5103 {NULL, ARM_PCS_UNKNOWN}
5104 };
5105
5106 static enum arm_pcs
5107 arm_pcs_from_attribute (tree attr)
5108 {
5109 const struct pcs_attribute_arg *ptr;
5110 const char *arg;
5111
5112 /* Get the value of the argument. */
5113 if (TREE_VALUE (attr) == NULL_TREE
5114 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5115 return ARM_PCS_UNKNOWN;
5116
5117 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5118
5119 /* Check it against the list of known arguments. */
5120 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5121 if (streq (arg, ptr->arg))
5122 return ptr->value;
5123
5124 /* An unrecognized PCS name. */
5125 return ARM_PCS_UNKNOWN;
5126 }
5127
5128 /* Get the PCS variant to use for this call. TYPE is the function's type
5129 specification, DECL is the specific declaration. DECL may be null if
5130 the call could be indirect or if this is a library call. */
5131 static enum arm_pcs
5132 arm_get_pcs_model (const_tree type, const_tree decl)
5133 {
5134 bool user_convention = false;
5135 enum arm_pcs user_pcs = arm_pcs_default;
5136 tree attr;
5137
5138 gcc_assert (type);
5139
5140 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5141 if (attr)
5142 {
5143 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5144 user_convention = true;
5145 }
5146
5147 if (TARGET_AAPCS_BASED)
5148 {
5149 /* Detect varargs functions. These always use the base rules
5150 (no argument is ever a candidate for a co-processor
5151 register). */
5152 bool base_rules = stdarg_p (type);
5153
5154 if (user_convention)
5155 {
5156 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5157 sorry ("non-AAPCS derived PCS variant");
5158 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5159 error ("variadic functions must use the base AAPCS variant");
5160 }
5161
5162 if (base_rules)
5163 return ARM_PCS_AAPCS;
5164 else if (user_convention)
5165 return user_pcs;
5166 else if (decl && flag_unit_at_a_time)
5167 {
5168 /* Local functions never leak outside this compilation unit,
5169 so we are free to use whatever conventions are
5170 appropriate. */
5171 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5172 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5173 if (i && i->local)
5174 return ARM_PCS_AAPCS_LOCAL;
5175 }
5176 }
5177 else if (user_convention && user_pcs != arm_pcs_default)
5178 sorry ("PCS variant");
5179
5180 /* For everything else we use the target's default. */
5181 return arm_pcs_default;
5182 }
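
/* An illustrative use of the "pcs" attribute handled above (hypothetical
   function name and signature):

     double dot (const double *a, const double *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   selects ARM_PCS_AAPCS_VFP for calls to this function even when the
   translation unit's default is the base AAPCS variant.  */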
5183
5184
5185 static void
5186 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5187 const_tree fntype ATTRIBUTE_UNUSED,
5188 rtx libcall ATTRIBUTE_UNUSED,
5189 const_tree fndecl ATTRIBUTE_UNUSED)
5190 {
5191 /* Record the unallocated VFP registers. */
5192 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5193 pcum->aapcs_vfp_reg_alloc = 0;
5194 }
5195
5196 /* Walk down the type tree of TYPE counting consecutive base elements.
5197 If *MODEP is VOIDmode, then set it to the first valid floating point
5198 type. If a non-floating point type is found, or if a floating point
5199 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5200 otherwise return the count in the sub-tree. */
5201 static int
5202 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5203 {
5204 machine_mode mode;
5205 HOST_WIDE_INT size;
5206
5207 switch (TREE_CODE (type))
5208 {
5209 case REAL_TYPE:
5210 mode = TYPE_MODE (type);
5211 if (mode != DFmode && mode != SFmode)
5212 return -1;
5213
5214 if (*modep == VOIDmode)
5215 *modep = mode;
5216
5217 if (*modep == mode)
5218 return 1;
5219
5220 break;
5221
5222 case COMPLEX_TYPE:
5223 mode = TYPE_MODE (TREE_TYPE (type));
5224 if (mode != DFmode && mode != SFmode)
5225 return -1;
5226
5227 if (*modep == VOIDmode)
5228 *modep = mode;
5229
5230 if (*modep == mode)
5231 return 2;
5232
5233 break;
5234
5235 case VECTOR_TYPE:
5236 /* Use V2SImode and V4SImode as representatives of all 64-bit
5237 and 128-bit vector types, whether or not those modes are
5238 supported with the present options. */
5239 size = int_size_in_bytes (type);
5240 switch (size)
5241 {
5242 case 8:
5243 mode = V2SImode;
5244 break;
5245 case 16:
5246 mode = V4SImode;
5247 break;
5248 default:
5249 return -1;
5250 }
5251
5252 if (*modep == VOIDmode)
5253 *modep = mode;
5254
5255 /* Vector modes are considered to be opaque: two vectors are
5256 equivalent for the purposes of being homogeneous aggregates
5257 if they are the same size. */
5258 if (*modep == mode)
5259 return 1;
5260
5261 break;
5262
5263 case ARRAY_TYPE:
5264 {
5265 int count;
5266 tree index = TYPE_DOMAIN (type);
5267
5268 /* Can't handle incomplete types nor sizes that are not
5269 fixed. */
5270 if (!COMPLETE_TYPE_P (type)
5271 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5272 return -1;
5273
5274 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5275 if (count == -1
5276 || !index
5277 || !TYPE_MAX_VALUE (index)
5278 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5279 || !TYPE_MIN_VALUE (index)
5280 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5281 || count < 0)
5282 return -1;
5283
5284 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5285 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5286
5287 /* There must be no padding. */
5288 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5289 return -1;
5290
5291 return count;
5292 }
5293
5294 case RECORD_TYPE:
5295 {
5296 int count = 0;
5297 int sub_count;
5298 tree field;
5299
5300 /* Can't handle incomplete types nor sizes that are not
5301 fixed. */
5302 if (!COMPLETE_TYPE_P (type)
5303 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5304 return -1;
5305
5306 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5307 {
5308 if (TREE_CODE (field) != FIELD_DECL)
5309 continue;
5310
5311 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5312 if (sub_count < 0)
5313 return -1;
5314 count += sub_count;
5315 }
5316
5317 /* There must be no padding. */
5318 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5319 return -1;
5320
5321 return count;
5322 }
5323
5324 case UNION_TYPE:
5325 case QUAL_UNION_TYPE:
5326 {
5327 /* These aren't very interesting except in a degenerate case. */
5328 int count = 0;
5329 int sub_count;
5330 tree field;
5331
5332 /* Can't handle incomplete types nor sizes that are not
5333 fixed. */
5334 if (!COMPLETE_TYPE_P (type)
5335 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5336 return -1;
5337
5338 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5339 {
5340 if (TREE_CODE (field) != FIELD_DECL)
5341 continue;
5342
5343 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5344 if (sub_count < 0)
5345 return -1;
5346 count = count > sub_count ? count : sub_count;
5347 }
5348
5349 /* There must be no padding. */
5350 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5351 return -1;
5352
5353 return count;
5354 }
5355
5356 default:
5357 break;
5358 }
5359
5360 return -1;
5361 }
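
/* Illustrative examples of the walk above (hypothetical types):

       struct hfa2f { float a, b; };          two elements, SFmode
       struct hfa3d { double d[3]; };         three elements, DFmode
       struct mixed { float a; double b; };   mismatched modes, returns -1

   The caller (aapcs_vfp_is_call_or_return_candidate) additionally
   rejects candidates with more than four elements.  */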
5362
5363 /* Return true if PCS_VARIANT should use VFP registers. */
5364 static bool
5365 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5366 {
5367 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5368 {
5369 static bool seen_thumb1_vfp = false;
5370
5371 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5372 {
5373 sorry ("Thumb-1 hard-float VFP ABI");
5374 /* sorry() is not immediately fatal, so only display this once. */
5375 seen_thumb1_vfp = true;
5376 }
5377
5378 return true;
5379 }
5380
5381 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5382 return false;
5383
5384 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5385 (TARGET_VFP_DOUBLE || !is_double));
5386 }
5387
5388 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5389 suitable for passing or returning in VFP registers for the PCS
5390 variant selected. If it is, then *BASE_MODE is updated to contain
5391 a machine mode describing each element of the argument's type and
5392 *COUNT to hold the number of such elements. */
5393 static bool
5394 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5395 machine_mode mode, const_tree type,
5396 machine_mode *base_mode, int *count)
5397 {
5398 machine_mode new_mode = VOIDmode;
5399
5400 /* If we have the type information, prefer that to working things
5401 out from the mode. */
5402 if (type)
5403 {
5404 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5405
5406 if (ag_count > 0 && ag_count <= 4)
5407 *count = ag_count;
5408 else
5409 return false;
5410 }
5411 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5412 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5413 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5414 {
5415 *count = 1;
5416 new_mode = mode;
5417 }
5418 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5419 {
5420 *count = 2;
5421 new_mode = (mode == DCmode ? DFmode : SFmode);
5422 }
5423 else
5424 return false;
5425
5426
5427 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5428 return false;
5429
5430 *base_mode = new_mode;
5431 return true;
5432 }
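
/* For example (illustrative): a _Complex double argument yields
   *BASE_MODE == DFmode and *COUNT == 2, while a 16-byte vector type is
   treated as a single V4SImode element.  Anything that is not a short
   homogeneous floating-point or vector aggregate returns false.  */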
5433
5434 static bool
5435 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5436 machine_mode mode, const_tree type)
5437 {
5438 int count ATTRIBUTE_UNUSED;
5439 machine_mode ag_mode ATTRIBUTE_UNUSED;
5440
5441 if (!use_vfp_abi (pcs_variant, false))
5442 return false;
5443 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5444 &ag_mode, &count);
5445 }
5446
5447 static bool
5448 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5449 const_tree type)
5450 {
5451 if (!use_vfp_abi (pcum->pcs_variant, false))
5452 return false;
5453
5454 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5455 &pcum->aapcs_vfp_rmode,
5456 &pcum->aapcs_vfp_rcount);
5457 }
5458
5459 static bool
5460 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5461 const_tree type ATTRIBUTE_UNUSED)
5462 {
5463 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5464 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5465 int regno;
5466
5467 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5468 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5469 {
5470 pcum->aapcs_vfp_reg_alloc = mask << regno;
5471 if (mode == BLKmode
5472 || (mode == TImode && ! TARGET_NEON)
5473 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5474 {
5475 int i;
5476 int rcount = pcum->aapcs_vfp_rcount;
5477 int rshift = shift;
5478 machine_mode rmode = pcum->aapcs_vfp_rmode;
5479 rtx par;
5480 if (!TARGET_NEON)
5481 {
5482 /* Avoid using unsupported vector modes. */
5483 if (rmode == V2SImode)
5484 rmode = DImode;
5485 else if (rmode == V4SImode)
5486 {
5487 rmode = DImode;
5488 rcount *= 2;
5489 rshift /= 2;
5490 }
5491 }
5492 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5493 for (i = 0; i < rcount; i++)
5494 {
5495 rtx tmp = gen_rtx_REG (rmode,
5496 FIRST_VFP_REGNUM + regno + i * rshift);
5497 tmp = gen_rtx_EXPR_LIST
5498 (VOIDmode, tmp,
5499 GEN_INT (i * GET_MODE_SIZE (rmode)));
5500 XVECEXP (par, 0, i) = tmp;
5501 }
5502
5503 pcum->aapcs_reg = par;
5504 }
5505 else
5506 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5507 return true;
5508 }
5509 return false;
5510 }
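
/* A worked example of the allocation above (illustrative): for a
   candidate of three DFmode elements, SHIFT is 2 (one double covers two
   S registers) and MASK is 0x3f, so the loop searches for six
   consecutive free S registers starting at even boundaries; if s0-s5
   are free the argument lands in d0-d2 and aapcs_vfp_reg_alloc records
   the mask 0x3f.  */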
5511
5512 static rtx
5513 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5514 machine_mode mode,
5515 const_tree type ATTRIBUTE_UNUSED)
5516 {
5517 if (!use_vfp_abi (pcs_variant, false))
5518 return NULL;
5519
5520 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5521 {
5522 int count;
5523 machine_mode ag_mode;
5524 int i;
5525 rtx par;
5526 int shift;
5527
5528 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5529 &ag_mode, &count);
5530
5531 if (!TARGET_NEON)
5532 {
5533 if (ag_mode == V2SImode)
5534 ag_mode = DImode;
5535 else if (ag_mode == V4SImode)
5536 {
5537 ag_mode = DImode;
5538 count *= 2;
5539 }
5540 }
5541 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5542 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5543 for (i = 0; i < count; i++)
5544 {
5545 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5546 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5547 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5548 XVECEXP (par, 0, i) = tmp;
5549 }
5550
5551 return par;
5552 }
5553
5554 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5555 }
5556
5557 static void
5558 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5559 machine_mode mode ATTRIBUTE_UNUSED,
5560 const_tree type ATTRIBUTE_UNUSED)
5561 {
5562 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5563 pcum->aapcs_vfp_reg_alloc = 0;
5564 return;
5565 }
5566
5567 #define AAPCS_CP(X) \
5568 { \
5569 aapcs_ ## X ## _cum_init, \
5570 aapcs_ ## X ## _is_call_candidate, \
5571 aapcs_ ## X ## _allocate, \
5572 aapcs_ ## X ## _is_return_candidate, \
5573 aapcs_ ## X ## _allocate_return_reg, \
5574 aapcs_ ## X ## _advance \
5575 }
5576
5577 /* Table of co-processors that can be used to pass arguments in
5578 registers. Ideally no argument should be a candidate for more than
5579 one co-processor table entry, but the table is processed in order
5580 and stops after the first match. If that entry then fails to put
5581 the argument into a co-processor register, the argument will go on
5582 the stack. */
5583 static struct
5584 {
5585 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5586 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5587
5588 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5589 BLKmode) is a candidate for this co-processor's registers; this
5590 function should ignore any position-dependent state in
5591 CUMULATIVE_ARGS and only use call-type dependent information. */
5592 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5593
5594 /* Return true if the argument does get a co-processor register; it
5595 should set aapcs_reg to an RTX of the register allocated as is
5596 required for a return from FUNCTION_ARG. */
5597 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5598
5599 /* Return true if a result of mode MODE (or type TYPE if MODE is
5600 BLKmode) can be returned in this co-processor's registers. */
5601 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5602
5603 /* Allocate and return an RTX element to hold the return type of a
5604 call, this routine must not fail and will only be called if
5605 is_return_candidate returned true with the same parameters. */
5606 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5607
5608 /* Finish processing this argument and prepare to start processing
5609 the next one. */
5610 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5611 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5612 {
5613 AAPCS_CP(vfp)
5614 };
5615
5616 #undef AAPCS_CP
5617
5618 static int
5619 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5620 const_tree type)
5621 {
5622 int i;
5623
5624 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5625 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5626 return i;
5627
5628 return -1;
5629 }
5630
5631 static int
5632 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5633 {
5634 /* We aren't passed a decl, so we can't check that a call is local.
5635 However, it isn't clear that that would be a win anyway, since it
5636 might limit some tail-calling opportunities. */
5637 enum arm_pcs pcs_variant;
5638
5639 if (fntype)
5640 {
5641 const_tree fndecl = NULL_TREE;
5642
5643 if (TREE_CODE (fntype) == FUNCTION_DECL)
5644 {
5645 fndecl = fntype;
5646 fntype = TREE_TYPE (fntype);
5647 }
5648
5649 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5650 }
5651 else
5652 pcs_variant = arm_pcs_default;
5653
5654 if (pcs_variant != ARM_PCS_AAPCS)
5655 {
5656 int i;
5657
5658 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5659 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5660 TYPE_MODE (type),
5661 type))
5662 return i;
5663 }
5664 return -1;
5665 }
5666
5667 static rtx
5668 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5669 const_tree fntype)
5670 {
5671 /* We aren't passed a decl, so we can't check that a call is local.
5672 However, it isn't clear that that would be a win anyway, since it
5673 might limit some tail-calling opportunities. */
5674 enum arm_pcs pcs_variant;
5675 int unsignedp ATTRIBUTE_UNUSED;
5676
5677 if (fntype)
5678 {
5679 const_tree fndecl = NULL_TREE;
5680
5681 if (TREE_CODE (fntype) == FUNCTION_DECL)
5682 {
5683 fndecl = fntype;
5684 fntype = TREE_TYPE (fntype);
5685 }
5686
5687 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5688 }
5689 else
5690 pcs_variant = arm_pcs_default;
5691
5692 /* Promote integer types. */
5693 if (type && INTEGRAL_TYPE_P (type))
5694 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5695
5696 if (pcs_variant != ARM_PCS_AAPCS)
5697 {
5698 int i;
5699
5700 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5701 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5702 type))
5703 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5704 mode, type);
5705 }
5706
5707 /* Promote small structs returned in a register to full-word size
5708 for big-endian AAPCS. */
5709 if (type && arm_return_in_msb (type))
5710 {
5711 HOST_WIDE_INT size = int_size_in_bytes (type);
5712 if (size % UNITS_PER_WORD != 0)
5713 {
5714 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5715 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5716 }
5717 }
5718
5719 return gen_rtx_REG (mode, R0_REGNUM);
5720 }
5721
5722 static rtx
5723 aapcs_libcall_value (machine_mode mode)
5724 {
5725 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5726 && GET_MODE_SIZE (mode) <= 4)
5727 mode = SImode;
5728
5729 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5730 }
5731
5732 /* Lay out a function argument using the AAPCS rules. The rule
5733 numbers referred to here are those in the AAPCS. */
5734 static void
5735 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5736 const_tree type, bool named)
5737 {
5738 int nregs, nregs2;
5739 int ncrn;
5740
5741 /* We only need to do this once per argument. */
5742 if (pcum->aapcs_arg_processed)
5743 return;
5744
5745 pcum->aapcs_arg_processed = true;
5746
5747 /* Special case: if named is false then we are handling an incoming
5748 anonymous argument which is on the stack. */
5749 if (!named)
5750 return;
5751
5752 /* Is this a potential co-processor register candidate? */
5753 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5754 {
5755 int slot = aapcs_select_call_coproc (pcum, mode, type);
5756 pcum->aapcs_cprc_slot = slot;
5757
5758 /* We don't have to apply any of the rules from part B of the
5759 preparation phase, these are handled elsewhere in the
5760 compiler. */
5761
5762 if (slot >= 0)
5763 {
5764 /* A Co-processor register candidate goes either in its own
5765 class of registers or on the stack. */
5766 if (!pcum->aapcs_cprc_failed[slot])
5767 {
5768 /* C1.cp - Try to allocate the argument to co-processor
5769 registers. */
5770 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5771 return;
5772
5773 /* C2.cp - Put the argument on the stack and note that we
5774 can't assign any more candidates in this slot. We also
5775 need to note that we have allocated stack space, so that
5776 we won't later try to split a non-cprc candidate between
5777 core registers and the stack. */
5778 pcum->aapcs_cprc_failed[slot] = true;
5779 pcum->can_split = false;
5780 }
5781
5782 /* We didn't get a register, so this argument goes on the
5783 stack. */
5784 gcc_assert (pcum->can_split == false);
5785 return;
5786 }
5787 }
5788
5789 /* C3 - For double-word aligned arguments, round the NCRN up to the
5790 next even number. */
5791 ncrn = pcum->aapcs_ncrn;
5792 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5793 ncrn++;
5794
5795 nregs = ARM_NUM_REGS2(mode, type);
5796
5797 /* Sigh, this test should really assert that nregs > 0, but a GCC
5798 extension allows empty structs and then gives them empty size; it
5799 then allows such a structure to be passed by value. For some of
5800 the code below we have to pretend that such an argument has
5801 non-zero size so that we 'locate' it correctly either in
5802 registers or on the stack. */
5803 gcc_assert (nregs >= 0);
5804
5805 nregs2 = nregs ? nregs : 1;
5806
5807 /* C4 - Argument fits entirely in core registers. */
5808 if (ncrn + nregs2 <= NUM_ARG_REGS)
5809 {
5810 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5811 pcum->aapcs_next_ncrn = ncrn + nregs;
5812 return;
5813 }
5814
5815 /* C5 - Some core registers left and there are no arguments already
5816 on the stack: split this argument between the remaining core
5817 registers and the stack. */
5818 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5819 {
5820 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5821 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5822 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5823 return;
5824 }
5825
5826 /* C6 - NCRN is set to 4. */
5827 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5828
5829 /* C7,C8 - the argument goes on the stack. We have nothing to do here. */
5830 return;
5831 }
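
/* A short example of rules C3-C8 above (illustrative): for
   f (int a, long long b, int c), A takes r0 (C4); B needs doubleword
   alignment, so the NCRN is rounded up from 1 to 2 (C3) and B occupies
   r2/r3 (C4); C then finds no core registers left and goes on the
   stack (C6-C8).  */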
5832
5833 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5834 for a call to a function whose data type is FNTYPE.
5835 For a library call, FNTYPE is NULL. */
5836 void
5837 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5838 rtx libname,
5839 tree fndecl ATTRIBUTE_UNUSED)
5840 {
5841 /* Long call handling. */
5842 if (fntype)
5843 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5844 else
5845 pcum->pcs_variant = arm_pcs_default;
5846
5847 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5848 {
5849 if (arm_libcall_uses_aapcs_base (libname))
5850 pcum->pcs_variant = ARM_PCS_AAPCS;
5851
5852 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5853 pcum->aapcs_reg = NULL_RTX;
5854 pcum->aapcs_partial = 0;
5855 pcum->aapcs_arg_processed = false;
5856 pcum->aapcs_cprc_slot = -1;
5857 pcum->can_split = true;
5858
5859 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5860 {
5861 int i;
5862
5863 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5864 {
5865 pcum->aapcs_cprc_failed[i] = false;
5866 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5867 }
5868 }
5869 return;
5870 }
5871
5872 /* Legacy ABIs */
5873
5874 /* On the ARM, the offset starts at 0. */
5875 pcum->nregs = 0;
5876 pcum->iwmmxt_nregs = 0;
5877 pcum->can_split = true;
5878
5879 /* Varargs vectors are treated the same as long long.
5880 named_count avoids having to change the way arm handles 'named'. */
5881 pcum->named_count = 0;
5882 pcum->nargs = 0;
5883
5884 if (TARGET_REALLY_IWMMXT && fntype)
5885 {
5886 tree fn_arg;
5887
5888 for (fn_arg = TYPE_ARG_TYPES (fntype);
5889 fn_arg;
5890 fn_arg = TREE_CHAIN (fn_arg))
5891 pcum->named_count += 1;
5892
5893 if (! pcum->named_count)
5894 pcum->named_count = INT_MAX;
5895 }
5896 }
5897
5898 /* Return true if we use LRA instead of reload pass. */
5899 static bool
5900 arm_lra_p (void)
5901 {
5902 return arm_lra_flag;
5903 }
5904
5905 /* Return true if mode/type need doubleword alignment. */
5906 static bool
5907 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5908 {
5909 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5910 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5911 }
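
/* For instance (illustrative): DImode and DFmode arguments, and any
   type declared with 8-byte alignment, answer true here and are
   therefore placed in an even-numbered core register pair or at an
   8-byte aligned stack slot; SImode and plain int answer false.  */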
5912
5913
5914 /* Determine where to put an argument to a function.
5915 Value is zero to push the argument on the stack,
5916 or a hard register in which to store the argument.
5917
5918 MODE is the argument's machine mode.
5919 TYPE is the data type of the argument (as a tree).
5920 This is null for libcalls where that information may
5921 not be available.
5922 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5923 the preceding args and about the function being called.
5924 NAMED is nonzero if this argument is a named parameter
5925 (otherwise it is an extra parameter matching an ellipsis).
5926
5927 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5928 other arguments are passed on the stack. If (NAMED == 0) (which happens
5929 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5930 defined), say it is passed in the stack (function_prologue will
5931 indeed make it pass in the stack if necessary). */
5932
5933 static rtx
5934 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5935 const_tree type, bool named)
5936 {
5937 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5938 int nregs;
5939
5940 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5941 a call insn (op3 of a call_value insn). */
5942 if (mode == VOIDmode)
5943 return const0_rtx;
5944
5945 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5946 {
5947 aapcs_layout_arg (pcum, mode, type, named);
5948 return pcum->aapcs_reg;
5949 }
5950
5951 /* Varargs vectors are treated the same as long long.
5952 named_count avoids having to change the way arm handles 'named'. */
5953 if (TARGET_IWMMXT_ABI
5954 && arm_vector_mode_supported_p (mode)
5955 && pcum->named_count > pcum->nargs + 1)
5956 {
5957 if (pcum->iwmmxt_nregs <= 9)
5958 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5959 else
5960 {
5961 pcum->can_split = false;
5962 return NULL_RTX;
5963 }
5964 }
5965
5966 /* Put doubleword aligned quantities in even register pairs. */
5967 if (pcum->nregs & 1
5968 && ARM_DOUBLEWORD_ALIGN
5969 && arm_needs_doubleword_align (mode, type))
5970 pcum->nregs++;
5971
5972 /* Only allow splitting an arg between regs and memory if all preceding
5973 args were allocated to regs. For args passed by reference we only count
5974 the reference pointer. */
5975 if (pcum->can_split)
5976 nregs = 1;
5977 else
5978 nregs = ARM_NUM_REGS2 (mode, type);
5979
5980 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5981 return NULL_RTX;
5982
5983 return gen_rtx_REG (mode, pcum->nregs);
5984 }
5985
5986 static unsigned int
5987 arm_function_arg_boundary (machine_mode mode, const_tree type)
5988 {
5989 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5990 ? DOUBLEWORD_ALIGNMENT
5991 : PARM_BOUNDARY);
5992 }
5993
5994 static int
5995 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
5996 tree type, bool named)
5997 {
5998 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5999 int nregs = pcum->nregs;
6000
6001 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6002 {
6003 aapcs_layout_arg (pcum, mode, type, named);
6004 return pcum->aapcs_partial;
6005 }
6006
6007 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6008 return 0;
6009
6010 if (NUM_ARG_REGS > nregs
6011 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6012 && pcum->can_split)
6013 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6014
6015 return 0;
6016 }
6017
6018 /* Update the data in PCUM to advance over an argument
6019 of mode MODE and data type TYPE.
6020 (TYPE is null for libcalls where that information may not be available.) */
6021
6022 static void
6023 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6024 const_tree type, bool named)
6025 {
6026 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6027
6028 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6029 {
6030 aapcs_layout_arg (pcum, mode, type, named);
6031
6032 if (pcum->aapcs_cprc_slot >= 0)
6033 {
6034 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6035 type);
6036 pcum->aapcs_cprc_slot = -1;
6037 }
6038
6039 /* Generic stuff. */
6040 pcum->aapcs_arg_processed = false;
6041 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6042 pcum->aapcs_reg = NULL_RTX;
6043 pcum->aapcs_partial = 0;
6044 }
6045 else
6046 {
6047 pcum->nargs += 1;
6048 if (arm_vector_mode_supported_p (mode)
6049 && pcum->named_count > pcum->nargs
6050 && TARGET_IWMMXT_ABI)
6051 pcum->iwmmxt_nregs += 1;
6052 else
6053 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6054 }
6055 }
6056
6057 /* Variable sized types are passed by reference. This is a GCC
6058 extension to the ARM ABI. */
6059
6060 static bool
6061 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6062 machine_mode mode ATTRIBUTE_UNUSED,
6063 const_tree type, bool named ATTRIBUTE_UNUSED)
6064 {
6065 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6066 }
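
/* For example (illustrative): an argument whose type has a size that is
   not a compile-time constant, such as a variably modified type (a GNU
   extension), is passed by reference here; ordinary fixed-size structs
   are still passed by value under the normal AAPCS rules.  */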
6067 \f
6068 /* Encode the current state of the #pragma [no_]long_calls. */
6069 typedef enum
6070 {
6071 OFF, /* No #pragma [no_]long_calls is in effect. */
6072 LONG, /* #pragma long_calls is in effect. */
6073 SHORT /* #pragma no_long_calls is in effect. */
6074 } arm_pragma_enum;
6075
6076 static arm_pragma_enum arm_pragma_long_calls = OFF;
6077
6078 void
6079 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6080 {
6081 arm_pragma_long_calls = LONG;
6082 }
6083
6084 void
6085 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6086 {
6087 arm_pragma_long_calls = SHORT;
6088 }
6089
6090 void
6091 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6092 {
6093 arm_pragma_long_calls = OFF;
6094 }
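
/* Typical use of these pragmas in user code (illustrative):

       #pragma long_calls
       void far_away (void);        becomes an implicit long_call
       #pragma long_calls_off

   The pragma state recorded here is consumed by
   arm_set_default_type_attributes below when function types are laid
   out.  */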
6095 \f
6096 /* Handle an attribute requiring a FUNCTION_DECL;
6097 arguments as in struct attribute_spec.handler. */
6098 static tree
6099 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6100 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6101 {
6102 if (TREE_CODE (*node) != FUNCTION_DECL)
6103 {
6104 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6105 name);
6106 *no_add_attrs = true;
6107 }
6108
6109 return NULL_TREE;
6110 }
6111
6112 /* Handle an "interrupt" or "isr" attribute;
6113 arguments as in struct attribute_spec.handler. */
6114 static tree
6115 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6116 bool *no_add_attrs)
6117 {
6118 if (DECL_P (*node))
6119 {
6120 if (TREE_CODE (*node) != FUNCTION_DECL)
6121 {
6122 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6123 name);
6124 *no_add_attrs = true;
6125 }
6126 /* FIXME: the argument if any is checked for type attributes;
6127 should it be checked for decl ones? */
6128 }
6129 else
6130 {
6131 if (TREE_CODE (*node) == FUNCTION_TYPE
6132 || TREE_CODE (*node) == METHOD_TYPE)
6133 {
6134 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6135 {
6136 warning (OPT_Wattributes, "%qE attribute ignored",
6137 name);
6138 *no_add_attrs = true;
6139 }
6140 }
6141 else if (TREE_CODE (*node) == POINTER_TYPE
6142 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6143 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6144 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6145 {
6146 *node = build_variant_type_copy (*node);
6147 TREE_TYPE (*node) = build_type_attribute_variant
6148 (TREE_TYPE (*node),
6149 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6150 *no_add_attrs = true;
6151 }
6152 else
6153 {
6154 /* Possibly pass this attribute on from the type to a decl. */
6155 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6156 | (int) ATTR_FLAG_FUNCTION_NEXT
6157 | (int) ATTR_FLAG_ARRAY_NEXT))
6158 {
6159 *no_add_attrs = true;
6160 return tree_cons (name, args, NULL_TREE);
6161 }
6162 else
6163 {
6164 warning (OPT_Wattributes, "%qE attribute ignored",
6165 name);
6166 }
6167 }
6168 }
6169
6170 return NULL_TREE;
6171 }
6172
6173 /* Handle a "pcs" attribute; arguments as in struct
6174 attribute_spec.handler. */
6175 static tree
6176 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6177 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6178 {
6179 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6180 {
6181 warning (OPT_Wattributes, "%qE attribute ignored", name);
6182 *no_add_attrs = true;
6183 }
6184 return NULL_TREE;
6185 }
6186
6187 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6188 /* Handle the "notshared" attribute. This attribute is another way of
6189 requesting hidden visibility. ARM's compiler supports
6190 "__declspec(notshared)"; we support the same thing via an
6191 attribute. */
6192
6193 static tree
6194 arm_handle_notshared_attribute (tree *node,
6195 tree name ATTRIBUTE_UNUSED,
6196 tree args ATTRIBUTE_UNUSED,
6197 int flags ATTRIBUTE_UNUSED,
6198 bool *no_add_attrs)
6199 {
6200 tree decl = TYPE_NAME (*node);
6201
6202 if (decl)
6203 {
6204 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6205 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6206 *no_add_attrs = false;
6207 }
6208 return NULL_TREE;
6209 }
6210 #endif
6211
6212 /* Return 0 if the attributes for two types are incompatible, 1 if they
6213 are compatible, and 2 if they are nearly compatible (which causes a
6214 warning to be generated). */
6215 static int
6216 arm_comp_type_attributes (const_tree type1, const_tree type2)
6217 {
6218 int l1, l2, s1, s2;
6219
6220 /* Check for mismatch of non-default calling convention. */
6221 if (TREE_CODE (type1) != FUNCTION_TYPE)
6222 return 1;
6223
6224 /* Check for mismatched call attributes. */
6225 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6226 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6227 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6228 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6229
6230 /* Only bother to check if an attribute is defined. */
6231 if (l1 | l2 | s1 | s2)
6232 {
6233 /* If one type has an attribute, the other must have the same attribute. */
6234 if ((l1 != l2) || (s1 != s2))
6235 return 0;
6236
6237 /* Disallow mixed attributes. */
6238 if ((l1 & s2) || (l2 & s1))
6239 return 0;
6240 }
6241
6242 /* Check for mismatched ISR attribute. */
6243 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6244 if (! l1)
6245 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6246 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6247 if (! l2)
6248 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6249 if (l1 != l2)
6250 return 0;
6251
6252 return 1;
6253 }
6254
6255 /* Assigns default attributes to newly defined type. This is used to
6256 set short_call/long_call attributes for function types of
6257 functions defined inside corresponding #pragma scopes. */
6258 static void
6259 arm_set_default_type_attributes (tree type)
6260 {
6261 /* Add __attribute__ ((long_call)) to all functions, when
6262 inside #pragma long_calls or __attribute__ ((short_call)),
6263 when inside #pragma no_long_calls. */
6264 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6265 {
6266 tree type_attr_list, attr_name;
6267 type_attr_list = TYPE_ATTRIBUTES (type);
6268
6269 if (arm_pragma_long_calls == LONG)
6270 attr_name = get_identifier ("long_call");
6271 else if (arm_pragma_long_calls == SHORT)
6272 attr_name = get_identifier ("short_call");
6273 else
6274 return;
6275
6276 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6277 TYPE_ATTRIBUTES (type) = type_attr_list;
6278 }
6279 }
6280 \f
6281 /* Return true if DECL is known to be linked into section SECTION. */
6282
6283 static bool
6284 arm_function_in_section_p (tree decl, section *section)
6285 {
6286 /* We can only be certain about functions defined in the same
6287 compilation unit. */
6288 if (!TREE_STATIC (decl))
6289 return false;
6290
6291 /* Make sure that SYMBOL always binds to the definition in this
6292 compilation unit. */
6293 if (!targetm.binds_local_p (decl))
6294 return false;
6295
6296 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6297 if (!DECL_SECTION_NAME (decl))
6298 {
6299 /* Make sure that we will not create a unique section for DECL. */
6300 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6301 return false;
6302 }
6303
6304 return function_section (decl) == section;
6305 }
6306
6307 /* Return nonzero if a 32-bit "long_call" should be generated for
6308 a call from the current function to DECL. We generate a long_call
6309 if the function:
6310
6311 a. has an __attribute__((long_call))
6312 or b. is within the scope of a #pragma long_calls
6313 or c. the -mlong-calls command line switch has been specified
6314
6315 However we do not generate a long call if the function:
6316
6317 d. has an __attribute__ ((short_call))
6318 or e. is inside the scope of a #pragma no_long_calls
6319 or f. is defined in the same section as the current function. */
6320
6321 bool
6322 arm_is_long_call_p (tree decl)
6323 {
6324 tree attrs;
6325
6326 if (!decl)
6327 return TARGET_LONG_CALLS;
6328
6329 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6330 if (lookup_attribute ("short_call", attrs))
6331 return false;
6332
6333 /* For "f", be conservative, and only cater for cases in which the
6334 whole of the current function is placed in the same section. */
6335 if (!flag_reorder_blocks_and_partition
6336 && TREE_CODE (decl) == FUNCTION_DECL
6337 && arm_function_in_section_p (decl, current_function_section ()))
6338 return false;
6339
6340 if (lookup_attribute ("long_call", attrs))
6341 return true;
6342
6343 return TARGET_LONG_CALLS;
6344 }
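
/* The attributes tested above are written as, for example (hypothetical
   declarations):

       void far_fn (void)  __attribute__ ((long_call));
       void near_fn (void) __attribute__ ((short_call));

   far_fn is always treated as a long call (typically the address is
   loaded into a register and called indirectly); near_fn always uses a
   direct BL, even under -mlong-calls.  */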
6345
6346 /* Return nonzero if it is ok to make a tail-call to DECL. */
6347 static bool
6348 arm_function_ok_for_sibcall (tree decl, tree exp)
6349 {
6350 unsigned long func_type;
6351
6352 if (cfun->machine->sibcall_blocked)
6353 return false;
6354
6355 /* Never tailcall something if we are generating code for Thumb-1. */
6356 if (TARGET_THUMB1)
6357 return false;
6358
6359 /* The PIC register is live on entry to VxWorks PLT entries, so we
6360 must make the call before restoring the PIC register. */
6361 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6362 return false;
6363
6364 /* If we are interworking and the function is not declared static
6365 then we can't tail-call it unless we know that it exists in this
6366 compilation unit (since it might be a Thumb routine). */
6367 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6368 && !TREE_ASM_WRITTEN (decl))
6369 return false;
6370
6371 func_type = arm_current_func_type ();
6372 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6373 if (IS_INTERRUPT (func_type))
6374 return false;
6375
6376 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6377 {
6378 /* Check that the return value locations are the same. For
6379 example that we aren't returning a value from the sibling in
6380 a VFP register but then need to transfer it to a core
6381 register. */
6382 rtx a, b;
6383
6384 a = arm_function_value (TREE_TYPE (exp), decl, false);
6385 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6386 cfun->decl, false);
6387 if (!rtx_equal_p (a, b))
6388 return false;
6389 }
6390
6391 /* Never tailcall if function may be called with a misaligned SP. */
6392 if (IS_STACKALIGN (func_type))
6393 return false;
6394
6395 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6396 references should become a NOP. Don't convert such calls into
6397 sibling calls. */
6398 if (TARGET_AAPCS_BASED
6399 && arm_abi == ARM_ABI_AAPCS
6400 && decl
6401 && DECL_WEAK (decl))
6402 return false;
6403
6404 /* Everything else is ok. */
6405 return true;
6406 }
6407
6408 \f
6409 /* Addressing mode support functions. */
6410
6411 /* Return nonzero if X is a legitimate immediate operand when compiling
6412 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6413 int
6414 legitimate_pic_operand_p (rtx x)
6415 {
6416 if (GET_CODE (x) == SYMBOL_REF
6417 || (GET_CODE (x) == CONST
6418 && GET_CODE (XEXP (x, 0)) == PLUS
6419 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6420 return 0;
6421
6422 return 1;
6423 }
6424
6425 /* Record that the current function needs a PIC register. Initialize
6426 cfun->machine->pic_reg if we have not already done so. */
6427
6428 static void
6429 require_pic_register (void)
6430 {
6431 /* A lot of the logic here is made obscure by the fact that this
6432 routine gets called as part of the rtx cost estimation process.
6433 We don't want those calls to affect any assumptions about the real
6434 function; and further, we can't call entry_of_function() until we
6435 start the real expansion process. */
6436 if (!crtl->uses_pic_offset_table)
6437 {
6438 gcc_assert (can_create_pseudo_p ());
6439 if (arm_pic_register != INVALID_REGNUM
6440 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6441 {
6442 if (!cfun->machine->pic_reg)
6443 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6444
6445 /* Play games to avoid marking the function as needing pic
6446 if we are being called as part of the cost-estimation
6447 process. */
6448 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6449 crtl->uses_pic_offset_table = 1;
6450 }
6451 else
6452 {
6453 rtx_insn *seq, *insn;
6454
6455 if (!cfun->machine->pic_reg)
6456 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6457
6458 /* Play games to avoid marking the function as needing pic
6459 if we are being called as part of the cost-estimation
6460 process. */
6461 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6462 {
6463 crtl->uses_pic_offset_table = 1;
6464 start_sequence ();
6465
6466 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6467 && arm_pic_register > LAST_LO_REGNUM)
6468 emit_move_insn (cfun->machine->pic_reg,
6469 gen_rtx_REG (Pmode, arm_pic_register));
6470 else
6471 arm_load_pic_register (0UL);
6472
6473 seq = get_insns ();
6474 end_sequence ();
6475
6476 for (insn = seq; insn; insn = NEXT_INSN (insn))
6477 if (INSN_P (insn))
6478 INSN_LOCATION (insn) = prologue_location;
6479
6480 /* We can be called during expansion of PHI nodes, where
6481 we can't yet emit instructions directly in the final
6482 insn stream. Queue the insns on the entry edge, they will
6483 be committed after everything else is expanded. */
6484 insert_insn_on_edge (seq,
6485 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6486 }
6487 }
6488 }
6489 }
6490
6491 rtx
6492 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6493 {
6494 if (GET_CODE (orig) == SYMBOL_REF
6495 || GET_CODE (orig) == LABEL_REF)
6496 {
6497 rtx insn;
6498
6499 if (reg == 0)
6500 {
6501 gcc_assert (can_create_pseudo_p ());
6502 reg = gen_reg_rtx (Pmode);
6503 }
6504
6505 /* VxWorks does not impose a fixed gap between segments; the run-time
6506 gap can be different from the object-file gap. We therefore can't
6507 use GOTOFF unless we are absolutely sure that the symbol is in the
6508 same segment as the GOT. Unfortunately, the flexibility of linker
6509 scripts means that we can't be sure of that in general, so assume
6510 that GOTOFF is never valid on VxWorks. */
6511 if ((GET_CODE (orig) == LABEL_REF
6512 || (GET_CODE (orig) == SYMBOL_REF &&
6513 SYMBOL_REF_LOCAL_P (orig)))
6514 && NEED_GOT_RELOC
6515 && arm_pic_data_is_text_relative)
6516 insn = arm_pic_static_addr (orig, reg);
6517 else
6518 {
6519 rtx pat;
6520 rtx mem;
6521
6522 /* If this function doesn't have a pic register, create one now. */
6523 require_pic_register ();
6524
6525 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6526
6527 /* Make the MEM as close to a constant as possible. */
6528 mem = SET_SRC (pat);
6529 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6530 MEM_READONLY_P (mem) = 1;
6531 MEM_NOTRAP_P (mem) = 1;
6532
6533 insn = emit_insn (pat);
6534 }
6535
6536 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6537 by loop. */
6538 set_unique_reg_note (insn, REG_EQUAL, orig);
6539
6540 return reg;
6541 }
6542 else if (GET_CODE (orig) == CONST)
6543 {
6544 rtx base, offset;
6545
6546 if (GET_CODE (XEXP (orig, 0)) == PLUS
6547 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6548 return orig;
6549
6550 /* Handle the case where we have: const (UNSPEC_TLS). */
6551 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6552 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6553 return orig;
6554
6555 /* Handle the case where we have:
6556 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6557 CONST_INT. */
6558 if (GET_CODE (XEXP (orig, 0)) == PLUS
6559 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6560 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6561 {
6562 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6563 return orig;
6564 }
6565
6566 if (reg == 0)
6567 {
6568 gcc_assert (can_create_pseudo_p ());
6569 reg = gen_reg_rtx (Pmode);
6570 }
6571
6572 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6573
6574 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6575 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6576 base == reg ? 0 : reg);
6577
6578 if (CONST_INT_P (offset))
6579 {
6580 /* The base register doesn't really matter, we only want to
6581 test the index for the appropriate mode. */
6582 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6583 {
6584 gcc_assert (can_create_pseudo_p ());
6585 offset = force_reg (Pmode, offset);
6586 }
6587
6588 if (CONST_INT_P (offset))
6589 return plus_constant (Pmode, base, INTVAL (offset));
6590 }
6591
6592 if (GET_MODE_SIZE (mode) > 4
6593 && (GET_MODE_CLASS (mode) == MODE_INT
6594 || TARGET_SOFT_FLOAT))
6595 {
6596 emit_insn (gen_addsi3 (reg, base, offset));
6597 return reg;
6598 }
6599
6600 return gen_rtx_PLUS (Pmode, base, offset);
6601 }
6602
6603 return orig;
6604 }
6605
6606
6607 /* Find a spare register to use during the prolog of a function. */
6608
6609 static int
6610 thumb_find_work_register (unsigned long pushed_regs_mask)
6611 {
6612 int reg;
6613
6614 /* Check the argument registers first as these are call-used. The
6615 register allocation order means that sometimes r3 might be used
6616 but earlier argument registers might not, so check them all. */
6617 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6618 if (!df_regs_ever_live_p (reg))
6619 return reg;
6620
6621 /* Before going on to check the call-saved registers we can try a couple
6622 more ways of deducing that r3 is available. The first is when we are
6623 pushing anonymous arguments onto the stack and we have less than 4
6624 registers worth of fixed arguments(*). In this case r3 will be part of
6625 the variable argument list and so we can be sure that it will be
6626 pushed right at the start of the function. Hence it will be available
6627 for the rest of the prologue.
6628 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6629 if (cfun->machine->uses_anonymous_args
6630 && crtl->args.pretend_args_size > 0)
6631 return LAST_ARG_REGNUM;
6632
6633 /* The other case is when we have fixed arguments but less than 4 registers
6634 worth. In this case r3 might be used in the body of the function, but
6635 it is not being used to convey an argument into the function. In theory
6636 we could just check crtl->args.size to see how many bytes are
6637 being passed in argument registers, but it seems that it is unreliable.
6638 Sometimes it will have the value 0 when in fact arguments are being
6639 passed. (See testcase execute/20021111-1.c for an example). So we also
6640 check the args_info.nregs field as well. The problem with this field is
6641 that it makes no allowances for arguments that are passed to the
6642 function but which are not used. Hence we could miss an opportunity
6643 when a function has an unused argument in r3. But it is better to be
6644 safe than to be sorry. */
6645 if (! cfun->machine->uses_anonymous_args
6646 && crtl->args.size >= 0
6647 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6648 && (TARGET_AAPCS_BASED
6649 ? crtl->args.info.aapcs_ncrn < 4
6650 : crtl->args.info.nregs < 4))
6651 return LAST_ARG_REGNUM;
6652
6653 /* Otherwise look for a call-saved register that is going to be pushed. */
6654 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6655 if (pushed_regs_mask & (1 << reg))
6656 return reg;
6657
6658 if (TARGET_THUMB2)
6659 {
6660 /* Thumb-2 can use high regs. */
6661 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6662 if (pushed_regs_mask & (1 << reg))
6663 return reg;
6664 }
6665 /* Something went wrong - thumb_compute_save_reg_mask()
6666 should have arranged for a suitable register to be pushed. */
6667 gcc_unreachable ();
6668 }
6669
6670 static GTY(()) int pic_labelno;
6671
6672 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6673 low register. */
6674
6675 void
6676 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6677 {
6678 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6679
6680 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6681 return;
6682
6683 gcc_assert (flag_pic);
6684
6685 pic_reg = cfun->machine->pic_reg;
6686 if (TARGET_VXWORKS_RTP)
6687 {
6688 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6689 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6690 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6691
6692 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6693
6694 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6695 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6696 }
6697 else
6698 {
6699 /* We use an UNSPEC rather than a LABEL_REF because this label
6700 never appears in the code stream. */
6701
6702 labelno = GEN_INT (pic_labelno++);
6703 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6704 l1 = gen_rtx_CONST (VOIDmode, l1);
6705
6706 /* On the ARM the PC register contains 'dot + 8' at the time of the
6707 addition, on the Thumb it is 'dot + 4'. */
6708 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6709 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6710 UNSPEC_GOTSYM_OFF);
6711 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6712
6713 if (TARGET_32BIT)
6714 {
6715 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6716 }
6717 else /* TARGET_THUMB1 */
6718 {
6719 if (arm_pic_register != INVALID_REGNUM
6720 && REGNO (pic_reg) > LAST_LO_REGNUM)
6721 {
6722 /* We will have pushed the pic register, so we should always be
6723 able to find a work register. */
6724 pic_tmp = gen_rtx_REG (SImode,
6725 thumb_find_work_register (saved_regs));
6726 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6727 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6728 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6729 }
6730 else if (arm_pic_register != INVALID_REGNUM
6731 && arm_pic_register > LAST_LO_REGNUM
6732 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6733 {
6734 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6735 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6736 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6737 }
6738 else
6739 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6740 }
6741 }
6742
6743 /* Need to emit this whether or not we obey regdecls,
6744 since setjmp/longjmp can cause life info to screw up. */
6745 emit_use (pic_reg);
6746 }
6747
6748 /* Generate code to load the address of a static var when flag_pic is set. */
6749 static rtx
6750 arm_pic_static_addr (rtx orig, rtx reg)
6751 {
6752 rtx l1, labelno, offset_rtx, insn;
6753
6754 gcc_assert (flag_pic);
6755
6756 /* We use an UNSPEC rather than a LABEL_REF because this label
6757 never appears in the code stream. */
6758 labelno = GEN_INT (pic_labelno++);
6759 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6760 l1 = gen_rtx_CONST (VOIDmode, l1);
6761
6762 /* On the ARM the PC register contains 'dot + 8' at the time of the
6763 addition, on the Thumb it is 'dot + 4'. */
6764 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6765 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6766 UNSPEC_SYMBOL_OFFSET);
6767 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6768
6769 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6770 return insn;
6771 }
6772
6773 /* Return nonzero if X is valid as an ARM state addressing register. */
6774 static int
6775 arm_address_register_rtx_p (rtx x, int strict_p)
6776 {
6777 int regno;
6778
6779 if (!REG_P (x))
6780 return 0;
6781
6782 regno = REGNO (x);
6783
6784 if (strict_p)
6785 return ARM_REGNO_OK_FOR_BASE_P (regno);
6786
6787 return (regno <= LAST_ARM_REGNUM
6788 || regno >= FIRST_PSEUDO_REGISTER
6789 || regno == FRAME_POINTER_REGNUM
6790 || regno == ARG_POINTER_REGNUM);
6791 }
6792
6793 /* Return TRUE if this rtx is the difference of a symbol and a label,
6794 and will reduce to a PC-relative relocation in the object file.
6795 Expressions like this can be left alone when generating PIC, rather
6796 than forced through the GOT. */
6797 static int
6798 pcrel_constant_p (rtx x)
6799 {
6800 if (GET_CODE (x) == MINUS)
6801 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6802
6803 return FALSE;
6804 }
6805
6806 /* Return true if X will surely end up in an index register after next
6807 splitting pass. */
6808 static bool
6809 will_be_in_index_register (const_rtx x)
6810 {
6811 /* arm.md: calculate_pic_address will split this into a register. */
6812 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6813 }
6814
6815 /* Return nonzero if X is a valid ARM state address operand. */
6816 int
6817 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6818 int strict_p)
6819 {
6820 bool use_ldrd;
6821 enum rtx_code code = GET_CODE (x);
6822
6823 if (arm_address_register_rtx_p (x, strict_p))
6824 return 1;
6825
6826 use_ldrd = (TARGET_LDRD
6827 && (mode == DImode
6828 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6829
6830 if (code == POST_INC || code == PRE_DEC
6831 || ((code == PRE_INC || code == POST_DEC)
6832 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6833 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6834
6835 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6836 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6837 && GET_CODE (XEXP (x, 1)) == PLUS
6838 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6839 {
6840 rtx addend = XEXP (XEXP (x, 1), 1);
6841
6842 /* Don't allow ldrd post increment by register because it's hard
6843 to fixup invalid register choices. */
6844 if (use_ldrd
6845 && GET_CODE (x) == POST_MODIFY
6846 && REG_P (addend))
6847 return 0;
6848
6849 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6850 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6851 }
6852
6853 /* After reload constants split into minipools will have addresses
6854 from a LABEL_REF. */
6855 else if (reload_completed
6856 && (code == LABEL_REF
6857 || (code == CONST
6858 && GET_CODE (XEXP (x, 0)) == PLUS
6859 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6860 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6861 return 1;
6862
6863 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6864 return 0;
6865
6866 else if (code == PLUS)
6867 {
6868 rtx xop0 = XEXP (x, 0);
6869 rtx xop1 = XEXP (x, 1);
6870
6871 return ((arm_address_register_rtx_p (xop0, strict_p)
6872 && ((CONST_INT_P (xop1)
6873 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6874 || (!strict_p && will_be_in_index_register (xop1))))
6875 || (arm_address_register_rtx_p (xop1, strict_p)
6876 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6877 }
6878
6879 #if 0
6880 /* Reload currently can't handle MINUS, so disable this for now. */
6881 else if (GET_CODE (x) == MINUS)
6882 {
6883 rtx xop0 = XEXP (x, 0);
6884 rtx xop1 = XEXP (x, 1);
6885
6886 return (arm_address_register_rtx_p (xop0, strict_p)
6887 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6888 }
6889 #endif
6890
6891 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6892 && code == SYMBOL_REF
6893 && CONSTANT_POOL_ADDRESS_P (x)
6894 && ! (flag_pic
6895 && symbol_mentioned_p (get_pool_constant (x))
6896 && ! pcrel_constant_p (get_pool_constant (x))))
6897 return 1;
6898
6899 return 0;
6900 }
6901
6902 /* Return nonzero if X is a valid Thumb-2 address operand. */
6903 static int
6904 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6905 {
6906 bool use_ldrd;
6907 enum rtx_code code = GET_CODE (x);
6908
6909 if (arm_address_register_rtx_p (x, strict_p))
6910 return 1;
6911
6912 use_ldrd = (TARGET_LDRD
6913 && (mode == DImode
6914 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6915
6916 if (code == POST_INC || code == PRE_DEC
6917 || ((code == PRE_INC || code == POST_DEC)
6918 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6919 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6920
6921 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6922 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6923 && GET_CODE (XEXP (x, 1)) == PLUS
6924 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6925 {
6926 /* Thumb-2 only has autoincrement by constant. */
6927 rtx addend = XEXP (XEXP (x, 1), 1);
6928 HOST_WIDE_INT offset;
6929
6930 if (!CONST_INT_P (addend))
6931 return 0;
6932
6933 offset = INTVAL(addend);
6934 if (GET_MODE_SIZE (mode) <= 4)
6935 return (offset > -256 && offset < 256);
6936
6937 return (use_ldrd && offset > -1024 && offset < 1024
6938 && (offset & 3) == 0);
6939 }
6940
6941 /* After reload constants split into minipools will have addresses
6942 from a LABEL_REF. */
6943 else if (reload_completed
6944 && (code == LABEL_REF
6945 || (code == CONST
6946 && GET_CODE (XEXP (x, 0)) == PLUS
6947 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6948 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6949 return 1;
6950
6951 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6952 return 0;
6953
6954 else if (code == PLUS)
6955 {
6956 rtx xop0 = XEXP (x, 0);
6957 rtx xop1 = XEXP (x, 1);
6958
6959 return ((arm_address_register_rtx_p (xop0, strict_p)
6960 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6961 || (!strict_p && will_be_in_index_register (xop1))))
6962 || (arm_address_register_rtx_p (xop1, strict_p)
6963 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6964 }
6965
6966 /* Normally we can assign constant values to target registers without
6967 the help of a constant pool. But there are cases where we have to use
6968 the constant pool, for example:
6969 1) assigning a label to a register.
6970 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6971
6972 Constant pool access in format:
6973 (set (reg r0) (mem (symbol_ref (".LC0"))))
6974 will cause the use of literal pool (later in function arm_reorg).
6975 So here we mark such format as an invalid format, then the compiler
6976 will adjust it into:
6977 (set (reg r0) (symbol_ref (".LC0")))
6978 (set (reg r0) (mem (reg r0))).
6979 No extra register is required, and (mem (reg r0)) won't cause the use
6980 of literal pools. */
6981 else if (arm_disable_literal_pool && code == SYMBOL_REF
6982 && CONSTANT_POOL_ADDRESS_P (x))
6983 return 0;
6984
6985 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6986 && code == SYMBOL_REF
6987 && CONSTANT_POOL_ADDRESS_P (x)
6988 && ! (flag_pic
6989 && symbol_mentioned_p (get_pool_constant (x))
6990 && ! pcrel_constant_p (get_pool_constant (x))))
6991 return 1;
6992
6993 return 0;
6994 }
6995
6996 /* Return nonzero if INDEX is valid for an address index operand in
6997 ARM state. */
6998 static int
6999 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7000 int strict_p)
7001 {
7002 HOST_WIDE_INT range;
7003 enum rtx_code code = GET_CODE (index);
7004
7005 /* Standard coprocessor addressing modes. */
7006 if (TARGET_HARD_FLOAT
7007 && TARGET_VFP
7008 && (mode == SFmode || mode == DFmode))
7009 return (code == CONST_INT && INTVAL (index) < 1024
7010 && INTVAL (index) > -1024
7011 && (INTVAL (index) & 3) == 0);
7012
7013 /* For quad modes, we restrict the constant offset to be slightly less
7014 than what the instruction format permits. We do this because for
7015 quad mode moves, we will actually decompose them into two separate
7016 double-mode reads or writes. INDEX must therefore be a valid
7017 (double-mode) offset and so should INDEX+8. */
7018 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7019 return (code == CONST_INT
7020 && INTVAL (index) < 1016
7021 && INTVAL (index) > -1024
7022 && (INTVAL (index) & 3) == 0);
7023
7024 /* We have no such constraint on double mode offsets, so we permit the
7025 full range of the instruction format. */
7026 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7027 return (code == CONST_INT
7028 && INTVAL (index) < 1024
7029 && INTVAL (index) > -1024
7030 && (INTVAL (index) & 3) == 0);
7031
7032 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7033 return (code == CONST_INT
7034 && INTVAL (index) < 1024
7035 && INTVAL (index) > -1024
7036 && (INTVAL (index) & 3) == 0);
7037
7038 if (arm_address_register_rtx_p (index, strict_p)
7039 && (GET_MODE_SIZE (mode) <= 4))
7040 return 1;
7041
7042 if (mode == DImode || mode == DFmode)
7043 {
7044 if (code == CONST_INT)
7045 {
7046 HOST_WIDE_INT val = INTVAL (index);
7047
7048 if (TARGET_LDRD)
7049 return val > -256 && val < 256;
7050 else
7051 return val > -4096 && val < 4092;
7052 }
7053
7054 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7055 }
7056
7057 if (GET_MODE_SIZE (mode) <= 4
7058 && ! (arm_arch4
7059 && (mode == HImode
7060 || mode == HFmode
7061 || (mode == QImode && outer == SIGN_EXTEND))))
7062 {
7063 if (code == MULT)
7064 {
7065 rtx xiop0 = XEXP (index, 0);
7066 rtx xiop1 = XEXP (index, 1);
7067
7068 return ((arm_address_register_rtx_p (xiop0, strict_p)
7069 && power_of_two_operand (xiop1, SImode))
7070 || (arm_address_register_rtx_p (xiop1, strict_p)
7071 && power_of_two_operand (xiop0, SImode)));
7072 }
7073 else if (code == LSHIFTRT || code == ASHIFTRT
7074 || code == ASHIFT || code == ROTATERT)
7075 {
7076 rtx op = XEXP (index, 1);
7077
7078 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7079 && CONST_INT_P (op)
7080 && INTVAL (op) > 0
7081 && INTVAL (op) <= 31);
7082 }
7083 }
7084
7085 /* For ARM v4 we may be doing a sign-extend operation during the
7086 load. */
7087 if (arm_arch4)
7088 {
7089 if (mode == HImode
7090 || mode == HFmode
7091 || (outer == SIGN_EXTEND && mode == QImode))
7092 range = 256;
7093 else
7094 range = 4096;
7095 }
7096 else
7097 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7098
7099 return (code == CONST_INT
7100 && INTVAL (index) < range
7101 && INTVAL (index) > -range);
7102 }
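
/* Purely as an illustration of the checks above (not used by the compiler;
   the register numbers are hypothetical), some index operands this function
   would accept or reject for an SImode access in ARM state:

       (reg:SI 1)                             accepted (register index)
       (const_int 4095)                       accepted (within +/-4096)
       (const_int -4100)                      rejected (out of range)
       (mult:SI (reg:SI 1) (const_int 4))     accepted (power-of-two scale)
       (ashift:SI (reg:SI 1) (const_int 32))  rejected (shift count > 31)  */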
7103
7104 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7105 index operand, i.e. 1, 2, 4 or 8. */
7106 static bool
7107 thumb2_index_mul_operand (rtx op)
7108 {
7109 HOST_WIDE_INT val;
7110
7111 if (!CONST_INT_P (op))
7112 return false;
7113
7114 val = INTVAL (op);
7115 return (val == 1 || val == 2 || val == 4 || val == 8);
7116 }
7117
7118 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7119 static int
7120 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7121 {
7122 enum rtx_code code = GET_CODE (index);
7123
7124 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7125 /* Standard coprocessor addressing modes. */
7126 if (TARGET_HARD_FLOAT
7127 && TARGET_VFP
7128 && (mode == SFmode || mode == DFmode))
7129 return (code == CONST_INT && INTVAL (index) < 1024
7130 /* Thumb-2 only allows an index range greater than -256 for its core
7131 register loads/stores. Since we allow SF/DF in core registers, we
7132 have to use the intersection of -256~4096 (core) and -1024~1024
7133 (coprocessor). */
7134 && INTVAL (index) > -256
7135 && (INTVAL (index) & 3) == 0);
7136
7137 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7138 {
7139 /* For DImode assume values will usually live in core regs
7140 and only allow LDRD addressing modes. */
7141 if (!TARGET_LDRD || mode != DImode)
7142 return (code == CONST_INT
7143 && INTVAL (index) < 1024
7144 && INTVAL (index) > -1024
7145 && (INTVAL (index) & 3) == 0);
7146 }
7147
7148 /* For quad modes, we restrict the constant offset to be slightly less
7149 than what the instruction format permits. We do this because for
7150 quad mode moves, we will actually decompose them into two separate
7151 double-mode reads or writes. INDEX must therefore be a valid
7152 (double-mode) offset and so should INDEX+8. */
7153 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7154 return (code == CONST_INT
7155 && INTVAL (index) < 1016
7156 && INTVAL (index) > -1024
7157 && (INTVAL (index) & 3) == 0);
7158
7159 /* We have no such constraint on double mode offsets, so we permit the
7160 full range of the instruction format. */
7161 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7162 return (code == CONST_INT
7163 && INTVAL (index) < 1024
7164 && INTVAL (index) > -1024
7165 && (INTVAL (index) & 3) == 0);
7166
7167 if (arm_address_register_rtx_p (index, strict_p)
7168 && (GET_MODE_SIZE (mode) <= 4))
7169 return 1;
7170
7171 if (mode == DImode || mode == DFmode)
7172 {
7173 if (code == CONST_INT)
7174 {
7175 HOST_WIDE_INT val = INTVAL (index);
7176 /* ??? Can we assume ldrd for thumb2? */
7177 /* Thumb-2 ldrd only has reg+const addressing modes. */
7178 /* ldrd supports offsets of +-1020.
7179 However the ldr fallback does not. */
7180 return val > -256 && val < 256 && (val & 3) == 0;
7181 }
7182 else
7183 return 0;
7184 }
7185
7186 if (code == MULT)
7187 {
7188 rtx xiop0 = XEXP (index, 0);
7189 rtx xiop1 = XEXP (index, 1);
7190
7191 return ((arm_address_register_rtx_p (xiop0, strict_p)
7192 && thumb2_index_mul_operand (xiop1))
7193 || (arm_address_register_rtx_p (xiop1, strict_p)
7194 && thumb2_index_mul_operand (xiop0)));
7195 }
7196 else if (code == ASHIFT)
7197 {
7198 rtx op = XEXP (index, 1);
7199
7200 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7201 && CONST_INT_P (op)
7202 && INTVAL (op) > 0
7203 && INTVAL (op) <= 3);
7204 }
7205
7206 return (code == CONST_INT
7207 && INTVAL (index) < 4096
7208 && INTVAL (index) > -256);
7209 }
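
/* For illustration only (not used by the compiler; the register numbers
   are hypothetical), some index operands the Thumb-2 checks above would
   accept or reject for an SImode access:

       (const_int 4095)                       accepted (< 4096)
       (const_int -255)                       accepted (> -256)
       (const_int -300)                       rejected (below -256)
       (mult:SI (reg:SI 1) (const_int 8))     accepted (scale 1, 2, 4 or 8)
       (ashift:SI (reg:SI 1) (const_int 3))   accepted (shift count 1..3)
       (ashift:SI (reg:SI 1) (const_int 4))   rejected (shift too large)  */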
7210
7211 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7212 static int
7213 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7214 {
7215 int regno;
7216
7217 if (!REG_P (x))
7218 return 0;
7219
7220 regno = REGNO (x);
7221
7222 if (strict_p)
7223 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7224
7225 return (regno <= LAST_LO_REGNUM
7226 || regno > LAST_VIRTUAL_REGISTER
7227 || regno == FRAME_POINTER_REGNUM
7228 || (GET_MODE_SIZE (mode) >= 4
7229 && (regno == STACK_POINTER_REGNUM
7230 || regno >= FIRST_PSEUDO_REGISTER
7231 || x == hard_frame_pointer_rtx
7232 || x == arg_pointer_rtx)));
7233 }
7234
7235 /* Return nonzero if x is a legitimate index register. This is the case
7236 for any base register that can access a QImode object. */
7237 inline static int
7238 thumb1_index_register_rtx_p (rtx x, int strict_p)
7239 {
7240 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7241 }
7242
7243 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7244
7245 The AP may be eliminated to either the SP or the FP, so we use the
7246 least common denominator, e.g. SImode, and offsets from 0 to 64.
7247
7248 ??? Verify whether the above is the right approach.
7249
7250 ??? Also, the FP may be eliminated to the SP, so perhaps that
7251 needs special handling also.
7252
7253 ??? Look at how the mips16 port solves this problem. It probably uses
7254 better ways to solve some of these problems.
7255
7256 Although it is not incorrect, we don't accept QImode and HImode
7257 addresses based on the frame pointer or arg pointer until the
7258 reload pass starts. This is so that eliminating such addresses
7259 into stack based ones won't produce impossible code. */
7260 int
7261 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7262 {
7263 /* ??? Not clear if this is right. Experiment. */
7264 if (GET_MODE_SIZE (mode) < 4
7265 && !(reload_in_progress || reload_completed)
7266 && (reg_mentioned_p (frame_pointer_rtx, x)
7267 || reg_mentioned_p (arg_pointer_rtx, x)
7268 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7269 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7270 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7271 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7272 return 0;
7273
7274 /* Accept any base register. SP only in SImode or larger. */
7275 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7276 return 1;
7277
7278 /* This is PC relative data before arm_reorg runs. */
7279 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7280 && GET_CODE (x) == SYMBOL_REF
7281 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7282 return 1;
7283
7284 /* This is PC relative data after arm_reorg runs. */
7285 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7286 && reload_completed
7287 && (GET_CODE (x) == LABEL_REF
7288 || (GET_CODE (x) == CONST
7289 && GET_CODE (XEXP (x, 0)) == PLUS
7290 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7291 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7292 return 1;
7293
7294 /* Post-inc indexing only supported for SImode and larger. */
7295 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7296 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7297 return 1;
7298
7299 else if (GET_CODE (x) == PLUS)
7300 {
7301 /* REG+REG address can be any two index registers. */
7302 /* We disallow FRAME+REG addressing since we know that FRAME
7303 will be replaced with STACK, and SP relative addressing only
7304 permits SP+OFFSET. */
7305 if (GET_MODE_SIZE (mode) <= 4
7306 && XEXP (x, 0) != frame_pointer_rtx
7307 && XEXP (x, 1) != frame_pointer_rtx
7308 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7309 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7310 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7311 return 1;
7312
7313 /* REG+const has 5-7 bit offset for non-SP registers. */
7314 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7315 || XEXP (x, 0) == arg_pointer_rtx)
7316 && CONST_INT_P (XEXP (x, 1))
7317 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7318 return 1;
7319
7320 /* REG+const has a 10-bit offset for SP, but only SImode and
7321 larger are supported. */
7322 /* ??? Should probably check for DI/DFmode overflow here
7323 just like GO_IF_LEGITIMATE_OFFSET does. */
7324 else if (REG_P (XEXP (x, 0))
7325 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7326 && GET_MODE_SIZE (mode) >= 4
7327 && CONST_INT_P (XEXP (x, 1))
7328 && INTVAL (XEXP (x, 1)) >= 0
7329 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7330 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7331 return 1;
7332
7333 else if (REG_P (XEXP (x, 0))
7334 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7335 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7336 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7337 && REGNO (XEXP (x, 0))
7338 <= LAST_VIRTUAL_POINTER_REGISTER))
7339 && GET_MODE_SIZE (mode) >= 4
7340 && CONST_INT_P (XEXP (x, 1))
7341 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7342 return 1;
7343 }
7344
7345 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7346 && GET_MODE_SIZE (mode) == 4
7347 && GET_CODE (x) == SYMBOL_REF
7348 && CONSTANT_POOL_ADDRESS_P (x)
7349 && ! (flag_pic
7350 && symbol_mentioned_p (get_pool_constant (x))
7351 && ! pcrel_constant_p (get_pool_constant (x))))
7352 return 1;
7353
7354 return 0;
7355 }
7356
7357 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7358 instruction of mode MODE. */
7359 int
7360 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7361 {
7362 switch (GET_MODE_SIZE (mode))
7363 {
7364 case 1:
7365 return val >= 0 && val < 32;
7366
7367 case 2:
7368 return val >= 0 && val < 64 && (val & 1) == 0;
7369
7370 default:
7371 return (val >= 0
7372 && (val + GET_MODE_SIZE (mode)) <= 128
7373 && (val & 3) == 0);
7374 }
7375 }
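
/* As a hypothetical illustration of the ranges checked above (not used by
   the compiler): for QImode the legal offsets are 0..31; for HImode they
   are the even values 0..62; and for SImode and larger they are the
   multiples of 4 from 0 up to 128 - GET_MODE_SIZE (mode), e.g. 0..124
   for SImode.  */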
7376
7377 bool
7378 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7379 {
7380 if (TARGET_ARM)
7381 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7382 else if (TARGET_THUMB2)
7383 return thumb2_legitimate_address_p (mode, x, strict_p);
7384 else /* if (TARGET_THUMB1) */
7385 return thumb1_legitimate_address_p (mode, x, strict_p);
7386 }
7387
7388 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7389
7390 Given an rtx X being reloaded into a reg required to be
7391 in class CLASS, return the class of reg to actually use.
7392 In general this is just CLASS, but for the Thumb core registers and
7393 immediate constants we prefer a LO_REGS class or a subset. */
7394
7395 static reg_class_t
7396 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7397 {
7398 if (TARGET_32BIT)
7399 return rclass;
7400 else
7401 {
7402 if (rclass == GENERAL_REGS)
7403 return LO_REGS;
7404 else
7405 return rclass;
7406 }
7407 }
7408
7409 /* Build the SYMBOL_REF for __tls_get_addr. */
7410
7411 static GTY(()) rtx tls_get_addr_libfunc;
7412
7413 static rtx
7414 get_tls_get_addr (void)
7415 {
7416 if (!tls_get_addr_libfunc)
7417 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7418 return tls_get_addr_libfunc;
7419 }
7420
7421 rtx
7422 arm_load_tp (rtx target)
7423 {
7424 if (!target)
7425 target = gen_reg_rtx (SImode);
7426
7427 if (TARGET_HARD_TP)
7428 {
7429 /* Can return in any reg. */
7430 emit_insn (gen_load_tp_hard (target));
7431 }
7432 else
7433 {
7434 /* Always returned in r0. Immediately copy the result into a pseudo,
7435 otherwise other uses of r0 (e.g. setting up function arguments) may
7436 clobber the value. */
7437
7438 rtx tmp;
7439
7440 emit_insn (gen_load_tp_soft ());
7441
7442 tmp = gen_rtx_REG (SImode, 0);
7443 emit_move_insn (target, tmp);
7444 }
7445 return target;
7446 }
7447
7448 static rtx
7449 load_tls_operand (rtx x, rtx reg)
7450 {
7451 rtx tmp;
7452
7453 if (reg == NULL_RTX)
7454 reg = gen_reg_rtx (SImode);
7455
7456 tmp = gen_rtx_CONST (SImode, x);
7457
7458 emit_move_insn (reg, tmp);
7459
7460 return reg;
7461 }
7462
7463 static rtx
7464 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7465 {
7466 rtx insns, label, labelno, sum;
7467
7468 gcc_assert (reloc != TLS_DESCSEQ);
7469 start_sequence ();
7470
7471 labelno = GEN_INT (pic_labelno++);
7472 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7473 label = gen_rtx_CONST (VOIDmode, label);
7474
7475 sum = gen_rtx_UNSPEC (Pmode,
7476 gen_rtvec (4, x, GEN_INT (reloc), label,
7477 GEN_INT (TARGET_ARM ? 8 : 4)),
7478 UNSPEC_TLS);
7479 reg = load_tls_operand (sum, reg);
7480
7481 if (TARGET_ARM)
7482 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7483 else
7484 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7485
7486 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7487 LCT_PURE, /* LCT_CONST? */
7488 Pmode, 1, reg, Pmode);
7489
7490 insns = get_insns ();
7491 end_sequence ();
7492
7493 return insns;
7494 }
7495
7496 static rtx
7497 arm_tls_descseq_addr (rtx x, rtx reg)
7498 {
7499 rtx labelno = GEN_INT (pic_labelno++);
7500 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7501 rtx sum = gen_rtx_UNSPEC (Pmode,
7502 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7503 gen_rtx_CONST (VOIDmode, label),
7504 GEN_INT (!TARGET_ARM)),
7505 UNSPEC_TLS);
7506 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7507
7508 emit_insn (gen_tlscall (x, labelno));
7509 if (!reg)
7510 reg = gen_reg_rtx (SImode);
7511 else
7512 gcc_assert (REGNO (reg) != 0);
7513
7514 emit_move_insn (reg, reg0);
7515
7516 return reg;
7517 }
7518
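/* Legitimize X, a TLS SYMBOL_REF, according to the TLS access model
   recorded on the symbol: the dynamic models go through __tls_get_addr
   (or a GNU2 TLS descriptor sequence), while the exec models add a TLS
   offset to the thread pointer.  REG, if non-null, is a register that
   may be used while building the address.  Returns an rtx holding or
   computing the symbol's address.  */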
7519 rtx
7520 legitimize_tls_address (rtx x, rtx reg)
7521 {
7522 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7523 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7524
7525 switch (model)
7526 {
7527 case TLS_MODEL_GLOBAL_DYNAMIC:
7528 if (TARGET_GNU2_TLS)
7529 {
7530 reg = arm_tls_descseq_addr (x, reg);
7531
7532 tp = arm_load_tp (NULL_RTX);
7533
7534 dest = gen_rtx_PLUS (Pmode, tp, reg);
7535 }
7536 else
7537 {
7538 /* Original scheme */
7539 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7540 dest = gen_reg_rtx (Pmode);
7541 emit_libcall_block (insns, dest, ret, x);
7542 }
7543 return dest;
7544
7545 case TLS_MODEL_LOCAL_DYNAMIC:
7546 if (TARGET_GNU2_TLS)
7547 {
7548 reg = arm_tls_descseq_addr (x, reg);
7549
7550 tp = arm_load_tp (NULL_RTX);
7551
7552 dest = gen_rtx_PLUS (Pmode, tp, reg);
7553 }
7554 else
7555 {
7556 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7557
7558 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7559 share the LDM result with other LD model accesses. */
7560 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7561 UNSPEC_TLS);
7562 dest = gen_reg_rtx (Pmode);
7563 emit_libcall_block (insns, dest, ret, eqv);
7564
7565 /* Load the addend. */
7566 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7567 GEN_INT (TLS_LDO32)),
7568 UNSPEC_TLS);
7569 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7570 dest = gen_rtx_PLUS (Pmode, dest, addend);
7571 }
7572 return dest;
7573
7574 case TLS_MODEL_INITIAL_EXEC:
7575 labelno = GEN_INT (pic_labelno++);
7576 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7577 label = gen_rtx_CONST (VOIDmode, label);
7578 sum = gen_rtx_UNSPEC (Pmode,
7579 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7580 GEN_INT (TARGET_ARM ? 8 : 4)),
7581 UNSPEC_TLS);
7582 reg = load_tls_operand (sum, reg);
7583
7584 if (TARGET_ARM)
7585 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7586 else if (TARGET_THUMB2)
7587 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7588 else
7589 {
7590 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7591 emit_move_insn (reg, gen_const_mem (SImode, reg));
7592 }
7593
7594 tp = arm_load_tp (NULL_RTX);
7595
7596 return gen_rtx_PLUS (Pmode, tp, reg);
7597
7598 case TLS_MODEL_LOCAL_EXEC:
7599 tp = arm_load_tp (NULL_RTX);
7600
7601 reg = gen_rtx_UNSPEC (Pmode,
7602 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7603 UNSPEC_TLS);
7604 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7605
7606 return gen_rtx_PLUS (Pmode, tp, reg);
7607
7608 default:
7609 abort ();
7610 }
7611 }
7612
7613 /* Try machine-dependent ways of modifying an illegitimate address
7614 to be legitimate. If we find one, return the new, valid address. */
7615 rtx
7616 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7617 {
7618 if (arm_tls_referenced_p (x))
7619 {
7620 rtx addend = NULL;
7621
7622 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7623 {
7624 addend = XEXP (XEXP (x, 0), 1);
7625 x = XEXP (XEXP (x, 0), 0);
7626 }
7627
7628 if (GET_CODE (x) != SYMBOL_REF)
7629 return x;
7630
7631 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7632
7633 x = legitimize_tls_address (x, NULL_RTX);
7634
7635 if (addend)
7636 {
7637 x = gen_rtx_PLUS (SImode, x, addend);
7638 orig_x = x;
7639 }
7640 else
7641 return x;
7642 }
7643
7644 if (!TARGET_ARM)
7645 {
7646 /* TODO: legitimize_address for Thumb2. */
7647 if (TARGET_THUMB2)
7648 return x;
7649 return thumb_legitimize_address (x, orig_x, mode);
7650 }
7651
7652 if (GET_CODE (x) == PLUS)
7653 {
7654 rtx xop0 = XEXP (x, 0);
7655 rtx xop1 = XEXP (x, 1);
7656
7657 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7658 xop0 = force_reg (SImode, xop0);
7659
7660 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7661 && !symbol_mentioned_p (xop1))
7662 xop1 = force_reg (SImode, xop1);
7663
7664 if (ARM_BASE_REGISTER_RTX_P (xop0)
7665 && CONST_INT_P (xop1))
7666 {
7667 HOST_WIDE_INT n, low_n;
7668 rtx base_reg, val;
7669 n = INTVAL (xop1);
7670
7671 /* VFP addressing modes actually allow greater offsets, but for
7672 now we just stick with the lowest common denominator. */
7673 if (mode == DImode
7674 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7675 {
7676 low_n = n & 0x0f;
7677 n &= ~0x0f;
7678 if (low_n > 4)
7679 {
7680 n += 16;
7681 low_n -= 16;
7682 }
7683 }
7684 else
7685 {
7686 low_n = ((mode) == TImode ? 0
7687 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7688 n -= low_n;
7689 }
7690
7691 base_reg = gen_reg_rtx (SImode);
7692 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7693 emit_move_insn (base_reg, val);
7694 x = plus_constant (Pmode, base_reg, low_n);
7695 }
7696 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7697 x = gen_rtx_PLUS (SImode, xop0, xop1);
7698 }
7699
7700 /* XXX We don't allow MINUS any more -- see comment in
7701 arm_legitimate_address_outer_p (). */
7702 else if (GET_CODE (x) == MINUS)
7703 {
7704 rtx xop0 = XEXP (x, 0);
7705 rtx xop1 = XEXP (x, 1);
7706
7707 if (CONSTANT_P (xop0))
7708 xop0 = force_reg (SImode, xop0);
7709
7710 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7711 xop1 = force_reg (SImode, xop1);
7712
7713 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7714 x = gen_rtx_MINUS (SImode, xop0, xop1);
7715 }
7716
7717 /* Make sure to take full advantage of the pre-indexed addressing mode
7718 with absolute addresses, which often allows the base register to
7719 be factorized across multiple adjacent memory references, and might
7720 even allow the minipool to be avoided entirely. */
7721 else if (CONST_INT_P (x) && optimize > 0)
7722 {
7723 unsigned int bits;
7724 HOST_WIDE_INT mask, base, index;
7725 rtx base_reg;
7726
7727 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7728 use an 8-bit index. So let's use a 12-bit index for SImode only and
7729 hope that arm_gen_constant will enable ldrb to use more bits. */
7730 bits = (mode == SImode) ? 12 : 8;
7731 mask = (1 << bits) - 1;
7732 base = INTVAL (x) & ~mask;
7733 index = INTVAL (x) & mask;
7734 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7735 {
7736 /* It'll most probably be more efficient to generate the base
7737 with more bits set and use a negative index instead. */
7738 base |= mask;
7739 index -= mask;
7740 }
7741 base_reg = force_reg (SImode, GEN_INT (base));
7742 x = plus_constant (Pmode, base_reg, index);
7743 }
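
/* As a hypothetical example of the transformation above: with
   optimization enabled, SImode loads from the absolute addresses 0x3004
   and 0x3008 are both rewritten as the base 0x3000 (forced into a
   register) plus the 12-bit indexes 4 and 8, so the two memory
   references can share a single base register.  */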
7744
7745 if (flag_pic)
7746 {
7747 /* We need to find and carefully transform any SYMBOL and LABEL
7748 references; so go back to the original address expression. */
7749 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7750
7751 if (new_x != orig_x)
7752 x = new_x;
7753 }
7754
7755 return x;
7756 }
7757
7758
7759 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7760 to be legitimate. If we find one, return the new, valid address. */
7761 rtx
7762 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7763 {
7764 if (GET_CODE (x) == PLUS
7765 && CONST_INT_P (XEXP (x, 1))
7766 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7767 || INTVAL (XEXP (x, 1)) < 0))
7768 {
7769 rtx xop0 = XEXP (x, 0);
7770 rtx xop1 = XEXP (x, 1);
7771 HOST_WIDE_INT offset = INTVAL (xop1);
7772
7773 /* Try and fold the offset into a biasing of the base register and
7774 then offsetting that. Don't do this when optimizing for space
7775 since it can cause too many CSEs. */
7776 if (optimize_size && offset >= 0
7777 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7778 {
7779 HOST_WIDE_INT delta;
7780
7781 if (offset >= 256)
7782 delta = offset - (256 - GET_MODE_SIZE (mode));
7783 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7784 delta = 31 * GET_MODE_SIZE (mode);
7785 else
7786 delta = offset & (~31 * GET_MODE_SIZE (mode));
7787
7788 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7789 NULL_RTX);
7790 x = plus_constant (Pmode, xop0, delta);
7791 }
7792 else if (offset < 0 && offset > -256)
7793 /* Small negative offsets are best done with a subtract before the
7794 dereference, since forcing them into a register normally takes two
7795 instructions. */
7796 x = force_operand (x, NULL_RTX);
7797 else
7798 {
7799 /* For the remaining cases, force the constant into a register. */
7800 xop1 = force_reg (SImode, xop1);
7801 x = gen_rtx_PLUS (SImode, xop0, xop1);
7802 }
7803 }
7804 else if (GET_CODE (x) == PLUS
7805 && s_register_operand (XEXP (x, 1), SImode)
7806 && !s_register_operand (XEXP (x, 0), SImode))
7807 {
7808 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7809
7810 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7811 }
7812
7813 if (flag_pic)
7814 {
7815 /* We need to find and carefully transform any SYMBOL and LABEL
7816 references; so go back to the original address expression. */
7817 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7818
7819 if (new_x != orig_x)
7820 x = new_x;
7821 }
7822
7823 return x;
7824 }
7825
7826 bool
7827 arm_legitimize_reload_address (rtx *p,
7828 machine_mode mode,
7829 int opnum, int type,
7830 int ind_levels ATTRIBUTE_UNUSED)
7831 {
7832 /* We must recognize output that we have already generated ourselves. */
7833 if (GET_CODE (*p) == PLUS
7834 && GET_CODE (XEXP (*p, 0)) == PLUS
7835 && REG_P (XEXP (XEXP (*p, 0), 0))
7836 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7837 && CONST_INT_P (XEXP (*p, 1)))
7838 {
7839 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7840 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7841 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7842 return true;
7843 }
7844
7845 if (GET_CODE (*p) == PLUS
7846 && REG_P (XEXP (*p, 0))
7847 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7848 /* If the base register is equivalent to a constant, let the generic
7849 code handle it. Otherwise we will run into problems if a future
7850 reload pass decides to rematerialize the constant. */
7851 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7852 && CONST_INT_P (XEXP (*p, 1)))
7853 {
7854 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7855 HOST_WIDE_INT low, high;
7856
7857 /* Detect coprocessor load/stores. */
7858 bool coproc_p = ((TARGET_HARD_FLOAT
7859 && TARGET_VFP
7860 && (mode == SFmode || mode == DFmode))
7861 || (TARGET_REALLY_IWMMXT
7862 && VALID_IWMMXT_REG_MODE (mode))
7863 || (TARGET_NEON
7864 && (VALID_NEON_DREG_MODE (mode)
7865 || VALID_NEON_QREG_MODE (mode))));
7866
7867 /* For some cases, bail out when the lower two bits are nonzero (the access is unaligned). */
7868 if ((val & 0x3) != 0
7869 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7870 && (coproc_p
7871 /* For DI, and DF under soft-float: */
7872 || ((mode == DImode || mode == DFmode)
7873 /* Without ldrd, we use stm/ldm, which does not
7874 fare well with unaligned offsets. */
7875 && (! TARGET_LDRD
7876 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7877 || TARGET_THUMB2))))
7878 return false;
7879
7880 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7881 of which the (reg+high) gets turned into a reload add insn,
7882 we try to decompose the index into high/low values that can often
7883 also lead to better reload CSE.
7884 For example:
7885 ldr r0, [r2, #4100] // Offset too large
7886 ldr r1, [r2, #4104] // Offset too large
7887
7888 is best reloaded as:
7889 add t1, r2, #4096
7890 ldr r0, [t1, #4]
7891 add t2, r2, #4096
7892 ldr r1, [t2, #8]
7893
7894 which post-reload CSE can simplify in most cases to eliminate the
7895 second add instruction:
7896 add t1, r2, #4096
7897 ldr r0, [t1, #4]
7898 ldr r1, [t1, #8]
7899
7900 The idea here is that we want to split out the bits of the constant
7901 as a mask, rather than by subtracting the maximum offset that the
7902 respective type of load/store can handle.
7903
7904 A negative low part can still be useful even if the overall offset
7905 is positive; sometimes this leads to an immediate that can be
7906 constructed with fewer instructions.
7907 For example:
7908 ldr r0, [r2, #0x3FFFFC]
7909
7910 This is best reloaded as:
7911 add t1, r2, #0x400000
7912 ldr r0, [t1, #-4]
7913
7914 The trick for spotting this for a load insn with N bits of offset
7915 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7916 negative offset that is going to make bit N and all the bits below
7917 it become zero in the remainder part.
7918
7919 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7920 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7921 used in most cases of ARM load/store instructions. */
7922
7923 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7924 (((VAL) & ((1 << (N)) - 1)) \
7925 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7926 : 0)
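
/* As an illustration (not used by the compiler), the 0x3FFFFC example
   from the comment above works out as follows for a 12-bit offset field:

       SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
	 = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
	 = (0x1FFC ^ 0x1000) - 0x1000
	 = 0x0FFC - 0x1000
	 = -4

   so the low part is -4 and the high part becomes
   0x3FFFFC - (-4) = 0x400000, matching the
   "add t1, r2, #0x400000; ldr r0, [t1, #-4]" sequence shown above.  */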
7927
7928 if (coproc_p)
7929 {
7930 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7931
7932 /* NEON quad-word load/stores are made of two double-word accesses,
7933 so the valid index range is reduced by 8. Treat as 9-bit range if
7934 we go over it. */
7935 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7936 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7937 }
7938 else if (GET_MODE_SIZE (mode) == 8)
7939 {
7940 if (TARGET_LDRD)
7941 low = (TARGET_THUMB2
7942 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7943 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7944 else
7945 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7946 to access doublewords. The supported load/store offsets are
7947 -8, -4, and 4, which we try to produce here. */
7948 low = ((val & 0xf) ^ 0x8) - 0x8;
7949 }
7950 else if (GET_MODE_SIZE (mode) < 8)
7951 {
7952 /* NEON element load/stores do not have an offset. */
7953 if (TARGET_NEON_FP16 && mode == HFmode)
7954 return false;
7955
7956 if (TARGET_THUMB2)
7957 {
7958 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7959 Try the wider 12-bit range first, and re-try if the result
7960 is out of range. */
7961 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7962 if (low < -255)
7963 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7964 }
7965 else
7966 {
7967 if (mode == HImode || mode == HFmode)
7968 {
7969 if (arm_arch4)
7970 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7971 else
7972 {
7973 /* The storehi/movhi_bytes fallbacks can use only
7974 [-4094,+4094] of the full ldrb/strb index range. */
7975 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7976 if (low == 4095 || low == -4095)
7977 return false;
7978 }
7979 }
7980 else
7981 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7982 }
7983 }
7984 else
7985 return false;
7986
7987 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7988 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7989 - (unsigned HOST_WIDE_INT) 0x80000000);
7990 /* Check for overflow or zero */
7991 if (low == 0 || high == 0 || (high + low != val))
7992 return false;
7993
7994 /* Reload the high part into a base reg; leave the low part
7995 in the mem.
7996 Note that replacing this gen_rtx_PLUS with plus_constant is
7997 wrong in this case because we rely on the
7998 (plus (plus reg c1) c2) structure being preserved so that
7999 XEXP (*p, 0) in push_reload below uses the correct term. */
8000 *p = gen_rtx_PLUS (GET_MODE (*p),
8001 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8002 GEN_INT (high)),
8003 GEN_INT (low));
8004 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8005 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8006 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8007 return true;
8008 }
8009
8010 return false;
8011 }
8012
8013 rtx
8014 thumb_legitimize_reload_address (rtx *x_p,
8015 machine_mode mode,
8016 int opnum, int type,
8017 int ind_levels ATTRIBUTE_UNUSED)
8018 {
8019 rtx x = *x_p;
8020
8021 if (GET_CODE (x) == PLUS
8022 && GET_MODE_SIZE (mode) < 4
8023 && REG_P (XEXP (x, 0))
8024 && XEXP (x, 0) == stack_pointer_rtx
8025 && CONST_INT_P (XEXP (x, 1))
8026 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8027 {
8028 rtx orig_x = x;
8029
8030 x = copy_rtx (x);
8031 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8032 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8033 return x;
8034 }
8035
8036 /* If both registers are hi-regs, then it's better to reload the
8037 entire expression rather than each register individually. That
8038 only requires one reload register rather than two. */
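/* For instance (hypothetical register numbers), an address such as
   (plus (reg:SI 8) (reg:SI 9)) is pushed back for reload as a whole,
   so only one reload register is needed instead of two.  */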
8039 if (GET_CODE (x) == PLUS
8040 && REG_P (XEXP (x, 0))
8041 && REG_P (XEXP (x, 1))
8042 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8043 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8044 {
8045 rtx orig_x = x;
8046
8047 x = copy_rtx (x);
8048 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8049 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8050 return x;
8051 }
8052
8053 return NULL;
8054 }
8055
8056 /* Return TRUE if X contains any TLS symbol references. */
8057
8058 bool
8059 arm_tls_referenced_p (rtx x)
8060 {
8061 if (! TARGET_HAVE_TLS)
8062 return false;
8063
8064 subrtx_iterator::array_type array;
8065 FOR_EACH_SUBRTX (iter, array, x, ALL)
8066 {
8067 const_rtx x = *iter;
8068 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8069 return true;
8070
8071 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8072 TLS offsets, not real symbol references. */
8073 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8074 iter.skip_subrtxes ();
8075 }
8076 return false;
8077 }
8078
8079 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8080
8081 On the ARM, allow any integer (invalid ones are removed later by insn
8082 patterns), nice doubles and symbol_refs which refer to the function's
8083 constant pool XXX.
8084
8085 When generating pic allow anything. */
8086
8087 static bool
8088 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8089 {
8090 /* At present, we have no support for Neon structure constants, so forbid
8091 them here. It might be possible to handle simple cases like 0 and -1
8092 in future. */
8093 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8094 return false;
8095
8096 return flag_pic || !label_mentioned_p (x);
8097 }
8098
8099 static bool
8100 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8101 {
8102 return (CONST_INT_P (x)
8103 || CONST_DOUBLE_P (x)
8104 || CONSTANT_ADDRESS_P (x)
8105 || flag_pic);
8106 }
8107
8108 static bool
8109 arm_legitimate_constant_p (machine_mode mode, rtx x)
8110 {
8111 return (!arm_cannot_force_const_mem (mode, x)
8112 && (TARGET_32BIT
8113 ? arm_legitimate_constant_p_1 (mode, x)
8114 : thumb_legitimate_constant_p (mode, x)));
8115 }
8116
8117 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8118
8119 static bool
8120 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8121 {
8122 rtx base, offset;
8123
8124 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8125 {
8126 split_const (x, &base, &offset);
8127 if (GET_CODE (base) == SYMBOL_REF
8128 && !offset_within_block_p (base, INTVAL (offset)))
8129 return true;
8130 }
8131 return arm_tls_referenced_p (x);
8132 }
8133 \f
8134 #define REG_OR_SUBREG_REG(X) \
8135 (REG_P (X) \
8136 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8137
8138 #define REG_OR_SUBREG_RTX(X) \
8139 (REG_P (X) ? (X) : SUBREG_REG (X))
8140
8141 static inline int
8142 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8143 {
8144 machine_mode mode = GET_MODE (x);
8145 int total, words;
8146
8147 switch (code)
8148 {
8149 case ASHIFT:
8150 case ASHIFTRT:
8151 case LSHIFTRT:
8152 case ROTATERT:
8153 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8154
8155 case PLUS:
8156 case MINUS:
8157 case COMPARE:
8158 case NEG:
8159 case NOT:
8160 return COSTS_N_INSNS (1);
8161
8162 case MULT:
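/* As a worked example of the CONST_INT case below: multiplying by 85
   (0x55) shifts i right two bits per iteration (85 -> 21 -> 5 -> 1 -> 0),
   so cycles == 4 and the cost is COSTS_N_INSNS (2) + 4.  */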
8163 if (CONST_INT_P (XEXP (x, 1)))
8164 {
8165 int cycles = 0;
8166 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8167
8168 while (i)
8169 {
8170 i >>= 2;
8171 cycles++;
8172 }
8173 return COSTS_N_INSNS (2) + cycles;
8174 }
8175 return COSTS_N_INSNS (1) + 16;
8176
8177 case SET:
8178 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8179 the mode. */
8180 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8181 return (COSTS_N_INSNS (words)
8182 + 4 * ((MEM_P (SET_SRC (x)))
8183 + MEM_P (SET_DEST (x))));
8184
8185 case CONST_INT:
8186 if (outer == SET)
8187 {
8188 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8189 return 0;
8190 if (thumb_shiftable_const (INTVAL (x)))
8191 return COSTS_N_INSNS (2);
8192 return COSTS_N_INSNS (3);
8193 }
8194 else if ((outer == PLUS || outer == COMPARE)
8195 && INTVAL (x) < 256 && INTVAL (x) > -256)
8196 return 0;
8197 else if ((outer == IOR || outer == XOR || outer == AND)
8198 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8199 return COSTS_N_INSNS (1);
8200 else if (outer == AND)
8201 {
8202 int i;
8203 /* This duplicates the tests in the andsi3 expander. */
8204 for (i = 9; i <= 31; i++)
8205 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8206 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8207 return COSTS_N_INSNS (2);
8208 }
8209 else if (outer == ASHIFT || outer == ASHIFTRT
8210 || outer == LSHIFTRT)
8211 return 0;
8212 return COSTS_N_INSNS (2);
8213
8214 case CONST:
8215 case CONST_DOUBLE:
8216 case LABEL_REF:
8217 case SYMBOL_REF:
8218 return COSTS_N_INSNS (3);
8219
8220 case UDIV:
8221 case UMOD:
8222 case DIV:
8223 case MOD:
8224 return 100;
8225
8226 case TRUNCATE:
8227 return 99;
8228
8229 case AND:
8230 case XOR:
8231 case IOR:
8232 /* XXX guess. */
8233 return 8;
8234
8235 case MEM:
8236 /* XXX another guess. */
8237 /* Memory costs quite a lot for the first word, but subsequent words
8238 load at the equivalent of a single insn each. */
8239 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8240 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8241 ? 4 : 0));
8242
8243 case IF_THEN_ELSE:
8244 /* XXX a guess. */
8245 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8246 return 14;
8247 return 2;
8248
8249 case SIGN_EXTEND:
8250 case ZERO_EXTEND:
8251 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8252 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8253
8254 if (mode == SImode)
8255 return total;
8256
8257 if (arm_arch6)
8258 return total + COSTS_N_INSNS (1);
8259
8260 /* Assume a two-shift sequence. Increase the cost slightly so
8261 we prefer actual shifts over an extend operation. */
8262 return total + 1 + COSTS_N_INSNS (2);
8263
8264 default:
8265 return 99;
8266 }
8267 }
8268
8269 static inline bool
8270 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8271 {
8272 machine_mode mode = GET_MODE (x);
8273 enum rtx_code subcode;
8274 rtx operand;
8275 enum rtx_code code = GET_CODE (x);
8276 *total = 0;
8277
8278 switch (code)
8279 {
8280 case MEM:
8281 /* Memory costs quite a lot for the first word, but subsequent words
8282 load at the equivalent of a single insn each. */
8283 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8284 return true;
8285
8286 case DIV:
8287 case MOD:
8288 case UDIV:
8289 case UMOD:
8290 if (TARGET_HARD_FLOAT && mode == SFmode)
8291 *total = COSTS_N_INSNS (2);
8292 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8293 *total = COSTS_N_INSNS (4);
8294 else
8295 *total = COSTS_N_INSNS (20);
8296 return false;
8297
8298 case ROTATE:
8299 if (REG_P (XEXP (x, 1)))
8300 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8301 else if (!CONST_INT_P (XEXP (x, 1)))
8302 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8303
8304 /* Fall through */
8305 case ROTATERT:
8306 if (mode != SImode)
8307 {
8308 *total += COSTS_N_INSNS (4);
8309 return true;
8310 }
8311
8312 /* Fall through */
8313 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8314 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8315 if (mode == DImode)
8316 {
8317 *total += COSTS_N_INSNS (3);
8318 return true;
8319 }
8320
8321 *total += COSTS_N_INSNS (1);
8322 /* Increase the cost of complex shifts because they aren't any faster,
8323 and reduce dual issue opportunities. */
8324 if (arm_tune_cortex_a9
8325 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8326 ++*total;
8327
8328 return true;
8329
8330 case MINUS:
8331 if (mode == DImode)
8332 {
8333 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8334 if (CONST_INT_P (XEXP (x, 0))
8335 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8336 {
8337 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8338 return true;
8339 }
8340
8341 if (CONST_INT_P (XEXP (x, 1))
8342 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8343 {
8344 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8345 return true;
8346 }
8347
8348 return false;
8349 }
8350
8351 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8352 {
8353 if (TARGET_HARD_FLOAT
8354 && (mode == SFmode
8355 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8356 {
8357 *total = COSTS_N_INSNS (1);
8358 if (CONST_DOUBLE_P (XEXP (x, 0))
8359 && arm_const_double_rtx (XEXP (x, 0)))
8360 {
8361 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8362 return true;
8363 }
8364
8365 if (CONST_DOUBLE_P (XEXP (x, 1))
8366 && arm_const_double_rtx (XEXP (x, 1)))
8367 {
8368 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8369 return true;
8370 }
8371
8372 return false;
8373 }
8374 *total = COSTS_N_INSNS (20);
8375 return false;
8376 }
8377
8378 *total = COSTS_N_INSNS (1);
8379 if (CONST_INT_P (XEXP (x, 0))
8380 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8381 {
8382 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8383 return true;
8384 }
8385
8386 subcode = GET_CODE (XEXP (x, 1));
8387 if (subcode == ASHIFT || subcode == ASHIFTRT
8388 || subcode == LSHIFTRT
8389 || subcode == ROTATE || subcode == ROTATERT)
8390 {
8391 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8392 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8393 return true;
8394 }
8395
8396 /* A shift as a part of RSB costs no more than RSB itself. */
8397 if (GET_CODE (XEXP (x, 0)) == MULT
8398 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8399 {
8400 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8401 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8402 return true;
8403 }
8404
8405 if (subcode == MULT
8406 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8407 {
8408 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8409 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8410 return true;
8411 }
8412
8413 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8414 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8415 {
8416 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8417 if (REG_P (XEXP (XEXP (x, 1), 0))
8418 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8419 *total += COSTS_N_INSNS (1);
8420
8421 return true;
8422 }
8423
8424 /* Fall through */
8425
8426 case PLUS:
8427 if (code == PLUS && arm_arch6 && mode == SImode
8428 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8429 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8430 {
8431 *total = COSTS_N_INSNS (1);
8432 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8433 0, speed);
8434 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8435 return true;
8436 }
8437
8438 /* MLA: All arguments must be registers. We filter out
8439 multiplication by a power of two, so that we fall through to
8440 the code below. */
8441 if (GET_CODE (XEXP (x, 0)) == MULT
8442 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8443 {
8444 /* The cost comes from the cost of the multiply. */
8445 return false;
8446 }
8447
8448 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8449 {
8450 if (TARGET_HARD_FLOAT
8451 && (mode == SFmode
8452 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8453 {
8454 *total = COSTS_N_INSNS (1);
8455 if (CONST_DOUBLE_P (XEXP (x, 1))
8456 && arm_const_double_rtx (XEXP (x, 1)))
8457 {
8458 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8459 return true;
8460 }
8461
8462 return false;
8463 }
8464
8465 *total = COSTS_N_INSNS (20);
8466 return false;
8467 }
8468
8469 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8470 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8471 {
8472 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8473 if (REG_P (XEXP (XEXP (x, 0), 0))
8474 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8475 *total += COSTS_N_INSNS (1);
8476 return true;
8477 }
8478
8479 /* Fall through */
8480
8481 case AND: case XOR: case IOR:
8482
8483 /* Normally the frame registers will be split into reg+const during
8484 reload, so it is a bad idea to combine them with other instructions,
8485 since then they might not be moved outside of loops. As a compromise
8486 we allow integration with ops that have a constant as their second
8487 operand. */
8488 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8489 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8490 && !CONST_INT_P (XEXP (x, 1)))
8491 *total = COSTS_N_INSNS (1);
8492
8493 if (mode == DImode)
8494 {
8495 *total += COSTS_N_INSNS (2);
8496 if (CONST_INT_P (XEXP (x, 1))
8497 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8498 {
8499 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8500 return true;
8501 }
8502
8503 return false;
8504 }
8505
8506 *total += COSTS_N_INSNS (1);
8507 if (CONST_INT_P (XEXP (x, 1))
8508 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8509 {
8510 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8511 return true;
8512 }
8513 subcode = GET_CODE (XEXP (x, 0));
8514 if (subcode == ASHIFT || subcode == ASHIFTRT
8515 || subcode == LSHIFTRT
8516 || subcode == ROTATE || subcode == ROTATERT)
8517 {
8518 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8519 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8520 return true;
8521 }
8522
8523 if (subcode == MULT
8524 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8525 {
8526 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8527 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8528 return true;
8529 }
8530
8531 if (subcode == UMIN || subcode == UMAX
8532 || subcode == SMIN || subcode == SMAX)
8533 {
8534 *total = COSTS_N_INSNS (3);
8535 return true;
8536 }
8537
8538 return false;
8539
8540 case MULT:
8541 /* This should have been handled by the CPU specific routines. */
8542 gcc_unreachable ();
8543
8544 case TRUNCATE:
8545 if (arm_arch3m && mode == SImode
8546 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8547 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8548 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8549 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8550 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8551 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8552 {
8553 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8554 return true;
8555 }
8556 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8557 return false;
8558
8559 case NEG:
8560 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8561 {
8562 if (TARGET_HARD_FLOAT
8563 && (mode == SFmode
8564 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8565 {
8566 *total = COSTS_N_INSNS (1);
8567 return false;
8568 }
8569 *total = COSTS_N_INSNS (2);
8570 return false;
8571 }
8572
8573 /* Fall through */
8574 case NOT:
8575 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8576 if (mode == SImode && code == NOT)
8577 {
8578 subcode = GET_CODE (XEXP (x, 0));
8579 if (subcode == ASHIFT || subcode == ASHIFTRT
8580 || subcode == LSHIFTRT
8581 || subcode == ROTATE || subcode == ROTATERT
8582 || (subcode == MULT
8583 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8584 {
8585 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8586 /* Register shifts cost an extra cycle. */
8587 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8588 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8589 subcode, 1, speed);
8590 return true;
8591 }
8592 }
8593
8594 return false;
8595
8596 case IF_THEN_ELSE:
8597 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8598 {
8599 *total = COSTS_N_INSNS (4);
8600 return true;
8601 }
8602
8603 operand = XEXP (x, 0);
8604
8605 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8606 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8607 && REG_P (XEXP (operand, 0))
8608 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8609 *total += COSTS_N_INSNS (1);
8610 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8611 + rtx_cost (XEXP (x, 2), code, 2, speed));
8612 return true;
8613
8614 case NE:
8615 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8616 {
8617 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8618 return true;
8619 }
8620 goto scc_insn;
8621
8622 case GE:
8623 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8624 && mode == SImode && XEXP (x, 1) == const0_rtx)
8625 {
8626 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8627 return true;
8628 }
8629 goto scc_insn;
8630
8631 case LT:
8632 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8633 && mode == SImode && XEXP (x, 1) == const0_rtx)
8634 {
8635 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8636 return true;
8637 }
8638 goto scc_insn;
8639
8640 case EQ:
8641 case GT:
8642 case LE:
8643 case GEU:
8644 case LTU:
8645 case GTU:
8646 case LEU:
8647 case UNORDERED:
8648 case ORDERED:
8649 case UNEQ:
8650 case UNGE:
8651 case UNLT:
8652 case UNGT:
8653 case UNLE:
8654 scc_insn:
8655 /* SCC insns. In the case where the comparison has already been
8656 performed, they cost 2 instructions. Otherwise they need
8657 an additional comparison before them. */
8658 *total = COSTS_N_INSNS (2);
8659 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8660 {
8661 return true;
8662 }
8663
8664 /* Fall through */
8665 case COMPARE:
8666 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8667 {
8668 *total = 0;
8669 return true;
8670 }
8671
8672 *total += COSTS_N_INSNS (1);
8673 if (CONST_INT_P (XEXP (x, 1))
8674 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8675 {
8676 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8677 return true;
8678 }
8679
8680 subcode = GET_CODE (XEXP (x, 0));
8681 if (subcode == ASHIFT || subcode == ASHIFTRT
8682 || subcode == LSHIFTRT
8683 || subcode == ROTATE || subcode == ROTATERT)
8684 {
8685 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8686 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8687 return true;
8688 }
8689
8690 if (subcode == MULT
8691 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8692 {
8693 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8694 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8695 return true;
8696 }
8697
8698 return false;
8699
8700 case UMIN:
8701 case UMAX:
8702 case SMIN:
8703 case SMAX:
8704 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8705 if (!CONST_INT_P (XEXP (x, 1))
8706 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8707 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8708 return true;
8709
8710 case ABS:
8711 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8712 {
8713 if (TARGET_HARD_FLOAT
8714 && (mode == SFmode
8715 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8716 {
8717 *total = COSTS_N_INSNS (1);
8718 return false;
8719 }
8720 *total = COSTS_N_INSNS (20);
8721 return false;
8722 }
8723 *total = COSTS_N_INSNS (1);
8724 if (mode == DImode)
8725 *total += COSTS_N_INSNS (3);
8726 return false;
8727
8728 case SIGN_EXTEND:
8729 case ZERO_EXTEND:
8730 *total = 0;
8731 if (GET_MODE_CLASS (mode) == MODE_INT)
8732 {
8733 rtx op = XEXP (x, 0);
8734 machine_mode opmode = GET_MODE (op);
8735
8736 if (mode == DImode)
8737 *total += COSTS_N_INSNS (1);
8738
8739 if (opmode != SImode)
8740 {
8741 if (MEM_P (op))
8742 {
8743 /* If !arm_arch4, we use one of the extendhisi2_mem
8744 or movhi_bytes patterns for HImode. For a QImode
8745 sign extension, we first zero-extend from memory
8746 and then perform a shift sequence. */
8747 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8748 *total += COSTS_N_INSNS (2);
8749 }
8750 else if (arm_arch6)
8751 *total += COSTS_N_INSNS (1);
8752
8753 /* We don't have the necessary insn, so we need to perform some
8754 other operation. */
8755 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8756 /* An and with constant 255. */
8757 *total += COSTS_N_INSNS (1);
8758 else
8759 /* A shift sequence. Increase costs slightly to avoid
8760 combining two shifts into an extend operation. */
8761 *total += COSTS_N_INSNS (2) + 1;
8762 }
8763
8764 return false;
8765 }
8766
8767 switch (GET_MODE (XEXP (x, 0)))
8768 {
8769 case V8QImode:
8770 case V4HImode:
8771 case V2SImode:
8772 case V4QImode:
8773 case V2HImode:
8774 *total = COSTS_N_INSNS (1);
8775 return false;
8776
8777 default:
8778 gcc_unreachable ();
8779 }
8780 gcc_unreachable ();
8781
8782 case ZERO_EXTRACT:
8783 case SIGN_EXTRACT:
8784 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8785 return true;
8786
8787 case CONST_INT:
8788 if (const_ok_for_arm (INTVAL (x))
8789 || const_ok_for_arm (~INTVAL (x)))
8790 *total = COSTS_N_INSNS (1);
8791 else
8792 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8793 INTVAL (x), NULL_RTX,
8794 NULL_RTX, 0, 0));
8795 return true;
8796
8797 case CONST:
8798 case LABEL_REF:
8799 case SYMBOL_REF:
8800 *total = COSTS_N_INSNS (3);
8801 return true;
8802
8803 case HIGH:
8804 *total = COSTS_N_INSNS (1);
8805 return true;
8806
8807 case LO_SUM:
8808 *total = COSTS_N_INSNS (1);
8809 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8810 return true;
8811
8812 case CONST_DOUBLE:
8813 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8814 && (mode == SFmode || !TARGET_VFP_SINGLE))
8815 *total = COSTS_N_INSNS (1);
8816 else
8817 *total = COSTS_N_INSNS (4);
8818 return true;
8819
8820 case SET:
8821 /* The vec_extract patterns accept memory operands that require an
8822 address reload. Account for the cost of that reload to give the
8823 auto-inc-dec pass an incentive to try to replace them. */
8824 if (TARGET_NEON && MEM_P (SET_DEST (x))
8825 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8826 {
8827 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8828 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8829 *total += COSTS_N_INSNS (1);
8830 return true;
8831 }
8832 /* Likewise for the vec_set patterns. */
8833 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8834 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8835 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8836 {
8837 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8838 *total = rtx_cost (mem, code, 0, speed);
8839 if (!neon_vector_mem_operand (mem, 2, true))
8840 *total += COSTS_N_INSNS (1);
8841 return true;
8842 }
8843 return false;
8844
8845 case UNSPEC:
8846 /* We cost this as high as our memory costs to allow this to
8847 be hoisted from loops. */
8848 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8849 {
8850 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8851 }
8852 return true;
8853
8854 case CONST_VECTOR:
8855 if (TARGET_NEON
8856 && TARGET_HARD_FLOAT
8857 && outer == SET
8858 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8859 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8860 *total = COSTS_N_INSNS (1);
8861 else
8862 *total = COSTS_N_INSNS (4);
8863 return true;
8864
8865 default:
8866 *total = COSTS_N_INSNS (4);
8867 return false;
8868 }
8869 }
8870
8871 /* Estimate the size cost of Thumb-1 instructions.
8872 For now most of the code is copied from thumb1_rtx_costs. We need more
8873 fine-grained tuning when we have more related test cases. */
8874 static inline int
8875 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8876 {
8877 machine_mode mode = GET_MODE (x);
8878 int words;
8879
8880 switch (code)
8881 {
8882 case ASHIFT:
8883 case ASHIFTRT:
8884 case LSHIFTRT:
8885 case ROTATERT:
8886 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8887
8888 case PLUS:
8889 case MINUS:
8890 /* Thumb-1 needs two instructions to implement the
8891 shiftadd/shiftsub0/shiftsub1 patterns generated by RTL expansion,
8892 especially for the expansion of multiplication. */
8893 if ((GET_CODE (XEXP (x, 0)) == MULT
8894 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8895 || (GET_CODE (XEXP (x, 1)) == MULT
8896 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8897 return COSTS_N_INSNS (2);
8898 /* Deliberately fall through for normal RTXs. */
8899 case COMPARE:
8900 case NEG:
8901 case NOT:
8902 return COSTS_N_INSNS (1);
8903
8904 case MULT:
8905 if (CONST_INT_P (XEXP (x, 1)))
8906 {
8907 /* The Thumb-1 mul instruction can't operate on a constant; we must
8908 load it into a register first. */
8909 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8910 /* For targets that have a very small and high-latency multiply
8911 unit, we prefer to synthesize the mult with up to 5 instructions,
8912 giving a good balance between size and performance. */
8913 if (arm_arch6m && arm_m_profile_small_mul)
8914 return COSTS_N_INSNS (5);
8915 else
8916 return COSTS_N_INSNS (1) + const_size;
8917 }
8918 return COSTS_N_INSNS (1);
8919
8920 case SET:
8921 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8922 the mode. */
8923 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8924 return COSTS_N_INSNS (words)
8925 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8926 || satisfies_constraint_K (SET_SRC (x))
8927 /* thumb1_movdi_insn. */
8928 || ((words > 1) && MEM_P (SET_SRC (x))));
8929
8930 case CONST_INT:
8931 if (outer == SET)
8932 {
8933 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8934 return COSTS_N_INSNS (1);
8935 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8936 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8937 return COSTS_N_INSNS (2);
8938 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8939 if (thumb_shiftable_const (INTVAL (x)))
8940 return COSTS_N_INSNS (2);
8941 return COSTS_N_INSNS (3);
8942 }
8943 else if ((outer == PLUS || outer == COMPARE)
8944 && INTVAL (x) < 256 && INTVAL (x) > -256)
8945 return 0;
8946 else if ((outer == IOR || outer == XOR || outer == AND)
8947 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8948 return COSTS_N_INSNS (1);
8949 else if (outer == AND)
8950 {
8951 int i;
8952 /* This duplicates the tests in the andsi3 expander. */
8953 for (i = 9; i <= 31; i++)
8954 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8955 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8956 return COSTS_N_INSNS (2);
8957 }
8958 else if (outer == ASHIFT || outer == ASHIFTRT
8959 || outer == LSHIFTRT)
8960 return 0;
8961 return COSTS_N_INSNS (2);
8962
8963 case CONST:
8964 case CONST_DOUBLE:
8965 case LABEL_REF:
8966 case SYMBOL_REF:
8967 return COSTS_N_INSNS (3);
8968
8969 case UDIV:
8970 case UMOD:
8971 case DIV:
8972 case MOD:
8973 return 100;
8974
8975 case TRUNCATE:
8976 return 99;
8977
8978 case AND:
8979 case XOR:
8980 case IOR:
8981 return COSTS_N_INSNS (1);
8982
8983 case MEM:
8984 return (COSTS_N_INSNS (1)
8985 + COSTS_N_INSNS (1)
8986 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8987 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8988 ? COSTS_N_INSNS (1) : 0));
8989
8990 case IF_THEN_ELSE:
8991 /* XXX a guess. */
8992 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8993 return 14;
8994 return 2;
8995
8996 case ZERO_EXTEND:
8997 /* XXX still guessing. */
8998 switch (GET_MODE (XEXP (x, 0)))
8999 {
9000 case QImode:
9001 return (1 + (mode == DImode ? 4 : 0)
9002 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9003
9004 case HImode:
9005 return (4 + (mode == DImode ? 4 : 0)
9006 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9007
9008 case SImode:
9009 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9010
9011 default:
9012 return 99;
9013 }
9014
9015 default:
9016 return 99;
9017 }
9018 }
9019
9020 /* RTX costs when optimizing for size. */
9021 static bool
9022 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9023 int *total)
9024 {
9025 machine_mode mode = GET_MODE (x);
9026 if (TARGET_THUMB1)
9027 {
9028 *total = thumb1_size_rtx_costs (x, code, outer_code);
9029 return true;
9030 }
9031
9032 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9033 switch (code)
9034 {
9035 case MEM:
9036 /* A memory access costs 1 insn if the mode is small, or the address is
9037 a single register, otherwise it costs one insn per word. */
9038 if (REG_P (XEXP (x, 0)))
9039 *total = COSTS_N_INSNS (1);
9040 else if (flag_pic
9041 && GET_CODE (XEXP (x, 0)) == PLUS
9042 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9043 /* This will be split into two instructions.
9044 See arm.md:calculate_pic_address. */
9045 *total = COSTS_N_INSNS (2);
9046 else
9047 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9048 return true;
9049
9050 case DIV:
9051 case MOD:
9052 case UDIV:
9053 case UMOD:
9054 /* Needs a libcall; assume a cost of roughly two instructions. */
9055 *total = COSTS_N_INSNS (2);
9056 return false;
9057
9058 case ROTATE:
9059 if (mode == SImode && REG_P (XEXP (x, 1)))
9060 {
9061 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9062 return true;
9063 }
9064 /* Fall through */
9065 case ROTATERT:
9066 case ASHIFT:
9067 case LSHIFTRT:
9068 case ASHIFTRT:
9069 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9070 {
9071 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9072 return true;
9073 }
9074 else if (mode == SImode)
9075 {
9076 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9077 /* Slightly disparage register shifts, but not by much. */
9078 if (!CONST_INT_P (XEXP (x, 1)))
9079 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9080 return true;
9081 }
9082
9083 /* Needs a libcall. */
9084 *total = COSTS_N_INSNS (2);
9085 return false;
9086
9087 case MINUS:
9088 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9089 && (mode == SFmode || !TARGET_VFP_SINGLE))
9090 {
9091 *total = COSTS_N_INSNS (1);
9092 return false;
9093 }
9094
9095 if (mode == SImode)
9096 {
9097 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9098 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9099
9100 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9101 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9102 || subcode1 == ROTATE || subcode1 == ROTATERT
9103 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9104 || subcode1 == ASHIFTRT)
9105 {
9106 /* It's just the cost of the two operands. */
9107 *total = 0;
9108 return false;
9109 }
9110
9111 *total = COSTS_N_INSNS (1);
9112 return false;
9113 }
9114
9115 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9116 return false;
9117
9118 case PLUS:
9119 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9120 && (mode == SFmode || !TARGET_VFP_SINGLE))
9121 {
9122 *total = COSTS_N_INSNS (1);
9123 return false;
9124 }
9125
9126 /* A shift as a part of ADD costs nothing. */
9127 if (GET_CODE (XEXP (x, 0)) == MULT
9128 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9129 {
9130 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9131 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9132 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9133 return true;
9134 }
9135
9136 /* Fall through */
9137 case AND: case XOR: case IOR:
9138 if (mode == SImode)
9139 {
9140 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9141
9142 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9143 || subcode == LSHIFTRT || subcode == ASHIFTRT
9144 || (code == AND && subcode == NOT))
9145 {
9146 /* It's just the cost of the two operands. */
9147 *total = 0;
9148 return false;
9149 }
9150 }
9151
9152 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9153 return false;
9154
9155 case MULT:
9156 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9157 return false;
9158
9159 case NEG:
9160 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9161 && (mode == SFmode || !TARGET_VFP_SINGLE))
9162 {
9163 *total = COSTS_N_INSNS (1);
9164 return false;
9165 }
9166
9167 /* Fall through */
9168 case NOT:
9169 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9170
9171 return false;
9172
9173 case IF_THEN_ELSE:
9174 *total = 0;
9175 return false;
9176
9177 case COMPARE:
9178 if (cc_register (XEXP (x, 0), VOIDmode))
9179 *total = 0;
9180 else
9181 *total = COSTS_N_INSNS (1);
9182 return false;
9183
9184 case ABS:
9185 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9186 && (mode == SFmode || !TARGET_VFP_SINGLE))
9187 *total = COSTS_N_INSNS (1);
9188 else
9189 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9190 return false;
9191
9192 case SIGN_EXTEND:
9193 case ZERO_EXTEND:
9194 return arm_rtx_costs_1 (x, outer_code, total, 0);
9195
9196 case CONST_INT:
9197 if (const_ok_for_arm (INTVAL (x)))
9198 /* A multiplication by a constant requires another instruction
9199 to load the constant to a register. */
9200 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9201 ? 1 : 0);
9202 else if (const_ok_for_arm (~INTVAL (x)))
9203 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9204 else if (const_ok_for_arm (-INTVAL (x)))
9205 {
9206 if (outer_code == COMPARE || outer_code == PLUS
9207 || outer_code == MINUS)
9208 *total = 0;
9209 else
9210 *total = COSTS_N_INSNS (1);
9211 }
9212 else
9213 *total = COSTS_N_INSNS (2);
9214 return true;
9215
9216 case CONST:
9217 case LABEL_REF:
9218 case SYMBOL_REF:
9219 *total = COSTS_N_INSNS (2);
9220 return true;
9221
9222 case CONST_DOUBLE:
9223 *total = COSTS_N_INSNS (4);
9224 return true;
9225
9226 case CONST_VECTOR:
9227 if (TARGET_NEON
9228 && TARGET_HARD_FLOAT
9229 && outer_code == SET
9230 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9231 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9232 *total = COSTS_N_INSNS (1);
9233 else
9234 *total = COSTS_N_INSNS (4);
9235 return true;
9236
9237 case HIGH:
9238 case LO_SUM:
9239 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9240 cost of these slightly. */
9241 *total = COSTS_N_INSNS (1) + 1;
9242 return true;
9243
9244 case SET:
9245 return false;
9246
9247 default:
9248 if (mode != VOIDmode)
9249 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9250 else
9251 *total = COSTS_N_INSNS (4); /* Who knows? */
9252 return false;
9253 }
9254 }
9255
9256 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9257 operand, then return the operand that is being shifted. If the shift
9258 is not by a constant, then set SHIFT_REG to point to the operand.
9259 Return NULL if OP is not a shifter operand. */
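/* For example, (mult X (const_int 4)) is treated as a left shift by 2 and
   X is returned, while for (lshiftrt X (reg R)) X is returned and
   *SHIFT_REG is set to R.  */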
9260 static rtx
9261 shifter_op_p (rtx op, rtx *shift_reg)
9262 {
9263 enum rtx_code code = GET_CODE (op);
9264
9265 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9266 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9267 return XEXP (op, 0);
9268 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9269 return XEXP (op, 0);
9270 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9271 || code == ASHIFTRT)
9272 {
9273 if (!CONST_INT_P (XEXP (op, 1)))
9274 *shift_reg = XEXP (op, 1);
9275 return XEXP (op, 0);
9276 }
9277
9278 return NULL;
9279 }
9280
9281 static bool
9282 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9283 {
9284 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9285 gcc_assert (GET_CODE (x) == UNSPEC);
9286
9287 switch (XINT (x, 1))
9288 {
9289 case UNSPEC_UNALIGNED_LOAD:
9290 /* We can only do unaligned loads into the integer unit, and we can't
9291 use LDM or LDRD. */
9292 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9293 if (speed_p)
9294 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9295 + extra_cost->ldst.load_unaligned);
9296
9297 #ifdef NOT_YET
9298 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9299 ADDR_SPACE_GENERIC, speed_p);
9300 #endif
9301 return true;
9302
9303 case UNSPEC_UNALIGNED_STORE:
9304 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9305 if (speed_p)
9306 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9307 + extra_cost->ldst.store_unaligned);
9308
9309 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9310 #ifdef NOT_YET
9311 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9312 ADDR_SPACE_GENERIC, speed_p);
9313 #endif
9314 return true;
9315
9316 case UNSPEC_VRINTZ:
9317 case UNSPEC_VRINTP:
9318 case UNSPEC_VRINTM:
9319 case UNSPEC_VRINTR:
9320 case UNSPEC_VRINTX:
9321 case UNSPEC_VRINTA:
9322 *cost = COSTS_N_INSNS (1);
9323 if (speed_p)
9324 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9325
9326 return true;
9327 default:
9328 *cost = COSTS_N_INSNS (2);
9329 break;
9330 }
9331 return false;
9332 }
9333
9334 /* Cost of a libcall. We assume one insn per argument, an amount for the
9335 call (one insn for -Os) and then one for processing the result. */
9336 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
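/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for
   size.  */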
9337
9338 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9339 do \
9340 { \
9341 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9342 if (shift_op != NULL \
9343 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9344 { \
9345 if (shift_reg) \
9346 { \
9347 if (speed_p) \
9348 *cost += extra_cost->alu.arith_shift_reg; \
9349 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9350 } \
9351 else if (speed_p) \
9352 *cost += extra_cost->alu.arith_shift; \
9353 \
9354 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9355 + rtx_cost (XEXP (x, 1 - IDX), \
9356 OP, 1, speed_p)); \
9357 return true; \
9358 } \
9359 } \
9360 while (0);
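/* This macro is expanded below for the narrow-mode (smaller than SImode)
   PLUS and MINUS cases; IDX selects which operand is tested for a
   left-shift form, and a successful match returns directly from
   arm_new_rtx_costs.  */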
9361
9362 /* RTX costs. Make an estimate of the cost of executing the operation
9363 X, which is contained within an operation with code OUTER_CODE.
9364 SPEED_P indicates whether the cost desired is the performance cost,
9365 or the size cost. The estimate is stored in COST and the return
9366 value is TRUE if the cost calculation is final, or FALSE if the
9367 caller should recurse through the operands of X to add additional
9368 costs.
9369
9370 We currently make no attempt to model the size savings of Thumb-2
9371 16-bit instructions. At the normal points in compilation where
9372 this code is called we have no measure of whether the condition
9373 flags are live or not, and thus no realistic way to determine what
9374 the size will eventually be. */
9375 static bool
9376 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9377 const struct cpu_cost_table *extra_cost,
9378 int *cost, bool speed_p)
9379 {
9380 machine_mode mode = GET_MODE (x);
9381
9382 if (TARGET_THUMB1)
9383 {
9384 if (speed_p)
9385 *cost = thumb1_rtx_costs (x, code, outer_code);
9386 else
9387 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9388 return true;
9389 }
9390
9391 switch (code)
9392 {
9393 case SET:
9394 *cost = 0;
9395 /* SET RTXs don't have a mode so we get it from the destination. */
9396 mode = GET_MODE (SET_DEST (x));
9397
9398 if (REG_P (SET_SRC (x))
9399 && REG_P (SET_DEST (x)))
9400 {
9401 /* Assume that most copies can be done with a single insn,
9402 unless we don't have HW FP, in which case everything
9403 larger than word mode will require two insns. */
9404 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9405 && GET_MODE_SIZE (mode) > 4)
9406 || mode == DImode)
9407 ? 2 : 1);
9408 /* Conditional register moves can be encoded
9409 in 16 bits in Thumb mode. */
9410 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9411 *cost >>= 1;
9412
9413 return true;
9414 }
9415
9416 if (CONST_INT_P (SET_SRC (x)))
9417 {
9418 /* Handle CONST_INT here, since the value doesn't have a mode
9419 and we would otherwise be unable to work out the true cost. */
9420 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9421 outer_code = SET;
9422 /* Slightly lower the cost of setting a core reg to a constant.
9423 This helps break up chains and allows for better scheduling. */
9424 if (REG_P (SET_DEST (x))
9425 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9426 *cost -= 1;
9427 x = SET_SRC (x);
9428 /* Moves of an immediate in the range [0, 255] can be encoded in 16
9429 bits in Thumb mode. */
9430 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9431 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9432 *cost >>= 1;
9433 goto const_int_cost;
9434 }
9435
9436 return false;
9437
9438 case MEM:
9439 /* A memory access costs 1 insn if the mode is small, or the address is
9440 a single register, otherwise it costs one insn per word. */
9441 if (REG_P (XEXP (x, 0)))
9442 *cost = COSTS_N_INSNS (1);
9443 else if (flag_pic
9444 && GET_CODE (XEXP (x, 0)) == PLUS
9445 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9446 /* This will be split into two instructions.
9447 See arm.md:calculate_pic_address. */
9448 *cost = COSTS_N_INSNS (2);
9449 else
9450 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9451
9452 /* For speed optimizations, add the costs of the address and
9453 accessing memory. */
9454 if (speed_p)
9455 #ifdef NOT_YET
9456 *cost += (extra_cost->ldst.load
9457 + arm_address_cost (XEXP (x, 0), mode,
9458 ADDR_SPACE_GENERIC, speed_p));
9459 #else
9460 *cost += extra_cost->ldst.load;
9461 #endif
9462 return true;
9463
9464 case PARALLEL:
9465 {
9466 /* Calculations of LDM costs are complex. We assume an initial cost
9467 (ldm_1st) which will load the number of registers mentioned in
9468 ldm_regs_per_insn_1st registers; then each additional
9469 ldm_regs_per_insn_subsequent registers cost one more insn. The
9470 formula for N regs is thus:
9471
9472 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9473 + ldm_regs_per_insn_subsequent - 1)
9474 / ldm_regs_per_insn_subsequent).
9475
9476 Additional costs may also be added for addressing. A similar
9477 formula is used for STM. */
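/* For instance, with hypothetical tuning values ldm_regs_per_insn_1st == 2
   and ldm_regs_per_insn_subsequent == 2, loading five registers would cost
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */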
9478
9479 bool is_ldm = load_multiple_operation (x, SImode);
9480 bool is_stm = store_multiple_operation (x, SImode);
9481
9482 *cost = COSTS_N_INSNS (1);
9483
9484 if (is_ldm || is_stm)
9485 {
9486 if (speed_p)
9487 {
9488 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9489 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9490 ? extra_cost->ldst.ldm_regs_per_insn_1st
9491 : extra_cost->ldst.stm_regs_per_insn_1st;
9492 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9493 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9494 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9495
9496 *cost += regs_per_insn_1st
9497 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9498 + regs_per_insn_sub - 1)
9499 / regs_per_insn_sub);
9500 return true;
9501 }
9502
9503 }
9504 return false;
9505 }
9506 case DIV:
9507 case UDIV:
9508 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9509 && (mode == SFmode || !TARGET_VFP_SINGLE))
9510 *cost = COSTS_N_INSNS (speed_p
9511 ? extra_cost->fp[mode != SFmode].div : 1);
9512 else if (mode == SImode && TARGET_IDIV)
9513 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9514 else
9515 *cost = LIBCALL_COST (2);
9516 return false; /* All arguments must be in registers. */
9517
9518 case MOD:
9519 case UMOD:
9520 *cost = LIBCALL_COST (2);
9521 return false; /* All arguments must be in registers. */
9522
9523 case ROTATE:
9524 if (mode == SImode && REG_P (XEXP (x, 1)))
9525 {
9526 *cost = (COSTS_N_INSNS (2)
9527 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9528 if (speed_p)
9529 *cost += extra_cost->alu.shift_reg;
9530 return true;
9531 }
9532 /* Fall through */
9533 case ROTATERT:
9534 case ASHIFT:
9535 case LSHIFTRT:
9536 case ASHIFTRT:
9537 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9538 {
9539 *cost = (COSTS_N_INSNS (3)
9540 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9541 if (speed_p)
9542 *cost += 2 * extra_cost->alu.shift;
9543 return true;
9544 }
9545 else if (mode == SImode)
9546 {
9547 *cost = (COSTS_N_INSNS (1)
9548 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9549 /* Slightly disparage register shifts at -Os, but not by much. */
9550 if (!CONST_INT_P (XEXP (x, 1)))
9551 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9552 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9553 return true;
9554 }
9555 else if (GET_MODE_CLASS (mode) == MODE_INT
9556 && GET_MODE_SIZE (mode) < 4)
9557 {
9558 if (code == ASHIFT)
9559 {
9560 *cost = (COSTS_N_INSNS (1)
9561 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9562 /* Slightly disparage register shifts at -Os, but not by
9563 much. */
9564 if (!CONST_INT_P (XEXP (x, 1)))
9565 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9566 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9567 }
9568 else if (code == LSHIFTRT || code == ASHIFTRT)
9569 {
9570 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9571 {
9572 /* Can use SBFX/UBFX. */
9573 *cost = COSTS_N_INSNS (1);
9574 if (speed_p)
9575 *cost += extra_cost->alu.bfx;
9576 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9577 }
9578 else
9579 {
9580 *cost = COSTS_N_INSNS (2);
9581 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9582 if (speed_p)
9583 {
9584 if (CONST_INT_P (XEXP (x, 1)))
9585 *cost += 2 * extra_cost->alu.shift;
9586 else
9587 *cost += (extra_cost->alu.shift
9588 + extra_cost->alu.shift_reg);
9589 }
9590 else
9591 /* Slightly disparage register shifts. */
9592 *cost += !CONST_INT_P (XEXP (x, 1));
9593 }
9594 }
9595 else /* Rotates. */
9596 {
9597 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9598 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9599 if (speed_p)
9600 {
9601 if (CONST_INT_P (XEXP (x, 1)))
9602 *cost += (2 * extra_cost->alu.shift
9603 + extra_cost->alu.log_shift);
9604 else
9605 *cost += (extra_cost->alu.shift
9606 + extra_cost->alu.shift_reg
9607 + extra_cost->alu.log_shift_reg);
9608 }
9609 }
9610 return true;
9611 }
9612
9613 *cost = LIBCALL_COST (2);
9614 return false;
9615
9616 case BSWAP:
9617 if (arm_arch6)
9618 {
9619 if (mode == SImode)
9620 {
9621 *cost = COSTS_N_INSNS (1);
9622 if (speed_p)
9623 *cost += extra_cost->alu.rev;
9624
9625 return false;
9626 }
9627 }
9628 else
9629 {
9630 /* No rev instruction available. Look at arm_legacy_rev
9631 and thumb_legacy_rev for the form of RTL used then. */
9632 if (TARGET_THUMB)
9633 {
9634 *cost = COSTS_N_INSNS (10);
9635
9636 if (speed_p)
9637 {
9638 *cost += 6 * extra_cost->alu.shift;
9639 *cost += 3 * extra_cost->alu.logical;
9640 }
9641 }
9642 else
9643 {
9644 *cost = COSTS_N_INSNS (5);
9645
9646 if (speed_p)
9647 {
9648 *cost += 2 * extra_cost->alu.shift;
9649 *cost += extra_cost->alu.arith_shift;
9650 *cost += 2 * extra_cost->alu.logical;
9651 }
9652 }
9653 return true;
9654 }
9655 return false;
9656
9657 case MINUS:
9658 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9659 && (mode == SFmode || !TARGET_VFP_SINGLE))
9660 {
9661 *cost = COSTS_N_INSNS (1);
9662 if (GET_CODE (XEXP (x, 0)) == MULT
9663 || GET_CODE (XEXP (x, 1)) == MULT)
9664 {
9665 rtx mul_op0, mul_op1, sub_op;
9666
9667 if (speed_p)
9668 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9669
9670 if (GET_CODE (XEXP (x, 0)) == MULT)
9671 {
9672 mul_op0 = XEXP (XEXP (x, 0), 0);
9673 mul_op1 = XEXP (XEXP (x, 0), 1);
9674 sub_op = XEXP (x, 1);
9675 }
9676 else
9677 {
9678 mul_op0 = XEXP (XEXP (x, 1), 0);
9679 mul_op1 = XEXP (XEXP (x, 1), 1);
9680 sub_op = XEXP (x, 0);
9681 }
9682
9683 /* The first operand of the multiply may be optionally
9684 negated. */
9685 if (GET_CODE (mul_op0) == NEG)
9686 mul_op0 = XEXP (mul_op0, 0);
9687
9688 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9689 + rtx_cost (mul_op1, code, 0, speed_p)
9690 + rtx_cost (sub_op, code, 0, speed_p));
9691
9692 return true;
9693 }
9694
9695 if (speed_p)
9696 *cost += extra_cost->fp[mode != SFmode].addsub;
9697 return false;
9698 }
9699
9700 if (mode == SImode)
9701 {
9702 rtx shift_by_reg = NULL;
9703 rtx shift_op;
9704 rtx non_shift_op;
9705
9706 *cost = COSTS_N_INSNS (1);
9707
9708 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9709 if (shift_op == NULL)
9710 {
9711 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9712 non_shift_op = XEXP (x, 0);
9713 }
9714 else
9715 non_shift_op = XEXP (x, 1);
9716
9717 if (shift_op != NULL)
9718 {
9719 if (shift_by_reg != NULL)
9720 {
9721 if (speed_p)
9722 *cost += extra_cost->alu.arith_shift_reg;
9723 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9724 }
9725 else if (speed_p)
9726 *cost += extra_cost->alu.arith_shift;
9727
9728 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9729 + rtx_cost (non_shift_op, code, 0, speed_p));
9730 return true;
9731 }
9732
9733 if (arm_arch_thumb2
9734 && GET_CODE (XEXP (x, 1)) == MULT)
9735 {
9736 /* MLS. */
9737 if (speed_p)
9738 *cost += extra_cost->mult[0].add;
9739 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9740 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9741 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9742 return true;
9743 }
9744
9745 if (CONST_INT_P (XEXP (x, 0)))
9746 {
9747 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9748 INTVAL (XEXP (x, 0)), NULL_RTX,
9749 NULL_RTX, 1, 0);
9750 *cost = COSTS_N_INSNS (insns);
9751 if (speed_p)
9752 *cost += insns * extra_cost->alu.arith;
9753 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9754 return true;
9755 }
9756 else if (speed_p)
9757 *cost += extra_cost->alu.arith;
9758
9759 return false;
9760 }
9761
9762 if (GET_MODE_CLASS (mode) == MODE_INT
9763 && GET_MODE_SIZE (mode) < 4)
9764 {
9765 rtx shift_op, shift_reg;
9766 shift_reg = NULL;
9767
9768 /* We check both sides of the MINUS for shifter operands since,
9769 unlike PLUS, it's not commutative. */
9770
9771 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9772 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9773
9774 /* Slightly disparage, as we might need to widen the result. */
9775 *cost = 1 + COSTS_N_INSNS (1);
9776 if (speed_p)
9777 *cost += extra_cost->alu.arith;
9778
9779 if (CONST_INT_P (XEXP (x, 0)))
9780 {
9781 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9782 return true;
9783 }
9784
9785 return false;
9786 }
9787
9788 if (mode == DImode)
9789 {
9790 *cost = COSTS_N_INSNS (2);
9791
9792 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9793 {
9794 rtx op1 = XEXP (x, 1);
9795
9796 if (speed_p)
9797 *cost += 2 * extra_cost->alu.arith;
9798
9799 if (GET_CODE (op1) == ZERO_EXTEND)
9800 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9801 else
9802 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9803 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9804 0, speed_p);
9805 return true;
9806 }
9807 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9808 {
9809 if (speed_p)
9810 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9811 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9812 0, speed_p)
9813 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9814 return true;
9815 }
9816 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9817 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9818 {
9819 if (speed_p)
9820 *cost += (extra_cost->alu.arith
9821 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9822 ? extra_cost->alu.arith
9823 : extra_cost->alu.arith_shift));
9824 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9825 + rtx_cost (XEXP (XEXP (x, 1), 0),
9826 GET_CODE (XEXP (x, 1)), 0, speed_p));
9827 return true;
9828 }
9829
9830 if (speed_p)
9831 *cost += 2 * extra_cost->alu.arith;
9832 return false;
9833 }
9834
9835 /* Vector mode? */
9836
9837 *cost = LIBCALL_COST (2);
9838 return false;
9839
9840 case PLUS:
9841 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9842 && (mode == SFmode || !TARGET_VFP_SINGLE))
9843 {
9844 *cost = COSTS_N_INSNS (1);
9845 if (GET_CODE (XEXP (x, 0)) == MULT)
9846 {
9847 rtx mul_op0, mul_op1, add_op;
9848
9849 if (speed_p)
9850 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9851
9852 mul_op0 = XEXP (XEXP (x, 0), 0);
9853 mul_op1 = XEXP (XEXP (x, 0), 1);
9854 add_op = XEXP (x, 1);
9855
9856 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9857 + rtx_cost (mul_op1, code, 0, speed_p)
9858 + rtx_cost (add_op, code, 0, speed_p));
9859
9860 return true;
9861 }
9862
9863 if (speed_p)
9864 *cost += extra_cost->fp[mode != SFmode].addsub;
9865 return false;
9866 }
9867 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9868 {
9869 *cost = LIBCALL_COST (2);
9870 return false;
9871 }
9872
9873 /* Narrow modes can be synthesized in SImode, but the range
9874 of useful sub-operations is limited. Check for shift operations
9875 on one of the operands. Only left shifts can be used in the
9876 narrow modes. */
9877 if (GET_MODE_CLASS (mode) == MODE_INT
9878 && GET_MODE_SIZE (mode) < 4)
9879 {
9880 rtx shift_op, shift_reg;
9881 shift_reg = NULL;
9882
9883 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9884
9885 if (CONST_INT_P (XEXP (x, 1)))
9886 {
9887 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9888 INTVAL (XEXP (x, 1)), NULL_RTX,
9889 NULL_RTX, 1, 0);
9890 *cost = COSTS_N_INSNS (insns);
9891 if (speed_p)
9892 *cost += insns * extra_cost->alu.arith;
9893 /* Slightly penalize a narrow operation as the result may
9894 need widening. */
9895 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9896 return true;
9897 }
9898
9899 /* Slightly penalize a narrow operation as the result may
9900 need widening. */
9901 *cost = 1 + COSTS_N_INSNS (1);
9902 if (speed_p)
9903 *cost += extra_cost->alu.arith;
9904
9905 return false;
9906 }
9907
9908 if (mode == SImode)
9909 {
9910 rtx shift_op, shift_reg;
9911
9912 *cost = COSTS_N_INSNS (1);
9913 if (TARGET_INT_SIMD
9914 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9915 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9916 {
9917 /* UXTA[BH] or SXTA[BH]. */
9918 if (speed_p)
9919 *cost += extra_cost->alu.extend_arith;
9920 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9921 speed_p)
9922 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9923 return true;
9924 }
9925
9926 shift_reg = NULL;
9927 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9928 if (shift_op != NULL)
9929 {
9930 if (shift_reg)
9931 {
9932 if (speed_p)
9933 *cost += extra_cost->alu.arith_shift_reg;
9934 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9935 }
9936 else if (speed_p)
9937 *cost += extra_cost->alu.arith_shift;
9938
9939 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9940 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9941 return true;
9942 }
9943 if (GET_CODE (XEXP (x, 0)) == MULT)
9944 {
9945 rtx mul_op = XEXP (x, 0);
9946
9947 *cost = COSTS_N_INSNS (1);
9948
9949 if (TARGET_DSP_MULTIPLY
9950 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9951 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9952 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9953 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9954 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9955 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9956 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9957 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9958 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9959 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9960 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9961 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9962 == 16))))))
9963 {
9964 /* SMLA[BT][BT]. */
9965 if (speed_p)
9966 *cost += extra_cost->mult[0].extend_add;
9967 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9968 SIGN_EXTEND, 0, speed_p)
9969 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9970 SIGN_EXTEND, 0, speed_p)
9971 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9972 return true;
9973 }
9974
9975 if (speed_p)
9976 *cost += extra_cost->mult[0].add;
9977 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9978 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9979 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9980 return true;
9981 }
9982 if (CONST_INT_P (XEXP (x, 1)))
9983 {
9984 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9985 INTVAL (XEXP (x, 1)), NULL_RTX,
9986 NULL_RTX, 1, 0);
9987 *cost = COSTS_N_INSNS (insns);
9988 if (speed_p)
9989 *cost += insns * extra_cost->alu.arith;
9990 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9991 return true;
9992 }
9993 else if (speed_p)
9994 *cost += extra_cost->alu.arith;
9995
9996 return false;
9997 }
9998
9999 if (mode == DImode)
10000 {
10001 if (arm_arch3m
10002 && GET_CODE (XEXP (x, 0)) == MULT
10003 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10004 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10005 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10006 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10007 {
10008 *cost = COSTS_N_INSNS (1);
10009 if (speed_p)
10010 *cost += extra_cost->mult[1].extend_add;
10011 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10012 ZERO_EXTEND, 0, speed_p)
10013 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10014 ZERO_EXTEND, 0, speed_p)
10015 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10016 return true;
10017 }
10018
10019 *cost = COSTS_N_INSNS (2);
10020
10021 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10022 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10023 {
10024 if (speed_p)
10025 *cost += (extra_cost->alu.arith
10026 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10027 ? extra_cost->alu.arith
10028 : extra_cost->alu.arith_shift));
10029
10030 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10031 speed_p)
10032 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10033 return true;
10034 }
10035
10036 if (speed_p)
10037 *cost += 2 * extra_cost->alu.arith;
10038 return false;
10039 }
10040
10041 /* Vector mode? */
10042 *cost = LIBCALL_COST (2);
10043 return false;
10044 case IOR:
10045 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10046 {
10047 *cost = COSTS_N_INSNS (1);
10048 if (speed_p)
10049 *cost += extra_cost->alu.rev;
10050
10051 return true;
10052 }
10053 /* Fall through. */
10054 case AND: case XOR:
10055 if (mode == SImode)
10056 {
10057 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10058 rtx op0 = XEXP (x, 0);
10059 rtx shift_op, shift_reg;
10060
10061 *cost = COSTS_N_INSNS (1);
10062
10063 if (subcode == NOT
10064 && (code == AND
10065 || (code == IOR && TARGET_THUMB2)))
10066 op0 = XEXP (op0, 0);
10067
10068 shift_reg = NULL;
10069 shift_op = shifter_op_p (op0, &shift_reg);
10070 if (shift_op != NULL)
10071 {
10072 if (shift_reg)
10073 {
10074 if (speed_p)
10075 *cost += extra_cost->alu.log_shift_reg;
10076 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10077 }
10078 else if (speed_p)
10079 *cost += extra_cost->alu.log_shift;
10080
10081 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10082 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10083 return true;
10084 }
10085
10086 if (CONST_INT_P (XEXP (x, 1)))
10087 {
10088 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10089 INTVAL (XEXP (x, 1)), NULL_RTX,
10090 NULL_RTX, 1, 0);
10091
10092 *cost = COSTS_N_INSNS (insns);
10093 if (speed_p)
10094 *cost += insns * extra_cost->alu.logical;
10095 *cost += rtx_cost (op0, code, 0, speed_p);
10096 return true;
10097 }
10098
10099 if (speed_p)
10100 *cost += extra_cost->alu.logical;
10101 *cost += (rtx_cost (op0, code, 0, speed_p)
10102 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10103 return true;
10104 }
10105
10106 if (mode == DImode)
10107 {
10108 rtx op0 = XEXP (x, 0);
10109 enum rtx_code subcode = GET_CODE (op0);
10110
10111 *cost = COSTS_N_INSNS (2);
10112
10113 if (subcode == NOT
10114 && (code == AND
10115 || (code == IOR && TARGET_THUMB2)))
10116 op0 = XEXP (op0, 0);
10117
10118 if (GET_CODE (op0) == ZERO_EXTEND)
10119 {
10120 if (speed_p)
10121 *cost += 2 * extra_cost->alu.logical;
10122
10123 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10124 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10125 return true;
10126 }
10127 else if (GET_CODE (op0) == SIGN_EXTEND)
10128 {
10129 if (speed_p)
10130 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10131
10132 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10133 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10134 return true;
10135 }
10136
10137 if (speed_p)
10138 *cost += 2 * extra_cost->alu.logical;
10139
10140 return true;
10141 }
10142 /* Vector mode? */
10143
10144 *cost = LIBCALL_COST (2);
10145 return false;
10146
10147 case MULT:
10148 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10149 && (mode == SFmode || !TARGET_VFP_SINGLE))
10150 {
10151 rtx op0 = XEXP (x, 0);
10152
10153 *cost = COSTS_N_INSNS (1);
10154
10155 if (GET_CODE (op0) == NEG)
10156 op0 = XEXP (op0, 0);
10157
10158 if (speed_p)
10159 *cost += extra_cost->fp[mode != SFmode].mult;
10160
10161 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10162 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10163 return true;
10164 }
10165 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10166 {
10167 *cost = LIBCALL_COST (2);
10168 return false;
10169 }
10170
10171 if (mode == SImode)
10172 {
10173 *cost = COSTS_N_INSNS (1);
10174 if (TARGET_DSP_MULTIPLY
10175 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10176 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10177 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10178 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10179 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10180 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10182 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10183 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10184 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10186 && (INTVAL (XEXP (XEXP (x, 1), 1))
10187 == 16))))))
10188 {
10189 /* SMUL[TB][TB]. */
10190 if (speed_p)
10191 *cost += extra_cost->mult[0].extend;
10192 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10193 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10194 return true;
10195 }
10196 if (speed_p)
10197 *cost += extra_cost->mult[0].simple;
10198 return false;
10199 }
10200
10201 if (mode == DImode)
10202 {
10203 if (arm_arch3m
10204 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10205 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10206 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10207 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10208 {
10209 *cost = COSTS_N_INSNS (1);
10210 if (speed_p)
10211 *cost += extra_cost->mult[1].extend;
10212 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10213 ZERO_EXTEND, 0, speed_p)
10214 + rtx_cost (XEXP (XEXP (x, 1), 0),
10215 ZERO_EXTEND, 0, speed_p));
10216 return true;
10217 }
10218
10219 *cost = LIBCALL_COST (2);
10220 return false;
10221 }
10222
10223 /* Vector mode? */
10224 *cost = LIBCALL_COST (2);
10225 return false;
10226
10227 case NEG:
10228 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10229 && (mode == SFmode || !TARGET_VFP_SINGLE))
10230 {
10231 *cost = COSTS_N_INSNS (1);
10232 if (speed_p)
10233 *cost += extra_cost->fp[mode != SFmode].neg;
10234
10235 return false;
10236 }
10237 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10238 {
10239 *cost = LIBCALL_COST (1);
10240 return false;
10241 }
10242
10243 if (mode == SImode)
10244 {
10245 if (GET_CODE (XEXP (x, 0)) == ABS)
10246 {
10247 *cost = COSTS_N_INSNS (2);
10248 /* Assume the non-flag-changing variant. */
10249 if (speed_p)
10250 *cost += (extra_cost->alu.log_shift
10251 + extra_cost->alu.arith_shift);
10252 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10253 return true;
10254 }
10255
10256 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10257 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10258 {
10259 *cost = COSTS_N_INSNS (2);
10260 /* No extra cost for MOV imm and MVN imm. */
10261 /* If the comparison op is using the flags, there's no further
10262 cost, otherwise we need to add the cost of the comparison. */
10263 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10264 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10265 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10266 {
10267 *cost += (COSTS_N_INSNS (1)
10268 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10269 speed_p)
10270 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10271 speed_p));
10272 if (speed_p)
10273 *cost += extra_cost->alu.arith;
10274 }
10275 return true;
10276 }
10277 *cost = COSTS_N_INSNS (1);
10278 if (speed_p)
10279 *cost += extra_cost->alu.arith;
10280 return false;
10281 }
10282
10283 if (GET_MODE_CLASS (mode) == MODE_INT
10284 && GET_MODE_SIZE (mode) < 4)
10285 {
10286 /* Slightly disparage, as we might need an extend operation. */
10287 *cost = 1 + COSTS_N_INSNS (1);
10288 if (speed_p)
10289 *cost += extra_cost->alu.arith;
10290 return false;
10291 }
10292
10293 if (mode == DImode)
10294 {
10295 *cost = COSTS_N_INSNS (2);
10296 if (speed_p)
10297 *cost += 2 * extra_cost->alu.arith;
10298 return false;
10299 }
10300
10301 /* Vector mode? */
10302 *cost = LIBCALL_COST (1);
10303 return false;
10304
10305 case NOT:
10306 if (mode == SImode)
10307 {
10308 rtx shift_op;
10309 rtx shift_reg = NULL;
10310
10311 *cost = COSTS_N_INSNS (1);
10312 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10313
10314 if (shift_op)
10315 {
10316 if (shift_reg != NULL)
10317 {
10318 if (speed_p)
10319 *cost += extra_cost->alu.log_shift_reg;
10320 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10321 }
10322 else if (speed_p)
10323 *cost += extra_cost->alu.log_shift;
10324 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10325 return true;
10326 }
10327
10328 if (speed_p)
10329 *cost += extra_cost->alu.logical;
10330 return false;
10331 }
10332 if (mode == DImode)
10333 {
10334 *cost = COSTS_N_INSNS (2);
10335 return false;
10336 }
10337
10338 /* Vector mode? */
10339
10340 *cost += LIBCALL_COST (1);
10341 return false;
10342
10343 case IF_THEN_ELSE:
10344 {
10345 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10346 {
10347 *cost = COSTS_N_INSNS (4);
10348 return true;
10349 }
10350 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10351 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10352
10353 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10354 /* Assume that if one arm of the if_then_else is a register,
10355 that it will be tied with the result and eliminate the
10356 conditional insn. */
10357 if (REG_P (XEXP (x, 1)))
10358 *cost += op2cost;
10359 else if (REG_P (XEXP (x, 2)))
10360 *cost += op1cost;
10361 else
10362 {
10363 if (speed_p)
10364 {
10365 if (extra_cost->alu.non_exec_costs_exec)
10366 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10367 else
10368 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10369 }
10370 else
10371 *cost += op1cost + op2cost;
10372 }
10373 }
10374 return true;
10375
10376 case COMPARE:
10377 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10378 *cost = 0;
10379 else
10380 {
10381 machine_mode op0mode;
10382 /* We'll mostly assume that the cost of a compare is the cost of the
10383 LHS. However, there are some notable exceptions. */
10384
10385 /* Floating point compares are never done as side-effects. */
10386 op0mode = GET_MODE (XEXP (x, 0));
10387 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10388 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10389 {
10390 *cost = COSTS_N_INSNS (1);
10391 if (speed_p)
10392 *cost += extra_cost->fp[op0mode != SFmode].compare;
10393
10394 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10395 {
10396 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10397 return true;
10398 }
10399
10400 return false;
10401 }
10402 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10403 {
10404 *cost = LIBCALL_COST (2);
10405 return false;
10406 }
10407
10408 /* DImode compares normally take two insns. */
10409 if (op0mode == DImode)
10410 {
10411 *cost = COSTS_N_INSNS (2);
10412 if (speed_p)
10413 *cost += 2 * extra_cost->alu.arith;
10414 return false;
10415 }
10416
10417 if (op0mode == SImode)
10418 {
10419 rtx shift_op;
10420 rtx shift_reg;
10421
10422 if (XEXP (x, 1) == const0_rtx
10423 && !(REG_P (XEXP (x, 0))
10424 || (GET_CODE (XEXP (x, 0)) == SUBREG
10425 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10426 {
10427 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10428
10429 /* Multiply operations that set the flags are often
10430 significantly more expensive. */
10431 if (speed_p
10432 && GET_CODE (XEXP (x, 0)) == MULT
10433 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10434 *cost += extra_cost->mult[0].flag_setting;
10435
10436 if (speed_p
10437 && GET_CODE (XEXP (x, 0)) == PLUS
10438 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10439 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10440 0), 1), mode))
10441 *cost += extra_cost->mult[0].flag_setting;
10442 return true;
10443 }
10444
10445 shift_reg = NULL;
10446 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10447 if (shift_op != NULL)
10448 {
10449 *cost = COSTS_N_INSNS (1);
10450 if (shift_reg != NULL)
10451 {
10452 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10453 if (speed_p)
10454 *cost += extra_cost->alu.arith_shift_reg;
10455 }
10456 else if (speed_p)
10457 *cost += extra_cost->alu.arith_shift;
10458 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10459 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10460 return true;
10461 }
10462
10463 *cost = COSTS_N_INSNS (1);
10464 if (speed_p)
10465 *cost += extra_cost->alu.arith;
10466 if (CONST_INT_P (XEXP (x, 1))
10467 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10468 {
10469 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10470 return true;
10471 }
10472 return false;
10473 }
10474
10475 /* Vector mode? */
10476
10477 *cost = LIBCALL_COST (2);
10478 return false;
10479 }
10480 return true;
10481
10482 case EQ:
10483 case NE:
10484 case LT:
10485 case LE:
10486 case GT:
10487 case GE:
10488 case LTU:
10489 case LEU:
10490 case GEU:
10491 case GTU:
10492 case ORDERED:
10493 case UNORDERED:
10494 case UNEQ:
10495 case UNLE:
10496 case UNLT:
10497 case UNGE:
10498 case UNGT:
10499 case LTGT:
10500 if (outer_code == SET)
10501 {
10502 /* Is it a store-flag operation? */
10503 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10504 && XEXP (x, 1) == const0_rtx)
10505 {
10506 /* Thumb also needs an IT insn. */
10507 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10508 return true;
10509 }
10510 if (XEXP (x, 1) == const0_rtx)
10511 {
10512 switch (code)
10513 {
10514 case LT:
10515 /* LSR Rd, Rn, #31. */
10516 *cost = COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.shift;
10519 break;
10520
10521 case EQ:
10522 /* RSBS T1, Rn, #0
10523 ADC Rd, Rn, T1. */
10524
10525 case NE:
10526 /* SUBS T1, Rn, #1
10527 SBC Rd, Rn, T1. */
10528 *cost = COSTS_N_INSNS (2);
10529 break;
10530
10531 case LE:
10532 /* RSBS T1, Rn, Rn, LSR #31
10533 ADC Rd, Rn, T1. */
10534 *cost = COSTS_N_INSNS (2);
10535 if (speed_p)
10536 *cost += extra_cost->alu.arith_shift;
10537 break;
10538
10539 case GT:
10540 /* RSB Rd, Rn, Rn, ASR #1
10541 LSR Rd, Rd, #31. */
10542 *cost = COSTS_N_INSNS (2);
10543 if (speed_p)
10544 *cost += (extra_cost->alu.arith_shift
10545 + extra_cost->alu.shift);
10546 break;
10547
10548 case GE:
10549 /* ASR Rd, Rn, #31
10550 ADD Rd, Rn, #1. */
10551 *cost = COSTS_N_INSNS (2);
10552 if (speed_p)
10553 *cost += extra_cost->alu.shift;
10554 break;
10555
10556 default:
10557 /* Remaining cases are either meaningless or would take
10558 three insns anyway. */
10559 *cost = COSTS_N_INSNS (3);
10560 break;
10561 }
10562 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10563 return true;
10564 }
10565 else
10566 {
10567 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10568 if (CONST_INT_P (XEXP (x, 1))
10569 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10570 {
10571 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10572 return true;
10573 }
10574
10575 return false;
10576 }
10577 }
10578 /* Not directly inside a set. If it involves the condition code
10579 register it must be the condition for a branch, cond_exec or
10580 I_T_E operation. Since the comparison is performed elsewhere
10581 this is just the control part which has no additional
10582 cost. */
10583 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10584 && XEXP (x, 1) == const0_rtx)
10585 {
10586 *cost = 0;
10587 return true;
10588 }
10589 return false;
10590
10591 case ABS:
10592 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10593 && (mode == SFmode || !TARGET_VFP_SINGLE))
10594 {
10595 *cost = COSTS_N_INSNS (1);
10596 if (speed_p)
10597 *cost += extra_cost->fp[mode != SFmode].neg;
10598
10599 return false;
10600 }
10601 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10602 {
10603 *cost = LIBCALL_COST (1);
10604 return false;
10605 }
10606
10607 if (mode == SImode)
10608 {
10609 *cost = COSTS_N_INSNS (1);
10610 if (speed_p)
10611 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10612 return false;
10613 }
10614 /* Vector mode? */
10615 *cost = LIBCALL_COST (1);
10616 return false;
10617
10618 case SIGN_EXTEND:
10619 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10620 && MEM_P (XEXP (x, 0)))
10621 {
10622 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10623
10624 if (mode == DImode)
10625 *cost += COSTS_N_INSNS (1);
10626
10627 if (!speed_p)
10628 return true;
10629
10630 if (GET_MODE (XEXP (x, 0)) == SImode)
10631 *cost += extra_cost->ldst.load;
10632 else
10633 *cost += extra_cost->ldst.load_sign_extend;
10634
10635 if (mode == DImode)
10636 *cost += extra_cost->alu.shift;
10637
10638 return true;
10639 }
10640
10641 /* Widening from less than 32-bits requires an extend operation. */
10642 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10643 {
10644 /* We have SXTB/SXTH. */
10645 *cost = COSTS_N_INSNS (1);
10646 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10647 if (speed_p)
10648 *cost += extra_cost->alu.extend;
10649 }
10650 else if (GET_MODE (XEXP (x, 0)) != SImode)
10651 {
10652 /* Needs two shifts. */
10653 *cost = COSTS_N_INSNS (2);
10654 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10655 if (speed_p)
10656 *cost += 2 * extra_cost->alu.shift;
10657 }
10658
10659 /* Widening beyond 32-bits requires one more insn. */
10660 if (mode == DImode)
10661 {
10662 *cost += COSTS_N_INSNS (1);
10663 if (speed_p)
10664 *cost += extra_cost->alu.shift;
10665 }
10666
10667 return true;
10668
10669 case ZERO_EXTEND:
10670 if ((arm_arch4
10671 || GET_MODE (XEXP (x, 0)) == SImode
10672 || GET_MODE (XEXP (x, 0)) == QImode)
10673 && MEM_P (XEXP (x, 0)))
10674 {
10675 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10676
10677 if (mode == DImode)
10678 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10679
10680 return true;
10681 }
10682
10683 /* Widening from less than 32-bits requires an extend operation. */
10684 if (GET_MODE (XEXP (x, 0)) == QImode)
10685 {
10686 /* UXTB can be a shorter instruction in Thumb2, but it might
10687 be slower than the AND Rd, Rn, #255 alternative. When
10688 optimizing for speed it should never be slower to use
10689 AND, and we don't really model 16-bit vs 32-bit insns
10690 here. */
10691 *cost = COSTS_N_INSNS (1);
10692 if (speed_p)
10693 *cost += extra_cost->alu.logical;
10694 }
10695 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10696 {
10697 /* We have UXTB/UXTH. */
10698 *cost = COSTS_N_INSNS (1);
10699 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10700 if (speed_p)
10701 *cost += extra_cost->alu.extend;
10702 }
10703 else if (GET_MODE (XEXP (x, 0)) != SImode)
10704 {
10705 /* Needs two shifts. It's marginally preferable to use
10706 shifts rather than two BIC instructions as the second
10707 shift may merge with a subsequent insn as a shifter
10708 op. */
10709 *cost = COSTS_N_INSNS (2);
10710 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10711 if (speed_p)
10712 *cost += 2 * extra_cost->alu.shift;
10713 }
10714 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10715 *cost = COSTS_N_INSNS (1);
10716
10717 /* Widening beyond 32-bits requires one more insn. */
10718 if (mode == DImode)
10719 {
10720 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10721 }
10722
10723 return true;
10724
10725 case CONST_INT:
10726 *cost = 0;
10727 /* CONST_INT has no mode, so we cannot tell for sure how many
10728 insns are really going to be needed. The best we can do is
10729 look at the value passed. If it fits in SImode, then assume
10730 that's the mode it will be used for. Otherwise assume it
10731 will be used in DImode. */
10732 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10733 mode = SImode;
10734 else
10735 mode = DImode;
10736
10737 /* Avoid blowing up in arm_gen_constant (). */
10738 if (!(outer_code == PLUS
10739 || outer_code == AND
10740 || outer_code == IOR
10741 || outer_code == XOR
10742 || outer_code == MINUS))
10743 outer_code = SET;
10744
10745 const_int_cost:
10746 if (mode == SImode)
10747 {
10748 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10749 INTVAL (x), NULL, NULL,
10750 0, 0));
10751 /* Extra costs? */
10752 }
10753 else
10754 {
10755 *cost += COSTS_N_INSNS (arm_gen_constant
10756 (outer_code, SImode, NULL,
10757 trunc_int_for_mode (INTVAL (x), SImode),
10758 NULL, NULL, 0, 0)
10759 + arm_gen_constant (outer_code, SImode, NULL,
10760 INTVAL (x) >> 32, NULL,
10761 NULL, 0, 0));
10762 /* Extra costs? */
10763 }
10764
10765 return true;
10766
10767 case CONST:
10768 case LABEL_REF:
10769 case SYMBOL_REF:
10770 if (speed_p)
10771 {
10772 if (arm_arch_thumb2 && !flag_pic)
10773 *cost = COSTS_N_INSNS (2);
10774 else
10775 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10776 }
10777 else
10778 *cost = COSTS_N_INSNS (2);
10779
10780 if (flag_pic)
10781 {
10782 *cost += COSTS_N_INSNS (1);
10783 if (speed_p)
10784 *cost += extra_cost->alu.arith;
10785 }
10786
10787 return true;
10788
10789 case CONST_FIXED:
10790 *cost = COSTS_N_INSNS (4);
10791 /* Fixme. */
10792 return true;
10793
10794 case CONST_DOUBLE:
10795 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10796 && (mode == SFmode || !TARGET_VFP_SINGLE))
10797 {
10798 if (vfp3_const_double_rtx (x))
10799 {
10800 *cost = COSTS_N_INSNS (1);
10801 if (speed_p)
10802 *cost += extra_cost->fp[mode == DFmode].fpconst;
10803 return true;
10804 }
10805
10806 if (speed_p)
10807 {
10808 *cost = COSTS_N_INSNS (1);
10809 if (mode == DFmode)
10810 *cost += extra_cost->ldst.loadd;
10811 else
10812 *cost += extra_cost->ldst.loadf;
10813 }
10814 else
10815 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10816
10817 return true;
10818 }
10819 *cost = COSTS_N_INSNS (4);
10820 return true;
10821
10822 case CONST_VECTOR:
10823 /* Fixme. */
10824 if (TARGET_NEON
10825 && TARGET_HARD_FLOAT
10826 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10827 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10828 *cost = COSTS_N_INSNS (1);
10829 else
10830 *cost = COSTS_N_INSNS (4);
10831 return true;
10832
10833 case HIGH:
10834 case LO_SUM:
10835 *cost = COSTS_N_INSNS (1);
10836 /* When optimizing for size, we prefer constant pool entries to
10837 MOVW/MOVT pairs, so bump the cost of these slightly. */
10838 if (!speed_p)
10839 *cost += 1;
10840 return true;
10841
10842 case CLZ:
10843 *cost = COSTS_N_INSNS (1);
10844 if (speed_p)
10845 *cost += extra_cost->alu.clz;
10846 return false;
10847
10848 case SMIN:
10849 if (XEXP (x, 1) == const0_rtx)
10850 {
10851 *cost = COSTS_N_INSNS (1);
10852 if (speed_p)
10853 *cost += extra_cost->alu.log_shift;
10854 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10855 return true;
10856 }
10857 /* Fall through. */
10858 case SMAX:
10859 case UMIN:
10860 case UMAX:
10861 *cost = COSTS_N_INSNS (2);
10862 return false;
10863
10864 case TRUNCATE:
10865 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10866 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10867 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10868 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10869 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10870 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10871 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10872 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10873 == ZERO_EXTEND))))
10874 {
10875 *cost = COSTS_N_INSNS (1);
10876 if (speed_p)
10877 *cost += extra_cost->mult[1].extend;
10878 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10879 speed_p)
10880 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10881 0, speed_p));
10882 return true;
10883 }
10884 *cost = LIBCALL_COST (1);
10885 return false;
10886
10887 case UNSPEC:
10888 return arm_unspec_cost (x, outer_code, speed_p, cost);
10889
10890 case PC:
10891 /* Reading the PC is like reading any other register. Writing it
10892 is more expensive, but we take that into account elsewhere. */
10893 *cost = 0;
10894 return true;
10895
10896 case ZERO_EXTRACT:
10897 /* TODO: Simple zero_extract of bottom bits using AND. */
10898 /* Fall through. */
10899 case SIGN_EXTRACT:
10900 if (arm_arch6
10901 && mode == SImode
10902 && CONST_INT_P (XEXP (x, 1))
10903 && CONST_INT_P (XEXP (x, 2)))
10904 {
10905 *cost = COSTS_N_INSNS (1);
10906 if (speed_p)
10907 *cost += extra_cost->alu.bfx;
10908 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10909 return true;
10910 }
10911 /* Without UBFX/SBFX, need to resort to shift operations. */
10912 *cost = COSTS_N_INSNS (2);
10913 if (speed_p)
10914 *cost += 2 * extra_cost->alu.shift;
10915 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10916 return true;
10917
10918 case FLOAT_EXTEND:
10919 if (TARGET_HARD_FLOAT)
10920 {
10921 *cost = COSTS_N_INSNS (1);
10922 if (speed_p)
10923 *cost += extra_cost->fp[mode == DFmode].widen;
10924 if (!TARGET_FPU_ARMV8
10925 && GET_MODE (XEXP (x, 0)) == HFmode)
10926 {
10927 /* Pre v8, widening HF->DF is a two-step process, first
10928 widening to SFmode. */
10929 *cost += COSTS_N_INSNS (1);
10930 if (speed_p)
10931 *cost += extra_cost->fp[0].widen;
10932 }
10933 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10934 return true;
10935 }
10936
10937 *cost = LIBCALL_COST (1);
10938 return false;
10939
10940 case FLOAT_TRUNCATE:
10941 if (TARGET_HARD_FLOAT)
10942 {
10943 *cost = COSTS_N_INSNS (1);
10944 if (speed_p)
10945 *cost += extra_cost->fp[mode == DFmode].narrow;
10946 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10947 return true;
10948 /* Vector modes? */
10949 }
10950 *cost = LIBCALL_COST (1);
10951 return false;
10952
10953 case FMA:
10954 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10955 {
10956 rtx op0 = XEXP (x, 0);
10957 rtx op1 = XEXP (x, 1);
10958 rtx op2 = XEXP (x, 2);
10959
10960 *cost = COSTS_N_INSNS (1);
10961
10962 /* vfms or vfnma. */
10963 if (GET_CODE (op0) == NEG)
10964 op0 = XEXP (op0, 0);
10965
10966 /* vfnms or vfnma. */
10967 if (GET_CODE (op2) == NEG)
10968 op2 = XEXP (op2, 0);
10969
10970 *cost += rtx_cost (op0, FMA, 0, speed_p);
10971 *cost += rtx_cost (op1, FMA, 1, speed_p);
10972 *cost += rtx_cost (op2, FMA, 2, speed_p);
10973
10974 if (speed_p)
10975 *cost += extra_cost->fp[mode == DFmode].fma;
10976
10977 return true;
10978 }
10979
10980 *cost = LIBCALL_COST (3);
10981 return false;
10982
10983 case FIX:
10984 case UNSIGNED_FIX:
10985 if (TARGET_HARD_FLOAT)
10986 {
10987 if (GET_MODE_CLASS (mode) == MODE_INT)
10988 {
10989 *cost = COSTS_N_INSNS (1);
10990 if (speed_p)
10991 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10992 /* Strip off the 'cost' of rounding towards zero. */
10993 if (GET_CODE (XEXP (x, 0)) == FIX)
10994 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10995 else
10996 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10997 /* ??? Increase the cost to deal with transferring from
10998 FP -> CORE registers? */
10999 return true;
11000 }
11001 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11002 && TARGET_FPU_ARMV8)
11003 {
11004 *cost = COSTS_N_INSNS (1);
11005 if (speed_p)
11006 *cost += extra_cost->fp[mode == DFmode].roundint;
11007 return false;
11008 }
11009 /* Vector costs? */
11010 }
11011 *cost = LIBCALL_COST (1);
11012 return false;
11013
11014 case FLOAT:
11015 case UNSIGNED_FLOAT:
11016 if (TARGET_HARD_FLOAT)
11017 {
11018 /* ??? Increase the cost to deal with transferring from CORE
11019 -> FP registers? */
11020 *cost = COSTS_N_INSNS (1);
11021 if (speed_p)
11022 *cost += extra_cost->fp[mode == DFmode].fromint;
11023 return false;
11024 }
11025 *cost = LIBCALL_COST (1);
11026 return false;
11027
11028 case CALL:
11029 *cost = COSTS_N_INSNS (1);
11030 return true;
11031
11032 case ASM_OPERANDS:
11033 {
11034 /* Just a guess: estimate the number of instructions in the asm
11035 string plus one insn per input, always with a minimum of
11036 COSTS_N_INSNS (1) (see PR60663). */
11037 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11038 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11039
11040 *cost = COSTS_N_INSNS (asm_length + num_operands);
11041 return true;
11042 }
11043 default:
11044 if (mode != VOIDmode)
11045 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11046 else
11047 *cost = COSTS_N_INSNS (4); /* Who knows? */
11048 return false;
11049 }
11050 }
11051
11052 #undef HANDLE_NARROW_SHIFT_ARITH
11053
11054 /* RTX cost hook: dispatch to the size-based, per-tune, or table-driven cost functions. */
11055 static bool
11056 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11057 int *total, bool speed)
11058 {
11059 bool result;
11060
11061 if (TARGET_OLD_RTX_COSTS
11062 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11063 {
11064 /* Old way. (Deprecated.) */
11065 if (!speed)
11066 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11067 (enum rtx_code) outer_code, total);
11068 else
11069 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11070 (enum rtx_code) outer_code, total,
11071 speed);
11072 }
11073 else
11074 {
11075 /* New way. */
11076 if (current_tune->insn_extra_cost)
11077 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11078 (enum rtx_code) outer_code,
11079 current_tune->insn_extra_cost,
11080 total, speed);
11081 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11082 && current_tune->insn_extra_cost == NULL */
11083 else
11084 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11085 (enum rtx_code) outer_code,
11086 &generic_extra_costs, total, speed);
11087 }
11088
11089 if (dump_file && (dump_flags & TDF_DETAILS))
11090 {
11091 print_rtl_single (dump_file, x);
11092 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11093 *total, result ? "final" : "partial");
11094 }
11095 return result;
11096 }
11097
11098 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11099 supported on any "slowmul" cores, so it can be ignored. */
11100
11101 static bool
11102 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11103 int *total, bool speed)
11104 {
11105 machine_mode mode = GET_MODE (x);
11106
11107 if (TARGET_THUMB)
11108 {
11109 *total = thumb1_rtx_costs (x, code, outer_code);
11110 return true;
11111 }
11112
11113 switch (code)
11114 {
11115 case MULT:
11116 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11117 || mode == DImode)
11118 {
11119 *total = COSTS_N_INSNS (20);
11120 return false;
11121 }
11122
11123 if (CONST_INT_P (XEXP (x, 1)))
11124 {
11125 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11126 & (unsigned HOST_WIDE_INT) 0xffffffff);
11127 int cost, const_ok = const_ok_for_arm (i);
11128 int j, booth_unit_size;
11129
11130 /* Tune as appropriate. */
11131 cost = const_ok ? 4 : 8;
11132 booth_unit_size = 2;
11133 for (j = 0; i && j < 32; j += booth_unit_size)
11134 {
11135 i >>= booth_unit_size;
11136 cost++;
11137 }
11138
11139 *total = COSTS_N_INSNS (cost);
11140 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11141 return true;
11142 }
11143
11144 *total = COSTS_N_INSNS (20);
11145 return false;
11146
11147 default:
11148 return arm_rtx_costs_1 (x, outer_code, total, speed);
11149 }
11150 }
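
/* Illustrative sketch (not part of arm.c, and not built anywhere): a
   standalone rendering of the constant-multiply estimate used in
   arm_slowmul_rtx_costs above, so the Booth-unit loop can be tried in
   isolation.  The helper name and the example values are hypothetical.  */
#include <stdio.h>

static int
example_booth_mul_cost (unsigned int multiplier, int const_ok,
                        int booth_unit_size)
{
  /* Base cost: 4 if the constant is directly encodable, 8 if it has to be
     built up first (the "Tune as appropriate" values above).  */
  int cost = const_ok ? 4 : 8;
  int j;

  /* One extra unit for every booth_unit_size bits of multiplier that are
     still non-zero.  */
  for (j = 0; multiplier && j < 32; j += booth_unit_size)
    {
      multiplier >>= booth_unit_size;
      cost++;
    }
  return cost;
}

int
main (void)
{
  /* A "slowmul" core retires 2 bits per step, a "fastmul" core 8.  */
  printf ("x * 100: slowmul %d units, fastmul %d units\n",
          example_booth_mul_cost (100, 1, 2),    /* 8 units.  */
          example_booth_mul_cost (100, 1, 8));   /* 5 units.  */
  return 0;
}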
11151
11152
11153 /* RTX cost for cores with a fast multiply unit (M variants). */
11154
11155 static bool
11156 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11157 int *total, bool speed)
11158 {
11159 machine_mode mode = GET_MODE (x);
11160
11161 if (TARGET_THUMB1)
11162 {
11163 *total = thumb1_rtx_costs (x, code, outer_code);
11164 return true;
11165 }
11166
11167 /* ??? Should Thumb-2 use different costs? */
11168 switch (code)
11169 {
11170 case MULT:
11171 /* There is no point basing this on the tuning, since it is always the
11172 fast variant if it exists at all. */
11173 if (mode == DImode
11174 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11175 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11176 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11177 {
11178 *total = COSTS_N_INSNS (2);
11179 return false;
11180 }
11181
11182
11183 if (mode == DImode)
11184 {
11185 *total = COSTS_N_INSNS (5);
11186 return false;
11187 }
11188
11189 if (CONST_INT_P (XEXP (x, 1)))
11190 {
11191 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11192 & (unsigned HOST_WIDE_INT) 0xffffffff);
11193 int cost, const_ok = const_ok_for_arm (i);
11194 int j, booth_unit_size;
11195
11196 /* Tune as appropriate. */
11197 cost = const_ok ? 4 : 8;
11198 booth_unit_size = 8;
11199 for (j = 0; i && j < 32; j += booth_unit_size)
11200 {
11201 i >>= booth_unit_size;
11202 cost++;
11203 }
11204
11205 *total = COSTS_N_INSNS (cost);
11206 return false;
11207 }
11208
11209 if (mode == SImode)
11210 {
11211 *total = COSTS_N_INSNS (4);
11212 return false;
11213 }
11214
11215 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11216 {
11217 if (TARGET_HARD_FLOAT
11218 && (mode == SFmode
11219 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11220 {
11221 *total = COSTS_N_INSNS (1);
11222 return false;
11223 }
11224 }
11225
11226 /* Requires a lib call. */
11227 *total = COSTS_N_INSNS (20);
11228 return false;
11229
11230 default:
11231 return arm_rtx_costs_1 (x, outer_code, total, speed);
11232 }
11233 }
11234
11235
11236 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11237 so it can be ignored. */
11238
11239 static bool
11240 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11241 int *total, bool speed)
11242 {
11243 machine_mode mode = GET_MODE (x);
11244
11245 if (TARGET_THUMB)
11246 {
11247 *total = thumb1_rtx_costs (x, code, outer_code);
11248 return true;
11249 }
11250
11251 switch (code)
11252 {
11253 case COMPARE:
11254 if (GET_CODE (XEXP (x, 0)) != MULT)
11255 return arm_rtx_costs_1 (x, outer_code, total, speed);
11256
11257 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11258 will stall until the multiplication is complete. */
11259 *total = COSTS_N_INSNS (3);
11260 return false;
11261
11262 case MULT:
11263 /* There is no point basing this on the tuning, since it is always the
11264 fast variant if it exists at all. */
11265 if (mode == DImode
11266 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11267 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11268 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11269 {
11270 *total = COSTS_N_INSNS (2);
11271 return false;
11272 }
11273
11274
11275 if (mode == DImode)
11276 {
11277 *total = COSTS_N_INSNS (5);
11278 return false;
11279 }
11280
11281 if (CONST_INT_P (XEXP (x, 1)))
11282 {
11283 /* If operand 1 is a constant we can more accurately
11284 calculate the cost of the multiply. The multiplier can
11285 retire 15 bits on the first cycle and a further 12 on the
11286 second. We do, of course, have to load the constant into
11287 a register first. */
11288 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11289 /* There's a general overhead of one cycle. */
11290 int cost = 1;
11291 unsigned HOST_WIDE_INT masked_const;
11292
11293 if (i & 0x80000000)
11294 i = ~i;
11295
11296 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11297
11298 masked_const = i & 0xffff8000;
11299 if (masked_const != 0)
11300 {
11301 cost++;
11302 masked_const = i & 0xf8000000;
11303 if (masked_const != 0)
11304 cost++;
11305 }
11306 *total = COSTS_N_INSNS (cost);
11307 return false;
11308 }
11309
11310 if (mode == SImode)
11311 {
11312 *total = COSTS_N_INSNS (3);
11313 return false;
11314 }
11315
11316 /* Requires a lib call. */
11317 *total = COSTS_N_INSNS (20);
11318 return false;
11319
11320 default:
11321 return arm_rtx_costs_1 (x, outer_code, total, speed);
11322 }
11323 }
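
/* Illustrative sketch (not part of arm.c): the XScale early-termination
   model from the MULT case above, as a standalone helper.  The multiplier
   retires 15 bits in the first cycle and a further 12 in the second, on
   top of one cycle of general overhead.  The helper name and the example
   values in the trailing comment are hypothetical.  */
static int
example_xscale_mul_cycles (unsigned int i)
{
  int cycles = 1;                 /* General overhead.  */

  if (i & 0x80000000)             /* Invert negative-looking constants so   */
    i = ~i;                       /* that leading ones also terminate early. */

  if ((i & 0xffff8000) != 0)      /* Bits survive the 15-bit first step.  */
    {
      cycles++;
      if ((i & 0xf8000000) != 0)  /* Bits survive the further 12-bit step.  */
        cycles++;
    }
  return cycles;                  /* E.g. 0x7f -> 1, 0x12345 -> 2,
                                     0x12345678 -> 3.  */
}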
11324
11325
11326 /* RTX costs for 9e (and later) cores. */
11327
11328 static bool
11329 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11330 int *total, bool speed)
11331 {
11332 machine_mode mode = GET_MODE (x);
11333
11334 if (TARGET_THUMB1)
11335 {
11336 switch (code)
11337 {
11338 case MULT:
11339 /* Small multiply: 32 cycles for an integer multiply inst. */
11340 if (arm_arch6m && arm_m_profile_small_mul)
11341 *total = COSTS_N_INSNS (32);
11342 else
11343 *total = COSTS_N_INSNS (3);
11344 return true;
11345
11346 default:
11347 *total = thumb1_rtx_costs (x, code, outer_code);
11348 return true;
11349 }
11350 }
11351
11352 switch (code)
11353 {
11354 case MULT:
11355 /* There is no point basing this on the tuning, since it is always the
11356 fast variant if it exists at all. */
11357 if (mode == DImode
11358 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11359 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11360 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11361 {
11362 *total = COSTS_N_INSNS (2);
11363 return false;
11364 }
11365
11366
11367 if (mode == DImode)
11368 {
11369 *total = COSTS_N_INSNS (5);
11370 return false;
11371 }
11372
11373 if (mode == SImode)
11374 {
11375 *total = COSTS_N_INSNS (2);
11376 return false;
11377 }
11378
11379 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11380 {
11381 if (TARGET_HARD_FLOAT
11382 && (mode == SFmode
11383 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11384 {
11385 *total = COSTS_N_INSNS (1);
11386 return false;
11387 }
11388 }
11389
11390 *total = COSTS_N_INSNS (20);
11391 return false;
11392
11393 default:
11394 return arm_rtx_costs_1 (x, outer_code, total, speed);
11395 }
11396 }
11397 /* All address computations that can be done are free, but rtx cost returns
11398 the same for practically all of them. So we weight the different types
11399 of address here in the order (most preferred first):
11400 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
11401 static inline int
11402 arm_arm_address_cost (rtx x)
11403 {
11404 enum rtx_code c = GET_CODE (x);
11405
11406 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11407 return 0;
11408 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11409 return 10;
11410
11411 if (c == PLUS)
11412 {
11413 if (CONST_INT_P (XEXP (x, 1)))
11414 return 2;
11415
11416 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11417 return 3;
11418
11419 return 4;
11420 }
11421
11422 return 6;
11423 }
11424
11425 static inline int
11426 arm_thumb_address_cost (rtx x)
11427 {
11428 enum rtx_code c = GET_CODE (x);
11429
11430 if (c == REG)
11431 return 1;
11432 if (c == PLUS
11433 && REG_P (XEXP (x, 0))
11434 && CONST_INT_P (XEXP (x, 1)))
11435 return 1;
11436
11437 return 2;
11438 }
11439
11440 static int
11441 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11442 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11443 {
11444 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11445 }
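
/* Illustrative note (hypothetical operands, added here as a worked example
   of the weights chosen by arm_arm_address_cost): lower is more preferred.

       [r0], #4            (POST_INC)              -> 0
       [r0, #8]            (PLUS reg, const_int)   -> 2
       [r0, r1, lsl #2]    (PLUS with arithmetic)  -> 3
       [r0, r1]            (PLUS reg, reg)         -> 4
       r0                  (plain REG)             -> 6
       =label              (SYMBOL_REF/LABEL_REF)  -> 10  */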
11446
11447 /* Adjust cost hook for XScale. */
11448 static bool
11449 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11450 {
11451 /* Some true dependencies can have a higher cost depending
11452 on precisely how certain input operands are used. */
11453 if (REG_NOTE_KIND(link) == 0
11454 && recog_memoized (insn) >= 0
11455 && recog_memoized (dep) >= 0)
11456 {
11457 int shift_opnum = get_attr_shift (insn);
11458 enum attr_type attr_type = get_attr_type (dep);
11459
11460 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11461 operand for INSN. If we have a shifted input operand and the
11462 instruction we depend on is another ALU instruction, then we may
11463 have to account for an additional stall. */
11464 if (shift_opnum != 0
11465 && (attr_type == TYPE_ALU_SHIFT_IMM
11466 || attr_type == TYPE_ALUS_SHIFT_IMM
11467 || attr_type == TYPE_LOGIC_SHIFT_IMM
11468 || attr_type == TYPE_LOGICS_SHIFT_IMM
11469 || attr_type == TYPE_ALU_SHIFT_REG
11470 || attr_type == TYPE_ALUS_SHIFT_REG
11471 || attr_type == TYPE_LOGIC_SHIFT_REG
11472 || attr_type == TYPE_LOGICS_SHIFT_REG
11473 || attr_type == TYPE_MOV_SHIFT
11474 || attr_type == TYPE_MVN_SHIFT
11475 || attr_type == TYPE_MOV_SHIFT_REG
11476 || attr_type == TYPE_MVN_SHIFT_REG))
11477 {
11478 rtx shifted_operand;
11479 int opno;
11480
11481 /* Get the shifted operand. */
11482 extract_insn (insn);
11483 shifted_operand = recog_data.operand[shift_opnum];
11484
11485 /* Iterate over all the operands in DEP. If we write an operand
11486 that overlaps with SHIFTED_OPERAND, then we have to increase the
11487 cost of this dependency. */
11488 extract_insn (dep);
11489 preprocess_constraints (dep);
11490 for (opno = 0; opno < recog_data.n_operands; opno++)
11491 {
11492 /* We can ignore strict inputs. */
11493 if (recog_data.operand_type[opno] == OP_IN)
11494 continue;
11495
11496 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11497 shifted_operand))
11498 {
11499 *cost = 2;
11500 return false;
11501 }
11502 }
11503 }
11504 }
11505 return true;
11506 }
11507
11508 /* Adjust cost hook for Cortex A9. */
11509 static bool
11510 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11511 {
11512 switch (REG_NOTE_KIND (link))
11513 {
11514 case REG_DEP_ANTI:
11515 *cost = 0;
11516 return false;
11517
11518 case REG_DEP_TRUE:
11519 case REG_DEP_OUTPUT:
11520 if (recog_memoized (insn) >= 0
11521 && recog_memoized (dep) >= 0)
11522 {
11523 if (GET_CODE (PATTERN (insn)) == SET)
11524 {
11525 if (GET_MODE_CLASS
11526 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11527 || GET_MODE_CLASS
11528 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11529 {
11530 enum attr_type attr_type_insn = get_attr_type (insn);
11531 enum attr_type attr_type_dep = get_attr_type (dep);
11532
11533 /* By default all dependencies of the form
11534 s0 = s0 <op> s1
11535 s0 = s0 <op> s2
11536 have an extra latency of 1 cycle because
11537 of the input and output dependency in this
11538 case. However, this gets modeled as a true
11539 dependency and hence all these checks. */
11540 if (REG_P (SET_DEST (PATTERN (insn)))
11541 && REG_P (SET_DEST (PATTERN (dep)))
11542 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11543 SET_DEST (PATTERN (dep))))
11544 {
11545 /* FMACS is a special case where the dependent
11546 instruction can be issued 3 cycles before
11547 the normal latency in case of an output
11548 dependency. */
11549 if ((attr_type_insn == TYPE_FMACS
11550 || attr_type_insn == TYPE_FMACD)
11551 && (attr_type_dep == TYPE_FMACS
11552 || attr_type_dep == TYPE_FMACD))
11553 {
11554 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11555 *cost = insn_default_latency (dep) - 3;
11556 else
11557 *cost = insn_default_latency (dep);
11558 return false;
11559 }
11560 else
11561 {
11562 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11563 *cost = insn_default_latency (dep) + 1;
11564 else
11565 *cost = insn_default_latency (dep);
11566 }
11567 return false;
11568 }
11569 }
11570 }
11571 }
11572 break;
11573
11574 default:
11575 gcc_unreachable ();
11576 }
11577
11578 return true;
11579 }
11580
11581 /* Adjust cost hook for FA726TE. */
11582 static bool
11583 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11584 {
11585 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11586 followed by a predicated one) has a penalty of 3. */
11587 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11588 && recog_memoized (insn) >= 0
11589 && recog_memoized (dep) >= 0
11590 && get_attr_conds (dep) == CONDS_SET)
11591 {
11592 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11593 if (get_attr_conds (insn) == CONDS_USE
11594 && get_attr_type (insn) != TYPE_BRANCH)
11595 {
11596 *cost = 3;
11597 return false;
11598 }
11599
11600 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11601 || get_attr_conds (insn) == CONDS_USE)
11602 {
11603 *cost = 0;
11604 return false;
11605 }
11606 }
11607
11608 return true;
11609 }
11610
11611 /* Implement TARGET_REGISTER_MOVE_COST.
11612
11613 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11614 such a move is typically more expensive than a single memory access. We set
11615 the cost to less than two memory accesses so that floating
11616 point to integer conversion does not go through memory. */
11617
11618 int
11619 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11620 reg_class_t from, reg_class_t to)
11621 {
11622 if (TARGET_32BIT)
11623 {
11624 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11625 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11626 return 15;
11627 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11628 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11629 return 4;
11630 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11631 return 20;
11632 else
11633 return 2;
11634 }
11635 else
11636 {
11637 if (from == HI_REGS || to == HI_REGS)
11638 return 4;
11639 else
11640 return 2;
11641 }
11642 }
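
/* Illustrative note (a worked example tying the two hooks together): with
   TARGET_32BIT, arm_memory_move_cost below returns 10, so the VFP <-> core
   figure of 15 above sits between one memory access (10) and two (20).
   That is what lets float -> int conversions stay in registers while still
   discouraging gratuitous moves between the register files.  */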
11643
11644 /* Implement TARGET_MEMORY_MOVE_COST. */
11645
11646 int
11647 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11648 bool in ATTRIBUTE_UNUSED)
11649 {
11650 if (TARGET_32BIT)
11651 return 10;
11652 else
11653 {
11654 if (GET_MODE_SIZE (mode) < 4)
11655 return 8;
11656 else
11657 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11658 }
11659 }
11660
11661 /* Vectorizer cost model implementation. */
11662
11663 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11664 static int
11665 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11666 tree vectype,
11667 int misalign ATTRIBUTE_UNUSED)
11668 {
11669 unsigned elements;
11670
11671 switch (type_of_cost)
11672 {
11673 case scalar_stmt:
11674 return current_tune->vec_costs->scalar_stmt_cost;
11675
11676 case scalar_load:
11677 return current_tune->vec_costs->scalar_load_cost;
11678
11679 case scalar_store:
11680 return current_tune->vec_costs->scalar_store_cost;
11681
11682 case vector_stmt:
11683 return current_tune->vec_costs->vec_stmt_cost;
11684
11685 case vector_load:
11686 return current_tune->vec_costs->vec_align_load_cost;
11687
11688 case vector_store:
11689 return current_tune->vec_costs->vec_store_cost;
11690
11691 case vec_to_scalar:
11692 return current_tune->vec_costs->vec_to_scalar_cost;
11693
11694 case scalar_to_vec:
11695 return current_tune->vec_costs->scalar_to_vec_cost;
11696
11697 case unaligned_load:
11698 return current_tune->vec_costs->vec_unalign_load_cost;
11699
11700 case unaligned_store:
11701 return current_tune->vec_costs->vec_unalign_store_cost;
11702
11703 case cond_branch_taken:
11704 return current_tune->vec_costs->cond_taken_branch_cost;
11705
11706 case cond_branch_not_taken:
11707 return current_tune->vec_costs->cond_not_taken_branch_cost;
11708
11709 case vec_perm:
11710 case vec_promote_demote:
11711 return current_tune->vec_costs->vec_stmt_cost;
11712
11713 case vec_construct:
11714 elements = TYPE_VECTOR_SUBPARTS (vectype);
11715 return elements / 2 + 1;
11716
11717 default:
11718 gcc_unreachable ();
11719 }
11720 }
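
/* Illustrative note (hypothetical modes, a worked example of the
   vec_construct case above): a four-element vector such as V4SI is
   estimated at 4 / 2 + 1 = 3 units, a two-element V2DI at 2 / 2 + 1 = 2,
   and an eight-element V8HI at 8 / 2 + 1 = 5.  */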
11721
11722 /* Implement targetm.vectorize.add_stmt_cost. */
11723
11724 static unsigned
11725 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11726 struct _stmt_vec_info *stmt_info, int misalign,
11727 enum vect_cost_model_location where)
11728 {
11729 unsigned *cost = (unsigned *) data;
11730 unsigned retval = 0;
11731
11732 if (flag_vect_cost_model)
11733 {
11734 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11735 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11736
11737 /* Statements in an inner loop relative to the loop being
11738 vectorized are weighted more heavily. The value here is
11739 arbitrary and could potentially be improved with analysis. */
11740 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11741 count *= 50; /* FIXME. */
11742
11743 retval = (unsigned) (count * stmt_cost);
11744 cost[where] += retval;
11745 }
11746
11747 return retval;
11748 }
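
/* Illustrative note (hypothetical numbers, a worked example of the hook
   above): with the cost model enabled, a statement kind whose per-statement
   cost is 2, counted 3 times, accumulates 3 * 2 = 6 into its bucket; if the
   same statement sits in an inner loop of the loop being vectorized and is
   costed in the loop body, the count is first scaled by 50, giving
   3 * 50 * 2 = 300.  */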
11749
11750 /* Return true if and only if this insn can dual-issue only as older. */
11751 static bool
11752 cortexa7_older_only (rtx_insn *insn)
11753 {
11754 if (recog_memoized (insn) < 0)
11755 return false;
11756
11757 switch (get_attr_type (insn))
11758 {
11759 case TYPE_ALU_DSP_REG:
11760 case TYPE_ALU_SREG:
11761 case TYPE_ALUS_SREG:
11762 case TYPE_LOGIC_REG:
11763 case TYPE_LOGICS_REG:
11764 case TYPE_ADC_REG:
11765 case TYPE_ADCS_REG:
11766 case TYPE_ADR:
11767 case TYPE_BFM:
11768 case TYPE_REV:
11769 case TYPE_MVN_REG:
11770 case TYPE_SHIFT_IMM:
11771 case TYPE_SHIFT_REG:
11772 case TYPE_LOAD_BYTE:
11773 case TYPE_LOAD1:
11774 case TYPE_STORE1:
11775 case TYPE_FFARITHS:
11776 case TYPE_FADDS:
11777 case TYPE_FFARITHD:
11778 case TYPE_FADDD:
11779 case TYPE_FMOV:
11780 case TYPE_F_CVT:
11781 case TYPE_FCMPS:
11782 case TYPE_FCMPD:
11783 case TYPE_FCONSTS:
11784 case TYPE_FCONSTD:
11785 case TYPE_FMULS:
11786 case TYPE_FMACS:
11787 case TYPE_FMULD:
11788 case TYPE_FMACD:
11789 case TYPE_FDIVS:
11790 case TYPE_FDIVD:
11791 case TYPE_F_MRC:
11792 case TYPE_F_MRRC:
11793 case TYPE_F_FLAG:
11794 case TYPE_F_LOADS:
11795 case TYPE_F_STORES:
11796 return true;
11797 default:
11798 return false;
11799 }
11800 }
11801
11802 /* Return true if and only if this insn can dual-issue as younger. */
11803 static bool
11804 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11805 {
11806 if (recog_memoized (insn) < 0)
11807 {
11808 if (verbose > 5)
11809 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11810 return false;
11811 }
11812
11813 switch (get_attr_type (insn))
11814 {
11815 case TYPE_ALU_IMM:
11816 case TYPE_ALUS_IMM:
11817 case TYPE_LOGIC_IMM:
11818 case TYPE_LOGICS_IMM:
11819 case TYPE_EXTEND:
11820 case TYPE_MVN_IMM:
11821 case TYPE_MOV_IMM:
11822 case TYPE_MOV_REG:
11823 case TYPE_MOV_SHIFT:
11824 case TYPE_MOV_SHIFT_REG:
11825 case TYPE_BRANCH:
11826 case TYPE_CALL:
11827 return true;
11828 default:
11829 return false;
11830 }
11831 }
11832
11833
11834 /* Look for an instruction that can dual issue only as an older
11835 instruction, and move it in front of any instructions that can
11836 dual-issue as younger, while preserving the relative order of all
11837 other instructions in the ready list. This is a heuristic to help
11838 dual-issue in later cycles, by postponing issue of more flexible
11839 instructions. This heuristic may affect dual issue opportunities
11840 in the current cycle. */
11841 static void
11842 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11843 int *n_readyp, int clock)
11844 {
11845 int i;
11846 int first_older_only = -1, first_younger = -1;
11847
11848 if (verbose > 5)
11849 fprintf (file,
11850 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11851 clock,
11852 *n_readyp);
11853
11854 /* Traverse the ready list from the head (the instruction to issue
11855 first), looking for the first instruction that can issue as
11856 younger and the first instruction that can dual-issue only as
11857 older. */
11858 for (i = *n_readyp - 1; i >= 0; i--)
11859 {
11860 rtx_insn *insn = ready[i];
11861 if (cortexa7_older_only (insn))
11862 {
11863 first_older_only = i;
11864 if (verbose > 5)
11865 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11866 break;
11867 }
11868 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11869 first_younger = i;
11870 }
11871
11872 /* Nothing to reorder because either no younger insn found or insn
11873 that can dual-issue only as older appears before any insn that
11874 can dual-issue as younger. */
11875 if (first_younger == -1)
11876 {
11877 if (verbose > 5)
11878 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11879 return;
11880 }
11881
11882 /* Nothing to reorder because no older-only insn in the ready list. */
11883 if (first_older_only == -1)
11884 {
11885 if (verbose > 5)
11886 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11887 return;
11888 }
11889
11890 /* Move first_older_only insn before first_younger. */
11891 if (verbose > 5)
11892 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11893 INSN_UID (ready[first_older_only]),
11894 INSN_UID (ready[first_younger]));
11895 rtx_insn *first_older_only_insn = ready[first_older_only];
11896 for (i = first_older_only; i < first_younger; i++)
11897 {
11898 ready[i] = ready[i+1];
11899 }
11900
11901 ready[i] = first_older_only_insn;
11902 return;
11903 }
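
/* Illustrative sketch (not part of arm.c): the rotation performed by
   cortexa7_sched_reorder above, on a toy ready list of tags.  As in the
   scheduler, index n_ready - 1 is the head, i.e. the instruction that
   issues first.  All names and values here are hypothetical.  */
#include <stdio.h>

int
main (void)
{
  const char *ready[] = { "older_only_A", "younger_B", "younger_C" };
  int n_ready = 3;
  /* What the backwards scan would find: the older-only insn at index 0,
     and the first younger insn (the one closest to the head) at index 2.  */
  int first_older_only = 0, first_younger = 2;
  const char *first_older_only_insn = ready[first_older_only];
  int i;

  /* Shift the intervening entries down and place the older-only insn in
     the slot of the first younger insn, closer to the head.  */
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[i] = first_older_only_insn;

  /* Print in issue order: older_only_A, younger_C, younger_B.  */
  for (i = n_ready - 1; i >= 0; i--)
    printf ("%s\n", ready[i]);
  return 0;
}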
11904
11905 /* Implement TARGET_SCHED_REORDER. */
11906 static int
11907 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11908 int clock)
11909 {
11910 switch (arm_tune)
11911 {
11912 case cortexa7:
11913 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11914 break;
11915 default:
11916 /* Do nothing for other cores. */
11917 break;
11918 }
11919
11920 return arm_issue_rate ();
11921 }
11922
11923 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11924 It corrects the value of COST based on the relationship between
11925 INSN and DEP through the dependence LINK. It returns the new
11926 value. There is a per-core adjust_cost hook to adjust scheduler costs
11927 and the per-core hook can choose to completely override the generic
11928 adjust_cost function. Only put bits of code into arm_adjust_cost that
11929 are common across all cores. */
11930 static int
11931 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11932 {
11933 rtx i_pat, d_pat;
11934
11935 /* When generating Thumb-1 code, we want to place flag-setting operations
11936 close to a conditional branch which depends on them, so that we can
11937 omit the comparison. */
11938 if (TARGET_THUMB1
11939 && REG_NOTE_KIND (link) == 0
11940 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11941 && recog_memoized (dep) >= 0
11942 && get_attr_conds (dep) == CONDS_SET)
11943 return 0;
11944
11945 if (current_tune->sched_adjust_cost != NULL)
11946 {
11947 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11948 return cost;
11949 }
11950
11951 /* XXX Is this strictly true? */
11952 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11953 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11954 return 0;
11955
11956 /* Call insns don't incur a stall, even if they follow a load. */
11957 if (REG_NOTE_KIND (link) == 0
11958 && CALL_P (insn))
11959 return 1;
11960
11961 if ((i_pat = single_set (insn)) != NULL
11962 && MEM_P (SET_SRC (i_pat))
11963 && (d_pat = single_set (dep)) != NULL
11964 && MEM_P (SET_DEST (d_pat)))
11965 {
11966 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11967 /* This is a load after a store; there is no conflict if the load reads
11968 from a cached area. Assume that loads from the stack, and from the
11969 constant pool are cached, and that others will miss. This is a
11970 hack. */
11971
11972 if ((GET_CODE (src_mem) == SYMBOL_REF
11973 && CONSTANT_POOL_ADDRESS_P (src_mem))
11974 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11975 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11976 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11977 return 1;
11978 }
11979
11980 return cost;
11981 }
11982
11983 int
11984 arm_max_conditional_execute (void)
11985 {
11986 return max_insns_skipped;
11987 }
11988
11989 static int
11990 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11991 {
11992 if (TARGET_32BIT)
11993 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11994 else
11995 return (optimize > 0) ? 2 : 0;
11996 }
11997
11998 static int
11999 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12000 {
12001 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12002 }
12003
12004 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12005 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12006 sequences of non-executed instructions in IT blocks probably take the same
12007 amount of time as executed instructions (and the IT instruction itself takes
12008 space in icache). This function was experimentally determined to give good
12009 results on a popular embedded benchmark. */
12010
12011 static int
12012 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12013 {
12014 return (TARGET_32BIT && speed_p) ? 1
12015 : arm_default_branch_cost (speed_p, predictable_p);
12016 }
12017
12018 static bool fp_consts_inited = false;
12019
12020 static REAL_VALUE_TYPE value_fp0;
12021
12022 static void
12023 init_fp_table (void)
12024 {
12025 REAL_VALUE_TYPE r;
12026
12027 r = REAL_VALUE_ATOF ("0", DFmode);
12028 value_fp0 = r;
12029 fp_consts_inited = true;
12030 }
12031
12032 /* Return TRUE if rtx X is a valid immediate FP constant. */
12033 int
12034 arm_const_double_rtx (rtx x)
12035 {
12036 REAL_VALUE_TYPE r;
12037
12038 if (!fp_consts_inited)
12039 init_fp_table ();
12040
12041 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12042 if (REAL_VALUE_MINUS_ZERO (r))
12043 return 0;
12044
12045 if (REAL_VALUES_EQUAL (r, value_fp0))
12046 return 1;
12047
12048 return 0;
12049 }
12050
12051 /* VFPv3 has a fairly wide range of representable immediates, formed from
12052 "quarter-precision" floating-point values. These can be evaluated using this
12053 formula (with ^ for exponentiation):
12054
12055 -1^s * n * 2^-r
12056
12057 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12058 16 <= n <= 31 and 0 <= r <= 7.
12059
12060 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12061
12062 - A (most-significant) is the sign bit.
12063 - BCD are the exponent (encoded as r XOR 3).
12064 - EFGH are the mantissa (encoded as n - 16).
12065 */
12066
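/* Illustrative sketch (not part of arm.c): decoding the ABCDEFGH byte back
   to its value with standard C only, as a check on the description above.
   The helper name and the example encodings in the trailing comment are
   hypothetical.  */
#include <math.h>

static double
example_vfp3_decode_imm8 (unsigned int imm8)
{
  int sign = (imm8 >> 7) & 1;        /* A: the sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;     /* BCD: the exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;         /* EFGH: the mantissa, stored as n - 16.  */

  /* -1^s * n * 2^-r, with 16 <= n <= 31 and 0 <= r <= 7.  */
  return (sign ? -1.0 : 1.0) * ldexp ((double) n, -r);
}

/* For example, 0x70 decodes as s = 0, r = 4, n = 16, i.e. 16 * 2^-4 = 1.0,
   and 0x00 decodes as 16 * 2^-3 = 2.0.  */
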
12067 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12068 fconst[sd] instruction, or -1 if X isn't suitable. */
12069 static int
12070 vfp3_const_double_index (rtx x)
12071 {
12072 REAL_VALUE_TYPE r, m;
12073 int sign, exponent;
12074 unsigned HOST_WIDE_INT mantissa, mant_hi;
12075 unsigned HOST_WIDE_INT mask;
12076 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12077 bool fail;
12078
12079 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12080 return -1;
12081
12082 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12083
12084 /* We can't represent these things, so detect them first. */
12085 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12086 return -1;
12087
12088 /* Extract sign, exponent and mantissa. */
12089 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12090 r = real_value_abs (&r);
12091 exponent = REAL_EXP (&r);
12092 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12093 highest (sign) bit, with a fixed binary point at bit point_pos.
12094 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12095 bits for the mantissa, this may fail (low bits would be lost). */
12096 real_ldexp (&m, &r, point_pos - exponent);
12097 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12098 mantissa = w.elt (0);
12099 mant_hi = w.elt (1);
12100
12101 /* If there are bits set in the low part of the mantissa, we can't
12102 represent this value. */
12103 if (mantissa != 0)
12104 return -1;
12105
12106 /* Now make it so that mantissa contains the most-significant bits, and move
12107 the point_pos to indicate that the least-significant bits have been
12108 discarded. */
12109 point_pos -= HOST_BITS_PER_WIDE_INT;
12110 mantissa = mant_hi;
12111
12112 /* We can permit four significant bits of mantissa only, plus a high bit
12113 which is always 1. */
12114 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12115 if ((mantissa & mask) != 0)
12116 return -1;
12117
12118 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12119 mantissa >>= point_pos - 5;
12120
12121 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12122 floating-point immediate zero with Neon using an integer-zero load, but
12123 that case is handled elsewhere.) */
12124 if (mantissa == 0)
12125 return -1;
12126
12127 gcc_assert (mantissa >= 16 && mantissa <= 31);
12128
12129 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12130 normalized significands are in the range [1, 2). (Our mantissa is shifted
12131 left 4 places at this point relative to normalized IEEE754 values). GCC
12132 internally uses [0.5, 1) (see real.c), so the exponent returned from
12133 REAL_EXP must be altered. */
12134 exponent = 5 - exponent;
12135
12136 if (exponent < 0 || exponent > 7)
12137 return -1;
12138
12139 /* Sign, mantissa and exponent are now in the correct form to plug into the
12140 formula described in the comment above. */
12141 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12142 }
12143
12144 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12145 int
12146 vfp3_const_double_rtx (rtx x)
12147 {
12148 if (!TARGET_VFP3)
12149 return 0;
12150
12151 return vfp3_const_double_index (x) != -1;
12152 }
12153
12154 /* Recognize immediates which can be used in various Neon instructions. Legal
12155 immediates are described by the following table (for VMVN variants, the
12156 bitwise inverse of the constant shown is recognized. In either case, VMOV
12157 is output and the correct instruction to use for a given constant is chosen
12158 by the assembler). The constant shown is replicated across all elements of
12159 the destination vector.
12160
12161 insn elems variant constant (binary)
12162 ---- ----- ------- -----------------
12163 vmov i32 0 00000000 00000000 00000000 abcdefgh
12164 vmov i32 1 00000000 00000000 abcdefgh 00000000
12165 vmov i32 2 00000000 abcdefgh 00000000 00000000
12166 vmov i32 3 abcdefgh 00000000 00000000 00000000
12167 vmov i16 4 00000000 abcdefgh
12168 vmov i16 5 abcdefgh 00000000
12169 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12170 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12171 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12172 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12173 vmvn i16 10 00000000 abcdefgh
12174 vmvn i16 11 abcdefgh 00000000
12175 vmov i32 12 00000000 00000000 abcdefgh 11111111
12176 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12177 vmov i32 14 00000000 abcdefgh 11111111 11111111
12178 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12179 vmov i8 16 abcdefgh
12180 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12181 eeeeeeee ffffffff gggggggg hhhhhhhh
12182 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12183 vmov f32 19 00000000 00000000 00000000 00000000
12184
12185 For case 18, B = !b. Representable values are exactly those accepted by
12186 vfp3_const_double_index, but are output as floating-point numbers rather
12187 than indices.
12188
12189 For case 19, we will change it to vmov.i32 when assembling.
12190
12191 Variants 0-5 (inclusive) may also be used as immediates for the second
12192 operand of VORR/VBIC instructions.
12193
12194 The INVERSE argument causes the bitwise inverse of the given operand to be
12195 recognized instead (used for recognizing legal immediates for the VAND/VORN
12196 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12197 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12198 output, rather than the real insns vbic/vorr).
12199
12200 INVERSE makes no difference to the recognition of float vectors.
12201
12202 The return value is the variant of immediate as shown in the above table, or
12203 -1 if the given value doesn't match any of the listed patterns.
12204 */
12205 static int
12206 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12207 rtx *modconst, int *elementwidth)
12208 {
12209 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12210 matches = 1; \
12211 for (i = 0; i < idx; i += (STRIDE)) \
12212 if (!(TEST)) \
12213 matches = 0; \
12214 if (matches) \
12215 { \
12216 immtype = (CLASS); \
12217 elsize = (ELSIZE); \
12218 break; \
12219 }
12220
12221 unsigned int i, elsize = 0, idx = 0, n_elts;
12222 unsigned int innersize;
12223 unsigned char bytes[16];
12224 int immtype = -1, matches;
12225 unsigned int invmask = inverse ? 0xff : 0;
12226 bool vector = GET_CODE (op) == CONST_VECTOR;
12227
12228 if (vector)
12229 {
12230 n_elts = CONST_VECTOR_NUNITS (op);
12231 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12232 }
12233 else
12234 {
12235 n_elts = 1;
12236 if (mode == VOIDmode)
12237 mode = DImode;
12238 innersize = GET_MODE_SIZE (mode);
12239 }
12240
12241 /* Vectors of float constants. */
12242 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12243 {
12244 rtx el0 = CONST_VECTOR_ELT (op, 0);
12245 REAL_VALUE_TYPE r0;
12246
12247 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12248 return -1;
12249
12250 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12251
12252 for (i = 1; i < n_elts; i++)
12253 {
12254 rtx elt = CONST_VECTOR_ELT (op, i);
12255 REAL_VALUE_TYPE re;
12256
12257 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12258
12259 if (!REAL_VALUES_EQUAL (r0, re))
12260 return -1;
12261 }
12262
12263 if (modconst)
12264 *modconst = CONST_VECTOR_ELT (op, 0);
12265
12266 if (elementwidth)
12267 *elementwidth = 0;
12268
12269 if (el0 == CONST0_RTX (GET_MODE (el0)))
12270 return 19;
12271 else
12272 return 18;
12273 }
12274
12275 /* Splat vector constant out into a byte vector. */
12276 for (i = 0; i < n_elts; i++)
12277 {
12278 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12279 unsigned HOST_WIDE_INT elpart;
12280 unsigned int part, parts;
12281
12282 if (CONST_INT_P (el))
12283 {
12284 elpart = INTVAL (el);
12285 parts = 1;
12286 }
12287 else if (CONST_DOUBLE_P (el))
12288 {
12289 elpart = CONST_DOUBLE_LOW (el);
12290 parts = 2;
12291 }
12292 else
12293 gcc_unreachable ();
12294
12295 for (part = 0; part < parts; part++)
12296 {
12297 unsigned int byte;
12298 for (byte = 0; byte < innersize; byte++)
12299 {
12300 bytes[idx++] = (elpart & 0xff) ^ invmask;
12301 elpart >>= BITS_PER_UNIT;
12302 }
12303 if (CONST_DOUBLE_P (el))
12304 elpart = CONST_DOUBLE_HIGH (el);
12305 }
12306 }
12307
12308 /* Sanity check. */
12309 gcc_assert (idx == GET_MODE_SIZE (mode));
12310
12311 do
12312 {
12313 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12314 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12315
12316 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12317 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12318
12319 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12320 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12321
12322 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12323 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12324
12325 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12326
12327 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12328
12329 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12330 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12331
12332 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12333 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12334
12335 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12336 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12337
12338 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12339 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12340
12341 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12342
12343 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12344
12345 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12346 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12347
12348 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12349 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12350
12351 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12352 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12353
12354 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12355 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12356
12357 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12358
12359 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12360 && bytes[i] == bytes[(i + 8) % idx]);
12361 }
12362 while (0);
12363
12364 if (immtype == -1)
12365 return -1;
12366
12367 if (elementwidth)
12368 *elementwidth = elsize;
12369
12370 if (modconst)
12371 {
12372 unsigned HOST_WIDE_INT imm = 0;
12373
12374 /* Un-invert bytes of recognized vector, if necessary. */
12375 if (invmask != 0)
12376 for (i = 0; i < idx; i++)
12377 bytes[i] ^= invmask;
12378
12379 if (immtype == 17)
12380 {
12381 /* FIXME: Broken on 32-bit H_W_I hosts. */
12382 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12383
12384 for (i = 0; i < 8; i++)
12385 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12386 << (i * BITS_PER_UNIT);
12387
12388 *modconst = GEN_INT (imm);
12389 }
12390 else
12391 {
12392 unsigned HOST_WIDE_INT imm = 0;
12393
12394 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12395 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12396
12397 *modconst = GEN_INT (imm);
12398 }
12399 }
12400
12401 return immtype;
12402 #undef CHECK
12403 }
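
/* Illustrative note (hypothetical constant, a worked example of the
   recognizer above): a V4SI vector whose elements are all 0x0000ab00
   splats to the byte pattern 00 ab 00 00 repeated four times.  Variant 0
   fails because bytes[i + 1] is not zero, but variant 1 matches (bytes[i]
   == 0 and bytes[i + 1] == bytes[1]), so the function returns 1 with
   *elementwidth = 32 and *modconst = 0x0000ab00, which the output patterns
   can emit as something like "vmov.i32 q0, #0xab00".  */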
12404
12405 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12406 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12407 float elements), and a modified constant (whatever should be output for a
12408 VMOV) in *MODCONST. */
12409
12410 int
12411 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12412 rtx *modconst, int *elementwidth)
12413 {
12414 rtx tmpconst;
12415 int tmpwidth;
12416 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12417
12418 if (retval == -1)
12419 return 0;
12420
12421 if (modconst)
12422 *modconst = tmpconst;
12423
12424 if (elementwidth)
12425 *elementwidth = tmpwidth;
12426
12427 return 1;
12428 }
12429
12430 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12431 the immediate is valid, write a constant suitable for using as an operand
12432 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12433 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12434
12435 int
12436 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12437 rtx *modconst, int *elementwidth)
12438 {
12439 rtx tmpconst;
12440 int tmpwidth;
12441 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12442
12443 if (retval < 0 || retval > 5)
12444 return 0;
12445
12446 if (modconst)
12447 *modconst = tmpconst;
12448
12449 if (elementwidth)
12450 *elementwidth = tmpwidth;
12451
12452 return 1;
12453 }
12454
12455 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12456 the immediate is valid, write a constant suitable for using as an operand
12457 to VSHR/VSHL to *MODCONST and the corresponding element width to
12458 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
12459 because the two have different limits. */
12460
12461 int
12462 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12463 rtx *modconst, int *elementwidth,
12464 bool isleftshift)
12465 {
12466 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12467 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12468 unsigned HOST_WIDE_INT last_elt = 0;
12469 unsigned HOST_WIDE_INT maxshift;
12470
12471 /* Check that all elements of the vector hold the same shift amount. */
12472 for (i = 0; i < n_elts; i++)
12473 {
12474 rtx el = CONST_VECTOR_ELT (op, i);
12475 unsigned HOST_WIDE_INT elpart;
12476
12477 if (CONST_INT_P (el))
12478 elpart = INTVAL (el);
12479 else if (CONST_DOUBLE_P (el))
12480 return 0;
12481 else
12482 gcc_unreachable ();
12483
12484 if (i != 0 && elpart != last_elt)
12485 return 0;
12486
12487 last_elt = elpart;
12488 }
12489
12490 /* Shift less than element size. */
12491 maxshift = innersize * 8;
12492
12493 if (isleftshift)
12494 {
12495 /* Left shift immediate value can be from 0 to <size>-1. */
12496 if (last_elt >= maxshift)
12497 return 0;
12498 }
12499 else
12500 {
12501 /* Right shift immediate value can be from 1 to <size>. */
12502 if (last_elt == 0 || last_elt > maxshift)
12503 return 0;
12504 }
12505
12506 if (elementwidth)
12507 *elementwidth = innersize * 8;
12508
12509 if (modconst)
12510 *modconst = CONST_VECTOR_ELT (op, 0);
12511
12512 return 1;
12513 }
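
/* Illustrative note (hypothetical operands, a worked example of the limits
   above): for a V8HI shift count the element size is 16 bits, so a
   splatted immediate of 16 is rejected for VSHL (left shifts allow 0..15)
   but accepted for VSHR (right shifts allow 1..16); when the count is
   accepted, *elementwidth is set to 16.  */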
12514
12515 /* Return a string suitable for output of Neon immediate logic operation
12516 MNEM. */
12517
12518 char *
12519 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12520 int inverse, int quad)
12521 {
12522 int width, is_valid;
12523 static char templ[40];
12524
12525 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12526
12527 gcc_assert (is_valid != 0);
12528
12529 if (quad)
12530 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12531 else
12532 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12533
12534 return templ;
12535 }
12536
12537 /* Return a string suitable for output of Neon immediate shift operation
12538 (VSHR or VSHL) MNEM. */
12539
12540 char *
12541 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12542 machine_mode mode, int quad,
12543 bool isleftshift)
12544 {
12545 int width, is_valid;
12546 static char templ[40];
12547
12548 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12549 gcc_assert (is_valid != 0);
12550
12551 if (quad)
12552 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12553 else
12554 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12555
12556 return templ;
12557 }
12558
12559 /* Output a sequence of pairwise operations to implement a reduction.
12560 NOTE: We do "too much work" here, because pairwise operations work on two
12561 registers-worth of operands in one go. Unfortunately, we don't think we can
12562 exploit those extra calculations to do the full operation in fewer steps.
12563 Although all vector elements of the result but the first are ignored, we
12564 actually calculate the same result in each of the elements. An alternative
12565 such as initially loading a vector with zero to use as each of the second
12566 operands would use up an additional register and take an extra instruction,
12567 for no particular gain. */
12568
12569 void
12570 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12571 rtx (*reduc) (rtx, rtx, rtx))
12572 {
12573 machine_mode inner = GET_MODE_INNER (mode);
12574 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12575 rtx tmpsum = op1;
12576
12577 for (i = parts / 2; i >= 1; i /= 2)
12578 {
12579 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12580 emit_insn (reduc (dest, tmpsum, tmpsum));
12581 tmpsum = dest;
12582 }
12583 }
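
/* Illustrative note (hypothetical values, a worked example of the pairwise
   reduction above): summing a V4HI vector {1, 2, 3, 4}.  parts is 4, so
   the loop runs for i = 2 and then i = 1:

       step 1: pairwise-add {1,2,3,4} with itself -> {3, 7, 3, 7}
       step 2: pairwise-add {3,7,3,7} with itself -> {10, 10, 10, 10}

   Only element 0 of the final result is consumed, but as the comment above
   notes, every element ends up holding the full sum.  */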
12584
12585 /* If VALS is a vector constant that can be loaded into a register
12586 using VDUP, generate instructions to do so and return an RTX to
12587 assign to the register. Otherwise return NULL_RTX. */
12588
12589 static rtx
12590 neon_vdup_constant (rtx vals)
12591 {
12592 machine_mode mode = GET_MODE (vals);
12593 machine_mode inner_mode = GET_MODE_INNER (mode);
12594 int n_elts = GET_MODE_NUNITS (mode);
12595 bool all_same = true;
12596 rtx x;
12597 int i;
12598
12599 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12600 return NULL_RTX;
12601
12602 for (i = 0; i < n_elts; ++i)
12603 {
12604 x = XVECEXP (vals, 0, i);
12605 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12606 all_same = false;
12607 }
12608
12609 if (!all_same)
12610 /* The elements are not all the same. We could handle repeating
12611 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12612 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12613 vdup.i16). */
12614 return NULL_RTX;
12615
12616 /* We can load this constant by using VDUP and a constant in a
12617 single ARM register. This will be cheaper than a vector
12618 load. */
12619
12620 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12621 return gen_rtx_VEC_DUPLICATE (mode, x);
12622 }
12623
12624 /* Generate code to load VALS, which is a PARALLEL containing only
12625 constants (for vec_init) or CONST_VECTOR, efficiently into a
12626 register. Returns an RTX to copy into the register, or NULL_RTX
12627 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12628
12629 rtx
12630 neon_make_constant (rtx vals)
12631 {
12632 machine_mode mode = GET_MODE (vals);
12633 rtx target;
12634 rtx const_vec = NULL_RTX;
12635 int n_elts = GET_MODE_NUNITS (mode);
12636 int n_const = 0;
12637 int i;
12638
12639 if (GET_CODE (vals) == CONST_VECTOR)
12640 const_vec = vals;
12641 else if (GET_CODE (vals) == PARALLEL)
12642 {
12643 /* A CONST_VECTOR must contain only CONST_INTs and
12644 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12645 Only store valid constants in a CONST_VECTOR. */
12646 for (i = 0; i < n_elts; ++i)
12647 {
12648 rtx x = XVECEXP (vals, 0, i);
12649 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12650 n_const++;
12651 }
12652 if (n_const == n_elts)
12653 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12654 }
12655 else
12656 gcc_unreachable ();
12657
12658 if (const_vec != NULL
12659 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12660 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12661 return const_vec;
12662 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12663 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12664 pipeline cycle; creating the constant takes one or two ARM
12665 pipeline cycles. */
12666 return target;
12667 else if (const_vec != NULL_RTX)
12668 /* Load from constant pool. On Cortex-A8 this takes two cycles
12669 (for either double or quad vectors). We can not take advantage
12670 of single-cycle VLD1 because we need a PC-relative addressing
12671 mode. */
12672 return const_vec;
12673 else
12674 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12675 We can not construct an initializer. */
12676 return NULL_RTX;
12677 }
12678
12679 /* Initialize vector TARGET to VALS. */
12680
12681 void
12682 neon_expand_vector_init (rtx target, rtx vals)
12683 {
12684 machine_mode mode = GET_MODE (target);
12685 machine_mode inner_mode = GET_MODE_INNER (mode);
12686 int n_elts = GET_MODE_NUNITS (mode);
12687 int n_var = 0, one_var = -1;
12688 bool all_same = true;
12689 rtx x, mem;
12690 int i;
12691
12692 for (i = 0; i < n_elts; ++i)
12693 {
12694 x = XVECEXP (vals, 0, i);
12695 if (!CONSTANT_P (x))
12696 ++n_var, one_var = i;
12697
12698 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12699 all_same = false;
12700 }
12701
12702 if (n_var == 0)
12703 {
12704 rtx constant = neon_make_constant (vals);
12705 if (constant != NULL_RTX)
12706 {
12707 emit_move_insn (target, constant);
12708 return;
12709 }
12710 }
12711
12712 /* Splat a single non-constant element if we can. */
12713 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12714 {
12715 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12716 emit_insn (gen_rtx_SET (VOIDmode, target,
12717 gen_rtx_VEC_DUPLICATE (mode, x)));
12718 return;
12719 }
12720
12721 /* One field is non-constant. Load constant then overwrite varying
12722 field. This is more efficient than using the stack. */
12723 if (n_var == 1)
12724 {
12725 rtx copy = copy_rtx (vals);
12726 rtx index = GEN_INT (one_var);
12727
12728 /* Load constant part of vector, substitute neighboring value for
12729 varying element. */
12730 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12731 neon_expand_vector_init (target, copy);
12732
12733 /* Insert variable. */
12734 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12735 switch (mode)
12736 {
12737 case V8QImode:
12738 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12739 break;
12740 case V16QImode:
12741 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12742 break;
12743 case V4HImode:
12744 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12745 break;
12746 case V8HImode:
12747 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12748 break;
12749 case V2SImode:
12750 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12751 break;
12752 case V4SImode:
12753 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12754 break;
12755 case V2SFmode:
12756 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12757 break;
12758 case V4SFmode:
12759 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12760 break;
12761 case V2DImode:
12762 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12763 break;
12764 default:
12765 gcc_unreachable ();
12766 }
12767 return;
12768 }
12769
12770 /* Construct the vector in memory one field at a time
12771 and load the whole vector. */
12772 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12773 for (i = 0; i < n_elts; i++)
12774 emit_move_insn (adjust_address_nv (mem, inner_mode,
12775 i * GET_MODE_SIZE (inner_mode)),
12776 XVECEXP (vals, 0, i));
12777 emit_move_insn (target, mem);
12778 }
12779
12780 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12781 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12782 reported source locations are bogus. */
12783
12784 static void
12785 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12786 const char *err)
12787 {
12788 HOST_WIDE_INT lane;
12789
12790 gcc_assert (CONST_INT_P (operand));
12791
12792 lane = INTVAL (operand);
12793
12794 if (lane < low || lane >= high)
12795 error (err);
12796 }
12797
12798 /* Bounds-check lanes. */
12799
12800 void
12801 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12802 {
12803 bounds_check (operand, low, high, "lane out of range");
12804 }
12805
12806 /* Bounds-check constants. */
12807
12808 void
12809 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12810 {
12811 bounds_check (operand, low, high, "constant out of range");
12812 }
12813
12814 HOST_WIDE_INT
12815 neon_element_bits (machine_mode mode)
12816 {
12817 if (mode == DImode)
12818 return GET_MODE_BITSIZE (mode);
12819 else
12820 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12821 }
12822
12823 \f
12824 /* Predicates for `match_operand' and `match_operator'. */
12825
12826 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12827 WB is true if full writeback address modes are allowed and is false
12828 if limited writeback address modes (POST_INC and PRE_DEC) are
12829 allowed. */
12830
12831 int
12832 arm_coproc_mem_operand (rtx op, bool wb)
12833 {
12834 rtx ind;
12835
12836 /* Reject eliminable registers. */
12837 if (! (reload_in_progress || reload_completed || lra_in_progress)
12838 && ( reg_mentioned_p (frame_pointer_rtx, op)
12839 || reg_mentioned_p (arg_pointer_rtx, op)
12840 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12841 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12842 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12843 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12844 return FALSE;
12845
12846 /* Constants are converted into offsets from labels. */
12847 if (!MEM_P (op))
12848 return FALSE;
12849
12850 ind = XEXP (op, 0);
12851
12852 if (reload_completed
12853 && (GET_CODE (ind) == LABEL_REF
12854 || (GET_CODE (ind) == CONST
12855 && GET_CODE (XEXP (ind, 0)) == PLUS
12856 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12857 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12858 return TRUE;
12859
12860 /* Match: (mem (reg)). */
12861 if (REG_P (ind))
12862 return arm_address_register_rtx_p (ind, 0);
12863
12864 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12865 acceptable in any case (subject to verification by
12866 arm_address_register_rtx_p). We need WB to be true to accept
12867 PRE_INC and POST_DEC. */
12868 if (GET_CODE (ind) == POST_INC
12869 || GET_CODE (ind) == PRE_DEC
12870 || (wb
12871 && (GET_CODE (ind) == PRE_INC
12872 || GET_CODE (ind) == POST_DEC)))
12873 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12874
12875 if (wb
12876 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12877 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12878 && GET_CODE (XEXP (ind, 1)) == PLUS
12879 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12880 ind = XEXP (ind, 1);
12881
12882 /* Match:
12883 (plus (reg)
12884 (const)). */
12885 if (GET_CODE (ind) == PLUS
12886 && REG_P (XEXP (ind, 0))
12887 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12888 && CONST_INT_P (XEXP (ind, 1))
12889 && INTVAL (XEXP (ind, 1)) > -1024
12890 && INTVAL (XEXP (ind, 1)) < 1024
12891 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12892 return TRUE;
12893
12894 return FALSE;
12895 }
12896
12897 /* Return TRUE if OP is a memory operand which we can load or store a vector
12898 to/from. TYPE is one of the following values:
12899 0 - Vector load/store (vldr)
12900 1 - Core registers (ldm)
12901 2 - Element/structure loads (vld1)
12902 */
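/* A rough summary of the address forms accepted for each TYPE by the checks
   below (the code is authoritative):

     TYPE 0 (vldr):  (reg), POST_INC, PRE_DEC, and (plus (reg) (const_int))
                     with a word-aligned offset in the vldr range;
     TYPE 1 (ldm):   (reg) only;
     TYPE 2 (vld1):  (reg), POST_INC, and POST_MODIFY by a register.

   The STRICT argument only matters before reload: an operand that mentions
   an eliminable register is rejected when STRICT is true and provisionally
   accepted otherwise.  */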
12903 int
12904 neon_vector_mem_operand (rtx op, int type, bool strict)
12905 {
12906 rtx ind;
12907
12908 /* Reject eliminable registers. */
12909 if (! (reload_in_progress || reload_completed)
12910 && ( reg_mentioned_p (frame_pointer_rtx, op)
12911 || reg_mentioned_p (arg_pointer_rtx, op)
12912 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12913 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12914 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12915 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12916 return !strict;
12917
12918 /* Constants are converted into offsets from labels. */
12919 if (!MEM_P (op))
12920 return FALSE;
12921
12922 ind = XEXP (op, 0);
12923
12924 if (reload_completed
12925 && (GET_CODE (ind) == LABEL_REF
12926 || (GET_CODE (ind) == CONST
12927 && GET_CODE (XEXP (ind, 0)) == PLUS
12928 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12929 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12930 return TRUE;
12931
12932 /* Match: (mem (reg)). */
12933 if (REG_P (ind))
12934 return arm_address_register_rtx_p (ind, 0);
12935
12936 /* Allow post-increment with Neon registers. */
12937 if ((type != 1 && GET_CODE (ind) == POST_INC)
12938 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12939 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12940
12941 /* Allow post-increment by register for VLDn. */
12942 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12943 && GET_CODE (XEXP (ind, 1)) == PLUS
12944 && REG_P (XEXP (XEXP (ind, 1), 1)))
12945 return true;
12946
12947 /* Match:
12948 (plus (reg)
12949 (const)). */
12950 if (type == 0
12951 && GET_CODE (ind) == PLUS
12952 && REG_P (XEXP (ind, 0))
12953 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12954 && CONST_INT_P (XEXP (ind, 1))
12955 && INTVAL (XEXP (ind, 1)) > -1024
12956 /* For quad modes, we restrict the constant offset to be slightly less
12957 than what the instruction format permits. We have no such constraint
12958 on double mode offsets. (This must match arm_legitimate_index_p.) */
12959 && (INTVAL (XEXP (ind, 1))
12960 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12961 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12962 return TRUE;
12963
12964 return FALSE;
12965 }
12966
12967 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12968 type. */
12969 int
12970 neon_struct_mem_operand (rtx op)
12971 {
12972 rtx ind;
12973
12974 /* Reject eliminable registers. */
12975 if (! (reload_in_progress || reload_completed)
12976 && ( reg_mentioned_p (frame_pointer_rtx, op)
12977 || reg_mentioned_p (arg_pointer_rtx, op)
12978 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12979 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12980 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12981 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12982 return FALSE;
12983
12984 /* Constants are converted into offsets from labels. */
12985 if (!MEM_P (op))
12986 return FALSE;
12987
12988 ind = XEXP (op, 0);
12989
12990 if (reload_completed
12991 && (GET_CODE (ind) == LABEL_REF
12992 || (GET_CODE (ind) == CONST
12993 && GET_CODE (XEXP (ind, 0)) == PLUS
12994 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12995 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12996 return TRUE;
12997
12998 /* Match: (mem (reg)). */
12999 if (REG_P (ind))
13000 return arm_address_register_rtx_p (ind, 0);
13001
13002 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13003 if (GET_CODE (ind) == POST_INC
13004 || GET_CODE (ind) == PRE_DEC)
13005 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13006
13007 return FALSE;
13008 }
13009
13010 /* Return true if X is a register that will be eliminated later on. */
13011 int
13012 arm_eliminable_register (rtx x)
13013 {
13014 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13015 || REGNO (x) == ARG_POINTER_REGNUM
13016 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13017 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13018 }
13019
13020 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13021 coprocessor registers. Otherwise return NO_REGS. */
13022
13023 enum reg_class
13024 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13025 {
13026 if (mode == HFmode)
13027 {
13028 if (!TARGET_NEON_FP16)
13029 return GENERAL_REGS;
13030 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13031 return NO_REGS;
13032 return GENERAL_REGS;
13033 }
13034
13035 /* The neon move patterns handle all legitimate vector and struct
13036 addresses. */
13037 if (TARGET_NEON
13038 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13039 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13040 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13041 || VALID_NEON_STRUCT_MODE (mode)))
13042 return NO_REGS;
13043
13044 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13045 return NO_REGS;
13046
13047 return GENERAL_REGS;
13048 }
13049
13050 /* Values which must be returned in the most-significant end of the return
13051 register. */
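/* For example, on a big-endian AAPCS target a 2-byte structure returned in
   r0 is expected in the most significant half of the register, matching its
   in-memory layout, rather than in the low half (an illustration of what the
   predicate below decides).  */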
13052
13053 static bool
13054 arm_return_in_msb (const_tree valtype)
13055 {
13056 return (TARGET_AAPCS_BASED
13057 && BYTES_BIG_ENDIAN
13058 && (AGGREGATE_TYPE_P (valtype)
13059 || TREE_CODE (valtype) == COMPLEX_TYPE
13060 || FIXED_POINT_TYPE_P (valtype)));
13061 }
13062
13063 /* Return TRUE if X references a SYMBOL_REF. */
13064 int
13065 symbol_mentioned_p (rtx x)
13066 {
13067 const char * fmt;
13068 int i;
13069
13070 if (GET_CODE (x) == SYMBOL_REF)
13071 return 1;
13072
13073 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13074 are constant offsets, not symbols. */
13075 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13076 return 0;
13077
13078 fmt = GET_RTX_FORMAT (GET_CODE (x));
13079
13080 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13081 {
13082 if (fmt[i] == 'E')
13083 {
13084 int j;
13085
13086 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13087 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13088 return 1;
13089 }
13090 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13091 return 1;
13092 }
13093
13094 return 0;
13095 }
13096
13097 /* Return TRUE if X references a LABEL_REF. */
13098 int
13099 label_mentioned_p (rtx x)
13100 {
13101 const char * fmt;
13102 int i;
13103
13104 if (GET_CODE (x) == LABEL_REF)
13105 return 1;
13106
13107 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13108 instruction, but they are constant offsets, not symbols. */
13109 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13110 return 0;
13111
13112 fmt = GET_RTX_FORMAT (GET_CODE (x));
13113 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13114 {
13115 if (fmt[i] == 'E')
13116 {
13117 int j;
13118
13119 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13120 if (label_mentioned_p (XVECEXP (x, i, j)))
13121 return 1;
13122 }
13123 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13124 return 1;
13125 }
13126
13127 return 0;
13128 }
13129
13130 int
13131 tls_mentioned_p (rtx x)
13132 {
13133 switch (GET_CODE (x))
13134 {
13135 case CONST:
13136 return tls_mentioned_p (XEXP (x, 0));
13137
13138 case UNSPEC:
13139 if (XINT (x, 1) == UNSPEC_TLS)
13140 return 1;
13141
13142 default:
13143 return 0;
13144 }
13145 }
13146
13147 /* Must not copy any rtx that uses a pc-relative address. */
13148
13149 static bool
13150 arm_cannot_copy_insn_p (rtx_insn *insn)
13151 {
13152 /* The tls call insn cannot be copied, as it is paired with a data
13153 word. */
13154 if (recog_memoized (insn) == CODE_FOR_tlscall)
13155 return true;
13156
13157 subrtx_iterator::array_type array;
13158 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13159 {
13160 const_rtx x = *iter;
13161 if (GET_CODE (x) == UNSPEC
13162 && (XINT (x, 1) == UNSPEC_PIC_BASE
13163 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13164 return true;
13165 }
13166 return false;
13167 }
13168
13169 enum rtx_code
13170 minmax_code (rtx x)
13171 {
13172 enum rtx_code code = GET_CODE (x);
13173
13174 switch (code)
13175 {
13176 case SMAX:
13177 return GE;
13178 case SMIN:
13179 return LE;
13180 case UMIN:
13181 return LEU;
13182 case UMAX:
13183 return GEU;
13184 default:
13185 gcc_unreachable ();
13186 }
13187 }
13188
13189 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
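/* For instance, a clamp to [0, 255] gives exact_log2 (255 + 1) == 8, so it
   matches usat with *MASK == 8, while a clamp to [-128, 127] satisfies
   -128 == -127 - 1 and matches ssat with *MASK == 7 + 1 == 8 (a worked
   example of the two cases handled below).  */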
13190
13191 bool
13192 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13193 int *mask, bool *signed_sat)
13194 {
13195 /* The high bound must be a power of two minus one. */
13196 int log = exact_log2 (INTVAL (hi_bound) + 1);
13197 if (log == -1)
13198 return false;
13199
13200 /* The low bound is either zero (for usat) or one less than the
13201 negation of the high bound (for ssat). */
13202 if (INTVAL (lo_bound) == 0)
13203 {
13204 if (mask)
13205 *mask = log;
13206 if (signed_sat)
13207 *signed_sat = false;
13208
13209 return true;
13210 }
13211
13212 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13213 {
13214 if (mask)
13215 *mask = log + 1;
13216 if (signed_sat)
13217 *signed_sat = true;
13218
13219 return true;
13220 }
13221
13222 return false;
13223 }
13224
13225 /* Return 1 if memory locations are adjacent. */
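/* For example, (mem (plus (reg r4) (const_int 8))) and
   (mem (plus (reg r4) (const_int 12))) are adjacent: same base register and
   offsets differing by exactly 4 (a sketch of what the checks below require;
   the offset-range and scheduling restrictions still apply).  */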
13226 int
13227 adjacent_mem_locations (rtx a, rtx b)
13228 {
13229 /* We don't guarantee to preserve the order of these memory refs. */
13230 if (volatile_refs_p (a) || volatile_refs_p (b))
13231 return 0;
13232
13233 if ((REG_P (XEXP (a, 0))
13234 || (GET_CODE (XEXP (a, 0)) == PLUS
13235 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13236 && (REG_P (XEXP (b, 0))
13237 || (GET_CODE (XEXP (b, 0)) == PLUS
13238 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13239 {
13240 HOST_WIDE_INT val0 = 0, val1 = 0;
13241 rtx reg0, reg1;
13242 int val_diff;
13243
13244 if (GET_CODE (XEXP (a, 0)) == PLUS)
13245 {
13246 reg0 = XEXP (XEXP (a, 0), 0);
13247 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13248 }
13249 else
13250 reg0 = XEXP (a, 0);
13251
13252 if (GET_CODE (XEXP (b, 0)) == PLUS)
13253 {
13254 reg1 = XEXP (XEXP (b, 0), 0);
13255 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13256 }
13257 else
13258 reg1 = XEXP (b, 0);
13259
13260 /* Don't accept any offset that will require multiple
13261 instructions to handle, since this would cause the
13262 arith_adjacentmem pattern to output an overlong sequence. */
13263 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13264 return 0;
13265
13266 /* Don't allow an eliminable register: register elimination can make
13267 the offset too large. */
13268 if (arm_eliminable_register (reg0))
13269 return 0;
13270
13271 val_diff = val1 - val0;
13272
13273 if (arm_ld_sched)
13274 {
13275 /* If the target has load delay slots, then there's no benefit
13276 to using an ldm instruction unless the offset is zero and
13277 we are optimizing for size. */
13278 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13279 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13280 && (val_diff == 4 || val_diff == -4));
13281 }
13282
13283 return ((REGNO (reg0) == REGNO (reg1))
13284 && (val_diff == 4 || val_diff == -4));
13285 }
13286
13287 return 0;
13288 }
13289
13290 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13291 for load operations, false for store operations. CONSECUTIVE is true
13292 if the register numbers in the operation must be consecutive in the register
13293 bank. RETURN_PC is true if the value is to be loaded into the PC.
13294 The pattern we are trying to match for load is:
13295 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13296 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13297 :
13298 :
13299 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13300 ]
13301 where
13302 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13303 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13304 3. If consecutive is TRUE, then for kth register being loaded,
13305 REGNO (R_dk) = REGNO (R_d0) + k.
13306 The pattern for store is similar. */
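/* As a concrete sketch, an "ldmia r0!, {r4, r5, r6}" would be described by a
   PARALLEL along the lines of

     [(set (reg r0) (plus (reg r0) (const_int 12)))
      (set (reg r4) (mem (reg r0)))
      (set (reg r5) (mem (plus (reg r0) (const_int 4))))
      (set (reg r6) (mem (plus (reg r0) (const_int 8))))]

   where the leading SET is the optional write-back of the base register.  */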
13307 bool
13308 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13309 bool consecutive, bool return_pc)
13310 {
13311 HOST_WIDE_INT count = XVECLEN (op, 0);
13312 rtx reg, mem, addr;
13313 unsigned regno;
13314 unsigned first_regno;
13315 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13316 rtx elt;
13317 bool addr_reg_in_reglist = false;
13318 bool update = false;
13319 int reg_increment;
13320 int offset_adj;
13321 int regs_per_val;
13322
13323 /* If not in SImode, then registers must be consecutive
13324 (e.g., VLDM instructions for DFmode). */
13325 gcc_assert ((mode == SImode) || consecutive);
13326 /* Setting return_pc for stores is illegal. */
13327 gcc_assert (!return_pc || load);
13328
13329 /* Set up the increments and the regs per val based on the mode. */
13330 reg_increment = GET_MODE_SIZE (mode);
13331 regs_per_val = reg_increment / 4;
13332 offset_adj = return_pc ? 1 : 0;
13333
13334 if (count <= 1
13335 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13336 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13337 return false;
13338
13339 /* Check if this is a write-back. */
13340 elt = XVECEXP (op, 0, offset_adj);
13341 if (GET_CODE (SET_SRC (elt)) == PLUS)
13342 {
13343 i++;
13344 base = 1;
13345 update = true;
13346
13347 /* The offset adjustment must be the number of registers being
13348 loaded or stored times the size of a single register. */
13349 if (!REG_P (SET_DEST (elt))
13350 || !REG_P (XEXP (SET_SRC (elt), 0))
13351 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13352 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13353 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13354 ((count - 1 - offset_adj) * reg_increment))
13355 return false;
13356 }
13357
13358 i = i + offset_adj;
13359 base = base + offset_adj;
13360 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13361 success depends on the type: VLDM can do just one reg,
13362 LDM must do at least two. */
13363 if ((count <= i) && (mode == SImode))
13364 return false;
13365
13366 elt = XVECEXP (op, 0, i - 1);
13367 if (GET_CODE (elt) != SET)
13368 return false;
13369
13370 if (load)
13371 {
13372 reg = SET_DEST (elt);
13373 mem = SET_SRC (elt);
13374 }
13375 else
13376 {
13377 reg = SET_SRC (elt);
13378 mem = SET_DEST (elt);
13379 }
13380
13381 if (!REG_P (reg) || !MEM_P (mem))
13382 return false;
13383
13384 regno = REGNO (reg);
13385 first_regno = regno;
13386 addr = XEXP (mem, 0);
13387 if (GET_CODE (addr) == PLUS)
13388 {
13389 if (!CONST_INT_P (XEXP (addr, 1)))
13390 return false;
13391
13392 offset = INTVAL (XEXP (addr, 1));
13393 addr = XEXP (addr, 0);
13394 }
13395
13396 if (!REG_P (addr))
13397 return false;
13398
13399 /* Don't allow SP to be loaded unless it is also the base register. It
13400 guarantees that SP is reset correctly when an LDM instruction
13401 is interrupted. Otherwise, we might end up with a corrupt stack. */
13402 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13403 return false;
13404
13405 for (; i < count; i++)
13406 {
13407 elt = XVECEXP (op, 0, i);
13408 if (GET_CODE (elt) != SET)
13409 return false;
13410
13411 if (load)
13412 {
13413 reg = SET_DEST (elt);
13414 mem = SET_SRC (elt);
13415 }
13416 else
13417 {
13418 reg = SET_SRC (elt);
13419 mem = SET_DEST (elt);
13420 }
13421
13422 if (!REG_P (reg)
13423 || GET_MODE (reg) != mode
13424 || REGNO (reg) <= regno
13425 || (consecutive
13426 && (REGNO (reg) !=
13427 (unsigned int) (first_regno + regs_per_val * (i - base))))
13428 /* Don't allow SP to be loaded unless it is also the base register. It
13429 guarantees that SP is reset correctly when an LDM instruction
13430 is interrupted. Otherwise, we might end up with a corrupt stack. */
13431 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13432 || !MEM_P (mem)
13433 || GET_MODE (mem) != mode
13434 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13435 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13436 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13437 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13438 offset + (i - base) * reg_increment))
13439 && (!REG_P (XEXP (mem, 0))
13440 || offset + (i - base) * reg_increment != 0)))
13441 return false;
13442
13443 regno = REGNO (reg);
13444 if (regno == REGNO (addr))
13445 addr_reg_in_reglist = true;
13446 }
13447
13448 if (load)
13449 {
13450 if (update && addr_reg_in_reglist)
13451 return false;
13452
13453 /* For Thumb-1, the address register is always modified, either by write-back
13454 or by an explicit load. If the pattern does not describe an update,
13455 then the address register must be in the list of loaded registers. */
13456 if (TARGET_THUMB1)
13457 return update || addr_reg_in_reglist;
13458 }
13459
13460 return true;
13461 }
13462
13463 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13464 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13465 instruction. ADD_OFFSET is nonzero if the base address register needs
13466 to be modified with an add instruction before we can use it. */
13467
13468 static bool
13469 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13470 int nops, HOST_WIDE_INT add_offset)
13471 {
13472 /* For the ARM8, ARM9 and StrongARM, two ldr instructions are faster than
13473 an ldm if the offset isn't small enough. The reason two ldrs are faster
13474 is because these ARMs are able to do more than one cache access
13475 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13476 whilst the ARM8 has a double bandwidth cache. This means that
13477 these cores can do both an instruction fetch and a data fetch in
13478 a single cycle, so the trick of calculating the address into a
13479 scratch register (one of the result regs) and then doing a load
13480 multiple actually becomes slower (and no smaller in code size).
13481 That is the transformation
13482
13483 ldr rd1, [rbase + offset]
13484 ldr rd2, [rbase + offset + 4]
13485
13486 to
13487
13488 add rd1, rbase, offset
13489 ldmia rd1, {rd1, rd2}
13490
13491 produces worse code -- '3 cycles + any stalls on rd2' instead of
13492 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13493 access per cycle, the first sequence could never complete in less
13494 than 6 cycles, whereas the ldm sequence would only take 5 and
13495 would make better use of sequential accesses if not hitting the
13496 cache.
13497
13498 We cheat here and test 'arm_ld_sched' which we currently know to
13499 only be true for the ARM8, ARM9 and StrongARM. If this ever
13500 changes, then the test below needs to be reworked. */
13501 if (nops == 2 && arm_ld_sched && add_offset != 0)
13502 return false;
13503
13504 /* XScale has load-store double instructions, but they have stricter
13505 alignment requirements than load-store multiple, so we cannot
13506 use them.
13507
13508 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13509 the pipeline until completion.
13510
13511 NREGS CYCLES
13512 1 3
13513 2 4
13514 3 5
13515 4 6
13516
13517 An ldr instruction takes 1-3 cycles, but does not block the
13518 pipeline.
13519
13520 NREGS CYCLES
13521 1 1-3
13522 2 2-6
13523 3 3-9
13524 4 4-12
13525
13526 Best case ldr will always win. However, the more ldr instructions
13527 we issue, the less likely we are to be able to schedule them well.
13528 Using ldr instructions also increases code size.
13529
13530 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13531 for counts of 3 or 4 regs. */
13532 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13533 return false;
13534 return true;
13535 }
13536
13537 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13538 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13539 an array ORDER which describes the sequence to use when accessing the
13540 offsets that produces an ascending order. In this sequence, each
13541 offset must be larger by exactly 4 than the previous one. ORDER[0]
13542 must have been filled in with the lowest offset by the caller.
13543 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13544 we use to verify that ORDER produces an ascending order of registers.
13545 Return true if it was possible to construct such an order, false if
13546 not. */
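/* A worked example: with UNSORTED_OFFSETS == {8, 0, 4, 12} the caller sets
   ORDER[0] = 1 (the offset 0), and the loop below fills in ORDER = {1, 2, 0, 3},
   i.e. offsets 0, 4, 8, 12.  If any offset were duplicated, or if the offsets
   did not form a run spaced exactly 4 apart, the function would return
   false.  */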
13547
13548 static bool
13549 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13550 int *unsorted_regs)
13551 {
13552 int i;
13553 for (i = 1; i < nops; i++)
13554 {
13555 int j;
13556
13557 order[i] = order[i - 1];
13558 for (j = 0; j < nops; j++)
13559 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13560 {
13561 /* We must find exactly one offset that is higher than the
13562 previous one by 4. */
13563 if (order[i] != order[i - 1])
13564 return false;
13565 order[i] = j;
13566 }
13567 if (order[i] == order[i - 1])
13568 return false;
13569 /* The register numbers must be ascending. */
13570 if (unsorted_regs != NULL
13571 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13572 return false;
13573 }
13574 return true;
13575 }
13576
13577 /* Used to determine in a peephole whether a sequence of load
13578 instructions can be changed into a load-multiple instruction.
13579 NOPS is the number of separate load instructions we are examining. The
13580 first NOPS entries in OPERANDS are the destination registers, the
13581 next NOPS entries are memory operands. If this function is
13582 successful, *BASE is set to the common base register of the memory
13583 accesses; *LOAD_OFFSET is set to the first memory location's offset
13584 from that base register.
13585 REGS is an array filled in with the destination register numbers.
13586 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13587 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13588 the sequence of registers in REGS matches the loads from ascending memory
13589 locations, and the function verifies that the register numbers are
13590 themselves ascending. If CHECK_REGS is false, the register numbers
13591 are stored in the order they are found in the operands. */
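/* As a sketch of the returned LDM_CASE values: four loads from r1 at offsets
   0, 4, 8 and 12 give case 1 (ldmia); offsets starting at 4 give case 2
   (ldmib, ARM only); offsets ending at 0 give case 3 (ldmda, ARM only);
   offsets ending at -4 give case 4 (ldmdb); and any other base offset that
   is a valid add/sub immediate gives case 5, which needs a preliminary add
   to form the base address.  */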
13592 static int
13593 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13594 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13595 {
13596 int unsorted_regs[MAX_LDM_STM_OPS];
13597 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13598 int order[MAX_LDM_STM_OPS];
13599 rtx base_reg_rtx = NULL;
13600 int base_reg = -1;
13601 int i, ldm_case;
13602
13603 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13604 easily extended if required. */
13605 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13606
13607 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13608
13609 /* Loop over the operands and check that the memory references are
13610 suitable (i.e. immediate offsets from the same base register). At
13611 the same time, extract the target register, and the memory
13612 offsets. */
13613 for (i = 0; i < nops; i++)
13614 {
13615 rtx reg;
13616 rtx offset;
13617
13618 /* Convert a subreg of a mem into the mem itself. */
13619 if (GET_CODE (operands[nops + i]) == SUBREG)
13620 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13621
13622 gcc_assert (MEM_P (operands[nops + i]));
13623
13624 /* Don't reorder volatile memory references; it doesn't seem worth
13625 looking for the case where the order is ok anyway. */
13626 if (MEM_VOLATILE_P (operands[nops + i]))
13627 return 0;
13628
13629 offset = const0_rtx;
13630
13631 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13632 || (GET_CODE (reg) == SUBREG
13633 && REG_P (reg = SUBREG_REG (reg))))
13634 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13635 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13636 || (GET_CODE (reg) == SUBREG
13637 && REG_P (reg = SUBREG_REG (reg))))
13638 && (CONST_INT_P (offset
13639 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13640 {
13641 if (i == 0)
13642 {
13643 base_reg = REGNO (reg);
13644 base_reg_rtx = reg;
13645 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13646 return 0;
13647 }
13648 else if (base_reg != (int) REGNO (reg))
13649 /* Not addressed from the same base register. */
13650 return 0;
13651
13652 unsorted_regs[i] = (REG_P (operands[i])
13653 ? REGNO (operands[i])
13654 : REGNO (SUBREG_REG (operands[i])));
13655
13656 /* If it isn't an integer register, or if it overwrites the
13657 base register but isn't the last insn in the list, then
13658 we can't do this. */
13659 if (unsorted_regs[i] < 0
13660 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13661 || unsorted_regs[i] > 14
13662 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13663 return 0;
13664
13665 /* Don't allow SP to be loaded unless it is also the base
13666 register. It guarantees that SP is reset correctly when
13667 an LDM instruction is interrupted. Otherwise, we might
13668 end up with a corrupt stack. */
13669 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13670 return 0;
13671
13672 unsorted_offsets[i] = INTVAL (offset);
13673 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13674 order[0] = i;
13675 }
13676 else
13677 /* Not a suitable memory address. */
13678 return 0;
13679 }
13680
13681 /* All the useful information has now been extracted from the
13682 operands into unsorted_regs and unsorted_offsets; additionally,
13683 order[0] has been set to the lowest offset in the list. Sort
13684 the offsets into order, verifying that they are adjacent, and
13685 check that the register numbers are ascending. */
13686 if (!compute_offset_order (nops, unsorted_offsets, order,
13687 check_regs ? unsorted_regs : NULL))
13688 return 0;
13689
13690 if (saved_order)
13691 memcpy (saved_order, order, sizeof order);
13692
13693 if (base)
13694 {
13695 *base = base_reg;
13696
13697 for (i = 0; i < nops; i++)
13698 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13699
13700 *load_offset = unsorted_offsets[order[0]];
13701 }
13702
13703 if (TARGET_THUMB1
13704 && !peep2_reg_dead_p (nops, base_reg_rtx))
13705 return 0;
13706
13707 if (unsorted_offsets[order[0]] == 0)
13708 ldm_case = 1; /* ldmia */
13709 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13710 ldm_case = 2; /* ldmib */
13711 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13712 ldm_case = 3; /* ldmda */
13713 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13714 ldm_case = 4; /* ldmdb */
13715 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13716 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13717 ldm_case = 5;
13718 else
13719 return 0;
13720
13721 if (!multiple_operation_profitable_p (false, nops,
13722 ldm_case == 5
13723 ? unsorted_offsets[order[0]] : 0))
13724 return 0;
13725
13726 return ldm_case;
13727 }
13728
13729 /* Used to determine in a peephole whether a sequence of store instructions can
13730 be changed into a store-multiple instruction.
13731 NOPS is the number of separate store instructions we are examining.
13732 NOPS_TOTAL is the total number of instructions recognized by the peephole
13733 pattern.
13734 The first NOPS entries in OPERANDS are the source registers, the next
13735 NOPS entries are memory operands. If this function is successful, *BASE is
13736 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13737 to the first memory location's offset from that base register. REGS is an
13738 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13739 likewise filled with the corresponding rtx's.
13740 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13741 numbers to an ascending order of stores.
13742 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13743 from ascending memory locations, and the function verifies that the register
13744 numbers are themselves ascending. If CHECK_REGS is false, the register
13745 numbers are stored in the order they are found in the operands. */
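/* The STM_CASE values returned below mirror the load case: e.g. stores at
   offsets -16, -12, -8 and -4 from the base give case 4 (stmdb), while
   stores starting at offset 0 give case 1 (stmia).  Unlike the load variant
   there is no case 5; an out-of-range base offset simply fails.  */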
13746 static int
13747 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13748 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13749 HOST_WIDE_INT *load_offset, bool check_regs)
13750 {
13751 int unsorted_regs[MAX_LDM_STM_OPS];
13752 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13753 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13754 int order[MAX_LDM_STM_OPS];
13755 int base_reg = -1;
13756 rtx base_reg_rtx = NULL;
13757 int i, stm_case;
13758
13759 /* Write-back of the base register is currently only supported for Thumb-1. */
13760 int base_writeback = TARGET_THUMB1;
13761
13762 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13763 easily extended if required. */
13764 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13765
13766 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13767
13768 /* Loop over the operands and check that the memory references are
13769 suitable (i.e. immediate offsets from the same base register). At
13770 the same time, extract the target register, and the memory
13771 offsets. */
13772 for (i = 0; i < nops; i++)
13773 {
13774 rtx reg;
13775 rtx offset;
13776
13777 /* Convert a subreg of a mem into the mem itself. */
13778 if (GET_CODE (operands[nops + i]) == SUBREG)
13779 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13780
13781 gcc_assert (MEM_P (operands[nops + i]));
13782
13783 /* Don't reorder volatile memory references; it doesn't seem worth
13784 looking for the case where the order is ok anyway. */
13785 if (MEM_VOLATILE_P (operands[nops + i]))
13786 return 0;
13787
13788 offset = const0_rtx;
13789
13790 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13791 || (GET_CODE (reg) == SUBREG
13792 && REG_P (reg = SUBREG_REG (reg))))
13793 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13794 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13795 || (GET_CODE (reg) == SUBREG
13796 && REG_P (reg = SUBREG_REG (reg))))
13797 && (CONST_INT_P (offset
13798 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13799 {
13800 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13801 ? operands[i] : SUBREG_REG (operands[i]));
13802 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13803
13804 if (i == 0)
13805 {
13806 base_reg = REGNO (reg);
13807 base_reg_rtx = reg;
13808 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13809 return 0;
13810 }
13811 else if (base_reg != (int) REGNO (reg))
13812 /* Not addressed from the same base register. */
13813 return 0;
13814
13815 /* If it isn't an integer register, then we can't do this. */
13816 if (unsorted_regs[i] < 0
13817 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13818 /* The effects are unpredictable if the base register is
13819 both updated and stored. */
13820 || (base_writeback && unsorted_regs[i] == base_reg)
13821 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13822 || unsorted_regs[i] > 14)
13823 return 0;
13824
13825 unsorted_offsets[i] = INTVAL (offset);
13826 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13827 order[0] = i;
13828 }
13829 else
13830 /* Not a suitable memory address. */
13831 return 0;
13832 }
13833
13834 /* All the useful information has now been extracted from the
13835 operands into unsorted_regs and unsorted_offsets; additionally,
13836 order[0] has been set to the lowest offset in the list. Sort
13837 the offsets into order, verifying that they are adjacent, and
13838 check that the register numbers are ascending. */
13839 if (!compute_offset_order (nops, unsorted_offsets, order,
13840 check_regs ? unsorted_regs : NULL))
13841 return 0;
13842
13843 if (saved_order)
13844 memcpy (saved_order, order, sizeof order);
13845
13846 if (base)
13847 {
13848 *base = base_reg;
13849
13850 for (i = 0; i < nops; i++)
13851 {
13852 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13853 if (reg_rtxs)
13854 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13855 }
13856
13857 *load_offset = unsorted_offsets[order[0]];
13858 }
13859
13860 if (TARGET_THUMB1
13861 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13862 return 0;
13863
13864 if (unsorted_offsets[order[0]] == 0)
13865 stm_case = 1; /* stmia */
13866 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13867 stm_case = 2; /* stmib */
13868 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13869 stm_case = 3; /* stmda */
13870 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13871 stm_case = 4; /* stmdb */
13872 else
13873 return 0;
13874
13875 if (!multiple_operation_profitable_p (false, nops, 0))
13876 return 0;
13877
13878 return stm_case;
13879 }
13880 \f
13881 /* Routines for use in generating RTL. */
13882
13883 /* Generate a load-multiple instruction. COUNT is the number of loads in
13884 the instruction; REGS and MEMS are arrays containing the operands.
13885 BASEREG is the base register to be used in addressing the memory operands.
13886 WBACK_OFFSET is nonzero if the instruction should update the base
13887 register. */
13888
13889 static rtx
13890 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13891 HOST_WIDE_INT wback_offset)
13892 {
13893 int i = 0, j;
13894 rtx result;
13895
13896 if (!multiple_operation_profitable_p (false, count, 0))
13897 {
13898 rtx seq;
13899
13900 start_sequence ();
13901
13902 for (i = 0; i < count; i++)
13903 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13904
13905 if (wback_offset != 0)
13906 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13907
13908 seq = get_insns ();
13909 end_sequence ();
13910
13911 return seq;
13912 }
13913
13914 result = gen_rtx_PARALLEL (VOIDmode,
13915 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13916 if (wback_offset != 0)
13917 {
13918 XVECEXP (result, 0, 0)
13919 = gen_rtx_SET (VOIDmode, basereg,
13920 plus_constant (Pmode, basereg, wback_offset));
13921 i = 1;
13922 count++;
13923 }
13924
13925 for (j = 0; i < count; i++, j++)
13926 XVECEXP (result, 0, i)
13927 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13928
13929 return result;
13930 }
13931
13932 /* Generate a store-multiple instruction. COUNT is the number of stores in
13933 the instruction; REGS and MEMS are arrays containing the operands.
13934 BASEREG is the base register to be used in addressing the memory operands.
13935 WBACK_OFFSET is nonzero if the instruction should update the base
13936 register. */
13937
13938 static rtx
13939 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13940 HOST_WIDE_INT wback_offset)
13941 {
13942 int i = 0, j;
13943 rtx result;
13944
13945 if (GET_CODE (basereg) == PLUS)
13946 basereg = XEXP (basereg, 0);
13947
13948 if (!multiple_operation_profitable_p (false, count, 0))
13949 {
13950 rtx seq;
13951
13952 start_sequence ();
13953
13954 for (i = 0; i < count; i++)
13955 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13956
13957 if (wback_offset != 0)
13958 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13959
13960 seq = get_insns ();
13961 end_sequence ();
13962
13963 return seq;
13964 }
13965
13966 result = gen_rtx_PARALLEL (VOIDmode,
13967 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13968 if (wback_offset != 0)
13969 {
13970 XVECEXP (result, 0, 0)
13971 = gen_rtx_SET (VOIDmode, basereg,
13972 plus_constant (Pmode, basereg, wback_offset));
13973 i = 1;
13974 count++;
13975 }
13976
13977 for (j = 0; i < count; i++, j++)
13978 XVECEXP (result, 0, i)
13979 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13980
13981 return result;
13982 }
13983
13984 /* Generate either a load-multiple or a store-multiple instruction. This
13985 function can be used in situations where we can start with a single MEM
13986 rtx and adjust its address upwards.
13987 COUNT is the number of operations in the instruction, not counting a
13988 possible update of the base register. REGS is an array containing the
13989 register operands.
13990 BASEREG is the base register to be used in addressing the memory operands,
13991 which are constructed from BASEMEM.
13992 WRITE_BACK specifies whether the generated instruction should include an
13993 update of the base register.
13994 OFFSETP is used to pass an offset to and from this function; this offset
13995 is not used when constructing the address (instead BASEMEM should have an
13996 appropriate offset in its address); it is used only for setting
13997 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
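/* A minimal usage sketch (values are illustrative): with REGS == {4, 5},
   COUNT == 2 and BASEREG r0, the loop below builds MEMs addressed at r0 and
   r0 + 4, and with WRITE_BACK the emitted instruction also adds
   4 * COUNT == 8 to r0.  */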
13998
13999 static rtx
14000 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14001 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14002 {
14003 rtx mems[MAX_LDM_STM_OPS];
14004 HOST_WIDE_INT offset = *offsetp;
14005 int i;
14006
14007 gcc_assert (count <= MAX_LDM_STM_OPS);
14008
14009 if (GET_CODE (basereg) == PLUS)
14010 basereg = XEXP (basereg, 0);
14011
14012 for (i = 0; i < count; i++)
14013 {
14014 rtx addr = plus_constant (Pmode, basereg, i * 4);
14015 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14016 offset += 4;
14017 }
14018
14019 if (write_back)
14020 *offsetp = offset;
14021
14022 if (is_load)
14023 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14024 write_back ? 4 * count : 0);
14025 else
14026 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14027 write_back ? 4 * count : 0);
14028 }
14029
14030 rtx
14031 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14032 rtx basemem, HOST_WIDE_INT *offsetp)
14033 {
14034 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14035 offsetp);
14036 }
14037
14038 rtx
14039 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14040 rtx basemem, HOST_WIDE_INT *offsetp)
14041 {
14042 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14043 offsetp);
14044 }
14045
14046 /* Called from a peephole2 expander to turn a sequence of loads into an
14047 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14048 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14049 is true if we can reorder the registers because they are subsequently used
14050 commutatively.
14051 Returns true iff we could generate a new instruction. */
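/* For example (illustrative registers), the peephole may see

     ldr r5, [r2, #4]
     ldr r4, [r2]

   and, with SORT_REGS true, reorder the destination registers so that the
   pair becomes a single "ldmia r2, {r4, r5}".  */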
14052
14053 bool
14054 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14055 {
14056 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14057 rtx mems[MAX_LDM_STM_OPS];
14058 int i, j, base_reg;
14059 rtx base_reg_rtx;
14060 HOST_WIDE_INT offset;
14061 int write_back = FALSE;
14062 int ldm_case;
14063 rtx addr;
14064
14065 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14066 &base_reg, &offset, !sort_regs);
14067
14068 if (ldm_case == 0)
14069 return false;
14070
14071 if (sort_regs)
14072 for (i = 0; i < nops - 1; i++)
14073 for (j = i + 1; j < nops; j++)
14074 if (regs[i] > regs[j])
14075 {
14076 int t = regs[i];
14077 regs[i] = regs[j];
14078 regs[j] = t;
14079 }
14080 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14081
14082 if (TARGET_THUMB1)
14083 {
14084 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14085 gcc_assert (ldm_case == 1 || ldm_case == 5);
14086 write_back = TRUE;
14087 }
14088
14089 if (ldm_case == 5)
14090 {
14091 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14092 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14093 offset = 0;
14094 if (!TARGET_THUMB1)
14095 {
14096 base_reg = regs[0];
14097 base_reg_rtx = newbase;
14098 }
14099 }
14100
14101 for (i = 0; i < nops; i++)
14102 {
14103 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14104 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14105 SImode, addr, 0);
14106 }
14107 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14108 write_back ? offset + i * 4 : 0));
14109 return true;
14110 }
14111
14112 /* Called from a peephole2 expander to turn a sequence of stores into an
14113 STM instruction. OPERANDS are the operands found by the peephole matcher;
14114 NOPS indicates how many separate stores we are trying to combine.
14115 Returns true iff we could generate a new instruction. */
14116
14117 bool
14118 gen_stm_seq (rtx *operands, int nops)
14119 {
14120 int i;
14121 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14122 rtx mems[MAX_LDM_STM_OPS];
14123 int base_reg;
14124 rtx base_reg_rtx;
14125 HOST_WIDE_INT offset;
14126 int write_back = FALSE;
14127 int stm_case;
14128 rtx addr;
14129 bool base_reg_dies;
14130
14131 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14132 mem_order, &base_reg, &offset, true);
14133
14134 if (stm_case == 0)
14135 return false;
14136
14137 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14138
14139 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14140 if (TARGET_THUMB1)
14141 {
14142 gcc_assert (base_reg_dies);
14143 write_back = TRUE;
14144 }
14145
14146 if (stm_case == 5)
14147 {
14148 gcc_assert (base_reg_dies);
14149 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14150 offset = 0;
14151 }
14152
14153 addr = plus_constant (Pmode, base_reg_rtx, offset);
14154
14155 for (i = 0; i < nops; i++)
14156 {
14157 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14158 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14159 SImode, addr, 0);
14160 }
14161 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14162 write_back ? offset + i * 4 : 0));
14163 return true;
14164 }
14165
14166 /* Called from a peephole2 expander to turn a sequence of stores that are
14167 preceded by constant loads into an STM instruction. OPERANDS are the
14168 operands found by the peephole matcher; NOPS indicates how many
14169 separate stores we are trying to combine; there are 2 * NOPS
14170 instructions in the peephole.
14171 Returns true iff we could generate a new instruction. */
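/* A sketch of the situation this handles (register numbers illustrative):

     mov r4, #1          str r4, [r0]
     mov r4, #2          str r4, [r0, #4]

   Because r4 is reused, the code below allocates a free register for one of
   the constants so that the stores can become "stmia r0, {r4, r5}" (or gives
   up if no such register is available).  */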
14172
14173 bool
14174 gen_const_stm_seq (rtx *operands, int nops)
14175 {
14176 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14177 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14178 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14179 rtx mems[MAX_LDM_STM_OPS];
14180 int base_reg;
14181 rtx base_reg_rtx;
14182 HOST_WIDE_INT offset;
14183 int write_back = FALSE;
14184 int stm_case;
14185 rtx addr;
14186 bool base_reg_dies;
14187 int i, j;
14188 HARD_REG_SET allocated;
14189
14190 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14191 mem_order, &base_reg, &offset, false);
14192
14193 if (stm_case == 0)
14194 return false;
14195
14196 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14197
14198 /* If the same register is used more than once, try to find a free
14199 register. */
14200 CLEAR_HARD_REG_SET (allocated);
14201 for (i = 0; i < nops; i++)
14202 {
14203 for (j = i + 1; j < nops; j++)
14204 if (regs[i] == regs[j])
14205 {
14206 rtx t = peep2_find_free_register (0, nops * 2,
14207 TARGET_THUMB1 ? "l" : "r",
14208 SImode, &allocated);
14209 if (t == NULL_RTX)
14210 return false;
14211 reg_rtxs[i] = t;
14212 regs[i] = REGNO (t);
14213 }
14214 }
14215
14216 /* Compute an ordering that maps the register numbers to an ascending
14217 sequence. */
14218 reg_order[0] = 0;
14219 for (i = 0; i < nops; i++)
14220 if (regs[i] < regs[reg_order[0]])
14221 reg_order[0] = i;
14222
14223 for (i = 1; i < nops; i++)
14224 {
14225 int this_order = reg_order[i - 1];
14226 for (j = 0; j < nops; j++)
14227 if (regs[j] > regs[reg_order[i - 1]]
14228 && (this_order == reg_order[i - 1]
14229 || regs[j] < regs[this_order]))
14230 this_order = j;
14231 reg_order[i] = this_order;
14232 }
14233
14234 /* Ensure that registers that must be live after the instruction end
14235 up with the correct value. */
14236 for (i = 0; i < nops; i++)
14237 {
14238 int this_order = reg_order[i];
14239 if ((this_order != mem_order[i]
14240 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14241 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14242 return false;
14243 }
14244
14245 /* Load the constants. */
14246 for (i = 0; i < nops; i++)
14247 {
14248 rtx op = operands[2 * nops + mem_order[i]];
14249 sorted_regs[i] = regs[reg_order[i]];
14250 emit_move_insn (reg_rtxs[reg_order[i]], op);
14251 }
14252
14253 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14254
14255 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14256 if (TARGET_THUMB1)
14257 {
14258 gcc_assert (base_reg_dies);
14259 write_back = TRUE;
14260 }
14261
14262 if (stm_case == 5)
14263 {
14264 gcc_assert (base_reg_dies);
14265 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14266 offset = 0;
14267 }
14268
14269 addr = plus_constant (Pmode, base_reg_rtx, offset);
14270
14271 for (i = 0; i < nops; i++)
14272 {
14273 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14274 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14275 SImode, addr, 0);
14276 }
14277 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14278 write_back ? offset + i * 4 : 0));
14279 return true;
14280 }
14281
14282 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14283 unaligned copies on processors which support unaligned semantics for those
14284 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14285 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14286 An interleave factor of 1 (the minimum) will perform no interleaving.
14287 Load/store multiple are used for aligned addresses where possible. */
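/* For example, a 14-byte copy with INTERLEAVE_FACTOR == 2 and both addresses
   unaligned might expand roughly to (register choices illustrative):

     ldr  r4, [src]           @ first 8-byte block, loads grouped first
     ldr  r5, [src, #4]
     str  r4, [dst]
     str  r5, [dst, #4]
     ldr  r4, [src, #8]       @ one remaining whole word
     str  r4, [dst, #8]
     ldrh r6, [src, #12]      @ trailing halfword
     strh r6, [dst, #12]

   with ldm/stm used instead on whichever side is word-aligned.  */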
14288
14289 static void
14290 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14291 HOST_WIDE_INT length,
14292 unsigned int interleave_factor)
14293 {
14294 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14295 int *regnos = XALLOCAVEC (int, interleave_factor);
14296 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14297 HOST_WIDE_INT i, j;
14298 HOST_WIDE_INT remaining = length, words;
14299 rtx halfword_tmp = NULL, byte_tmp = NULL;
14300 rtx dst, src;
14301 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14302 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14303 HOST_WIDE_INT srcoffset, dstoffset;
14304 HOST_WIDE_INT src_autoinc, dst_autoinc;
14305 rtx mem, addr;
14306
14307 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14308
14309 /* Use hard registers if we have aligned source or destination so we can use
14310 load/store multiple with contiguous registers. */
14311 if (dst_aligned || src_aligned)
14312 for (i = 0; i < interleave_factor; i++)
14313 regs[i] = gen_rtx_REG (SImode, i);
14314 else
14315 for (i = 0; i < interleave_factor; i++)
14316 regs[i] = gen_reg_rtx (SImode);
14317
14318 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14319 src = copy_addr_to_reg (XEXP (srcbase, 0));
14320
14321 srcoffset = dstoffset = 0;
14322
14323 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14324 For copying the last bytes we want to subtract this offset again. */
14325 src_autoinc = dst_autoinc = 0;
14326
14327 for (i = 0; i < interleave_factor; i++)
14328 regnos[i] = i;
14329
14330 /* Copy BLOCK_SIZE_BYTES chunks. */
14331
14332 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14333 {
14334 /* Load words. */
14335 if (src_aligned && interleave_factor > 1)
14336 {
14337 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14338 TRUE, srcbase, &srcoffset));
14339 src_autoinc += UNITS_PER_WORD * interleave_factor;
14340 }
14341 else
14342 {
14343 for (j = 0; j < interleave_factor; j++)
14344 {
14345 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14346 - src_autoinc));
14347 mem = adjust_automodify_address (srcbase, SImode, addr,
14348 srcoffset + j * UNITS_PER_WORD);
14349 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14350 }
14351 srcoffset += block_size_bytes;
14352 }
14353
14354 /* Store words. */
14355 if (dst_aligned && interleave_factor > 1)
14356 {
14357 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14358 TRUE, dstbase, &dstoffset));
14359 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14360 }
14361 else
14362 {
14363 for (j = 0; j < interleave_factor; j++)
14364 {
14365 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14366 - dst_autoinc));
14367 mem = adjust_automodify_address (dstbase, SImode, addr,
14368 dstoffset + j * UNITS_PER_WORD);
14369 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14370 }
14371 dstoffset += block_size_bytes;
14372 }
14373
14374 remaining -= block_size_bytes;
14375 }
14376
14377 /* Copy any whole words left (note these aren't interleaved with any
14378 subsequent halfword/byte load/stores in the interests of simplicity). */
14379
14380 words = remaining / UNITS_PER_WORD;
14381
14382 gcc_assert (words < interleave_factor);
14383
14384 if (src_aligned && words > 1)
14385 {
14386 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14387 &srcoffset));
14388 src_autoinc += UNITS_PER_WORD * words;
14389 }
14390 else
14391 {
14392 for (j = 0; j < words; j++)
14393 {
14394 addr = plus_constant (Pmode, src,
14395 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14396 mem = adjust_automodify_address (srcbase, SImode, addr,
14397 srcoffset + j * UNITS_PER_WORD);
14398 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14399 }
14400 srcoffset += words * UNITS_PER_WORD;
14401 }
14402
14403 if (dst_aligned && words > 1)
14404 {
14405 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14406 &dstoffset));
14407 dst_autoinc += words * UNITS_PER_WORD;
14408 }
14409 else
14410 {
14411 for (j = 0; j < words; j++)
14412 {
14413 addr = plus_constant (Pmode, dst,
14414 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14415 mem = adjust_automodify_address (dstbase, SImode, addr,
14416 dstoffset + j * UNITS_PER_WORD);
14417 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14418 }
14419 dstoffset += words * UNITS_PER_WORD;
14420 }
14421
14422 remaining -= words * UNITS_PER_WORD;
14423
14424 gcc_assert (remaining < 4);
14425
14426 /* Copy a halfword if necessary. */
14427
14428 if (remaining >= 2)
14429 {
14430 halfword_tmp = gen_reg_rtx (SImode);
14431
14432 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14433 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14434 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14435
14436 /* Either write out immediately, or delay until we've loaded the last
14437 byte, depending on interleave factor. */
14438 if (interleave_factor == 1)
14439 {
14440 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14441 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14442 emit_insn (gen_unaligned_storehi (mem,
14443 gen_lowpart (HImode, halfword_tmp)));
14444 halfword_tmp = NULL;
14445 dstoffset += 2;
14446 }
14447
14448 remaining -= 2;
14449 srcoffset += 2;
14450 }
14451
14452 gcc_assert (remaining < 2);
14453
14454 /* Copy last byte. */
14455
14456 if ((remaining & 1) != 0)
14457 {
14458 byte_tmp = gen_reg_rtx (SImode);
14459
14460 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14461 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14462 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14463
14464 if (interleave_factor == 1)
14465 {
14466 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14467 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14468 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14469 byte_tmp = NULL;
14470 dstoffset++;
14471 }
14472
14473 remaining--;
14474 srcoffset++;
14475 }
14476
14477 /* Store last halfword if we haven't done so already. */
14478
14479 if (halfword_tmp)
14480 {
14481 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14482 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14483 emit_insn (gen_unaligned_storehi (mem,
14484 gen_lowpart (HImode, halfword_tmp)));
14485 dstoffset += 2;
14486 }
14487
14488 /* Likewise for last byte. */
14489
14490 if (byte_tmp)
14491 {
14492 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14493 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14494 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14495 dstoffset++;
14496 }
14497
14498 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14499 }
14500
14501 /* From mips_adjust_block_mem:
14502
14503 Helper function for doing a loop-based block operation on memory
14504 reference MEM. Each iteration of the loop will operate on LENGTH
14505 bytes of MEM.
14506
14507 Create a new base register for use within the loop and point it to
14508 the start of MEM. Create a new memory reference that uses this
14509 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14510
14511 static void
14512 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14513 rtx *loop_mem)
14514 {
14515 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14516
14517 /* Although the new mem does not refer to a known location,
14518 it does keep up to LENGTH bytes of alignment. */
14519 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14520 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14521 }
14522
14523 /* From mips_block_move_loop:
14524
14525 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14526 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14527 the memory regions do not overlap. */
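/* The emitted structure is roughly (a sketch, ignoring register choices):

       final_src = src + (LENGTH - leftover)
     loop:
       copy BYTES_PER_ITER bytes from src to dest
       src += BYTES_PER_ITER;  dest += BYTES_PER_ITER
       if (src != final_src) goto loop
       copy the remaining "leftover" bytes, if any  */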
14528
14529 static void
14530 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14531 unsigned int interleave_factor,
14532 HOST_WIDE_INT bytes_per_iter)
14533 {
14534 rtx src_reg, dest_reg, final_src, test;
14535 HOST_WIDE_INT leftover;
14536
14537 leftover = length % bytes_per_iter;
14538 length -= leftover;
14539
14540 /* Create registers and memory references for use within the loop. */
14541 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14542 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14543
14544 /* Calculate the value that SRC_REG should have after the last iteration of
14545 the loop. */
14546 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14547 0, 0, OPTAB_WIDEN);
14548
14549 /* Emit the start of the loop. */
14550 rtx_code_label *label = gen_label_rtx ();
14551 emit_label (label);
14552
14553 /* Emit the loop body. */
14554 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14555 interleave_factor);
14556
14557 /* Move on to the next block. */
14558 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14559 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14560
14561 /* Emit the loop condition. */
14562 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14563 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14564
14565 /* Mop up any left-over bytes. */
14566 if (leftover)
14567 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14568 }
14569
14570 /* Emit a block move when either the source or destination is unaligned (not
14571 aligned to a four-byte boundary). This may need further tuning depending on
14572 core type, optimize_size setting, etc. */
14573
14574 static int
14575 arm_movmemqi_unaligned (rtx *operands)
14576 {
14577 HOST_WIDE_INT length = INTVAL (operands[2]);
14578
14579 if (optimize_size)
14580 {
14581 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14582 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14583 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14584 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14585 or dst_aligned though: allow more interleaving in those cases since the
14586 resulting code can be smaller. */
14587 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14588 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14589
14590 if (length > 12)
14591 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14592 interleave_factor, bytes_per_iter);
14593 else
14594 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14595 interleave_factor);
14596 }
14597 else
14598 {
14599 /* Note that the loop created by arm_block_move_unaligned_loop may be
14600 subject to loop unrolling, which makes tuning this condition a little
14601 redundant. */
14602 if (length > 32)
14603 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14604 else
14605 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14606 }
14607
14608 return 1;
14609 }
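/* For example, a 40-byte copy where neither operand is known to be word
   aligned uses the 4-bytes-per-iteration loop above when optimizing for
   size, and otherwise the 16-bytes-per-iteration loop with an interleave
   factor of 4.  */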
14610
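/* Expand a block copy using load/store-multiple sequences where possible.
   As in arm_movmemqi_unaligned above, OPERANDS[0] and OPERANDS[1] are the
   destination and source memory references, OPERANDS[2] is the byte count
   and OPERANDS[3] the known alignment.  Returns 1 if the copy was expanded
   inline, 0 if it is not handled here (the caller is then expected to fall
   back to another strategy).  For example, a 14-byte word-aligned copy
   typically becomes a four-word load-multiple, a three-word store-multiple
   and a halfword store of the remaining two bytes.  */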
14611 int
14612 arm_gen_movmemqi (rtx *operands)
14613 {
14614 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14615 HOST_WIDE_INT srcoffset, dstoffset;
14616 int i;
14617 rtx src, dst, srcbase, dstbase;
14618 rtx part_bytes_reg = NULL;
14619 rtx mem;
14620
14621 if (!CONST_INT_P (operands[2])
14622 || !CONST_INT_P (operands[3])
14623 || INTVAL (operands[2]) > 64)
14624 return 0;
14625
14626 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14627 return arm_movmemqi_unaligned (operands);
14628
14629 if (INTVAL (operands[3]) & 3)
14630 return 0;
14631
14632 dstbase = operands[0];
14633 srcbase = operands[1];
14634
14635 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14636 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14637
14638 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14639 out_words_to_go = INTVAL (operands[2]) / 4;
14640 last_bytes = INTVAL (operands[2]) & 3;
14641 dstoffset = srcoffset = 0;
14642
14643 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14644 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14645
14646 for (i = 0; in_words_to_go >= 2; i+=4)
14647 {
14648 if (in_words_to_go > 4)
14649 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14650 TRUE, srcbase, &srcoffset));
14651 else
14652 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14653 src, FALSE, srcbase,
14654 &srcoffset));
14655
14656 if (out_words_to_go)
14657 {
14658 if (out_words_to_go > 4)
14659 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14660 TRUE, dstbase, &dstoffset));
14661 else if (out_words_to_go != 1)
14662 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14663 out_words_to_go, dst,
14664 (last_bytes == 0
14665 ? FALSE : TRUE),
14666 dstbase, &dstoffset));
14667 else
14668 {
14669 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14670 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14671 if (last_bytes != 0)
14672 {
14673 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14674 dstoffset += 4;
14675 }
14676 }
14677 }
14678
14679 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14680 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14681 }
14682
14683 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14684 if (out_words_to_go)
14685 {
14686 rtx sreg;
14687
14688 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14689 sreg = copy_to_reg (mem);
14690
14691 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14692 emit_move_insn (mem, sreg);
14693 in_words_to_go--;
14694
14695 gcc_assert (!in_words_to_go); /* Sanity check */
14696 }
14697
14698 if (in_words_to_go)
14699 {
14700 gcc_assert (in_words_to_go > 0);
14701
14702 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14703 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14704 }
14705
14706 gcc_assert (!last_bytes || part_bytes_reg);
14707
14708 if (BYTES_BIG_ENDIAN && last_bytes)
14709 {
14710 rtx tmp = gen_reg_rtx (SImode);
14711
14712 /* The bytes we want are in the top end of the word. */
14713 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14714 GEN_INT (8 * (4 - last_bytes))));
14715 part_bytes_reg = tmp;
14716
14717 while (last_bytes)
14718 {
14719 mem = adjust_automodify_address (dstbase, QImode,
14720 plus_constant (Pmode, dst,
14721 last_bytes - 1),
14722 dstoffset + last_bytes - 1);
14723 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14724
14725 if (--last_bytes)
14726 {
14727 tmp = gen_reg_rtx (SImode);
14728 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14729 part_bytes_reg = tmp;
14730 }
14731 }
14732
14733 }
14734 else
14735 {
14736 if (last_bytes > 1)
14737 {
14738 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14739 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14740 last_bytes -= 2;
14741 if (last_bytes)
14742 {
14743 rtx tmp = gen_reg_rtx (SImode);
14744 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14745 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14746 part_bytes_reg = tmp;
14747 dstoffset += 2;
14748 }
14749 }
14750
14751 if (last_bytes)
14752 {
14753 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14754 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14755 }
14756 }
14757
14758 return 1;
14759 }
14760
14761 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14762 by mode size. */
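/* For example, given an SImode MEM whose address is (plus (reg r1)
   (const_int 4)), the result is an SImode MEM addressing
   (plus (reg r1) (const_int 8)).  */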
14763 inline static rtx
14764 next_consecutive_mem (rtx mem)
14765 {
14766 machine_mode mode = GET_MODE (mem);
14767 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14768 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14769
14770 return adjust_automodify_address (mem, mode, addr, offset);
14771 }
14772
14773 /* Copy using LDRD/STRD instructions whenever possible.
14774 Returns true upon success. */
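/* Rough sketch of the expansion below: a 14-byte copy is emitted as one
   doubleword copy (LDRD/STRD when the operands allow it), one word copy,
   and one halfword copy for the remaining 2 bytes.  */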
14775 bool
14776 gen_movmem_ldrd_strd (rtx *operands)
14777 {
14778 unsigned HOST_WIDE_INT len;
14779 HOST_WIDE_INT align;
14780 rtx src, dst, base;
14781 rtx reg0;
14782 bool src_aligned, dst_aligned;
14783 bool src_volatile, dst_volatile;
14784
14785 gcc_assert (CONST_INT_P (operands[2]));
14786 gcc_assert (CONST_INT_P (operands[3]));
14787
14788 len = UINTVAL (operands[2]);
14789 if (len > 64)
14790 return false;
14791
14792 /* Maximum alignment we can assume for both src and dst buffers. */
14793 align = INTVAL (operands[3]);
14794
14795 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14796 return false;
14797
14798 /* Place src and dst addresses in registers
14799 and update the corresponding mem rtx. */
14800 dst = operands[0];
14801 dst_volatile = MEM_VOLATILE_P (dst);
14802 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14803 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14804 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14805
14806 src = operands[1];
14807 src_volatile = MEM_VOLATILE_P (src);
14808 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14809 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14810 src = adjust_automodify_address (src, VOIDmode, base, 0);
14811
14812 if (!unaligned_access && !(src_aligned && dst_aligned))
14813 return false;
14814
14815 if (src_volatile || dst_volatile)
14816 return false;
14817
14818 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14819 if (!(dst_aligned || src_aligned))
14820 return arm_gen_movmemqi (operands);
14821
14822 src = adjust_address (src, DImode, 0);
14823 dst = adjust_address (dst, DImode, 0);
14824 while (len >= 8)
14825 {
14826 len -= 8;
14827 reg0 = gen_reg_rtx (DImode);
14828 if (src_aligned)
14829 emit_move_insn (reg0, src);
14830 else
14831 emit_insn (gen_unaligned_loaddi (reg0, src));
14832
14833 if (dst_aligned)
14834 emit_move_insn (dst, reg0);
14835 else
14836 emit_insn (gen_unaligned_storedi (dst, reg0));
14837
14838 src = next_consecutive_mem (src);
14839 dst = next_consecutive_mem (dst);
14840 }
14841
14842 gcc_assert (len < 8);
14843 if (len >= 4)
14844 {
14845 /* At least a word but less than a double-word left to copy.  Copy a word. */
14846 reg0 = gen_reg_rtx (SImode);
14847 src = adjust_address (src, SImode, 0);
14848 dst = adjust_address (dst, SImode, 0);
14849 if (src_aligned)
14850 emit_move_insn (reg0, src);
14851 else
14852 emit_insn (gen_unaligned_loadsi (reg0, src));
14853
14854 if (dst_aligned)
14855 emit_move_insn (dst, reg0);
14856 else
14857 emit_insn (gen_unaligned_storesi (dst, reg0));
14858
14859 src = next_consecutive_mem (src);
14860 dst = next_consecutive_mem (dst);
14861 len -= 4;
14862 }
14863
14864 if (len == 0)
14865 return true;
14866
14867 /* Copy the remaining bytes. */
14868 if (len >= 2)
14869 {
14870 dst = adjust_address (dst, HImode, 0);
14871 src = adjust_address (src, HImode, 0);
14872 reg0 = gen_reg_rtx (SImode);
14873 if (src_aligned)
14874 emit_insn (gen_zero_extendhisi2 (reg0, src));
14875 else
14876 emit_insn (gen_unaligned_loadhiu (reg0, src));
14877
14878 if (dst_aligned)
14879 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14880 else
14881 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14882
14883 src = next_consecutive_mem (src);
14884 dst = next_consecutive_mem (dst);
14885 if (len == 2)
14886 return true;
14887 }
14888
14889 dst = adjust_address (dst, QImode, 0);
14890 src = adjust_address (src, QImode, 0);
14891 reg0 = gen_reg_rtx (QImode);
14892 emit_move_insn (reg0, src);
14893 emit_move_insn (dst, reg0);
14894 return true;
14895 }
14896
14897 /* Select a dominance comparison mode if possible for a test of the general
14898 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14899 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14900 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14901 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14902 In all cases OP will be either EQ or NE, but we don't need to know which
14903 here. If we are unable to support a dominance comparison we return
14904 CC mode. This will then fail to match for the RTL expressions that
14905 generate this call. */
14906 machine_mode
14907 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14908 {
14909 enum rtx_code cond1, cond2;
14910 int swapped = 0;
14911
14912 /* Currently we will probably get the wrong result if the individual
14913 comparisons are not simple. This also ensures that it is safe to
14914 reverse a comparison if necessary. */
14915 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14916 != CCmode)
14917 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14918 != CCmode))
14919 return CCmode;
14920
14921 /* The if_then_else variant of this tests the second condition if the
14922 first passes, but is true if the first fails. Reverse the first
14923 condition to get a true "inclusive-or" expression. */
14924 if (cond_or == DOM_CC_NX_OR_Y)
14925 cond1 = reverse_condition (cond1);
14926
14927 /* If the comparisons are not equal, and one doesn't dominate the other,
14928 then we can't do this. */
14929 if (cond1 != cond2
14930 && !comparison_dominates_p (cond1, cond2)
14931 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14932 return CCmode;
14933
14934 if (swapped)
14935 std::swap (cond1, cond2);
14936
14937 switch (cond1)
14938 {
14939 case EQ:
14940 if (cond_or == DOM_CC_X_AND_Y)
14941 return CC_DEQmode;
14942
14943 switch (cond2)
14944 {
14945 case EQ: return CC_DEQmode;
14946 case LE: return CC_DLEmode;
14947 case LEU: return CC_DLEUmode;
14948 case GE: return CC_DGEmode;
14949 case GEU: return CC_DGEUmode;
14950 default: gcc_unreachable ();
14951 }
14952
14953 case LT:
14954 if (cond_or == DOM_CC_X_AND_Y)
14955 return CC_DLTmode;
14956
14957 switch (cond2)
14958 {
14959 case LT:
14960 return CC_DLTmode;
14961 case LE:
14962 return CC_DLEmode;
14963 case NE:
14964 return CC_DNEmode;
14965 default:
14966 gcc_unreachable ();
14967 }
14968
14969 case GT:
14970 if (cond_or == DOM_CC_X_AND_Y)
14971 return CC_DGTmode;
14972
14973 switch (cond2)
14974 {
14975 case GT:
14976 return CC_DGTmode;
14977 case GE:
14978 return CC_DGEmode;
14979 case NE:
14980 return CC_DNEmode;
14981 default:
14982 gcc_unreachable ();
14983 }
14984
14985 case LTU:
14986 if (cond_or == DOM_CC_X_AND_Y)
14987 return CC_DLTUmode;
14988
14989 switch (cond2)
14990 {
14991 case LTU:
14992 return CC_DLTUmode;
14993 case LEU:
14994 return CC_DLEUmode;
14995 case NE:
14996 return CC_DNEmode;
14997 default:
14998 gcc_unreachable ();
14999 }
15000
15001 case GTU:
15002 if (cond_or == DOM_CC_X_AND_Y)
15003 return CC_DGTUmode;
15004
15005 switch (cond2)
15006 {
15007 case GTU:
15008 return CC_DGTUmode;
15009 case GEU:
15010 return CC_DGEUmode;
15011 case NE:
15012 return CC_DNEmode;
15013 default:
15014 gcc_unreachable ();
15015 }
15016
15017 /* The remaining cases only occur when both comparisons are the
15018 same. */
15019 case NE:
15020 gcc_assert (cond1 == cond2);
15021 return CC_DNEmode;
15022
15023 case LE:
15024 gcc_assert (cond1 == cond2);
15025 return CC_DLEmode;
15026
15027 case GE:
15028 gcc_assert (cond1 == cond2);
15029 return CC_DGEmode;
15030
15031 case LEU:
15032 gcc_assert (cond1 == cond2);
15033 return CC_DLEUmode;
15034
15035 case GEU:
15036 gcc_assert (cond1 == cond2);
15037 return CC_DGEUmode;
15038
15039 default:
15040 gcc_unreachable ();
15041 }
15042 }
15043
15044 machine_mode
15045 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15046 {
15047 /* All floating point compares return CCFP if it is an equality
15048 comparison, and CCFPE otherwise. */
15049 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15050 {
15051 switch (op)
15052 {
15053 case EQ:
15054 case NE:
15055 case UNORDERED:
15056 case ORDERED:
15057 case UNLT:
15058 case UNLE:
15059 case UNGT:
15060 case UNGE:
15061 case UNEQ:
15062 case LTGT:
15063 return CCFPmode;
15064
15065 case LT:
15066 case LE:
15067 case GT:
15068 case GE:
15069 return CCFPEmode;
15070
15071 default:
15072 gcc_unreachable ();
15073 }
15074 }
15075
15076 /* A compare with a shifted operand. Because of canonicalization, the
15077 comparison will have to be swapped when we emit the assembler. */
15078 if (GET_MODE (y) == SImode
15079 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15080 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15081 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15082 || GET_CODE (x) == ROTATERT))
15083 return CC_SWPmode;
15084
15085 /* This operation is performed swapped, but since we only rely on the Z
15086 flag we don't need an additional mode. */
15087 if (GET_MODE (y) == SImode
15088 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15089 && GET_CODE (x) == NEG
15090 && (op == EQ || op == NE))
15091 return CC_Zmode;
15092
15093 /* This is a special case that is used by combine to allow a
15094 comparison of a shifted byte load to be split into a zero-extend
15095 followed by a comparison of the shifted integer (only valid for
15096 equalities and unsigned inequalities). */
15097 if (GET_MODE (x) == SImode
15098 && GET_CODE (x) == ASHIFT
15099 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15100 && GET_CODE (XEXP (x, 0)) == SUBREG
15101 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15102 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15103 && (op == EQ || op == NE
15104 || op == GEU || op == GTU || op == LTU || op == LEU)
15105 && CONST_INT_P (y))
15106 return CC_Zmode;
15107
15108 /* A construct for a conditional compare, if the false arm contains
15109 0, then both conditions must be true, otherwise either condition
15110 must be true. Not all conditions are possible, so CCmode is
15111 returned if it can't be done. */
15112 if (GET_CODE (x) == IF_THEN_ELSE
15113 && (XEXP (x, 2) == const0_rtx
15114 || XEXP (x, 2) == const1_rtx)
15115 && COMPARISON_P (XEXP (x, 0))
15116 && COMPARISON_P (XEXP (x, 1)))
15117 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15118 INTVAL (XEXP (x, 2)));
15119
15120 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15121 if (GET_CODE (x) == AND
15122 && (op == EQ || op == NE)
15123 && COMPARISON_P (XEXP (x, 0))
15124 && COMPARISON_P (XEXP (x, 1)))
15125 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15126 DOM_CC_X_AND_Y);
15127
15128 if (GET_CODE (x) == IOR
15129 && (op == EQ || op == NE)
15130 && COMPARISON_P (XEXP (x, 0))
15131 && COMPARISON_P (XEXP (x, 1)))
15132 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15133 DOM_CC_X_OR_Y);
15134
15135 /* An operation (on Thumb) where we want to test for a single bit.
15136 This is done by shifting that bit up into the top bit of a
15137 scratch register; we can then branch on the sign bit. */
15138 if (TARGET_THUMB1
15139 && GET_MODE (x) == SImode
15140 && (op == EQ || op == NE)
15141 && GET_CODE (x) == ZERO_EXTRACT
15142 && XEXP (x, 1) == const1_rtx)
15143 return CC_Nmode;
15144
15145 /* An operation that sets the condition codes as a side-effect, the
15146 V flag is not set correctly, so we can only use comparisons where
15147 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15148 instead.) */
15149 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15150 if (GET_MODE (x) == SImode
15151 && y == const0_rtx
15152 && (op == EQ || op == NE || op == LT || op == GE)
15153 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15154 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15155 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15156 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15157 || GET_CODE (x) == LSHIFTRT
15158 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15159 || GET_CODE (x) == ROTATERT
15160 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15161 return CC_NOOVmode;
15162
15163 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15164 return CC_Zmode;
15165
15166 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15167 && GET_CODE (x) == PLUS
15168 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15169 return CC_Cmode;
15170
15171 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15172 {
15173 switch (op)
15174 {
15175 case EQ:
15176 case NE:
15177 /* A DImode comparison against zero can be implemented by
15178 or'ing the two halves together. */
15179 if (y == const0_rtx)
15180 return CC_Zmode;
15181
15182 /* We can do an equality test in three Thumb instructions. */
15183 if (!TARGET_32BIT)
15184 return CC_Zmode;
15185
15186 /* FALLTHROUGH */
15187
15188 case LTU:
15189 case LEU:
15190 case GTU:
15191 case GEU:
15192 /* DImode unsigned comparisons can be implemented by cmp +
15193 cmpeq without a scratch register. Not worth doing in
15194 Thumb-2. */
15195 if (TARGET_32BIT)
15196 return CC_CZmode;
15197
15198 /* FALLTHROUGH */
15199
15200 case LT:
15201 case LE:
15202 case GT:
15203 case GE:
15204 /* DImode signed and unsigned comparisons can be implemented
15205 by cmp + sbcs with a scratch register, but that does not
15206 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15207 gcc_assert (op != EQ && op != NE);
15208 return CC_NCVmode;
15209
15210 default:
15211 gcc_unreachable ();
15212 }
15213 }
15214
15215 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15216 return GET_MODE (x);
15217
15218 return CCmode;
15219 }
15220
15221 /* X and Y are two things to compare using CODE.  Emit the compare insn and
15222    return the rtx for the CC register in the proper mode.  SCRATCH is an
15223    SImode register that may be needed as a scratch for DImode comparisons.  */
15224 rtx
15225 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15226 {
15227 machine_mode mode;
15228 rtx cc_reg;
15229 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15230
15231 /* We might have X as a constant, Y as a register because of the predicates
15232 used for cmpdi. If so, force X to a register here. */
15233 if (dimode_comparison && !REG_P (x))
15234 x = force_reg (DImode, x);
15235
15236 mode = SELECT_CC_MODE (code, x, y);
15237 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15238
15239 if (dimode_comparison
15240 && mode != CC_CZmode)
15241 {
15242 rtx clobber, set;
15243
15244 /* To compare two non-zero values for equality, XOR them and
15245 then compare against zero. Not used for ARM mode; there
15246 CC_CZmode is cheaper. */
15247 if (mode == CC_Zmode && y != const0_rtx)
15248 {
15249 gcc_assert (!reload_completed);
15250 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15251 y = const0_rtx;
15252 }
15253
15254 /* A scratch register is required. */
15255 if (reload_completed)
15256 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15257 else
15258 scratch = gen_rtx_SCRATCH (SImode);
15259
15260 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15261 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15262 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15263 }
15264 else
15265 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15266
15267 return cc_reg;
15268 }
15269
15270 /* Generate a sequence of insns that will generate the correct return
15271 address mask depending on the physical architecture that the program
15272 is running on. */
15273 rtx
15274 arm_gen_return_addr_mask (void)
15275 {
15276 rtx reg = gen_reg_rtx (Pmode);
15277
15278 emit_insn (gen_return_addr_mask (reg));
15279 return reg;
15280 }
15281
15282 void
15283 arm_reload_in_hi (rtx *operands)
15284 {
15285 rtx ref = operands[1];
15286 rtx base, scratch;
15287 HOST_WIDE_INT offset = 0;
15288
15289 if (GET_CODE (ref) == SUBREG)
15290 {
15291 offset = SUBREG_BYTE (ref);
15292 ref = SUBREG_REG (ref);
15293 }
15294
15295 if (REG_P (ref))
15296 {
15297 /* We have a pseudo which has been spilt onto the stack; there
15298 are two cases here: the first where there is a simple
15299 stack-slot replacement and a second where the stack-slot is
15300 out of range, or is used as a subreg. */
15301 if (reg_equiv_mem (REGNO (ref)))
15302 {
15303 ref = reg_equiv_mem (REGNO (ref));
15304 base = find_replacement (&XEXP (ref, 0));
15305 }
15306 else
15307 /* The slot is out of range, or was dressed up in a SUBREG. */
15308 base = reg_equiv_address (REGNO (ref));
15309 }
15310 else
15311 base = find_replacement (&XEXP (ref, 0));
15312
15313 /* Handle the case where the address is too complex to be offset by 1. */
15314 if (GET_CODE (base) == MINUS
15315 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15316 {
15317 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15318
15319 emit_set_insn (base_plus, base);
15320 base = base_plus;
15321 }
15322 else if (GET_CODE (base) == PLUS)
15323 {
15324 /* The addend must be CONST_INT, or we would have dealt with it above. */
15325 HOST_WIDE_INT hi, lo;
15326
15327 offset += INTVAL (XEXP (base, 1));
15328 base = XEXP (base, 0);
15329
15330 /* Rework the address into a legal sequence of insns. */
15331 /* Valid range for lo is -4095 -> 4095 */
15332 lo = (offset >= 0
15333 ? (offset & 0xfff)
15334 : -((-offset) & 0xfff));
15335
15336 /* Corner case, if lo is the max offset then we would be out of range
15337 once we have added the additional 1 below, so bump the msb into the
15338 pre-loading insn(s). */
15339 if (lo == 4095)
15340 lo &= 0x7ff;
15341
15342 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15343 ^ (HOST_WIDE_INT) 0x80000000)
15344 - (HOST_WIDE_INT) 0x80000000);
15345
15346 gcc_assert (hi + lo == offset);
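      /* Worked example: offset = 0x1234 gives lo = 0x234 and hi = 0x1000;
	 offset = -4200 gives lo = -104 and hi = -4096.  In the corner case
	 offset = 4095, lo is first reduced to 0x7ff so that the offset + 1
	 used below still fits in the 12-bit range, and hi picks up 0x800.  */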
15347
15348 if (hi != 0)
15349 {
15350 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15351
15352 /* Get the base address; addsi3 knows how to handle constants
15353 that require more than one insn. */
15354 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15355 base = base_plus;
15356 offset = lo;
15357 }
15358 }
15359
15360 /* Operands[2] may overlap operands[0] (though it won't overlap
15361 operands[1]), that's why we asked for a DImode reg -- so we can
15362 use the bit that does not overlap. */
15363 if (REGNO (operands[2]) == REGNO (operands[0]))
15364 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15365 else
15366 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15367
15368 emit_insn (gen_zero_extendqisi2 (scratch,
15369 gen_rtx_MEM (QImode,
15370 plus_constant (Pmode, base,
15371 offset))));
15372 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15373 gen_rtx_MEM (QImode,
15374 plus_constant (Pmode, base,
15375 offset + 1))));
15376 if (!BYTES_BIG_ENDIAN)
15377 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15378 gen_rtx_IOR (SImode,
15379 gen_rtx_ASHIFT
15380 (SImode,
15381 gen_rtx_SUBREG (SImode, operands[0], 0),
15382 GEN_INT (8)),
15383 scratch));
15384 else
15385 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15386 gen_rtx_IOR (SImode,
15387 gen_rtx_ASHIFT (SImode, scratch,
15388 GEN_INT (8)),
15389 gen_rtx_SUBREG (SImode, operands[0], 0)));
15390 }
15391
15392 /* Handle storing a half-word to memory during reload by synthesizing as two
15393 byte stores. Take care not to clobber the input values until after we
15394 have moved them somewhere safe. This code assumes that if the DImode
15395 scratch in operands[2] overlaps either the input value or output address
15396 in some way, then that value must die in this insn (we absolutely need
15397 two scratch registers for some corner cases). */
15398 void
15399 arm_reload_out_hi (rtx *operands)
15400 {
15401 rtx ref = operands[0];
15402 rtx outval = operands[1];
15403 rtx base, scratch;
15404 HOST_WIDE_INT offset = 0;
15405
15406 if (GET_CODE (ref) == SUBREG)
15407 {
15408 offset = SUBREG_BYTE (ref);
15409 ref = SUBREG_REG (ref);
15410 }
15411
15412 if (REG_P (ref))
15413 {
15414 /* We have a pseudo which has been spilt onto the stack; there
15415 are two cases here: the first where there is a simple
15416 stack-slot replacement and a second where the stack-slot is
15417 out of range, or is used as a subreg. */
15418 if (reg_equiv_mem (REGNO (ref)))
15419 {
15420 ref = reg_equiv_mem (REGNO (ref));
15421 base = find_replacement (&XEXP (ref, 0));
15422 }
15423 else
15424 /* The slot is out of range, or was dressed up in a SUBREG. */
15425 base = reg_equiv_address (REGNO (ref));
15426 }
15427 else
15428 base = find_replacement (&XEXP (ref, 0));
15429
15430 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15431
15432 /* Handle the case where the address is too complex to be offset by 1. */
15433 if (GET_CODE (base) == MINUS
15434 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15435 {
15436 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15437
15438 /* Be careful not to destroy OUTVAL. */
15439 if (reg_overlap_mentioned_p (base_plus, outval))
15440 {
15441 /* Updating base_plus might destroy outval, see if we can
15442 swap the scratch and base_plus. */
15443 if (!reg_overlap_mentioned_p (scratch, outval))
15444 std::swap (scratch, base_plus);
15445 else
15446 {
15447 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15448
15449 /* Be conservative and copy OUTVAL into the scratch now,
15450 this should only be necessary if outval is a subreg
15451 of something larger than a word. */
15452 /* XXX Might this clobber base? I can't see how it can,
15453 since scratch is known to overlap with OUTVAL, and
15454 must be wider than a word. */
15455 emit_insn (gen_movhi (scratch_hi, outval));
15456 outval = scratch_hi;
15457 }
15458 }
15459
15460 emit_set_insn (base_plus, base);
15461 base = base_plus;
15462 }
15463 else if (GET_CODE (base) == PLUS)
15464 {
15465 /* The addend must be CONST_INT, or we would have dealt with it above. */
15466 HOST_WIDE_INT hi, lo;
15467
15468 offset += INTVAL (XEXP (base, 1));
15469 base = XEXP (base, 0);
15470
15471 /* Rework the address into a legal sequence of insns. */
15472 /* Valid range for lo is -4095 -> 4095 */
15473 lo = (offset >= 0
15474 ? (offset & 0xfff)
15475 : -((-offset) & 0xfff));
15476
15477 /* Corner case, if lo is the max offset then we would be out of range
15478 once we have added the additional 1 below, so bump the msb into the
15479 pre-loading insn(s). */
15480 if (lo == 4095)
15481 lo &= 0x7ff;
15482
15483 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15484 ^ (HOST_WIDE_INT) 0x80000000)
15485 - (HOST_WIDE_INT) 0x80000000);
15486
15487 gcc_assert (hi + lo == offset);
15488
15489 if (hi != 0)
15490 {
15491 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15492
15493 /* Be careful not to destroy OUTVAL. */
15494 if (reg_overlap_mentioned_p (base_plus, outval))
15495 {
15496 /* Updating base_plus might destroy outval, see if we
15497 can swap the scratch and base_plus. */
15498 if (!reg_overlap_mentioned_p (scratch, outval))
15499 std::swap (scratch, base_plus);
15500 else
15501 {
15502 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15503
15504 /* Be conservative and copy outval into scratch now,
15505 this should only be necessary if outval is a
15506 subreg of something larger than a word. */
15507 /* XXX Might this clobber base? I can't see how it
15508 can, since scratch is known to overlap with
15509 outval. */
15510 emit_insn (gen_movhi (scratch_hi, outval));
15511 outval = scratch_hi;
15512 }
15513 }
15514
15515 /* Get the base address; addsi3 knows how to handle constants
15516 that require more than one insn. */
15517 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15518 base = base_plus;
15519 offset = lo;
15520 }
15521 }
15522
15523 if (BYTES_BIG_ENDIAN)
15524 {
15525 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15526 plus_constant (Pmode, base,
15527 offset + 1)),
15528 gen_lowpart (QImode, outval)));
15529 emit_insn (gen_lshrsi3 (scratch,
15530 gen_rtx_SUBREG (SImode, outval, 0),
15531 GEN_INT (8)));
15532 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15533 offset)),
15534 gen_lowpart (QImode, scratch)));
15535 }
15536 else
15537 {
15538 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15539 offset)),
15540 gen_lowpart (QImode, outval)));
15541 emit_insn (gen_lshrsi3 (scratch,
15542 gen_rtx_SUBREG (SImode, outval, 0),
15543 GEN_INT (8)));
15544 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15545 plus_constant (Pmode, base,
15546 offset + 1)),
15547 gen_lowpart (QImode, scratch)));
15548 }
15549 }
15550
15551 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15552 (padded to the size of a word) should be passed in a register. */
15553
15554 static bool
15555 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15556 {
15557 if (TARGET_AAPCS_BASED)
15558 return must_pass_in_stack_var_size (mode, type);
15559 else
15560 return must_pass_in_stack_var_size_or_pad (mode, type);
15561 }
15562
15563
15564 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15565 Return true if an argument passed on the stack should be padded upwards,
15566 i.e. if the least-significant byte has useful data.
15567 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15568 aggregate types are placed at the lowest memory address.  */
15569
15570 bool
15571 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15572 {
15573 if (!TARGET_AAPCS_BASED)
15574 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15575
15576 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15577 return false;
15578
15579 return true;
15580 }
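/* For example, on a big-endian AAPCS target a 'short' argument passed on
   the stack is padded downwards (false is returned), whereas a small
   structure is padded upwards (true is returned).  On a little-endian
   AAPCS target this function always returns true.  */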
15581
15582
15583 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15584 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15585 register has useful data, and return the opposite if the most
15586 significant byte does. */
15587
15588 bool
15589 arm_pad_reg_upward (machine_mode mode,
15590 tree type, int first ATTRIBUTE_UNUSED)
15591 {
15592 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15593 {
15594 /* For AAPCS, small aggregates, small fixed-point types,
15595 and small complex types are always padded upwards. */
15596 if (type)
15597 {
15598 if ((AGGREGATE_TYPE_P (type)
15599 || TREE_CODE (type) == COMPLEX_TYPE
15600 || FIXED_POINT_TYPE_P (type))
15601 && int_size_in_bytes (type) <= 4)
15602 return true;
15603 }
15604 else
15605 {
15606 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15607 && GET_MODE_SIZE (mode) <= 4)
15608 return true;
15609 }
15610 }
15611
15612 /* Otherwise, use default padding. */
15613 return !BYTES_BIG_ENDIAN;
15614 }
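/* For example, on a big-endian AAPCS target a 3-byte aggregate yields true
   (padded upwards), while a plain 'int' falls through to the default and
   yields !BYTES_BIG_ENDIAN, i.e. false.  */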
15615
15616 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15617 assuming that the address in the base register is word aligned. */
15618 bool
15619 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15620 {
15621 HOST_WIDE_INT max_offset;
15622
15623 /* Offset must be a multiple of 4 in Thumb mode. */
15624 if (TARGET_THUMB2 && ((offset & 3) != 0))
15625 return false;
15626
15627 if (TARGET_THUMB2)
15628 max_offset = 1020;
15629 else if (TARGET_ARM)
15630 max_offset = 255;
15631 else
15632 return false;
15633
15634 return ((offset <= max_offset) && (offset >= -max_offset));
15635 }
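/* For example, an offset of 252 is accepted in both ARM and Thumb-2 state,
   1020 only in Thumb-2 (ARM-state LDRD/STRD is limited to +/-255), and 255
   only in ARM state (it is not a multiple of 4).  In Thumb-1 the function
   always returns false.  */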
15636
15637 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15638    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
15639    Assumes that the address in the base register RN is word aligned.  The
15640    pattern guarantees that both memory accesses use the same base register,
15641    that the offsets are constants within range, and that the gap between them is 4.
15642    If reload is complete, also check that the registers are legal.  WBACK indicates
15643    whether the address is updated.  LOAD indicates whether the access is a load or a store.  */
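/* For example, in ARM state LDRD r4, r5, [r6] uses an acceptable register
   combination (even first register, consecutive pair), whereas
   LDRD r5, r6, [r6] does not, and in both ARM and Thumb-2 state a
   writeback form whose base register equals one of the destination
   registers is rejected.  */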
15644 bool
15645 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15646 bool wback, bool load)
15647 {
15648 unsigned int t, t2, n;
15649
15650 if (!reload_completed)
15651 return true;
15652
15653 if (!offset_ok_for_ldrd_strd (offset))
15654 return false;
15655
15656 t = REGNO (rt);
15657 t2 = REGNO (rt2);
15658 n = REGNO (rn);
15659
15660 if ((TARGET_THUMB2)
15661 && ((wback && (n == t || n == t2))
15662 || (t == SP_REGNUM)
15663 || (t == PC_REGNUM)
15664 || (t2 == SP_REGNUM)
15665 || (t2 == PC_REGNUM)
15666 || (!load && (n == PC_REGNUM))
15667 || (load && (t == t2))
15668 /* Triggers Cortex-M3 LDRD errata. */
15669 || (!wback && load && fix_cm3_ldrd && (n == t))))
15670 return false;
15671
15672 if ((TARGET_ARM)
15673 && ((wback && (n == t || n == t2))
15674 || (t2 == PC_REGNUM)
15675 || (t % 2 != 0) /* First destination register is not even. */
15676 || (t2 != t + 1)
15677 /* PC can be used as base register (for offset addressing only),
15678 but it is deprecated. */
15679 || (n == PC_REGNUM)))
15680 return false;
15681
15682 return true;
15683 }
15684
15685 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15686 operand MEM's address contains an immediate offset from the base
15687 register and has no side effects, in which case it sets BASE and
15688 OFFSET accordingly. */
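/* For example, (mem (plus (reg r1) (const_int 8))) yields *BASE = r1 and
   *OFFSET = 8, a plain (mem (reg r1)) yields *OFFSET = 0, and an
   auto-modify address such as (post_inc ...) is rejected because it has
   side effects.  */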
15689 static bool
15690 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15691 {
15692 rtx addr;
15693
15694 gcc_assert (base != NULL && offset != NULL);
15695
15696 /* TODO: Handle more general memory operand patterns, such as
15697 PRE_DEC and PRE_INC. */
15698
15699 if (side_effects_p (mem))
15700 return false;
15701
15702 /* Can't deal with subregs. */
15703 if (GET_CODE (mem) == SUBREG)
15704 return false;
15705
15706 gcc_assert (MEM_P (mem));
15707
15708 *offset = const0_rtx;
15709
15710 addr = XEXP (mem, 0);
15711
15712 /* If addr isn't valid for DImode, then we can't handle it. */
15713 if (!arm_legitimate_address_p (DImode, addr,
15714 reload_in_progress || reload_completed))
15715 return false;
15716
15717 if (REG_P (addr))
15718 {
15719 *base = addr;
15720 return true;
15721 }
15722 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15723 {
15724 *base = XEXP (addr, 0);
15725 *offset = XEXP (addr, 1);
15726 return (REG_P (*base) && CONST_INT_P (*offset));
15727 }
15728
15729 return false;
15730 }
15731
15732 /* Called from a peephole2 to replace two word-size accesses with a
15733 single LDRD/STRD instruction. Returns true iff we can generate a
15734 new instruction sequence. That is, both accesses use the same base
15735 register and the gap between constant offsets is 4. This function
15736 may reorder its operands to match ldrd/strd RTL templates.
15737 OPERANDS are the operands found by the peephole matcher;
15738 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15739    corresponding memory operands.  LOAD indicates whether the access
15740    is a load or a store.  CONST_STORE indicates a store of constant
15741    integer values held in OPERANDS[4,5]; in that case the pattern is
15742    assumed to be four insns long for the purpose of checking dead
15743    registers.  COMMUTE indicates that register operands may be reordered.  */
15744 bool
15745 gen_operands_ldrd_strd (rtx *operands, bool load,
15746 bool const_store, bool commute)
15747 {
15748 int nops = 2;
15749 HOST_WIDE_INT offsets[2], offset;
15750 rtx base = NULL_RTX;
15751 rtx cur_base, cur_offset, tmp;
15752 int i, gap;
15753 HARD_REG_SET regset;
15754
15755 gcc_assert (!const_store || !load);
15756 /* Check that the memory references are immediate offsets from the
15757 same base register. Extract the base register, the destination
15758 registers, and the corresponding memory offsets. */
15759 for (i = 0; i < nops; i++)
15760 {
15761 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15762 return false;
15763
15764 if (i == 0)
15765 base = cur_base;
15766 else if (REGNO (base) != REGNO (cur_base))
15767 return false;
15768
15769 offsets[i] = INTVAL (cur_offset);
15770 if (GET_CODE (operands[i]) == SUBREG)
15771 {
15772 tmp = SUBREG_REG (operands[i]);
15773 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15774 operands[i] = tmp;
15775 }
15776 }
15777
15778 /* Make sure there is no dependency between the individual loads. */
15779 if (load && REGNO (operands[0]) == REGNO (base))
15780 return false; /* RAW */
15781
15782 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15783 return false; /* WAW */
15784
15785 /* If the same input register is used in both stores
15786 when storing different constants, try to find a free register.
15787 For example, the code
15788 mov r0, 0
15789 str r0, [r2]
15790 mov r0, 1
15791 str r0, [r2, #4]
15792 can be transformed into
15793 mov r1, 0
15794 strd r1, r0, [r2]
15795 in Thumb mode assuming that r1 is free. */
15796 if (const_store
15797 && REGNO (operands[0]) == REGNO (operands[1])
15798 && INTVAL (operands[4]) != INTVAL (operands[5]))
15799 {
15800 if (TARGET_THUMB2)
15801 {
15802 CLEAR_HARD_REG_SET (regset);
15803 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15804 if (tmp == NULL_RTX)
15805 return false;
15806
15807 /* Use the new register in the first load to ensure that
15808 if the original input register is not dead after peephole,
15809 then it will have the correct constant value. */
15810 operands[0] = tmp;
15811 }
15812 else if (TARGET_ARM)
15813 {
15814 return false;
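	  /* Note that the early return above leaves the ARM-state handling
	     below unreachable; only the Thumb-2 path currently tries to find
	     a replacement register for this case.  */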
15815 int regno = REGNO (operands[0]);
15816 if (!peep2_reg_dead_p (4, operands[0]))
15817 {
15818 /* When the input register is even and is not dead after the
15819 pattern, it has to hold the second constant but we cannot
15820 form a legal STRD in ARM mode with this register as the second
15821 register. */
15822 if (regno % 2 == 0)
15823 return false;
15824
15825 /* Is regno-1 free? */
15826 SET_HARD_REG_SET (regset);
15827 CLEAR_HARD_REG_BIT(regset, regno - 1);
15828 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15829 if (tmp == NULL_RTX)
15830 return false;
15831
15832 operands[0] = tmp;
15833 }
15834 else
15835 {
15836 /* Find a DImode register. */
15837 CLEAR_HARD_REG_SET (regset);
15838 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15839 if (tmp != NULL_RTX)
15840 {
15841 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15842 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15843 }
15844 else
15845 {
15846 /* Can we use the input register to form a DI register? */
15847 SET_HARD_REG_SET (regset);
15848 CLEAR_HARD_REG_BIT(regset,
15849 regno % 2 == 0 ? regno + 1 : regno - 1);
15850 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15851 if (tmp == NULL_RTX)
15852 return false;
15853 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15854 }
15855 }
15856
15857 gcc_assert (operands[0] != NULL_RTX);
15858 gcc_assert (operands[1] != NULL_RTX);
15859 gcc_assert (REGNO (operands[0]) % 2 == 0);
15860 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15861 }
15862 }
15863
15864 /* Make sure the instructions are ordered with lower memory access first. */
15865 if (offsets[0] > offsets[1])
15866 {
15867 gap = offsets[0] - offsets[1];
15868 offset = offsets[1];
15869
15870 /* Swap the instructions such that lower memory is accessed first. */
15871 std::swap (operands[0], operands[1]);
15872 std::swap (operands[2], operands[3]);
15873 if (const_store)
15874 std::swap (operands[4], operands[5]);
15875 }
15876 else
15877 {
15878 gap = offsets[1] - offsets[0];
15879 offset = offsets[0];
15880 }
15881
15882 /* Make sure accesses are to consecutive memory locations. */
15883 if (gap != 4)
15884 return false;
15885
15886 /* Make sure we generate legal instructions. */
15887 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15888 false, load))
15889 return true;
15890
15891 /* In Thumb state the registers are almost unconstrained, so if the check
15892    above failed there is little hope of fixing it by choosing different registers. */
15893 if (TARGET_THUMB2)
15894 return false;
15895
15896 if (load && commute)
15897 {
15898 /* Try reordering registers. */
15899 std::swap (operands[0], operands[1]);
15900 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15901 false, load))
15902 return true;
15903 }
15904
15905 if (const_store)
15906 {
15907 /* If input registers are dead after this pattern, they can be
15908 reordered or replaced by other registers that are free in the
15909 current pattern. */
15910 if (!peep2_reg_dead_p (4, operands[0])
15911 || !peep2_reg_dead_p (4, operands[1]))
15912 return false;
15913
15914 /* Try to reorder the input registers. */
15915 /* For example, the code
15916 mov r0, 0
15917 mov r1, 1
15918 str r1, [r2]
15919 str r0, [r2, #4]
15920 can be transformed into
15921 mov r1, 0
15922 mov r0, 1
15923 strd r0, [r2]
15924 */
15925 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15926 false, false))
15927 {
15928 std::swap (operands[0], operands[1]);
15929 return true;
15930 }
15931
15932 /* Try to find a free DI register. */
15933 CLEAR_HARD_REG_SET (regset);
15934 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15935 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15936 while (true)
15937 {
15938 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15939 if (tmp == NULL_RTX)
15940 return false;
15941
15942 /* DREG must be an even-numbered register in DImode.
15943 Split it into SI registers. */
15944 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15945 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15946 gcc_assert (operands[0] != NULL_RTX);
15947 gcc_assert (operands[1] != NULL_RTX);
15948 gcc_assert (REGNO (operands[0]) % 2 == 0);
15949 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15950
15951 return (operands_ok_ldrd_strd (operands[0], operands[1],
15952 base, offset,
15953 false, load));
15954 }
15955 }
15956
15957 return false;
15958 }
15959
15960
15961
15962 \f
15963 /* Print a symbolic form of X to the debug file, F. */
15964 static void
15965 arm_print_value (FILE *f, rtx x)
15966 {
15967 switch (GET_CODE (x))
15968 {
15969 case CONST_INT:
15970 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15971 return;
15972
15973 case CONST_DOUBLE:
15974 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15975 return;
15976
15977 case CONST_VECTOR:
15978 {
15979 int i;
15980
15981 fprintf (f, "<");
15982 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15983 {
15984 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15985 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15986 fputc (',', f);
15987 }
15988 fprintf (f, ">");
15989 }
15990 return;
15991
15992 case CONST_STRING:
15993 fprintf (f, "\"%s\"", XSTR (x, 0));
15994 return;
15995
15996 case SYMBOL_REF:
15997 fprintf (f, "`%s'", XSTR (x, 0));
15998 return;
15999
16000 case LABEL_REF:
16001 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16002 return;
16003
16004 case CONST:
16005 arm_print_value (f, XEXP (x, 0));
16006 return;
16007
16008 case PLUS:
16009 arm_print_value (f, XEXP (x, 0));
16010 fprintf (f, "+");
16011 arm_print_value (f, XEXP (x, 1));
16012 return;
16013
16014 case PC:
16015 fprintf (f, "pc");
16016 return;
16017
16018 default:
16019 fprintf (f, "????");
16020 return;
16021 }
16022 }
16023 \f
16024 /* Routines for manipulation of the constant pool. */
16025
16026 /* Arm instructions cannot load a large constant directly into a
16027 register; they have to come from a pc relative load. The constant
16028 must therefore be placed in the addressable range of the pc
16029 relative load. Depending on the precise pc relative load
16030 instruction the range is somewhere between 256 bytes and 4k. This
16031 means that we often have to dump a constant inside a function, and
16032 generate code to branch around it.
16033
16034 It is important to minimize this, since the branches will slow
16035 things down and make the code larger.
16036
16037 Normally we can hide the table after an existing unconditional
16038 branch so that there is no interruption of the flow, but in the
16039 worst case the code looks like this:
16040
16041 ldr rn, L1
16042 ...
16043 b L2
16044 align
16045 L1: .long value
16046 L2:
16047 ...
16048
16049 ldr rn, L3
16050 ...
16051 b L4
16052 align
16053 L3: .long value
16054 L4:
16055 ...
16056
16057 We fix this by performing a scan after scheduling, which notices
16058 which instructions need to have their operands fetched from the
16059 constant table and builds the table.
16060
16061 The algorithm starts by building a table of all the constants that
16062 need fixing up and all the natural barriers in the function (places
16063 where a constant table can be dropped without breaking the flow).
16064 For each fixup we note how far the pc-relative replacement will be
16065 able to reach and the offset of the instruction into the function.
16066
16067 Having built the table we then group the fixes together to form
16068 tables that are as large as possible (subject to addressing
16069 constraints) and emit each table of constants after the last
16070 barrier that is within range of all the instructions in the group.
16071 If a group does not contain a barrier, then we forcibly create one
16072 by inserting a jump instruction into the flow. Once the table has
16073 been inserted, the insns are then modified to reference the
16074 relevant entry in the pool.
16075
16076 Possible enhancements to the algorithm (not implemented) are:
16077
16078 1) For some processors and object formats, there may be benefit in
16079 aligning the pools to the start of cache lines; this alignment
16080 would need to be taken into account when calculating addressability
16081 of a pool. */
16082
16083 /* These typedefs are located at the start of this file, so that
16084 they can be used in the prototypes there. This comment is to
16085 remind readers of that fact so that the following structures
16086 can be understood more easily.
16087
16088 typedef struct minipool_node Mnode;
16089 typedef struct minipool_fixup Mfix; */
16090
16091 struct minipool_node
16092 {
16093 /* Doubly linked chain of entries. */
16094 Mnode * next;
16095 Mnode * prev;
16096 /* The maximum offset into the code at which this entry can be placed.  While
16097    pushing fixes for forward references, all entries are sorted in order
16098    of increasing max_address.  */
16099 HOST_WIDE_INT max_address;
16100 /* Similarly for an entry inserted for a backwards ref. */
16101 HOST_WIDE_INT min_address;
16102 /* The number of fixes referencing this entry. This can become zero
16103 if we "unpush" an entry. In this case we ignore the entry when we
16104 come to emit the code. */
16105 int refcount;
16106 /* The offset from the start of the minipool. */
16107 HOST_WIDE_INT offset;
16108 /* The value in table. */
16109 rtx value;
16110 /* The mode of value. */
16111 machine_mode mode;
16112 /* The size of the value. With iWMMXt enabled
16113 sizes > 4 also imply an alignment of 8 bytes.  */
16114 int fix_size;
16115 };
16116
16117 struct minipool_fixup
16118 {
16119 Mfix * next;
16120 rtx_insn * insn;
16121 HOST_WIDE_INT address;
16122 rtx * loc;
16123 machine_mode mode;
16124 int fix_size;
16125 rtx value;
16126 Mnode * minipool;
16127 HOST_WIDE_INT forwards;
16128 HOST_WIDE_INT backwards;
16129 };
16130
16131 /* Fixes less than a word need padding out to a word boundary. */
16132 #define MINIPOOL_FIX_SIZE(mode) \
16133 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
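/* For example, an HImode fix (2 bytes) is padded to 4 bytes, while SImode
   and DImode fixes keep their natural sizes of 4 and 8 bytes.  */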
16134
16135 static Mnode * minipool_vector_head;
16136 static Mnode * minipool_vector_tail;
16137 static rtx_code_label *minipool_vector_label;
16138 static int minipool_pad;
16139
16140 /* The linked list of all minipool fixes required for this function. */
16141 Mfix * minipool_fix_head;
16142 Mfix * minipool_fix_tail;
16143 /* The fix entry for the current minipool, once it has been placed. */
16144 Mfix * minipool_barrier;
16145
16146 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16147 #define JUMP_TABLES_IN_TEXT_SECTION 0
16148 #endif
16149
16150 static HOST_WIDE_INT
16151 get_jump_table_size (rtx_jump_table_data *insn)
16152 {
16153 /* ADDR_VECs only take room if read-only data goes into the text
16154    section.  */
16155 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16156 {
16157 rtx body = PATTERN (insn);
16158 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16159 HOST_WIDE_INT size;
16160 HOST_WIDE_INT modesize;
16161
16162 modesize = GET_MODE_SIZE (GET_MODE (body));
16163 size = modesize * XVECLEN (body, elt);
16164 switch (modesize)
16165 {
16166 case 1:
16167 /* Round up size of TBB table to a halfword boundary. */
16168 size = (size + 1) & ~(HOST_WIDE_INT)1;
16169 break;
16170 case 2:
16171 /* No padding necessary for TBH. */
16172 break;
16173 case 4:
16174 /* Add two bytes for alignment on Thumb. */
16175 if (TARGET_THUMB)
16176 size += 2;
16177 break;
16178 default:
16179 gcc_unreachable ();
16180 }
16181 return size;
16182 }
16183
16184 return 0;
16185 }
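/* For example, when jump tables live in the text section, a byte-sized
   (TBB-style) ADDR_DIFF_VEC with five entries occupies 5 bytes, rounded up
   to 6; a word-sized table with three entries occupies 12 bytes, plus 2
   bytes of Thumb alignment padding.  */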
16186
16187 /* Return the maximum amount of padding that will be inserted before
16188 label LABEL. */
16189
16190 static HOST_WIDE_INT
16191 get_label_padding (rtx label)
16192 {
16193 HOST_WIDE_INT align, min_insn_size;
16194
16195 align = 1 << label_to_alignment (label);
16196 min_insn_size = TARGET_THUMB ? 2 : 4;
16197 return align > min_insn_size ? align - min_insn_size : 0;
16198 }
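/* For example, a label aligned to 8 bytes (label_to_alignment == 3) can be
   preceded by up to 8 - 2 = 6 bytes of padding on Thumb, or 8 - 4 = 4 bytes
   on ARM.  */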
16199
16200 /* Move a minipool fix MP from its current location to before MAX_MP.
16201 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16202 constraints may need updating. */
16203 static Mnode *
16204 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16205 HOST_WIDE_INT max_address)
16206 {
16207 /* The code below assumes these are different. */
16208 gcc_assert (mp != max_mp);
16209
16210 if (max_mp == NULL)
16211 {
16212 if (max_address < mp->max_address)
16213 mp->max_address = max_address;
16214 }
16215 else
16216 {
16217 if (max_address > max_mp->max_address - mp->fix_size)
16218 mp->max_address = max_mp->max_address - mp->fix_size;
16219 else
16220 mp->max_address = max_address;
16221
16222 /* Unlink MP from its current position. Since max_mp is non-null,
16223 mp->prev must be non-null. */
16224 mp->prev->next = mp->next;
16225 if (mp->next != NULL)
16226 mp->next->prev = mp->prev;
16227 else
16228 minipool_vector_tail = mp->prev;
16229
16230 /* Re-insert it before MAX_MP. */
16231 mp->next = max_mp;
16232 mp->prev = max_mp->prev;
16233 max_mp->prev = mp;
16234
16235 if (mp->prev != NULL)
16236 mp->prev->next = mp;
16237 else
16238 minipool_vector_head = mp;
16239 }
16240
16241 /* Save the new entry. */
16242 max_mp = mp;
16243
16244 /* Scan over the preceding entries and adjust their addresses as
16245 required. */
16246 while (mp->prev != NULL
16247 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16248 {
16249 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16250 mp = mp->prev;
16251 }
16252
16253 return max_mp;
16254 }
16255
16256 /* Add a constant to the minipool for a forward reference. Returns the
16257 node added or NULL if the constant will not fit in this pool. */
16258 static Mnode *
16259 add_minipool_forward_ref (Mfix *fix)
16260 {
16261 /* If set, max_mp is the first pool_entry that has a lower
16262 constraint than the one we are trying to add. */
16263 Mnode * max_mp = NULL;
16264 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16265 Mnode * mp;
16266
16267 /* If the minipool starts before the end of FIX->INSN then this FIX
16268 can not be placed into the current pool. Furthermore, adding the
16269 new constant pool entry may cause the pool to start FIX_SIZE bytes
16270 earlier. */
16271 if (minipool_vector_head &&
16272 (fix->address + get_attr_length (fix->insn)
16273 >= minipool_vector_head->max_address - fix->fix_size))
16274 return NULL;
16275
16276 /* Scan the pool to see if a constant with the same value has
16277 already been added. While we are doing this, also note the
16278 location where we must insert the constant if it doesn't already
16279 exist. */
16280 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16281 {
16282 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16283 && fix->mode == mp->mode
16284 && (!LABEL_P (fix->value)
16285 || (CODE_LABEL_NUMBER (fix->value)
16286 == CODE_LABEL_NUMBER (mp->value)))
16287 && rtx_equal_p (fix->value, mp->value))
16288 {
16289 /* More than one fix references this entry. */
16290 mp->refcount++;
16291 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16292 }
16293
16294 /* Note the insertion point if necessary. */
16295 if (max_mp == NULL
16296 && mp->max_address > max_address)
16297 max_mp = mp;
16298
16299 /* If we are inserting an 8-byte aligned quantity and
16300 we have not already found an insertion point, then
16301 make sure that all such 8-byte aligned quantities are
16302 placed at the start of the pool. */
16303 if (ARM_DOUBLEWORD_ALIGN
16304 && max_mp == NULL
16305 && fix->fix_size >= 8
16306 && mp->fix_size < 8)
16307 {
16308 max_mp = mp;
16309 max_address = mp->max_address;
16310 }
16311 }
16312
16313 /* The value is not currently in the minipool, so we need to create
16314 a new entry for it. If MAX_MP is NULL, the entry will be put on
16315 the end of the list since the placement is less constrained than
16316 any existing entry. Otherwise, we insert the new fix before
16317 MAX_MP and, if necessary, adjust the constraints on the other
16318 entries. */
16319 mp = XNEW (Mnode);
16320 mp->fix_size = fix->fix_size;
16321 mp->mode = fix->mode;
16322 mp->value = fix->value;
16323 mp->refcount = 1;
16324 /* Not yet required for a backwards ref. */
16325 mp->min_address = -65536;
16326
16327 if (max_mp == NULL)
16328 {
16329 mp->max_address = max_address;
16330 mp->next = NULL;
16331 mp->prev = minipool_vector_tail;
16332
16333 if (mp->prev == NULL)
16334 {
16335 minipool_vector_head = mp;
16336 minipool_vector_label = gen_label_rtx ();
16337 }
16338 else
16339 mp->prev->next = mp;
16340
16341 minipool_vector_tail = mp;
16342 }
16343 else
16344 {
16345 if (max_address > max_mp->max_address - mp->fix_size)
16346 mp->max_address = max_mp->max_address - mp->fix_size;
16347 else
16348 mp->max_address = max_address;
16349
16350 mp->next = max_mp;
16351 mp->prev = max_mp->prev;
16352 max_mp->prev = mp;
16353 if (mp->prev != NULL)
16354 mp->prev->next = mp;
16355 else
16356 minipool_vector_head = mp;
16357 }
16358
16359 /* Save the new entry. */
16360 max_mp = mp;
16361
16362 /* Scan over the preceding entries and adjust their addresses as
16363 required. */
16364 while (mp->prev != NULL
16365 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16366 {
16367 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16368 mp = mp->prev;
16369 }
16370
16371 return max_mp;
16372 }
16373
16374 static Mnode *
16375 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16376 HOST_WIDE_INT min_address)
16377 {
16378 HOST_WIDE_INT offset;
16379
16380 /* The code below assumes these are different. */
16381 gcc_assert (mp != min_mp);
16382
16383 if (min_mp == NULL)
16384 {
16385 if (min_address > mp->min_address)
16386 mp->min_address = min_address;
16387 }
16388 else
16389 {
16390 /* We will adjust this below if it is too loose. */
16391 mp->min_address = min_address;
16392
16393 /* Unlink MP from its current position. Since min_mp is non-null,
16394 mp->next must be non-null. */
16395 mp->next->prev = mp->prev;
16396 if (mp->prev != NULL)
16397 mp->prev->next = mp->next;
16398 else
16399 minipool_vector_head = mp->next;
16400
16401 /* Reinsert it after MIN_MP. */
16402 mp->prev = min_mp;
16403 mp->next = min_mp->next;
16404 min_mp->next = mp;
16405 if (mp->next != NULL)
16406 mp->next->prev = mp;
16407 else
16408 minipool_vector_tail = mp;
16409 }
16410
16411 min_mp = mp;
16412
16413 offset = 0;
16414 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16415 {
16416 mp->offset = offset;
16417 if (mp->refcount > 0)
16418 offset += mp->fix_size;
16419
16420 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16421 mp->next->min_address = mp->min_address + mp->fix_size;
16422 }
16423
16424 return min_mp;
16425 }
16426
16427 /* Add a constant to the minipool for a backward reference. Returns the
16428 node added or NULL if the constant will not fit in this pool.
16429
16430 Note that the code for insertion of a backwards reference can be
16431 somewhat confusing because the calculated offsets for each fix do
16432 not take into account the size of the pool (which is still under
16433 construction). */
16434 static Mnode *
16435 add_minipool_backward_ref (Mfix *fix)
16436 {
16437 /* If set, min_mp is the last pool_entry that has a lower constraint
16438 than the one we are trying to add. */
16439 Mnode *min_mp = NULL;
16440 /* This can be negative, since it is only a constraint. */
16441 HOST_WIDE_INT min_address = fix->address - fix->backwards;
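  /* MIN_ADDRESS is the lowest address at which the new pool entry could be
     placed while still being reachable backwards from FIX's insn.  */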
16442 Mnode *mp;
16443
16444 /* If we can't reach the current pool from this insn, or if we can't
16445 insert this entry at the end of the pool without pushing other
16446 fixes out of range, then we don't try. This ensures that we
16447 can't fail later on. */
16448 if (min_address >= minipool_barrier->address
16449 || (minipool_vector_tail->min_address + fix->fix_size
16450 >= minipool_barrier->address))
16451 return NULL;
16452
16453 /* Scan the pool to see if a constant with the same value has
16454 already been added. While we are doing this, also note the
16455 location where we must insert the constant if it doesn't already
16456 exist. */
16457 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16458 {
16459 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16460 && fix->mode == mp->mode
16461 && (!LABEL_P (fix->value)
16462 || (CODE_LABEL_NUMBER (fix->value)
16463 == CODE_LABEL_NUMBER (mp->value)))
16464 && rtx_equal_p (fix->value, mp->value)
16465 /* Check that there is enough slack to move this entry to the
16466 end of the table (this is conservative). */
16467 && (mp->max_address
16468 > (minipool_barrier->address
16469 + minipool_vector_tail->offset
16470 + minipool_vector_tail->fix_size)))
16471 {
16472 mp->refcount++;
16473 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16474 }
16475
16476 if (min_mp != NULL)
16477 mp->min_address += fix->fix_size;
16478 else
16479 {
16480 /* Note the insertion point if necessary. */
16481 if (mp->min_address < min_address)
16482 {
16483 /* For now, we do not allow the insertion of 8-byte alignment
16484 requiring nodes anywhere but at the start of the pool. */
16485 if (ARM_DOUBLEWORD_ALIGN
16486 && fix->fix_size >= 8 && mp->fix_size < 8)
16487 return NULL;
16488 else
16489 min_mp = mp;
16490 }
16491 else if (mp->max_address
16492 < minipool_barrier->address + mp->offset + fix->fix_size)
16493 {
16494 /* Inserting before this entry would push the fix beyond
16495 its maximum address (which can happen if we have
16496 re-located a forwards fix); force the new fix to come
16497 after it. */
16498 if (ARM_DOUBLEWORD_ALIGN
16499 && fix->fix_size >= 8 && mp->fix_size < 8)
16500 return NULL;
16501 else
16502 {
16503 min_mp = mp;
16504 min_address = mp->min_address + fix->fix_size;
16505 }
16506 }
16507 /* Do not insert a non-8-byte aligned quantity before 8-byte
16508 aligned quantities. */
16509 else if (ARM_DOUBLEWORD_ALIGN
16510 && fix->fix_size < 8
16511 && mp->fix_size >= 8)
16512 {
16513 min_mp = mp;
16514 min_address = mp->min_address + fix->fix_size;
16515 }
16516 }
16517 }
16518
16519 /* We need to create a new entry. */
16520 mp = XNEW (Mnode);
16521 mp->fix_size = fix->fix_size;
16522 mp->mode = fix->mode;
16523 mp->value = fix->value;
16524 mp->refcount = 1;
16525 mp->max_address = minipool_barrier->address + 65536;
16526
16527 mp->min_address = min_address;
16528
16529 if (min_mp == NULL)
16530 {
16531 mp->prev = NULL;
16532 mp->next = minipool_vector_head;
16533
16534 if (mp->next == NULL)
16535 {
16536 minipool_vector_tail = mp;
16537 minipool_vector_label = gen_label_rtx ();
16538 }
16539 else
16540 mp->next->prev = mp;
16541
16542 minipool_vector_head = mp;
16543 }
16544 else
16545 {
16546 mp->next = min_mp->next;
16547 mp->prev = min_mp;
16548 min_mp->next = mp;
16549
16550 if (mp->next != NULL)
16551 mp->next->prev = mp;
16552 else
16553 minipool_vector_tail = mp;
16554 }
16555
16556 /* Save the new entry. */
16557 min_mp = mp;
16558
16559 if (mp->prev)
16560 mp = mp->prev;
16561 else
16562 mp->offset = 0;
16563
16564 /* Scan over the following entries and adjust their offsets. */
16565 while (mp->next != NULL)
16566 {
16567 if (mp->next->min_address < mp->min_address + mp->fix_size)
16568 mp->next->min_address = mp->min_address + mp->fix_size;
16569
16570 if (mp->refcount)
16571 mp->next->offset = mp->offset + mp->fix_size;
16572 else
16573 mp->next->offset = mp->offset;
16574
16575 mp = mp->next;
16576 }
16577
16578 return min_mp;
16579 }
16580
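/* Assign an offset within the pool that ends at BARRIER to every entry
   that is still referenced, accumulating the entries' sizes in list order.  */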
16581 static void
16582 assign_minipool_offsets (Mfix *barrier)
16583 {
16584 HOST_WIDE_INT offset = 0;
16585 Mnode *mp;
16586
16587 minipool_barrier = barrier;
16588
16589 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16590 {
16591 mp->offset = offset;
16592
16593 if (mp->refcount > 0)
16594 offset += mp->fix_size;
16595 }
16596 }
16597
16598 /* Output the literal table.  */
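/* The pool is emitted as: a fresh label, an alignment directive (8- or
   4-byte), the pool's own label, one consttable_<N> entry for every node
   that is still referenced, a consttable_end marker and finally a barrier.  */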
16599 static void
16600 dump_minipool (rtx_insn *scan)
16601 {
16602 Mnode * mp;
16603 Mnode * nmp;
16604 int align64 = 0;
16605
16606 if (ARM_DOUBLEWORD_ALIGN)
16607 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16608 if (mp->refcount > 0 && mp->fix_size >= 8)
16609 {
16610 align64 = 1;
16611 break;
16612 }
16613
16614 if (dump_file)
16615 fprintf (dump_file,
16616 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16617 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16618
16619 scan = emit_label_after (gen_label_rtx (), scan);
16620 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16621 scan = emit_label_after (minipool_vector_label, scan);
16622
16623 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16624 {
16625 if (mp->refcount > 0)
16626 {
16627 if (dump_file)
16628 {
16629 fprintf (dump_file,
16630 ";; Offset %u, min %ld, max %ld ",
16631 (unsigned) mp->offset, (unsigned long) mp->min_address,
16632 (unsigned long) mp->max_address);
16633 arm_print_value (dump_file, mp->value);
16634 fputc ('\n', dump_file);
16635 }
16636
16637 switch (GET_MODE_SIZE (mp->mode))
16638 {
16639 #ifdef HAVE_consttable_1
16640 case 1:
16641 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16642 break;
16643
16644 #endif
16645 #ifdef HAVE_consttable_2
16646 case 2:
16647 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16648 break;
16649
16650 #endif
16651 #ifdef HAVE_consttable_4
16652 case 4:
16653 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16654 break;
16655
16656 #endif
16657 #ifdef HAVE_consttable_8
16658 case 8:
16659 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16660 break;
16661
16662 #endif
16663 #ifdef HAVE_consttable_16
16664 case 16:
16665 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16666 break;
16667
16668 #endif
16669 default:
16670 gcc_unreachable ();
16671 }
16672 }
16673
16674 nmp = mp->next;
16675 free (mp);
16676 }
16677
16678 minipool_vector_head = minipool_vector_tail = NULL;
16679 scan = emit_insn_after (gen_consttable_end (), scan);
16680 scan = emit_barrier_after (scan);
16681 }
16682
16683 /* Return the cost of forcibly inserting a barrier after INSN. */
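/* Lower values indicate more attractive locations; create_fix_barrier keeps
   the latest insn scanned whose cost is no worse than the best seen so far.  */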
16684 static int
16685 arm_barrier_cost (rtx insn)
16686 {
16687 /* Basing the location of the pool on the loop depth is preferable,
16688 but at the moment, the basic block information seems to be
16689 corrupted by this stage of the compilation. */
16690 int base_cost = 50;
16691 rtx next = next_nonnote_insn (insn);
16692
16693 if (next != NULL && LABEL_P (next))
16694 base_cost -= 20;
16695
16696 switch (GET_CODE (insn))
16697 {
16698 case CODE_LABEL:
16699 /* It will always be better to place the table before the label, rather
16700 than after it. */
16701 return 50;
16702
16703 case INSN:
16704 case CALL_INSN:
16705 return base_cost;
16706
16707 case JUMP_INSN:
16708 return base_cost - 10;
16709
16710 default:
16711 return base_cost + 10;
16712 }
16713 }
16714
16715 /* Find the best place in the insn stream in the range
16716 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16717 Create the barrier by inserting a jump and add a new fix entry for
16718 it. */
16719 static Mfix *
16720 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16721 {
16722 HOST_WIDE_INT count = 0;
16723 rtx_barrier *barrier;
16724 rtx_insn *from = fix->insn;
16725 /* The instruction after which we will insert the jump. */
16726 rtx_insn *selected = NULL;
16727 int selected_cost;
16728 /* The address at which the jump instruction will be placed. */
16729 HOST_WIDE_INT selected_address;
16730 Mfix * new_fix;
16731 HOST_WIDE_INT max_count = max_address - fix->address;
16732 rtx_code_label *label = gen_label_rtx ();
16733
16734 selected_cost = arm_barrier_cost (from);
16735 selected_address = fix->address;
16736
16737 while (from && count < max_count)
16738 {
16739 rtx_jump_table_data *tmp;
16740 int new_cost;
16741
16742 /* This code shouldn't have been called if there was a natural barrier
16743 within range. */
16744 gcc_assert (!BARRIER_P (from));
16745
16746 /* Count the length of this insn. This must stay in sync with the
16747 code that pushes minipool fixes. */
16748 if (LABEL_P (from))
16749 count += get_label_padding (from);
16750 else
16751 count += get_attr_length (from);
16752
16753 /* If there is a jump table, add its length. */
16754 if (tablejump_p (from, NULL, &tmp))
16755 {
16756 count += get_jump_table_size (tmp);
16757
16758 /* Jump tables aren't in a basic block, so base the cost on
16759 the dispatch insn. If we select this location, we will
16760 still put the pool after the table. */
16761 new_cost = arm_barrier_cost (from);
16762
16763 if (count < max_count
16764 && (!selected || new_cost <= selected_cost))
16765 {
16766 selected = tmp;
16767 selected_cost = new_cost;
16768 selected_address = fix->address + count;
16769 }
16770
16771 /* Continue after the dispatch table. */
16772 from = NEXT_INSN (tmp);
16773 continue;
16774 }
16775
16776 new_cost = arm_barrier_cost (from);
16777
16778 if (count < max_count
16779 && (!selected || new_cost <= selected_cost))
16780 {
16781 selected = from;
16782 selected_cost = new_cost;
16783 selected_address = fix->address + count;
16784 }
16785
16786 from = NEXT_INSN (from);
16787 }
16788
16789 /* Make sure that we found a place to insert the jump. */
16790 gcc_assert (selected);
16791
16792 /* Make sure we do not split a call and its corresponding
16793 CALL_ARG_LOCATION note. */
16794 if (CALL_P (selected))
16795 {
16796 rtx_insn *next = NEXT_INSN (selected);
16797 if (next && NOTE_P (next)
16798 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16799 selected = next;
16800 }
16801
16802 /* Create a new JUMP_INSN that branches around a barrier. */
16803 from = emit_jump_insn_after (gen_jump (label), selected);
16804 JUMP_LABEL (from) = label;
16805 barrier = emit_barrier_after (from);
16806 emit_label_after (label, barrier);
16807
16808 /* Create a minipool barrier entry for the new barrier. */
16809 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16810 new_fix->insn = barrier;
16811 new_fix->address = selected_address;
16812 new_fix->next = fix->next;
16813 fix->next = new_fix;
16814
16815 return new_fix;
16816 }
16817
16818 /* Record that there is a natural barrier in the insn stream at
16819 ADDRESS. */
16820 static void
16821 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16822 {
16823 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16824
16825 fix->insn = insn;
16826 fix->address = address;
16827
16828 fix->next = NULL;
16829 if (minipool_fix_head != NULL)
16830 minipool_fix_tail->next = fix;
16831 else
16832 minipool_fix_head = fix;
16833
16834 minipool_fix_tail = fix;
16835 }
16836
16837 /* Record INSN, which will need fixing up to load a value from the
16838 minipool. ADDRESS is the offset of the insn since the start of the
16839 function; LOC is a pointer to the part of the insn which requires
16840 fixing; VALUE is the constant that must be loaded, which is of type
16841 MODE. */
16842 static void
16843 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16844 machine_mode mode, rtx value)
16845 {
16846 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16847
16848 fix->insn = insn;
16849 fix->address = address;
16850 fix->loc = loc;
16851 fix->mode = mode;
16852 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16853 fix->value = value;
16854 fix->forwards = get_attr_pool_range (insn);
16855 fix->backwards = get_attr_neg_pool_range (insn);
16856 fix->minipool = NULL;
16857
16858 /* If an insn doesn't have a range defined for it, then it isn't
16859 expecting to be reworked by this code. Better to stop now than
16860 to generate duff assembly code. */
16861 gcc_assert (fix->forwards || fix->backwards);
16862
16863 /* If an entry requires 8-byte alignment then assume all constant pools
16864 require 4 bytes of padding. Trying to do this later on a per-pool
16865 basis is awkward because existing pool entries have to be modified. */
16866 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16867 minipool_pad = 4;
16868
16869 if (dump_file)
16870 {
16871 fprintf (dump_file,
16872 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16873 GET_MODE_NAME (mode),
16874 INSN_UID (insn), (unsigned long) address,
16875 -1 * (long)fix->backwards, (long)fix->forwards);
16876 arm_print_value (dump_file, fix->value);
16877 fprintf (dump_file, "\n");
16878 }
16879
16880 /* Add it to the chain of fixes. */
16881 fix->next = NULL;
16882
16883 if (minipool_fix_head != NULL)
16884 minipool_fix_tail->next = fix;
16885 else
16886 minipool_fix_head = fix;
16887
16888 minipool_fix_tail = fix;
16889 }
16890
16891 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16892 constant inline. Returns 99 if we always want the value synthesized
16893 rather than placed in a literal pool. */
16894 int
16895 arm_max_const_double_inline_cost ()
16896 {
16897 /* Let the value get synthesized to avoid the use of literal pools. */
16898 if (arm_disable_literal_pool)
16899 return 99;
16900
16901 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16902 }
16903
16904 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16905 Returns the number of insns needed, or 99 if we don't know how to
16906 do it. */
16907 int
16908 arm_const_double_inline_cost (rtx val)
16909 {
16910 rtx lowpart, highpart;
16911 machine_mode mode;
16912
16913 mode = GET_MODE (val);
16914
16915 if (mode == VOIDmode)
16916 mode = DImode;
16917
16918 gcc_assert (GET_MODE_SIZE (mode) == 8);
16919
16920 lowpart = gen_lowpart (SImode, val);
16921 highpart = gen_highpart_mode (SImode, mode, val);
16922
16923 gcc_assert (CONST_INT_P (lowpart));
16924 gcc_assert (CONST_INT_P (highpart));
16925
16926 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16927 NULL_RTX, NULL_RTX, 0, 0)
16928 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16929 NULL_RTX, NULL_RTX, 0, 0));
16930 }
16931
16932 /* Cost of loading a SImode constant. */
16933 static inline int
16934 arm_const_inline_cost (enum rtx_code code, rtx val)
16935 {
16936 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16937 NULL_RTX, NULL_RTX, 1, 0);
16938 }
16939
16940 /* Return true if it is worthwhile to split a 64-bit constant into two
16941 32-bit operations. This is the case if optimizing for size, or
16942 if we have load delay slots, or if one 32-bit part can be done with
16943 a single data operation. */
16944 bool
16945 arm_const_double_by_parts (rtx val)
16946 {
16947 machine_mode mode = GET_MODE (val);
16948 rtx part;
16949
16950 if (optimize_size || arm_ld_sched)
16951 return true;
16952
16953 if (mode == VOIDmode)
16954 mode = DImode;
16955
16956 part = gen_highpart_mode (SImode, mode, val);
16957
16958 gcc_assert (CONST_INT_P (part));
16959
16960 if (const_ok_for_arm (INTVAL (part))
16961 || const_ok_for_arm (~INTVAL (part)))
16962 return true;
16963
16964 part = gen_lowpart (SImode, val);
16965
16966 gcc_assert (CONST_INT_P (part));
16967
16968 if (const_ok_for_arm (INTVAL (part))
16969 || const_ok_for_arm (~INTVAL (part)))
16970 return true;
16971
16972 return false;
16973 }
16974
16975 /* Return true if it is possible to inline both the high and low parts
16976 of a 64-bit constant into 32-bit data processing instructions. */
16977 bool
16978 arm_const_double_by_immediates (rtx val)
16979 {
16980 machine_mode mode = GET_MODE (val);
16981 rtx part;
16982
16983 if (mode == VOIDmode)
16984 mode = DImode;
16985
16986 part = gen_highpart_mode (SImode, mode, val);
16987
16988 gcc_assert (CONST_INT_P (part));
16989
16990 if (!const_ok_for_arm (INTVAL (part)))
16991 return false;
16992
16993 part = gen_lowpart (SImode, val);
16994
16995 gcc_assert (CONST_INT_P (part));
16996
16997 if (!const_ok_for_arm (INTVAL (part)))
16998 return false;
16999
17000 return true;
17001 }
17002
17003 /* Scan INSN and note any of its operands that need fixing.
17004 If DO_PUSHES is false we do not actually push any of the fixups
17005 needed. */
17006 static void
17007 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17008 {
17009 int opno;
17010
17011 extract_constrain_insn (insn);
17012
17013 if (recog_data.n_alternatives == 0)
17014 return;
17015
17016 /* Fill in recog_op_alt with information about the constraints of
17017 this insn. */
17018 preprocess_constraints (insn);
17019
17020 const operand_alternative *op_alt = which_op_alt ();
17021 for (opno = 0; opno < recog_data.n_operands; opno++)
17022 {
17023 /* Things we need to fix can only occur in inputs. */
17024 if (recog_data.operand_type[opno] != OP_IN)
17025 continue;
17026
17027 /* If this alternative is a memory reference, then any mention
17028 of constants in this alternative is really to fool reload
17029 into allowing us to accept one there. We need to fix them up
17030 now so that we output the right code. */
17031 if (op_alt[opno].memory_ok)
17032 {
17033 rtx op = recog_data.operand[opno];
17034
17035 if (CONSTANT_P (op))
17036 {
17037 if (do_pushes)
17038 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17039 recog_data.operand_mode[opno], op);
17040 }
17041 else if (MEM_P (op)
17042 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17043 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17044 {
17045 if (do_pushes)
17046 {
17047 rtx cop = avoid_constant_pool_reference (op);
17048
17049 /* Casting the address of something to a mode narrower
17050 than a word can cause avoid_constant_pool_reference()
17051 to return the pool reference itself. That's no good to
17052 us here.  Let's just hope that we can use the
17053 constant pool value directly. */
17054 if (op == cop)
17055 cop = get_pool_constant (XEXP (op, 0));
17056
17057 push_minipool_fix (insn, address,
17058 recog_data.operand_loc[opno],
17059 recog_data.operand_mode[opno], cop);
17060 }
17061
17062 }
17063 }
17064 }
17065
17066 return;
17067 }
17068
17069 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17070 be useful in the next conditional jump insn. */
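/* Concretely, a 'mov rd, rs' whose source or destination is the register
   tested by the block's final cbranchsi4_insn is rewritten as
   'subs rd, rs, #0', and the branch is updated to test rd instead.  */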
17071
17072 static void
17073 thumb1_reorg (void)
17074 {
17075 basic_block bb;
17076
17077 FOR_EACH_BB_FN (bb, cfun)
17078 {
17079 rtx dest, src;
17080 rtx pat, op0, set = NULL;
17081 rtx_insn *prev, *insn = BB_END (bb);
17082 bool insn_clobbered = false;
17083
17084 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17085 insn = PREV_INSN (insn);
17086
17087 /* Find the last cbranchsi4_insn in basic block BB. */
17088 if (insn == BB_HEAD (bb)
17089 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17090 continue;
17091
17092 /* Get the register with which we are comparing. */
17093 pat = PATTERN (insn);
17094 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17095
17096 /* Find the first flag setting insn before INSN in basic block BB. */
17097 gcc_assert (insn != BB_HEAD (bb));
17098 for (prev = PREV_INSN (insn);
17099 (!insn_clobbered
17100 && prev != BB_HEAD (bb)
17101 && (NOTE_P (prev)
17102 || DEBUG_INSN_P (prev)
17103 || ((set = single_set (prev)) != NULL
17104 && get_attr_conds (prev) == CONDS_NOCOND)));
17105 prev = PREV_INSN (prev))
17106 {
17107 if (reg_set_p (op0, prev))
17108 insn_clobbered = true;
17109 }
17110
17111 /* Skip if op0 is clobbered by insn other than prev. */
17112 if (insn_clobbered)
17113 continue;
17114
17115 if (!set)
17116 continue;
17117
17118 dest = SET_DEST (set);
17119 src = SET_SRC (set);
17120 if (!low_register_operand (dest, SImode)
17121 || !low_register_operand (src, SImode))
17122 continue;
17123
17124 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17125 in INSN. Both src and dest of the move insn are checked. */
17126 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17127 {
17128 dest = copy_rtx (dest);
17129 src = copy_rtx (src);
17130 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17131 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17132 INSN_CODE (prev) = -1;
17133 /* Set test register in INSN to dest. */
17134 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17135 INSN_CODE (insn) = -1;
17136 }
17137 }
17138 }
17139
17140 /* Convert instructions to their cc-clobbering variant if possible, since
17141 that allows us to use smaller encodings. */
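/* For example, once the condition codes are known to be dead, a plain
   'add r0, r0, r1' can be turned into the flag-setting 'adds r0, r0, r1',
   which has a 16-bit encoding.  */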
17142
17143 static void
17144 thumb2_reorg (void)
17145 {
17146 basic_block bb;
17147 regset_head live;
17148
17149 INIT_REG_SET (&live);
17150
17151 /* We are freeing block_for_insn in the toplev to keep compatibility
17152 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17153 compute_bb_for_insn ();
17154 df_analyze ();
17155
17156 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17157
17158 FOR_EACH_BB_FN (bb, cfun)
17159 {
17160 if (current_tune->disparage_flag_setting_t16_encodings
17161 && optimize_bb_for_speed_p (bb))
17162 continue;
17163
17164 rtx_insn *insn;
17165 Convert_Action action = SKIP;
17166 Convert_Action action_for_partial_flag_setting
17167 = (current_tune->disparage_partial_flag_setting_t16_encodings
17168 && optimize_bb_for_speed_p (bb))
17169 ? SKIP : CONV;
17170
17171 COPY_REG_SET (&live, DF_LR_OUT (bb));
17172 df_simulate_initialize_backwards (bb, &live);
17173 FOR_BB_INSNS_REVERSE (bb, insn)
17174 {
17175 if (NONJUMP_INSN_P (insn)
17176 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17177 && GET_CODE (PATTERN (insn)) == SET)
17178 {
17179 action = SKIP;
17180 rtx pat = PATTERN (insn);
17181 rtx dst = XEXP (pat, 0);
17182 rtx src = XEXP (pat, 1);
17183 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17184
17185 if (!OBJECT_P (src))
17186 op0 = XEXP (src, 0);
17187
17188 if (BINARY_P (src))
17189 op1 = XEXP (src, 1);
17190
17191 if (low_register_operand (dst, SImode))
17192 {
17193 switch (GET_CODE (src))
17194 {
17195 case PLUS:
17196 /* Adding two registers and storing the result
17197 in the first source is already a 16-bit
17198 operation. */
17199 if (rtx_equal_p (dst, op0)
17200 && register_operand (op1, SImode))
17201 break;
17202
17203 if (low_register_operand (op0, SImode))
17204 {
17205 /* ADDS <Rd>,<Rn>,<Rm> */
17206 if (low_register_operand (op1, SImode))
17207 action = CONV;
17208 /* ADDS <Rdn>,#<imm8> */
17209 /* SUBS <Rdn>,#<imm8> */
17210 else if (rtx_equal_p (dst, op0)
17211 && CONST_INT_P (op1)
17212 && IN_RANGE (INTVAL (op1), -255, 255))
17213 action = CONV;
17214 /* ADDS <Rd>,<Rn>,#<imm3> */
17215 /* SUBS <Rd>,<Rn>,#<imm3> */
17216 else if (CONST_INT_P (op1)
17217 && IN_RANGE (INTVAL (op1), -7, 7))
17218 action = CONV;
17219 }
17220 /* ADCS <Rd>, <Rn> */
17221 else if (GET_CODE (XEXP (src, 0)) == PLUS
17222 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17223 && low_register_operand (XEXP (XEXP (src, 0), 1),
17224 SImode)
17225 && COMPARISON_P (op1)
17226 && cc_register (XEXP (op1, 0), VOIDmode)
17227 && maybe_get_arm_condition_code (op1) == ARM_CS
17228 && XEXP (op1, 1) == const0_rtx)
17229 action = CONV;
17230 break;
17231
17232 case MINUS:
17233 /* RSBS <Rd>,<Rn>,#0
17234 Not handled here: see NEG below. */
17235 /* SUBS <Rd>,<Rn>,#<imm3>
17236 SUBS <Rdn>,#<imm8>
17237 Not handled here: see PLUS above. */
17238 /* SUBS <Rd>,<Rn>,<Rm> */
17239 if (low_register_operand (op0, SImode)
17240 && low_register_operand (op1, SImode))
17241 action = CONV;
17242 break;
17243
17244 case MULT:
17245 /* MULS <Rdm>,<Rn>,<Rdm>
17246 As an exception to the rule, this is only used
17247 when optimizing for size since MULS is slow on all
17248 known implementations. We do not even want to use
17249 MULS in cold code, if optimizing for speed, so we
17250 test the global flag here. */
17251 if (!optimize_size)
17252 break;
17253 /* else fall through. */
17254 case AND:
17255 case IOR:
17256 case XOR:
17257 /* ANDS <Rdn>,<Rm> */
17258 if (rtx_equal_p (dst, op0)
17259 && low_register_operand (op1, SImode))
17260 action = action_for_partial_flag_setting;
17261 else if (rtx_equal_p (dst, op1)
17262 && low_register_operand (op0, SImode))
17263 action = action_for_partial_flag_setting == SKIP
17264 ? SKIP : SWAP_CONV;
17265 break;
17266
17267 case ASHIFTRT:
17268 case ASHIFT:
17269 case LSHIFTRT:
17270 /* ASRS <Rdn>,<Rm> */
17271 /* LSRS <Rdn>,<Rm> */
17272 /* LSLS <Rdn>,<Rm> */
17273 if (rtx_equal_p (dst, op0)
17274 && low_register_operand (op1, SImode))
17275 action = action_for_partial_flag_setting;
17276 /* ASRS <Rd>,<Rm>,#<imm5> */
17277 /* LSRS <Rd>,<Rm>,#<imm5> */
17278 /* LSLS <Rd>,<Rm>,#<imm5> */
17279 else if (low_register_operand (op0, SImode)
17280 && CONST_INT_P (op1)
17281 && IN_RANGE (INTVAL (op1), 0, 31))
17282 action = action_for_partial_flag_setting;
17283 break;
17284
17285 case ROTATERT:
17286 /* RORS <Rdn>,<Rm> */
17287 if (rtx_equal_p (dst, op0)
17288 && low_register_operand (op1, SImode))
17289 action = action_for_partial_flag_setting;
17290 break;
17291
17292 case NOT:
17293 /* MVNS <Rd>,<Rm> */
17294 if (low_register_operand (op0, SImode))
17295 action = action_for_partial_flag_setting;
17296 break;
17297
17298 case NEG:
17299 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17300 if (low_register_operand (op0, SImode))
17301 action = CONV;
17302 break;
17303
17304 case CONST_INT:
17305 /* MOVS <Rd>,#<imm8> */
17306 if (CONST_INT_P (src)
17307 && IN_RANGE (INTVAL (src), 0, 255))
17308 action = action_for_partial_flag_setting;
17309 break;
17310
17311 case REG:
17312 /* MOVS and MOV<c> with registers have different
17313 encodings, so are not relevant here. */
17314 break;
17315
17316 default:
17317 break;
17318 }
17319 }
17320
17321 if (action != SKIP)
17322 {
17323 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17324 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17325 rtvec vec;
17326
17327 if (action == SWAP_CONV)
17328 {
17329 src = copy_rtx (src);
17330 XEXP (src, 0) = op1;
17331 XEXP (src, 1) = op0;
17332 pat = gen_rtx_SET (VOIDmode, dst, src);
17333 vec = gen_rtvec (2, pat, clobber);
17334 }
17335 else /* action == CONV */
17336 vec = gen_rtvec (2, pat, clobber);
17337
17338 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17339 INSN_CODE (insn) = -1;
17340 }
17341 }
17342
17343 if (NONDEBUG_INSN_P (insn))
17344 df_simulate_one_insn_backwards (bb, insn, &live);
17345 }
17346 }
17347
17348 CLEAR_REG_SET (&live);
17349 }
17350
17351 /* GCC puts the pool in the wrong place for ARM, since we can only
17352 load addresses a limited distance around the PC.  We do some
17353 special munging to move the constant pool values to the correct
17354 point in the code. */
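/* The pass proceeds in three steps: scan the insn stream recording every
   constant that must come from a pool (and every existing barrier), group
   those fixes into pools that all of their users can reach, and finally
   dump each pool after a real or newly created barrier, rewriting the
   loads to address the pool entries.  */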
17355 static void
17356 arm_reorg (void)
17357 {
17358 rtx_insn *insn;
17359 HOST_WIDE_INT address = 0;
17360 Mfix * fix;
17361
17362 if (TARGET_THUMB1)
17363 thumb1_reorg ();
17364 else if (TARGET_THUMB2)
17365 thumb2_reorg ();
17366
17367 /* Ensure all insns that must be split have been split at this point.
17368 Otherwise, the pool placement code below may compute incorrect
17369 insn lengths. Note that when optimizing, all insns have already
17370 been split at this point. */
17371 if (!optimize)
17372 split_all_insns_noflow ();
17373
17374 minipool_fix_head = minipool_fix_tail = NULL;
17375
17376 /* The first insn must always be a note, or the code below won't
17377 scan it properly. */
17378 insn = get_insns ();
17379 gcc_assert (NOTE_P (insn));
17380 minipool_pad = 0;
17381
17382 /* Scan all the insns and record the operands that will need fixing. */
17383 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17384 {
17385 if (BARRIER_P (insn))
17386 push_minipool_barrier (insn, address);
17387 else if (INSN_P (insn))
17388 {
17389 rtx_jump_table_data *table;
17390
17391 note_invalid_constants (insn, address, true);
17392 address += get_attr_length (insn);
17393
17394 /* If the insn is a vector jump, add the size of the table
17395 and skip the table. */
17396 if (tablejump_p (insn, NULL, &table))
17397 {
17398 address += get_jump_table_size (table);
17399 insn = table;
17400 }
17401 }
17402 else if (LABEL_P (insn))
17403 /* Add the worst-case padding due to alignment. We don't add
17404 the _current_ padding because the minipool insertions
17405 themselves might change it. */
17406 address += get_label_padding (insn);
17407 }
17408
17409 fix = minipool_fix_head;
17410
17411 /* Now scan the fixups and perform the required changes. */
17412 while (fix)
17413 {
17414 Mfix * ftmp;
17415 Mfix * fdel;
17416 Mfix * last_added_fix;
17417 Mfix * last_barrier = NULL;
17418 Mfix * this_fix;
17419
17420 /* Skip any further barriers before the next fix. */
17421 while (fix && BARRIER_P (fix->insn))
17422 fix = fix->next;
17423
17424 /* No more fixes. */
17425 if (fix == NULL)
17426 break;
17427
17428 last_added_fix = NULL;
17429
17430 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17431 {
17432 if (BARRIER_P (ftmp->insn))
17433 {
17434 if (ftmp->address >= minipool_vector_head->max_address)
17435 break;
17436
17437 last_barrier = ftmp;
17438 }
17439 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17440 break;
17441
17442 last_added_fix = ftmp; /* Keep track of the last fix added. */
17443 }
17444
17445 /* If we found a barrier, drop back to that; any fixes that we
17446 could have reached but come after the barrier will now go in
17447 the next mini-pool. */
17448 if (last_barrier != NULL)
17449 {
17450 /* Reduce the refcount for those fixes that won't go into this
17451 pool after all. */
17452 for (fdel = last_barrier->next;
17453 fdel && fdel != ftmp;
17454 fdel = fdel->next)
17455 {
17456 fdel->minipool->refcount--;
17457 fdel->minipool = NULL;
17458 }
17459
17460 ftmp = last_barrier;
17461 }
17462 else
17463 {
17464 /* ftmp is the first fix that we can't fit into this pool and
17465 there are no natural barriers that we could use.  Insert a
17466 new barrier in the code somewhere between the previous
17467 fix and this one, and arrange to jump around it. */
17468 HOST_WIDE_INT max_address;
17469
17470 /* The last item on the list of fixes must be a barrier, so
17471 we can never run off the end of the list of fixes without
17472 last_barrier being set. */
17473 gcc_assert (ftmp);
17474
17475 max_address = minipool_vector_head->max_address;
17476 /* Check that there isn't another fix that is in range that
17477 we couldn't fit into this pool because the pool was
17478 already too large: we need to put the pool before such an
17479 instruction. The pool itself may come just after the
17480 fix because create_fix_barrier also allows space for a
17481 jump instruction. */
17482 if (ftmp->address < max_address)
17483 max_address = ftmp->address + 1;
17484
17485 last_barrier = create_fix_barrier (last_added_fix, max_address);
17486 }
17487
17488 assign_minipool_offsets (last_barrier);
17489
17490 while (ftmp)
17491 {
17492 if (!BARRIER_P (ftmp->insn)
17493 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17494 == NULL))
17495 break;
17496
17497 ftmp = ftmp->next;
17498 }
17499
17500 /* Scan over the fixes we have identified for this pool, fixing them
17501 up and adding the constants to the pool itself. */
17502 for (this_fix = fix; this_fix && ftmp != this_fix;
17503 this_fix = this_fix->next)
17504 if (!BARRIER_P (this_fix->insn))
17505 {
17506 rtx addr
17507 = plus_constant (Pmode,
17508 gen_rtx_LABEL_REF (VOIDmode,
17509 minipool_vector_label),
17510 this_fix->minipool->offset);
17511 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17512 }
17513
17514 dump_minipool (last_barrier->insn);
17515 fix = ftmp;
17516 }
17517
17518 /* From now on we must synthesize any constants that we can't handle
17519 directly. This can happen if the RTL gets split during final
17520 instruction generation. */
17521 cfun->machine->after_arm_reorg = 1;
17522
17523 /* Free the minipool memory. */
17524 obstack_free (&minipool_obstack, minipool_startobj);
17525 }
17526 \f
17527 /* Routines to output assembly language. */
17528
17529 /* Return string representation of passed in real value. */
17530 static const char *
17531 fp_const_from_val (REAL_VALUE_TYPE *r)
17532 {
17533 if (!fp_consts_inited)
17534 init_fp_table ();
17535
17536 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17537 return "0";
17538 }
17539
17540 /* OPERANDS[0] is the entire list of insns that constitute pop,
17541 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17542 is in the list, UPDATE is true iff the list contains explicit
17543 update of base register. */
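/* Typical output is 'pop {r4, r5, pc}' when popping from SP with update in
   unified syntax, or an 'ldmia'/'ldmfd' form such as 'ldmfd sp!, {r4, r5, pc}'
   otherwise.  */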
17544 void
17545 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17546 bool update)
17547 {
17548 int i;
17549 char pattern[100];
17550 int offset;
17551 const char *conditional;
17552 int num_saves = XVECLEN (operands[0], 0);
17553 unsigned int regno;
17554 unsigned int regno_base = REGNO (operands[1]);
17555
17556 offset = 0;
17557 offset += update ? 1 : 0;
17558 offset += return_pc ? 1 : 0;
17559
17560 /* Is the base register in the list? */
17561 for (i = offset; i < num_saves; i++)
17562 {
17563 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17564 /* If SP is in the list, then the base register must be SP. */
17565 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17566 /* If base register is in the list, there must be no explicit update. */
17567 if (regno == regno_base)
17568 gcc_assert (!update);
17569 }
17570
17571 conditional = reverse ? "%?%D0" : "%?%d0";
17572 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17573 {
17574 /* Output pop (not stmfd) because it has a shorter encoding. */
17575 gcc_assert (update);
17576 sprintf (pattern, "pop%s\t{", conditional);
17577 }
17578 else
17579 {
17580 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17581 It's just a convention; their semantics are identical. */
17582 if (regno_base == SP_REGNUM)
17583 sprintf (pattern, "ldm%sfd\t", conditional);
17584 else if (TARGET_UNIFIED_ASM)
17585 sprintf (pattern, "ldmia%s\t", conditional);
17586 else
17587 sprintf (pattern, "ldm%sia\t", conditional);
17588
17589 strcat (pattern, reg_names[regno_base]);
17590 if (update)
17591 strcat (pattern, "!, {");
17592 else
17593 strcat (pattern, ", {");
17594 }
17595
17596 /* Output the first destination register. */
17597 strcat (pattern,
17598 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17599
17600 /* Output the rest of the destination registers. */
17601 for (i = offset + 1; i < num_saves; i++)
17602 {
17603 strcat (pattern, ", ");
17604 strcat (pattern,
17605 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17606 }
17607
17608 strcat (pattern, "}");
17609
17610 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17611 strcat (pattern, "^");
17612
17613 output_asm_insn (pattern, &cond);
17614 }
17615
17616
17617 /* Output the assembly for a VFP store multiple of double registers. */
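/* This emits either 'vpush%?.64 {dN, ...}' when the address register is SP,
   or 'vstmdb%?.64 rM!, {dN, ...}' otherwise.  */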
17618
17619 const char *
17620 vfp_output_vstmd (rtx * operands)
17621 {
17622 char pattern[100];
17623 int p;
17624 int base;
17625 int i;
17626 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17627 ? XEXP (operands[0], 0)
17628 : XEXP (XEXP (operands[0], 0), 0);
17629 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17630
17631 if (push_p)
17632 strcpy (pattern, "vpush%?.64\t{%P1");
17633 else
17634 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17635
17636 p = strlen (pattern);
17637
17638 gcc_assert (REG_P (operands[1]));
17639
17640 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17641 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17642 {
17643 p += sprintf (&pattern[p], ", d%d", base + i);
17644 }
17645 strcpy (&pattern[p], "}");
17646
17647 output_asm_insn (pattern, operands);
17648 return "";
17649 }
17650
17651
17652 /* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
17653 number of bytes pushed. */
17654
17655 static int
17656 vfp_emit_fstmd (int base_reg, int count)
17657 {
17658 rtx par;
17659 rtx dwarf;
17660 rtx tmp, reg;
17661 int i;
17662
17663 /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
17664 register pairs are stored by a store multiple insn. We avoid this
17665 by pushing an extra pair. */
17666 if (count == 2 && !arm_arch6)
17667 {
17668 if (base_reg == LAST_VFP_REGNUM - 3)
17669 base_reg -= 2;
17670 count++;
17671 }
17672
17673 /* FSTMD may not store more than 16 doubleword registers at once. Split
17674 larger stores into multiple parts (up to a maximum of two, in
17675 practice). */
17676 if (count > 16)
17677 {
17678 int saved;
17679 /* NOTE: base_reg is an internal register number, so each D register
17680 counts as 2. */
17681 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17682 saved += vfp_emit_fstmd (base_reg, 16);
17683 return saved;
17684 }
17685
17686 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17687 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
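  /* PAR describes the store-multiple itself; DWARF holds the equivalent
     individual sets, attached below as a REG_FRAME_RELATED_EXPR so the
     unwinder sees each register's save slot.  */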
17688
17689 reg = gen_rtx_REG (DFmode, base_reg);
17690 base_reg += 2;
17691
17692 XVECEXP (par, 0, 0)
17693 = gen_rtx_SET (VOIDmode,
17694 gen_frame_mem
17695 (BLKmode,
17696 gen_rtx_PRE_MODIFY (Pmode,
17697 stack_pointer_rtx,
17698 plus_constant
17699 (Pmode, stack_pointer_rtx,
17700 - (count * 8)))
17701 ),
17702 gen_rtx_UNSPEC (BLKmode,
17703 gen_rtvec (1, reg),
17704 UNSPEC_PUSH_MULT));
17705
17706 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17707 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17708 RTX_FRAME_RELATED_P (tmp) = 1;
17709 XVECEXP (dwarf, 0, 0) = tmp;
17710
17711 tmp = gen_rtx_SET (VOIDmode,
17712 gen_frame_mem (DFmode, stack_pointer_rtx),
17713 reg);
17714 RTX_FRAME_RELATED_P (tmp) = 1;
17715 XVECEXP (dwarf, 0, 1) = tmp;
17716
17717 for (i = 1; i < count; i++)
17718 {
17719 reg = gen_rtx_REG (DFmode, base_reg);
17720 base_reg += 2;
17721 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17722
17723 tmp = gen_rtx_SET (VOIDmode,
17724 gen_frame_mem (DFmode,
17725 plus_constant (Pmode,
17726 stack_pointer_rtx,
17727 i * 8)),
17728 reg);
17729 RTX_FRAME_RELATED_P (tmp) = 1;
17730 XVECEXP (dwarf, 0, i + 1) = tmp;
17731 }
17732
17733 par = emit_insn (par);
17734 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17735 RTX_FRAME_RELATED_P (par) = 1;
17736
17737 return count * 8;
17738 }
17739
17740 /* Emit a call instruction with pattern PAT. ADDR is the address of
17741 the call target. */
17742
17743 void
17744 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17745 {
17746 rtx insn;
17747
17748 insn = emit_call_insn (pat);
17749
17750 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17751 If the call might use such an entry, add a use of the PIC register
17752 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17753 if (TARGET_VXWORKS_RTP
17754 && flag_pic
17755 && !sibcall
17756 && GET_CODE (addr) == SYMBOL_REF
17757 && (SYMBOL_REF_DECL (addr)
17758 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17759 : !SYMBOL_REF_LOCAL_P (addr)))
17760 {
17761 require_pic_register ();
17762 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17763 }
17764
17765 if (TARGET_AAPCS_BASED)
17766 {
17767 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17768 linker. We need to add an IP clobber to allow setting
17769 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17770 is not needed since it's a fixed register. */
17771 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17772 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17773 }
17774 }
17775
17776 /* Output a 'call' insn. */
17777 const char *
17778 output_call (rtx *operands)
17779 {
17780 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17781
17782 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17783 if (REGNO (operands[0]) == LR_REGNUM)
17784 {
17785 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17786 output_asm_insn ("mov%?\t%0, %|lr", operands);
17787 }
17788
17789 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17790
17791 if (TARGET_INTERWORK || arm_arch4t)
17792 output_asm_insn ("bx%?\t%0", operands);
17793 else
17794 output_asm_insn ("mov%?\t%|pc, %0", operands);
17795
17796 return "";
17797 }
17798
17799 /* Output a 'call' insn whose target is a reference in memory.  This is
17800 not used for ARMv5 and later, where we prefer a blx instead because
17801 otherwise there's a significant performance overhead. */
17802 const char *
17803 output_call_mem (rtx *operands)
17804 {
17805 gcc_assert (!arm_arch5);
17806 if (TARGET_INTERWORK)
17807 {
17808 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17809 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17810 output_asm_insn ("bx%?\t%|ip", operands);
17811 }
17812 else if (regno_use_in (LR_REGNUM, operands[0]))
17813 {
17814 /* LR is used in the memory address. We load the address in the
17815 first instruction. It's safe to use IP as the target of the
17816 load since the call will kill it anyway. */
17817 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17818 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17819 if (arm_arch4t)
17820 output_asm_insn ("bx%?\t%|ip", operands);
17821 else
17822 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17823 }
17824 else
17825 {
17826 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17827 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17828 }
17829
17830 return "";
17831 }
17832
17833
17834 /* Output a move from arm registers to arm registers of a long double
17835 OPERANDS[0] is the destination.
17836 OPERANDS[1] is the source. */
17837 const char *
17838 output_mov_long_double_arm_from_arm (rtx *operands)
17839 {
17840 /* We have to be careful here because the two might overlap. */
17841 int dest_start = REGNO (operands[0]);
17842 int src_start = REGNO (operands[1]);
17843 rtx ops[2];
17844 int i;
17845
17846 if (dest_start < src_start)
17847 {
17848 for (i = 0; i < 3; i++)
17849 {
17850 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17851 ops[1] = gen_rtx_REG (SImode, src_start + i);
17852 output_asm_insn ("mov%?\t%0, %1", ops);
17853 }
17854 }
17855 else
17856 {
17857 for (i = 2; i >= 0; i--)
17858 {
17859 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17860 ops[1] = gen_rtx_REG (SImode, src_start + i);
17861 output_asm_insn ("mov%?\t%0, %1", ops);
17862 }
17863 }
17864
17865 return "";
17866 }
17867
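/* Load SRC into DEST as a pair of 16-bit operations: for a constant, set the
   low 16 bits and then insert the high 16 bits (skipped when they are zero);
   otherwise emit HIGH/LO_SUM sets, which correspond to movw/movt for symbolic
   values.  */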
17868 void
17869 arm_emit_movpair (rtx dest, rtx src)
17870 {
17871 /* If the src is an immediate, simplify it. */
17872 if (CONST_INT_P (src))
17873 {
17874 HOST_WIDE_INT val = INTVAL (src);
17875 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17876 if ((val >> 16) & 0x0000ffff)
17877 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17878 GEN_INT (16)),
17879 GEN_INT ((val >> 16) & 0x0000ffff));
17880 return;
17881 }
17882 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17883 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17884 }
17885
17886 /* Output a move between double words. It must be REG<-MEM
17887 or MEM<-REG. */
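/* If EMIT is false, no assembly is output; only *COUNT is set to the number
   of insns (1 or 2) that would have been emitted.  */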
17888 const char *
17889 output_move_double (rtx *operands, bool emit, int *count)
17890 {
17891 enum rtx_code code0 = GET_CODE (operands[0]);
17892 enum rtx_code code1 = GET_CODE (operands[1]);
17893 rtx otherops[3];
17894 if (count)
17895 *count = 1;
17896
17897 /* The only case when this might happen is when
17898 you are looking at the length of a DImode instruction
17899 that has an invalid constant in it. */
17900 if (code0 == REG && code1 != MEM)
17901 {
17902 gcc_assert (!emit);
17903 *count = 2;
17904 return "";
17905 }
17906
17907 if (code0 == REG)
17908 {
17909 unsigned int reg0 = REGNO (operands[0]);
17910
17911 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17912
17913 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17914
17915 switch (GET_CODE (XEXP (operands[1], 0)))
17916 {
17917 case REG:
17918
17919 if (emit)
17920 {
17921 if (TARGET_LDRD
17922 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17923 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17924 else
17925 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17926 }
17927 break;
17928
17929 case PRE_INC:
17930 gcc_assert (TARGET_LDRD);
17931 if (emit)
17932 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17933 break;
17934
17935 case PRE_DEC:
17936 if (emit)
17937 {
17938 if (TARGET_LDRD)
17939 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17940 else
17941 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17942 }
17943 break;
17944
17945 case POST_INC:
17946 if (emit)
17947 {
17948 if (TARGET_LDRD)
17949 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17950 else
17951 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17952 }
17953 break;
17954
17955 case POST_DEC:
17956 gcc_assert (TARGET_LDRD);
17957 if (emit)
17958 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17959 break;
17960
17961 case PRE_MODIFY:
17962 case POST_MODIFY:
17963 /* Autoincrement addressing modes should never have overlapping
17964 base and destination registers, and overlapping index registers
17965 are already prohibited, so this doesn't need to worry about
17966 fix_cm3_ldrd. */
17967 otherops[0] = operands[0];
17968 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17969 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17970
17971 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17972 {
17973 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17974 {
17975 /* Registers overlap so split out the increment. */
17976 if (emit)
17977 {
17978 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17979 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17980 }
17981 if (count)
17982 *count = 2;
17983 }
17984 else
17985 {
17986 /* Use a single insn if we can.
17987 FIXME: IWMMXT allows offsets larger than ldrd can
17988 handle, fix these up with a pair of ldr. */
17989 if (TARGET_THUMB2
17990 || !CONST_INT_P (otherops[2])
17991 || (INTVAL (otherops[2]) > -256
17992 && INTVAL (otherops[2]) < 256))
17993 {
17994 if (emit)
17995 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17996 }
17997 else
17998 {
17999 if (emit)
18000 {
18001 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18002 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18003 }
18004 if (count)
18005 *count = 2;
18006
18007 }
18008 }
18009 }
18010 else
18011 {
18012 /* Use a single insn if we can.
18013 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18014 fix these up with a pair of ldr. */
18015 if (TARGET_THUMB2
18016 || !CONST_INT_P (otherops[2])
18017 || (INTVAL (otherops[2]) > -256
18018 && INTVAL (otherops[2]) < 256))
18019 {
18020 if (emit)
18021 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18022 }
18023 else
18024 {
18025 if (emit)
18026 {
18027 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18028 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18029 }
18030 if (count)
18031 *count = 2;
18032 }
18033 }
18034 break;
18035
18036 case LABEL_REF:
18037 case CONST:
18038 /* We might be able to use ldrd %0, %1 here.  However, the range is
18039 different from that of ldr/adr, and it is broken on some ARMv7-M
18040 implementations. */
18041 /* Use the second register of the pair to avoid problematic
18042 overlap. */
18043 otherops[1] = operands[1];
18044 if (emit)
18045 output_asm_insn ("adr%?\t%0, %1", otherops);
18046 operands[1] = otherops[0];
18047 if (emit)
18048 {
18049 if (TARGET_LDRD)
18050 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18051 else
18052 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18053 }
18054
18055 if (count)
18056 *count = 2;
18057 break;
18058
18059 /* ??? This needs checking for thumb2. */
18060 default:
18061 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18062 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18063 {
18064 otherops[0] = operands[0];
18065 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18066 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18067
18068 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18069 {
18070 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18071 {
18072 switch ((int) INTVAL (otherops[2]))
18073 {
18074 case -8:
18075 if (emit)
18076 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18077 return "";
18078 case -4:
18079 if (TARGET_THUMB2)
18080 break;
18081 if (emit)
18082 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18083 return "";
18084 case 4:
18085 if (TARGET_THUMB2)
18086 break;
18087 if (emit)
18088 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18089 return "";
18090 }
18091 }
18092 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18093 operands[1] = otherops[0];
18094 if (TARGET_LDRD
18095 && (REG_P (otherops[2])
18096 || TARGET_THUMB2
18097 || (CONST_INT_P (otherops[2])
18098 && INTVAL (otherops[2]) > -256
18099 && INTVAL (otherops[2]) < 256)))
18100 {
18101 if (reg_overlap_mentioned_p (operands[0],
18102 otherops[2]))
18103 {
18104 /* Swap base and index registers over to
18105 avoid a conflict. */
18106 std::swap (otherops[1], otherops[2]);
18107 }
18108 /* If both registers conflict, it will usually
18109 have been fixed by a splitter. */
18110 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18111 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18112 {
18113 if (emit)
18114 {
18115 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18116 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18117 }
18118 if (count)
18119 *count = 2;
18120 }
18121 else
18122 {
18123 otherops[0] = operands[0];
18124 if (emit)
18125 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18126 }
18127 return "";
18128 }
18129
18130 if (CONST_INT_P (otherops[2]))
18131 {
18132 if (emit)
18133 {
18134 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18135 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18136 else
18137 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18138 }
18139 }
18140 else
18141 {
18142 if (emit)
18143 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18144 }
18145 }
18146 else
18147 {
18148 if (emit)
18149 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18150 }
18151
18152 if (count)
18153 *count = 2;
18154
18155 if (TARGET_LDRD)
18156 return "ldr%(d%)\t%0, [%1]";
18157
18158 return "ldm%(ia%)\t%1, %M0";
18159 }
18160 else
18161 {
18162 otherops[1] = adjust_address (operands[1], SImode, 4);
18163 /* Take care of overlapping base/data reg. */
18164 if (reg_mentioned_p (operands[0], operands[1]))
18165 {
18166 if (emit)
18167 {
18168 output_asm_insn ("ldr%?\t%0, %1", otherops);
18169 output_asm_insn ("ldr%?\t%0, %1", operands);
18170 }
18171 if (count)
18172 *count = 2;
18173
18174 }
18175 else
18176 {
18177 if (emit)
18178 {
18179 output_asm_insn ("ldr%?\t%0, %1", operands);
18180 output_asm_insn ("ldr%?\t%0, %1", otherops);
18181 }
18182 if (count)
18183 *count = 2;
18184 }
18185 }
18186 }
18187 }
18188 else
18189 {
18190 /* Constraints should ensure this. */
18191 gcc_assert (code0 == MEM && code1 == REG);
18192 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18193 || (TARGET_ARM && TARGET_LDRD));
18194
18195 switch (GET_CODE (XEXP (operands[0], 0)))
18196 {
18197 case REG:
18198 if (emit)
18199 {
18200 if (TARGET_LDRD)
18201 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18202 else
18203 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18204 }
18205 break;
18206
18207 case PRE_INC:
18208 gcc_assert (TARGET_LDRD);
18209 if (emit)
18210 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18211 break;
18212
18213 case PRE_DEC:
18214 if (emit)
18215 {
18216 if (TARGET_LDRD)
18217 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18218 else
18219 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18220 }
18221 break;
18222
18223 case POST_INC:
18224 if (emit)
18225 {
18226 if (TARGET_LDRD)
18227 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18228 else
18229 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18230 }
18231 break;
18232
18233 case POST_DEC:
18234 gcc_assert (TARGET_LDRD);
18235 if (emit)
18236 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18237 break;
18238
18239 case PRE_MODIFY:
18240 case POST_MODIFY:
18241 otherops[0] = operands[1];
18242 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18243 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18244
18245 /* IWMMXT allows offsets larger than ldrd can handle,
18246 fix these up with a pair of ldr. */
18247 if (!TARGET_THUMB2
18248 && CONST_INT_P (otherops[2])
18249 && (INTVAL(otherops[2]) <= -256
18250 || INTVAL(otherops[2]) >= 256))
18251 {
18252 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18253 {
18254 if (emit)
18255 {
18256 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18257 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18258 }
18259 if (count)
18260 *count = 2;
18261 }
18262 else
18263 {
18264 if (emit)
18265 {
18266 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18267 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18268 }
18269 if (count)
18270 *count = 2;
18271 }
18272 }
18273 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18274 {
18275 if (emit)
18276 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18277 }
18278 else
18279 {
18280 if (emit)
18281 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18282 }
18283 break;
18284
18285 case PLUS:
18286 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18287 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18288 {
18289 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18290 {
18291 case -8:
18292 if (emit)
18293 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18294 return "";
18295
18296 case -4:
18297 if (TARGET_THUMB2)
18298 break;
18299 if (emit)
18300 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18301 return "";
18302
18303 case 4:
18304 if (TARGET_THUMB2)
18305 break;
18306 if (emit)
18307 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18308 return "";
18309 }
18310 }
18311 if (TARGET_LDRD
18312 && (REG_P (otherops[2])
18313 || TARGET_THUMB2
18314 || (CONST_INT_P (otherops[2])
18315 && INTVAL (otherops[2]) > -256
18316 && INTVAL (otherops[2]) < 256)))
18317 {
18318 otherops[0] = operands[1];
18319 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18320 if (emit)
18321 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18322 return "";
18323 }
18324 /* Fall through */
18325
18326 default:
18327 otherops[0] = adjust_address (operands[0], SImode, 4);
18328 otherops[1] = operands[1];
18329 if (emit)
18330 {
18331 output_asm_insn ("str%?\t%1, %0", operands);
18332 output_asm_insn ("str%?\t%H1, %0", otherops);
18333 }
18334 if (count)
18335 *count = 2;
18336 }
18337 }
18338
18339 return "";
18340 }
18341
18342 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18343 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18344
18345 const char *
18346 output_move_quad (rtx *operands)
18347 {
18348 if (REG_P (operands[0]))
18349 {
18350 /* Load, or reg->reg move. */
18351
18352 if (MEM_P (operands[1]))
18353 {
18354 switch (GET_CODE (XEXP (operands[1], 0)))
18355 {
18356 case REG:
18357 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18358 break;
18359
18360 case LABEL_REF:
18361 case CONST:
18362 output_asm_insn ("adr%?\t%0, %1", operands);
18363 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18364 break;
18365
18366 default:
18367 gcc_unreachable ();
18368 }
18369 }
18370 else
18371 {
18372 rtx ops[2];
18373 int dest, src, i;
18374
18375 gcc_assert (REG_P (operands[1]));
18376
18377 dest = REGNO (operands[0]);
18378 src = REGNO (operands[1]);
18379
18380 /* This seems pretty dumb, but hopefully GCC won't try to do it
18381 very often. */
18382 if (dest < src)
18383 for (i = 0; i < 4; i++)
18384 {
18385 ops[0] = gen_rtx_REG (SImode, dest + i);
18386 ops[1] = gen_rtx_REG (SImode, src + i);
18387 output_asm_insn ("mov%?\t%0, %1", ops);
18388 }
18389 else
18390 for (i = 3; i >= 0; i--)
18391 {
18392 ops[0] = gen_rtx_REG (SImode, dest + i);
18393 ops[1] = gen_rtx_REG (SImode, src + i);
18394 output_asm_insn ("mov%?\t%0, %1", ops);
18395 }
18396 }
18397 }
18398 else
18399 {
18400 gcc_assert (MEM_P (operands[0]));
18401 gcc_assert (REG_P (operands[1]));
18402 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18403
18404 switch (GET_CODE (XEXP (operands[0], 0)))
18405 {
18406 case REG:
18407 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18408 break;
18409
18410 default:
18411 gcc_unreachable ();
18412 }
18413 }
18414
18415 return "";
18416 }
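
/* Worked example for the register-to-register path above (illustrative
   only): moving a quad value from r2-r5 into r4-r7 has dest (4) > src (2),
   so the loop runs downwards and emits roughly

	mov	r7, r5
	mov	r6, r4
	mov	r5, r3
	mov	r4, r2

   reading the overlapping registers r4 and r5 before they are overwritten;
   copying upwards instead would clobber r4 before the "mov r6, r4".  */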
18417
18418 /* Output a VFP load or store instruction. */
18419
18420 const char *
18421 output_move_vfp (rtx *operands)
18422 {
18423 rtx reg, mem, addr, ops[2];
18424 int load = REG_P (operands[0]);
18425 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18426 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18427 const char *templ;
18428 char buff[50];
18429 machine_mode mode;
18430
18431 reg = operands[!load];
18432 mem = operands[load];
18433
18434 mode = GET_MODE (reg);
18435
18436 gcc_assert (REG_P (reg));
18437 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18438 gcc_assert (mode == SFmode
18439 || mode == DFmode
18440 || mode == SImode
18441 || mode == DImode
18442 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18443 gcc_assert (MEM_P (mem));
18444
18445 addr = XEXP (mem, 0);
18446
18447 switch (GET_CODE (addr))
18448 {
18449 case PRE_DEC:
18450 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18451 ops[0] = XEXP (addr, 0);
18452 ops[1] = reg;
18453 break;
18454
18455 case POST_INC:
18456 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18457 ops[0] = XEXP (addr, 0);
18458 ops[1] = reg;
18459 break;
18460
18461 default:
18462 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18463 ops[0] = reg;
18464 ops[1] = mem;
18465 break;
18466 }
18467
18468 sprintf (buff, templ,
18469 load ? "ld" : "st",
18470 dp ? "64" : "32",
18471 dp ? "P" : "",
18472 integer_p ? "\t%@ int" : "");
18473 output_asm_insn (buff, ops);
18474
18475 return "";
18476 }
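
/* As an illustration of the templates above (register numbers arbitrary):
   a DFmode load from [r0] is printed as

	vldr.64	d1, [r0]

   while the POST_INC and PRE_DEC forms become

	vldmia.64	r0!, {d1}
	vstmdb.64	r0!, {d1}

   SFmode and SImode accesses use the ".32" variants, and integer modes
   additionally get a trailing "@ int" comment in the assembly output.  */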
18477
18478 /* Output a Neon double-word or quad-word load or store, or a load
18479 or store for larger structure modes.
18480
18481 WARNING: The ordering of elements is weird in big-endian mode,
18482 because the EABI requires that vectors stored in memory appear
18483 as though they were stored by a VSTM instruction.
18484 GCC RTL defines element ordering based on in-memory order.
18485 This can be different from the architectural ordering of elements
18486 within a NEON register. The intrinsics defined in arm_neon.h use the
18487 NEON register element ordering, not the GCC RTL element ordering.
18488
18489 For example, the in-memory ordering of a big-endian quadword
18490 vector with 16-bit elements when stored from register pair {d0,d1}
18491 will be (lowest address first, d0[N] is NEON register element N):
18492
18493 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18494
18495 When necessary, quadword registers (dN, dN+1) are moved to ARM
18496 registers from rN in the order:
18497
18498 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18499
18500 This ensures that STM/LDM can be used on vectors in ARM registers,
18501 and that the same memory layout results as if VSTM/VLDM were used.
18502
18503 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18504 possible, which allows use of appropriate alignment tags.
18505 Note that the choice of "64" is independent of the actual vector
18506 element size; this size simply ensures that the behavior is
18507 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18508
18509 Due to limitations of those instructions, use of VST1.64/VLD1.64
18510 is not possible if:
18511 - the address contains PRE_DEC, or
18512 - the mode refers to more than 4 double-word registers
18513
18514 In those cases, it would be possible to replace VSTM/VLDM by a
18515 sequence of instructions; this is not currently implemented since
18516 this is not certain to actually improve performance. */
18517
18518 const char *
18519 output_move_neon (rtx *operands)
18520 {
18521 rtx reg, mem, addr, ops[2];
18522 int regno, nregs, load = REG_P (operands[0]);
18523 const char *templ;
18524 char buff[50];
18525 machine_mode mode;
18526
18527 reg = operands[!load];
18528 mem = operands[load];
18529
18530 mode = GET_MODE (reg);
18531
18532 gcc_assert (REG_P (reg));
18533 regno = REGNO (reg);
18534 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18535 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18536 || NEON_REGNO_OK_FOR_QUAD (regno));
18537 gcc_assert (VALID_NEON_DREG_MODE (mode)
18538 || VALID_NEON_QREG_MODE (mode)
18539 || VALID_NEON_STRUCT_MODE (mode));
18540 gcc_assert (MEM_P (mem));
18541
18542 addr = XEXP (mem, 0);
18543
18544 /* Strip off const from addresses like (const (plus (...))). */
18545 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18546 addr = XEXP (addr, 0);
18547
18548 switch (GET_CODE (addr))
18549 {
18550 case POST_INC:
18551 /* We have to use vldm / vstm for too-large modes. */
18552 if (nregs > 4)
18553 {
18554 templ = "v%smia%%?\t%%0!, %%h1";
18555 ops[0] = XEXP (addr, 0);
18556 }
18557 else
18558 {
18559 templ = "v%s1.64\t%%h1, %%A0";
18560 ops[0] = mem;
18561 }
18562 ops[1] = reg;
18563 break;
18564
18565 case PRE_DEC:
18566 /* We have to use vldm / vstm in this case, since there is no
18567 pre-decrement form of the vld1 / vst1 instructions. */
18568 templ = "v%smdb%%?\t%%0!, %%h1";
18569 ops[0] = XEXP (addr, 0);
18570 ops[1] = reg;
18571 break;
18572
18573 case POST_MODIFY:
18574 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18575 gcc_unreachable ();
18576
18577 case REG:
18578 /* We have to use vldm / vstm for too-large modes. */
18579 if (nregs > 1)
18580 {
18581 if (nregs > 4)
18582 templ = "v%smia%%?\t%%m0, %%h1";
18583 else
18584 templ = "v%s1.64\t%%h1, %%A0";
18585
18586 ops[0] = mem;
18587 ops[1] = reg;
18588 break;
18589 }
18590 /* Fall through. */
18591 case LABEL_REF:
18592 case PLUS:
18593 {
18594 int i;
18595 int overlap = -1;
18596 for (i = 0; i < nregs; i++)
18597 {
18598 /* We're only using DImode here because it's a convenient size. */
18599 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18600 ops[1] = adjust_address (mem, DImode, 8 * i);
18601 if (reg_overlap_mentioned_p (ops[0], mem))
18602 {
18603 gcc_assert (overlap == -1);
18604 overlap = i;
18605 }
18606 else
18607 {
18608 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18609 output_asm_insn (buff, ops);
18610 }
18611 }
18612 if (overlap != -1)
18613 {
18614 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18615 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18616 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18617 output_asm_insn (buff, ops);
18618 }
18619
18620 return "";
18621 }
18622
18623 default:
18624 gcc_unreachable ();
18625 }
18626
18627 sprintf (buff, templ, load ? "ld" : "st");
18628 output_asm_insn (buff, ops);
18629
18630 return "";
18631 }
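
/* Illustrative only: a quad-word load whose address is
   (plus (reg r0) (const_int 16)) falls through to the per-doubleword loop
   above and is printed roughly as

	vldr	d0, [r0, #16]
	vldr	d1, [r0, #24]

   whereas a POST_INC access of at most four D registers uses the single
   vld1.64/vst1.64 form, and larger structure modes fall back to
   vldmia/vstmia with writeback.  */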
18632
18633 /* Compute and return the length of neon_mov<mode>, where <mode> is
18634 one of VSTRUCT modes: EI, OI, CI or XI. */
18635 int
18636 arm_attr_length_move_neon (rtx_insn *insn)
18637 {
18638 rtx reg, mem, addr;
18639 int load;
18640 machine_mode mode;
18641
18642 extract_insn_cached (insn);
18643
18644 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18645 {
18646 mode = GET_MODE (recog_data.operand[0]);
18647 switch (mode)
18648 {
18649 case EImode:
18650 case OImode:
18651 return 8;
18652 case CImode:
18653 return 12;
18654 case XImode:
18655 return 16;
18656 default:
18657 gcc_unreachable ();
18658 }
18659 }
18660
18661 load = REG_P (recog_data.operand[0]);
18662 reg = recog_data.operand[!load];
18663 mem = recog_data.operand[load];
18664
18665 gcc_assert (MEM_P (mem));
18666
18667 mode = GET_MODE (reg);
18668 addr = XEXP (mem, 0);
18669
18670 /* Strip off const from addresses like (const (plus (...))). */
18671 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18672 addr = XEXP (addr, 0);
18673
18674 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18675 {
18676 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18677 return insns * 4;
18678 }
18679 else
18680 return 4;
18681 }
18682
18683 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18684 return zero. */
18685
18686 int
18687 arm_address_offset_is_imm (rtx_insn *insn)
18688 {
18689 rtx mem, addr;
18690
18691 extract_insn_cached (insn);
18692
18693 if (REG_P (recog_data.operand[0]))
18694 return 0;
18695
18696 mem = recog_data.operand[0];
18697
18698 gcc_assert (MEM_P (mem));
18699
18700 addr = XEXP (mem, 0);
18701
18702 if (REG_P (addr)
18703 || (GET_CODE (addr) == PLUS
18704 && REG_P (XEXP (addr, 0))
18705 && CONST_INT_P (XEXP (addr, 1))))
18706 return 1;
18707 else
18708 return 0;
18709 }
18710
18711 /* Output an ADD r, s, #n where n may be too big for one instruction.
18712 If adding zero to one register, output nothing. */
18713 const char *
18714 output_add_immediate (rtx *operands)
18715 {
18716 HOST_WIDE_INT n = INTVAL (operands[2]);
18717
18718 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18719 {
18720 if (n < 0)
18721 output_multi_immediate (operands,
18722 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18723 -n);
18724 else
18725 output_multi_immediate (operands,
18726 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18727 n);
18728 }
18729
18730 return "";
18731 }
18732
18733 /* Output a multiple immediate operation.
18734 OPERANDS is the vector of operands referred to in the output patterns.
18735 INSTR1 is the output pattern to use for the first constant.
18736 INSTR2 is the output pattern to use for subsequent constants.
18737 IMMED_OP is the index of the constant slot in OPERANDS.
18738 N is the constant value. */
18739 static const char *
18740 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18741 int immed_op, HOST_WIDE_INT n)
18742 {
18743 #if HOST_BITS_PER_WIDE_INT > 32
18744 n &= 0xffffffff;
18745 #endif
18746
18747 if (n == 0)
18748 {
18749 /* Quick and easy output. */
18750 operands[immed_op] = const0_rtx;
18751 output_asm_insn (instr1, operands);
18752 }
18753 else
18754 {
18755 int i;
18756 const char * instr = instr1;
18757
18758 /* Note that n is never zero here (which would give no output). */
18759 for (i = 0; i < 32; i += 2)
18760 {
18761 if (n & (3 << i))
18762 {
18763 operands[immed_op] = GEN_INT (n & (255 << i));
18764 output_asm_insn (instr, operands);
18765 instr = instr2;
18766 i += 6;
18767 }
18768 }
18769 }
18770
18771 return "";
18772 }
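
/* Worked example (not used by the code itself): splitting the constant
   0x12340 for an add.  The loop above scans bit pairs and carves out
   byte-sized chunks that start on even bit positions -- exactly the
   values ARM can encode as an 8-bit immediate rotated by an even
   amount -- giving 0x2340 and 0x10000, so the output is roughly

	add	r0, r1, #0x2340
	add	r0, r0, #0x10000

   with the first instruction using INSTR1 and the rest using INSTR2.  */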
18773
18774 /* Return the name of a shifter operation. */
18775 static const char *
18776 arm_shift_nmem (enum rtx_code code)
18777 {
18778 switch (code)
18779 {
18780 case ASHIFT:
18781 return ARM_LSL_NAME;
18782
18783 case ASHIFTRT:
18784 return "asr";
18785
18786 case LSHIFTRT:
18787 return "lsr";
18788
18789 case ROTATERT:
18790 return "ror";
18791
18792 default:
18793 abort ();
18794 }
18795 }
18796
18797 /* Return the appropriate ARM instruction for the operation code.
18798 The returned result should not be overwritten. OP is the rtx of the
18799 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18800 was shifted. */
18801 const char *
18802 arithmetic_instr (rtx op, int shift_first_arg)
18803 {
18804 switch (GET_CODE (op))
18805 {
18806 case PLUS:
18807 return "add";
18808
18809 case MINUS:
18810 return shift_first_arg ? "rsb" : "sub";
18811
18812 case IOR:
18813 return "orr";
18814
18815 case XOR:
18816 return "eor";
18817
18818 case AND:
18819 return "and";
18820
18821 case ASHIFT:
18822 case ASHIFTRT:
18823 case LSHIFTRT:
18824 case ROTATERT:
18825 return arm_shift_nmem (GET_CODE (op));
18826
18827 default:
18828 gcc_unreachable ();
18829 }
18830 }
18831
18832 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18833 for the operation code. The returned result should not be overwritten.
18834 OP is the rtx code of the shift.
18835 OP is the rtx of the shift.
18836 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18837 constant amount of the shift if the shift is by a constant. */
18838 shift_op (rtx op, HOST_WIDE_INT *amountp)
18839 {
18840 const char * mnem;
18841 enum rtx_code code = GET_CODE (op);
18842
18843 switch (code)
18844 {
18845 case ROTATE:
18846 if (!CONST_INT_P (XEXP (op, 1)))
18847 {
18848 output_operand_lossage ("invalid shift operand");
18849 return NULL;
18850 }
18851
18852 code = ROTATERT;
18853 *amountp = 32 - INTVAL (XEXP (op, 1));
18854 mnem = "ror";
18855 break;
18856
18857 case ASHIFT:
18858 case ASHIFTRT:
18859 case LSHIFTRT:
18860 case ROTATERT:
18861 mnem = arm_shift_nmem (code);
18862 if (CONST_INT_P (XEXP (op, 1)))
18863 {
18864 *amountp = INTVAL (XEXP (op, 1));
18865 }
18866 else if (REG_P (XEXP (op, 1)))
18867 {
18868 *amountp = -1;
18869 return mnem;
18870 }
18871 else
18872 {
18873 output_operand_lossage ("invalid shift operand");
18874 return NULL;
18875 }
18876 break;
18877
18878 case MULT:
18879 /* We never have to worry about the amount being other than a
18880 power of 2, since this case can never be reloaded from a reg. */
18881 if (!CONST_INT_P (XEXP (op, 1)))
18882 {
18883 output_operand_lossage ("invalid shift operand");
18884 return NULL;
18885 }
18886
18887 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18888
18889 /* Amount must be a power of two. */
18890 if (*amountp & (*amountp - 1))
18891 {
18892 output_operand_lossage ("invalid shift operand");
18893 return NULL;
18894 }
18895
18896 *amountp = int_log2 (*amountp);
18897 return ARM_LSL_NAME;
18898
18899 default:
18900 output_operand_lossage ("invalid shift operand");
18901 return NULL;
18902 }
18903
18904 /* This is not 100% correct, but follows from the desire to merge
18905 multiplication by a power of 2 with the recognizer for a
18906 shift. >=32 is not a valid shift for "lsl", so we must try and
18907 output a shift that produces the correct arithmetical result.
18908 Using lsr #32 is identical except for the fact that the carry bit
18909 is not set correctly if we set the flags; but we never use the
18910 carry bit from such an operation, so we can ignore that. */
18911 if (code == ROTATERT)
18912 /* Rotate is just modulo 32. */
18913 *amountp &= 31;
18914 else if (*amountp != (*amountp & 31))
18915 {
18916 if (code == ASHIFT)
18917 mnem = "lsr";
18918 *amountp = 32;
18919 }
18920
18921 /* Shifts of 0 are no-ops. */
18922 if (*amountp == 0)
18923 return NULL;
18924
18925 return mnem;
18926 }
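
/* Two small examples of the mapping above (illustrative only): for
   (mult (reg) (const_int 8)) this returns ARM_LSL_NAME ("lsl" under
   unified syntax) with *AMOUNTP set to 3, so the caller prints an
   "lsl #3" operand; for (rotate (reg) (const_int 8)) it canonicalizes
   to the right-rotate form and returns "ror" with *AMOUNTP set to 24.  */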
18927
18928 /* Return the shift count corresponding to POWER, which must be a power of two. */
18929
18930 static HOST_WIDE_INT
18931 int_log2 (HOST_WIDE_INT power)
18932 {
18933 HOST_WIDE_INT shift = 0;
18934
18935 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18936 {
18937 gcc_assert (shift <= 31);
18938 shift++;
18939 }
18940
18941 return shift;
18942 }
18943
18944 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18945 because /bin/as is horribly restrictive. The judgement about
18946 whether or not each character is 'printable' (and can be output as
18947 is) or not (and must be printed with an octal escape) must be made
18948 with reference to the *host* character set -- the situation is
18949 similar to that discussed in the comments above pp_c_char in
18950 c-pretty-print.c. */
18951
18952 #define MAX_ASCII_LEN 51
18953
18954 void
18955 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18956 {
18957 int i;
18958 int len_so_far = 0;
18959
18960 fputs ("\t.ascii\t\"", stream);
18961
18962 for (i = 0; i < len; i++)
18963 {
18964 int c = p[i];
18965
18966 if (len_so_far >= MAX_ASCII_LEN)
18967 {
18968 fputs ("\"\n\t.ascii\t\"", stream);
18969 len_so_far = 0;
18970 }
18971
18972 if (ISPRINT (c))
18973 {
18974 if (c == '\\' || c == '\"')
18975 {
18976 putc ('\\', stream);
18977 len_so_far++;
18978 }
18979 putc (c, stream);
18980 len_so_far++;
18981 }
18982 else
18983 {
18984 fprintf (stream, "\\%03o", c);
18985 len_so_far += 4;
18986 }
18987 }
18988
18989 fputs ("\"\n", stream);
18990 }
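
/* For example (illustrative only), the four bytes 'a', '"', 'b', '\n'
   would be emitted as

	.ascii	"a\"b\012"

   Printable characters are passed through (with backslash and double
   quote escaped), anything else becomes a three-digit octal escape, and
   a fresh .ascii directive is started once the current one has grown
   past MAX_ASCII_LEN output characters.  */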
18991 \f
18992 /* Compute the register save mask for registers 0 through 12
18993 inclusive. This code is used by arm_compute_save_reg_mask. */
18994
18995 static unsigned long
18996 arm_compute_save_reg0_reg12_mask (void)
18997 {
18998 unsigned long func_type = arm_current_func_type ();
18999 unsigned long save_reg_mask = 0;
19000 unsigned int reg;
19001
19002 if (IS_INTERRUPT (func_type))
19003 {
19004 unsigned int max_reg;
19005 /* Interrupt functions must not corrupt any registers,
19006 even call clobbered ones. If this is a leaf function
19007 we can just examine the registers used by the RTL, but
19008 otherwise we have to assume that whatever function is
19009 called might clobber anything, and so we have to save
19010 all the call-clobbered registers as well. */
19011 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19012 /* FIQ handlers have registers r8 - r12 banked, so
19013 we only need to check r0 - r7. Normal ISRs only
19014 bank r14 and r15, so we must check up to r12.
19015 r13 is the stack pointer which is always preserved,
19016 so we do not need to consider it here. */
19017 max_reg = 7;
19018 else
19019 max_reg = 12;
19020
19021 for (reg = 0; reg <= max_reg; reg++)
19022 if (df_regs_ever_live_p (reg)
19023 || (! crtl->is_leaf && call_used_regs[reg]))
19024 save_reg_mask |= (1 << reg);
19025
19026 /* Also save the pic base register if necessary. */
19027 if (flag_pic
19028 && !TARGET_SINGLE_PIC_BASE
19029 && arm_pic_register != INVALID_REGNUM
19030 && crtl->uses_pic_offset_table)
19031 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19032 }
19033 else if (IS_VOLATILE (func_type))
19034 {
19035 /* For noreturn functions we historically omitted register saves
19036 altogether. However this really messes up debugging. As a
19037 compromise save just the frame pointers. Combined with the link
19038 register saved elsewhere this should be sufficient to get
19039 a backtrace. */
19040 if (frame_pointer_needed)
19041 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19042 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19043 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19044 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19045 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19046 }
19047 else
19048 {
19049 /* In the normal case we only need to save those registers
19050 which are call saved and which are used by this function. */
19051 for (reg = 0; reg <= 11; reg++)
19052 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19053 save_reg_mask |= (1 << reg);
19054
19055 /* Handle the frame pointer as a special case. */
19056 if (frame_pointer_needed)
19057 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19058
19059 /* If we aren't loading the PIC register,
19060 don't stack it even though it may be live. */
19061 if (flag_pic
19062 && !TARGET_SINGLE_PIC_BASE
19063 && arm_pic_register != INVALID_REGNUM
19064 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19065 || crtl->uses_pic_offset_table))
19066 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19067
19068 /* The prologue will copy SP into R0, so save it. */
19069 if (IS_STACKALIGN (func_type))
19070 save_reg_mask |= 1;
19071 }
19072
19073 /* Save registers so the exception handler can modify them. */
19074 if (crtl->calls_eh_return)
19075 {
19076 unsigned int i;
19077
19078 for (i = 0; ; i++)
19079 {
19080 reg = EH_RETURN_DATA_REGNO (i);
19081 if (reg == INVALID_REGNUM)
19082 break;
19083 save_reg_mask |= 1 << reg;
19084 }
19085 }
19086
19087 return save_reg_mask;
19088 }
19089
19090 /* Return true if r3 is live at the start of the function. */
19091
19092 static bool
19093 arm_r3_live_at_start_p (void)
19094 {
19095 /* Just look at cfg info, which is still close enough to correct at this
19096 point. This gives false positives for broken functions that might use
19097 uninitialized data that happens to be allocated in r3, but who cares? */
19098 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19099 }
19100
19101 /* Compute the number of bytes used to store the static chain register on the
19102 stack, above the stack frame. We need to know this accurately to get the
19103 alignment of the rest of the stack frame correct. */
19104
19105 static int
19106 arm_compute_static_chain_stack_bytes (void)
19107 {
19108 /* See the defining assertion in arm_expand_prologue. */
19109 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19110 && IS_NESTED (arm_current_func_type ())
19111 && arm_r3_live_at_start_p ()
19112 && crtl->args.pretend_args_size == 0)
19113 return 4;
19114
19115 return 0;
19116 }
19117
19118 /* Compute a bit mask of which registers need to be
19119 saved on the stack for the current function.
19120 This is used by arm_get_frame_offsets, which may add extra registers. */
19121
19122 static unsigned long
19123 arm_compute_save_reg_mask (void)
19124 {
19125 unsigned int save_reg_mask = 0;
19126 unsigned long func_type = arm_current_func_type ();
19127 unsigned int reg;
19128
19129 if (IS_NAKED (func_type))
19130 /* This should never really happen. */
19131 return 0;
19132
19133 /* If we are creating a stack frame, then we must save the frame pointer,
19134 IP (which will hold the old stack pointer), LR and the PC. */
19135 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19136 save_reg_mask |=
19137 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19138 | (1 << IP_REGNUM)
19139 | (1 << LR_REGNUM)
19140 | (1 << PC_REGNUM);
19141
19142 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19143
19144 /* Decide if we need to save the link register.
19145 Interrupt routines have their own banked link register,
19146 so they never need to save it.
19147 Otherwise if we do not use the link register we do not need to save
19148 it. If we are pushing other registers onto the stack however, we
19149 can save an instruction in the epilogue by pushing the link register
19150 now and then popping it back into the PC. This incurs extra memory
19151 accesses though, so we only do it when optimizing for size, and only
19152 if we know that we will not need a fancy return sequence. */
19153 if (df_regs_ever_live_p (LR_REGNUM)
19154 || (save_reg_mask
19155 && optimize_size
19156 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19157 && !crtl->calls_eh_return))
19158 save_reg_mask |= 1 << LR_REGNUM;
19159
19160 if (cfun->machine->lr_save_eliminated)
19161 save_reg_mask &= ~ (1 << LR_REGNUM);
19162
19163 if (TARGET_REALLY_IWMMXT
19164 && ((bit_count (save_reg_mask)
19165 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19166 arm_compute_static_chain_stack_bytes ())
19167 ) % 2) != 0)
19168 {
19169 /* The total number of registers that are going to be pushed
19170 onto the stack is odd. We need to ensure that the stack
19171 is 64-bit aligned before we start to save iWMMXt registers,
19172 and also before we start to create locals. (A local variable
19173 might be a double or long long which we will load/store using
19174 an iWMMXt instruction). Therefore we need to push another
19175 ARM register, so that the stack will be 64-bit aligned. We
19176 try to avoid using the arg registers (r0 - r3) as they might be
19177 used to pass values in a tail call. */
19178 for (reg = 4; reg <= 12; reg++)
19179 if ((save_reg_mask & (1 << reg)) == 0)
19180 break;
19181
19182 if (reg <= 12)
19183 save_reg_mask |= (1 << reg);
19184 else
19185 {
19186 cfun->machine->sibcall_blocked = 1;
19187 save_reg_mask |= (1 << 3);
19188 }
19189 }
19190
19191 /* We may need to push an additional register for use initializing the
19192 PIC base register. */
19193 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19194 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19195 {
19196 reg = thumb_find_work_register (1 << 4);
19197 if (!call_used_regs[reg])
19198 save_reg_mask |= (1 << reg);
19199 }
19200
19201 return save_reg_mask;
19202 }
19203
19204
19205 /* Compute a bit mask of which registers need to be
19206 saved on the stack for the current function. */
19207 static unsigned long
19208 thumb1_compute_save_reg_mask (void)
19209 {
19210 unsigned long mask;
19211 unsigned reg;
19212
19213 mask = 0;
19214 for (reg = 0; reg < 12; reg ++)
19215 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19216 mask |= 1 << reg;
19217
19218 if (flag_pic
19219 && !TARGET_SINGLE_PIC_BASE
19220 && arm_pic_register != INVALID_REGNUM
19221 && crtl->uses_pic_offset_table)
19222 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19223
19224 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19225 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19226 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19227
19228 /* LR will also be pushed if any lo regs are pushed. */
19229 if (mask & 0xff || thumb_force_lr_save ())
19230 mask |= (1 << LR_REGNUM);
19231
19232 /* Make sure we have a low work register if we need one.
19233 We will need one if we are going to push a high register,
19234 but we are not currently intending to push a low register. */
19235 if ((mask & 0xff) == 0
19236 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19237 {
19238 /* Use thumb_find_work_register to choose which register
19239 we will use. If the register is live then we will
19240 have to push it. Use LAST_LO_REGNUM as our fallback
19241 choice for the register to select. */
19242 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19243 /* Make sure the register returned by thumb_find_work_register is
19244 not part of the return value. */
19245 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19246 reg = LAST_LO_REGNUM;
19247
19248 if (! call_used_regs[reg])
19249 mask |= 1 << reg;
19250 }
19251
19252 /* The 504 below is 8 bytes less than 512 because there are two possible
19253 alignment words. We can't tell here if they will be present or not, so we
19254 have to play it safe and assume that they are. */
19255 if ((CALLER_INTERWORKING_SLOT_SIZE +
19256 ROUND_UP_WORD (get_frame_size ()) +
19257 crtl->outgoing_args_size) >= 504)
19258 {
19259 /* This is the same as the code in thumb1_expand_prologue() which
19260 determines which register to use for stack decrement. */
19261 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19262 if (mask & (1 << reg))
19263 break;
19264
19265 if (reg > LAST_LO_REGNUM)
19266 {
19267 /* Make sure we have a register available for stack decrement. */
19268 mask |= 1 << LAST_LO_REGNUM;
19269 }
19270 }
19271
19272 return mask;
19273 }
19274
19275
19276 /* Return the number of bytes required to save VFP registers. */
19277 static int
19278 arm_get_vfp_saved_size (void)
19279 {
19280 unsigned int regno;
19281 int count;
19282 int saved;
19283
19284 saved = 0;
19285 /* Space for saved VFP registers. */
19286 if (TARGET_HARD_FLOAT && TARGET_VFP)
19287 {
19288 count = 0;
19289 for (regno = FIRST_VFP_REGNUM;
19290 regno < LAST_VFP_REGNUM;
19291 regno += 2)
19292 {
19293 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19294 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19295 {
19296 if (count > 0)
19297 {
19298 /* Workaround ARM10 VFPr1 bug. */
19299 if (count == 2 && !arm_arch6)
19300 count++;
19301 saved += count * 8;
19302 }
19303 count = 0;
19304 }
19305 else
19306 count++;
19307 }
19308 if (count > 0)
19309 {
19310 if (count == 2 && !arm_arch6)
19311 count++;
19312 saved += count * 8;
19313 }
19314 }
19315 return saved;
19316 }
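
/* A quick sanity check of the arithmetic above (illustrative only): a
   contiguous run of four live D registers contributes 4 * 8 = 32 bytes,
   while a run of exactly two D registers on a pre-ARMv6 core is padded
   to three (24 bytes) because of the ARM10 VFPr1 workaround.  */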
19317
19318
19319 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19320 everything bar the final return instruction. If SIMPLE_RETURN is true, then
19321 do not output the epilogue, because it has already been emitted in RTL. */
19322 const char *
19323 output_return_instruction (rtx operand, bool really_return, bool reverse,
19324 bool simple_return)
19325 {
19326 char conditional[10];
19327 char instr[100];
19328 unsigned reg;
19329 unsigned long live_regs_mask;
19330 unsigned long func_type;
19331 arm_stack_offsets *offsets;
19332
19333 func_type = arm_current_func_type ();
19334
19335 if (IS_NAKED (func_type))
19336 return "";
19337
19338 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19339 {
19340 /* If this function was declared non-returning, and we have
19341 found a tail call, then we have to trust that the called
19342 function won't return. */
19343 if (really_return)
19344 {
19345 rtx ops[2];
19346
19347 /* Otherwise, trap an attempted return by aborting. */
19348 ops[0] = operand;
19349 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19350 : "abort");
19351 assemble_external_libcall (ops[1]);
19352 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19353 }
19354
19355 return "";
19356 }
19357
19358 gcc_assert (!cfun->calls_alloca || really_return);
19359
19360 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19361
19362 cfun->machine->return_used_this_function = 1;
19363
19364 offsets = arm_get_frame_offsets ();
19365 live_regs_mask = offsets->saved_regs_mask;
19366
19367 if (!simple_return && live_regs_mask)
19368 {
19369 const char * return_reg;
19370
19371 /* If we do not have any special requirements for function exit
19372 (e.g. interworking) then we can load the return address
19373 directly into the PC. Otherwise we must load it into LR. */
19374 if (really_return
19375 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19376 return_reg = reg_names[PC_REGNUM];
19377 else
19378 return_reg = reg_names[LR_REGNUM];
19379
19380 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19381 {
19382 /* There are three possible reasons for the IP register
19383 being saved: 1) a stack frame was created, in which case
19384 IP contains the old stack pointer, or 2) an ISR routine
19385 corrupted it, or 3) it was saved to align the stack on
19386 iWMMXt. In case 1, restore IP into SP, otherwise just
19387 restore IP. */
19388 if (frame_pointer_needed)
19389 {
19390 live_regs_mask &= ~ (1 << IP_REGNUM);
19391 live_regs_mask |= (1 << SP_REGNUM);
19392 }
19393 else
19394 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19395 }
19396
19397 /* On some ARM architectures it is faster to use LDR rather than
19398 LDM to load a single register. On other architectures, the
19399 cost is the same. In 26 bit mode, or for exception handlers,
19400 we have to use LDM to load the PC so that the CPSR is also
19401 restored. */
19402 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19403 if (live_regs_mask == (1U << reg))
19404 break;
19405
19406 if (reg <= LAST_ARM_REGNUM
19407 && (reg != LR_REGNUM
19408 || ! really_return
19409 || ! IS_INTERRUPT (func_type)))
19410 {
19411 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19412 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19413 }
19414 else
19415 {
19416 char *p;
19417 int first = 1;
19418
19419 /* Generate the load multiple instruction to restore the
19420 registers. Note we can get here, even if
19421 frame_pointer_needed is true, but only if sp already
19422 points to the base of the saved core registers. */
19423 if (live_regs_mask & (1 << SP_REGNUM))
19424 {
19425 unsigned HOST_WIDE_INT stack_adjust;
19426
19427 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19428 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19429
19430 if (stack_adjust && arm_arch5 && TARGET_ARM)
19431 if (TARGET_UNIFIED_ASM)
19432 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19433 else
19434 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19435 else
19436 {
19437 /* If we can't use ldmib (SA110 bug),
19438 then try to pop r3 instead. */
19439 if (stack_adjust)
19440 live_regs_mask |= 1 << 3;
19441
19442 if (TARGET_UNIFIED_ASM)
19443 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19444 else
19445 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19446 }
19447 }
19448 else
19449 if (TARGET_UNIFIED_ASM)
19450 sprintf (instr, "pop%s\t{", conditional);
19451 else
19452 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19453
19454 p = instr + strlen (instr);
19455
19456 for (reg = 0; reg <= SP_REGNUM; reg++)
19457 if (live_regs_mask & (1 << reg))
19458 {
19459 int l = strlen (reg_names[reg]);
19460
19461 if (first)
19462 first = 0;
19463 else
19464 {
19465 memcpy (p, ", ", 2);
19466 p += 2;
19467 }
19468
19469 memcpy (p, "%|", 2);
19470 memcpy (p + 2, reg_names[reg], l);
19471 p += l + 2;
19472 }
19473
19474 if (live_regs_mask & (1 << LR_REGNUM))
19475 {
19476 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19477 /* If returning from an interrupt, restore the CPSR. */
19478 if (IS_INTERRUPT (func_type))
19479 strcat (p, "^");
19480 }
19481 else
19482 strcpy (p, "}");
19483 }
19484
19485 output_asm_insn (instr, & operand);
19486
19487 /* See if we need to generate an extra instruction to
19488 perform the actual function return. */
19489 if (really_return
19490 && func_type != ARM_FT_INTERWORKED
19491 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19492 {
19493 /* The return has already been handled
19494 by loading the LR into the PC. */
19495 return "";
19496 }
19497 }
19498
19499 if (really_return)
19500 {
19501 switch ((int) ARM_FUNC_TYPE (func_type))
19502 {
19503 case ARM_FT_ISR:
19504 case ARM_FT_FIQ:
19505 /* ??? This is wrong for unified assembly syntax. */
19506 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19507 break;
19508
19509 case ARM_FT_INTERWORKED:
19510 sprintf (instr, "bx%s\t%%|lr", conditional);
19511 break;
19512
19513 case ARM_FT_EXCEPTION:
19514 /* ??? This is wrong for unified assembly syntax. */
19515 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19516 break;
19517
19518 default:
19519 /* Use bx if it's available. */
19520 if (arm_arch5 || arm_arch4t)
19521 sprintf (instr, "bx%s\t%%|lr", conditional);
19522 else
19523 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19524 break;
19525 }
19526
19527 output_asm_insn (instr, & operand);
19528 }
19529
19530 return "";
19531 }
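
/* Typical output of the function above (illustrative, assuming unified
   syntax and no interworking requirement): a function that saved
   {r4, lr} returns with

	pop	{r4, pc}

   while a function that saved only lr takes the single-register path and
   returns with

	ldr	pc, [sp], #4

   In both cases the return address goes straight into the PC, so no
   separate return instruction is needed.  */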
19532
19533 /* Write the function name into the code section, directly preceding
19534 the function prologue.
19535
19536 Code will be output similar to this:
19537 t0
19538 .ascii "arm_poke_function_name", 0
19539 .align
19540 t1
19541 .word 0xff000000 + (t1 - t0)
19542 arm_poke_function_name
19543 mov ip, sp
19544 stmfd sp!, {fp, ip, lr, pc}
19545 sub fp, ip, #4
19546
19547 When performing a stack backtrace, code can inspect the value
19548 of 'pc' stored at 'fp' + 0. If the trace function then looks
19549 at location pc - 12 and the top 8 bits are set, then we know
19550 that there is a function name embedded immediately preceding this
19551 location, whose length is given by ((pc[-3]) & ~0xff000000).
19552
19553 We assume that pc is declared as a pointer to an unsigned long.
19554
19555 It is of no benefit to output the function name if we are assembling
19556 a leaf function. These function types will not contain a stack
19557 backtrace structure, therefore it is not possible to determine the
19558 function name. */
19559 void
19560 arm_poke_function_name (FILE *stream, const char *name)
19561 {
19562 unsigned long alignlength;
19563 unsigned long length;
19564 rtx x;
19565
19566 length = strlen (name) + 1;
19567 alignlength = ROUND_UP_WORD (length);
19568
19569 ASM_OUTPUT_ASCII (stream, name, length);
19570 ASM_OUTPUT_ALIGN (stream, 2);
19571 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19572 assemble_aligned_integer (UNITS_PER_WORD, x);
19573 }
19574
19575 /* Place some comments into the assembler stream
19576 describing the current function. */
19577 static void
19578 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19579 {
19580 unsigned long func_type;
19581
19582 /* ??? Do we want to print some of the below anyway? */
19583 if (TARGET_THUMB1)
19584 return;
19585
19586 /* Sanity check. */
19587 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19588
19589 func_type = arm_current_func_type ();
19590
19591 switch ((int) ARM_FUNC_TYPE (func_type))
19592 {
19593 default:
19594 case ARM_FT_NORMAL:
19595 break;
19596 case ARM_FT_INTERWORKED:
19597 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19598 break;
19599 case ARM_FT_ISR:
19600 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19601 break;
19602 case ARM_FT_FIQ:
19603 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19604 break;
19605 case ARM_FT_EXCEPTION:
19606 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19607 break;
19608 }
19609
19610 if (IS_NAKED (func_type))
19611 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19612
19613 if (IS_VOLATILE (func_type))
19614 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19615
19616 if (IS_NESTED (func_type))
19617 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19618 if (IS_STACKALIGN (func_type))
19619 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19620
19621 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19622 crtl->args.size,
19623 crtl->args.pretend_args_size, frame_size);
19624
19625 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19626 frame_pointer_needed,
19627 cfun->machine->uses_anonymous_args);
19628
19629 if (cfun->machine->lr_save_eliminated)
19630 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19631
19632 if (crtl->calls_eh_return)
19633 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19634
19635 }
19636
19637 static void
19638 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19639 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19640 {
19641 arm_stack_offsets *offsets;
19642
19643 if (TARGET_THUMB1)
19644 {
19645 int regno;
19646
19647 /* Emit any call-via-reg trampolines that are needed for v4t support
19648 of call_reg and call_value_reg type insns. */
19649 for (regno = 0; regno < LR_REGNUM; regno++)
19650 {
19651 rtx label = cfun->machine->call_via[regno];
19652
19653 if (label != NULL)
19654 {
19655 switch_to_section (function_section (current_function_decl));
19656 targetm.asm_out.internal_label (asm_out_file, "L",
19657 CODE_LABEL_NUMBER (label));
19658 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19659 }
19660 }
19661
19662 /* ??? Probably not safe to set this here, since it assumes that a
19663 function will be emitted as assembly immediately after we generate
19664 RTL for it. This does not happen for inline functions. */
19665 cfun->machine->return_used_this_function = 0;
19666 }
19667 else /* TARGET_32BIT */
19668 {
19669 /* We need to take into account any stack-frame rounding. */
19670 offsets = arm_get_frame_offsets ();
19671
19672 gcc_assert (!use_return_insn (FALSE, NULL)
19673 || (cfun->machine->return_used_this_function != 0)
19674 || offsets->saved_regs == offsets->outgoing_args
19675 || frame_pointer_needed);
19676 }
19677 }
19678
19679 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19680 STR and STRD. If an even number of registers are being pushed, one
19681 or more STRD patterns are created for each register pair. If an
19682 odd number of registers are pushed, emit an initial STR followed by
19683 as many STRD instructions as are needed. This works best when the
19684 stack is initially 64-bit aligned (the normal case), since it
19685 ensures that each STRD is also 64-bit aligned. */
19686 static void
19687 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19688 {
19689 int num_regs = 0;
19690 int i;
19691 int regno;
19692 rtx par = NULL_RTX;
19693 rtx dwarf = NULL_RTX;
19694 rtx tmp;
19695 bool first = true;
19696
19697 num_regs = bit_count (saved_regs_mask);
19698
19699 /* Must be at least one register to save, and can't save SP or PC. */
19700 gcc_assert (num_regs > 0 && num_regs <= 14);
19701 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19702 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19703
19704 /* Create sequence for DWARF info. All the frame-related data for
19705 debugging is held in this wrapper. */
19706 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19707
19708 /* Describe the stack adjustment. */
19709 tmp = gen_rtx_SET (VOIDmode,
19710 stack_pointer_rtx,
19711 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19712 RTX_FRAME_RELATED_P (tmp) = 1;
19713 XVECEXP (dwarf, 0, 0) = tmp;
19714
19715 /* Find the first register. */
19716 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19717 ;
19718
19719 i = 0;
19720
19721 /* If there's an odd number of registers to push, start off by
19722 pushing a single register. This ensures that subsequent strd
19723 operations are dword aligned (assuming that SP was originally
19724 64-bit aligned). */
19725 if ((num_regs & 1) != 0)
19726 {
19727 rtx reg, mem, insn;
19728
19729 reg = gen_rtx_REG (SImode, regno);
19730 if (num_regs == 1)
19731 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19732 stack_pointer_rtx));
19733 else
19734 mem = gen_frame_mem (Pmode,
19735 gen_rtx_PRE_MODIFY
19736 (Pmode, stack_pointer_rtx,
19737 plus_constant (Pmode, stack_pointer_rtx,
19738 -4 * num_regs)));
19739
19740 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19741 RTX_FRAME_RELATED_P (tmp) = 1;
19742 insn = emit_insn (tmp);
19743 RTX_FRAME_RELATED_P (insn) = 1;
19744 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19745 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19746 reg);
19747 RTX_FRAME_RELATED_P (tmp) = 1;
19748 i++;
19749 regno++;
19750 XVECEXP (dwarf, 0, i) = tmp;
19751 first = false;
19752 }
19753
19754 while (i < num_regs)
19755 if (saved_regs_mask & (1 << regno))
19756 {
19757 rtx reg1, reg2, mem1, mem2;
19758 rtx tmp0, tmp1, tmp2;
19759 int regno2;
19760
19761 /* Find the register to pair with this one. */
19762 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19763 regno2++)
19764 ;
19765
19766 reg1 = gen_rtx_REG (SImode, regno);
19767 reg2 = gen_rtx_REG (SImode, regno2);
19768
19769 if (first)
19770 {
19771 rtx insn;
19772
19773 first = false;
19774 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19775 stack_pointer_rtx,
19776 -4 * num_regs));
19777 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19778 stack_pointer_rtx,
19779 -4 * (num_regs - 1)));
19780 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19781 plus_constant (Pmode, stack_pointer_rtx,
19782 -4 * (num_regs)));
19783 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19784 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19785 RTX_FRAME_RELATED_P (tmp0) = 1;
19786 RTX_FRAME_RELATED_P (tmp1) = 1;
19787 RTX_FRAME_RELATED_P (tmp2) = 1;
19788 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19789 XVECEXP (par, 0, 0) = tmp0;
19790 XVECEXP (par, 0, 1) = tmp1;
19791 XVECEXP (par, 0, 2) = tmp2;
19792 insn = emit_insn (par);
19793 RTX_FRAME_RELATED_P (insn) = 1;
19794 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19795 }
19796 else
19797 {
19798 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 4 * i));
19801 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19802 stack_pointer_rtx,
19803 4 * (i + 1)));
19804 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19805 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19806 RTX_FRAME_RELATED_P (tmp1) = 1;
19807 RTX_FRAME_RELATED_P (tmp2) = 1;
19808 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19809 XVECEXP (par, 0, 0) = tmp1;
19810 XVECEXP (par, 0, 1) = tmp2;
19811 emit_insn (par);
19812 }
19813
19814 /* Create unwind information. This is an approximation. */
19815 tmp1 = gen_rtx_SET (VOIDmode,
19816 gen_frame_mem (Pmode,
19817 plus_constant (Pmode,
19818 stack_pointer_rtx,
19819 4 * i)),
19820 reg1);
19821 tmp2 = gen_rtx_SET (VOIDmode,
19822 gen_frame_mem (Pmode,
19823 plus_constant (Pmode,
19824 stack_pointer_rtx,
19825 4 * (i + 1))),
19826 reg2);
19827
19828 RTX_FRAME_RELATED_P (tmp1) = 1;
19829 RTX_FRAME_RELATED_P (tmp2) = 1;
19830 XVECEXP (dwarf, 0, i + 1) = tmp1;
19831 XVECEXP (dwarf, 0, i + 2) = tmp2;
19832 i += 2;
19833 regno = regno2 + 1;
19834 }
19835 else
19836 regno++;
19837
19838 return;
19839 }
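
/* For example (illustrative only), pushing the odd set {r4, r5, r6}
   is expected to come out as

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   while the even set {r4, r5, r6, r7} starts with a write-back STRD:

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]
*/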
19840
19841 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19842 whenever possible; otherwise it emits single-word stores. The first store
19843 also allocates stack space for all saved registers, using writeback with
19844 pre-indexed addressing. All other stores use offset addressing. If no STRD
19845 can be emitted, this function emits a sequence of single-word stores,
19846 and not an STM as before, because single-word stores provide more freedom
19847 in scheduling and can be turned into an STM by peephole optimizations. */
19848 static void
19849 arm_emit_strd_push (unsigned long saved_regs_mask)
19850 {
19851 int num_regs = 0;
19852 int i, j, dwarf_index = 0;
19853 int offset = 0;
19854 rtx dwarf = NULL_RTX;
19855 rtx insn = NULL_RTX;
19856 rtx tmp, mem;
19857
19858 /* TODO: More efficient code could be emitted by changing the
19859 layout, e.g., first push all pairs that can use STRD to keep the
19860 stack aligned, and then push all other registers. */
19861 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19862 if (saved_regs_mask & (1 << i))
19863 num_regs++;
19864
19865 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19866 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19867 gcc_assert (num_regs > 0);
19868
19869 /* Create sequence for DWARF info. */
19870 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19871
19872 /* For dwarf info, we generate explicit stack update. */
19873 tmp = gen_rtx_SET (VOIDmode,
19874 stack_pointer_rtx,
19875 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19876 RTX_FRAME_RELATED_P (tmp) = 1;
19877 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19878
19879 /* Save registers. */
19880 offset = - 4 * num_regs;
19881 j = 0;
19882 while (j <= LAST_ARM_REGNUM)
19883 if (saved_regs_mask & (1 << j))
19884 {
19885 if ((j % 2 == 0)
19886 && (saved_regs_mask & (1 << (j + 1))))
19887 {
19888 /* The current register and the next register form a register pair
19889 for which an STRD can be generated. */
19890 if (offset < 0)
19891 {
19892 /* Allocate stack space for all saved registers. */
19893 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19894 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19895 mem = gen_frame_mem (DImode, tmp);
19896 offset = 0;
19897 }
19898 else if (offset > 0)
19899 mem = gen_frame_mem (DImode,
19900 plus_constant (Pmode,
19901 stack_pointer_rtx,
19902 offset));
19903 else
19904 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19905
19906 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19907 RTX_FRAME_RELATED_P (tmp) = 1;
19908 tmp = emit_insn (tmp);
19909
19910 /* Record the first store insn. */
19911 if (dwarf_index == 1)
19912 insn = tmp;
19913
19914 /* Generate dwarf info. */
19915 mem = gen_frame_mem (SImode,
19916 plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 offset));
19919 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19920 RTX_FRAME_RELATED_P (tmp) = 1;
19921 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19922
19923 mem = gen_frame_mem (SImode,
19924 plus_constant (Pmode,
19925 stack_pointer_rtx,
19926 offset + 4));
19927 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19928 RTX_FRAME_RELATED_P (tmp) = 1;
19929 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19930
19931 offset += 8;
19932 j += 2;
19933 }
19934 else
19935 {
19936 /* Emit a single word store. */
19937 if (offset < 0)
19938 {
19939 /* Allocate stack space for all saved registers. */
19940 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19941 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19942 mem = gen_frame_mem (SImode, tmp);
19943 offset = 0;
19944 }
19945 else if (offset > 0)
19946 mem = gen_frame_mem (SImode,
19947 plus_constant (Pmode,
19948 stack_pointer_rtx,
19949 offset));
19950 else
19951 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19952
19953 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19954 RTX_FRAME_RELATED_P (tmp) = 1;
19955 tmp = emit_insn (tmp);
19956
19957 /* Record the first store insn. */
19958 if (dwarf_index == 1)
19959 insn = tmp;
19960
19961 /* Generate dwarf info. */
19962 mem = gen_frame_mem (SImode,
19963 plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 offset));
19966 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19967 RTX_FRAME_RELATED_P (tmp) = 1;
19968 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19969
19970 offset += 4;
19971 j += 1;
19972 }
19973 }
19974 else
19975 j++;
19976
19977 /* Attach dwarf info to the first insn we generate. */
19978 gcc_assert (insn != NULL_RTX);
19979 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19980 RTX_FRAME_RELATED_P (insn) = 1;
19981 }
19982
19983 /* Generate and emit an insn that we will recognize as a push_multi.
19984 Unfortunately, since this insn does not reflect very well the actual
19985 semantics of the operation, we need to annotate the insn for the benefit
19986 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19987 MASK for registers that should be annotated for DWARF2 frame unwind
19988 information. */
19989 static rtx
19990 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19991 {
19992 int num_regs = 0;
19993 int num_dwarf_regs = 0;
19994 int i, j;
19995 rtx par;
19996 rtx dwarf;
19997 int dwarf_par_index;
19998 rtx tmp, reg;
19999
20000 /* We don't record the PC in the dwarf frame information. */
20001 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20002
20003 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20004 {
20005 if (mask & (1 << i))
20006 num_regs++;
20007 if (dwarf_regs_mask & (1 << i))
20008 num_dwarf_regs++;
20009 }
20010
20011 gcc_assert (num_regs && num_regs <= 16);
20012 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20013
20014 /* For the body of the insn we are going to generate an UNSPEC in
20015 parallel with several USEs. This allows the insn to be recognized
20016 by the push_multi pattern in the arm.md file.
20017
20018 The body of the insn looks something like this:
20019
20020 (parallel [
20021 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20022 (const_int:SI <num>)))
20023 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20024 (use (reg:SI XX))
20025 (use (reg:SI YY))
20026 ...
20027 ])
20028
20029 For the frame note however, we try to be more explicit and actually
20030 show each register being stored into the stack frame, plus a (single)
20031 decrement of the stack pointer. We do it this way in order to be
20032 friendly to the stack unwinding code, which only wants to see a single
20033 stack decrement per instruction. The RTL we generate for the note looks
20034 something like this:
20035
20036 (sequence [
20037 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20038 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20039 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20040 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20041 ...
20042 ])
20043
20044 FIXME: In an ideal world the PRE_MODIFY would not exist and
20045 instead we'd have a parallel expression detailing all
20046 the stores to the various memory addresses so that debug
20047 information is more up-to-date. Remember however while writing
20048 this to take care of the constraints with the push instruction.
20049
20050 Note also that this has to be taken care of for the VFP registers.
20051
20052 For more see PR43399. */
20053
20054 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20055 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20056 dwarf_par_index = 1;
20057
20058 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20059 {
20060 if (mask & (1 << i))
20061 {
20062 reg = gen_rtx_REG (SImode, i);
20063
20064 XVECEXP (par, 0, 0)
20065 = gen_rtx_SET (VOIDmode,
20066 gen_frame_mem
20067 (BLKmode,
20068 gen_rtx_PRE_MODIFY (Pmode,
20069 stack_pointer_rtx,
20070 plus_constant
20071 (Pmode, stack_pointer_rtx,
20072 -4 * num_regs))
20073 ),
20074 gen_rtx_UNSPEC (BLKmode,
20075 gen_rtvec (1, reg),
20076 UNSPEC_PUSH_MULT));
20077
20078 if (dwarf_regs_mask & (1 << i))
20079 {
20080 tmp = gen_rtx_SET (VOIDmode,
20081 gen_frame_mem (SImode, stack_pointer_rtx),
20082 reg);
20083 RTX_FRAME_RELATED_P (tmp) = 1;
20084 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20085 }
20086
20087 break;
20088 }
20089 }
20090
20091 for (j = 1, i++; j < num_regs; i++)
20092 {
20093 if (mask & (1 << i))
20094 {
20095 reg = gen_rtx_REG (SImode, i);
20096
20097 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20098
20099 if (dwarf_regs_mask & (1 << i))
20100 {
20101 tmp
20102 = gen_rtx_SET (VOIDmode,
20103 gen_frame_mem
20104 (SImode,
20105 plus_constant (Pmode, stack_pointer_rtx,
20106 4 * j)),
20107 reg);
20108 RTX_FRAME_RELATED_P (tmp) = 1;
20109 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20110 }
20111
20112 j++;
20113 }
20114 }
20115
20116 par = emit_insn (par);
20117
20118 tmp = gen_rtx_SET (VOIDmode,
20119 stack_pointer_rtx,
20120 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20121 RTX_FRAME_RELATED_P (tmp) = 1;
20122 XVECEXP (dwarf, 0, 0) = tmp;
20123
20124 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20125
20126 return par;
20127 }
20128
20129 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20130 SIZE is the offset to be adjusted.
20131 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20132 static void
20133 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20134 {
20135 rtx dwarf;
20136
20137 RTX_FRAME_RELATED_P (insn) = 1;
20138 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20139 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20140 }
20141
20142 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20143 SAVED_REGS_MASK shows which registers need to be restored.
20144
20145 Unfortunately, since this insn does not reflect very well the actual
20146 semantics of the operation, we need to annotate the insn for the benefit
20147 of DWARF2 frame unwind information. */
20148 static void
20149 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20150 {
20151 int num_regs = 0;
20152 int i, j;
20153 rtx par;
20154 rtx dwarf = NULL_RTX;
20155 rtx tmp, reg;
20156 bool return_in_pc;
20157 int offset_adj;
20158 int emit_update;
20159
20160 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20161 offset_adj = return_in_pc ? 1 : 0;
20162 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20163 if (saved_regs_mask & (1 << i))
20164 num_regs++;
20165
20166 gcc_assert (num_regs && num_regs <= 16);
20167
20168 /* If SP is in the reglist, then we don't emit an SP update insn. */
20169 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20170
20171 /* The parallel needs to hold num_regs SETs
20172 and one SET for the stack update. */
20173 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20174
20175 if (return_in_pc)
20176 {
20177 tmp = ret_rtx;
20178 XVECEXP (par, 0, 0) = tmp;
20179 }
20180
20181 if (emit_update)
20182 {
20183 /* Increment the stack pointer, based on there being
20184 num_regs 4-byte registers to restore. */
20185 tmp = gen_rtx_SET (VOIDmode,
20186 stack_pointer_rtx,
20187 plus_constant (Pmode,
20188 stack_pointer_rtx,
20189 4 * num_regs));
20190 RTX_FRAME_RELATED_P (tmp) = 1;
20191 XVECEXP (par, 0, offset_adj) = tmp;
20192 }
20193
20194 /* Now restore every reg, which may include PC. */
20195 for (j = 0, i = 0; j < num_regs; i++)
20196 if (saved_regs_mask & (1 << i))
20197 {
20198 reg = gen_rtx_REG (SImode, i);
20199 if ((num_regs == 1) && emit_update && !return_in_pc)
20200 {
20201 /* Emit single load with writeback. */
20202 tmp = gen_frame_mem (SImode,
20203 gen_rtx_POST_INC (Pmode,
20204 stack_pointer_rtx));
20205 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20206 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20207 return;
20208 }
20209
20210 tmp = gen_rtx_SET (VOIDmode,
20211 reg,
20212 gen_frame_mem
20213 (SImode,
20214 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20215 RTX_FRAME_RELATED_P (tmp) = 1;
20216 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20217
20218 /* We need to maintain a sequence for DWARF info too. Since the dwarf
20219 info should not include PC, skip it. */
20220 if (i != PC_REGNUM)
20221 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20222
20223 j++;
20224 }
20225
20226 if (return_in_pc)
20227 par = emit_jump_insn (par);
20228 else
20229 par = emit_insn (par);
20230
20231 REG_NOTES (par) = dwarf;
20232 if (!return_in_pc)
20233 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20234 stack_pointer_rtx, stack_pointer_rtx);
20235 }
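
/* As an illustration of the RTL generated above (in the same spirit as
   the push_multi note earlier): popping {r4, r5, pc} produces a jump
   insn whose body is roughly

     (parallel [
	(return)
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
	(set (reg:SI r4) (mem:SI (reg:SI sp)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
	(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))
     ])

   with REG_CFA_RESTORE notes attached for r4 and r5 only, since PC is
   not described in the dwarf info.  */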
20236
20237 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20238 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20239
20240 Unfortunately, since this insn does not reflect the actual semantics of
20241 the operation very well, we need to annotate the insn for the benefit
20242 of DWARF2 frame unwind information. */
20243 static void
20244 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20245 {
20246 int i, j;
20247 rtx par;
20248 rtx dwarf = NULL_RTX;
20249 rtx tmp, reg;
20250
20251 gcc_assert (num_regs && num_regs <= 32);
20252
20253 /* Work around the ARM10 VFPr1 bug. */
20254 if (num_regs == 2 && !arm_arch6)
20255 {
20256 if (first_reg == 15)
20257 first_reg--;
20258
20259 num_regs++;
20260 }
20261
20262 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20263 there could be up to 32 D-registers to restore.
20264 If there are more than 16 D-registers, make two recursive calls,
20265 each of which emits one pop_multi instruction. */
20266 if (num_regs > 16)
20267 {
20268 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20269 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20270 return;
20271 }
20272
20273 /* The parallel needs to hold num_regs SETs
20274 and one SET for the stack update. */
20275 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20276
20277 /* Increment the stack pointer, based on there being
20278 num_regs 8-byte registers to restore. */
20279 tmp = gen_rtx_SET (VOIDmode,
20280 base_reg,
20281 plus_constant (Pmode, base_reg, 8 * num_regs));
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 XVECEXP (par, 0, 0) = tmp;
20284
20285 /* Now show every reg that will be restored, using a SET for each. */
20286 for (j = 0, i=first_reg; j < num_regs; i += 2)
20287 {
20288 reg = gen_rtx_REG (DFmode, i);
20289
20290 tmp = gen_rtx_SET (VOIDmode,
20291 reg,
20292 gen_frame_mem
20293 (DFmode,
20294 plus_constant (Pmode, base_reg, 8 * j)));
20295 RTX_FRAME_RELATED_P (tmp) = 1;
20296 XVECEXP (par, 0, j + 1) = tmp;
20297
20298 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20299
20300 j++;
20301 }
20302
20303 par = emit_insn (par);
20304 REG_NOTES (par) = dwarf;
20305
20306 /* Make sure the CFA is not left based on IP_REGNUM, so that unwinding from FP works. */
20307 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20308 {
20309 RTX_FRAME_RELATED_P (par) = 1;
20310 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20311 }
20312 else
20313 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20314 base_reg, base_reg);
20315 }
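
/* Illustrative sketch, not part of the compiler: a standalone model (under
   "#if 0") of the chunking above, where a pop of more than 16 D-registers is
   split into two recursive calls of at most 16 each.  Plain D-register
   indices are used for simplicity instead of GCC's internal VFP register
   numbers; the function name and the example range are hypothetical.  */
#if 0
#include <stdio.h>

static void
example_emit_vfp_pop (int first_reg, int num_regs)
{
  if (num_regs > 16)
    {
      /* Same split as above: the first 16 registers, then the rest.  */
      example_emit_vfp_pop (first_reg, 16);
      example_emit_vfp_pop (first_reg + 16, num_regs - 16);
      return;
    }
  printf ("vldm covering d%d-d%d (%d bytes of stack)\n",
          first_reg, first_reg + num_regs - 1, 8 * num_regs);
}

int
main (void)
{
  /* Restoring d0-d23 prints two ranges: d0-d15 and d16-d23.  */
  example_emit_vfp_pop (0, 24);
  return 0;
}
#endif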
20316
20317 /* Generate and emit a pattern that will be recognized as an LDRD pattern.
20318 If an even number of registers is being popped, multiple LDRD patterns are
20319 created for all register pairs. If an odd number of registers is popped,
20320 the last register is loaded using an LDR pattern. */
20321 static void
20322 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20323 {
20324 int num_regs = 0;
20325 int i, j;
20326 rtx par = NULL_RTX;
20327 rtx dwarf = NULL_RTX;
20328 rtx tmp, reg, tmp1;
20329 bool return_in_pc;
20330
20331 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20332 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20333 if (saved_regs_mask & (1 << i))
20334 num_regs++;
20335
20336 gcc_assert (num_regs && num_regs <= 16);
20337
20338 /* We cannot generate an ldrd for PC, so reduce the count if PC is
20339 to be popped. If num_regs was even, it now becomes odd and a pop
20340 including PC can be generated; if it was odd, it is now even and
20341 an ldr with return can be generated for PC. */
20342 if (return_in_pc)
20343 num_regs--;
20344
20345 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20346
20347 /* Var j iterates over all the register numbers to gather the registers in
20348 saved_regs_mask. Var i gives the index of a saved register in the stack
20349 frame. A PARALLEL RTX holding a register pair is created here, so that
20350 the pattern for LDRD can be matched. As PC is always the last register to
20351 be popped, and we have already decremented num_regs if PC is present, we
20352 don't have to worry about PC in this loop. */
20353 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20354 if (saved_regs_mask & (1 << j))
20355 {
20356 /* Create RTX for memory load. */
20357 reg = gen_rtx_REG (SImode, j);
20358 tmp = gen_rtx_SET (SImode,
20359 reg,
20360 gen_frame_mem (SImode,
20361 plus_constant (Pmode,
20362 stack_pointer_rtx, 4 * i)));
20363 RTX_FRAME_RELATED_P (tmp) = 1;
20364
20365 if (i % 2 == 0)
20366 {
20367 /* When saved-register index (i) is even, the RTX to be emitted is
20368 yet to be created. Hence create it first. The LDRD pattern we
20369 are generating is:
20370 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20371 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20372 where target registers need not be consecutive. */
20373 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20374 dwarf = NULL_RTX;
20375 }
20376
20377 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20378 added as the 0th element; if i is odd, reg_i is added as the 1st element
20379 of the LDRD pattern shown above. */
20380 XVECEXP (par, 0, (i % 2)) = tmp;
20381 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20382
20383 if ((i % 2) == 1)
20384 {
20385 /* When the saved-register index (i) is odd, RTXs for both registers of
20386 the LDRD pattern given above have been generated, and the pattern can
20387 be emitted now. */
20388 par = emit_insn (par);
20389 REG_NOTES (par) = dwarf;
20390 RTX_FRAME_RELATED_P (par) = 1;
20391 }
20392
20393 i++;
20394 }
20395
20396 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20397 the number of registers is even AND return_in_pc is true, the last register
20398 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20399 and then emit an LDR with post-increment. */
20400
20401 /* Increment the stack pointer, based on there being
20402 num_regs 4-byte registers to restore. */
20403 tmp = gen_rtx_SET (VOIDmode,
20404 stack_pointer_rtx,
20405 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20406 RTX_FRAME_RELATED_P (tmp) = 1;
20407 tmp = emit_insn (tmp);
20408 if (!return_in_pc)
20409 {
20410 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20411 stack_pointer_rtx, stack_pointer_rtx);
20412 }
20413
20414 dwarf = NULL_RTX;
20415
20416 if (((num_regs % 2) == 1 && !return_in_pc)
20417 || ((num_regs % 2) == 0 && return_in_pc))
20418 {
20419 /* Scan for the single register to be popped. Skip until the saved
20420 register is found. */
20421 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20422
20423 /* Gen LDR with post increment here. */
20424 tmp1 = gen_rtx_MEM (SImode,
20425 gen_rtx_POST_INC (SImode,
20426 stack_pointer_rtx));
20427 set_mem_alias_set (tmp1, get_frame_alias_set ());
20428
20429 reg = gen_rtx_REG (SImode, j);
20430 tmp = gen_rtx_SET (SImode, reg, tmp1);
20431 RTX_FRAME_RELATED_P (tmp) = 1;
20432 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20433
20434 if (return_in_pc)
20435 {
20436 /* If return_in_pc, j must be PC_REGNUM. */
20437 gcc_assert (j == PC_REGNUM);
20438 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20439 XVECEXP (par, 0, 0) = ret_rtx;
20440 XVECEXP (par, 0, 1) = tmp;
20441 par = emit_jump_insn (par);
20442 }
20443 else
20444 {
20445 par = emit_insn (tmp);
20446 REG_NOTES (par) = dwarf;
20447 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20448 stack_pointer_rtx, stack_pointer_rtx);
20449 }
20450
20451 }
20452 else if ((num_regs % 2) == 1 && return_in_pc)
20453 {
20454 /* There are 2 registers left to be popped. So, generate the pattern
20455 pop_multiple_with_stack_update_and_return to pop into PC. */
20456 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20457 }
20458
20459 return;
20460 }
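
/* Illustrative sketch, not part of the compiler: the parity rule used above
   to decide how the last one or two registers leave the stack once all full
   LDRD pairs have been emitted.  The helper name example_ldrd_pop_tail and
   the example counts are hypothetical; the code is kept under "#if 0" so it
   is never built.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

/* NUM_REGS is the register count after PC has already been removed from
   it, exactly as in thumb2_emit_ldrd_pop above.  */
static const char *
example_ldrd_pop_tail (int num_regs, bool return_in_pc)
{
  if ((num_regs % 2 == 1 && !return_in_pc)
      || (num_regs % 2 == 0 && return_in_pc))
    return "single LDR with post-increment (possibly loading PC)";
  if (num_regs % 2 == 1 && return_in_pc)
    return "pop_multi of the final register together with PC";
  return "nothing left: every register was covered by an LDRD pair";
}

int
main (void)
{
  printf ("%s\n", example_ldrd_pop_tail (3, false)); /* odd count, no PC  */
  printf ("%s\n", example_ldrd_pop_tail (2, true));  /* even count, + PC  */
  printf ("%s\n", example_ldrd_pop_tail (3, true));  /* odd count, + PC   */
  printf ("%s\n", example_ldrd_pop_tail (4, false)); /* everything paired */
  return 0;
}
#endif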
20461
20462 /* LDRD in ARM mode needs consecutive registers as operands. This function
20463 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20464 offset addressing and then generates one separate stack update. This provides
20465 more scheduling freedom, compared to writeback on every load. However,
20466 if the function returns using load into PC directly
20467 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20468 before the last load. TODO: Add a peephole optimization to recognize
20469 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20470 peephole optimization to merge the load at stack-offset zero
20471 with the stack update instruction using load with writeback
20472 in post-index addressing mode. */
20473 static void
20474 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20475 {
20476 int j = 0;
20477 int offset = 0;
20478 rtx par = NULL_RTX;
20479 rtx dwarf = NULL_RTX;
20480 rtx tmp, mem;
20481
20482 /* Restore saved registers. */
20483 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20484 j = 0;
20485 while (j <= LAST_ARM_REGNUM)
20486 if (saved_regs_mask & (1 << j))
20487 {
20488 if ((j % 2) == 0
20489 && (saved_regs_mask & (1 << (j + 1)))
20490 && (j + 1) != PC_REGNUM)
20491 {
20492 /* The current register and the next register form a register pair for
20493 which LDRD can be generated. PC is always the last register popped, and
20494 we handle it separately. */
20495 if (offset > 0)
20496 mem = gen_frame_mem (DImode,
20497 plus_constant (Pmode,
20498 stack_pointer_rtx,
20499 offset));
20500 else
20501 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20502
20503 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20504 tmp = emit_insn (tmp);
20505 RTX_FRAME_RELATED_P (tmp) = 1;
20506
20507 /* Generate dwarf info. */
20508
20509 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20510 gen_rtx_REG (SImode, j),
20511 NULL_RTX);
20512 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20513 gen_rtx_REG (SImode, j + 1),
20514 dwarf);
20515
20516 REG_NOTES (tmp) = dwarf;
20517
20518 offset += 8;
20519 j += 2;
20520 }
20521 else if (j != PC_REGNUM)
20522 {
20523 /* Emit a single word load. */
20524 if (offset > 0)
20525 mem = gen_frame_mem (SImode,
20526 plus_constant (Pmode,
20527 stack_pointer_rtx,
20528 offset));
20529 else
20530 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20531
20532 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20533 tmp = emit_insn (tmp);
20534 RTX_FRAME_RELATED_P (tmp) = 1;
20535
20536 /* Generate dwarf info. */
20537 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20538 gen_rtx_REG (SImode, j),
20539 NULL_RTX);
20540
20541 offset += 4;
20542 j += 1;
20543 }
20544 else /* j == PC_REGNUM */
20545 j++;
20546 }
20547 else
20548 j++;
20549
20550 /* Update the stack. */
20551 if (offset > 0)
20552 {
20553 tmp = gen_rtx_SET (Pmode,
20554 stack_pointer_rtx,
20555 plus_constant (Pmode,
20556 stack_pointer_rtx,
20557 offset));
20558 tmp = emit_insn (tmp);
20559 arm_add_cfa_adjust_cfa_note (tmp, offset,
20560 stack_pointer_rtx, stack_pointer_rtx);
20561 offset = 0;
20562 }
20563
20564 if (saved_regs_mask & (1 << PC_REGNUM))
20565 {
20566 /* Only PC is to be popped. */
20567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20568 XVECEXP (par, 0, 0) = ret_rtx;
20569 tmp = gen_rtx_SET (SImode,
20570 gen_rtx_REG (SImode, PC_REGNUM),
20571 gen_frame_mem (SImode,
20572 gen_rtx_POST_INC (SImode,
20573 stack_pointer_rtx)));
20574 RTX_FRAME_RELATED_P (tmp) = 1;
20575 XVECEXP (par, 0, 1) = tmp;
20576 par = emit_jump_insn (par);
20577
20578 /* Generate dwarf info. */
20579 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20580 gen_rtx_REG (SImode, PC_REGNUM),
20581 NULL_RTX);
20582 REG_NOTES (par) = dwarf;
20583 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20584 stack_pointer_rtx, stack_pointer_rtx);
20585 }
20586 }
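
/* Illustrative sketch, not part of the compiler: a standalone model (under
   "#if 0") of the pairing walk above.  An even-numbered register whose
   successor is also in the mask (and is not PC) becomes half of an LDRD;
   anything else falls back to a single LDR, and one SP adjustment at the end
   covers the accumulated offset.  The register mask and the printed assembly
   are simplified, hypothetical examples.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Hypothetical mask: restore {r4, r5, r6, r8}; 15 stands for PC.  */
  unsigned long mask = (1UL << 4) | (1UL << 5) | (1UL << 6) | (1UL << 8);
  int offset = 0;
  int j = 0;

  while (j <= 14)  /* PC (r15) is handled separately by the real code.  */
    {
      if (!(mask & (1UL << j)))
        {
          j++;
          continue;
        }
      if ((j % 2) == 0 && (mask & (1UL << (j + 1))) && (j + 1) != 15)
        {
          printf ("ldrd r%d, r%d, [sp, #%d]\n", j, j + 1, offset);
          offset += 8;
          j += 2;
        }
      else
        {
          printf ("ldr r%d, [sp, #%d]\n", j, offset);
          offset += 4;
          j++;
        }
    }

  /* One stack update covers everything that was loaded.  */
  printf ("add sp, sp, #%d\n", offset);
  return 0;
}
#endif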
20587
20588 /* Calculate the size of the return value that is passed in registers. */
20589 static unsigned
20590 arm_size_return_regs (void)
20591 {
20592 machine_mode mode;
20593
20594 if (crtl->return_rtx != 0)
20595 mode = GET_MODE (crtl->return_rtx);
20596 else
20597 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20598
20599 return GET_MODE_SIZE (mode);
20600 }
20601
20602 /* Return true if the current function needs to save/restore LR. */
20603 static bool
20604 thumb_force_lr_save (void)
20605 {
20606 return !cfun->machine->lr_save_eliminated
20607 && (!leaf_function_p ()
20608 || thumb_far_jump_used_p ()
20609 || df_regs_ever_live_p (LR_REGNUM));
20610 }
20611
20612 /* We do not know whether r3 will be available, because
20613 there is an indirect tail call happening in this
20614 particular case. */
20615 static bool
20616 is_indirect_tailcall_p (rtx call)
20617 {
20618 rtx pat = PATTERN (call);
20619
20620 /* Indirect tail call. */
20621 pat = XVECEXP (pat, 0, 0);
20622 if (GET_CODE (pat) == SET)
20623 pat = SET_SRC (pat);
20624
20625 pat = XEXP (XEXP (pat, 0), 0);
20626 return REG_P (pat);
20627 }
20628
20629 /* Return true if r3 is used by any of the tail call insns in the
20630 current function. */
20631 static bool
20632 any_sibcall_could_use_r3 (void)
20633 {
20634 edge_iterator ei;
20635 edge e;
20636
20637 if (!crtl->tail_call_emit)
20638 return false;
20639 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20640 if (e->flags & EDGE_SIBCALL)
20641 {
20642 rtx call = BB_END (e->src);
20643 if (!CALL_P (call))
20644 call = prev_nonnote_nondebug_insn (call);
20645 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20646 if (find_regno_fusage (call, USE, 3)
20647 || is_indirect_tailcall_p (call))
20648 return true;
20649 }
20650 return false;
20651 }
20652
20653
20654 /* Compute the distance from register FROM to register TO.
20655 These can be the arg pointer (26), the soft frame pointer (25),
20656 the stack pointer (13) or the hard frame pointer (11).
20657 In thumb mode r7 is used as the soft frame pointer, if needed.
20658 Typical stack layout looks like this:
20659
20660     old stack pointer -> |    |
20661                           ----
20662                          |    | \
20663                          |    |   saved arguments for
20664                          |    |   vararg functions
20665                          |    | /
20666                            --
20667 hard FP & arg pointer -> |    | \
20668                          |    |   stack
20669                          |    |   frame
20670                          |    | /
20671                            --
20672                          |    | \
20673                          |    |   call saved
20674                          |    |   registers
20675    soft frame pointer -> |    | /
20676                            --
20677                          |    | \
20678                          |    |   local
20679                          |    |   variables
20680   locals base pointer -> |    | /
20681                            --
20682                          |    | \
20683                          |    |   outgoing
20684                          |    |   arguments
20685 current stack pointer -> |    | /
20686                            --
20687
20688 For a given function some or all of these stack components
20689 may not be needed, giving rise to the possibility of
20690 eliminating some of the registers.
20691
20692 The values returned by this function must reflect the behavior
20693 of arm_expand_prologue() and arm_compute_save_reg_mask().
20694
20695 The sign of the number returned reflects the direction of stack
20696 growth, so the values are positive for all eliminations except
20697 from the soft frame pointer to the hard frame pointer.
20698
20699 SFP may point just inside the local variables block to ensure correct
20700 alignment. */
20701
20702
20703 /* Calculate stack offsets. These are used to calculate register elimination
20704 offsets and in prologue/epilogue code. Also calculates which registers
20705 should be saved. */
20706
20707 static arm_stack_offsets *
20708 arm_get_frame_offsets (void)
20709 {
20710 struct arm_stack_offsets *offsets;
20711 unsigned long func_type;
20712 int leaf;
20713 int saved;
20714 int core_saved;
20715 HOST_WIDE_INT frame_size;
20716 int i;
20717
20718 offsets = &cfun->machine->stack_offsets;
20719
20720 /* We need to know if we are a leaf function. Unfortunately, it
20721 is possible to be called after start_sequence has been called,
20722 which causes get_insns to return the insns for the sequence,
20723 not the function, which will cause leaf_function_p to return
20724 the incorrect result.
20725
20726 Fortunately, we only need to know about leaf functions once reload has
20727 completed, and the frame size cannot be changed after that time, so we
20728 can safely use the cached value. */
20729
20730 if (reload_completed)
20731 return offsets;
20732
20733 /* Initially this is the size of the local variables. It will be translated
20734 into an offset once we have determined the size of preceding data. */
20735 frame_size = ROUND_UP_WORD (get_frame_size ());
20736
20737 leaf = leaf_function_p ();
20738
20739 /* Space for variadic functions. */
20740 offsets->saved_args = crtl->args.pretend_args_size;
20741
20742 /* In Thumb mode this is incorrect, but never used. */
20743 offsets->frame
20744 = (offsets->saved_args
20745 + arm_compute_static_chain_stack_bytes ()
20746 + (frame_pointer_needed ? 4 : 0));
20747
20748 if (TARGET_32BIT)
20749 {
20750 unsigned int regno;
20751
20752 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20753 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20754 saved = core_saved;
20755
20756 /* We know that SP will be doubleword aligned on entry, and we must
20757 preserve that condition at any subroutine call. We also require the
20758 soft frame pointer to be doubleword aligned. */
20759
20760 if (TARGET_REALLY_IWMMXT)
20761 {
20762 /* Check for the call-saved iWMMXt registers. */
20763 for (regno = FIRST_IWMMXT_REGNUM;
20764 regno <= LAST_IWMMXT_REGNUM;
20765 regno++)
20766 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20767 saved += 8;
20768 }
20769
20770 func_type = arm_current_func_type ();
20771 /* Space for saved VFP registers. */
20772 if (! IS_VOLATILE (func_type)
20773 && TARGET_HARD_FLOAT && TARGET_VFP)
20774 saved += arm_get_vfp_saved_size ();
20775 }
20776 else /* TARGET_THUMB1 */
20777 {
20778 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20779 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20780 saved = core_saved;
20781 if (TARGET_BACKTRACE)
20782 saved += 16;
20783 }
20784
20785 /* Saved registers include the stack frame. */
20786 offsets->saved_regs
20787 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20788 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20789
20790 /* A leaf function does not need any stack alignment if it has nothing
20791 on the stack. */
20792 if (leaf && frame_size == 0
20793 /* However if it calls alloca(), we have a dynamically allocated
20794 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20795 && ! cfun->calls_alloca)
20796 {
20797 offsets->outgoing_args = offsets->soft_frame;
20798 offsets->locals_base = offsets->soft_frame;
20799 return offsets;
20800 }
20801
20802 /* Ensure SFP has the correct alignment. */
20803 if (ARM_DOUBLEWORD_ALIGN
20804 && (offsets->soft_frame & 7))
20805 {
20806 offsets->soft_frame += 4;
20807 /* Try to align stack by pushing an extra reg. Don't bother doing this
20808 when there is a stack frame as the alignment will be rolled into
20809 the normal stack adjustment. */
20810 if (frame_size + crtl->outgoing_args_size == 0)
20811 {
20812 int reg = -1;
20813
20814 /* Register r3 is caller-saved. Normally it does not need to be
20815 saved on entry by the prologue. However if we choose to save
20816 it for padding then we may confuse the compiler into thinking
20817 a prologue sequence is required when in fact it is not. This
20818 will occur when shrink-wrapping if r3 is used as a scratch
20819 register and there are no other callee-saved writes.
20820
20821 This situation can be avoided when other callee-saved registers
20822 are available and r3 is not mandatory if we choose a callee-saved
20823 register for padding. */
20824 bool prefer_callee_reg_p = false;
20825
20826 /* If it is safe to use r3, then do so. This sometimes
20827 generates better code on Thumb-2 by avoiding the need to
20828 use 32-bit push/pop instructions. */
20829 if (! any_sibcall_could_use_r3 ()
20830 && arm_size_return_regs () <= 12
20831 && (offsets->saved_regs_mask & (1 << 3)) == 0
20832 && (TARGET_THUMB2
20833 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20834 {
20835 reg = 3;
20836 if (!TARGET_THUMB2)
20837 prefer_callee_reg_p = true;
20838 }
20839 if (reg == -1
20840 || prefer_callee_reg_p)
20841 {
20842 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20843 {
20844 /* Avoid fixed registers; they may be changed at
20845 arbitrary times so it's unsafe to restore them
20846 during the epilogue. */
20847 if (!fixed_regs[i]
20848 && (offsets->saved_regs_mask & (1 << i)) == 0)
20849 {
20850 reg = i;
20851 break;
20852 }
20853 }
20854 }
20855
20856 if (reg != -1)
20857 {
20858 offsets->saved_regs += 4;
20859 offsets->saved_regs_mask |= (1 << reg);
20860 }
20861 }
20862 }
20863
20864 offsets->locals_base = offsets->soft_frame + frame_size;
20865 offsets->outgoing_args = (offsets->locals_base
20866 + crtl->outgoing_args_size);
20867
20868 if (ARM_DOUBLEWORD_ALIGN)
20869 {
20870 /* Ensure SP remains doubleword aligned. */
20871 if (offsets->outgoing_args & 7)
20872 offsets->outgoing_args += 4;
20873 gcc_assert (!(offsets->outgoing_args & 7));
20874 }
20875
20876 return offsets;
20877 }
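
/* Illustrative sketch, not part of the compiler: the doubleword-alignment
   adjustment applied above to offsets->soft_frame and offsets->outgoing_args,
   modelled on its own (under "#if 0").  Both offsets are always word aligned,
   so rounding up to 8 costs at most one extra word.  The helper name and the
   values are hypothetical.  */
#if 0
#include <stdio.h>

/* Round a word-aligned byte offset up to the next 8-byte boundary.  */
static int
example_align_to_doubleword (int offset)
{
  return (offset & 7) ? offset + 4 : offset;
}

int
main (void)
{
  printf ("%d -> %d\n", 20, example_align_to_doubleword (20)); /* 20 -> 24 */
  printf ("%d -> %d\n", 24, example_align_to_doubleword (24)); /* unchanged */
  return 0;
}
#endif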
20878
20879
20880 /* Calculate the relative offsets for the different stack pointers. Positive
20881 offsets are in the direction of stack growth. */
20882
20883 HOST_WIDE_INT
20884 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20885 {
20886 arm_stack_offsets *offsets;
20887
20888 offsets = arm_get_frame_offsets ();
20889
20890 /* OK, now we have enough information to compute the distances.
20891 There must be an entry in these switch tables for each pair
20892 of registers in ELIMINABLE_REGS, even if some of the entries
20893 seem to be redundant or useless. */
20894 switch (from)
20895 {
20896 case ARG_POINTER_REGNUM:
20897 switch (to)
20898 {
20899 case THUMB_HARD_FRAME_POINTER_REGNUM:
20900 return 0;
20901
20902 case FRAME_POINTER_REGNUM:
20903 /* This is the reverse of the soft frame pointer
20904 to hard frame pointer elimination below. */
20905 return offsets->soft_frame - offsets->saved_args;
20906
20907 case ARM_HARD_FRAME_POINTER_REGNUM:
20908 /* This is only non-zero in the case where the static chain register
20909 is stored above the frame. */
20910 return offsets->frame - offsets->saved_args - 4;
20911
20912 case STACK_POINTER_REGNUM:
20913 /* If nothing has been pushed on the stack at all
20914 then this will return -4. This *is* correct! */
20915 return offsets->outgoing_args - (offsets->saved_args + 4);
20916
20917 default:
20918 gcc_unreachable ();
20919 }
20920 gcc_unreachable ();
20921
20922 case FRAME_POINTER_REGNUM:
20923 switch (to)
20924 {
20925 case THUMB_HARD_FRAME_POINTER_REGNUM:
20926 return 0;
20927
20928 case ARM_HARD_FRAME_POINTER_REGNUM:
20929 /* The hard frame pointer points to the top entry in the
20930 stack frame. The soft frame pointer points to the bottom
20931 entry in the stack frame. If there is no stack frame at
20932 all, then they are identical. */
20933
20934 return offsets->frame - offsets->soft_frame;
20935
20936 case STACK_POINTER_REGNUM:
20937 return offsets->outgoing_args - offsets->soft_frame;
20938
20939 default:
20940 gcc_unreachable ();
20941 }
20942 gcc_unreachable ();
20943
20944 default:
20945 /* You cannot eliminate from the stack pointer.
20946 In theory you could eliminate from the hard frame
20947 pointer to the stack pointer, but this will never
20948 happen, since if a stack frame is not needed the
20949 hard frame pointer will never be used. */
20950 gcc_unreachable ();
20951 }
20952 }
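
/* Illustrative sketch, not part of the compiler: a worked example (under
   "#if 0") of the distances returned above for one hypothetical frame,
   assuming no static chain slot and a zero caller-interworking slot.  The
   struct and its values are invented for illustration; only the formulas are
   taken from the switch above.  */
#if 0
#include <stdio.h>

struct example_offsets
{
  int saved_args, frame, saved_regs, soft_frame, locals_base, outgoing_args;
};

int
main (void)
{
  /* Hypothetical but internally consistent frame: no pretend args, a frame
     pointer (frame = 4), 24 bytes of saved core registers, 16 bytes of
     locals and 8 bytes of outgoing arguments.  */
  struct example_offsets o = { 0, 4, 24, 24, 40, 48 };

  printf ("arg pointer -> soft frame pointer: %d\n",
          o.soft_frame - o.saved_args);                   /* 24  */
  printf ("arg pointer -> hard frame pointer: %d\n",
          o.frame - o.saved_args - 4);                    /* 0   */
  printf ("arg pointer -> stack pointer:      %d\n",
          o.outgoing_args - (o.saved_args + 4));          /* 44  */
  printf ("soft frame  -> hard frame pointer: %d\n",
          o.frame - o.soft_frame);                        /* -20 */
  printf ("soft frame  -> stack pointer:      %d\n",
          o.outgoing_args - o.soft_frame);                /* 24  */
  return 0;
}
#endif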
20953
20954 /* Given FROM and TO register numbers, say whether this elimination is
20955 allowed. Frame pointer elimination is automatically handled.
20956
20957 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20958 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20959 pointer, we must eliminate FRAME_POINTER_REGNUM into
20960 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20961 ARG_POINTER_REGNUM. */
20962
20963 bool
20964 arm_can_eliminate (const int from, const int to)
20965 {
20966 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20967 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20968 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20969 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20970 true);
20971 }
20972
20973 /* Emit RTL to save coprocessor registers on function entry. Returns the
20974 number of bytes pushed. */
20975
20976 static int
20977 arm_save_coproc_regs(void)
20978 {
20979 int saved_size = 0;
20980 unsigned reg;
20981 unsigned start_reg;
20982 rtx insn;
20983
20984 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20985 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20986 {
20987 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20988 insn = gen_rtx_MEM (V2SImode, insn);
20989 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20990 RTX_FRAME_RELATED_P (insn) = 1;
20991 saved_size += 8;
20992 }
20993
20994 if (TARGET_HARD_FLOAT && TARGET_VFP)
20995 {
20996 start_reg = FIRST_VFP_REGNUM;
20997
20998 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20999 {
21000 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21001 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21002 {
21003 if (start_reg != reg)
21004 saved_size += vfp_emit_fstmd (start_reg,
21005 (reg - start_reg) / 2);
21006 start_reg = reg + 2;
21007 }
21008 }
21009 if (start_reg != reg)
21010 saved_size += vfp_emit_fstmd (start_reg,
21011 (reg - start_reg) / 2);
21012 }
21013 return saved_size;
21014 }
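
/* Illustrative sketch, not part of the compiler: the run detection used
   above to group consecutive live VFP registers into as few store-multiple
   blocks as possible, modelled (under "#if 0") with plain D-register indices
   instead of GCC's VFP register numbers.  The liveness pattern is a
   hypothetical example.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

int
main (void)
{
  /* Hypothetical liveness: d8, d9 and d11 must be saved, d10 must not.  */
  bool live[16] = { false };
  int start = 0, d;

  live[8] = live[9] = live[11] = true;

  for (d = 0; d < 16; d++)
    if (!live[d])
      {
        /* A dead register ends the current run, if any.  */
        if (start != d)
          printf ("fstmd covering d%d-d%d\n", start, d - 1);
        start = d + 1;
      }
  if (start != 16)
    printf ("fstmd covering d%d-d%d\n", start, 15);
  return 0;
}
#endif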
21015
21016
21017 /* Set the Thumb frame pointer from the stack pointer. */
21018
21019 static void
21020 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21021 {
21022 HOST_WIDE_INT amount;
21023 rtx insn, dwarf;
21024
21025 amount = offsets->outgoing_args - offsets->locals_base;
21026 if (amount < 1024)
21027 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21028 stack_pointer_rtx, GEN_INT (amount)));
21029 else
21030 {
21031 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21032 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21033 expects the first two operands to be the same. */
21034 if (TARGET_THUMB2)
21035 {
21036 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21037 stack_pointer_rtx,
21038 hard_frame_pointer_rtx));
21039 }
21040 else
21041 {
21042 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21043 hard_frame_pointer_rtx,
21044 stack_pointer_rtx));
21045 }
21046 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21047 plus_constant (Pmode, stack_pointer_rtx, amount));
21048 RTX_FRAME_RELATED_P (dwarf) = 1;
21049 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21050 }
21051
21052 RTX_FRAME_RELATED_P (insn) = 1;
21053 }
21054
21055 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21056 function. */
21057 void
21058 arm_expand_prologue (void)
21059 {
21060 rtx amount;
21061 rtx insn;
21062 rtx ip_rtx;
21063 unsigned long live_regs_mask;
21064 unsigned long func_type;
21065 int fp_offset = 0;
21066 int saved_pretend_args = 0;
21067 int saved_regs = 0;
21068 unsigned HOST_WIDE_INT args_to_push;
21069 arm_stack_offsets *offsets;
21070
21071 func_type = arm_current_func_type ();
21072
21073 /* Naked functions don't have prologues. */
21074 if (IS_NAKED (func_type))
21075 return;
21076
21077 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21078 args_to_push = crtl->args.pretend_args_size;
21079
21080 /* Compute which registers we will have to save onto the stack. */
21081 offsets = arm_get_frame_offsets ();
21082 live_regs_mask = offsets->saved_regs_mask;
21083
21084 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21085
21086 if (IS_STACKALIGN (func_type))
21087 {
21088 rtx r0, r1;
21089
21090 /* Handle a word-aligned stack pointer. We generate the following:
21091
21092 mov r0, sp
21093 bic r1, r0, #7
21094 mov sp, r1
21095 <save and restore r0 in normal prologue/epilogue>
21096 mov sp, r0
21097 bx lr
21098
21099 The unwinder doesn't need to know about the stack realignment.
21100 Just tell it we saved SP in r0. */
21101 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21102
21103 r0 = gen_rtx_REG (SImode, 0);
21104 r1 = gen_rtx_REG (SImode, 1);
21105
21106 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21107 RTX_FRAME_RELATED_P (insn) = 1;
21108 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21109
21110 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21111
21112 /* ??? The CFA changes here, which may cause GDB to conclude that it
21113 has entered a different function. That said, the unwind info is
21114 correct, individually, before and after this instruction because
21115 we've described the save of SP, which will override the default
21116 handling of SP as restoring from the CFA. */
21117 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21118 }
21119
21120 /* For APCS frames, if the IP register is clobbered
21121 when creating the frame, save that register in a
21122 special way. */
21123 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21124 {
21125 if (IS_INTERRUPT (func_type))
21126 {
21127 /* Interrupt functions must not corrupt any registers.
21128 Creating a frame pointer, however, corrupts the IP
21129 register, so we must push it first. */
21130 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21131
21132 /* Do not set RTX_FRAME_RELATED_P on this insn.
21133 The dwarf stack unwinding code only wants to see one
21134 stack decrement per function, and this is not it. If
21135 this instruction is labeled as being part of the frame
21136 creation sequence then dwarf2out_frame_debug_expr will
21137 die when it encounters the assignment of IP to FP
21138 later on, since the use of SP here establishes SP as
21139 the CFA register and not IP.
21140
21141 Anyway this instruction is not really part of the stack
21142 frame creation although it is part of the prologue. */
21143 }
21144 else if (IS_NESTED (func_type))
21145 {
21146 /* The static chain register is the same as the IP register
21147 used as a scratch register during stack frame creation.
21148 To get around this, we need to find somewhere to store IP
21149 whilst the frame is being created. We try the following
21150 places in order:
21151
21152 1. The last argument register r3 if it is available.
21153 2. A slot on the stack above the frame if there are no
21154 arguments to push onto the stack.
21155 3. Register r3 again, after pushing the argument registers
21156 onto the stack, if this is a varargs function.
21157 4. The last slot on the stack created for the arguments to
21158 push, if this isn't a varargs function.
21159
21160 Note - we only need to tell the dwarf2 backend about the SP
21161 adjustment in the second variant; the static chain register
21162 doesn't need to be unwound, as it doesn't contain a value
21163 inherited from the caller. */
21164
21165 if (!arm_r3_live_at_start_p ())
21166 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21167 else if (args_to_push == 0)
21168 {
21169 rtx addr, dwarf;
21170
21171 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21172 saved_regs += 4;
21173
21174 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21175 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21176 fp_offset = 4;
21177
21178 /* Just tell the dwarf backend that we adjusted SP. */
21179 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21180 plus_constant (Pmode, stack_pointer_rtx,
21181 -fp_offset));
21182 RTX_FRAME_RELATED_P (insn) = 1;
21183 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21184 }
21185 else
21186 {
21187 /* Store the args on the stack. */
21188 if (cfun->machine->uses_anonymous_args)
21189 {
21190 insn
21191 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21192 (0xf0 >> (args_to_push / 4)) & 0xf);
21193 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21194 saved_pretend_args = 1;
21195 }
21196 else
21197 {
21198 rtx addr, dwarf;
21199
21200 if (args_to_push == 4)
21201 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21202 else
21203 addr
21204 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21205 plus_constant (Pmode,
21206 stack_pointer_rtx,
21207 -args_to_push));
21208
21209 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21210
21211 /* Just tell the dwarf backend that we adjusted SP. */
21212 dwarf
21213 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21214 plus_constant (Pmode, stack_pointer_rtx,
21215 -args_to_push));
21216 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21217 }
21218
21219 RTX_FRAME_RELATED_P (insn) = 1;
21220 fp_offset = args_to_push;
21221 args_to_push = 0;
21222 }
21223 }
21224
21225 insn = emit_set_insn (ip_rtx,
21226 plus_constant (Pmode, stack_pointer_rtx,
21227 fp_offset));
21228 RTX_FRAME_RELATED_P (insn) = 1;
21229 }
21230
21231 if (args_to_push)
21232 {
21233 /* Push the argument registers, or reserve space for them. */
21234 if (cfun->machine->uses_anonymous_args)
21235 insn = emit_multi_reg_push
21236 ((0xf0 >> (args_to_push / 4)) & 0xf,
21237 (0xf0 >> (args_to_push / 4)) & 0xf);
21238 else
21239 insn = emit_insn
21240 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21241 GEN_INT (- args_to_push)));
21242 RTX_FRAME_RELATED_P (insn) = 1;
21243 }
21244
21245 /* If this is an interrupt service routine, and the link register
21246 is going to be pushed, and we're not generating an extra
21247 push of IP (needed when a frame is needed and the frame layout is APCS),
21248 subtracting four from LR now will mean that the function return
21249 can be done with a single instruction. */
21250 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21251 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21252 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21253 && TARGET_ARM)
21254 {
21255 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21256
21257 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21258 }
21259
21260 if (live_regs_mask)
21261 {
21262 unsigned long dwarf_regs_mask = live_regs_mask;
21263
21264 saved_regs += bit_count (live_regs_mask) * 4;
21265 if (optimize_size && !frame_pointer_needed
21266 && saved_regs == offsets->saved_regs - offsets->saved_args)
21267 {
21268 /* If no coprocessor registers are being pushed and we don't have
21269 to worry about a frame pointer then push extra registers to
21270 create the stack frame. This is done in a way that does not
21271 alter the frame layout, so is independent of the epilogue. */
21272 int n;
21273 int frame;
21274 n = 0;
21275 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21276 n++;
21277 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21278 if (frame && n * 4 >= frame)
21279 {
21280 n = frame / 4;
21281 live_regs_mask |= (1 << n) - 1;
21282 saved_regs += frame;
21283 }
21284 }
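/* For example (illustrative): if r0-r2 are not live, the lowest live
   register is r3 and the remaining frame is 8 bytes, then r0 and r1 are
   simply added to the push and the separate stack-pointer subtraction
   disappears. */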
21285
21286 if (TARGET_LDRD
21287 && current_tune->prefer_ldrd_strd
21288 && !optimize_function_for_size_p (cfun))
21289 {
21290 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21291 if (TARGET_THUMB2)
21292 thumb2_emit_strd_push (live_regs_mask);
21293 else if (TARGET_ARM
21294 && !TARGET_APCS_FRAME
21295 && !IS_INTERRUPT (func_type))
21296 arm_emit_strd_push (live_regs_mask);
21297 else
21298 {
21299 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21300 RTX_FRAME_RELATED_P (insn) = 1;
21301 }
21302 }
21303 else
21304 {
21305 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21306 RTX_FRAME_RELATED_P (insn) = 1;
21307 }
21308 }
21309
21310 if (! IS_VOLATILE (func_type))
21311 saved_regs += arm_save_coproc_regs ();
21312
21313 if (frame_pointer_needed && TARGET_ARM)
21314 {
21315 /* Create the new frame pointer. */
21316 if (TARGET_APCS_FRAME)
21317 {
21318 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21319 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21320 RTX_FRAME_RELATED_P (insn) = 1;
21321
21322 if (IS_NESTED (func_type))
21323 {
21324 /* Recover the static chain register. */
21325 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21326 insn = gen_rtx_REG (SImode, 3);
21327 else
21328 {
21329 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21330 insn = gen_frame_mem (SImode, insn);
21331 }
21332 emit_set_insn (ip_rtx, insn);
21333 /* Add a USE to stop propagate_one_insn() from barfing. */
21334 emit_insn (gen_force_register_use (ip_rtx));
21335 }
21336 }
21337 else
21338 {
21339 insn = GEN_INT (saved_regs - 4);
21340 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21341 stack_pointer_rtx, insn));
21342 RTX_FRAME_RELATED_P (insn) = 1;
21343 }
21344 }
21345
21346 if (flag_stack_usage_info)
21347 current_function_static_stack_size
21348 = offsets->outgoing_args - offsets->saved_args;
21349
21350 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21351 {
21352 /* This add can produce multiple insns for a large constant, so we
21353 need to get tricky. */
21354 rtx_insn *last = get_last_insn ();
21355
21356 amount = GEN_INT (offsets->saved_args + saved_regs
21357 - offsets->outgoing_args);
21358
21359 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21360 amount));
21361 do
21362 {
21363 last = last ? NEXT_INSN (last) : get_insns ();
21364 RTX_FRAME_RELATED_P (last) = 1;
21365 }
21366 while (last != insn);
21367
21368 /* If the frame pointer is needed, emit a special barrier that
21369 will prevent the scheduler from moving stores to the frame
21370 before the stack adjustment. */
21371 if (frame_pointer_needed)
21372 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21373 hard_frame_pointer_rtx));
21374 }
21375
21376
21377 if (frame_pointer_needed && TARGET_THUMB2)
21378 thumb_set_frame_pointer (offsets);
21379
21380 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21381 {
21382 unsigned long mask;
21383
21384 mask = live_regs_mask;
21385 mask &= THUMB2_WORK_REGS;
21386 if (!IS_NESTED (func_type))
21387 mask |= (1 << IP_REGNUM);
21388 arm_load_pic_register (mask);
21389 }
21390
21391 /* If we are profiling, make sure no instructions are scheduled before
21392 the call to mcount. Similarly if the user has requested no
21393 scheduling in the prolog. Similarly if we want non-call exceptions
21394 using the EABI unwinder, to prevent faulting instructions from being
21395 swapped with a stack adjustment. */
21396 if (crtl->profile || !TARGET_SCHED_PROLOG
21397 || (arm_except_unwind_info (&global_options) == UI_TARGET
21398 && cfun->can_throw_non_call_exceptions))
21399 emit_insn (gen_blockage ());
21400
21401 /* If the link register is being kept alive, with the return address in it,
21402 then make sure that it does not get reused by the ce2 pass. */
21403 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21404 cfun->machine->lr_save_eliminated = 1;
21405 }
21406 \f
21407 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21408 static void
21409 arm_print_condition (FILE *stream)
21410 {
21411 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21412 {
21413 /* Branch conversion is not implemented for Thumb-2. */
21414 if (TARGET_THUMB)
21415 {
21416 output_operand_lossage ("predicated Thumb instruction");
21417 return;
21418 }
21419 if (current_insn_predicate != NULL)
21420 {
21421 output_operand_lossage
21422 ("predicated instruction in conditional sequence");
21423 return;
21424 }
21425
21426 fputs (arm_condition_codes[arm_current_cc], stream);
21427 }
21428 else if (current_insn_predicate)
21429 {
21430 enum arm_cond_code code;
21431
21432 if (TARGET_THUMB1)
21433 {
21434 output_operand_lossage ("predicated Thumb instruction");
21435 return;
21436 }
21437
21438 code = get_arm_condition_code (current_insn_predicate);
21439 fputs (arm_condition_codes[code], stream);
21440 }
21441 }
21442
21443
21444 /* Globally reserved letters: acln
21445 Punctuation letters currently used: @_|?().!#
21446 Lower case letters currently used: bcdefhimpqtvwxyz
21447 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21448 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21449
21450 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21451
21452 If CODE is 'd', then the X is a condition operand and the instruction
21453 should only be executed if the condition is true.
21454 If CODE is 'D', then the X is a condition operand and the instruction
21455 should only be executed if the condition is false: however, if the mode
21456 of the comparison is CCFPEmode, then always execute the instruction -- we
21457 do this because in these circumstances !GE does not necessarily imply LT;
21458 in these cases the instruction pattern will take care to make sure that
21459 an instruction containing %d will follow, thereby undoing the effects of
21460 doing this instruction unconditionally.
21461 If CODE is 'N' then X is a floating point operand that must be negated
21462 before output.
21463 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21464 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21465 static void
21466 arm_print_operand (FILE *stream, rtx x, int code)
21467 {
21468 switch (code)
21469 {
21470 case '@':
21471 fputs (ASM_COMMENT_START, stream);
21472 return;
21473
21474 case '_':
21475 fputs (user_label_prefix, stream);
21476 return;
21477
21478 case '|':
21479 fputs (REGISTER_PREFIX, stream);
21480 return;
21481
21482 case '?':
21483 arm_print_condition (stream);
21484 return;
21485
21486 case '(':
21487 /* Nothing in unified syntax, otherwise the current condition code. */
21488 if (!TARGET_UNIFIED_ASM)
21489 arm_print_condition (stream);
21490 break;
21491
21492 case ')':
21493 /* The current condition code in unified syntax, otherwise nothing. */
21494 if (TARGET_UNIFIED_ASM)
21495 arm_print_condition (stream);
21496 break;
21497
21498 case '.':
21499 /* The current condition code for a condition code setting instruction.
21500 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21501 if (TARGET_UNIFIED_ASM)
21502 {
21503 fputc('s', stream);
21504 arm_print_condition (stream);
21505 }
21506 else
21507 {
21508 arm_print_condition (stream);
21509 fputc('s', stream);
21510 }
21511 return;
21512
21513 case '!':
21514 /* If the instruction is conditionally executed then print
21515 the current condition code, otherwise print 's'. */
21516 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21517 if (current_insn_predicate)
21518 arm_print_condition (stream);
21519 else
21520 fputc('s', stream);
21521 break;
21522
21523 /* %# is a "break" sequence. It doesn't output anything, but is used to
21524 separate e.g. operand numbers from following text, if that text consists
21525 of further digits which we don't want to be part of the operand
21526 number. */
21527 case '#':
21528 return;
21529
21530 case 'N':
21531 {
21532 REAL_VALUE_TYPE r;
21533 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21534 r = real_value_negate (&r);
21535 fprintf (stream, "%s", fp_const_from_val (&r));
21536 }
21537 return;
21538
21539 /* An integer or symbol address without a preceding # sign. */
21540 case 'c':
21541 switch (GET_CODE (x))
21542 {
21543 case CONST_INT:
21544 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21545 break;
21546
21547 case SYMBOL_REF:
21548 output_addr_const (stream, x);
21549 break;
21550
21551 case CONST:
21552 if (GET_CODE (XEXP (x, 0)) == PLUS
21553 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21554 {
21555 output_addr_const (stream, x);
21556 break;
21557 }
21558 /* Fall through. */
21559
21560 default:
21561 output_operand_lossage ("Unsupported operand for code '%c'", code);
21562 }
21563 return;
21564
21565 /* An integer that we want to print in HEX. */
21566 case 'x':
21567 switch (GET_CODE (x))
21568 {
21569 case CONST_INT:
21570 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21571 break;
21572
21573 default:
21574 output_operand_lossage ("Unsupported operand for code '%c'", code);
21575 }
21576 return;
21577
21578 case 'B':
21579 if (CONST_INT_P (x))
21580 {
21581 HOST_WIDE_INT val;
21582 val = ARM_SIGN_EXTEND (~INTVAL (x));
21583 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21584 }
21585 else
21586 {
21587 putc ('~', stream);
21588 output_addr_const (stream, x);
21589 }
21590 return;
21591
21592 case 'b':
21593 /* Print the log2 of a CONST_INT. */
21594 {
21595 HOST_WIDE_INT val;
21596
21597 if (!CONST_INT_P (x)
21598 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21599 output_operand_lossage ("Unsupported operand for code '%c'", code);
21600 else
21601 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21602 }
21603 return;
21604
21605 case 'L':
21606 /* The low 16 bits of an immediate constant. */
21607 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21608 return;
21609
21610 case 'i':
21611 fprintf (stream, "%s", arithmetic_instr (x, 1));
21612 return;
21613
21614 case 'I':
21615 fprintf (stream, "%s", arithmetic_instr (x, 0));
21616 return;
21617
21618 case 'S':
21619 {
21620 HOST_WIDE_INT val;
21621 const char *shift;
21622
21623 shift = shift_op (x, &val);
21624
21625 if (shift)
21626 {
21627 fprintf (stream, ", %s ", shift);
21628 if (val == -1)
21629 arm_print_operand (stream, XEXP (x, 1), 0);
21630 else
21631 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21632 }
21633 }
21634 return;
21635
21636 /* An explanation of the 'Q', 'R' and 'H' register operands:
21637
21638 In a pair of registers containing a DI or DF value the 'Q'
21639 operand returns the register number of the register containing
21640 the least significant part of the value. The 'R' operand returns
21641 the register number of the register containing the most
21642 significant part of the value.
21643
21644 The 'H' operand returns the higher of the two register numbers.
21645 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21646 same as the 'Q' operand, since the most significant part of the
21647 value is held in the lower number register. The reverse is true
21648 on systems where WORDS_BIG_ENDIAN is false.
21649
21650 The purpose of these operands is to distinguish between cases
21651 where the endian-ness of the values is important (for example
21652 when they are added together), and cases where the endian-ness
21653 is irrelevant, but the order of register operations is important.
21654 For example when loading a value from memory into a register
21655 pair, the endian-ness does not matter. Provided that the value
21656 from the lower memory address is put into the lower numbered
21657 register, and the value from the higher address is put into the
21658 higher numbered register, the load will work regardless of whether
21659 the value being loaded is big-wordian or little-wordian. The
21660 order of the two register loads can matter however, if the address
21661 of the memory location is actually held in one of the registers
21662 being overwritten by the load.
21663
21664 The 'Q' and 'R' constraints are also available for 64-bit
21665 constants. */
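/* For example (illustrative, register numbers hypothetical): for a DImode
   value held in the pair {r0, r1} with little-endian word order, '%Q'
   prints r0 and '%R' prints r1; with WORDS_BIG_ENDIAN set the two swap,
   while '%H' prints r1 in either case. */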
21666 case 'Q':
21667 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21668 {
21669 rtx part = gen_lowpart (SImode, x);
21670 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21671 return;
21672 }
21673
21674 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21675 {
21676 output_operand_lossage ("invalid operand for code '%c'", code);
21677 return;
21678 }
21679
21680 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21681 return;
21682
21683 case 'R':
21684 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21685 {
21686 machine_mode mode = GET_MODE (x);
21687 rtx part;
21688
21689 if (mode == VOIDmode)
21690 mode = DImode;
21691 part = gen_highpart_mode (SImode, mode, x);
21692 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21693 return;
21694 }
21695
21696 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21697 {
21698 output_operand_lossage ("invalid operand for code '%c'", code);
21699 return;
21700 }
21701
21702 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21703 return;
21704
21705 case 'H':
21706 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21707 {
21708 output_operand_lossage ("invalid operand for code '%c'", code);
21709 return;
21710 }
21711
21712 asm_fprintf (stream, "%r", REGNO (x) + 1);
21713 return;
21714
21715 case 'J':
21716 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21717 {
21718 output_operand_lossage ("invalid operand for code '%c'", code);
21719 return;
21720 }
21721
21722 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21723 return;
21724
21725 case 'K':
21726 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21727 {
21728 output_operand_lossage ("invalid operand for code '%c'", code);
21729 return;
21730 }
21731
21732 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21733 return;
21734
21735 case 'm':
21736 asm_fprintf (stream, "%r",
21737 REG_P (XEXP (x, 0))
21738 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21739 return;
21740
21741 case 'M':
21742 asm_fprintf (stream, "{%r-%r}",
21743 REGNO (x),
21744 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21745 return;
21746
21747 /* Like 'M', but writing doubleword vector registers, for use by Neon
21748 insns. */
21749 case 'h':
21750 {
21751 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21752 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21753 if (numregs == 1)
21754 asm_fprintf (stream, "{d%d}", regno);
21755 else
21756 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21757 }
21758 return;
21759
21760 case 'd':
21761 /* CONST_TRUE_RTX means always -- that's the default. */
21762 if (x == const_true_rtx)
21763 return;
21764
21765 if (!COMPARISON_P (x))
21766 {
21767 output_operand_lossage ("invalid operand for code '%c'", code);
21768 return;
21769 }
21770
21771 fputs (arm_condition_codes[get_arm_condition_code (x)],
21772 stream);
21773 return;
21774
21775 case 'D':
21776 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21777 want to do that. */
21778 if (x == const_true_rtx)
21779 {
21780 output_operand_lossage ("instruction never executed");
21781 return;
21782 }
21783 if (!COMPARISON_P (x))
21784 {
21785 output_operand_lossage ("invalid operand for code '%c'", code);
21786 return;
21787 }
21788
21789 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21790 (get_arm_condition_code (x))],
21791 stream);
21792 return;
21793
21794 case 's':
21795 case 'V':
21796 case 'W':
21797 case 'X':
21798 case 'Y':
21799 case 'Z':
21800 /* Former Maverick support, removed after GCC-4.7. */
21801 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21802 return;
21803
21804 case 'U':
21805 if (!REG_P (x)
21806 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21807 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21808 /* Bad value for wCG register number. */
21809 {
21810 output_operand_lossage ("invalid operand for code '%c'", code);
21811 return;
21812 }
21813
21814 else
21815 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21816 return;
21817
21818 /* Print an iWMMXt control register name. */
21819 case 'w':
21820 if (!CONST_INT_P (x)
21821 || INTVAL (x) < 0
21822 || INTVAL (x) >= 16)
21823 /* Bad value for wC register number. */
21824 {
21825 output_operand_lossage ("invalid operand for code '%c'", code);
21826 return;
21827 }
21828
21829 else
21830 {
21831 static const char * wc_reg_names [16] =
21832 {
21833 "wCID", "wCon", "wCSSF", "wCASF",
21834 "wC4", "wC5", "wC6", "wC7",
21835 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21836 "wC12", "wC13", "wC14", "wC15"
21837 };
21838
21839 fputs (wc_reg_names [INTVAL (x)], stream);
21840 }
21841 return;
21842
21843 /* Print the high single-precision register of a VFP double-precision
21844 register. */
21845 case 'p':
21846 {
21847 machine_mode mode = GET_MODE (x);
21848 int regno;
21849
21850 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21851 {
21852 output_operand_lossage ("invalid operand for code '%c'", code);
21853 return;
21854 }
21855
21856 regno = REGNO (x);
21857 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21858 {
21859 output_operand_lossage ("invalid operand for code '%c'", code);
21860 return;
21861 }
21862
21863 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21864 }
21865 return;
21866
21867 /* Print a VFP/Neon double precision or quad precision register name. */
21868 case 'P':
21869 case 'q':
21870 {
21871 machine_mode mode = GET_MODE (x);
21872 int is_quad = (code == 'q');
21873 int regno;
21874
21875 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21876 {
21877 output_operand_lossage ("invalid operand for code '%c'", code);
21878 return;
21879 }
21880
21881 if (!REG_P (x)
21882 || !IS_VFP_REGNUM (REGNO (x)))
21883 {
21884 output_operand_lossage ("invalid operand for code '%c'", code);
21885 return;
21886 }
21887
21888 regno = REGNO (x);
21889 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21890 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21891 {
21892 output_operand_lossage ("invalid operand for code '%c'", code);
21893 return;
21894 }
21895
21896 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21897 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21898 }
21899 return;
21900
21901 /* These two codes print the low/high doubleword register of a Neon quad
21902 register, respectively. For pair-structure types, can also print
21903 low/high quadword registers. */
21904 case 'e':
21905 case 'f':
21906 {
21907 machine_mode mode = GET_MODE (x);
21908 int regno;
21909
21910 if ((GET_MODE_SIZE (mode) != 16
21911 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21912 {
21913 output_operand_lossage ("invalid operand for code '%c'", code);
21914 return;
21915 }
21916
21917 regno = REGNO (x);
21918 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21919 {
21920 output_operand_lossage ("invalid operand for code '%c'", code);
21921 return;
21922 }
21923
21924 if (GET_MODE_SIZE (mode) == 16)
21925 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21926 + (code == 'f' ? 1 : 0));
21927 else
21928 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21929 + (code == 'f' ? 1 : 0));
21930 }
21931 return;
21932
21933 /* Print a VFPv3 floating-point constant, represented as an integer
21934 index. */
21935 case 'G':
21936 {
21937 int index = vfp3_const_double_index (x);
21938 gcc_assert (index != -1);
21939 fprintf (stream, "%d", index);
21940 }
21941 return;
21942
21943 /* Print bits representing opcode features for Neon.
21944
21945 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21946 and polynomials as unsigned.
21947
21948 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21949
21950 Bit 2 is 1 for rounding functions, 0 otherwise. */
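/* For example (illustrative): (bits & 3) == 0 makes 'T' print 'u'
   (unsigned integer), 1 prints 's' (signed integer), 2 prints 'p'
   (polynomial) and 3 prints 'f' (float); setting bit 2 makes the 'O'
   code print the "r" rounding marker. */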
21951
21952 /* Identify the type as 's', 'u', 'p' or 'f'. */
21953 case 'T':
21954 {
21955 HOST_WIDE_INT bits = INTVAL (x);
21956 fputc ("uspf"[bits & 3], stream);
21957 }
21958 return;
21959
21960 /* Likewise, but signed and unsigned integers are both 'i'. */
21961 case 'F':
21962 {
21963 HOST_WIDE_INT bits = INTVAL (x);
21964 fputc ("iipf"[bits & 3], stream);
21965 }
21966 return;
21967
21968 /* As for 'T', but emit 'u' instead of 'p'. */
21969 case 't':
21970 {
21971 HOST_WIDE_INT bits = INTVAL (x);
21972 fputc ("usuf"[bits & 3], stream);
21973 }
21974 return;
21975
21976 /* Bit 2: rounding (vs none). */
21977 case 'O':
21978 {
21979 HOST_WIDE_INT bits = INTVAL (x);
21980 fputs ((bits & 4) != 0 ? "r" : "", stream);
21981 }
21982 return;
21983
21984 /* Memory operand for vld1/vst1 instruction. */
21985 case 'A':
21986 {
21987 rtx addr;
21988 bool postinc = FALSE;
21989 rtx postinc_reg = NULL;
21990 unsigned align, memsize, align_bits;
21991
21992 gcc_assert (MEM_P (x));
21993 addr = XEXP (x, 0);
21994 if (GET_CODE (addr) == POST_INC)
21995 {
21996 postinc = 1;
21997 addr = XEXP (addr, 0);
21998 }
21999 if (GET_CODE (addr) == POST_MODIFY)
22000 {
22001 postinc_reg = XEXP( XEXP (addr, 1), 1);
22002 addr = XEXP (addr, 0);
22003 }
22004 asm_fprintf (stream, "[%r", REGNO (addr));
22005
22006 /* We know the alignment of this access, so we can emit a hint in the
22007 instruction (for some alignments) as an aid to the memory subsystem
22008 of the target. */
22009 align = MEM_ALIGN (x) >> 3;
22010 memsize = MEM_SIZE (x);
22011
22012 /* Only certain alignment specifiers are supported by the hardware. */
22013 if (memsize == 32 && (align % 32) == 0)
22014 align_bits = 256;
22015 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22016 align_bits = 128;
22017 else if (memsize >= 8 && (align % 8) == 0)
22018 align_bits = 64;
22019 else
22020 align_bits = 0;
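/* For example (illustrative): a 16-byte access known to be 16-byte aligned
   gets the ":128" hint, a 32-byte access with 32-byte alignment gets ":256",
   and anything with less than 8-byte alignment gets no hint at all. */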
22021
22022 if (align_bits != 0)
22023 asm_fprintf (stream, ":%d", align_bits);
22024
22025 asm_fprintf (stream, "]");
22026
22027 if (postinc)
22028 fputs("!", stream);
22029 if (postinc_reg)
22030 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22031 }
22032 return;
22033
22034 case 'C':
22035 {
22036 rtx addr;
22037
22038 gcc_assert (MEM_P (x));
22039 addr = XEXP (x, 0);
22040 gcc_assert (REG_P (addr));
22041 asm_fprintf (stream, "[%r]", REGNO (addr));
22042 }
22043 return;
22044
22045 /* Translate an S register number into a D register number and element index. */
22046 case 'y':
22047 {
22048 machine_mode mode = GET_MODE (x);
22049 int regno;
22050
22051 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22052 {
22053 output_operand_lossage ("invalid operand for code '%c'", code);
22054 return;
22055 }
22056
22057 regno = REGNO (x);
22058 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22059 {
22060 output_operand_lossage ("invalid operand for code '%c'", code);
22061 return;
22062 }
22063
22064 regno = regno - FIRST_VFP_REGNUM;
22065 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22066 }
22067 return;
22068
22069 case 'v':
22070 gcc_assert (CONST_DOUBLE_P (x));
22071 int result;
22072 result = vfp3_const_double_for_fract_bits (x);
22073 if (result == 0)
22074 result = vfp3_const_double_for_bits (x);
22075 fprintf (stream, "#%d", result);
22076 return;
22077
22078 /* Register specifier for vld1.16/vst1.16. Translate the S register
22079 number into a D register number and element index. */
22080 case 'z':
22081 {
22082 machine_mode mode = GET_MODE (x);
22083 int regno;
22084
22085 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22086 {
22087 output_operand_lossage ("invalid operand for code '%c'", code);
22088 return;
22089 }
22090
22091 regno = REGNO (x);
22092 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22093 {
22094 output_operand_lossage ("invalid operand for code '%c'", code);
22095 return;
22096 }
22097
22098 regno = regno - FIRST_VFP_REGNUM;
22099 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22100 }
22101 return;
22102
22103 default:
22104 if (x == 0)
22105 {
22106 output_operand_lossage ("missing operand");
22107 return;
22108 }
22109
22110 switch (GET_CODE (x))
22111 {
22112 case REG:
22113 asm_fprintf (stream, "%r", REGNO (x));
22114 break;
22115
22116 case MEM:
22117 output_memory_reference_mode = GET_MODE (x);
22118 output_address (XEXP (x, 0));
22119 break;
22120
22121 case CONST_DOUBLE:
22122 {
22123 char fpstr[20];
22124 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22125 sizeof (fpstr), 0, 1);
22126 fprintf (stream, "#%s", fpstr);
22127 }
22128 break;
22129
22130 default:
22131 gcc_assert (GET_CODE (x) != NEG);
22132 fputc ('#', stream);
22133 if (GET_CODE (x) == HIGH)
22134 {
22135 fputs (":lower16:", stream);
22136 x = XEXP (x, 0);
22137 }
22138
22139 output_addr_const (stream, x);
22140 break;
22141 }
22142 }
22143 }
22144 \f
22145 /* Target hook for printing a memory address. */
22146 static void
22147 arm_print_operand_address (FILE *stream, rtx x)
22148 {
22149 if (TARGET_32BIT)
22150 {
22151 int is_minus = GET_CODE (x) == MINUS;
22152
22153 if (REG_P (x))
22154 asm_fprintf (stream, "[%r]", REGNO (x));
22155 else if (GET_CODE (x) == PLUS || is_minus)
22156 {
22157 rtx base = XEXP (x, 0);
22158 rtx index = XEXP (x, 1);
22159 HOST_WIDE_INT offset = 0;
22160 if (!REG_P (base)
22161 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22162 {
22163 /* Ensure that BASE is a register;
22164 one of them must be.
22165 Also ensure the SP is not used as an index register. */
22166 std::swap (base, index);
22167 }
22168 switch (GET_CODE (index))
22169 {
22170 case CONST_INT:
22171 offset = INTVAL (index);
22172 if (is_minus)
22173 offset = -offset;
22174 asm_fprintf (stream, "[%r, #%wd]",
22175 REGNO (base), offset);
22176 break;
22177
22178 case REG:
22179 asm_fprintf (stream, "[%r, %s%r]",
22180 REGNO (base), is_minus ? "-" : "",
22181 REGNO (index));
22182 break;
22183
22184 case MULT:
22185 case ASHIFTRT:
22186 case LSHIFTRT:
22187 case ASHIFT:
22188 case ROTATERT:
22189 {
22190 asm_fprintf (stream, "[%r, %s%r",
22191 REGNO (base), is_minus ? "-" : "",
22192 REGNO (XEXP (index, 0)));
22193 arm_print_operand (stream, index, 'S');
22194 fputs ("]", stream);
22195 break;
22196 }
22197
22198 default:
22199 gcc_unreachable ();
22200 }
22201 }
22202 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22203 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22204 {
22205 extern machine_mode output_memory_reference_mode;
22206
22207 gcc_assert (REG_P (XEXP (x, 0)));
22208
22209 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22210 asm_fprintf (stream, "[%r, #%s%d]!",
22211 REGNO (XEXP (x, 0)),
22212 GET_CODE (x) == PRE_DEC ? "-" : "",
22213 GET_MODE_SIZE (output_memory_reference_mode));
22214 else
22215 asm_fprintf (stream, "[%r], #%s%d",
22216 REGNO (XEXP (x, 0)),
22217 GET_CODE (x) == POST_DEC ? "-" : "",
22218 GET_MODE_SIZE (output_memory_reference_mode));
22219 }
22220 else if (GET_CODE (x) == PRE_MODIFY)
22221 {
22222 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22223 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22224 asm_fprintf (stream, "#%wd]!",
22225 INTVAL (XEXP (XEXP (x, 1), 1)));
22226 else
22227 asm_fprintf (stream, "%r]!",
22228 REGNO (XEXP (XEXP (x, 1), 1)));
22229 }
22230 else if (GET_CODE (x) == POST_MODIFY)
22231 {
22232 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22233 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22234 asm_fprintf (stream, "#%wd",
22235 INTVAL (XEXP (XEXP (x, 1), 1)));
22236 else
22237 asm_fprintf (stream, "%r",
22238 REGNO (XEXP (XEXP (x, 1), 1)));
22239 }
22240 else output_addr_const (stream, x);
22241 }
22242 else
22243 {
22244 if (REG_P (x))
22245 asm_fprintf (stream, "[%r]", REGNO (x));
22246 else if (GET_CODE (x) == POST_INC)
22247 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22248 else if (GET_CODE (x) == PLUS)
22249 {
22250 gcc_assert (REG_P (XEXP (x, 0)));
22251 if (CONST_INT_P (XEXP (x, 1)))
22252 asm_fprintf (stream, "[%r, #%wd]",
22253 REGNO (XEXP (x, 0)),
22254 INTVAL (XEXP (x, 1)));
22255 else
22256 asm_fprintf (stream, "[%r, %r]",
22257 REGNO (XEXP (x, 0)),
22258 REGNO (XEXP (x, 1)));
22259 }
22260 else
22261 output_addr_const (stream, x);
22262 }
22263 }
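
/* For illustration, the 32-bit cases above print addresses roughly as
   follows (register numbers assumed): a plain register as "[r0]", a
   register plus constant as "[r0, #4]", a register minus register as
   "[r0, -r1]", a pre-decrement as "[r0, #-4]!" and a post-increment as
   "[r0], #4".  Shifted-index forms additionally go through
   arm_print_operand with the 'S' code for the shift part.  */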
22264 \f
22265 /* Target hook for indicating whether a punctuation character for
22266 TARGET_PRINT_OPERAND is valid. */
22267 static bool
22268 arm_print_operand_punct_valid_p (unsigned char code)
22269 {
22270 return (code == '@' || code == '|' || code == '.'
22271 || code == '(' || code == ')' || code == '#'
22272 || (TARGET_32BIT && (code == '?'))
22273 || (TARGET_THUMB2 && (code == '!'))
22274 || (TARGET_THUMB && (code == '_')));
22275 }
22276 \f
22277 /* Target hook for assembling integer objects. The ARM version needs to
22278 handle word-sized values specially. */
22279 static bool
22280 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22281 {
22282 machine_mode mode;
22283
22284 if (size == UNITS_PER_WORD && aligned_p)
22285 {
22286 fputs ("\t.word\t", asm_out_file);
22287 output_addr_const (asm_out_file, x);
22288
22289 /* Mark symbols as position independent. We only do this in the
22290 .text segment, not in the .data segment. */
22291 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22292 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22293 {
22294 /* See legitimize_pic_address for an explanation of the
22295 TARGET_VXWORKS_RTP check. */
22296 if (!arm_pic_data_is_text_relative
22297 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22298 fputs ("(GOT)", asm_out_file);
22299 else
22300 fputs ("(GOTOFF)", asm_out_file);
22301 }
22302 fputc ('\n', asm_out_file);
22303 return true;
22304 }
22305
22306 mode = GET_MODE (x);
22307
22308 if (arm_vector_mode_supported_p (mode))
22309 {
22310 int i, units;
22311
22312 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22313
22314 units = CONST_VECTOR_NUNITS (x);
22315 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22316
22317 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22318 for (i = 0; i < units; i++)
22319 {
22320 rtx elt = CONST_VECTOR_ELT (x, i);
22321 assemble_integer
22322 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22323 }
22324 else
22325 for (i = 0; i < units; i++)
22326 {
22327 rtx elt = CONST_VECTOR_ELT (x, i);
22328 REAL_VALUE_TYPE rval;
22329
22330 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22331
22332 assemble_real
22333 (rval, GET_MODE_INNER (mode),
22334 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22335 }
22336
22337 return true;
22338 }
22339
22340 return default_assemble_integer (x, size, aligned_p);
22341 }
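
/* For illustration, on a target that needs GOT relocations, a
   word-sized SYMBOL_REF emitted into the constant table of a -fpic
   function comes out roughly as
       .word   sym(GOTOFF)
   when PIC data is text-relative and the symbol is local, and as
       .word   sym(GOT)
   otherwise ("sym" is a placeholder name).  */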
22342
22343 static void
22344 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22345 {
22346 section *s;
22347
22348 if (!TARGET_AAPCS_BASED)
22349 {
22350 (is_ctor ?
22351 default_named_section_asm_out_constructor
22352 : default_named_section_asm_out_destructor) (symbol, priority);
22353 return;
22354 }
22355
22356 /* Put these in the .init_array section, using a special relocation. */
22357 if (priority != DEFAULT_INIT_PRIORITY)
22358 {
22359 char buf[18];
22360 sprintf (buf, "%s.%.5u",
22361 is_ctor ? ".init_array" : ".fini_array",
22362 priority);
22363 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22364 }
22365 else if (is_ctor)
22366 s = ctors_section;
22367 else
22368 s = dtors_section;
22369
22370 switch_to_section (s);
22371 assemble_align (POINTER_SIZE);
22372 fputs ("\t.word\t", asm_out_file);
22373 output_addr_const (asm_out_file, symbol);
22374 fputs ("(target1)\n", asm_out_file);
22375 }
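
/* For example, on an AAPCS-based target a constructor registered with
   priority 101 is placed in a section named ".init_array.00101" and
   emitted roughly as
       .word   ctor_fn(target1)
   where "ctor_fn" is a placeholder symbol name.  */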
22376
22377 /* Add a function to the list of static constructors. */
22378
22379 static void
22380 arm_elf_asm_constructor (rtx symbol, int priority)
22381 {
22382 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22383 }
22384
22385 /* Add a function to the list of static destructors. */
22386
22387 static void
22388 arm_elf_asm_destructor (rtx symbol, int priority)
22389 {
22390 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22391 }
22392 \f
22393 /* A finite state machine takes care of noticing whether or not instructions
22394 can be conditionally executed, and thus decrease execution time and code
22395 size by deleting branch instructions. The fsm is controlled by
22396 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22397
22398 /* The states of the fsm controlling condition codes are:
22399 0: normal, do nothing special
22400 1: make ASM_OUTPUT_OPCODE not output this instruction
22401 2: make ASM_OUTPUT_OPCODE not output this instruction
22402 3: make instructions conditional
22403 4: make instructions conditional
22404
22405 State transitions (state->state by whom under condition):
22406 0 -> 1 final_prescan_insn if the `target' is a label
22407 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22408 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22409 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22410 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22411 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22412 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22413 (the target insn is arm_target_insn).
22414
22415 If the jump clobbers the conditions then we use states 2 and 4.
22416
22417 A similar thing can be done with conditional return insns.
22418
22419 XXX In case the `target' is an unconditional branch, this conditionalising
22420 of the instructions always reduces code size, but not always execution
22421 time. But then, I want to reduce the code size to somewhere near what
22422 /bin/cc produces. */
22423
22424 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22425 instructions. When a COND_EXEC instruction is seen the subsequent
22426 instructions are scanned so that multiple conditional instructions can be
22427 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22428 specify the length and true/false mask for the IT block. These will be
22429 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
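
/* As an illustration of the ARM-state transformation described above
   (registers and label names assumed), a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by the ccfsm machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   i.e. the branch is deleted and the skipped insn is made conditional
   on the inverse condition.  */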
22430
22431 /* Returns the index of the ARM condition code string in
22432 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22433 COMPARISON should be an rtx like `(eq (...) (...))'. */
22434
22435 enum arm_cond_code
22436 maybe_get_arm_condition_code (rtx comparison)
22437 {
22438 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22439 enum arm_cond_code code;
22440 enum rtx_code comp_code = GET_CODE (comparison);
22441
22442 if (GET_MODE_CLASS (mode) != MODE_CC)
22443 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22444 XEXP (comparison, 1));
22445
22446 switch (mode)
22447 {
22448 case CC_DNEmode: code = ARM_NE; goto dominance;
22449 case CC_DEQmode: code = ARM_EQ; goto dominance;
22450 case CC_DGEmode: code = ARM_GE; goto dominance;
22451 case CC_DGTmode: code = ARM_GT; goto dominance;
22452 case CC_DLEmode: code = ARM_LE; goto dominance;
22453 case CC_DLTmode: code = ARM_LT; goto dominance;
22454 case CC_DGEUmode: code = ARM_CS; goto dominance;
22455 case CC_DGTUmode: code = ARM_HI; goto dominance;
22456 case CC_DLEUmode: code = ARM_LS; goto dominance;
22457 case CC_DLTUmode: code = ARM_CC;
22458
22459 dominance:
22460 if (comp_code == EQ)
22461 return ARM_INVERSE_CONDITION_CODE (code);
22462 if (comp_code == NE)
22463 return code;
22464 return ARM_NV;
22465
22466 case CC_NOOVmode:
22467 switch (comp_code)
22468 {
22469 case NE: return ARM_NE;
22470 case EQ: return ARM_EQ;
22471 case GE: return ARM_PL;
22472 case LT: return ARM_MI;
22473 default: return ARM_NV;
22474 }
22475
22476 case CC_Zmode:
22477 switch (comp_code)
22478 {
22479 case NE: return ARM_NE;
22480 case EQ: return ARM_EQ;
22481 default: return ARM_NV;
22482 }
22483
22484 case CC_Nmode:
22485 switch (comp_code)
22486 {
22487 case NE: return ARM_MI;
22488 case EQ: return ARM_PL;
22489 default: return ARM_NV;
22490 }
22491
22492 case CCFPEmode:
22493 case CCFPmode:
22494 /* We can handle all cases except UNEQ and LTGT. */
22495 switch (comp_code)
22496 {
22497 case GE: return ARM_GE;
22498 case GT: return ARM_GT;
22499 case LE: return ARM_LS;
22500 case LT: return ARM_MI;
22501 case NE: return ARM_NE;
22502 case EQ: return ARM_EQ;
22503 case ORDERED: return ARM_VC;
22504 case UNORDERED: return ARM_VS;
22505 case UNLT: return ARM_LT;
22506 case UNLE: return ARM_LE;
22507 case UNGT: return ARM_HI;
22508 case UNGE: return ARM_PL;
22509 /* UNEQ and LTGT do not have a representation. */
22510 case UNEQ: /* Fall through. */
22511 case LTGT: /* Fall through. */
22512 default: return ARM_NV;
22513 }
22514
22515 case CC_SWPmode:
22516 switch (comp_code)
22517 {
22518 case NE: return ARM_NE;
22519 case EQ: return ARM_EQ;
22520 case GE: return ARM_LE;
22521 case GT: return ARM_LT;
22522 case LE: return ARM_GE;
22523 case LT: return ARM_GT;
22524 case GEU: return ARM_LS;
22525 case GTU: return ARM_CC;
22526 case LEU: return ARM_CS;
22527 case LTU: return ARM_HI;
22528 default: return ARM_NV;
22529 }
22530
22531 case CC_Cmode:
22532 switch (comp_code)
22533 {
22534 case LTU: return ARM_CS;
22535 case GEU: return ARM_CC;
22536 default: return ARM_NV;
22537 }
22538
22539 case CC_CZmode:
22540 switch (comp_code)
22541 {
22542 case NE: return ARM_NE;
22543 case EQ: return ARM_EQ;
22544 case GEU: return ARM_CS;
22545 case GTU: return ARM_HI;
22546 case LEU: return ARM_LS;
22547 case LTU: return ARM_CC;
22548 default: return ARM_NV;
22549 }
22550
22551 case CC_NCVmode:
22552 switch (comp_code)
22553 {
22554 case GE: return ARM_GE;
22555 case LT: return ARM_LT;
22556 case GEU: return ARM_CS;
22557 case LTU: return ARM_CC;
22558 default: return ARM_NV;
22559 }
22560
22561 case CCmode:
22562 switch (comp_code)
22563 {
22564 case NE: return ARM_NE;
22565 case EQ: return ARM_EQ;
22566 case GE: return ARM_GE;
22567 case GT: return ARM_GT;
22568 case LE: return ARM_LE;
22569 case LT: return ARM_LT;
22570 case GEU: return ARM_CS;
22571 case GTU: return ARM_HI;
22572 case LEU: return ARM_LS;
22573 case LTU: return ARM_CC;
22574 default: return ARM_NV;
22575 }
22576
22577 default: gcc_unreachable ();
22578 }
22579 }
22580
22581 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22582 static enum arm_cond_code
22583 get_arm_condition_code (rtx comparison)
22584 {
22585 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22586 gcc_assert (code != ARM_NV);
22587 return code;
22588 }
22589
22590 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22591 instructions. */
22592 void
22593 thumb2_final_prescan_insn (rtx_insn *insn)
22594 {
22595 rtx_insn *first_insn = insn;
22596 rtx body = PATTERN (insn);
22597 rtx predicate;
22598 enum arm_cond_code code;
22599 int n;
22600 int mask;
22601 int max;
22602
22603 /* max_insns_skipped in the tune was already taken into account in the
22604 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22605 just emit the IT blocks as large as we can. It does not make sense to split
22606 the IT blocks. */
22607 max = MAX_INSN_PER_IT_BLOCK;
22608
22609 /* Remove the previous insn from the count of insns to be output. */
22610 if (arm_condexec_count)
22611 arm_condexec_count--;
22612
22613 /* Nothing to do if we are already inside a conditional block. */
22614 if (arm_condexec_count)
22615 return;
22616
22617 if (GET_CODE (body) != COND_EXEC)
22618 return;
22619
22620 /* Conditional jumps are implemented directly. */
22621 if (JUMP_P (insn))
22622 return;
22623
22624 predicate = COND_EXEC_TEST (body);
22625 arm_current_cc = get_arm_condition_code (predicate);
22626
22627 n = get_attr_ce_count (insn);
22628 arm_condexec_count = 1;
22629 arm_condexec_mask = (1 << n) - 1;
22630 arm_condexec_masklen = n;
22631 /* See if subsequent instructions can be combined into the same block. */
22632 for (;;)
22633 {
22634 insn = next_nonnote_insn (insn);
22635
22636 /* Jumping into the middle of an IT block is illegal, so a label or
22637 barrier terminates the block. */
22638 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22639 break;
22640
22641 body = PATTERN (insn);
22642 /* USE and CLOBBER aren't really insns, so just skip them. */
22643 if (GET_CODE (body) == USE
22644 || GET_CODE (body) == CLOBBER)
22645 continue;
22646
22647 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22648 if (GET_CODE (body) != COND_EXEC)
22649 break;
22650 /* Maximum number of conditionally executed instructions in a block. */
22651 n = get_attr_ce_count (insn);
22652 if (arm_condexec_masklen + n > max)
22653 break;
22654
22655 predicate = COND_EXEC_TEST (body);
22656 code = get_arm_condition_code (predicate);
22657 mask = (1 << n) - 1;
22658 if (arm_current_cc == code)
22659 arm_condexec_mask |= (mask << arm_condexec_masklen);
22660 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22661 break;
22662
22663 arm_condexec_count++;
22664 arm_condexec_masklen += n;
22665
22666 /* A jump must be the last instruction in a conditional block. */
22667 if (JUMP_P (insn))
22668 break;
22669 }
22670 /* Restore recog_data (getting the attributes of other insns can
22671 destroy this array, but final.c assumes that it remains intact
22672 across this call). */
22673 extract_constrain_insn_cached (first_insn);
22674 }
22675
22676 void
22677 arm_final_prescan_insn (rtx_insn *insn)
22678 {
22679 /* BODY will hold the body of INSN. */
22680 rtx body = PATTERN (insn);
22681
22682 /* This will be 1 if trying to repeat the trick, and things need to be
22683 reversed if it appears to fail. */
22684 int reverse = 0;
22685
22686 /* If we start with a return insn, we only succeed if we find another one. */
22687 int seeking_return = 0;
22688 enum rtx_code return_code = UNKNOWN;
22689
22690 /* START_INSN will hold the insn from where we start looking. This is the
22691 first insn after the following code_label if REVERSE is true. */
22692 rtx_insn *start_insn = insn;
22693
22694 /* If in state 4, check if the target branch is reached, in order to
22695 change back to state 0. */
22696 if (arm_ccfsm_state == 4)
22697 {
22698 if (insn == arm_target_insn)
22699 {
22700 arm_target_insn = NULL;
22701 arm_ccfsm_state = 0;
22702 }
22703 return;
22704 }
22705
22706 /* If in state 3, it is possible to repeat the trick, if this insn is an
22707 unconditional branch to a label, and immediately following this branch
22708 is the previous target label which is only used once, and the label this
22709 branch jumps to is not too far off. */
22710 if (arm_ccfsm_state == 3)
22711 {
22712 if (simplejump_p (insn))
22713 {
22714 start_insn = next_nonnote_insn (start_insn);
22715 if (BARRIER_P (start_insn))
22716 {
22717 /* XXX Isn't this always a barrier? */
22718 start_insn = next_nonnote_insn (start_insn);
22719 }
22720 if (LABEL_P (start_insn)
22721 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22722 && LABEL_NUSES (start_insn) == 1)
22723 reverse = TRUE;
22724 else
22725 return;
22726 }
22727 else if (ANY_RETURN_P (body))
22728 {
22729 start_insn = next_nonnote_insn (start_insn);
22730 if (BARRIER_P (start_insn))
22731 start_insn = next_nonnote_insn (start_insn);
22732 if (LABEL_P (start_insn)
22733 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22734 && LABEL_NUSES (start_insn) == 1)
22735 {
22736 reverse = TRUE;
22737 seeking_return = 1;
22738 return_code = GET_CODE (body);
22739 }
22740 else
22741 return;
22742 }
22743 else
22744 return;
22745 }
22746
22747 gcc_assert (!arm_ccfsm_state || reverse);
22748 if (!JUMP_P (insn))
22749 return;
22750
22751 /* This jump might be paralleled with a clobber of the condition codes;
22752 the jump should always come first. */
22753 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22754 body = XVECEXP (body, 0, 0);
22755
22756 if (reverse
22757 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22758 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22759 {
22760 int insns_skipped;
22761 int fail = FALSE, succeed = FALSE;
22762 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22763 int then_not_else = TRUE;
22764 rtx_insn *this_insn = start_insn;
22765 rtx label = 0;
22766
22767 /* Register the insn jumped to. */
22768 if (reverse)
22769 {
22770 if (!seeking_return)
22771 label = XEXP (SET_SRC (body), 0);
22772 }
22773 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22774 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22775 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22776 {
22777 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22778 then_not_else = FALSE;
22779 }
22780 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22781 {
22782 seeking_return = 1;
22783 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22784 }
22785 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22786 {
22787 seeking_return = 1;
22788 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22789 then_not_else = FALSE;
22790 }
22791 else
22792 gcc_unreachable ();
22793
22794 /* See how many insns this branch skips, and what kind of insns. If all
22795 insns are okay, and the label or unconditional branch to the same
22796 label is not too far away, succeed. */
22797 for (insns_skipped = 0;
22798 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22799 {
22800 rtx scanbody;
22801
22802 this_insn = next_nonnote_insn (this_insn);
22803 if (!this_insn)
22804 break;
22805
22806 switch (GET_CODE (this_insn))
22807 {
22808 case CODE_LABEL:
22809 /* Succeed if it is the target label, otherwise fail since
22810 control falls in from somewhere else. */
22811 if (this_insn == label)
22812 {
22813 arm_ccfsm_state = 1;
22814 succeed = TRUE;
22815 }
22816 else
22817 fail = TRUE;
22818 break;
22819
22820 case BARRIER:
22821 /* Succeed if the following insn is the target label.
22822 Otherwise fail.
22823 If return insns are used then the last insn in a function
22824 will be a barrier. */
22825 this_insn = next_nonnote_insn (this_insn);
22826 if (this_insn && this_insn == label)
22827 {
22828 arm_ccfsm_state = 1;
22829 succeed = TRUE;
22830 }
22831 else
22832 fail = TRUE;
22833 break;
22834
22835 case CALL_INSN:
22836 /* The AAPCS says that conditional calls should not be
22837 used since they make interworking inefficient (the
22838 linker can't transform BL<cond> into BLX). That's
22839 only a problem if the machine has BLX. */
22840 if (arm_arch5)
22841 {
22842 fail = TRUE;
22843 break;
22844 }
22845
22846 /* Succeed if the following insn is the target label, or
22847 if the following two insns are a barrier and the
22848 target label. */
22849 this_insn = next_nonnote_insn (this_insn);
22850 if (this_insn && BARRIER_P (this_insn))
22851 this_insn = next_nonnote_insn (this_insn);
22852
22853 if (this_insn && this_insn == label
22854 && insns_skipped < max_insns_skipped)
22855 {
22856 arm_ccfsm_state = 1;
22857 succeed = TRUE;
22858 }
22859 else
22860 fail = TRUE;
22861 break;
22862
22863 case JUMP_INSN:
22864 /* If this is an unconditional branch to the same label, succeed.
22865 If it is to another label, do nothing. If it is conditional,
22866 fail. */
22867 /* XXX Probably, the tests for SET and the PC are
22868 unnecessary. */
22869
22870 scanbody = PATTERN (this_insn);
22871 if (GET_CODE (scanbody) == SET
22872 && GET_CODE (SET_DEST (scanbody)) == PC)
22873 {
22874 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22875 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22876 {
22877 arm_ccfsm_state = 2;
22878 succeed = TRUE;
22879 }
22880 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22881 fail = TRUE;
22882 }
22883 /* Fail if a conditional return is undesirable (e.g. on a
22884 StrongARM), but still allow this if optimizing for size. */
22885 else if (GET_CODE (scanbody) == return_code
22886 && !use_return_insn (TRUE, NULL)
22887 && !optimize_size)
22888 fail = TRUE;
22889 else if (GET_CODE (scanbody) == return_code)
22890 {
22891 arm_ccfsm_state = 2;
22892 succeed = TRUE;
22893 }
22894 else if (GET_CODE (scanbody) == PARALLEL)
22895 {
22896 switch (get_attr_conds (this_insn))
22897 {
22898 case CONDS_NOCOND:
22899 break;
22900 default:
22901 fail = TRUE;
22902 break;
22903 }
22904 }
22905 else
22906 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22907
22908 break;
22909
22910 case INSN:
22911 /* Instructions using or affecting the condition codes make it
22912 fail. */
22913 scanbody = PATTERN (this_insn);
22914 if (!(GET_CODE (scanbody) == SET
22915 || GET_CODE (scanbody) == PARALLEL)
22916 || get_attr_conds (this_insn) != CONDS_NOCOND)
22917 fail = TRUE;
22918 break;
22919
22920 default:
22921 break;
22922 }
22923 }
22924 if (succeed)
22925 {
22926 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22927 arm_target_label = CODE_LABEL_NUMBER (label);
22928 else
22929 {
22930 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22931
22932 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22933 {
22934 this_insn = next_nonnote_insn (this_insn);
22935 gcc_assert (!this_insn
22936 || (!BARRIER_P (this_insn)
22937 && !LABEL_P (this_insn)));
22938 }
22939 if (!this_insn)
22940 {
22941 /* Oh, dear! We ran off the end; give up. */
22942 extract_constrain_insn_cached (insn);
22943 arm_ccfsm_state = 0;
22944 arm_target_insn = NULL;
22945 return;
22946 }
22947 arm_target_insn = this_insn;
22948 }
22949
22950 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22951 what it was. */
22952 if (!reverse)
22953 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22954
22955 if (reverse || then_not_else)
22956 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22957 }
22958
22959 /* Restore recog_data (getting the attributes of other insns can
22960 destroy this array, but final.c assumes that it remains intact
22961 across this call). */
22962 extract_constrain_insn_cached (insn);
22963 }
22964 }
22965
22966 /* Output IT instructions. */
22967 void
22968 thumb2_asm_output_opcode (FILE * stream)
22969 {
22970 char buff[5];
22971 int n;
22972
22973 if (arm_condexec_mask)
22974 {
22975 for (n = 0; n < arm_condexec_masklen; n++)
22976 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22977 buff[n] = 0;
22978 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22979 arm_condition_codes[arm_current_cc]);
22980 arm_condexec_mask = 0;
22981 }
22982 }
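
/* For illustration (assumed conditions): with arm_current_cc == ARM_EQ,
   a mask of 0b011 and a mask length of 3, the code above emits
   "itte eq" before the first insn of the block, so the three following
   insns are executed as EQ, EQ and NE respectively.  */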
22983
22984 /* Returns true if REGNO is a valid register
22985 for holding a quantity of type MODE. */
22986 int
22987 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22988 {
22989 if (GET_MODE_CLASS (mode) == MODE_CC)
22990 return (regno == CC_REGNUM
22991 || (TARGET_HARD_FLOAT && TARGET_VFP
22992 && regno == VFPCC_REGNUM));
22993
22994 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22995 return false;
22996
22997 if (TARGET_THUMB1)
22998 /* For the Thumb we only allow values bigger than SImode in
22999 registers 0 - 6, so that there is always a second low
23000 register available to hold the upper part of the value.
23001 We probably ought to ensure that the register is the
23002 start of an even numbered register pair. */
23003 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23004
23005 if (TARGET_HARD_FLOAT && TARGET_VFP
23006 && IS_VFP_REGNUM (regno))
23007 {
23008 if (mode == SFmode || mode == SImode)
23009 return VFP_REGNO_OK_FOR_SINGLE (regno);
23010
23011 if (mode == DFmode)
23012 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23013
23014 /* VFP registers can hold HFmode values, but there is no point in
23015 putting them there unless we have hardware conversion insns. */
23016 if (mode == HFmode)
23017 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23018
23019 if (TARGET_NEON)
23020 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23021 || (VALID_NEON_QREG_MODE (mode)
23022 && NEON_REGNO_OK_FOR_QUAD (regno))
23023 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23024 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23025 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23026 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23027 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23028
23029 return FALSE;
23030 }
23031
23032 if (TARGET_REALLY_IWMMXT)
23033 {
23034 if (IS_IWMMXT_GR_REGNUM (regno))
23035 return mode == SImode;
23036
23037 if (IS_IWMMXT_REGNUM (regno))
23038 return VALID_IWMMXT_REG_MODE (mode);
23039 }
23040
23041 /* We allow almost any value to be stored in the general registers.
23042 Restrict doubleword quantities to even register pairs in ARM state
23043 so that we can use ldrd. Do not allow very large Neon structure
23044 opaque modes in general registers; they would use too many. */
23045 if (regno <= LAST_ARM_REGNUM)
23046 {
23047 if (ARM_NUM_REGS (mode) > 4)
23048 return FALSE;
23049
23050 if (TARGET_THUMB2)
23051 return TRUE;
23052
23053 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23054 }
23055
23056 if (regno == FRAME_POINTER_REGNUM
23057 || regno == ARG_POINTER_REGNUM)
23058 /* We only allow integers in the fake hard registers. */
23059 return GET_MODE_CLASS (mode) == MODE_INT;
23060
23061 return FALSE;
23062 }
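
/* For example, under the checks above a DImode value in ARM state with
   TARGET_LDRD must start on an even-numbered core register (the r2/r3
   pair is accepted, r1 is not), and SFmode values are only allowed in
   the VFP registers that VFP_REGNO_OK_FOR_SINGLE accepts.  */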
23063
23064 /* Implement MODES_TIEABLE_P. */
23065
23066 bool
23067 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23068 {
23069 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23070 return true;
23071
23072 /* We specifically want to allow elements of "structure" modes to
23073 be tieable to the structure. This more general condition allows
23074 other rarer situations too. */
23075 if (TARGET_NEON
23076 && (VALID_NEON_DREG_MODE (mode1)
23077 || VALID_NEON_QREG_MODE (mode1)
23078 || VALID_NEON_STRUCT_MODE (mode1))
23079 && (VALID_NEON_DREG_MODE (mode2)
23080 || VALID_NEON_QREG_MODE (mode2)
23081 || VALID_NEON_STRUCT_MODE (mode2)))
23082 return true;
23083
23084 return false;
23085 }
23086
23087 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23088 not used in arm mode. */
23089
23090 enum reg_class
23091 arm_regno_class (int regno)
23092 {
23093 if (regno == PC_REGNUM)
23094 return NO_REGS;
23095
23096 if (TARGET_THUMB1)
23097 {
23098 if (regno == STACK_POINTER_REGNUM)
23099 return STACK_REG;
23100 if (regno == CC_REGNUM)
23101 return CC_REG;
23102 if (regno < 8)
23103 return LO_REGS;
23104 return HI_REGS;
23105 }
23106
23107 if (TARGET_THUMB2 && regno < 8)
23108 return LO_REGS;
23109
23110 if ( regno <= LAST_ARM_REGNUM
23111 || regno == FRAME_POINTER_REGNUM
23112 || regno == ARG_POINTER_REGNUM)
23113 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23114
23115 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23116 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23117
23118 if (IS_VFP_REGNUM (regno))
23119 {
23120 if (regno <= D7_VFP_REGNUM)
23121 return VFP_D0_D7_REGS;
23122 else if (regno <= LAST_LO_VFP_REGNUM)
23123 return VFP_LO_REGS;
23124 else
23125 return VFP_HI_REGS;
23126 }
23127
23128 if (IS_IWMMXT_REGNUM (regno))
23129 return IWMMXT_REGS;
23130
23131 if (IS_IWMMXT_GR_REGNUM (regno))
23132 return IWMMXT_GR_REGS;
23133
23134 return NO_REGS;
23135 }
23136
23137 /* Handle a special case when computing the offset
23138 of an argument from the frame pointer. */
23139 int
23140 arm_debugger_arg_offset (int value, rtx addr)
23141 {
23142 rtx_insn *insn;
23143
23144 /* We are only interested if dbxout_parms() failed to compute the offset. */
23145 if (value != 0)
23146 return 0;
23147
23148 /* We can only cope with the case where the address is held in a register. */
23149 if (!REG_P (addr))
23150 return 0;
23151
23152 /* If we are using the frame pointer to point at the argument, then
23153 an offset of 0 is correct. */
23154 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23155 return 0;
23156
23157 /* If we are using the stack pointer to point at the
23158 argument, then an offset of 0 is correct. */
23159 /* ??? Check this is consistent with thumb2 frame layout. */
23160 if ((TARGET_THUMB || !frame_pointer_needed)
23161 && REGNO (addr) == SP_REGNUM)
23162 return 0;
23163
23164 /* Oh dear. The argument is pointed to by a register rather
23165 than being held in a register, or being stored at a known
23166 offset from the frame pointer. Since GDB only understands
23167 those two kinds of argument we must translate the address
23168 held in the register into an offset from the frame pointer.
23169 We do this by searching through the insns for the function
23170 looking to see where this register gets its value. If the
23171 register is initialized from the frame pointer plus an offset
23172 then we are in luck and we can continue, otherwise we give up.
23173
23174 This code is exercised by producing debugging information
23175 for a function with arguments like this:
23176
23177 double func (double a, double b, int c, double d) {return d;}
23178
23179 Without this code the stab for parameter 'd' will be set to
23180 an offset of 0 from the frame pointer, rather than 8. */
23181
23182 /* The if() statement says:
23183
23184 If the insn is a normal instruction
23185 and if the insn is setting the value in a register
23186 and if the register being set is the register holding the address of the argument
23187 and if the address is computed by an addition
23188 that involves adding to a register
23189 which is the frame pointer
23190 a constant integer
23191
23192 then... */
23193
23194 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23195 {
23196 if ( NONJUMP_INSN_P (insn)
23197 && GET_CODE (PATTERN (insn)) == SET
23198 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23199 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23200 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23201 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23202 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23203 )
23204 {
23205 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23206
23207 break;
23208 }
23209 }
23210
23211 if (value == 0)
23212 {
23213 debug_rtx (addr);
23214 warning (0, "unable to compute real location of stacked parameter");
23215 value = 8; /* XXX magic hack */
23216 }
23217
23218 return value;
23219 }
23220 \f
23221 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23222
23223 static const char *
23224 arm_invalid_parameter_type (const_tree t)
23225 {
23226 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23227 return N_("function parameters cannot have __fp16 type");
23228 return NULL;
23229 }
23230
23231 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23232
23233 static const char *
23234 arm_invalid_return_type (const_tree t)
23235 {
23236 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23237 return N_("functions cannot return __fp16 type");
23238 return NULL;
23239 }
23240
23241 /* Implement TARGET_PROMOTED_TYPE. */
23242
23243 static tree
23244 arm_promoted_type (const_tree t)
23245 {
23246 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23247 return float_type_node;
23248 return NULL_TREE;
23249 }
23250
23251 /* Implement TARGET_CONVERT_TO_TYPE.
23252 Specifically, this hook implements the peculiarity of the ARM
23253 half-precision floating-point C semantics that requires conversions between
23254 __fp16 to or from double to do an intermediate conversion to float. */
23255
23256 static tree
23257 arm_convert_to_type (tree type, tree expr)
23258 {
23259 tree fromtype = TREE_TYPE (expr);
23260 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23261 return NULL_TREE;
23262 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23263 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23264 return convert (type, convert (float_type_node, expr));
23265 return NULL_TREE;
23266 }
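
/* Conceptually, the hook above turns a conversion such as (double) x,
   where x has type __fp16, into (double) (float) x, and similarly
   narrows a double to __fp16 through an intermediate float.  */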
23267
23268 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23269 This simply adds HFmode as a supported mode; even though we don't
23270 implement arithmetic on this type directly, it's supported by
23271 optabs conversions, much the way the double-word arithmetic is
23272 special-cased in the default hook. */
23273
23274 static bool
23275 arm_scalar_mode_supported_p (machine_mode mode)
23276 {
23277 if (mode == HFmode)
23278 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23279 else if (ALL_FIXED_POINT_MODE_P (mode))
23280 return true;
23281 else
23282 return default_scalar_mode_supported_p (mode);
23283 }
23284
23285 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23286 void
23287 neon_reinterpret (rtx dest, rtx src)
23288 {
23289 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23290 }
23291
23292 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23293 not to early-clobber SRC registers in the process.
23294
23295 We assume that the operands described by SRC and DEST represent a
23296 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23297 number of components into which the copy has been decomposed. */
23298 void
23299 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23300 {
23301 unsigned int i;
23302
23303 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23304 || REGNO (operands[0]) < REGNO (operands[1]))
23305 {
23306 for (i = 0; i < count; i++)
23307 {
23308 operands[2 * i] = dest[i];
23309 operands[2 * i + 1] = src[i];
23310 }
23311 }
23312 else
23313 {
23314 for (i = 0; i < count; i++)
23315 {
23316 operands[2 * i] = dest[count - i - 1];
23317 operands[2 * i + 1] = src[count - i - 1];
23318 }
23319 }
23320 }
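
/* For example (assumed registers), when copying the pair {d1, d2} from
   {d0, d1}, the destination overlaps the source and has the higher
   starting register number, so the components are ordered d2 <- d1
   first and d1 <- d0 second, avoiding a clobber of d1 before it is
   read.  */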
23321
23322 /* Split operands into moves from op[1] + op[2] into op[0]. */
23323
23324 void
23325 neon_split_vcombine (rtx operands[3])
23326 {
23327 unsigned int dest = REGNO (operands[0]);
23328 unsigned int src1 = REGNO (operands[1]);
23329 unsigned int src2 = REGNO (operands[2]);
23330 machine_mode halfmode = GET_MODE (operands[1]);
23331 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23332 rtx destlo, desthi;
23333
23334 if (src1 == dest && src2 == dest + halfregs)
23335 {
23336 /* No-op move. Can't split to nothing; emit something. */
23337 emit_note (NOTE_INSN_DELETED);
23338 return;
23339 }
23340
23341 /* Preserve register attributes for variable tracking. */
23342 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23343 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23344 GET_MODE_SIZE (halfmode));
23345
23346 /* Special case of reversed high/low parts. Use VSWP. */
23347 if (src2 == dest && src1 == dest + halfregs)
23348 {
23349 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23350 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23351 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23352 return;
23353 }
23354
23355 if (!reg_overlap_mentioned_p (operands[2], destlo))
23356 {
23357 /* Try to avoid unnecessary moves if part of the result
23358 is in the right place already. */
23359 if (src1 != dest)
23360 emit_move_insn (destlo, operands[1]);
23361 if (src2 != dest + halfregs)
23362 emit_move_insn (desthi, operands[2]);
23363 }
23364 else
23365 {
23366 if (src2 != dest + halfregs)
23367 emit_move_insn (desthi, operands[2]);
23368 if (src1 != dest)
23369 emit_move_insn (destlo, operands[1]);
23370 }
23371 }
23372 \f
23373 /* Return the number (counting from 0) of
23374 the least significant set bit in MASK. */
23375
23376 inline static int
23377 number_of_first_bit_set (unsigned mask)
23378 {
23379 return ctz_hwi (mask);
23380 }
23381
23382 /* Like emit_multi_reg_push, but allowing for a different set of
23383 registers to be described as saved. MASK is the set of registers
23384 to be saved; REAL_REGS is the set of registers to be described as
23385 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23386
23387 static rtx_insn *
23388 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23389 {
23390 unsigned long regno;
23391 rtx par[10], tmp, reg;
23392 rtx_insn *insn;
23393 int i, j;
23394
23395 /* Build the parallel of the registers actually being stored. */
23396 for (i = 0; mask; ++i, mask &= mask - 1)
23397 {
23398 regno = ctz_hwi (mask);
23399 reg = gen_rtx_REG (SImode, regno);
23400
23401 if (i == 0)
23402 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23403 else
23404 tmp = gen_rtx_USE (VOIDmode, reg);
23405
23406 par[i] = tmp;
23407 }
23408
23409 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23410 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23411 tmp = gen_frame_mem (BLKmode, tmp);
23412 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23413 par[0] = tmp;
23414
23415 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23416 insn = emit_insn (tmp);
23417
23418 /* Always build the stack adjustment note for unwind info. */
23419 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23420 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23421 par[0] = tmp;
23422
23423 /* Build the parallel of the registers recorded as saved for unwind. */
23424 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23425 {
23426 regno = ctz_hwi (real_regs);
23427 reg = gen_rtx_REG (SImode, regno);
23428
23429 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23430 tmp = gen_frame_mem (SImode, tmp);
23431 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23432 RTX_FRAME_RELATED_P (tmp) = 1;
23433 par[j + 1] = tmp;
23434 }
23435
23436 if (j == 0)
23437 tmp = par[0];
23438 else
23439 {
23440 RTX_FRAME_RELATED_P (par[0]) = 1;
23441 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23442 }
23443
23444 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23445
23446 return insn;
23447 }
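
/* For illustration (assumed register mask): a MASK of r4, r5 and lr
   builds a PARALLEL whose first element stores through a PRE_MODIFY of
   the stack pointer (the push itself) and whose remaining elements are
   USEs of the other registers, while the REG_FRAME_RELATED_EXPR note
   separately records the 12-byte stack adjustment and the word-by-word
   saves of the registers in REAL_REGS for the unwinder.  */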
23448
23449 /* Emit code to push or pop registers to or from the stack. F is the
23450 assembly file. MASK is the registers to pop. */
23451 static void
23452 thumb_pop (FILE *f, unsigned long mask)
23453 {
23454 int regno;
23455 int lo_mask = mask & 0xFF;
23456 int pushed_words = 0;
23457
23458 gcc_assert (mask);
23459
23460 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23461 {
23462 /* Special case. Do not generate a POP PC statement here; do it in
23463 thumb_exit(). */
23464 thumb_exit (f, -1);
23465 return;
23466 }
23467
23468 fprintf (f, "\tpop\t{");
23469
23470 /* Look at the low registers first. */
23471 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23472 {
23473 if (lo_mask & 1)
23474 {
23475 asm_fprintf (f, "%r", regno);
23476
23477 if ((lo_mask & ~1) != 0)
23478 fprintf (f, ", ");
23479
23480 pushed_words++;
23481 }
23482 }
23483
23484 if (mask & (1 << PC_REGNUM))
23485 {
23486 /* Catch popping the PC. */
23487 if (TARGET_INTERWORK || TARGET_BACKTRACE
23488 || crtl->calls_eh_return)
23489 {
23490 /* The PC is never popped directly; instead
23491 it is popped into r3 and then BX is used. */
23492 fprintf (f, "}\n");
23493
23494 thumb_exit (f, -1);
23495
23496 return;
23497 }
23498 else
23499 {
23500 if (mask & 0xFF)
23501 fprintf (f, ", ");
23502
23503 asm_fprintf (f, "%r", PC_REGNUM);
23504 }
23505 }
23506
23507 fprintf (f, "}\n");
23508 }
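
/* For example, a MASK containing r4, r5 and the PC (with no
   interworking, backtrace or EH-return requirement) makes the function
   above print "pop {r4, r5, pc}"; if any of those requirements hold,
   the PC part is instead handled by thumb_exit.  */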
23509
23510 /* Generate code to return from a thumb function.
23511 If 'reg_containing_return_addr' is -1, then the return address is
23512 actually on the stack, at the stack pointer. */
23513 static void
23514 thumb_exit (FILE *f, int reg_containing_return_addr)
23515 {
23516 unsigned regs_available_for_popping;
23517 unsigned regs_to_pop;
23518 int pops_needed;
23519 unsigned available;
23520 unsigned required;
23521 machine_mode mode;
23522 int size;
23523 int restore_a4 = FALSE;
23524
23525 /* Compute the registers we need to pop. */
23526 regs_to_pop = 0;
23527 pops_needed = 0;
23528
23529 if (reg_containing_return_addr == -1)
23530 {
23531 regs_to_pop |= 1 << LR_REGNUM;
23532 ++pops_needed;
23533 }
23534
23535 if (TARGET_BACKTRACE)
23536 {
23537 /* Restore the (ARM) frame pointer and stack pointer. */
23538 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23539 pops_needed += 2;
23540 }
23541
23542 /* If there is nothing to pop then just emit the BX instruction and
23543 return. */
23544 if (pops_needed == 0)
23545 {
23546 if (crtl->calls_eh_return)
23547 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23548
23549 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23550 return;
23551 }
23552 /* Otherwise if we are not supporting interworking and we have not created
23553 a backtrace structure and the function was not entered in ARM mode then
23554 just pop the return address straight into the PC. */
23555 else if (!TARGET_INTERWORK
23556 && !TARGET_BACKTRACE
23557 && !is_called_in_ARM_mode (current_function_decl)
23558 && !crtl->calls_eh_return)
23559 {
23560 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23561 return;
23562 }
23563
23564 /* Find out how many of the (return) argument registers we can corrupt. */
23565 regs_available_for_popping = 0;
23566
23567 /* If returning via __builtin_eh_return, the bottom three registers
23568 all contain information needed for the return. */
23569 if (crtl->calls_eh_return)
23570 size = 12;
23571 else
23572 {
23573 /* We can deduce the registers used from the function's
23574 return value. This is more reliable than examining
23575 df_regs_ever_live_p () because that will be set if the register is
23576 ever used in the function, not just if the register is used
23577 to hold a return value. */
23578
23579 if (crtl->return_rtx != 0)
23580 mode = GET_MODE (crtl->return_rtx);
23581 else
23582 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23583
23584 size = GET_MODE_SIZE (mode);
23585
23586 if (size == 0)
23587 {
23588 /* In a void function we can use any argument register.
23589 In a function that returns a structure on the stack
23590 we can use the second and third argument registers. */
23591 if (mode == VOIDmode)
23592 regs_available_for_popping =
23593 (1 << ARG_REGISTER (1))
23594 | (1 << ARG_REGISTER (2))
23595 | (1 << ARG_REGISTER (3));
23596 else
23597 regs_available_for_popping =
23598 (1 << ARG_REGISTER (2))
23599 | (1 << ARG_REGISTER (3));
23600 }
23601 else if (size <= 4)
23602 regs_available_for_popping =
23603 (1 << ARG_REGISTER (2))
23604 | (1 << ARG_REGISTER (3));
23605 else if (size <= 8)
23606 regs_available_for_popping =
23607 (1 << ARG_REGISTER (3));
23608 }
23609
23610 /* Match registers to be popped with registers into which we pop them. */
23611 for (available = regs_available_for_popping,
23612 required = regs_to_pop;
23613 required != 0 && available != 0;
23614 available &= ~(available & - available),
23615 required &= ~(required & - required))
23616 -- pops_needed;
23617
23618 /* If we have any popping registers left over, remove them. */
23619 if (available > 0)
23620 regs_available_for_popping &= ~available;
23621
23622 /* Otherwise if we need another popping register we can use
23623 the fourth argument register. */
23624 else if (pops_needed)
23625 {
23626 /* If we have not found any free argument registers and
23627 reg a4 contains the return address, we must move it. */
23628 if (regs_available_for_popping == 0
23629 && reg_containing_return_addr == LAST_ARG_REGNUM)
23630 {
23631 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23632 reg_containing_return_addr = LR_REGNUM;
23633 }
23634 else if (size > 12)
23635 {
23636 /* Register a4 is being used to hold part of the return value,
23637 but we have dire need of a free, low register. */
23638 restore_a4 = TRUE;
23639
23640 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23641 }
23642
23643 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23644 {
23645 /* The fourth argument register is available. */
23646 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23647
23648 --pops_needed;
23649 }
23650 }
23651
23652 /* Pop as many registers as we can. */
23653 thumb_pop (f, regs_available_for_popping);
23654
23655 /* Process the registers we popped. */
23656 if (reg_containing_return_addr == -1)
23657 {
23658 /* The return address was popped into the lowest numbered register. */
23659 regs_to_pop &= ~(1 << LR_REGNUM);
23660
23661 reg_containing_return_addr =
23662 number_of_first_bit_set (regs_available_for_popping);
23663
23664 /* Remove this register from the mask of available registers, so that
23665 the return address will not be corrupted by further pops. */
23666 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23667 }
23668
23669 /* If we popped other registers then handle them here. */
23670 if (regs_available_for_popping)
23671 {
23672 int frame_pointer;
23673
23674 /* Work out which register currently contains the frame pointer. */
23675 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23676
23677 /* Move it into the correct place. */
23678 asm_fprintf (f, "\tmov\t%r, %r\n",
23679 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23680
23681 /* (Temporarily) remove it from the mask of popped registers. */
23682 regs_available_for_popping &= ~(1 << frame_pointer);
23683 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23684
23685 if (regs_available_for_popping)
23686 {
23687 int stack_pointer;
23688
23689 /* We popped the stack pointer as well;
23690 find the register that contains it. */
23691 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23692
23693 /* Move it into the stack register. */
23694 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23695
23696 /* At this point we have popped all necessary registers, so
23697 do not worry about restoring regs_available_for_popping
23698 to its correct value:
23699
23700 assert (pops_needed == 0)
23701 assert (regs_available_for_popping == (1 << frame_pointer))
23702 assert (regs_to_pop == (1 << STACK_POINTER)) */
23703 }
23704 else
23705 {
23706 /* Since we have just moved the popped value into the frame
23707 pointer, the popping register is available for reuse, and
23708 we know that we still have the stack pointer left to pop. */
23709 regs_available_for_popping |= (1 << frame_pointer);
23710 }
23711 }
23712
23713 /* If we still have registers left on the stack, but we no longer have
23714 any registers into which we can pop them, then we must move the return
23715 address into the link register and make available the register that
23716 contained it. */
23717 if (regs_available_for_popping == 0 && pops_needed > 0)
23718 {
23719 regs_available_for_popping |= 1 << reg_containing_return_addr;
23720
23721 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23722 reg_containing_return_addr);
23723
23724 reg_containing_return_addr = LR_REGNUM;
23725 }
23726
23727 /* If we have registers left on the stack then pop some more.
23728 We know that at most we will want to pop FP and SP. */
23729 if (pops_needed > 0)
23730 {
23731 int popped_into;
23732 int move_to;
23733
23734 thumb_pop (f, regs_available_for_popping);
23735
23736 /* We have popped either FP or SP.
23737 Move whichever one it is into the correct register. */
23738 popped_into = number_of_first_bit_set (regs_available_for_popping);
23739 move_to = number_of_first_bit_set (regs_to_pop);
23740
23741 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23742
23743 regs_to_pop &= ~(1 << move_to);
23744
23745 --pops_needed;
23746 }
23747
23748 /* If we still have not popped everything then we must have only
23749 had one register available to us and we are now popping the SP. */
23750 if (pops_needed > 0)
23751 {
23752 int popped_into;
23753
23754 thumb_pop (f, regs_available_for_popping);
23755
23756 popped_into = number_of_first_bit_set (regs_available_for_popping);
23757
23758 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23759 /*
23760 assert (regs_to_pop == (1 << STACK_POINTER))
23761 assert (pops_needed == 1)
23762 */
23763 }
23764
23765 /* If necessary restore the a4 register. */
23766 if (restore_a4)
23767 {
23768 if (reg_containing_return_addr != LR_REGNUM)
23769 {
23770 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23771 reg_containing_return_addr = LR_REGNUM;
23772 }
23773
23774 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23775 }
23776
23777 if (crtl->calls_eh_return)
23778 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23779
23780 /* Return to caller. */
23781 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23782 }
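
/* For illustration (assumed registers): in a void function that must
   return with BX (e.g. when interworking is enabled) and has nothing
   else to restore, the code above pops the return address into the
   lowest free argument register and returns through it, roughly

	pop	{r0}
	bx	r0

   whereas the simple non-interworking case earlier in the function
   returns with a single "pop {pc}".  */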
23783 \f
23784 /* Scan INSN just before assembler is output for it.
23785 For Thumb-1, we track the status of the condition codes; this
23786 information is used in the cbranchsi4_insn pattern. */
23787 void
23788 thumb1_final_prescan_insn (rtx_insn *insn)
23789 {
23790 if (flag_print_asm_name)
23791 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23792 INSN_ADDRESSES (INSN_UID (insn)));
23793 /* Don't overwrite the previous setter when we get to a cbranch. */
23794 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23795 {
23796 enum attr_conds conds;
23797
23798 if (cfun->machine->thumb1_cc_insn)
23799 {
23800 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23801 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23802 CC_STATUS_INIT;
23803 }
23804 conds = get_attr_conds (insn);
23805 if (conds == CONDS_SET)
23806 {
23807 rtx set = single_set (insn);
23808 cfun->machine->thumb1_cc_insn = insn;
23809 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23810 cfun->machine->thumb1_cc_op1 = const0_rtx;
23811 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23812 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23813 {
23814 rtx src1 = XEXP (SET_SRC (set), 1);
23815 if (src1 == const0_rtx)
23816 cfun->machine->thumb1_cc_mode = CCmode;
23817 }
23818 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23819 {
23820 /* Record the src register operand instead of dest because
23821 cprop_hardreg pass propagates src. */
23822 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23823 }
23824 }
23825 else if (conds != CONDS_NOCOND)
23826 cfun->machine->thumb1_cc_insn = NULL_RTX;
23827 }
23828
23829 /* Check if unexpected far jump is used. */
23830 if (cfun->machine->lr_save_eliminated
23831 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23832 internal_error("Unexpected thumb1 far jump");
23833 }
23834
23835 int
23836 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23837 {
23838 unsigned HOST_WIDE_INT mask = 0xff;
23839 int i;
23840
23841 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23842 if (val == 0) /* XXX */
23843 return 0;
23844
23845 for (i = 0; i < 25; i++)
23846 if ((val & (mask << i)) == val)
23847 return 1;
23848
23849 return 0;
23850 }
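
/* For example, 0x00ff0000 is accepted by the test above (0xff shifted
   left by 16), while 0x00ff00ff is rejected because its set bits do
   not fit inside a single 8-bit window.  */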
23851
23852 /* Returns nonzero if the current function contains,
23853 or might contain a far jump. */
23854 static int
23855 thumb_far_jump_used_p (void)
23856 {
23857 rtx_insn *insn;
23858 bool far_jump = false;
23859 unsigned int func_size = 0;
23860
23861 /* This test is only important for leaf functions. */
23862 /* assert (!leaf_function_p ()); */
23863
23864 /* If we have already decided that far jumps may be used,
23865 do not bother checking again, and always return true even if
23866 it turns out that they are not being used. Once we have made
23867 the decision that far jumps are present (and that hence the link
23868 register will be pushed onto the stack) we cannot go back on it. */
23869 if (cfun->machine->far_jump_used)
23870 return 1;
23871
23872 /* If this function is not being called from the prologue/epilogue
23873 generation code then it must be being called from the
23874 INITIAL_ELIMINATION_OFFSET macro. */
23875 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23876 {
23877 /* In this case we know that we are being asked about the elimination
23878 of the arg pointer register. If that register is not being used,
23879 then there are no arguments on the stack, and we do not have to
23880 worry that a far jump might force the prologue to push the link
23881 register, changing the stack offsets. In this case we can just
23882 return false, since the presence of far jumps in the function will
23883 not affect stack offsets.
23884
23885 If the arg pointer is live (or if it was live, but has now been
23886 eliminated and so set to dead) then we do have to test to see if
23887 the function might contain a far jump. This test can lead to some
23888 false negatives, since before reload is completed the length of
23889 branch instructions is not known, so gcc defaults to returning their
23890 longest length, which in turn sets the far jump attribute to true.
23891
23892 A false negative will not result in bad code being generated, but it
23893 will result in a needless push and pop of the link register. We
23894 hope that this does not occur too often.
23895
23896 If we need doubleword stack alignment this could affect the other
23897 elimination offsets so we can't risk getting it wrong. */
23898 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23899 cfun->machine->arg_pointer_live = 1;
23900 else if (!cfun->machine->arg_pointer_live)
23901 return 0;
23902 }
23903
23904 /* We should not change far_jump_used during or after reload, as there is
23905 no chance to change stack frame layout. */
23906 if (reload_in_progress || reload_completed)
23907 return 0;
23908
23909 /* Check to see if the function contains a branch
23910 insn with the far jump attribute set. */
23911 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23912 {
23913 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23914 {
23915 far_jump = true;
23916 }
23917 func_size += get_attr_length (insn);
23918 }
23919
23920 /* Attribute far_jump will always be true for thumb1 before
23921 shorten_branch pass. So checking the far_jump attribute before
23922 shorten_branch isn't very useful.
23923 
23924 The following heuristic tries to estimate more accurately whether a far
23925 jump may finally be used. The heuristic is very conservative, as there is
23926 no chance to roll back the decision not to use a far jump.
23927 
23928 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
23929 each 2-byte insn being associated with a 4-byte constant pool entry. Using
23930 function size 2048/3 as the threshold is conservative enough. */
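/* Worked example of the threshold above (editorial, illustrative numbers):
   with every 2-byte insn dragging a 4-byte literal pool entry behind it,
   a function whose insn lengths sum to FUNC_SIZE bytes may occupy up to
   3 * FUNC_SIZE bytes once the pools are laid out.  FUNC_SIZE = 600 gives
   a worst case of 1800 bytes, still inside the 2048-byte branch range, so
   short branches are assumed to suffice; FUNC_SIZE = 700 gives 2100 bytes
   and the code below conservatively commits to far jumps.  */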
23931 if (far_jump)
23932 {
23933 if ((func_size * 3) >= 2048)
23934 {
23935 /* Record the fact that we have decided that
23936 the function does use far jumps. */
23937 cfun->machine->far_jump_used = 1;
23938 return 1;
23939 }
23940 }
23941
23942 return 0;
23943 }
23944
23945 /* Return nonzero if FUNC must be entered in ARM mode. */
23946 int
23947 is_called_in_ARM_mode (tree func)
23948 {
23949 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23950
23951 /* Ignore the problem about functions whose address is taken. */
23952 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23953 return TRUE;
23954
23955 #ifdef ARM_PE
23956 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23957 #else
23958 return FALSE;
23959 #endif
23960 }
23961
23962 /* Given the stack offsets and register mask in OFFSETS, decide how
23963 many additional registers to push instead of subtracting a constant
23964 from SP. For epilogues the principle is the same except we use pop.
23965 FOR_PROLOGUE indicates which we're generating. */
23966 static int
23967 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23968 {
23969 HOST_WIDE_INT amount;
23970 unsigned long live_regs_mask = offsets->saved_regs_mask;
23971 /* Extract a mask of the ones we can give to the Thumb's push/pop
23972 instruction. */
23973 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23974 /* Then count how many other high registers will need to be pushed. */
23975 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23976 int n_free, reg_base, size;
23977
23978 if (!for_prologue && frame_pointer_needed)
23979 amount = offsets->locals_base - offsets->saved_regs;
23980 else
23981 amount = offsets->outgoing_args - offsets->saved_regs;
23982
23983 /* If the stack frame size is 512 exactly, we can save one load
23984 instruction, which should make this a win even when optimizing
23985 for speed. */
23986 if (!optimize_size && amount != 512)
23987 return 0;
23988
23989 /* Can't do this if there are high registers to push. */
23990 if (high_regs_pushed != 0)
23991 return 0;
23992
23993 /* Shouldn't do it in the prologue if no registers would normally
23994 be pushed at all. In the epilogue, also allow it if we'll have
23995 a pop insn for the PC. */
23996 if (l_mask == 0
23997 && (for_prologue
23998 || TARGET_BACKTRACE
23999 || (live_regs_mask & 1 << LR_REGNUM) == 0
24000 || TARGET_INTERWORK
24001 || crtl->args.pretend_args_size != 0))
24002 return 0;
24003
24004 /* Don't do this if thumb_expand_prologue wants to emit instructions
24005 between the push and the stack frame allocation. */
24006 if (for_prologue
24007 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24008 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24009 return 0;
24010
24011 reg_base = 0;
24012 n_free = 0;
24013 if (!for_prologue)
24014 {
24015 size = arm_size_return_regs ();
24016 reg_base = ARM_NUM_INTS (size);
24017 live_regs_mask >>= reg_base;
24018 }
24019
24020 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24021 && (for_prologue || call_used_regs[reg_base + n_free]))
24022 {
24023 live_regs_mask >>= 1;
24024 n_free++;
24025 }
24026
24027 if (n_free == 0)
24028 return 0;
24029 gcc_assert (amount / 4 * 4 == amount);
24030
24031 if (amount >= 512 && (amount - n_free * 4) < 512)
24032 return (amount - 508) / 4;
24033 if (amount <= n_free * 4)
24034 return amount / 4;
24035 return 0;
24036 }
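/* Worked examples for the function above (editorial; register counts are
   hypothetical): with amount == 512 and at least one free low register,
   (512 - 508) / 4 == 1 is returned, so one extra register joins the
   push/pop and the remaining 508-byte adjustment still fits a single
   "sub sp, #N" / "add sp, #N".  When optimizing for size, amount == 8
   with three free low registers returns 8 / 4 == 2, i.e. the stack
   adjustment is folded entirely into two extra registers in the existing
   push/pop.  */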
24037
24038 /* The bits which aren't usefully expanded as rtl. */
24039 const char *
24040 thumb1_unexpanded_epilogue (void)
24041 {
24042 arm_stack_offsets *offsets;
24043 int regno;
24044 unsigned long live_regs_mask = 0;
24045 int high_regs_pushed = 0;
24046 int extra_pop;
24047 int had_to_push_lr;
24048 int size;
24049
24050 if (cfun->machine->return_used_this_function != 0)
24051 return "";
24052
24053 if (IS_NAKED (arm_current_func_type ()))
24054 return "";
24055
24056 offsets = arm_get_frame_offsets ();
24057 live_regs_mask = offsets->saved_regs_mask;
24058 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24059
24060 /* Deduce the registers used from the function's return value.
24061 This is more reliable than examining df_regs_ever_live_p () because that
24062 will be set if the register is ever used in the function, not just if
24063 the register is used to hold a return value. */
24064 size = arm_size_return_regs ();
24065
24066 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24067 if (extra_pop > 0)
24068 {
24069 unsigned long extra_mask = (1 << extra_pop) - 1;
24070 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24071 }
24072
24073 /* The prolog may have pushed some high registers to use as
24074 work registers, e.g. the testsuite file:
24075 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24076 compiles to produce:
24077 push {r4, r5, r6, r7, lr}
24078 mov r7, r9
24079 mov r6, r8
24080 push {r6, r7}
24081 as part of the prolog. We have to undo that pushing here. */
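/* Editorial sketch of the matching epilogue sequence (register choice is
   illustrative and depends on the return-value size): with r8 and r9
   saved as above and r2/r3 free, the loop below emits roughly

      pop  {r2, r3}
      mov  r8, r2
      mov  r9, r3

   recovering the high registers through whatever low registers remain
   available.  */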
24082
24083 if (high_regs_pushed)
24084 {
24085 unsigned long mask = live_regs_mask & 0xff;
24086 int next_hi_reg;
24087
24088 /* The available low registers depend on the size of the value we are
24089 returning. */
24090 if (size <= 12)
24091 mask |= 1 << 3;
24092 if (size <= 8)
24093 mask |= 1 << 2;
24094
24095 if (mask == 0)
24096 /* Oh dear! We have no low registers into which we can pop
24097 high registers! */
24098 internal_error
24099 ("no low registers available for popping high registers");
24100
24101 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24102 if (live_regs_mask & (1 << next_hi_reg))
24103 break;
24104
24105 while (high_regs_pushed)
24106 {
24107 /* Find lo register(s) into which the high register(s) can
24108 be popped. */
24109 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24110 {
24111 if (mask & (1 << regno))
24112 high_regs_pushed--;
24113 if (high_regs_pushed == 0)
24114 break;
24115 }
24116
24117 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24118
24119 /* Pop the values into the low register(s). */
24120 thumb_pop (asm_out_file, mask);
24121
24122 /* Move the value(s) into the high registers. */
24123 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24124 {
24125 if (mask & (1 << regno))
24126 {
24127 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24128 regno);
24129
24130 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24131 if (live_regs_mask & (1 << next_hi_reg))
24132 break;
24133 }
24134 }
24135 }
24136 live_regs_mask &= ~0x0f00;
24137 }
24138
24139 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24140 live_regs_mask &= 0xff;
24141
24142 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24143 {
24144 /* Pop the return address into the PC. */
24145 if (had_to_push_lr)
24146 live_regs_mask |= 1 << PC_REGNUM;
24147
24148 /* Either no argument registers were pushed or a backtrace
24149 structure was created which includes an adjusted stack
24150 pointer, so just pop everything. */
24151 if (live_regs_mask)
24152 thumb_pop (asm_out_file, live_regs_mask);
24153
24154 /* We have either just popped the return address into the
24155 PC or it was kept in LR for the entire function.
24156 Note that thumb_pop has already called thumb_exit if the
24157 PC was in the list. */
24158 if (!had_to_push_lr)
24159 thumb_exit (asm_out_file, LR_REGNUM);
24160 }
24161 else
24162 {
24163 /* Pop everything but the return address. */
24164 if (live_regs_mask)
24165 thumb_pop (asm_out_file, live_regs_mask);
24166
24167 if (had_to_push_lr)
24168 {
24169 if (size > 12)
24170 {
24171 /* We have no free low regs, so save one. */
24172 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24173 LAST_ARG_REGNUM);
24174 }
24175
24176 /* Get the return address into a temporary register. */
24177 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24178
24179 if (size > 12)
24180 {
24181 /* Move the return address to lr. */
24182 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24183 LAST_ARG_REGNUM);
24184 /* Restore the low register. */
24185 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24186 IP_REGNUM);
24187 regno = LR_REGNUM;
24188 }
24189 else
24190 regno = LAST_ARG_REGNUM;
24191 }
24192 else
24193 regno = LR_REGNUM;
24194
24195 /* Remove the argument registers that were pushed onto the stack. */
24196 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24197 SP_REGNUM, SP_REGNUM,
24198 crtl->args.pretend_args_size);
24199
24200 thumb_exit (asm_out_file, regno);
24201 }
24202
24203 return "";
24204 }
24205
24206 /* Functions to save and restore machine-specific function data. */
24207 static struct machine_function *
24208 arm_init_machine_status (void)
24209 {
24210 struct machine_function *machine;
24211 machine = ggc_cleared_alloc<machine_function> ();
24212
24213 #if ARM_FT_UNKNOWN != 0
24214 machine->func_type = ARM_FT_UNKNOWN;
24215 #endif
24216 return machine;
24217 }
24218
24219 /* Return an RTX indicating where the return address to the
24220 calling function can be found. */
24221 rtx
24222 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24223 {
24224 if (count != 0)
24225 return NULL_RTX;
24226
24227 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24228 }
24229
24230 /* Do anything needed before RTL is emitted for each function. */
24231 void
24232 arm_init_expanders (void)
24233 {
24234 /* Arrange to initialize and mark the machine per-function status. */
24235 init_machine_status = arm_init_machine_status;
24236
24237 /* This is to stop the combine pass optimizing away the alignment
24238 adjustment of va_arg. */
24239 /* ??? It is claimed that this should not be necessary. */
24240 if (cfun)
24241 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24242 }
24243
24244
24245 /* Like arm_compute_initial_elimination_offset. Simpler because there
24246 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24247 to point at the base of the local variables after static stack
24248 space for a function has been allocated. */
24249
24250 HOST_WIDE_INT
24251 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24252 {
24253 arm_stack_offsets *offsets;
24254
24255 offsets = arm_get_frame_offsets ();
24256
24257 switch (from)
24258 {
24259 case ARG_POINTER_REGNUM:
24260 switch (to)
24261 {
24262 case STACK_POINTER_REGNUM:
24263 return offsets->outgoing_args - offsets->saved_args;
24264
24265 case FRAME_POINTER_REGNUM:
24266 return offsets->soft_frame - offsets->saved_args;
24267
24268 case ARM_HARD_FRAME_POINTER_REGNUM:
24269 return offsets->saved_regs - offsets->saved_args;
24270
24271 case THUMB_HARD_FRAME_POINTER_REGNUM:
24272 return offsets->locals_base - offsets->saved_args;
24273
24274 default:
24275 gcc_unreachable ();
24276 }
24277 break;
24278
24279 case FRAME_POINTER_REGNUM:
24280 switch (to)
24281 {
24282 case STACK_POINTER_REGNUM:
24283 return offsets->outgoing_args - offsets->soft_frame;
24284
24285 case ARM_HARD_FRAME_POINTER_REGNUM:
24286 return offsets->saved_regs - offsets->soft_frame;
24287
24288 case THUMB_HARD_FRAME_POINTER_REGNUM:
24289 return offsets->locals_base - offsets->soft_frame;
24290
24291 default:
24292 gcc_unreachable ();
24293 }
24294 break;
24295
24296 default:
24297 gcc_unreachable ();
24298 }
24299 }
24300
24301 /* Generate the function's prologue. */
24302
24303 void
24304 thumb1_expand_prologue (void)
24305 {
24306 rtx_insn *insn;
24307
24308 HOST_WIDE_INT amount;
24309 arm_stack_offsets *offsets;
24310 unsigned long func_type;
24311 int regno;
24312 unsigned long live_regs_mask;
24313 unsigned long l_mask;
24314 unsigned high_regs_pushed = 0;
24315
24316 func_type = arm_current_func_type ();
24317
24318 /* Naked functions don't have prologues. */
24319 if (IS_NAKED (func_type))
24320 return;
24321
24322 if (IS_INTERRUPT (func_type))
24323 {
24324 error ("interrupt Service Routines cannot be coded in Thumb mode");
24325 return;
24326 }
24327
24328 if (is_called_in_ARM_mode (current_function_decl))
24329 emit_insn (gen_prologue_thumb1_interwork ());
24330
24331 offsets = arm_get_frame_offsets ();
24332 live_regs_mask = offsets->saved_regs_mask;
24333
24334 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24335 l_mask = live_regs_mask & 0x40ff;
24336 /* Then count how many other high registers will need to be pushed. */
24337 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24338
24339 if (crtl->args.pretend_args_size)
24340 {
24341 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24342
24343 if (cfun->machine->uses_anonymous_args)
24344 {
24345 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24346 unsigned long mask;
24347
24348 mask = 1ul << (LAST_ARG_REGNUM + 1);
24349 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24350
24351 insn = thumb1_emit_multi_reg_push (mask, 0);
24352 }
24353 else
24354 {
24355 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24356 stack_pointer_rtx, x));
24357 }
24358 RTX_FRAME_RELATED_P (insn) = 1;
24359 }
24360
24361 if (TARGET_BACKTRACE)
24362 {
24363 HOST_WIDE_INT offset = 0;
24364 unsigned work_register;
24365 rtx work_reg, x, arm_hfp_rtx;
24366
24367 /* We have been asked to create a stack backtrace structure.
24368 The code looks like this:
24369
24370 0 .align 2
24371 0 func:
24372 0 sub SP, #16 Reserve space for 4 registers.
24373 2 push {R7} Push low registers.
24374 4 add R7, SP, #20 Get the stack pointer before the push.
24375 6 str R7, [SP, #8] Store the stack pointer
24376 (before reserving the space).
24377 8 mov R7, PC Get hold of the start of this code + 12.
24378 10 str R7, [SP, #16] Store it.
24379 12 mov R7, FP Get hold of the current frame pointer.
24380 14 str R7, [SP, #4] Store it.
24381 16 mov R7, LR Get hold of the current return address.
24382 18 str R7, [SP, #12] Store it.
24383 20 add R7, SP, #16 Point at the start of the
24384 backtrace structure.
24385 22 mov FP, R7 Put this value into the frame pointer. */
24386
24387 work_register = thumb_find_work_register (live_regs_mask);
24388 work_reg = gen_rtx_REG (SImode, work_register);
24389 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24390
24391 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24392 stack_pointer_rtx, GEN_INT (-16)));
24393 RTX_FRAME_RELATED_P (insn) = 1;
24394
24395 if (l_mask)
24396 {
24397 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24398 RTX_FRAME_RELATED_P (insn) = 1;
24399
24400 offset = bit_count (l_mask) * UNITS_PER_WORD;
24401 }
24402
24403 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24404 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24405
24406 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24407 x = gen_frame_mem (SImode, x);
24408 emit_move_insn (x, work_reg);
24409
24410 /* Make sure that the instruction fetching the PC is in the right place
24411 to calculate "start of backtrace creation code + 12". */
24412 /* ??? The stores using the common WORK_REG ought to be enough to
24413 prevent the scheduler from doing anything weird. Failing that
24414 we could always move all of the following into an UNSPEC_VOLATILE. */
24415 if (l_mask)
24416 {
24417 x = gen_rtx_REG (SImode, PC_REGNUM);
24418 emit_move_insn (work_reg, x);
24419
24420 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24421 x = gen_frame_mem (SImode, x);
24422 emit_move_insn (x, work_reg);
24423
24424 emit_move_insn (work_reg, arm_hfp_rtx);
24425
24426 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24427 x = gen_frame_mem (SImode, x);
24428 emit_move_insn (x, work_reg);
24429 }
24430 else
24431 {
24432 emit_move_insn (work_reg, arm_hfp_rtx);
24433
24434 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24435 x = gen_frame_mem (SImode, x);
24436 emit_move_insn (x, work_reg);
24437
24438 x = gen_rtx_REG (SImode, PC_REGNUM);
24439 emit_move_insn (work_reg, x);
24440
24441 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24442 x = gen_frame_mem (SImode, x);
24443 emit_move_insn (x, work_reg);
24444 }
24445
24446 x = gen_rtx_REG (SImode, LR_REGNUM);
24447 emit_move_insn (work_reg, x);
24448
24449 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24450 x = gen_frame_mem (SImode, x);
24451 emit_move_insn (x, work_reg);
24452
24453 x = GEN_INT (offset + 12);
24454 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24455
24456 emit_move_insn (arm_hfp_rtx, work_reg);
24457 }
24458 /* Optimization: If we are not pushing any low registers but we are going
24459 to push some high registers then delay our first push. This will just
24460 be a push of LR and we can combine it with the push of the first high
24461 register. */
24462 else if ((l_mask & 0xff) != 0
24463 || (high_regs_pushed == 0 && l_mask))
24464 {
24465 unsigned long mask = l_mask;
24466 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24467 insn = thumb1_emit_multi_reg_push (mask, mask);
24468 RTX_FRAME_RELATED_P (insn) = 1;
24469 }
24470
24471 if (high_regs_pushed)
24472 {
24473 unsigned pushable_regs;
24474 unsigned next_hi_reg;
24475 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24476 : crtl->args.info.nregs;
24477 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24478
24479 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24480 if (live_regs_mask & (1 << next_hi_reg))
24481 break;
24482
24483 /* Here we need to mask out registers used for passing arguments
24484 even if they can be pushed. This is to avoid using them to stash the high
24485 registers; such stashing could clobber the argument values. */
24486 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24487
24488 if (pushable_regs == 0)
24489 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24490
24491 while (high_regs_pushed > 0)
24492 {
24493 unsigned long real_regs_mask = 0;
24494
24495 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24496 {
24497 if (pushable_regs & (1 << regno))
24498 {
24499 emit_move_insn (gen_rtx_REG (SImode, regno),
24500 gen_rtx_REG (SImode, next_hi_reg));
24501
24502 high_regs_pushed --;
24503 real_regs_mask |= (1 << next_hi_reg);
24504
24505 if (high_regs_pushed)
24506 {
24507 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24508 next_hi_reg --)
24509 if (live_regs_mask & (1 << next_hi_reg))
24510 break;
24511 }
24512 else
24513 {
24514 pushable_regs &= ~((1 << regno) - 1);
24515 break;
24516 }
24517 }
24518 }
24519
24520 /* If we had to find a work register and we have not yet
24521 saved the LR then add it to the list of regs to push. */
24522 if (l_mask == (1 << LR_REGNUM))
24523 {
24524 pushable_regs |= l_mask;
24525 real_regs_mask |= l_mask;
24526 l_mask = 0;
24527 }
24528
24529 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24530 RTX_FRAME_RELATED_P (insn) = 1;
24531 }
24532 }
24533
24534 /* Load the pic register before setting the frame pointer,
24535 so we can use r7 as a temporary work register. */
24536 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24537 arm_load_pic_register (live_regs_mask);
24538
24539 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24540 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24541 stack_pointer_rtx);
24542
24543 if (flag_stack_usage_info)
24544 current_function_static_stack_size
24545 = offsets->outgoing_args - offsets->saved_args;
24546
24547 amount = offsets->outgoing_args - offsets->saved_regs;
24548 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24549 if (amount)
24550 {
24551 if (amount < 512)
24552 {
24553 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24554 GEN_INT (- amount)));
24555 RTX_FRAME_RELATED_P (insn) = 1;
24556 }
24557 else
24558 {
24559 rtx reg, dwarf;
24560
24561 /* The stack decrement is too big for an immediate value in a single
24562 insn. In theory we could issue multiple subtracts, but after
24563 three of them it becomes more space efficient to place the full
24564 value in the constant pool and load into a register. (Also the
24565 ARM debugger really likes to see only one stack decrement per
24566 function). So instead we look for a scratch register into which
24567 we can load the decrement, and then we subtract this from the
24568 stack pointer. Unfortunately on the thumb the only available
24569 scratch registers are the argument registers, and we cannot use
24570 these as they may hold arguments to the function. Instead we
24571 attempt to locate a call preserved register which is used by this
24572 function. If we can find one, then we know that it will have
24573 been pushed at the start of the prologue and so we can corrupt
24574 it now. */
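/* Editorial sketch of the resulting code (the pool label is a
   placeholder): for a 1024-byte frame with r4 saved by the push above,
   the sequence amounts to

      ldr  r4, .Lpool      @ .Lpool holds -1024
      add  sp, sp, r4

   and clobbering r4 is harmless because its saved value is restored in
   the epilogue.  */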
24575 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24576 if (live_regs_mask & (1 << regno))
24577 break;
24578
24579 gcc_assert (regno <= LAST_LO_REGNUM);
24580
24581 reg = gen_rtx_REG (SImode, regno);
24582
24583 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24584
24585 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24586 stack_pointer_rtx, reg));
24587
24588 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24589 plus_constant (Pmode, stack_pointer_rtx,
24590 -amount));
24591 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24592 RTX_FRAME_RELATED_P (insn) = 1;
24593 }
24594 }
24595
24596 if (frame_pointer_needed)
24597 thumb_set_frame_pointer (offsets);
24598
24599 /* If we are profiling, make sure no instructions are scheduled before
24600 the call to mcount. Similarly if the user has requested no
24601 scheduling in the prolog. Similarly if we want non-call exceptions
24602 using the EABI unwinder, to prevent faulting instructions from being
24603 swapped with a stack adjustment. */
24604 if (crtl->profile || !TARGET_SCHED_PROLOG
24605 || (arm_except_unwind_info (&global_options) == UI_TARGET
24606 && cfun->can_throw_non_call_exceptions))
24607 emit_insn (gen_blockage ());
24608
24609 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24610 if (live_regs_mask & 0xff)
24611 cfun->machine->lr_save_eliminated = 0;
24612 }
24613
24614 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
24615 POP instruction can be generated. LR should be replaced by PC. All
24616 the required checks are already done by USE_RETURN_INSN (). Hence,
24617 all we really need to check here is whether a single register or
24618 multiple registers are to be popped. */
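/* Editorial illustration of the three shapes this can take (mnemonics
   are approximate): with only one register to restore, the PC is loaded
   by a single post-incrementing pop such as "ldr pc, [sp], #4"; with
   several registers LR is swapped for PC in the mask and the epilogue
   ends in e.g. "pop {r4, r5, pc}"; a simple return emits no pop at
   all.  */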
24619 void
24620 thumb2_expand_return (bool simple_return)
24621 {
24622 int i, num_regs;
24623 unsigned long saved_regs_mask;
24624 arm_stack_offsets *offsets;
24625
24626 offsets = arm_get_frame_offsets ();
24627 saved_regs_mask = offsets->saved_regs_mask;
24628
24629 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24630 if (saved_regs_mask & (1 << i))
24631 num_regs++;
24632
24633 if (!simple_return && saved_regs_mask)
24634 {
24635 if (num_regs == 1)
24636 {
24637 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24638 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24639 rtx addr = gen_rtx_MEM (SImode,
24640 gen_rtx_POST_INC (SImode,
24641 stack_pointer_rtx));
24642 set_mem_alias_set (addr, get_frame_alias_set ());
24643 XVECEXP (par, 0, 0) = ret_rtx;
24644 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24645 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24646 emit_jump_insn (par);
24647 }
24648 else
24649 {
24650 saved_regs_mask &= ~ (1 << LR_REGNUM);
24651 saved_regs_mask |= (1 << PC_REGNUM);
24652 arm_emit_multi_reg_pop (saved_regs_mask);
24653 }
24654 }
24655 else
24656 {
24657 emit_jump_insn (simple_return_rtx);
24658 }
24659 }
24660
24661 void
24662 thumb1_expand_epilogue (void)
24663 {
24664 HOST_WIDE_INT amount;
24665 arm_stack_offsets *offsets;
24666 int regno;
24667
24668 /* Naked functions don't have epilogues. */
24669 if (IS_NAKED (arm_current_func_type ()))
24670 return;
24671
24672 offsets = arm_get_frame_offsets ();
24673 amount = offsets->outgoing_args - offsets->saved_regs;
24674
24675 if (frame_pointer_needed)
24676 {
24677 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24678 amount = offsets->locals_base - offsets->saved_regs;
24679 }
24680 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24681
24682 gcc_assert (amount >= 0);
24683 if (amount)
24684 {
24685 emit_insn (gen_blockage ());
24686
24687 if (amount < 512)
24688 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24689 GEN_INT (amount)));
24690 else
24691 {
24692 /* r3 is always free in the epilogue. */
24693 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24694
24695 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24696 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24697 }
24698 }
24699
24700 /* Emit a USE (stack_pointer_rtx), so that
24701 the stack adjustment will not be deleted. */
24702 emit_insn (gen_force_register_use (stack_pointer_rtx));
24703
24704 if (crtl->profile || !TARGET_SCHED_PROLOG)
24705 emit_insn (gen_blockage ());
24706
24707 /* Emit a clobber for each register that will be restored in the epilogue,
24708 so that flow2 will get register lifetimes correct. */
24709 for (regno = 0; regno < 13; regno++)
24710 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24711 emit_clobber (gen_rtx_REG (SImode, regno));
24712
24713 if (! df_regs_ever_live_p (LR_REGNUM))
24714 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24715 }
24716
24717 /* Epilogue code for APCS frame. */
24718 static void
24719 arm_expand_epilogue_apcs_frame (bool really_return)
24720 {
24721 unsigned long func_type;
24722 unsigned long saved_regs_mask;
24723 int num_regs = 0;
24724 int i;
24725 int floats_from_frame = 0;
24726 arm_stack_offsets *offsets;
24727
24728 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24729 func_type = arm_current_func_type ();
24730
24731 /* Get frame offsets for ARM. */
24732 offsets = arm_get_frame_offsets ();
24733 saved_regs_mask = offsets->saved_regs_mask;
24734
24735 /* Find the offset of the floating-point save area in the frame. */
24736 floats_from_frame
24737 = (offsets->saved_args
24738 + arm_compute_static_chain_stack_bytes ()
24739 - offsets->frame);
24740
24741 /* Compute how many core registers are saved and how far away the floats are. */
24742 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24743 if (saved_regs_mask & (1 << i))
24744 {
24745 num_regs++;
24746 floats_from_frame += 4;
24747 }
24748
24749 if (TARGET_HARD_FLOAT && TARGET_VFP)
24750 {
24751 int start_reg;
24752 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24753
24754 /* The offset is from IP_REGNUM. */
24755 int saved_size = arm_get_vfp_saved_size ();
24756 if (saved_size > 0)
24757 {
24758 rtx_insn *insn;
24759 floats_from_frame += saved_size;
24760 insn = emit_insn (gen_addsi3 (ip_rtx,
24761 hard_frame_pointer_rtx,
24762 GEN_INT (-floats_from_frame)));
24763 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24764 ip_rtx, hard_frame_pointer_rtx);
24765 }
24766
24767 /* Generate VFP register multi-pop. */
24768 start_reg = FIRST_VFP_REGNUM;
24769
24770 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24771 /* Look for a case where a reg does not need restoring. */
24772 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24773 && (!df_regs_ever_live_p (i + 1)
24774 || call_used_regs[i + 1]))
24775 {
24776 if (start_reg != i)
24777 arm_emit_vfp_multi_reg_pop (start_reg,
24778 (i - start_reg) / 2,
24779 gen_rtx_REG (SImode,
24780 IP_REGNUM));
24781 start_reg = i + 2;
24782 }
24783
24784 /* Restore the remaining regs that we have discovered (or possibly
24785 even all of them, if the conditional in the for loop never
24786 fired). */
24787 if (start_reg != i)
24788 arm_emit_vfp_multi_reg_pop (start_reg,
24789 (i - start_reg) / 2,
24790 gen_rtx_REG (SImode, IP_REGNUM));
24791 }
24792
24793 if (TARGET_IWMMXT)
24794 {
24795 /* The frame pointer is guaranteed to be non-double-word aligned, as
24796 it is set to double-word-aligned old_stack_pointer - 4. */
24797 rtx_insn *insn;
24798 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24799
24800 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24801 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24802 {
24803 rtx addr = gen_frame_mem (V2SImode,
24804 plus_constant (Pmode, hard_frame_pointer_rtx,
24805 - lrm_count * 4));
24806 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24807 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24808 gen_rtx_REG (V2SImode, i),
24809 NULL_RTX);
24810 lrm_count += 2;
24811 }
24812 }
24813
24814 /* saved_regs_mask should contain IP, which holds the old stack pointer
24815 from the time the activation record was created. Since SP and IP are adjacent registers,
24816 we can restore the value directly into SP. */
24817 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24818 saved_regs_mask &= ~(1 << IP_REGNUM);
24819 saved_regs_mask |= (1 << SP_REGNUM);
24820
24821 /* There are two registers left in saved_regs_mask - LR and PC. We
24822 only need to restore LR (the return address), but to
24823 save time we can load it directly into PC, unless we need a
24824 special function exit sequence, or we are not really returning. */
24825 if (really_return
24826 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24827 && !crtl->calls_eh_return)
24828 /* Delete LR from the register mask, so that the return address saved
24829 on the stack is popped directly into the PC. */
24830 saved_regs_mask &= ~(1 << LR_REGNUM);
24831 else
24832 saved_regs_mask &= ~(1 << PC_REGNUM);
24833
24834 num_regs = bit_count (saved_regs_mask);
24835 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24836 {
24837 rtx_insn *insn;
24838 emit_insn (gen_blockage ());
24839 /* Unwind the stack to just below the saved registers. */
24840 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24841 hard_frame_pointer_rtx,
24842 GEN_INT (- 4 * num_regs)));
24843
24844 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24845 stack_pointer_rtx, hard_frame_pointer_rtx);
24846 }
24847
24848 arm_emit_multi_reg_pop (saved_regs_mask);
24849
24850 if (IS_INTERRUPT (func_type))
24851 {
24852 /* Interrupt handlers will have pushed the
24853 IP onto the stack, so restore it now. */
24854 rtx_insn *insn;
24855 rtx addr = gen_rtx_MEM (SImode,
24856 gen_rtx_POST_INC (SImode,
24857 stack_pointer_rtx));
24858 set_mem_alias_set (addr, get_frame_alias_set ());
24859 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24860 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24861 gen_rtx_REG (SImode, IP_REGNUM),
24862 NULL_RTX);
24863 }
24864
24865 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24866 return;
24867
24868 if (crtl->calls_eh_return)
24869 emit_insn (gen_addsi3 (stack_pointer_rtx,
24870 stack_pointer_rtx,
24871 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24872
24873 if (IS_STACKALIGN (func_type))
24874 /* Restore the original stack pointer. Before prologue, the stack was
24875 realigned and the original stack pointer saved in r0. For details,
24876 see comment in arm_expand_prologue. */
24877 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24878
24879 emit_jump_insn (simple_return_rtx);
24880 }
24881
24882 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24883 function is not a sibcall. */
24884 void
24885 arm_expand_epilogue (bool really_return)
24886 {
24887 unsigned long func_type;
24888 unsigned long saved_regs_mask;
24889 int num_regs = 0;
24890 int i;
24891 int amount;
24892 arm_stack_offsets *offsets;
24893
24894 func_type = arm_current_func_type ();
24895
24896 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24897 let output_return_instruction take care of instruction emission if any. */
24898 if (IS_NAKED (func_type)
24899 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24900 {
24901 if (really_return)
24902 emit_jump_insn (simple_return_rtx);
24903 return;
24904 }
24905
24906 /* If we are throwing an exception, then we really must be doing a
24907 return, so we can't tail-call. */
24908 gcc_assert (!crtl->calls_eh_return || really_return);
24909
24910 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24911 {
24912 arm_expand_epilogue_apcs_frame (really_return);
24913 return;
24914 }
24915
24916 /* Get frame offsets for ARM. */
24917 offsets = arm_get_frame_offsets ();
24918 saved_regs_mask = offsets->saved_regs_mask;
24919 num_regs = bit_count (saved_regs_mask);
24920
24921 if (frame_pointer_needed)
24922 {
24923 rtx_insn *insn;
24924 /* Restore stack pointer if necessary. */
24925 if (TARGET_ARM)
24926 {
24927 /* In ARM mode, the frame pointer points to the first saved register.
24928 Restore the stack pointer to the last saved register. */
24929 amount = offsets->frame - offsets->saved_regs;
24930
24931 /* Force out any pending memory operations that reference stacked data
24932 before stack de-allocation occurs. */
24933 emit_insn (gen_blockage ());
24934 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24935 hard_frame_pointer_rtx,
24936 GEN_INT (amount)));
24937 arm_add_cfa_adjust_cfa_note (insn, amount,
24938 stack_pointer_rtx,
24939 hard_frame_pointer_rtx);
24940
24941 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24942 deleted. */
24943 emit_insn (gen_force_register_use (stack_pointer_rtx));
24944 }
24945 else
24946 {
24947 /* In Thumb-2 mode, the frame pointer points to the last saved
24948 register. */
24949 amount = offsets->locals_base - offsets->saved_regs;
24950 if (amount)
24951 {
24952 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24953 hard_frame_pointer_rtx,
24954 GEN_INT (amount)));
24955 arm_add_cfa_adjust_cfa_note (insn, amount,
24956 hard_frame_pointer_rtx,
24957 hard_frame_pointer_rtx);
24958 }
24959
24960 /* Force out any pending memory operations that reference stacked data
24961 before stack de-allocation occurs. */
24962 emit_insn (gen_blockage ());
24963 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24964 hard_frame_pointer_rtx));
24965 arm_add_cfa_adjust_cfa_note (insn, 0,
24966 stack_pointer_rtx,
24967 hard_frame_pointer_rtx);
24968 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24969 deleted. */
24970 emit_insn (gen_force_register_use (stack_pointer_rtx));
24971 }
24972 }
24973 else
24974 {
24975 /* Pop off outgoing args and local frame to adjust stack pointer to
24976 last saved register. */
24977 amount = offsets->outgoing_args - offsets->saved_regs;
24978 if (amount)
24979 {
24980 rtx_insn *tmp;
24981 /* Force out any pending memory operations that reference stacked data
24982 before stack de-allocation occurs. */
24983 emit_insn (gen_blockage ());
24984 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24985 stack_pointer_rtx,
24986 GEN_INT (amount)));
24987 arm_add_cfa_adjust_cfa_note (tmp, amount,
24988 stack_pointer_rtx, stack_pointer_rtx);
24989 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24990 not deleted. */
24991 emit_insn (gen_force_register_use (stack_pointer_rtx));
24992 }
24993 }
24994
24995 if (TARGET_HARD_FLOAT && TARGET_VFP)
24996 {
24997 /* Generate VFP register multi-pop. */
24998 int end_reg = LAST_VFP_REGNUM + 1;
24999
25000 /* Scan the registers in reverse order. We need to match
25001 any groupings made in the prologue and generate matching
25002 vldm operations. The groups must match because,
25003 unlike pop, vldm can only restore consecutive registers. */
25004 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25005 /* Look for a case where a reg does not need restoring. */
25006 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25007 && (!df_regs_ever_live_p (i + 1)
25008 || call_used_regs[i + 1]))
25009 {
25010 /* Restore the regs discovered so far (from reg+2 to
25011 end_reg). */
25012 if (end_reg > i + 2)
25013 arm_emit_vfp_multi_reg_pop (i + 2,
25014 (end_reg - (i + 2)) / 2,
25015 stack_pointer_rtx);
25016 end_reg = i;
25017 }
25018
25019 /* Restore the remaining regs that we have discovered (or possibly
25020 even all of them, if the conditional in the for loop never
25021 fired). */
25022 if (end_reg > i + 2)
25023 arm_emit_vfp_multi_reg_pop (i + 2,
25024 (end_reg - (i + 2)) / 2,
25025 stack_pointer_rtx);
25026 }
25027
25028 if (TARGET_IWMMXT)
25029 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25030 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25031 {
25032 rtx_insn *insn;
25033 rtx addr = gen_rtx_MEM (V2SImode,
25034 gen_rtx_POST_INC (SImode,
25035 stack_pointer_rtx));
25036 set_mem_alias_set (addr, get_frame_alias_set ());
25037 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25038 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25039 gen_rtx_REG (V2SImode, i),
25040 NULL_RTX);
25041 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25042 stack_pointer_rtx, stack_pointer_rtx);
25043 }
25044
25045 if (saved_regs_mask)
25046 {
25047 rtx insn;
25048 bool return_in_pc = false;
25049
25050 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25051 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25052 && !IS_STACKALIGN (func_type)
25053 && really_return
25054 && crtl->args.pretend_args_size == 0
25055 && saved_regs_mask & (1 << LR_REGNUM)
25056 && !crtl->calls_eh_return)
25057 {
25058 saved_regs_mask &= ~(1 << LR_REGNUM);
25059 saved_regs_mask |= (1 << PC_REGNUM);
25060 return_in_pc = true;
25061 }
25062
25063 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25064 {
25065 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25066 if (saved_regs_mask & (1 << i))
25067 {
25068 rtx addr = gen_rtx_MEM (SImode,
25069 gen_rtx_POST_INC (SImode,
25070 stack_pointer_rtx));
25071 set_mem_alias_set (addr, get_frame_alias_set ());
25072
25073 if (i == PC_REGNUM)
25074 {
25075 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25076 XVECEXP (insn, 0, 0) = ret_rtx;
25077 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25078 gen_rtx_REG (SImode, i),
25079 addr);
25080 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25081 insn = emit_jump_insn (insn);
25082 }
25083 else
25084 {
25085 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25086 addr));
25087 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25088 gen_rtx_REG (SImode, i),
25089 NULL_RTX);
25090 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25091 stack_pointer_rtx,
25092 stack_pointer_rtx);
25093 }
25094 }
25095 }
25096 else
25097 {
25098 if (TARGET_LDRD
25099 && current_tune->prefer_ldrd_strd
25100 && !optimize_function_for_size_p (cfun))
25101 {
25102 if (TARGET_THUMB2)
25103 thumb2_emit_ldrd_pop (saved_regs_mask);
25104 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25105 arm_emit_ldrd_pop (saved_regs_mask);
25106 else
25107 arm_emit_multi_reg_pop (saved_regs_mask);
25108 }
25109 else
25110 arm_emit_multi_reg_pop (saved_regs_mask);
25111 }
25112
25113 if (return_in_pc == true)
25114 return;
25115 }
25116
25117 if (crtl->args.pretend_args_size)
25118 {
25119 int i, j;
25120 rtx dwarf = NULL_RTX;
25121 rtx_insn *tmp =
25122 emit_insn (gen_addsi3 (stack_pointer_rtx,
25123 stack_pointer_rtx,
25124 GEN_INT (crtl->args.pretend_args_size)));
25125
25126 RTX_FRAME_RELATED_P (tmp) = 1;
25127
25128 if (cfun->machine->uses_anonymous_args)
25129 {
25130 /* Restore pretend args. Refer to arm_expand_prologue for how
25131 pretend_args are saved on the stack. */
25132 int num_regs = crtl->args.pretend_args_size / 4;
25133 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25134 for (j = 0, i = 0; j < num_regs; i++)
25135 if (saved_regs_mask & (1 << i))
25136 {
25137 rtx reg = gen_rtx_REG (SImode, i);
25138 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25139 j++;
25140 }
25141 REG_NOTES (tmp) = dwarf;
25142 }
25143 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25144 stack_pointer_rtx, stack_pointer_rtx);
25145 }
25146
25147 if (!really_return)
25148 return;
25149
25150 if (crtl->calls_eh_return)
25151 emit_insn (gen_addsi3 (stack_pointer_rtx,
25152 stack_pointer_rtx,
25153 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25154
25155 if (IS_STACKALIGN (func_type))
25156 /* Restore the original stack pointer. Before prologue, the stack was
25157 realigned and the original stack pointer saved in r0. For details,
25158 see comment in arm_expand_prologue. */
25159 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25160
25161 emit_jump_insn (simple_return_rtx);
25162 }
25163
25164 /* Implementation of insn prologue_thumb1_interwork. This is the first
25165 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25166
25167 const char *
25168 thumb1_output_interwork (void)
25169 {
25170 const char * name;
25171 FILE *f = asm_out_file;
25172
25173 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25174 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25175 == SYMBOL_REF);
25176 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25177
25178 /* Generate code sequence to switch us into Thumb mode. */
25179 /* The .code 32 directive has already been emitted by
25180 ASM_DECLARE_FUNCTION_NAME. */
25181 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25182 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25183
25184 /* Generate a label, so that the debugger will notice the
25185 change in instruction sets. This label is also used by
25186 the assembler to bypass the ARM code when this function
25187 is called from a Thumb encoded function elsewhere in the
25188 same file. Hence the definition of STUB_NAME here must
25189 agree with the definition in gas/config/tc-arm.c. */
25190
25191 #define STUB_NAME ".real_start_of"
25192
25193 fprintf (f, "\t.code\t16\n");
25194 #ifdef ARM_PE
25195 if (arm_dllexport_name_p (name))
25196 name = arm_strip_name_encoding (name);
25197 #endif
25198 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25199 fprintf (f, "\t.thumb_func\n");
25200 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25201
25202 return "";
25203 }
25204
25205 /* Handle the case of a double word load into a low register from
25206 a computed memory address. The computed address may involve a
25207 register which is overwritten by the load. */
25208 const char *
25209 thumb_load_double_from_address (rtx *operands)
25210 {
25211 rtx addr;
25212 rtx base;
25213 rtx offset;
25214 rtx arg1;
25215 rtx arg2;
25216
25217 gcc_assert (REG_P (operands[0]));
25218 gcc_assert (MEM_P (operands[1]));
25219
25220 /* Get the memory address. */
25221 addr = XEXP (operands[1], 0);
25222
25223 /* Work out how the memory address is computed. */
25224 switch (GET_CODE (addr))
25225 {
25226 case REG:
25227 operands[2] = adjust_address (operands[1], SImode, 4);
25228
25229 if (REGNO (operands[0]) == REGNO (addr))
25230 {
25231 output_asm_insn ("ldr\t%H0, %2", operands);
25232 output_asm_insn ("ldr\t%0, %1", operands);
25233 }
25234 else
25235 {
25236 output_asm_insn ("ldr\t%0, %1", operands);
25237 output_asm_insn ("ldr\t%H0, %2", operands);
25238 }
25239 break;
25240
25241 case CONST:
25242 /* Compute <address> + 4 for the high order load. */
25243 operands[2] = adjust_address (operands[1], SImode, 4);
25244
25245 output_asm_insn ("ldr\t%0, %1", operands);
25246 output_asm_insn ("ldr\t%H0, %2", operands);
25247 break;
25248
25249 case PLUS:
25250 arg1 = XEXP (addr, 0);
25251 arg2 = XEXP (addr, 1);
25252
25253 if (CONSTANT_P (arg1))
25254 base = arg2, offset = arg1;
25255 else
25256 base = arg1, offset = arg2;
25257
25258 gcc_assert (REG_P (base));
25259
25260 /* Catch the case of <address> = <reg> + <reg> */
25261 if (REG_P (offset))
25262 {
25263 int reg_offset = REGNO (offset);
25264 int reg_base = REGNO (base);
25265 int reg_dest = REGNO (operands[0]);
25266
25267 /* Add the base and offset registers together into the
25268 higher destination register. */
25269 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25270 reg_dest + 1, reg_base, reg_offset);
25271
25272 /* Load the lower destination register from the address in
25273 the higher destination register. */
25274 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25275 reg_dest, reg_dest + 1);
25276
25277 /* Load the higher destination register from its own address
25278 plus 4. */
25279 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25280 reg_dest + 1, reg_dest + 1);
25281 }
25282 else
25283 {
25284 /* Compute <address> + 4 for the high order load. */
25285 operands[2] = adjust_address (operands[1], SImode, 4);
25286
25287 /* If the computed address is held in the low order register
25288 then load the high order register first, otherwise always
25289 load the low order register first. */
25290 if (REGNO (operands[0]) == REGNO (base))
25291 {
25292 output_asm_insn ("ldr\t%H0, %2", operands);
25293 output_asm_insn ("ldr\t%0, %1", operands);
25294 }
25295 else
25296 {
25297 output_asm_insn ("ldr\t%0, %1", operands);
25298 output_asm_insn ("ldr\t%H0, %2", operands);
25299 }
25300 }
25301 break;
25302
25303 case LABEL_REF:
25304 /* With no registers to worry about we can just load the value
25305 directly. */
25306 operands[2] = adjust_address (operands[1], SImode, 4);
25307
25308 output_asm_insn ("ldr\t%H0, %2", operands);
25309 output_asm_insn ("ldr\t%0, %1", operands);
25310 break;
25311
25312 default:
25313 gcc_unreachable ();
25314 }
25315
25316 return "";
25317 }
25318
25319 const char *
25320 thumb_output_move_mem_multiple (int n, rtx *operands)
25321 {
25322 rtx tmp;
25323
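  /* Editorial note: ldmia/stmia transfer registers in order of increasing
     register number, so the scratch operands are sorted below before
     being printed as a register list.  */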
25324 switch (n)
25325 {
25326 case 2:
25327 if (REGNO (operands[4]) > REGNO (operands[5]))
25328 {
25329 tmp = operands[4];
25330 operands[4] = operands[5];
25331 operands[5] = tmp;
25332 }
25333 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25334 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25335 break;
25336
25337 case 3:
25338 if (REGNO (operands[4]) > REGNO (operands[5]))
25339 std::swap (operands[4], operands[5]);
25340 if (REGNO (operands[5]) > REGNO (operands[6]))
25341 std::swap (operands[5], operands[6]);
25342 if (REGNO (operands[4]) > REGNO (operands[5]))
25343 std::swap (operands[4], operands[5]);
25344
25345 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25346 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25347 break;
25348
25349 default:
25350 gcc_unreachable ();
25351 }
25352
25353 return "";
25354 }
25355
25356 /* Output a call-via instruction for thumb state. */
25357 const char *
25358 thumb_call_via_reg (rtx reg)
25359 {
25360 int regno = REGNO (reg);
25361 rtx *labelp;
25362
25363 gcc_assert (regno < LR_REGNUM);
25364
25365 /* If we are in the normal text section we can use a single instance
25366 per compilation unit. If we are doing function sections, then we need
25367 an entry per section, since we can't rely on reachability. */
25368 if (in_section == text_section)
25369 {
25370 thumb_call_reg_needed = 1;
25371
25372 if (thumb_call_via_label[regno] == NULL)
25373 thumb_call_via_label[regno] = gen_label_rtx ();
25374 labelp = thumb_call_via_label + regno;
25375 }
25376 else
25377 {
25378 if (cfun->machine->call_via[regno] == NULL)
25379 cfun->machine->call_via[regno] = gen_label_rtx ();
25380 labelp = cfun->machine->call_via + regno;
25381 }
25382
25383 output_asm_insn ("bl\t%a0", labelp);
25384 return "";
25385 }
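/* Editorial illustration (label name is a placeholder): a Thumb call
   through r4 is therefore emitted as "bl .Lcall_r4", where the shared
   stub

      .Lcall_r4:
      bx   r4

   is written out once per text section (see arm_file_end) or once per
   function's own section.  */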
25386
25387 /* Routines for generating rtl. */
25388 void
25389 thumb_expand_movmemqi (rtx *operands)
25390 {
25391 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25392 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25393 HOST_WIDE_INT len = INTVAL (operands[2]);
25394 HOST_WIDE_INT offset = 0;
25395
25396 while (len >= 12)
25397 {
25398 emit_insn (gen_movmem12b (out, in, out, in));
25399 len -= 12;
25400 }
25401
25402 if (len >= 8)
25403 {
25404 emit_insn (gen_movmem8b (out, in, out, in));
25405 len -= 8;
25406 }
25407
25408 if (len >= 4)
25409 {
25410 rtx reg = gen_reg_rtx (SImode);
25411 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25412 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25413 len -= 4;
25414 offset += 4;
25415 }
25416
25417 if (len >= 2)
25418 {
25419 rtx reg = gen_reg_rtx (HImode);
25420 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25421 plus_constant (Pmode, in,
25422 offset))));
25423 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25424 offset)),
25425 reg));
25426 len -= 2;
25427 offset += 2;
25428 }
25429
25430 if (len)
25431 {
25432 rtx reg = gen_reg_rtx (QImode);
25433 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25434 plus_constant (Pmode, in,
25435 offset))));
25436 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25437 offset)),
25438 reg));
25439 }
25440 }
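/* Worked example for the expander above (editorial): a 23-byte copy
   becomes one 12-byte and one 8-byte ldmia/stmia block (each of which
   post-increments the pointer registers), then a halfword move at
   offset 0 and a byte move at offset 2 of the 3-byte tail.  */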
25441
25442 void
25443 thumb_reload_out_hi (rtx *operands)
25444 {
25445 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25446 }
25447
25448 /* Handle reading a half-word from memory during reload. */
25449 void
25450 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25451 {
25452 gcc_unreachable ();
25453 }
25454
25455 /* Return the length of a function name prefix
25456 that starts with the character 'c'. */
25457 static int
25458 arm_get_strip_length (int c)
25459 {
25460 switch (c)
25461 {
25462 ARM_NAME_ENCODING_LENGTHS
25463 default: return 0;
25464 }
25465 }
25466
25467 /* Return a pointer to a function's name with any
25468 and all prefix encodings stripped from it. */
25469 const char *
25470 arm_strip_name_encoding (const char *name)
25471 {
25472 int skip;
25473
25474 while ((skip = arm_get_strip_length (* name)))
25475 name += skip;
25476
25477 return name;
25478 }
25479
25480 /* If there is a '*' anywhere in the name's prefix, then
25481 emit the stripped name verbatim, otherwise prepend an
25482 underscore if leading underscores are being used. */
25483 void
25484 arm_asm_output_labelref (FILE *stream, const char *name)
25485 {
25486 int skip;
25487 int verbatim = 0;
25488
25489 while ((skip = arm_get_strip_length (* name)))
25490 {
25491 verbatim |= (*name == '*');
25492 name += skip;
25493 }
25494
25495 if (verbatim)
25496 fputs (name, stream);
25497 else
25498 asm_fprintf (stream, "%U%s", name);
25499 }
25500
25501 /* This function is used to emit an EABI tag and its associated value.
25502 We emit the numerical value of the tag in case the assembler does not
25503 support textual tags (e.g. gas prior to 2.20). If requested we include
25504 the tag name in a comment so that anyone reading the assembler output
25505 will know which tag is being set.
25506
25507 This function is not static because arm-c.c needs it too. */
25508
25509 void
25510 arm_emit_eabi_attribute (const char *name, int num, int val)
25511 {
25512 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25513 if (flag_verbose_asm || flag_debug_asm)
25514 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25515 asm_fprintf (asm_out_file, "\n");
25516 }
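/* Editorial example of the output (assuming the usual "@" comment prefix
   for ARM assembly): arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   prints

      .eabi_attribute 26, 2   @ Tag_ABI_enum_size

   with the trailing comment present only under -fverbose-asm or -dA.  */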
25517
25518 static void
25519 arm_file_start (void)
25520 {
25521 int val;
25522
25523 if (TARGET_UNIFIED_ASM)
25524 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25525
25526 if (TARGET_BPABI)
25527 {
25528 const char *fpu_name;
25529 if (arm_selected_arch)
25530 {
25531 /* armv7ve doesn't support any extensions. */
25532 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25533 {
25534 /* Keep backward compatibility for assemblers
25535 which don't support armv7ve. */
25536 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25537 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25538 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25539 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25540 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25541 }
25542 else
25543 {
25544 const char* pos = strchr (arm_selected_arch->name, '+');
25545 if (pos)
25546 {
25547 char buf[15];
25548 gcc_assert (strlen (arm_selected_arch->name)
25549 <= sizeof (buf) / sizeof (*pos));
25550 strncpy (buf, arm_selected_arch->name,
25551 (pos - arm_selected_arch->name) * sizeof (*pos));
25552 buf[pos - arm_selected_arch->name] = '\0';
25553 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25554 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25555 }
25556 else
25557 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25558 }
25559 }
25560 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25561 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25562 else
25563 {
25564 const char* truncated_name
25565 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25566 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25567 }
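/* Editorial example of the '+' handling above (architecture name chosen
   purely for illustration): an entry such as "armv8-a+crc" is split at
   the '+' and emitted as

      .arch armv8-a
      .arch_extension crc

   i.e. a base architecture directive plus a separate extension
   directive.  */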
25568
25569 if (TARGET_SOFT_FLOAT)
25570 {
25571 fpu_name = "softvfp";
25572 }
25573 else
25574 {
25575 fpu_name = arm_fpu_desc->name;
25576 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25577 {
25578 if (TARGET_HARD_FLOAT)
25579 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25580 if (TARGET_HARD_FLOAT_ABI)
25581 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25582 }
25583 }
25584 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25585
25586 /* Some of these attributes only apply when the corresponding features
25587 are used. However we don't have any easy way of figuring this out.
25588 Conservatively record the setting that would have been used. */
25589
25590 if (flag_rounding_math)
25591 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25592
25593 if (!flag_unsafe_math_optimizations)
25594 {
25595 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25596 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25597 }
25598 if (flag_signaling_nans)
25599 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25600
25601 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25602 flag_finite_math_only ? 1 : 3);
25603
25604 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25605 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25606 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25607 flag_short_enums ? 1 : 2);
25608
25609 /* Tag_ABI_optimization_goals. */
25610 if (optimize_size)
25611 val = 4;
25612 else if (optimize >= 2)
25613 val = 2;
25614 else if (optimize)
25615 val = 1;
25616 else
25617 val = 6;
25618 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25619
25620 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25621 unaligned_access);
25622
25623 if (arm_fp16_format)
25624 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25625 (int) arm_fp16_format);
25626
25627 if (arm_lang_output_object_attributes_hook)
25628 arm_lang_output_object_attributes_hook();
25629 }
25630
25631 default_file_start ();
25632 }
25633
25634 static void
25635 arm_file_end (void)
25636 {
25637 int regno;
25638
25639 if (NEED_INDICATE_EXEC_STACK)
25640 /* Add .note.GNU-stack. */
25641 file_end_indicate_exec_stack ();
25642
25643 if (! thumb_call_reg_needed)
25644 return;
25645
25646 switch_to_section (text_section);
25647 asm_fprintf (asm_out_file, "\t.code 16\n");
25648 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25649
25650 for (regno = 0; regno < LR_REGNUM; regno++)
25651 {
25652 rtx label = thumb_call_via_label[regno];
25653
25654 if (label != 0)
25655 {
25656 targetm.asm_out.internal_label (asm_out_file, "L",
25657 CODE_LABEL_NUMBER (label));
25658 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25659 }
25660 }
25661 }
25662
25663 #ifndef ARM_PE
25664 /* Symbols in the text segment can be accessed without indirecting via the
25665 constant pool; it may take an extra binary operation, but this is still
25666 faster than indirecting via memory. Don't do this when not optimizing,
25667 since we won't be calculating all of the offsets necessary to do this
25668 simplification. */
25669
25670 static void
25671 arm_encode_section_info (tree decl, rtx rtl, int first)
25672 {
25673 if (optimize > 0 && TREE_CONSTANT (decl))
25674 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25675
25676 default_encode_section_info (decl, rtl, first);
25677 }
25678 #endif /* !ARM_PE */
25679
25680 static void
25681 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25682 {
25683 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25684 && !strcmp (prefix, "L"))
25685 {
25686 arm_ccfsm_state = 0;
25687 arm_target_insn = NULL;
25688 }
25689 default_internal_label (stream, prefix, labelno);
25690 }
25691
25692 /* Output code to add DELTA to the first argument, and then jump
25693 to FUNCTION. Used for C++ multiple inheritance. */
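/* For illustration only: in the non-Thumb-1 path below, a small DELTA is
   folded into add/sub instructions on the "this" register and the thunk
   ends with a direct branch, so a thunk with DELTA == 4 comes out roughly
   as "add r0, r0, #4" followed by "b <function>(PLT)" (assuming a
   non-aggregate return, so "this" is in r0).  */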
25694 static void
25695 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25696 HOST_WIDE_INT delta,
25697 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25698 tree function)
25699 {
25700 static int thunk_label = 0;
25701 char label[256];
25702 char labelpc[256];
25703 int mi_delta = delta;
25704 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25705 int shift = 0;
25706 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25707 ? 1 : 0);
25708 if (mi_delta < 0)
25709 mi_delta = - mi_delta;
25710
25711 final_start_function (emit_barrier (), file, 1);
25712
25713 if (TARGET_THUMB1)
25714 {
25715 int labelno = thunk_label++;
25716 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25717 /* Thunks are entered in ARM mode when available. */
25718 if (TARGET_THUMB1_ONLY)
25719 {
25720 /* push r3 so we can use it as a temporary. */
25721 /* TODO: Omit this save if r3 is not used. */
25722 fputs ("\tpush {r3}\n", file);
25723 fputs ("\tldr\tr3, ", file);
25724 }
25725 else
25726 {
25727 fputs ("\tldr\tr12, ", file);
25728 }
25729 assemble_name (file, label);
25730 fputc ('\n', file);
25731 if (flag_pic)
25732 {
25733 /* If we are generating PIC, the ldr instruction below loads
25734 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25735 the address of the add + 8, so we have:
25736
25737 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25738 = target + 1.
25739
25740 Note that we have "+ 1" because some versions of GNU ld
25741 don't set the low bit of the result for R_ARM_REL32
25742 relocations against thumb function symbols.
25743 On ARMv6M this is +4, not +8. */
25744 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25745 assemble_name (file, labelpc);
25746 fputs (":\n", file);
25747 if (TARGET_THUMB1_ONLY)
25748 {
25749 /* This is 2 insns after the start of the thunk, so we know it
25750 is 4-byte aligned. */
25751 fputs ("\tadd\tr3, pc, r3\n", file);
25752 fputs ("\tmov r12, r3\n", file);
25753 }
25754 else
25755 fputs ("\tadd\tr12, pc, r12\n", file);
25756 }
25757 else if (TARGET_THUMB1_ONLY)
25758 fputs ("\tmov r12, r3\n", file);
25759 }
25760 if (TARGET_THUMB1_ONLY)
25761 {
25762 if (mi_delta > 255)
25763 {
25764 fputs ("\tldr\tr3, ", file);
25765 assemble_name (file, label);
25766 fputs ("+4\n", file);
25767 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25768 mi_op, this_regno, this_regno);
25769 }
25770 else if (mi_delta != 0)
25771 {
25772 /* Thumb1 unified syntax requires s suffix in instruction name when
25773 one of the operands is immediate. */
25774 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25775 mi_op, this_regno, this_regno,
25776 mi_delta);
25777 }
25778 }
25779 else
25780 {
25781 /* TODO: Use movw/movt for large constants when available. */
25782 while (mi_delta != 0)
25783 {
25784 if ((mi_delta & (3 << shift)) == 0)
25785 shift += 2;
25786 else
25787 {
25788 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25789 mi_op, this_regno, this_regno,
25790 mi_delta & (0xff << shift));
25791 mi_delta &= ~(0xff << shift);
25792 shift += 8;
25793 }
25794 }
25795 }
25796 if (TARGET_THUMB1)
25797 {
25798 if (TARGET_THUMB1_ONLY)
25799 fputs ("\tpop\t{r3}\n", file);
25800
25801 fprintf (file, "\tbx\tr12\n");
25802 ASM_OUTPUT_ALIGN (file, 2);
25803 assemble_name (file, label);
25804 fputs (":\n", file);
25805 if (flag_pic)
25806 {
25807 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25808 rtx tem = XEXP (DECL_RTL (function), 0);
25809 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25810 pipeline offset is four rather than eight. Adjust the offset
25811 accordingly. */
25812 tem = plus_constant (GET_MODE (tem), tem,
25813 TARGET_THUMB1_ONLY ? -3 : -7);
25814 tem = gen_rtx_MINUS (GET_MODE (tem),
25815 tem,
25816 gen_rtx_SYMBOL_REF (Pmode,
25817 ggc_strdup (labelpc)));
25818 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25819 }
25820 else
25821 /* Output ".word .LTHUNKn". */
25822 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25823
25824 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25825 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25826 }
25827 else
25828 {
25829 fputs ("\tb\t", file);
25830 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25831 if (NEED_PLT_RELOC)
25832 fputs ("(PLT)", file);
25833 fputc ('\n', file);
25834 }
25835
25836 final_end_function ();
25837 }
25838
25839 int
25840 arm_emit_vector_const (FILE *file, rtx x)
25841 {
25842 int i;
25843 const char * pattern;
25844
25845 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25846
25847 switch (GET_MODE (x))
25848 {
25849 case V2SImode: pattern = "%08x"; break;
25850 case V4HImode: pattern = "%04x"; break;
25851 case V8QImode: pattern = "%02x"; break;
25852 default: gcc_unreachable ();
25853 }
25854
25855 fprintf (file, "0x");
25856 for (i = CONST_VECTOR_NUNITS (x); i--;)
25857 {
25858 rtx element;
25859
25860 element = CONST_VECTOR_ELT (x, i);
25861 fprintf (file, pattern, INTVAL (element));
25862 }
25863
25864 return 1;
25865 }
25866
25867 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25868 HFmode constant pool entries are actually loaded with ldr. */
25869 void
25870 arm_emit_fp16_const (rtx c)
25871 {
25872 REAL_VALUE_TYPE r;
25873 long bits;
25874
25875 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25876 bits = real_to_target (NULL, &r, HFmode);
25877 if (WORDS_BIG_ENDIAN)
25878 assemble_zeros (2);
25879 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25880 if (!WORDS_BIG_ENDIAN)
25881 assemble_zeros (2);
25882 }
25883
25884 const char *
25885 arm_output_load_gr (rtx *operands)
25886 {
25887 rtx reg;
25888 rtx offset;
25889 rtx wcgr;
25890 rtx sum;
25891
25892 if (!MEM_P (operands [1])
25893 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25894 || !REG_P (reg = XEXP (sum, 0))
25895 || !CONST_INT_P (offset = XEXP (sum, 1))
25896 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25897 return "wldrw%?\t%0, %1";
25898
25899 /* Fix up an out-of-range load of a GR register. */
25900 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25901 wcgr = operands[0];
25902 operands[0] = reg;
25903 output_asm_insn ("ldr%?\t%0, %1", operands);
25904
25905 operands[0] = wcgr;
25906 operands[1] = reg;
25907 output_asm_insn ("tmcr%?\t%0, %1", operands);
25908 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25909
25910 return "";
25911 }
25912
25913 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25914
25915 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25916 named arg and all anonymous args onto the stack.
25917 XXX I know the prologue shouldn't be pushing registers, but it is faster
25918 that way. */
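/* Illustrative example: for a function such as "void f (int a, ...)" the
   single named argument occupies r0, so nregs is 1 and *pretend_size
   becomes 3 * UNITS_PER_WORD; the prologue then pushes r1-r3 where the
   va_arg machinery can find them.  */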
25919
25920 static void
25921 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25922 machine_mode mode,
25923 tree type,
25924 int *pretend_size,
25925 int second_time ATTRIBUTE_UNUSED)
25926 {
25927 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25928 int nregs;
25929
25930 cfun->machine->uses_anonymous_args = 1;
25931 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25932 {
25933 nregs = pcum->aapcs_ncrn;
25934 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25935 nregs++;
25936 }
25937 else
25938 nregs = pcum->nregs;
25939
25940 if (nregs < NUM_ARG_REGS)
25941 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25942 }
25943
25944 /* We can't rely on the caller doing the proper promotion when
25945 using APCS or ATPCS. */
25946
25947 static bool
25948 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25949 {
25950 return !TARGET_AAPCS_BASED;
25951 }
25952
25953 static machine_mode
25954 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25955 machine_mode mode,
25956 int *punsignedp ATTRIBUTE_UNUSED,
25957 const_tree fntype ATTRIBUTE_UNUSED,
25958 int for_return ATTRIBUTE_UNUSED)
25959 {
25960 if (GET_MODE_CLASS (mode) == MODE_INT
25961 && GET_MODE_SIZE (mode) < 4)
25962 return SImode;
25963
25964 return mode;
25965 }
25966
25967 /* AAPCS based ABIs use short enums by default. */
25968
25969 static bool
25970 arm_default_short_enums (void)
25971 {
25972 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25973 }
25974
25975
25976 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25977
25978 static bool
25979 arm_align_anon_bitfield (void)
25980 {
25981 return TARGET_AAPCS_BASED;
25982 }
25983
25984
25985 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25986
25987 static tree
25988 arm_cxx_guard_type (void)
25989 {
25990 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25991 }
25992
25993
25994 /* The EABI says test the least significant bit of a guard variable. */
25995
25996 static bool
25997 arm_cxx_guard_mask_bit (void)
25998 {
25999 return TARGET_AAPCS_BASED;
26000 }
26001
26002
26003 /* The EABI specifies that all array cookies are 8 bytes long. */
26004
26005 static tree
26006 arm_get_cookie_size (tree type)
26007 {
26008 tree size;
26009
26010 if (!TARGET_AAPCS_BASED)
26011 return default_cxx_get_cookie_size (type);
26012
26013 size = build_int_cst (sizetype, 8);
26014 return size;
26015 }
26016
26017
26018 /* The EABI says that array cookies should also contain the element size. */
26019
26020 static bool
26021 arm_cookie_has_size (void)
26022 {
26023 return TARGET_AAPCS_BASED;
26024 }
26025
26026
26027 /* The EABI says constructors and destructors should return a pointer to
26028 the object constructed/destroyed. */
26029
26030 static bool
26031 arm_cxx_cdtor_returns_this (void)
26032 {
26033 return TARGET_AAPCS_BASED;
26034 }
26035
26036 /* The EABI says that an inline function may never be the key
26037 method. */
26038
26039 static bool
26040 arm_cxx_key_method_may_be_inline (void)
26041 {
26042 return !TARGET_AAPCS_BASED;
26043 }
26044
26045 static void
26046 arm_cxx_determine_class_data_visibility (tree decl)
26047 {
26048 if (!TARGET_AAPCS_BASED
26049 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26050 return;
26051
26052 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26053 is exported. However, on systems without dynamic vague linkage,
26054 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26055 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26056 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26057 else
26058 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26059 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26060 }
26061
26062 static bool
26063 arm_cxx_class_data_always_comdat (void)
26064 {
26065 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26066 vague linkage if the class has no key function. */
26067 return !TARGET_AAPCS_BASED;
26068 }
26069
26070
26071 /* The EABI says __aeabi_atexit should be used to register static
26072 destructors. */
26073
26074 static bool
26075 arm_cxx_use_aeabi_atexit (void)
26076 {
26077 return TARGET_AAPCS_BASED;
26078 }
26079
26080
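/* Store SOURCE as the return address of the current function: directly in
   LR if LR was not saved, otherwise into the stack slot where the prologue
   saved LR, using SCRATCH as a temporary when that slot is out of
   immediate-offset range.  */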
26081 void
26082 arm_set_return_address (rtx source, rtx scratch)
26083 {
26084 arm_stack_offsets *offsets;
26085 HOST_WIDE_INT delta;
26086 rtx addr;
26087 unsigned long saved_regs;
26088
26089 offsets = arm_get_frame_offsets ();
26090 saved_regs = offsets->saved_regs_mask;
26091
26092 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26093 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26094 else
26095 {
26096 if (frame_pointer_needed)
26097 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26098 else
26099 {
26100 /* LR will be the first saved register. */
26101 delta = offsets->outgoing_args - (offsets->frame + 4);
26102
26103
26104 if (delta >= 4096)
26105 {
26106 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26107 GEN_INT (delta & ~4095)));
26108 addr = scratch;
26109 delta &= 4095;
26110 }
26111 else
26112 addr = stack_pointer_rtx;
26113
26114 addr = plus_constant (Pmode, addr, delta);
26115 }
26116 /* The store needs to be marked as frame related in order to prevent
26117 DSE from deleting it as dead if it is based on fp. */
26118 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26119 RTX_FRAME_RELATED_P (insn) = 1;
26120 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26121 }
26122 }
26123
26124
26125 void
26126 thumb_set_return_address (rtx source, rtx scratch)
26127 {
26128 arm_stack_offsets *offsets;
26129 HOST_WIDE_INT delta;
26130 HOST_WIDE_INT limit;
26131 int reg;
26132 rtx addr;
26133 unsigned long mask;
26134
26135 emit_use (source);
26136
26137 offsets = arm_get_frame_offsets ();
26138 mask = offsets->saved_regs_mask;
26139 if (mask & (1 << LR_REGNUM))
26140 {
26141 limit = 1024;
26142 /* Find the saved regs. */
26143 if (frame_pointer_needed)
26144 {
26145 delta = offsets->soft_frame - offsets->saved_args;
26146 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26147 if (TARGET_THUMB1)
26148 limit = 128;
26149 }
26150 else
26151 {
26152 delta = offsets->outgoing_args - offsets->saved_args;
26153 reg = SP_REGNUM;
26154 }
26155 /* Allow for the stack frame. */
26156 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26157 delta -= 16;
26158 /* The link register is always the first saved register. */
26159 delta -= 4;
26160
26161 /* Construct the address. */
26162 addr = gen_rtx_REG (SImode, reg);
26163 if (delta > limit)
26164 {
26165 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26166 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26167 addr = scratch;
26168 }
26169 else
26170 addr = plus_constant (Pmode, addr, delta);
26171
26172 /* The store needs to be marked as frame related in order to prevent
26173 DSE from deleting it as dead if it is based on fp. */
26174 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26175 RTX_FRAME_RELATED_P (insn) = 1;
26176 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26177 }
26178 else
26179 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26180 }
26181
26182 /* Implements target hook vector_mode_supported_p. */
26183 bool
26184 arm_vector_mode_supported_p (machine_mode mode)
26185 {
26186 /* Neon also supports V2SImode, etc. listed in the clause below. */
26187 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26188 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26189 return true;
26190
26191 if ((TARGET_NEON || TARGET_IWMMXT)
26192 && ((mode == V2SImode)
26193 || (mode == V4HImode)
26194 || (mode == V8QImode)))
26195 return true;
26196
26197 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26198 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26199 || mode == V2HAmode))
26200 return true;
26201
26202 return false;
26203 }
26204
26205 /* Implements target hook array_mode_supported_p. */
26206
26207 static bool
26208 arm_array_mode_supported_p (machine_mode mode,
26209 unsigned HOST_WIDE_INT nelems)
26210 {
26211 if (TARGET_NEON
26212 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26213 && (nelems >= 2 && nelems <= 4))
26214 return true;
26215
26216 return false;
26217 }
26218
26219 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26220 registers when autovectorizing for Neon, at least until multiple vector
26221 widths are supported properly by the middle-end. */
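/* For example, with Neon enabled the vectorizer is offered V4SImode for
   SImode data (or V2SImode under -mvectorize-with-neon-double), while
   iWMMXt targets only get the 64-bit vector modes.  */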
26222
26223 static machine_mode
26224 arm_preferred_simd_mode (machine_mode mode)
26225 {
26226 if (TARGET_NEON)
26227 switch (mode)
26228 {
26229 case SFmode:
26230 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26231 case SImode:
26232 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26233 case HImode:
26234 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26235 case QImode:
26236 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26237 case DImode:
26238 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26239 return V2DImode;
26240 break;
26241
26242 default:;
26243 }
26244
26245 if (TARGET_REALLY_IWMMXT)
26246 switch (mode)
26247 {
26248 case SImode:
26249 return V2SImode;
26250 case HImode:
26251 return V4HImode;
26252 case QImode:
26253 return V8QImode;
26254
26255 default:;
26256 }
26257
26258 return word_mode;
26259 }
26260
26261 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26262
26263 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26264 using r0-r4 for function arguments, r7 for the stack frame and don't have
26265 enough left over to do doubleword arithmetic. For Thumb-2 all the
26266 potentially problematic instructions accept high registers so this is not
26267 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26268 that require many low registers. */
26269 static bool
26270 arm_class_likely_spilled_p (reg_class_t rclass)
26271 {
26272 if ((TARGET_THUMB1 && rclass == LO_REGS)
26273 || rclass == CC_REG)
26274 return true;
26275
26276 return false;
26277 }
26278
26279 /* Implements target hook small_register_classes_for_mode_p. */
26280 bool
26281 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26282 {
26283 return TARGET_THUMB1;
26284 }
26285
26286 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26287 ARM insns and therefore guarantee that the shift count is modulo 256.
26288 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26289 guarantee no particular behavior for out-of-range counts. */
26290
26291 static unsigned HOST_WIDE_INT
26292 arm_shift_truncation_mask (machine_mode mode)
26293 {
26294 return mode == SImode ? 255 : 0;
26295 }
26296
26297
26298 /* Map internal gcc register numbers to DWARF2 register numbers. */
26299
26300 unsigned int
26301 arm_dbx_register_number (unsigned int regno)
26302 {
26303 if (regno < 16)
26304 return regno;
26305
26306 if (IS_VFP_REGNUM (regno))
26307 {
26308 /* See comment in arm_dwarf_register_span. */
26309 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26310 return 64 + regno - FIRST_VFP_REGNUM;
26311 else
26312 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26313 }
26314
26315 if (IS_IWMMXT_GR_REGNUM (regno))
26316 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26317
26318 if (IS_IWMMXT_REGNUM (regno))
26319 return 112 + regno - FIRST_IWMMXT_REGNUM;
26320
26321 gcc_unreachable ();
26322 }
26323
26324 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26325 GCC models them as 64 32-bit registers, so we need to describe this to
26326 the DWARF generation code. Other registers can use the default. */
26327 static rtx
26328 arm_dwarf_register_span (rtx rtl)
26329 {
26330 machine_mode mode;
26331 unsigned regno;
26332 rtx parts[16];
26333 int nregs;
26334 int i;
26335
26336 regno = REGNO (rtl);
26337 if (!IS_VFP_REGNUM (regno))
26338 return NULL_RTX;
26339
26340 /* XXX FIXME: The EABI defines two VFP register ranges:
26341 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26342 256-287: D0-D31
26343 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26344 corresponding D register. Until GDB supports this, we shall use the
26345 legacy encodings. We also use these encodings for D0-D15 for
26346 compatibility with older debuggers. */
26347 mode = GET_MODE (rtl);
26348 if (GET_MODE_SIZE (mode) < 8)
26349 return NULL_RTX;
26350
26351 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26352 {
26353 nregs = GET_MODE_SIZE (mode) / 4;
26354 for (i = 0; i < nregs; i += 2)
26355 if (TARGET_BIG_END)
26356 {
26357 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26358 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26359 }
26360 else
26361 {
26362 parts[i] = gen_rtx_REG (SImode, regno + i);
26363 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26364 }
26365 }
26366 else
26367 {
26368 nregs = GET_MODE_SIZE (mode) / 8;
26369 for (i = 0; i < nregs; i++)
26370 parts[i] = gen_rtx_REG (DImode, regno + i);
26371 }
26372
26373 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26374 }
26375
26376 #if ARM_UNWIND_INFO
26377 /* Emit unwind directives for a store-multiple instruction or stack pointer
26378 push during alignment.
26379 These should only ever be generated by the function prologue code, so
26380 expect them to have a particular form.
26381 The store-multiple instruction sometimes pushes pc as the last register,
26382 although it should not be tracked in the unwind information; for -Os it
26383 sometimes pushes dummy registers before the first register that needs
26384 to be tracked in the unwind information. Such dummy registers are there
26385 only to avoid a separate stack adjustment, and will not be restored in
26386 the epilogue. */
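/* The directives emitted below look, for example, like
       .pad #4
       .save {r4, r5, lr}
   or ".vsave {d8, d9}" for VFP register stores (examples are illustrative;
   the exact list depends on the registers actually saved).  */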
26387
26388 static void
26389 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26390 {
26391 int i;
26392 HOST_WIDE_INT offset;
26393 HOST_WIDE_INT nregs;
26394 int reg_size;
26395 unsigned reg;
26396 unsigned lastreg;
26397 unsigned padfirst = 0, padlast = 0;
26398 rtx e;
26399
26400 e = XVECEXP (p, 0, 0);
26401 gcc_assert (GET_CODE (e) == SET);
26402
26403 /* First insn will adjust the stack pointer. */
26404 gcc_assert (GET_CODE (e) == SET
26405 && REG_P (SET_DEST (e))
26406 && REGNO (SET_DEST (e)) == SP_REGNUM
26407 && GET_CODE (SET_SRC (e)) == PLUS);
26408
26409 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26410 nregs = XVECLEN (p, 0) - 1;
26411 gcc_assert (nregs);
26412
26413 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26414 if (reg < 16)
26415 {
26416 /* For -Os dummy registers can be pushed at the beginning to
26417 avoid separate stack pointer adjustment. */
26418 e = XVECEXP (p, 0, 1);
26419 e = XEXP (SET_DEST (e), 0);
26420 if (GET_CODE (e) == PLUS)
26421 padfirst = INTVAL (XEXP (e, 1));
26422 gcc_assert (padfirst == 0 || optimize_size);
26423 /* The function prologue may also push pc, but that push is not annotated,
26424 as pc is never restored. We turn this into a stack pointer adjustment. */
26425 e = XVECEXP (p, 0, nregs);
26426 e = XEXP (SET_DEST (e), 0);
26427 if (GET_CODE (e) == PLUS)
26428 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26429 else
26430 padlast = offset - 4;
26431 gcc_assert (padlast == 0 || padlast == 4);
26432 if (padlast == 4)
26433 fprintf (asm_out_file, "\t.pad #4\n");
26434 reg_size = 4;
26435 fprintf (asm_out_file, "\t.save {");
26436 }
26437 else if (IS_VFP_REGNUM (reg))
26438 {
26439 reg_size = 8;
26440 fprintf (asm_out_file, "\t.vsave {");
26441 }
26442 else
26443 /* Unknown register type. */
26444 gcc_unreachable ();
26445
26446 /* If the stack increment doesn't match the size of the saved registers,
26447 something has gone horribly wrong. */
26448 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26449
26450 offset = padfirst;
26451 lastreg = 0;
26452 /* The remaining insns will describe the stores. */
26453 for (i = 1; i <= nregs; i++)
26454 {
26455 /* Expect (set (mem <addr>) (reg)).
26456 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26457 e = XVECEXP (p, 0, i);
26458 gcc_assert (GET_CODE (e) == SET
26459 && MEM_P (SET_DEST (e))
26460 && REG_P (SET_SRC (e)));
26461
26462 reg = REGNO (SET_SRC (e));
26463 gcc_assert (reg >= lastreg);
26464
26465 if (i != 1)
26466 fprintf (asm_out_file, ", ");
26467 /* We can't use %r for vfp because we need to use the
26468 double precision register names. */
26469 if (IS_VFP_REGNUM (reg))
26470 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26471 else
26472 asm_fprintf (asm_out_file, "%r", reg);
26473
26474 #ifdef ENABLE_CHECKING
26475 /* Check that the addresses are consecutive. */
26476 e = XEXP (SET_DEST (e), 0);
26477 if (GET_CODE (e) == PLUS)
26478 gcc_assert (REG_P (XEXP (e, 0))
26479 && REGNO (XEXP (e, 0)) == SP_REGNUM
26480 && CONST_INT_P (XEXP (e, 1))
26481 && offset == INTVAL (XEXP (e, 1)));
26482 else
26483 gcc_assert (i == 1
26484 && REG_P (e)
26485 && REGNO (e) == SP_REGNUM);
26486 offset += reg_size;
26487 #endif
26488 }
26489 fprintf (asm_out_file, "}\n");
26490 if (padfirst)
26491 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26492 }
26493
26494 /* Emit unwind directives for a SET. */
26495
26496 static void
26497 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26498 {
26499 rtx e0;
26500 rtx e1;
26501 unsigned reg;
26502
26503 e0 = XEXP (p, 0);
26504 e1 = XEXP (p, 1);
26505 switch (GET_CODE (e0))
26506 {
26507 case MEM:
26508 /* Pushing a single register. */
26509 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26510 || !REG_P (XEXP (XEXP (e0, 0), 0))
26511 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26512 abort ();
26513
26514 asm_fprintf (asm_out_file, "\t.save ");
26515 if (IS_VFP_REGNUM (REGNO (e1)))
26516 asm_fprintf(asm_out_file, "{d%d}\n",
26517 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26518 else
26519 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26520 break;
26521
26522 case REG:
26523 if (REGNO (e0) == SP_REGNUM)
26524 {
26525 /* A stack increment. */
26526 if (GET_CODE (e1) != PLUS
26527 || !REG_P (XEXP (e1, 0))
26528 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26529 || !CONST_INT_P (XEXP (e1, 1)))
26530 abort ();
26531
26532 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26533 -INTVAL (XEXP (e1, 1)));
26534 }
26535 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26536 {
26537 HOST_WIDE_INT offset;
26538
26539 if (GET_CODE (e1) == PLUS)
26540 {
26541 if (!REG_P (XEXP (e1, 0))
26542 || !CONST_INT_P (XEXP (e1, 1)))
26543 abort ();
26544 reg = REGNO (XEXP (e1, 0));
26545 offset = INTVAL (XEXP (e1, 1));
26546 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26547 HARD_FRAME_POINTER_REGNUM, reg,
26548 offset);
26549 }
26550 else if (REG_P (e1))
26551 {
26552 reg = REGNO (e1);
26553 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26554 HARD_FRAME_POINTER_REGNUM, reg);
26555 }
26556 else
26557 abort ();
26558 }
26559 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26560 {
26561 /* Move from sp to reg. */
26562 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26563 }
26564 else if (GET_CODE (e1) == PLUS
26565 && REG_P (XEXP (e1, 0))
26566 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26567 && CONST_INT_P (XEXP (e1, 1)))
26568 {
26569 /* Set reg to offset from sp. */
26570 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26571 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26572 }
26573 else
26574 abort ();
26575 break;
26576
26577 default:
26578 abort ();
26579 }
26580 }
26581
26582
26583 /* Emit unwind directives for the given insn. */
26584
26585 static void
26586 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26587 {
26588 rtx note, pat;
26589 bool handled_one = false;
26590
26591 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26592 return;
26593
26594 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26595 && (TREE_NOTHROW (current_function_decl)
26596 || crtl->all_throwers_are_sibcalls))
26597 return;
26598
26599 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26600 return;
26601
26602 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26603 {
26604 switch (REG_NOTE_KIND (note))
26605 {
26606 case REG_FRAME_RELATED_EXPR:
26607 pat = XEXP (note, 0);
26608 goto found;
26609
26610 case REG_CFA_REGISTER:
26611 pat = XEXP (note, 0);
26612 if (pat == NULL)
26613 {
26614 pat = PATTERN (insn);
26615 if (GET_CODE (pat) == PARALLEL)
26616 pat = XVECEXP (pat, 0, 0);
26617 }
26618
26619 /* Only emitted for IS_STACKALIGN re-alignment. */
26620 {
26621 rtx dest, src;
26622 unsigned reg;
26623
26624 src = SET_SRC (pat);
26625 dest = SET_DEST (pat);
26626
26627 gcc_assert (src == stack_pointer_rtx);
26628 reg = REGNO (dest);
26629 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26630 reg + 0x90, reg);
26631 }
26632 handled_one = true;
26633 break;
26634
26635 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26636 to get correct dwarf information for shrink-wrapping. We should not
26637 emit unwind information for it, because such notes are used either for
26638 pretend arguments or to adjust sp and restore registers from the
26639 stack. */
26640 case REG_CFA_DEF_CFA:
26641 case REG_CFA_ADJUST_CFA:
26642 case REG_CFA_RESTORE:
26643 return;
26644
26645 case REG_CFA_EXPRESSION:
26646 case REG_CFA_OFFSET:
26647 /* ??? Only handling here what we actually emit. */
26648 gcc_unreachable ();
26649
26650 default:
26651 break;
26652 }
26653 }
26654 if (handled_one)
26655 return;
26656 pat = PATTERN (insn);
26657 found:
26658
26659 switch (GET_CODE (pat))
26660 {
26661 case SET:
26662 arm_unwind_emit_set (asm_out_file, pat);
26663 break;
26664
26665 case SEQUENCE:
26666 /* Store multiple. */
26667 arm_unwind_emit_sequence (asm_out_file, pat);
26668 break;
26669
26670 default:
26671 abort();
26672 }
26673 }
26674
26675
26676 /* Output a reference from a function exception table to the type_info
26677 object X. The EABI specifies that the symbol should be relocated by
26678 an R_ARM_TARGET2 relocation. */
26679
26680 static bool
26681 arm_output_ttype (rtx x)
26682 {
26683 fputs ("\t.word\t", asm_out_file);
26684 output_addr_const (asm_out_file, x);
26685 /* Use special relocations for symbol references. */
26686 if (!CONST_INT_P (x))
26687 fputs ("(TARGET2)", asm_out_file);
26688 fputc ('\n', asm_out_file);
26689
26690 return TRUE;
26691 }
26692
26693 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26694
26695 static void
26696 arm_asm_emit_except_personality (rtx personality)
26697 {
26698 fputs ("\t.personality\t", asm_out_file);
26699 output_addr_const (asm_out_file, personality);
26700 fputc ('\n', asm_out_file);
26701 }
26702
26703 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26704
26705 static void
26706 arm_asm_init_sections (void)
26707 {
26708 exception_section = get_unnamed_section (0, output_section_asm_op,
26709 "\t.handlerdata");
26710 }
26711 #endif /* ARM_UNWIND_INFO */
26712
26713 /* Output unwind directives for the start/end of a function. */
26714
26715 void
26716 arm_output_fn_unwind (FILE * f, bool prologue)
26717 {
26718 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26719 return;
26720
26721 if (prologue)
26722 fputs ("\t.fnstart\n", f);
26723 else
26724 {
26725 /* If this function will never be unwound, then mark it as such.
26726 The same condition is used in arm_unwind_emit to suppress
26727 the frame annotations. */
26728 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26729 && (TREE_NOTHROW (current_function_decl)
26730 || crtl->all_throwers_are_sibcalls))
26731 fputs("\t.cantunwind\n", f);
26732
26733 fputs ("\t.fnend\n", f);
26734 }
26735 }
26736
26737 static bool
26738 arm_emit_tls_decoration (FILE *fp, rtx x)
26739 {
26740 enum tls_reloc reloc;
26741 rtx val;
26742
26743 val = XVECEXP (x, 0, 0);
26744 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26745
26746 output_addr_const (fp, val);
26747
26748 switch (reloc)
26749 {
26750 case TLS_GD32:
26751 fputs ("(tlsgd)", fp);
26752 break;
26753 case TLS_LDM32:
26754 fputs ("(tlsldm)", fp);
26755 break;
26756 case TLS_LDO32:
26757 fputs ("(tlsldo)", fp);
26758 break;
26759 case TLS_IE32:
26760 fputs ("(gottpoff)", fp);
26761 break;
26762 case TLS_LE32:
26763 fputs ("(tpoff)", fp);
26764 break;
26765 case TLS_DESCSEQ:
26766 fputs ("(tlsdesc)", fp);
26767 break;
26768 default:
26769 gcc_unreachable ();
26770 }
26771
26772 switch (reloc)
26773 {
26774 case TLS_GD32:
26775 case TLS_LDM32:
26776 case TLS_IE32:
26777 case TLS_DESCSEQ:
26778 fputs (" + (. - ", fp);
26779 output_addr_const (fp, XVECEXP (x, 0, 2));
26780 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
26781 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26782 output_addr_const (fp, XVECEXP (x, 0, 3));
26783 fputc (')', fp);
26784 break;
26785 default:
26786 break;
26787 }
26788
26789 return TRUE;
26790 }
26791
26792 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26793
26794 static void
26795 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26796 {
26797 gcc_assert (size == 4);
26798 fputs ("\t.word\t", file);
26799 output_addr_const (file, x);
26800 fputs ("(tlsldo)", file);
26801 }
26802
26803 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26804
26805 static bool
26806 arm_output_addr_const_extra (FILE *fp, rtx x)
26807 {
26808 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26809 return arm_emit_tls_decoration (fp, x);
26810 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26811 {
26812 char label[256];
26813 int labelno = INTVAL (XVECEXP (x, 0, 0));
26814
26815 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26816 assemble_name_raw (fp, label);
26817
26818 return TRUE;
26819 }
26820 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26821 {
26822 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26823 if (GOT_PCREL)
26824 fputs ("+.", fp);
26825 fputs ("-(", fp);
26826 output_addr_const (fp, XVECEXP (x, 0, 0));
26827 fputc (')', fp);
26828 return TRUE;
26829 }
26830 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26831 {
26832 output_addr_const (fp, XVECEXP (x, 0, 0));
26833 if (GOT_PCREL)
26834 fputs ("+.", fp);
26835 fputs ("-(", fp);
26836 output_addr_const (fp, XVECEXP (x, 0, 1));
26837 fputc (')', fp);
26838 return TRUE;
26839 }
26840 else if (GET_CODE (x) == CONST_VECTOR)
26841 return arm_emit_vector_const (fp, x);
26842
26843 return FALSE;
26844 }
26845
26846 /* Output assembly for a shift instruction.
26847 SET_FLAGS determines how the instruction modifies the condition codes.
26848 0 - Do not set condition codes.
26849 1 - Set condition codes.
26850 2 - Use smallest instruction. */
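/* With unified syntax the output uses the mnemonic returned by shift_op
   together with a conditional/flag-setting modifier and three operands;
   without unified syntax a plain mov with the %S3 shift-operand modifier
   is emitted instead.  */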
26851 const char *
26852 arm_output_shift(rtx * operands, int set_flags)
26853 {
26854 char pattern[100];
26855 static const char flag_chars[3] = {'?', '.', '!'};
26856 const char *shift;
26857 HOST_WIDE_INT val;
26858 char c;
26859
26860 c = flag_chars[set_flags];
26861 if (TARGET_UNIFIED_ASM)
26862 {
26863 shift = shift_op(operands[3], &val);
26864 if (shift)
26865 {
26866 if (val != -1)
26867 operands[2] = GEN_INT(val);
26868 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26869 }
26870 else
26871 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26872 }
26873 else
26874 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26875 output_asm_insn (pattern, operands);
26876 return "";
26877 }
26878
26879 /* Output assembly for a WMMX immediate shift instruction. */
26880 const char *
26881 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26882 {
26883 int shift = INTVAL (operands[2]);
26884 char templ[50];
26885 machine_mode opmode = GET_MODE (operands[0]);
26886
26887 gcc_assert (shift >= 0);
26888
26889 /* The shift value may exceed the element width: more than 63 for the D
26890 qualifier, 31 for W, or 15 for H; handle those out-of-range cases here. */
26891 if (((opmode == V4HImode) && (shift > 15))
26892 || ((opmode == V2SImode) && (shift > 31))
26893 || ((opmode == DImode) && (shift > 63)))
26894 {
26895 if (wror_or_wsra)
26896 {
26897 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26898 output_asm_insn (templ, operands);
26899 if (opmode == DImode)
26900 {
26901 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26902 output_asm_insn (templ, operands);
26903 }
26904 }
26905 else
26906 {
26907 /* The destination register will contain all zeros. */
26908 sprintf (templ, "wzero\t%%0");
26909 output_asm_insn (templ, operands);
26910 }
26911 return "";
26912 }
26913
26914 if ((opmode == DImode) && (shift > 32))
26915 {
26916 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26917 output_asm_insn (templ, operands);
26918 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26919 output_asm_insn (templ, operands);
26920 }
26921 else
26922 {
26923 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26924 output_asm_insn (templ, operands);
26925 }
26926 return "";
26927 }
26928
26929 /* Output assembly for a WMMX tinsr instruction. */
26930 const char *
26931 arm_output_iwmmxt_tinsr (rtx *operands)
26932 {
26933 int mask = INTVAL (operands[3]);
26934 int i;
26935 char templ[50];
26936 int units = mode_nunits[GET_MODE (operands[0])];
26937 gcc_assert ((mask & (mask - 1)) == 0);
26938 for (i = 0; i < units; ++i)
26939 {
26940 if ((mask & 0x01) == 1)
26941 {
26942 break;
26943 }
26944 mask >>= 1;
26945 }
26946 gcc_assert (i < units);
26947 {
26948 switch (GET_MODE (operands[0]))
26949 {
26950 case V8QImode:
26951 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26952 break;
26953 case V4HImode:
26954 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26955 break;
26956 case V2SImode:
26957 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26958 break;
26959 default:
26960 gcc_unreachable ();
26961 break;
26962 }
26963 output_asm_insn (templ, operands);
26964 }
26965 return "";
26966 }
26967
26968 /* Output a Thumb-1 casesi dispatch sequence. */
26969 const char *
26970 thumb1_output_casesi (rtx *operands)
26971 {
26972 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26973
26974 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26975
26976 switch (GET_MODE(diff_vec))
26977 {
26978 case QImode:
26979 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26980 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26981 case HImode:
26982 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26983 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26984 case SImode:
26985 return "bl\t%___gnu_thumb1_case_si";
26986 default:
26987 gcc_unreachable ();
26988 }
26989 }
26990
26991 /* Output a Thumb-2 casesi instruction. */
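/* The dispatch sequence is a bounds check (cmp/bhi) followed by a table
   branch: tbb/tbh for byte/halfword offset tables, or an explicit adr/ldr
   sequence for SImode tables (the PIC case needs an extra add).  */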
26992 const char *
26993 thumb2_output_casesi (rtx *operands)
26994 {
26995 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
26996
26997 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26998
26999 output_asm_insn ("cmp\t%0, %1", operands);
27000 output_asm_insn ("bhi\t%l3", operands);
27001 switch (GET_MODE(diff_vec))
27002 {
27003 case QImode:
27004 return "tbb\t[%|pc, %0]";
27005 case HImode:
27006 return "tbh\t[%|pc, %0, lsl #1]";
27007 case SImode:
27008 if (flag_pic)
27009 {
27010 output_asm_insn ("adr\t%4, %l2", operands);
27011 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27012 output_asm_insn ("add\t%4, %4, %5", operands);
27013 return "bx\t%4";
27014 }
27015 else
27016 {
27017 output_asm_insn ("adr\t%4, %l2", operands);
27018 return "ldr\t%|pc, [%4, %0, lsl #2]";
27019 }
27020 default:
27021 gcc_unreachable ();
27022 }
27023 }
27024
27025 /* Most ARM cores are single issue, but some newer ones can issue two or three
27026 instructions per cycle. The scheduler descriptions rely on this being correct. */
27027 static int
27028 arm_issue_rate (void)
27029 {
27030 switch (arm_tune)
27031 {
27032 case cortexa15:
27033 case cortexa57:
27034 return 3;
27035
27036 case cortexm7:
27037 case cortexr4:
27038 case cortexr4f:
27039 case cortexr5:
27040 case genericv7a:
27041 case cortexa5:
27042 case cortexa7:
27043 case cortexa8:
27044 case cortexa9:
27045 case cortexa12:
27046 case cortexa17:
27047 case cortexa53:
27048 case fa726te:
27049 case marvell_pj4:
27050 return 2;
27051
27052 default:
27053 return 1;
27054 }
27055 }
27056
27057 const char *
27058 arm_mangle_type (const_tree type)
27059 {
27060 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27061 has to be mangled as if it is in the "std" namespace. */
27062 if (TARGET_AAPCS_BASED
27063 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27064 return "St9__va_list";
27065
27066 /* Half-precision float. */
27067 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27068 return "Dh";
27069
27070 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27071 builtin type. */
27072 if (TYPE_NAME (type) != NULL)
27073 return arm_mangle_builtin_type (type);
27074
27075 /* Use the default mangling. */
27076 return NULL;
27077 }
27078
27079 /* Order of allocation of core registers for Thumb: this allocation is
27080 written over the corresponding initial entries of the array
27081 initialized with REG_ALLOC_ORDER. We allocate all low registers
27082 first. Saving and restoring a low register is usually cheaper than
27083 using a call-clobbered high register. */
27084
27085 static const int thumb_core_reg_alloc_order[] =
27086 {
27087 3, 2, 1, 0, 4, 5, 6, 7,
27088 14, 12, 8, 9, 10, 11
27089 };
27090
27091 /* Adjust register allocation order when compiling for Thumb. */
27092
27093 void
27094 arm_order_regs_for_local_alloc (void)
27095 {
27096 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27097 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27098 if (TARGET_THUMB)
27099 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27100 sizeof (thumb_core_reg_alloc_order));
27101 }
27102
27103 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27104
27105 bool
27106 arm_frame_pointer_required (void)
27107 {
27108 return (cfun->has_nonlocal_label
27109 || SUBTARGET_FRAME_POINTER_REQUIRED
27110 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27111 }
27112
27113 /* Thumb-1 is the only target that lacks conditional execution, so return
27114 true unless the target is Thumb-1. */
27115 static bool
27116 arm_have_conditional_execution (void)
27117 {
27118 return !TARGET_THUMB1;
27119 }
27120
27121 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27122 static HOST_WIDE_INT
27123 arm_vector_alignment (const_tree type)
27124 {
27125 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27126
27127 if (TARGET_AAPCS_BASED)
27128 align = MIN (align, 64);
27129
27130 return align;
27131 }
27132
27133 static unsigned int
27134 arm_autovectorize_vector_sizes (void)
27135 {
27136 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27137 }
27138
27139 static bool
27140 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27141 {
27142 /* Vectors which aren't in packed structures will not be less aligned than
27143 the natural alignment of their element type, so this is safe. */
27144 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27145 return !is_packed;
27146
27147 return default_builtin_vector_alignment_reachable (type, is_packed);
27148 }
27149
27150 static bool
27151 arm_builtin_support_vector_misalignment (machine_mode mode,
27152 const_tree type, int misalignment,
27153 bool is_packed)
27154 {
27155 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27156 {
27157 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27158
27159 if (is_packed)
27160 return align == 1;
27161
27162 /* If the misalignment is unknown, we should be able to handle the access
27163 so long as it is not to a member of a packed data structure. */
27164 if (misalignment == -1)
27165 return true;
27166
27167 /* Return true if the misalignment is a multiple of the natural alignment
27168 of the vector's element type. This is probably always going to be
27169 true in practice, since we've already established that this isn't a
27170 packed access. */
27171 return ((misalignment % align) == 0);
27172 }
27173
27174 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27175 is_packed);
27176 }
27177
27178 static void
27179 arm_conditional_register_usage (void)
27180 {
27181 int regno;
27182
27183 if (TARGET_THUMB1 && optimize_size)
27184 {
27185 /* When optimizing for size on Thumb-1, it's better not
27186 to use the HI regs, because of the overhead of
27187 stacking them. */
27188 for (regno = FIRST_HI_REGNUM;
27189 regno <= LAST_HI_REGNUM; ++regno)
27190 fixed_regs[regno] = call_used_regs[regno] = 1;
27191 }
27192
27193 /* The link register can be clobbered by any branch insn,
27194 but we have no way to track that at present, so mark
27195 it as unavailable. */
27196 if (TARGET_THUMB1)
27197 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27198
27199 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27200 {
27201 /* VFPv3 registers are disabled when earlier VFP
27202 versions are selected due to the definition of
27203 LAST_VFP_REGNUM. */
27204 for (regno = FIRST_VFP_REGNUM;
27205 regno <= LAST_VFP_REGNUM; ++ regno)
27206 {
27207 fixed_regs[regno] = 0;
27208 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27209 || regno >= FIRST_VFP_REGNUM + 32;
27210 }
27211 }
27212
27213 if (TARGET_REALLY_IWMMXT)
27214 {
27215 regno = FIRST_IWMMXT_GR_REGNUM;
27216 /* The 2002/10/09 revision of the XScale ABI has wCG0
27217 and wCG1 as call-preserved registers. The 2002/11/21
27218 revision changed this so that all wCG registers are
27219 scratch registers. */
27220 for (regno = FIRST_IWMMXT_GR_REGNUM;
27221 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27222 fixed_regs[regno] = 0;
27223 /* The XScale ABI has wR0 - wR9 as scratch registers,
27224 the rest as call-preserved registers. */
27225 for (regno = FIRST_IWMMXT_REGNUM;
27226 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27227 {
27228 fixed_regs[regno] = 0;
27229 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27230 }
27231 }
27232
27233 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27234 {
27235 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27236 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27237 }
27238 else if (TARGET_APCS_STACK)
27239 {
27240 fixed_regs[10] = 1;
27241 call_used_regs[10] = 1;
27242 }
27243 /* -mcaller-super-interworking reserves r11 for calls to
27244 _interwork_r11_call_via_rN(). Making the register global
27245 is an easy way of ensuring that it remains valid for all
27246 calls. */
27247 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27248 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27249 {
27250 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27251 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27252 if (TARGET_CALLER_INTERWORKING)
27253 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27254 }
27255 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27256 }
27257
27258 static reg_class_t
27259 arm_preferred_rename_class (reg_class_t rclass)
27260 {
27261 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27262 using GENERAL_REGS, so during the register rename pass we prefer LO_REGS,
27263 which can reduce code size. */
27264 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27265 return LO_REGS;
27266 else
27267 return NO_REGS;
27268 }
27269
27270 /* Compute the attribute "length" of insn "*push_multi".
27271 So this function MUST be kept in sync with that insn pattern. */
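/* For example, "push {r4, r5, lr}" can use the 16-bit Thumb-2 encoding
   (length 2), while pushing any high register other than LR, e.g.
   "push {r4, r8}", forces the 32-bit encoding (length 4).  */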
27272 int
27273 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27274 {
27275 int i, regno, hi_reg;
27276 int num_saves = XVECLEN (parallel_op, 0);
27277
27278 /* ARM mode. */
27279 if (TARGET_ARM)
27280 return 4;
27281 /* Thumb1 mode. */
27282 if (TARGET_THUMB1)
27283 return 2;
27284
27285 /* Thumb2 mode. */
27286 regno = REGNO (first_op);
27287 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27288 for (i = 1; i < num_saves && !hi_reg; i++)
27289 {
27290 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27291 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27292 }
27293
27294 if (!hi_reg)
27295 return 2;
27296 return 4;
27297 }
27298
27299 /* Compute the number of instructions emitted by output_move_double. */
27300 int
27301 arm_count_output_move_double_insns (rtx *operands)
27302 {
27303 int count;
27304 rtx ops[2];
27305 /* output_move_double may modify the operands array, so call it
27306 here on a copy of the array. */
27307 ops[0] = operands[0];
27308 ops[1] = operands[1];
27309 output_move_double (ops, false, &count);
27310 return count;
27311 }
27312
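/* Return the number of fraction bits N if OPERAND is an exact power-of-two
   reciprocal 1/2^N (presumably for the VFPv3 fixed-point conversion
   patterns), or 0 if it is not of that form.  */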
27313 int
27314 vfp3_const_double_for_fract_bits (rtx operand)
27315 {
27316 REAL_VALUE_TYPE r0;
27317
27318 if (!CONST_DOUBLE_P (operand))
27319 return 0;
27320
27321 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27322 if (exact_real_inverse (DFmode, &r0))
27323 {
27324 if (exact_real_truncate (DFmode, &r0))
27325 {
27326 HOST_WIDE_INT value = real_to_integer (&r0);
27327 value = value & 0xffffffff;
27328 if ((value != 0) && ( (value & (value - 1)) == 0))
27329 return int_log2 (value);
27330 }
27331 }
27332 return 0;
27333 }
27334
27335 int
27336 vfp3_const_double_for_bits (rtx operand)
27337 {
27338 REAL_VALUE_TYPE r0;
27339
27340 if (!CONST_DOUBLE_P (operand))
27341 return 0;
27342
27343 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27344 if (exact_real_truncate (DFmode, &r0))
27345 {
27346 HOST_WIDE_INT value = real_to_integer (&r0);
27347 value = value & 0xffffffff;
27348 if ((value != 0) && ( (value & (value - 1)) == 0))
27349 return int_log2 (value);
27350 }
27351
27352 return 0;
27353 }
27354 \f
27355 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27356
27357 static void
27358 arm_pre_atomic_barrier (enum memmodel model)
27359 {
27360 if (need_atomic_barrier_p (model, true))
27361 emit_insn (gen_memory_barrier ());
27362 }
27363
27364 static void
27365 arm_post_atomic_barrier (enum memmodel model)
27366 {
27367 if (need_atomic_barrier_p (model, false))
27368 emit_insn (gen_memory_barrier ());
27369 }
27370
27371 /* Emit the load-exclusive and store-exclusive instructions.
27372 Use acquire and release versions if necessary. */
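/* On targets with the load-acquire/store-release extension this selects
   the ldaex/stlex family; otherwise the plain ldrex/strex forms are used
   (mnemonics given for illustration).  */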
27373
27374 static void
27375 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27376 {
27377 rtx (*gen) (rtx, rtx);
27378
27379 if (acq)
27380 {
27381 switch (mode)
27382 {
27383 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27384 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27385 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27386 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27387 default:
27388 gcc_unreachable ();
27389 }
27390 }
27391 else
27392 {
27393 switch (mode)
27394 {
27395 case QImode: gen = gen_arm_load_exclusiveqi; break;
27396 case HImode: gen = gen_arm_load_exclusivehi; break;
27397 case SImode: gen = gen_arm_load_exclusivesi; break;
27398 case DImode: gen = gen_arm_load_exclusivedi; break;
27399 default:
27400 gcc_unreachable ();
27401 }
27402 }
27403
27404 emit_insn (gen (rval, mem));
27405 }
27406
27407 static void
27408 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27409 rtx mem, bool rel)
27410 {
27411 rtx (*gen) (rtx, rtx, rtx);
27412
27413 if (rel)
27414 {
27415 switch (mode)
27416 {
27417 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27418 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27419 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27420 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27421 default:
27422 gcc_unreachable ();
27423 }
27424 }
27425 else
27426 {
27427 switch (mode)
27428 {
27429 case QImode: gen = gen_arm_store_exclusiveqi; break;
27430 case HImode: gen = gen_arm_store_exclusivehi; break;
27431 case SImode: gen = gen_arm_store_exclusivesi; break;
27432 case DImode: gen = gen_arm_store_exclusivedi; break;
27433 default:
27434 gcc_unreachable ();
27435 }
27436 }
27437
27438 emit_insn (gen (bval, rval, mem));
27439 }
27440
27441 /* Mark the previous jump instruction as unlikely. */
27442
27443 static void
27444 emit_unlikely_jump (rtx insn)
27445 {
27446 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27447
27448 insn = emit_jump_insn (insn);
27449 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27450 }
27451
27452 /* Expand a compare and swap pattern. */
27453
27454 void
27455 arm_expand_compare_and_swap (rtx operands[])
27456 {
27457 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27458 machine_mode mode;
27459 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27460
27461 bval = operands[0];
27462 rval = operands[1];
27463 mem = operands[2];
27464 oldval = operands[3];
27465 newval = operands[4];
27466 is_weak = operands[5];
27467 mod_s = operands[6];
27468 mod_f = operands[7];
27469 mode = GET_MODE (mem);
27470
27471 /* Normally the succ memory model must be stronger than fail, but in the
27472 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27473 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27474
27475 if (TARGET_HAVE_LDACQ
27476 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27477 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27478 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27479
27480 switch (mode)
27481 {
27482 case QImode:
27483 case HImode:
27484 /* For narrow modes, we're going to perform the comparison in SImode,
27485 so do the zero-extension now. */
27486 rval = gen_reg_rtx (SImode);
27487 oldval = convert_modes (SImode, mode, oldval, true);
27488 /* FALLTHRU */
27489
27490 case SImode:
27491 /* Force the value into a register if needed. We waited until after
27492 the zero-extension above to do this properly. */
27493 if (!arm_add_operand (oldval, SImode))
27494 oldval = force_reg (SImode, oldval);
27495 break;
27496
27497 case DImode:
27498 if (!cmpdi_operand (oldval, mode))
27499 oldval = force_reg (mode, oldval);
27500 break;
27501
27502 default:
27503 gcc_unreachable ();
27504 }
27505
27506 switch (mode)
27507 {
27508 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27509 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27510 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27511 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27512 default:
27513 gcc_unreachable ();
27514 }
27515
27516 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27517
27518 if (mode == QImode || mode == HImode)
27519 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27520
27521 /* In all cases, we arrange for success to be signaled by Z set.
27522 This arrangement allows for the boolean result to be used directly
27523 in a subsequent branch, post optimization. */
27524 x = gen_rtx_REG (CCmode, CC_REGNUM);
27525 x = gen_rtx_EQ (SImode, x, const0_rtx);
27526 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27527 }
27528
27529 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27530 another memory store between the load-exclusive and store-exclusive can
27531 reset the monitor from Exclusive to Open state. This means we must wait
27532 until after reload to split the pattern, lest we get a register spill in
27533 the middle of the atomic sequence. */
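/* The split form is, schematically (strong version shown; this is only a
   sketch, the real sequence is produced by the code below):

       1: ldrex  rval, [mem]
          cmp    rval, oldval
          bne    2f
          strex  scratch, newval, [mem]
          cmp    scratch, #0
          bne    1b
       2:

   with barriers or acquire/release variants chosen from the memory model.  */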
27534
27535 void
27536 arm_split_compare_and_swap (rtx operands[])
27537 {
27538 rtx rval, mem, oldval, newval, scratch;
27539 machine_mode mode;
27540 enum memmodel mod_s, mod_f;
27541 bool is_weak;
27542 rtx_code_label *label1, *label2;
27543 rtx x, cond;
27544
27545 rval = operands[0];
27546 mem = operands[1];
27547 oldval = operands[2];
27548 newval = operands[3];
27549 is_weak = (operands[4] != const0_rtx);
27550 mod_s = (enum memmodel) INTVAL (operands[5]);
27551 mod_f = (enum memmodel) INTVAL (operands[6]);
27552 scratch = operands[7];
27553 mode = GET_MODE (mem);
27554
27555 bool use_acquire = TARGET_HAVE_LDACQ
27556 && !(mod_s == MEMMODEL_RELAXED
27557 || mod_s == MEMMODEL_CONSUME
27558 || mod_s == MEMMODEL_RELEASE);
27559
27560 bool use_release = TARGET_HAVE_LDACQ
27561 && !(mod_s == MEMMODEL_RELAXED
27562 || mod_s == MEMMODEL_CONSUME
27563 || mod_s == MEMMODEL_ACQUIRE);
27564
27565 /* Checks whether a barrier is needed and emits one accordingly. */
27566 if (!(use_acquire || use_release))
27567 arm_pre_atomic_barrier (mod_s);
27568
27569 label1 = NULL;
27570 if (!is_weak)
27571 {
27572 label1 = gen_label_rtx ();
27573 emit_label (label1);
27574 }
27575 label2 = gen_label_rtx ();
27576
27577 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27578
27579 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27580 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27581 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27582 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27583 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27584
27585 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27586
27587 /* Weak or strong, we want EQ to be true for success, so that we
27588 match the flags that we got from the compare above. */
27589 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27590 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27591 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27592
27593 if (!is_weak)
27594 {
27595 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27596 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27597 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27598 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27599 }
27600
27601 if (mod_f != MEMMODEL_RELAXED)
27602 emit_label (label2);
27603
27604 /* Checks whether a barrier is needed and emits one accordingly. */
27605 if (!(use_acquire || use_release))
27606 arm_post_atomic_barrier (mod_s);
27607
27608 if (mod_f == MEMMODEL_RELAXED)
27609 emit_label (label2);
27610 }
27611
27612 void
27613 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27614 rtx value, rtx model_rtx, rtx cond)
27615 {
27616 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27617 machine_mode mode = GET_MODE (mem);
27618 machine_mode wmode = (mode == DImode ? DImode : SImode);
27619 rtx_code_label *label;
27620 rtx x;
27621
27622 bool use_acquire = TARGET_HAVE_LDACQ
27623 && !(model == MEMMODEL_RELAXED
27624 || model == MEMMODEL_CONSUME
27625 || model == MEMMODEL_RELEASE);
27626
27627 bool use_release = TARGET_HAVE_LDACQ
27628 && !(model == MEMMODEL_RELAXED
27629 || model == MEMMODEL_CONSUME
27630 || model == MEMMODEL_ACQUIRE);
27631
27632 /* Checks whether a barrier is needed and emits one accordingly. */
27633 if (!(use_acquire || use_release))
27634 arm_pre_atomic_barrier (model);
27635
27636 label = gen_label_rtx ();
27637 emit_label (label);
27638
27639 if (new_out)
27640 new_out = gen_lowpart (wmode, new_out);
27641 if (old_out)
27642 old_out = gen_lowpart (wmode, old_out);
27643 else
27644 old_out = new_out;
27645 value = simplify_gen_subreg (wmode, value, mode, 0);
27646
27647 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27648
27649 switch (code)
27650 {
27651 case SET:
27652 new_out = value;
27653 break;
27654
27655 case NOT:
27656 x = gen_rtx_AND (wmode, old_out, value);
27657 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27658 x = gen_rtx_NOT (wmode, new_out);
27659 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27660 break;
27661
27662 case MINUS:
27663 if (CONST_INT_P (value))
27664 {
27665 value = GEN_INT (-INTVAL (value));
27666 code = PLUS;
27667 }
27668 /* FALLTHRU */
27669
27670 case PLUS:
27671 if (mode == DImode)
27672 {
27673 /* DImode plus/minus need to clobber flags. */
27674 /* The adddi3 and subdi3 patterns are incorrectly written so that
27675 they require matching operands, even when we could easily support
27676 three operands. Thankfully, this can be fixed up post-splitting,
27677 as the individual add+adc patterns do accept three operands and
27678 post-reload cprop can make these moves go away. */
27679 emit_move_insn (new_out, old_out);
27680 if (code == PLUS)
27681 x = gen_adddi3 (new_out, new_out, value);
27682 else
27683 x = gen_subdi3 (new_out, new_out, value);
27684 emit_insn (x);
27685 break;
27686 }
27687 /* FALLTHRU */
27688
27689 default:
27690 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27691 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27692 break;
27693 }
27694
27695 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27696 use_release);
27697
27698 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27699 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27700
27701 /* Checks whether a barrier is needed and emits one accordingly. */
27702 if (!(use_acquire || use_release))
27703 arm_post_atomic_barrier (model);
27704 }
27705 \f
27706 #define MAX_VECT_LEN 16
27707
27708 struct expand_vec_perm_d
27709 {
27710 rtx target, op0, op1;
27711 unsigned char perm[MAX_VECT_LEN];
27712 machine_mode vmode;
27713 unsigned char nelt;
27714 bool one_vector_p;
27715 bool testing_p;
27716 };
27717
27718 /* Generate a variable permutation. */
27719
27720 static void
27721 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27722 {
27723 machine_mode vmode = GET_MODE (target);
27724 bool one_vector_p = rtx_equal_p (op0, op1);
27725
27726 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27727 gcc_checking_assert (GET_MODE (op0) == vmode);
27728 gcc_checking_assert (GET_MODE (op1) == vmode);
27729 gcc_checking_assert (GET_MODE (sel) == vmode);
27730 gcc_checking_assert (TARGET_NEON);
27731
27732 if (one_vector_p)
27733 {
27734 if (vmode == V8QImode)
27735 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27736 else
27737 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27738 }
27739 else
27740 {
27741 rtx pair;
27742
27743 if (vmode == V8QImode)
27744 {
27745 pair = gen_reg_rtx (V16QImode);
27746 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27747 pair = gen_lowpart (TImode, pair);
27748 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27749 }
27750 else
27751 {
27752 pair = gen_reg_rtx (OImode);
27753 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27754 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27755 }
27756 }
27757 }
27758
27759 void
27760 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27761 {
27762 machine_mode vmode = GET_MODE (target);
27763 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27764 bool one_vector_p = rtx_equal_p (op0, op1);
27765 rtx rmask[MAX_VECT_LEN], mask;
27766
27767 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27768 numbering of elements for big-endian, we must reverse the order. */
27769 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27770
27771 /* The VTBL instruction does not use a modulo index, so we must take care
27772 of that ourselves. */
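/* For example, with a single input vector in V8QImode (nelt == 8) a selector
   element of 11 is reduced to 11 & 7 == 3, matching VEC_PERM_EXPR's
   requirement that indices wrap modulo the number of elements.  */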
27773 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27774 for (i = 0; i < nelt; ++i)
27775 rmask[i] = mask;
27776 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27777 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27778
27779 arm_expand_vec_perm_1 (target, op0, op1, sel);
27780 }
27781
27782 /* Generate or test for an insn that supports a constant permutation. */
27783
27784 /* Recognize patterns for the VUZP insns. */
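/* For example, in V4SImode with two distinct inputs the accepted selectors
   are { 0, 2, 4, 6 } (odd == 0) and { 1, 3, 5, 7 } (odd == 1), i.e. the
   even- or odd-numbered elements of the two concatenated inputs; the
   required half is obtained by binding d->target to the appropriate output
   of the VUZP emitted below.  */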
27785
27786 static bool
27787 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27788 {
27789 unsigned int i, odd, mask, nelt = d->nelt;
27790 rtx out0, out1, in0, in1, x;
27791 rtx (*gen)(rtx, rtx, rtx, rtx);
27792
27793 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27794 return false;
27795
27796 /* Note that these are little-endian tests. Adjust for big-endian later. */
27797 if (d->perm[0] == 0)
27798 odd = 0;
27799 else if (d->perm[0] == 1)
27800 odd = 1;
27801 else
27802 return false;
27803 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27804
27805 for (i = 0; i < nelt; i++)
27806 {
27807 unsigned elt = (i * 2 + odd) & mask;
27808 if (d->perm[i] != elt)
27809 return false;
27810 }
27811
27812 /* Success! */
27813 if (d->testing_p)
27814 return true;
27815
27816 switch (d->vmode)
27817 {
27818 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27819 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27820 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27821 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27822 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27823 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27824 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27825 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27826 default:
27827 gcc_unreachable ();
27828 }
27829
27830 in0 = d->op0;
27831 in1 = d->op1;
27832 if (BYTES_BIG_ENDIAN)
27833 {
27834 x = in0, in0 = in1, in1 = x;
27835 odd = !odd;
27836 }
27837
27838 out0 = d->target;
27839 out1 = gen_reg_rtx (d->vmode);
27840 if (odd)
27841 x = out0, out0 = out1, out1 = x;
27842
27843 emit_insn (gen (out0, in0, in1, out1));
27844 return true;
27845 }
27846
27847 /* Recognize patterns for the VZIP insns. */
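/* For example, in V4SImode with two distinct inputs the accepted selectors
   are { 0, 4, 1, 5 } (high == 0) and { 2, 6, 3, 7 } (high == 2), i.e. the
   interleaving of the low or high halves of the two inputs.  */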
27848
27849 static bool
27850 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27851 {
27852 unsigned int i, high, mask, nelt = d->nelt;
27853 rtx out0, out1, in0, in1, x;
27854 rtx (*gen)(rtx, rtx, rtx, rtx);
27855
27856 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27857 return false;
27858
27859 /* Note that these are little-endian tests. Adjust for big-endian later. */
27860 high = nelt / 2;
27861 if (d->perm[0] == high)
27862 ;
27863 else if (d->perm[0] == 0)
27864 high = 0;
27865 else
27866 return false;
27867 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27868
27869 for (i = 0; i < nelt / 2; i++)
27870 {
27871 unsigned elt = (i + high) & mask;
27872 if (d->perm[i * 2] != elt)
27873 return false;
27874 elt = (elt + nelt) & mask;
27875 if (d->perm[i * 2 + 1] != elt)
27876 return false;
27877 }
27878
27879 /* Success! */
27880 if (d->testing_p)
27881 return true;
27882
27883 switch (d->vmode)
27884 {
27885 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27886 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27887 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27888 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27889 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27890 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27891 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27892 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27893 default:
27894 gcc_unreachable ();
27895 }
27896
27897 in0 = d->op0;
27898 in1 = d->op1;
27899 if (BYTES_BIG_ENDIAN)
27900 {
27901 x = in0, in0 = in1, in1 = x;
27902 high = !high;
27903 }
27904
27905 out0 = d->target;
27906 out1 = gen_reg_rtx (d->vmode);
27907 if (high)
27908 x = out0, out0 = out1, out1 = x;
27909
27910 emit_insn (gen (out0, in0, in1, out1));
27911 return true;
27912 }
27913
27914 /* Recognize patterns for the VREV insns. */
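/* For example, a V8QImode selector of { 3, 2, 1, 0, 7, 6, 5, 4 } has
   diff == 3 and is matched below by VREV32.8, which reverses the bytes
   within each 32-bit group.  */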
27915
27916 static bool
27917 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27918 {
27919 unsigned int i, j, diff, nelt = d->nelt;
27920 rtx (*gen)(rtx, rtx);
27921
27922 if (!d->one_vector_p)
27923 return false;
27924
27925 diff = d->perm[0];
27926 switch (diff)
27927 {
27928 case 7:
27929 switch (d->vmode)
27930 {
27931 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27932 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27933 default:
27934 return false;
27935 }
27936 break;
27937 case 3:
27938 switch (d->vmode)
27939 {
27940 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27941 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27942 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27943 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27944 default:
27945 return false;
27946 }
27947 break;
27948 case 1:
27949 switch (d->vmode)
27950 {
27951 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27952 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27953 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27954 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27955 case V4SImode: gen = gen_neon_vrev64v4si; break;
27956 case V2SImode: gen = gen_neon_vrev64v2si; break;
27957 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27958 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27959 default:
27960 return false;
27961 }
27962 break;
27963 default:
27964 return false;
27965 }
27966
27967 for (i = 0; i < nelt ; i += diff + 1)
27968 for (j = 0; j <= diff; j += 1)
27969 {
27970 /* This is guaranteed to be true as the value of diff
27971 is 7, 3 or 1 and we should have enough elements in the
27972 queue to generate this. Getting a vector mask with any
27973 other value of diff implies that something is wrong by
27974 the time we get here. */
27975 gcc_assert (i + j < nelt);
27976 if (d->perm[i + j] != i + diff - j)
27977 return false;
27978 }
27979
27980 /* Success! */
27981 if (d->testing_p)
27982 return true;
27983
27984 emit_insn (gen (d->target, d->op0));
27985 return true;
27986 }
27987
27988 /* Recognize patterns for the VTRN insns. */
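/* For example, in V4SImode with two distinct inputs the accepted selectors
   are { 0, 4, 2, 6 } (odd == 0) and { 1, 5, 3, 7 } (odd == 1), i.e. a
   transposition of the 2x2 element blocks formed by the two inputs.  */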
27989
27990 static bool
27991 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27992 {
27993 unsigned int i, odd, mask, nelt = d->nelt;
27994 rtx out0, out1, in0, in1, x;
27995 rtx (*gen)(rtx, rtx, rtx, rtx);
27996
27997 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27998 return false;
27999
28000 /* Note that these are little-endian tests. Adjust for big-endian later. */
28001 if (d->perm[0] == 0)
28002 odd = 0;
28003 else if (d->perm[0] == 1)
28004 odd = 1;
28005 else
28006 return false;
28007 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28008
28009 for (i = 0; i < nelt; i += 2)
28010 {
28011 if (d->perm[i] != i + odd)
28012 return false;
28013 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28014 return false;
28015 }
28016
28017 /* Success! */
28018 if (d->testing_p)
28019 return true;
28020
28021 switch (d->vmode)
28022 {
28023 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28024 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28025 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28026 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28027 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28028 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28029 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28030 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28031 default:
28032 gcc_unreachable ();
28033 }
28034
28035 in0 = d->op0;
28036 in1 = d->op1;
28037 if (BYTES_BIG_ENDIAN)
28038 {
28039 x = in0, in0 = in1, in1 = x;
28040 odd = !odd;
28041 }
28042
28043 out0 = d->target;
28044 out1 = gen_reg_rtx (d->vmode);
28045 if (odd)
28046 x = out0, out0 = out1, out1 = x;
28047
28048 emit_insn (gen (out0, in0, in1, out1));
28049 return true;
28050 }
28051
28052 /* Recognize patterns for the VEXT insns. */
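/* For example, a V4SImode selector of { 1, 2, 3, 4 } selects a contiguous
   run starting at element 1 of the concatenated inputs and is matched
   below, producing a VEXT with an element offset of 1.  */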
28053
28054 static bool
28055 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28056 {
28057 unsigned int i, nelt = d->nelt;
28058 rtx (*gen) (rtx, rtx, rtx, rtx);
28059 rtx offset;
28060
28061 unsigned int location;
28062
28063 unsigned int next = d->perm[0] + 1;
28064
28065 /* TODO: Handle GCC's numbering of elements for big-endian. */
28066 if (BYTES_BIG_ENDIAN)
28067 return false;
28068
28069 /* Check if the extracted indexes are increasing by one. */
28070 for (i = 1; i < nelt; next++, i++)
28071 {
28072 /* If we hit the most significant element of the 2nd vector in
28073 the previous iteration, no need to test further. */
28074 if (next == 2 * nelt)
28075 return false;
28076
28077 /* If we are operating on only one vector: it could be a
28078 rotation. If there are only two elements of size < 64, let
28079 arm_evpc_neon_vrev catch it. */
28080 if (d->one_vector_p && (next == nelt))
28081 {
28082 if ((nelt == 2) && (d->vmode != V2DImode))
28083 return false;
28084 else
28085 next = 0;
28086 }
28087
28088 if (d->perm[i] != next)
28089 return false;
28090 }
28091
28092 location = d->perm[0];
28093
28094 switch (d->vmode)
28095 {
28096 case V16QImode: gen = gen_neon_vextv16qi; break;
28097 case V8QImode: gen = gen_neon_vextv8qi; break;
28098 case V4HImode: gen = gen_neon_vextv4hi; break;
28099 case V8HImode: gen = gen_neon_vextv8hi; break;
28100 case V2SImode: gen = gen_neon_vextv2si; break;
28101 case V4SImode: gen = gen_neon_vextv4si; break;
28102 case V2SFmode: gen = gen_neon_vextv2sf; break;
28103 case V4SFmode: gen = gen_neon_vextv4sf; break;
28104 case V2DImode: gen = gen_neon_vextv2di; break;
28105 default:
28106 return false;
28107 }
28108
28109 /* Success! */
28110 if (d->testing_p)
28111 return true;
28112
28113 offset = GEN_INT (location);
28114 emit_insn (gen (d->target, d->op0, d->op1, offset));
28115 return true;
28116 }
28117
28118 /* The NEON VTBL instruction is a fully variable permutation that's even
28119 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28120 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28121 can do slightly better by expanding this as a constant where we don't
28122 have to apply a mask. */
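/* For example, a constant V8QImode selector such as { 0, 7, 3, 12, 1, 14,
   6, 9 } has already been reduced into range by arm_expand_vec_perm_const,
   so it can be loaded into a register and passed straight to the VTBL
   expansion without the run-time AND that arm_expand_vec_perm must emit
   for a variable selector.  */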
28123
28124 static bool
28125 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28126 {
28127 rtx rperm[MAX_VECT_LEN], sel;
28128 machine_mode vmode = d->vmode;
28129 unsigned int i, nelt = d->nelt;
28130
28131 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28132 numbering of elements for big-endian, we must reverse the order. */
28133 if (BYTES_BIG_ENDIAN)
28134 return false;
28135
28136 if (d->testing_p)
28137 return true;
28138
28139 /* Generic code will try constant permutation twice: once with the
28140 original mode and again with the elements lowered to QImode.
28141 So wait and don't do the selector expansion ourselves. */
28142 if (vmode != V8QImode && vmode != V16QImode)
28143 return false;
28144
28145 for (i = 0; i < nelt; ++i)
28146 rperm[i] = GEN_INT (d->perm[i]);
28147 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28148 sel = force_reg (vmode, sel);
28149
28150 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28151 return true;
28152 }
28153
28154 static bool
28155 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28156 {
28157 /* Check if the input mask matches vext before reordering the
28158 operands. */
28159 if (TARGET_NEON)
28160 if (arm_evpc_neon_vext (d))
28161 return true;
28162
28163 /* The pattern matching functions above are written to look for a small
28164 number to begin the sequence (0, 1, N/2). If we begin with an index
28165 from the second operand, we can swap the operands. */
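/* For example, a V4SImode selector of { 5, 1, 7, 3 } starts with an index
   into the second operand; adding nelt modulo 2*nelt rewrites it as
   { 1, 5, 3, 7 } with the operands exchanged, which VTRN (odd == 1) can
   then match.  */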
28166 if (d->perm[0] >= d->nelt)
28167 {
28168 unsigned i, nelt = d->nelt;
28169 rtx x;
28170
28171 for (i = 0; i < nelt; ++i)
28172 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28173
28174 x = d->op0;
28175 d->op0 = d->op1;
28176 d->op1 = x;
28177 }
28178
28179 if (TARGET_NEON)
28180 {
28181 if (arm_evpc_neon_vuzp (d))
28182 return true;
28183 if (arm_evpc_neon_vzip (d))
28184 return true;
28185 if (arm_evpc_neon_vrev (d))
28186 return true;
28187 if (arm_evpc_neon_vtrn (d))
28188 return true;
28189 return arm_evpc_neon_vtbl (d);
28190 }
28191 return false;
28192 }
28193
28194 /* Expand a vec_perm_const pattern. */
28195
28196 bool
28197 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28198 {
28199 struct expand_vec_perm_d d;
28200 int i, nelt, which;
28201
28202 d.target = target;
28203 d.op0 = op0;
28204 d.op1 = op1;
28205
28206 d.vmode = GET_MODE (target);
28207 gcc_assert (VECTOR_MODE_P (d.vmode));
28208 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28209 d.testing_p = false;
28210
28211 for (i = which = 0; i < nelt; ++i)
28212 {
28213 rtx e = XVECEXP (sel, 0, i);
28214 int ei = INTVAL (e) & (2 * nelt - 1);
28215 which |= (ei < nelt ? 1 : 2);
28216 d.perm[i] = ei;
28217 }
28218
28219 switch (which)
28220 {
28221 default:
28222 gcc_unreachable();
28223
28224 case 3:
28225 d.one_vector_p = false;
28226 if (!rtx_equal_p (op0, op1))
28227 break;
28228
28229 /* The elements of PERM do not suggest that only the first operand
28230 is used, but both operands are identical. Allow easier matching
28231 of the permutation by folding the permutation into the single
28232 input vector. */
28233 /* FALLTHRU */
28234 case 2:
28235 for (i = 0; i < nelt; ++i)
28236 d.perm[i] &= nelt - 1;
28237 d.op0 = op1;
28238 d.one_vector_p = true;
28239 break;
28240
28241 case 1:
28242 d.op1 = op0;
28243 d.one_vector_p = true;
28244 break;
28245 }
28246
28247 return arm_expand_vec_perm_const_1 (&d);
28248 }
28249
28250 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28251
28252 static bool
28253 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28254 const unsigned char *sel)
28255 {
28256 struct expand_vec_perm_d d;
28257 unsigned int i, nelt, which;
28258 bool ret;
28259
28260 d.vmode = vmode;
28261 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28262 d.testing_p = true;
28263 memcpy (d.perm, sel, nelt);
28264
28265 /* Categorize the set of elements in the selector. */
28266 for (i = which = 0; i < nelt; ++i)
28267 {
28268 unsigned char e = d.perm[i];
28269 gcc_assert (e < 2 * nelt);
28270 which |= (e < nelt ? 1 : 2);
28271 }
28272
28273 /* For all elements from second vector, fold the elements to first. */
28274 if (which == 2)
28275 for (i = 0; i < nelt; ++i)
28276 d.perm[i] -= nelt;
28277
28278 /* Check whether the mask can be applied to the vector type. */
28279 d.one_vector_p = (which != 3);
28280
28281 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28282 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28283 if (!d.one_vector_p)
28284 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28285
28286 start_sequence ();
28287 ret = arm_expand_vec_perm_const_1 (&d);
28288 end_sequence ();
28289
28290 return ret;
28291 }
28292
28293 bool
28294 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28295 {
28296 /* If we are soft float and we do not have ldrd
28297 then all auto increment forms are ok. */
28298 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28299 return true;
28300
28301 switch (code)
28302 {
28303 /* Post increment and Pre Decrement are supported for all
28304 instruction forms except for vector forms. */
28305 case ARM_POST_INC:
28306 case ARM_PRE_DEC:
28307 if (VECTOR_MODE_P (mode))
28308 {
28309 if (code != ARM_PRE_DEC)
28310 return true;
28311 else
28312 return false;
28313 }
28314
28315 return true;
28316
28317 case ARM_POST_DEC:
28318 case ARM_PRE_INC:
28319 /* Without LDRD, if the mode size is greater than the
28320 word size there is no point in auto-incrementing
28321 because ldm and stm will not have these forms. */
28322 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28323 return false;
28324
28325 /* Vector and floating point modes do not support
28326 these auto increment forms. */
28327 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28328 return false;
28329
28330 return true;
28331
28332 default:
28333 return false;
28334
28335 }
28336
28337 return false;
28338 }
28339
28340 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28341 on ARM, since we know that shifts by negative amounts are no-ops.
28342 Additionally, the default expansion code is not available or suitable
28343 for post-reload insn splits (this can occur when the register allocator
28344 chooses not to do a shift in NEON).
28345
28346 This function is used in both initial expand and post-reload splits, and
28347 handles all kinds of 64-bit shifts.
28348
28349 Input requirements:
28350 - It is safe for the input and output to be the same register, but
28351 early-clobber rules apply for the shift amount and scratch registers.
28352 - Shift by register requires both scratch registers. In all other cases
28353 the scratch registers may be NULL.
28354 - Ashiftrt by a register also clobbers the CC register. */
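/* For example, for a constant logical right shift by 40 the code below
   produces out_low = in_high >> 8 and out_high = 0, while a constant left
   shift by 8 produces out_high = (in_high << 8) | (in_low >> 24) and
   out_low = in_low << 8.  */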
28355 void
28356 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28357 rtx amount, rtx scratch1, rtx scratch2)
28358 {
28359 rtx out_high = gen_highpart (SImode, out);
28360 rtx out_low = gen_lowpart (SImode, out);
28361 rtx in_high = gen_highpart (SImode, in);
28362 rtx in_low = gen_lowpart (SImode, in);
28363
28364 /* Terminology:
28365 in = the register pair containing the input value.
28366 out = the destination register pair.
28367 up = the high- or low-part of each pair.
28368 down = the opposite part to "up".
28369 In a shift, we can consider bits to shift from "up"-stream to
28370 "down"-stream, so in a left-shift "up" is the low-part and "down"
28371 is the high-part of each register pair. */
28372
28373 rtx out_up = code == ASHIFT ? out_low : out_high;
28374 rtx out_down = code == ASHIFT ? out_high : out_low;
28375 rtx in_up = code == ASHIFT ? in_low : in_high;
28376 rtx in_down = code == ASHIFT ? in_high : in_low;
28377
28378 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28379 gcc_assert (out
28380 && (REG_P (out) || GET_CODE (out) == SUBREG)
28381 && GET_MODE (out) == DImode);
28382 gcc_assert (in
28383 && (REG_P (in) || GET_CODE (in) == SUBREG)
28384 && GET_MODE (in) == DImode);
28385 gcc_assert (amount
28386 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28387 && GET_MODE (amount) == SImode)
28388 || CONST_INT_P (amount)));
28389 gcc_assert (scratch1 == NULL
28390 || (GET_CODE (scratch1) == SCRATCH)
28391 || (GET_MODE (scratch1) == SImode
28392 && REG_P (scratch1)));
28393 gcc_assert (scratch2 == NULL
28394 || (GET_CODE (scratch2) == SCRATCH)
28395 || (GET_MODE (scratch2) == SImode
28396 && REG_P (scratch2)));
28397 gcc_assert (!REG_P (out) || !REG_P (amount)
28398 || !HARD_REGISTER_P (out)
28399 || (REGNO (out) != REGNO (amount)
28400 && REGNO (out) + 1 != REGNO (amount)));
28401
28402 /* Macros to make following code more readable. */
28403 #define SUB_32(DEST,SRC) \
28404 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28405 #define RSB_32(DEST,SRC) \
28406 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28407 #define SUB_S_32(DEST,SRC) \
28408 gen_addsi3_compare0 ((DEST), (SRC), \
28409 GEN_INT (-32))
28410 #define SET(DEST,SRC) \
28411 gen_rtx_SET (SImode, (DEST), (SRC))
28412 #define SHIFT(CODE,SRC,AMOUNT) \
28413 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28414 #define LSHIFT(CODE,SRC,AMOUNT) \
28415 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28416 SImode, (SRC), (AMOUNT))
28417 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28418 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28419 SImode, (SRC), (AMOUNT))
28420 #define ORR(A,B) \
28421 gen_rtx_IOR (SImode, (A), (B))
28422 #define BRANCH(COND,LABEL) \
28423 gen_arm_cond_branch ((LABEL), \
28424 gen_rtx_ ## COND (CCmode, cc_reg, \
28425 const0_rtx), \
28426 cc_reg)
28427
28428 /* Shifts by register and shifts by constant are handled separately. */
28429 if (CONST_INT_P (amount))
28430 {
28431 /* We have a shift-by-constant. */
28432
28433 /* First, handle out-of-range shift amounts.
28434 In both cases we try to match the result an ARM instruction in a
28435 shift-by-register would give. This helps reduce execution
28436 differences between optimization levels, but it won't stop other
28437 parts of the compiler doing different things. This is "undefined
28438 behaviour", in any case. */
28439 if (INTVAL (amount) <= 0)
28440 emit_insn (gen_movdi (out, in));
28441 else if (INTVAL (amount) >= 64)
28442 {
28443 if (code == ASHIFTRT)
28444 {
28445 rtx const31_rtx = GEN_INT (31);
28446 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28447 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28448 }
28449 else
28450 emit_insn (gen_movdi (out, const0_rtx));
28451 }
28452
28453 /* Now handle valid shifts. */
28454 else if (INTVAL (amount) < 32)
28455 {
28456 /* Shifts by a constant less than 32. */
28457 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28458
28459 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28460 emit_insn (SET (out_down,
28461 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28462 out_down)));
28463 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28464 }
28465 else
28466 {
28467 /* Shifts by a constant greater than 31. */
28468 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28469
28470 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28471 if (code == ASHIFTRT)
28472 emit_insn (gen_ashrsi3 (out_up, in_up,
28473 GEN_INT (31)));
28474 else
28475 emit_insn (SET (out_up, const0_rtx));
28476 }
28477 }
28478 else
28479 {
28480 /* We have a shift-by-register. */
28481 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28482
28483 /* This alternative requires the scratch registers. */
28484 gcc_assert (scratch1 && REG_P (scratch1));
28485 gcc_assert (scratch2 && REG_P (scratch2));
28486
28487 /* We will need the values "amount-32" and "32-amount" later.
28488 Swapping them around now allows the later code to be more general. */
28489 switch (code)
28490 {
28491 case ASHIFT:
28492 emit_insn (SUB_32 (scratch1, amount));
28493 emit_insn (RSB_32 (scratch2, amount));
28494 break;
28495 case ASHIFTRT:
28496 emit_insn (RSB_32 (scratch1, amount));
28497 /* Also set CC = amount > 32. */
28498 emit_insn (SUB_S_32 (scratch2, amount));
28499 break;
28500 case LSHIFTRT:
28501 emit_insn (RSB_32 (scratch1, amount));
28502 emit_insn (SUB_32 (scratch2, amount));
28503 break;
28504 default:
28505 gcc_unreachable ();
28506 }
28507
28508 /* Emit code like this:
28509
28510 arithmetic-left:
28511 out_down = in_down << amount;
28512 out_down = (in_up << (amount - 32)) | out_down;
28513 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28514 out_up = in_up << amount;
28515
28516 arithmetic-right:
28517 out_down = in_down >> amount;
28518 out_down = (in_up << (32 - amount)) | out_down;
28519 if (amount < 32)
28520 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28521 out_up = in_up << amount;
28522
28523 logical-right:
28524 out_down = in_down >> amount;
28525 out_down = (in_up << (32 - amount)) | out_down;
28526 if (amount < 32)
28527 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28528 out_up = in_up << amount;
28529
28530 The ARM and Thumb2 variants are the same but implemented slightly
28531 differently. If this were only called during expand we could just
28532 use the Thumb2 case and let combine do the right thing, but this
28533 can also be called from post-reload splitters. */
28534
28535 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28536
28537 if (!TARGET_THUMB2)
28538 {
28539 /* Emit code for ARM mode. */
28540 emit_insn (SET (out_down,
28541 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28542 if (code == ASHIFTRT)
28543 {
28544 rtx_code_label *done_label = gen_label_rtx ();
28545 emit_jump_insn (BRANCH (LT, done_label));
28546 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28547 out_down)));
28548 emit_label (done_label);
28549 }
28550 else
28551 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28552 out_down)));
28553 }
28554 else
28555 {
28556 /* Emit code for Thumb2 mode.
28557 Thumb2 can't do shift and or in one insn. */
28558 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28559 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28560
28561 if (code == ASHIFTRT)
28562 {
28563 rtx_code_label *done_label = gen_label_rtx ();
28564 emit_jump_insn (BRANCH (LT, done_label));
28565 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28566 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28567 emit_label (done_label);
28568 }
28569 else
28570 {
28571 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28572 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28573 }
28574 }
28575
28576 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28577 }
28578
28579 #undef SUB_32
28580 #undef RSB_32
28581 #undef SUB_S_32
28582 #undef SET
28583 #undef SHIFT
28584 #undef LSHIFT
28585 #undef REV_LSHIFT
28586 #undef ORR
28587 #undef BRANCH
28588 }
28589
28590
28591 /* Return TRUE if *COMPARISON is a valid comparison operation, and put
28592 the operands into a form that is valid for it. */
28593 bool
28594 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28595 {
28596 enum rtx_code code = GET_CODE (*comparison);
28597 int code_int;
28598 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28599 ? GET_MODE (*op2) : GET_MODE (*op1);
28600
28601 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28602
28603 if (code == UNEQ || code == LTGT)
28604 return false;
28605
28606 code_int = (int)code;
28607 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28608 PUT_CODE (*comparison, (enum rtx_code)code_int);
28609
28610 switch (mode)
28611 {
28612 case SImode:
28613 if (!arm_add_operand (*op1, mode))
28614 *op1 = force_reg (mode, *op1);
28615 if (!arm_add_operand (*op2, mode))
28616 *op2 = force_reg (mode, *op2);
28617 return true;
28618
28619 case DImode:
28620 if (!cmpdi_operand (*op1, mode))
28621 *op1 = force_reg (mode, *op1);
28622 if (!cmpdi_operand (*op2, mode))
28623 *op2 = force_reg (mode, *op2);
28624 return true;
28625
28626 case SFmode:
28627 case DFmode:
28628 if (!arm_float_compare_operand (*op1, mode))
28629 *op1 = force_reg (mode, *op1);
28630 if (!arm_float_compare_operand (*op2, mode))
28631 *op2 = force_reg (mode, *op2);
28632 return true;
28633 default:
28634 break;
28635 }
28636
28637 return false;
28638
28639 }
28640
28641 /* Return the maximum number of instructions to use when setting a block of memory. */
28642 static int
28643 arm_block_set_max_insns (void)
28644 {
28645 if (optimize_function_for_size_p (cfun))
28646 return 4;
28647 else
28648 return current_tune->max_insns_inline_memset;
28649 }
28650
28651 /* Return TRUE if it's profitable to set block of memory for
28652 non-vectorized case. VAL is the value to set the memory
28653 with. LENGTH is the number of bytes to set. ALIGN is the
28654 alignment of the destination memory in bytes. UNALIGNED_P
28655 is TRUE if we can only set the memory with instructions
28656 meeting alignment requirements. USE_STRD_P is TRUE if we
28657 can use strd to set the memory. */
28658 static bool
28659 arm_block_set_non_vect_profit_p (rtx val,
28660 unsigned HOST_WIDE_INT length,
28661 unsigned HOST_WIDE_INT align,
28662 bool unaligned_p, bool use_strd_p)
28663 {
28664 int num = 0;
28665 /* For a leftover of 0-7 bytes, this table gives the minimum number of
28666 strb/strh/str instructions needed to store it. */
28667 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
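/* For example, for LENGTH == 23 the word-sized path below counts
   23 >> 2 == 5 str instructions plus leftover[3] == 2 (strh + strb),
   reduced by one when unaligned access allows the final strh/strb pair
   to be merged into a single overlapping str.  */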
28668
28669 if (unaligned_p)
28670 {
28671 num = arm_const_inline_cost (SET, val);
28672 num += length / align + length % align;
28673 }
28674 else if (use_strd_p)
28675 {
28676 num = arm_const_double_inline_cost (val);
28677 num += (length >> 3) + leftover[length & 7];
28678 }
28679 else
28680 {
28681 num = arm_const_inline_cost (SET, val);
28682 num += (length >> 2) + leftover[length & 3];
28683 }
28684
28685 /* We may be able to combine the last STRH/STRB pair into a single STR
28686 by shifting one byte back. */
28687 if (unaligned_access && length > 3 && (length & 3) == 3)
28688 num--;
28689
28690 return (num <= arm_block_set_max_insns ());
28691 }
28692
28693 /* Return TRUE if it's profitable to set block of memory for
28694 vectorized case. LENGTH is the number of bytes to set.
28695 ALIGN is the alignment of destination memory in bytes.
28696 MODE is the vector mode used to set the memory. */
28697 static bool
28698 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28699 unsigned HOST_WIDE_INT align,
28700 machine_mode mode)
28701 {
28702 int num;
28703 bool unaligned_p = ((align & 3) != 0);
28704 unsigned int nelt = GET_MODE_NUNITS (mode);
28705
28706 /* Instruction loading constant value. */
28707 num = 1;
28708 /* Instructions storing the memory. */
28709 num += (length + nelt - 1) / nelt;
28710 /* Instructions adjusting the address expression. We only need to
28711 adjust the address expression if it's 4-byte aligned and the leftover
28712 bytes can only be stored by a misaligned store instruction. */
28713 if (!unaligned_p && (length & 3) != 0)
28714 num++;
28715
28716 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28717 if (!unaligned_p && mode == V16QImode)
28718 num--;
28719
28720 return (num <= arm_block_set_max_insns ());
28721 }
28722
28723 /* Set a block of memory using vectorization instructions for the
28724 unaligned case. We fill the first LENGTH bytes of the memory
28725 area starting from DSTBASE with byte constant VALUE. ALIGN is
28726 the alignment requirement of memory. Return TRUE if succeeded. */
28727 static bool
28728 arm_block_set_unaligned_vect (rtx dstbase,
28729 unsigned HOST_WIDE_INT length,
28730 unsigned HOST_WIDE_INT value,
28731 unsigned HOST_WIDE_INT align)
28732 {
28733 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28734 rtx dst, mem;
28735 rtx val_elt, val_vec, reg;
28736 rtx rval[MAX_VECT_LEN];
28737 rtx (*gen_func) (rtx, rtx);
28738 machine_mode mode;
28739 unsigned HOST_WIDE_INT v = value;
28740
28741 gcc_assert ((align & 0x3) != 0);
28742 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28743 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28744 if (length >= nelt_v16)
28745 {
28746 mode = V16QImode;
28747 gen_func = gen_movmisalignv16qi;
28748 }
28749 else
28750 {
28751 mode = V8QImode;
28752 gen_func = gen_movmisalignv8qi;
28753 }
28754 nelt_mode = GET_MODE_NUNITS (mode);
28755 gcc_assert (length >= nelt_mode);
28756 /* Skip if it isn't profitable. */
28757 if (!arm_block_set_vect_profit_p (length, align, mode))
28758 return false;
28759
28760 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28761 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28762
28763 v = sext_hwi (v, BITS_PER_WORD);
28764 val_elt = GEN_INT (v);
28765 for (j = 0; j < nelt_mode; j++)
28766 rval[j] = val_elt;
28767
28768 reg = gen_reg_rtx (mode);
28769 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28770 /* Emit instruction loading the constant value. */
28771 emit_move_insn (reg, val_vec);
28772
28773 /* Handle nelt_mode bytes in a vector. */
28774 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28775 {
28776 emit_insn ((*gen_func) (mem, reg));
28777 if (i + 2 * nelt_mode <= length)
28778 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28779 }
28780
28781 /* If at least nelt_v8 bytes are left over, we must be in
28782 V16QImode. */
28783 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28784
28785 /* Handle (8, 16) bytes leftover. */
28786 if (i + nelt_v8 < length)
28787 {
28788 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28789 /* We are shifting bytes back, set the alignment accordingly. */
28790 if ((length & 1) != 0 && align >= 2)
28791 set_mem_align (mem, BITS_PER_UNIT);
28792
28793 emit_insn (gen_movmisalignv16qi (mem, reg));
28794 }
28795 /* Handle (0, 8] bytes leftover. */
28796 else if (i < length && i + nelt_v8 >= length)
28797 {
28798 if (mode == V16QImode)
28799 {
28800 reg = gen_lowpart (V8QImode, reg);
28801 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28802 }
28803 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28804 + (nelt_mode - nelt_v8))));
28805 /* We are shifting bytes back, set the alignment accordingly. */
28806 if ((length & 1) != 0 && align >= 2)
28807 set_mem_align (mem, BITS_PER_UNIT);
28808
28809 emit_insn (gen_movmisalignv8qi (mem, reg));
28810 }
28811
28812 return true;
28813 }
28814
28815 /* Set a block of memory using vectorization instructions for the
28816 aligned case. We fill the first LENGTH bytes of the memory area
28817 starting from DSTBASE with byte constant VALUE. ALIGN is the
28818 alignment requirement of memory. Return TRUE if succeeded. */
28819 static bool
28820 arm_block_set_aligned_vect (rtx dstbase,
28821 unsigned HOST_WIDE_INT length,
28822 unsigned HOST_WIDE_INT value,
28823 unsigned HOST_WIDE_INT align)
28824 {
28825 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28826 rtx dst, addr, mem;
28827 rtx val_elt, val_vec, reg;
28828 rtx rval[MAX_VECT_LEN];
28829 machine_mode mode;
28830 unsigned HOST_WIDE_INT v = value;
28831
28832 gcc_assert ((align & 0x3) == 0);
28833 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28834 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28835 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28836 mode = V16QImode;
28837 else
28838 mode = V8QImode;
28839
28840 nelt_mode = GET_MODE_NUNITS (mode);
28841 gcc_assert (length >= nelt_mode);
28842 /* Skip if it isn't profitable. */
28843 if (!arm_block_set_vect_profit_p (length, align, mode))
28844 return false;
28845
28846 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28847
28848 v = sext_hwi (v, BITS_PER_WORD);
28849 val_elt = GEN_INT (v);
28850 for (j = 0; j < nelt_mode; j++)
28851 rval[j] = val_elt;
28852
28853 reg = gen_reg_rtx (mode);
28854 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28855 /* Emit instruction loading the constant value. */
28856 emit_move_insn (reg, val_vec);
28857
28858 i = 0;
28859 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28860 if (mode == V16QImode)
28861 {
28862 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28863 emit_insn (gen_movmisalignv16qi (mem, reg));
28864 i += nelt_mode;
28865 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28866 if (i + nelt_v8 < length && i + nelt_v16 > length)
28867 {
28868 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28869 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28870 /* We are shifting bytes back, set the alignment accordingly. */
28871 if ((length & 0x3) == 0)
28872 set_mem_align (mem, BITS_PER_UNIT * 4);
28873 else if ((length & 0x1) == 0)
28874 set_mem_align (mem, BITS_PER_UNIT * 2);
28875 else
28876 set_mem_align (mem, BITS_PER_UNIT);
28877
28878 emit_insn (gen_movmisalignv16qi (mem, reg));
28879 return true;
28880 }
28881 /* Fall through for bytes leftover. */
28882 mode = V8QImode;
28883 nelt_mode = GET_MODE_NUNITS (mode);
28884 reg = gen_lowpart (V8QImode, reg);
28885 }
28886
28887 /* Handle 8 bytes in a vector. */
28888 for (; (i + nelt_mode <= length); i += nelt_mode)
28889 {
28890 addr = plus_constant (Pmode, dst, i);
28891 mem = adjust_automodify_address (dstbase, mode, addr, i);
28892 emit_move_insn (mem, reg);
28893 }
28894
28895 /* Handle single word leftover by shifting 4 bytes back. We can
28896 use aligned access for this case. */
28897 if (i + UNITS_PER_WORD == length)
28898 {
28899 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28900 mem = adjust_automodify_address (dstbase, mode,
28901 addr, i - UNITS_PER_WORD);
28902 /* We are shifting 4 bytes back, set the alignment accordingly. */
28903 if (align > UNITS_PER_WORD)
28904 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28905
28906 emit_move_insn (mem, reg);
28907 }
28908 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28909 We have to use unaligned access for this case. */
28910 else if (i < length)
28911 {
28912 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28913 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28914 /* We are shifting bytes back, set the alignment accordingly. */
28915 if ((length & 1) == 0)
28916 set_mem_align (mem, BITS_PER_UNIT * 2);
28917 else
28918 set_mem_align (mem, BITS_PER_UNIT);
28919
28920 emit_insn (gen_movmisalignv8qi (mem, reg));
28921 }
28922
28923 return true;
28924 }
28925
28926 /* Set a block of memory using plain strh/strb instructions, only
28927 using instructions allowed by ALIGN on the processor. We fill the
28928 first LENGTH bytes of the memory area starting from DSTBASE
28929 with byte constant VALUE. ALIGN is the alignment requirement
28930 of memory. */
28931 static bool
28932 arm_block_set_unaligned_non_vect (rtx dstbase,
28933 unsigned HOST_WIDE_INT length,
28934 unsigned HOST_WIDE_INT value,
28935 unsigned HOST_WIDE_INT align)
28936 {
28937 unsigned int i;
28938 rtx dst, addr, mem;
28939 rtx val_exp, val_reg, reg;
28940 machine_mode mode;
28941 HOST_WIDE_INT v = value;
28942
28943 gcc_assert (align == 1 || align == 2);
28944
28945 if (align == 2)
28946 v |= (value << BITS_PER_UNIT);
28947
28948 v = sext_hwi (v, BITS_PER_WORD);
28949 val_exp = GEN_INT (v);
28950 /* Skip if it isn't profitable. */
28951 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28952 align, true, false))
28953 return false;
28954
28955 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28956 mode = (align == 2 ? HImode : QImode);
28957 val_reg = force_reg (SImode, val_exp);
28958 reg = gen_lowpart (mode, val_reg);
28959
28960 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28961 {
28962 addr = plus_constant (Pmode, dst, i);
28963 mem = adjust_automodify_address (dstbase, mode, addr, i);
28964 emit_move_insn (mem, reg);
28965 }
28966
28967 /* Handle single byte leftover. */
28968 if (i + 1 == length)
28969 {
28970 reg = gen_lowpart (QImode, val_reg);
28971 addr = plus_constant (Pmode, dst, i);
28972 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28973 emit_move_insn (mem, reg);
28974 i++;
28975 }
28976
28977 gcc_assert (i == length);
28978 return true;
28979 }
28980
28981 /* Set a block of memory using plain strd/str/strh/strb instructions,
28982 with unaligned stores permitted on processors which support unaligned
28983 semantics for those instructions. We fill the first LENGTH bytes
28984 of the memory area starting from DSTBASE with byte constant VALUE.
28985 ALIGN is the alignment requirement of memory. */
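/* For example, with LENGTH == 15, VALUE == 0x55 and 4-byte alignment, and
   assuming the tuning does not prefer strd and unaligned access is enabled,
   this emits three word stores of 0x55555555 at offsets 0, 4 and 8 followed
   by one unaligned word store at offset 11 covering the remaining bytes.  */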
28986 static bool
28987 arm_block_set_aligned_non_vect (rtx dstbase,
28988 unsigned HOST_WIDE_INT length,
28989 unsigned HOST_WIDE_INT value,
28990 unsigned HOST_WIDE_INT align)
28991 {
28992 unsigned int i;
28993 rtx dst, addr, mem;
28994 rtx val_exp, val_reg, reg;
28995 unsigned HOST_WIDE_INT v;
28996 bool use_strd_p;
28997
28998 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28999 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29000
29001 v = (value | (value << 8) | (value << 16) | (value << 24));
29002 if (length < UNITS_PER_WORD)
29003 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29004
29005 if (use_strd_p)
29006 v |= (v << BITS_PER_WORD);
29007 else
29008 v = sext_hwi (v, BITS_PER_WORD);
29009
29010 val_exp = GEN_INT (v);
29011 /* Skip if it isn't profitable. */
29012 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29013 align, false, use_strd_p))
29014 {
29015 if (!use_strd_p)
29016 return false;
29017
29018 /* Try without strd. */
29019 v = (v >> BITS_PER_WORD);
29020 v = sext_hwi (v, BITS_PER_WORD);
29021 val_exp = GEN_INT (v);
29022 use_strd_p = false;
29023 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29024 align, false, use_strd_p))
29025 return false;
29026 }
29027
29028 i = 0;
29029 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29030 /* Handle double words using strd if possible. */
29031 if (use_strd_p)
29032 {
29033 val_reg = force_reg (DImode, val_exp);
29034 reg = val_reg;
29035 for (; (i + 8 <= length); i += 8)
29036 {
29037 addr = plus_constant (Pmode, dst, i);
29038 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29039 emit_move_insn (mem, reg);
29040 }
29041 }
29042 else
29043 val_reg = force_reg (SImode, val_exp);
29044
29045 /* Handle words. */
29046 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29047 for (; (i + 4 <= length); i += 4)
29048 {
29049 addr = plus_constant (Pmode, dst, i);
29050 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29051 if ((align & 3) == 0)
29052 emit_move_insn (mem, reg);
29053 else
29054 emit_insn (gen_unaligned_storesi (mem, reg));
29055 }
29056
29057 /* Merge last pair of STRH and STRB into a STR if possible. */
29058 if (unaligned_access && i > 0 && (i + 3) == length)
29059 {
29060 addr = plus_constant (Pmode, dst, i - 1);
29061 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29062 /* We are shifting one byte back, set the alignment accordingly. */
29063 if ((align & 1) == 0)
29064 set_mem_align (mem, BITS_PER_UNIT);
29065
29066 /* Most likely this is an unaligned access, and we can't tell at
29067 compilation time. */
29068 emit_insn (gen_unaligned_storesi (mem, reg));
29069 return true;
29070 }
29071
29072 /* Handle half word leftover. */
29073 if (i + 2 <= length)
29074 {
29075 reg = gen_lowpart (HImode, val_reg);
29076 addr = plus_constant (Pmode, dst, i);
29077 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29078 if ((align & 1) == 0)
29079 emit_move_insn (mem, reg);
29080 else
29081 emit_insn (gen_unaligned_storehi (mem, reg));
29082
29083 i += 2;
29084 }
29085
29086 /* Handle single byte leftover. */
29087 if (i + 1 == length)
29088 {
29089 reg = gen_lowpart (QImode, val_reg);
29090 addr = plus_constant (Pmode, dst, i);
29091 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29092 emit_move_insn (mem, reg);
29093 }
29094
29095 return true;
29096 }
29097
29098 /* Set a block of memory using vectorization instructions for both
29099 aligned and unaligned cases. We fill the first LENGTH bytes of
29100 the memory area starting from DSTBASE with byte constant VALUE.
29101 ALIGN is the alignment requirement of memory. */
29102 static bool
29103 arm_block_set_vect (rtx dstbase,
29104 unsigned HOST_WIDE_INT length,
29105 unsigned HOST_WIDE_INT value,
29106 unsigned HOST_WIDE_INT align)
29107 {
29108 /* Check whether we need to use unaligned store instruction. */
29109 if (((align & 3) != 0 || (length & 3) != 0)
29110 /* Check whether unaligned store instruction is available. */
29111 && (!unaligned_access || BYTES_BIG_ENDIAN))
29112 return false;
29113
29114 if ((align & 3) == 0)
29115 return arm_block_set_aligned_vect (dstbase, length, value, align);
29116 else
29117 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29118 }
29119
29120 /* Expand a block memory-set operation. First we try to do it using
29121 vectorization instructions, then fall back to ARM unaligned access and
29122 double-word stores if profitable. OPERANDS[0] is the destination,
29123 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value with which
29124 to initialize the memory, OPERANDS[3] is the known alignment of the
29125 destination. */
29126 bool
29127 arm_gen_setmem (rtx *operands)
29128 {
29129 rtx dstbase = operands[0];
29130 unsigned HOST_WIDE_INT length;
29131 unsigned HOST_WIDE_INT value;
29132 unsigned HOST_WIDE_INT align;
29133
29134 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29135 return false;
29136
29137 length = UINTVAL (operands[1]);
29138 if (length > 64)
29139 return false;
29140
29141 value = (UINTVAL (operands[2]) & 0xFF);
29142 align = UINTVAL (operands[3]);
29143 if (TARGET_NEON && length >= 8
29144 && current_tune->string_ops_prefer_neon
29145 && arm_block_set_vect (dstbase, length, value, align))
29146 return true;
29147
29148 if (!unaligned_access && (align & 3) != 0)
29149 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29150
29151 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29152 }
29153
29154 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29155
29156 static unsigned HOST_WIDE_INT
29157 arm_asan_shadow_offset (void)
29158 {
29159 return (unsigned HOST_WIDE_INT) 1 << 29;
29160 }
29161
29162
29163 /* This is a temporary fix for PR60655. Ideally we need
29164 to handle most of these cases in the generic part but
29165 currently we reject minus (..) (sym_ref). We try to
29166 ameliorate the case with minus (sym_ref1) (sym_ref2)
29167 where they are in the same section. */
29168
29169 static bool
29170 arm_const_not_ok_for_debug_p (rtx p)
29171 {
29172 tree decl_op0 = NULL;
29173 tree decl_op1 = NULL;
29174
29175 if (GET_CODE (p) == MINUS)
29176 {
29177 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29178 {
29179 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29180 if (decl_op1
29181 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29182 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29183 {
29184 if ((TREE_CODE (decl_op1) == VAR_DECL
29185 || TREE_CODE (decl_op1) == CONST_DECL)
29186 && (TREE_CODE (decl_op0) == VAR_DECL
29187 || TREE_CODE (decl_op0) == CONST_DECL))
29188 return (get_variable_section (decl_op1, false)
29189 != get_variable_section (decl_op0, false));
29190
29191 if (TREE_CODE (decl_op1) == LABEL_DECL
29192 && TREE_CODE (decl_op0) == LABEL_DECL)
29193 return (DECL_CONTEXT (decl_op1)
29194 != DECL_CONTEXT (decl_op0));
29195 }
29196
29197 return true;
29198 }
29199 }
29200
29201 return false;
29202 }
29203
29204 /* Return TRUE if X is a reference to a value in a constant pool. */
29205 extern bool
29206 arm_is_constant_pool_ref (rtx x)
29207 {
29208 return (MEM_P (x)
29209 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29210 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29211 }
29212
29213 /* If MEM is in the form of [base+offset], extract the two parts
29214 of the address and store them in BASE and OFFSET; otherwise return
29215 FALSE after clearing BASE and OFFSET. */
29216
29217 static bool
29218 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29219 {
29220 rtx addr;
29221
29222 gcc_assert (MEM_P (mem));
29223
29224 addr = XEXP (mem, 0);
29225
29226 /* Strip off const from addresses like (const (addr)). */
29227 if (GET_CODE (addr) == CONST)
29228 addr = XEXP (addr, 0);
29229
29230 if (GET_CODE (addr) == REG)
29231 {
29232 *base = addr;
29233 *offset = const0_rtx;
29234 return true;
29235 }
29236
29237 if (GET_CODE (addr) == PLUS
29238 && GET_CODE (XEXP (addr, 0)) == REG
29239 && CONST_INT_P (XEXP (addr, 1)))
29240 {
29241 *base = XEXP (addr, 0);
29242 *offset = XEXP (addr, 1);
29243 return true;
29244 }
29245
29246 *base = NULL_RTX;
29247 *offset = NULL_RTX;
29248
29249 return false;
29250 }
29251
29252 /* If INSN is a load or store whose address is in the form [base+offset],
29253 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
29254 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29255 otherwise return FALSE. */
29256
29257 static bool
29258 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29259 {
29260 rtx x, dest, src;
29261
29262 gcc_assert (INSN_P (insn));
29263 x = PATTERN (insn);
29264 if (GET_CODE (x) != SET)
29265 return false;
29266
29267 src = SET_SRC (x);
29268 dest = SET_DEST (x);
29269 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29270 {
29271 *is_load = false;
29272 extract_base_offset_in_addr (dest, base, offset);
29273 }
29274 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29275 {
29276 *is_load = true;
29277 extract_base_offset_in_addr (src, base, offset);
29278 }
29279 else
29280 return false;
29281
29282 return (*base != NULL_RTX && *offset != NULL_RTX);
29283 }
29284
29285 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29286
29287 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29288 and PRI are only calculated for these instructions. For other instructions,
29289 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29290 instruction fusion can be supported by returning different priorities.
29291
29292 It's important that irrelevant instructions get the largest FUSION_PRI. */
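/* For example, given "ldr r0, [r4, #4]" and "ldr r1, [r4, #8]", both get
   FUSION_PRI == max_pri - 2, and the smaller offset yields the larger PRI,
   so the scheduler will try to keep the two accesses together, ordered by
   offset.  */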
29293
29294 static void
29295 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29296 int *fusion_pri, int *pri)
29297 {
29298 int tmp, off_val;
29299 bool is_load;
29300 rtx base, offset;
29301
29302 gcc_assert (INSN_P (insn));
29303
29304 tmp = max_pri - 1;
29305 if (!fusion_load_store (insn, &base, &offset, &is_load))
29306 {
29307 *pri = tmp;
29308 *fusion_pri = tmp;
29309 return;
29310 }
29311
29312 /* Load goes first. */
29313 if (is_load)
29314 *fusion_pri = tmp - 1;
29315 else
29316 *fusion_pri = tmp - 2;
29317
29318 tmp /= 2;
29319
29320 /* INSN with smaller base register goes first. */
29321 tmp -= ((REGNO (base) & 0xff) << 20);
29322
29323 /* INSN with smaller offset goes first. */
29324 off_val = (int)(INTVAL (offset));
29325 if (off_val >= 0)
29326 tmp -= (off_val & 0xfffff);
29327 else
29328 tmp += ((- off_val) & 0xfffff);
29329
29330 *pri = tmp;
29331 return;
29332 }
29333 #include "gt-arm.h"