1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "input.h"
54 #include "function.h"
55 #include "expr.h"
56 #include "insn-codes.h"
57 #include "optabs.h"
58 #include "diagnostic-core.h"
59 #include "recog.h"
60 #include "predict.h"
61 #include "dominance.h"
62 #include "cfg.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "basic-block.h"
69 #include "hash-map.h"
70 #include "is-a.h"
71 #include "plugin-api.h"
72 #include "ipa-ref.h"
73 #include "cgraph.h"
74 #include "ggc.h"
75 #include "except.h"
76 #include "tm_p.h"
77 #include "target.h"
78 #include "sched-int.h"
79 #include "target-def.h"
80 #include "debug.h"
81 #include "langhooks.h"
82 #include "df.h"
83 #include "intl.h"
84 #include "libfuncs.h"
85 #include "params.h"
86 #include "opts.h"
87 #include "dumpfile.h"
88 #include "gimple-expr.h"
89 #include "builtins.h"
90 #include "tm-constrs.h"
91 #include "rtl-iter.h"
92
93 /* Forward definitions of types. */
94 typedef struct minipool_node Mnode;
95 typedef struct minipool_fixup Mfix;
96
97 void (*arm_lang_output_object_attributes_hook)(void);
98
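/* Used by optimal_immediate_sequence below to return the (up to four)
   immediates chosen when splitting a constant into a short sequence of
   instructions.  */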
99 struct four_ints
100 {
101 int i[4];
102 };
103
104 /* Forward function declarations. */
105 static bool arm_const_not_ok_for_debug_p (rtx);
106 static bool arm_lra_p (void);
107 static bool arm_needs_doubleword_align (machine_mode, const_tree);
108 static int arm_compute_static_chain_stack_bytes (void);
109 static arm_stack_offsets *arm_get_frame_offsets (void);
110 static void arm_add_gc_roots (void);
111 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
112 HOST_WIDE_INT, rtx, rtx, int, int);
113 static unsigned bit_count (unsigned long);
114 static int arm_address_register_rtx_p (rtx, int);
115 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
116 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
117 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
118 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
119 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
120 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
121 inline static int thumb1_index_register_rtx_p (rtx, int);
122 static int thumb_far_jump_used_p (void);
123 static bool thumb_force_lr_save (void);
124 static unsigned arm_size_return_regs (void);
125 static bool arm_assemble_integer (rtx, unsigned int, int);
126 static void arm_print_operand (FILE *, rtx, int);
127 static void arm_print_operand_address (FILE *, rtx);
128 static bool arm_print_operand_punct_valid_p (unsigned char code);
129 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
130 static arm_cc get_arm_condition_code (rtx);
131 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
132 static const char *output_multi_immediate (rtx *, const char *, const char *,
133 int, HOST_WIDE_INT);
134 static const char *shift_op (rtx, HOST_WIDE_INT *);
135 static struct machine_function *arm_init_machine_status (void);
136 static void thumb_exit (FILE *, int);
137 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
138 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
139 static Mnode *add_minipool_forward_ref (Mfix *);
140 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
141 static Mnode *add_minipool_backward_ref (Mfix *);
142 static void assign_minipool_offsets (Mfix *);
143 static void arm_print_value (FILE *, rtx);
144 static void dump_minipool (rtx_insn *);
145 static int arm_barrier_cost (rtx);
146 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
147 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
148 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
149 machine_mode, rtx);
150 static void arm_reorg (void);
151 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
152 static unsigned long arm_compute_save_reg0_reg12_mask (void);
153 static unsigned long arm_compute_save_reg_mask (void);
154 static unsigned long arm_isr_value (tree);
155 static unsigned long arm_compute_func_type (void);
156 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
157 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
158 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
159 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
160 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
161 #endif
162 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
163 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
164 static int arm_comp_type_attributes (const_tree, const_tree);
165 static void arm_set_default_type_attributes (tree);
166 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
167 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
168 static int optimal_immediate_sequence (enum rtx_code code,
169 unsigned HOST_WIDE_INT val,
170 struct four_ints *return_sequence);
171 static int optimal_immediate_sequence_1 (enum rtx_code code,
172 unsigned HOST_WIDE_INT val,
173 struct four_ints *return_sequence,
174 int i);
175 static int arm_get_strip_length (int);
176 static bool arm_function_ok_for_sibcall (tree, tree);
177 static machine_mode arm_promote_function_mode (const_tree,
178 machine_mode, int *,
179 const_tree, int);
180 static bool arm_return_in_memory (const_tree, const_tree);
181 static rtx arm_function_value (const_tree, const_tree, bool);
182 static rtx arm_libcall_value_1 (machine_mode);
183 static rtx arm_libcall_value (machine_mode, const_rtx);
184 static bool arm_function_value_regno_p (const unsigned int);
185 static void arm_internal_label (FILE *, const char *, unsigned long);
186 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
187 tree);
188 static bool arm_have_conditional_execution (void);
189 static bool arm_cannot_force_const_mem (machine_mode, rtx);
190 static bool arm_legitimate_constant_p (machine_mode, rtx);
191 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
192 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
193 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
194 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
195 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
196 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
197 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
198 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
199 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
200 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
201 static void emit_constant_insn (rtx cond, rtx pattern);
202 static rtx_insn *emit_set_insn (rtx, rtx);
203 static rtx emit_multi_reg_push (unsigned long, unsigned long);
204 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
205 tree, bool);
206 static rtx arm_function_arg (cumulative_args_t, machine_mode,
207 const_tree, bool);
208 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
211 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
212 const_tree);
213 static rtx aapcs_libcall_value (machine_mode);
214 static int aapcs_select_return_coproc (const_tree, const_tree);
215
216 #ifdef OBJECT_FORMAT_ELF
217 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
218 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
219 #endif
220 #ifndef ARM_PE
221 static void arm_encode_section_info (tree, rtx, int);
222 #endif
223
224 static void arm_file_end (void);
225 static void arm_file_start (void);
226
227 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
228 tree, int *, int);
229 static bool arm_pass_by_reference (cumulative_args_t,
230 machine_mode, const_tree, bool);
231 static bool arm_promote_prototypes (const_tree);
232 static bool arm_default_short_enums (void);
233 static bool arm_align_anon_bitfield (void);
234 static bool arm_return_in_msb (const_tree);
235 static bool arm_must_pass_in_stack (machine_mode, const_tree);
236 static bool arm_return_in_memory (const_tree, const_tree);
237 #if ARM_UNWIND_INFO
238 static void arm_unwind_emit (FILE *, rtx_insn *);
239 static bool arm_output_ttype (rtx);
240 static void arm_asm_emit_except_personality (rtx);
241 static void arm_asm_init_sections (void);
242 #endif
243 static rtx arm_dwarf_register_span (rtx);
244
245 static tree arm_cxx_guard_type (void);
246 static bool arm_cxx_guard_mask_bit (void);
247 static tree arm_get_cookie_size (tree);
248 static bool arm_cookie_has_size (void);
249 static bool arm_cxx_cdtor_returns_this (void);
250 static bool arm_cxx_key_method_may_be_inline (void);
251 static void arm_cxx_determine_class_data_visibility (tree);
252 static bool arm_cxx_class_data_always_comdat (void);
253 static bool arm_cxx_use_aeabi_atexit (void);
254 static void arm_init_libfuncs (void);
255 static tree arm_build_builtin_va_list (void);
256 static void arm_expand_builtin_va_start (tree, rtx);
257 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
258 static void arm_option_override (void);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
263 static bool arm_output_addr_const_extra (FILE *, rtx);
264 static bool arm_allocate_stack_slots_for_args (void);
265 static bool arm_warn_func_return (tree);
266 static const char *arm_invalid_parameter_type (const_tree t);
267 static const char *arm_invalid_return_type (const_tree t);
268 static tree arm_promoted_type (const_tree t);
269 static tree arm_convert_to_type (tree type, tree expr);
270 static bool arm_scalar_mode_supported_p (machine_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (machine_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
292 static unsigned int arm_autovectorize_vector_sizes (void);
293 static int arm_default_branch_cost (bool, bool);
294 static int arm_cortex_a5_branch_cost (bool, bool);
295 static int arm_cortex_m_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
298 const unsigned char *sel);
299
300 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
301 tree vectype,
302 int misalign ATTRIBUTE_UNUSED);
303 static unsigned arm_add_stmt_cost (void *data, int count,
304 enum vect_cost_for_stmt kind,
305 struct _stmt_vec_info *stmt_info,
306 int misalign,
307 enum vect_cost_model_location where);
308
309 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
310 bool op0_preserve_value);
311 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
312
313 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
314 \f
315 /* Table of machine attributes. */
316 static const struct attribute_spec arm_attribute_table[] =
317 {
318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
319 affects_type_identity } */
320 /* Function calls made to this symbol must be done indirectly, because
321 it may lie outside of the 26-bit addressing range of a normal function
322 call. */
323 { "long_call", 0, 0, false, true, true, NULL, false },
324 /* These functions, by contrast, are always known to reside within the
325 26-bit addressing range. */
326 { "short_call", 0, 0, false, true, true, NULL, false },
327 /* Specify the procedure call conventions for a function. */
328 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
329 false },
330 /* Interrupt Service Routines have special prologue and epilogue requirements. */
331 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
336 false },
337 #ifdef ARM_PE
338 /* ARM/PE has three new attributes:
339 interfacearm - ?
340 dllexport - for exporting a function/variable that will live in a dll
341 dllimport - for importing a function/variable from a dll
342
343 Microsoft allows multiple declspecs in one __declspec, separating
344 them with spaces. We do NOT support this. Instead, use __declspec
345 multiple times.
346 */
347 { "dllimport", 0, 0, true, false, false, NULL, false },
348 { "dllexport", 0, 0, true, false, false, NULL, false },
349 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
354 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
355 false },
356 #endif
357 { NULL, 0, 0, false, false, false, NULL, false }
358 };
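/* A minimal sketch of how the attributes above are typically spelled at the
   source level (illustrative only; the function names are hypothetical):

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));
     double vfp_func (double) __attribute__ ((pcs ("aapcs-vfp")));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void bare_func (void) __attribute__ ((naked));

   "naked" suppresses the normal prologue/epilogue, while "isr"/"interrupt"
   request the special prologue/epilogue set up via arm_handle_isr_attribute.  */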
359 \f
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
365
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
368
369 #undef TARGET_LRA_P
370 #define TARGET_LRA_P arm_lra_p
371
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
374
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
379
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
384
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
391
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
394
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
397
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
400
401 #undef TARGET_OPTION_OVERRIDE
402 #define TARGET_OPTION_OVERRIDE arm_option_override
403
404 #undef TARGET_COMP_TYPE_ATTRIBUTES
405 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
406
407 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
408 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
409
410 #undef TARGET_SCHED_ADJUST_COST
411 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
412
413 #undef TARGET_SCHED_REORDER
414 #define TARGET_SCHED_REORDER arm_sched_reorder
415
416 #undef TARGET_REGISTER_MOVE_COST
417 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
418
419 #undef TARGET_MEMORY_MOVE_COST
420 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
421
422 #undef TARGET_ENCODE_SECTION_INFO
423 #ifdef ARM_PE
424 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
425 #else
426 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
427 #endif
428
429 #undef TARGET_STRIP_NAME_ENCODING
430 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
431
432 #undef TARGET_ASM_INTERNAL_LABEL
433 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
434
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
437
438 #undef TARGET_FUNCTION_VALUE
439 #define TARGET_FUNCTION_VALUE arm_function_value
440
441 #undef TARGET_LIBCALL_VALUE
442 #define TARGET_LIBCALL_VALUE arm_libcall_value
443
444 #undef TARGET_FUNCTION_VALUE_REGNO_P
445 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
446
447 #undef TARGET_ASM_OUTPUT_MI_THUNK
448 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
451
452 #undef TARGET_RTX_COSTS
453 #define TARGET_RTX_COSTS arm_rtx_costs
454 #undef TARGET_ADDRESS_COST
455 #define TARGET_ADDRESS_COST arm_address_cost
456
457 #undef TARGET_SHIFT_TRUNCATION_MASK
458 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
459 #undef TARGET_VECTOR_MODE_SUPPORTED_P
460 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
461 #undef TARGET_ARRAY_MODE_SUPPORTED_P
462 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
463 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
464 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
465 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
466 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
467 arm_autovectorize_vector_sizes
468
469 #undef TARGET_MACHINE_DEPENDENT_REORG
470 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
471
472 #undef TARGET_INIT_BUILTINS
473 #define TARGET_INIT_BUILTINS arm_init_builtins
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
476 #undef TARGET_BUILTIN_DECL
477 #define TARGET_BUILTIN_DECL arm_builtin_decl
478
479 #undef TARGET_INIT_LIBFUNCS
480 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
481
482 #undef TARGET_PROMOTE_FUNCTION_MODE
483 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
484 #undef TARGET_PROMOTE_PROTOTYPES
485 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
486 #undef TARGET_PASS_BY_REFERENCE
487 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
488 #undef TARGET_ARG_PARTIAL_BYTES
489 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
490 #undef TARGET_FUNCTION_ARG
491 #define TARGET_FUNCTION_ARG arm_function_arg
492 #undef TARGET_FUNCTION_ARG_ADVANCE
493 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
494 #undef TARGET_FUNCTION_ARG_BOUNDARY
495 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
496
497 #undef TARGET_SETUP_INCOMING_VARARGS
498 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
499
500 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
501 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
502
503 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
504 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
505 #undef TARGET_TRAMPOLINE_INIT
506 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
507 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
508 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
509
510 #undef TARGET_WARN_FUNC_RETURN
511 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
512
513 #undef TARGET_DEFAULT_SHORT_ENUMS
514 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
515
516 #undef TARGET_ALIGN_ANON_BITFIELD
517 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
518
519 #undef TARGET_NARROW_VOLATILE_BITFIELD
520 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
521
522 #undef TARGET_CXX_GUARD_TYPE
523 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
524
525 #undef TARGET_CXX_GUARD_MASK_BIT
526 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
527
528 #undef TARGET_CXX_GET_COOKIE_SIZE
529 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
530
531 #undef TARGET_CXX_COOKIE_HAS_SIZE
532 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
533
534 #undef TARGET_CXX_CDTOR_RETURNS_THIS
535 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
536
537 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
538 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
539
540 #undef TARGET_CXX_USE_AEABI_ATEXIT
541 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
542
543 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
544 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
545 arm_cxx_determine_class_data_visibility
546
547 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
548 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
549
550 #undef TARGET_RETURN_IN_MSB
551 #define TARGET_RETURN_IN_MSB arm_return_in_msb
552
553 #undef TARGET_RETURN_IN_MEMORY
554 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
555
556 #undef TARGET_MUST_PASS_IN_STACK
557 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
558
559 #if ARM_UNWIND_INFO
560 #undef TARGET_ASM_UNWIND_EMIT
561 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
562
563 /* EABI unwinding tables use a different format for the typeinfo tables. */
564 #undef TARGET_ASM_TTYPE
565 #define TARGET_ASM_TTYPE arm_output_ttype
566
567 #undef TARGET_ARM_EABI_UNWINDER
568 #define TARGET_ARM_EABI_UNWINDER true
569
570 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
571 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
572
573 #undef TARGET_ASM_INIT_SECTIONS
574 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
575 #endif /* ARM_UNWIND_INFO */
576
577 #undef TARGET_DWARF_REGISTER_SPAN
578 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
579
580 #undef TARGET_CANNOT_COPY_INSN_P
581 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
582
583 #ifdef HAVE_AS_TLS
584 #undef TARGET_HAVE_TLS
585 #define TARGET_HAVE_TLS true
586 #endif
587
588 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
589 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
590
591 #undef TARGET_LEGITIMATE_CONSTANT_P
592 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
593
594 #undef TARGET_CANNOT_FORCE_CONST_MEM
595 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
596
597 #undef TARGET_MAX_ANCHOR_OFFSET
598 #define TARGET_MAX_ANCHOR_OFFSET 4095
599
600 /* The minimum is set such that the block for a particular anchor
601 spans offsets -4088 through +4095, i.e. 4088 + 1 + 4095 = 8184 bytes,
602 which is divisible by eight, ensuring natural spacing of anchors. */
603 #undef TARGET_MIN_ANCHOR_OFFSET
604 #define TARGET_MIN_ANCHOR_OFFSET -4088
605
606 #undef TARGET_SCHED_ISSUE_RATE
607 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
608
609 #undef TARGET_MANGLE_TYPE
610 #define TARGET_MANGLE_TYPE arm_mangle_type
611
612 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
613 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
614
615 #undef TARGET_BUILD_BUILTIN_VA_LIST
616 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
617 #undef TARGET_EXPAND_BUILTIN_VA_START
618 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
619 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
620 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
621
622 #ifdef HAVE_AS_TLS
623 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
624 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
625 #endif
626
627 #undef TARGET_LEGITIMATE_ADDRESS_P
628 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
629
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
632
633 #undef TARGET_INVALID_PARAMETER_TYPE
634 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
635
636 #undef TARGET_INVALID_RETURN_TYPE
637 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
638
639 #undef TARGET_PROMOTED_TYPE
640 #define TARGET_PROMOTED_TYPE arm_promoted_type
641
642 #undef TARGET_CONVERT_TO_TYPE
643 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
644
645 #undef TARGET_SCALAR_MODE_SUPPORTED_P
646 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
647
648 #undef TARGET_FRAME_POINTER_REQUIRED
649 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
650
651 #undef TARGET_CAN_ELIMINATE
652 #define TARGET_CAN_ELIMINATE arm_can_eliminate
653
654 #undef TARGET_CONDITIONAL_REGISTER_USAGE
655 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
656
657 #undef TARGET_CLASS_LIKELY_SPILLED_P
658 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
659
660 #undef TARGET_VECTORIZE_BUILTINS
661 #define TARGET_VECTORIZE_BUILTINS
662
663 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
664 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
665 arm_builtin_vectorized_function
666
667 #undef TARGET_VECTOR_ALIGNMENT
668 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
669
670 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
671 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
672 arm_vector_alignment_reachable
673
674 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
675 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
676 arm_builtin_support_vector_misalignment
677
678 #undef TARGET_PREFERRED_RENAME_CLASS
679 #define TARGET_PREFERRED_RENAME_CLASS \
680 arm_preferred_rename_class
681
682 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
683 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
684 arm_vectorize_vec_perm_const_ok
685
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
688 arm_builtin_vectorization_cost
689 #undef TARGET_VECTORIZE_ADD_STMT_COST
690 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
691
692 #undef TARGET_CANONICALIZE_COMPARISON
693 #define TARGET_CANONICALIZE_COMPARISON \
694 arm_canonicalize_comparison
695
696 #undef TARGET_ASAN_SHADOW_OFFSET
697 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
698
699 #undef MAX_INSN_PER_IT_BLOCK
700 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
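/* A minimal illustration (not generated code): an IT block conditionalises
   up to four following Thumb-2 instructions, e.g.

       itte  ne
       addne r0, r0, #1
       movne r1, #0
       moveq r0, #0

   When arm_restrict_it is set (ARMv8 deprecates the more general forms),
   only one conditional instruction per IT block is emitted, hence the
   1-vs-4 limit above.  */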
701
702 #undef TARGET_CAN_USE_DOLOOP_P
703 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
704
705 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
706 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
707
708 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
709 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
710
711 #undef TARGET_SCHED_FUSION_PRIORITY
712 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
713
714 struct gcc_target targetm = TARGET_INITIALIZER;
715 \f
716 /* Obstack for minipool constant handling. */
717 static struct obstack minipool_obstack;
718 static char * minipool_startobj;
719
720 /* The maximum number of insns skipped which
721 will be conditionalised if possible. */
722 static int max_insns_skipped = 5;
723
724 extern FILE * asm_out_file;
725
726 /* True if we are currently building a constant table. */
727 int making_const_table;
728
729 /* The processor for which instructions should be scheduled. */
730 enum processor_type arm_tune = arm_none;
731
732 /* The current tuning set. */
733 const struct tune_params *current_tune;
734
735 /* Which floating point hardware to schedule for. */
736 int arm_fpu_attr;
737
739 /* Which floating point hardware to use. */
739 const struct arm_fpu_desc *arm_fpu_desc;
740
741 /* Used for Thumb call_via trampolines. */
742 rtx thumb_call_via_label[14];
743 static int thumb_call_reg_needed;
744
745 /* The bits in this mask specify which
746 instructions we are allowed to generate. */
747 unsigned long insn_flags = 0;
748
749 /* The bits in this mask specify which instruction scheduling options should
750 be used. */
751 unsigned long tune_flags = 0;
752
753 /* The highest ARM architecture version supported by the
754 target. */
755 enum base_architecture arm_base_arch = BASE_ARCH_0;
756
757 /* The following are used in the arm.md file as equivalents to bits
758 in the above two flag variables. */
759
760 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
761 int arm_arch3m = 0;
762
763 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
764 int arm_arch4 = 0;
765
766 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
767 int arm_arch4t = 0;
768
769 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
770 int arm_arch5 = 0;
771
772 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
773 int arm_arch5e = 0;
774
775 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
776 int arm_arch6 = 0;
777
778 /* Nonzero if this chip supports the ARM 6K extensions. */
779 int arm_arch6k = 0;
780
781 /* Nonzero if instructions present in ARMv6-M can be used. */
782 int arm_arch6m = 0;
783
784 /* Nonzero if this chip supports the ARM 7 extensions. */
785 int arm_arch7 = 0;
786
787 /* Nonzero if instructions not present in the 'M' profile can be used. */
788 int arm_arch_notm = 0;
789
790 /* Nonzero if instructions present in ARMv7E-M can be used. */
791 int arm_arch7em = 0;
792
793 /* Nonzero if instructions present in ARMv8 can be used. */
794 int arm_arch8 = 0;
795
796 /* Nonzero if this chip can benefit from load scheduling. */
797 int arm_ld_sched = 0;
798
799 /* Nonzero if this chip is a StrongARM. */
800 int arm_tune_strongarm = 0;
801
802 /* Nonzero if this chip supports Intel Wireless MMX technology. */
803 int arm_arch_iwmmxt = 0;
804
805 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
806 int arm_arch_iwmmxt2 = 0;
807
808 /* Nonzero if this chip is an XScale. */
809 int arm_arch_xscale = 0;
810
811 /* Nonzero if tuning for XScale. */
812 int arm_tune_xscale = 0;
813
814 /* Nonzero if we want to tune for stores that access the write-buffer.
815 This typically means an ARM6 or ARM7 with MMU or MPU. */
816 int arm_tune_wbuf = 0;
817
818 /* Nonzero if tuning for Cortex-A9. */
819 int arm_tune_cortex_a9 = 0;
820
821 /* Nonzero if generating Thumb instructions. */
822 int thumb_code = 0;
823
824 /* Nonzero if generating Thumb-1 instructions. */
825 int thumb1_code = 0;
826
827 /* Nonzero if we should define __THUMB_INTERWORK__ in the
828 preprocessor.
829 XXX This is a bit of a hack; it's intended to help work around
830 problems in GLD, which doesn't understand that armv5t code is
831 interworking clean. */
832 int arm_cpp_interwork = 0;
833
834 /* Nonzero if chip supports Thumb 2. */
835 int arm_arch_thumb2;
836
837 /* Nonzero if chip supports integer division instruction. */
838 int arm_arch_arm_hwdiv;
839 int arm_arch_thumb_hwdiv;
840
841 /* Nonzero if we should use Neon to handle 64-bit operations rather
842 than core registers. */
843 int prefer_neon_for_64bits = 0;
844
845 /* Nonzero if we shouldn't use literal pools. */
846 bool arm_disable_literal_pool = false;
847
848 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
849 we must report the mode of the memory reference from
850 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
851 machine_mode output_memory_reference_mode;
852
853 /* The register number to be used for the PIC offset register. */
854 unsigned arm_pic_register = INVALID_REGNUM;
855
856 enum arm_pcs arm_pcs_default;
857
858 /* For an explanation of these variables, see final_prescan_insn below. */
859 int arm_ccfsm_state;
860 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
861 enum arm_cond_code arm_current_cc;
862
863 rtx arm_target_insn;
864 int arm_target_label;
865 /* The number of conditionally executed insns, including the current insn. */
866 int arm_condexec_count = 0;
867 /* A bitmask specifying the patterns for the IT block.
868 Zero means do not output an IT block before this insn. */
869 int arm_condexec_mask = 0;
870 /* The number of bits used in arm_condexec_mask. */
871 int arm_condexec_masklen = 0;
872
873 /* Nonzero if chip supports the ARMv8 CRC instructions. */
874 int arm_arch_crc = 0;
875
876 /* Nonzero if the core has a very small, high-latency multiply unit. */
877 int arm_m_profile_small_mul = 0;
878
879 /* The condition codes of the ARM, and the inverse function. */
880 static const char * const arm_condition_codes[] =
881 {
882 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
883 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
884 };
885
886 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
887 int arm_regs_in_sequence[] =
888 {
889 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
890 };
891
892 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
893 #define streq(string1, string2) (strcmp (string1, string2) == 0)
894
895 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
896 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
897 | (1 << PIC_OFFSET_TABLE_REGNUM)))
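/* In other words, the low registers r0-r7 (mask 0xff), minus whichever of
   the Thumb hard frame pointer, stack pointer, program counter and PIC
   register fall within that range; presumably the pool from which Thumb-2
   work registers are drawn.  */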
898 \f
899 /* Initialization code. */
900
901 struct processors
902 {
903 const char *const name;
904 enum processor_type core;
905 const char *arch;
906 enum base_architecture base_arch;
907 const unsigned long flags;
908 const struct tune_params *const tune;
909 };
910
911
912 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
913 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
914 prefetch_slots, \
915 l1_size, \
916 l1_line_size
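/* These macros simply supply the three prefetch-related tune_params fields
   (number of prefetch slots, L1 cache size and L1 cache line size).  For
   example, a hypothetical ARM_PREFETCH_BENEFICIAL (4, 32, 64) expands to
   "4, 32, 64", while ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1",
   i.e. no useful prefetch information.  */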
917
918 /* arm generic vectorizer costs. */
919 static const
920 struct cpu_vec_costs arm_default_vec_cost = {
921 1, /* scalar_stmt_cost. */
922 1, /* scalar_load_cost. */
923 1, /* scalar_store_cost. */
924 1, /* vec_stmt_cost. */
925 1, /* vec_to_scalar_cost. */
926 1, /* scalar_to_vec_cost. */
927 1, /* vec_align_load_cost. */
928 1, /* vec_unalign_load_cost. */
929 1, /* vec_unalign_store_cost. */
930 1, /* vec_store_cost. */
931 3, /* cond_taken_branch_cost. */
932 1, /* cond_not_taken_branch_cost. */
933 };
934
935 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
936 #include "aarch-cost-tables.h"
937
938
939
940 const struct cpu_cost_table cortexa9_extra_costs =
941 {
942 /* ALU */
943 {
944 0, /* arith. */
945 0, /* logical. */
946 0, /* shift. */
947 COSTS_N_INSNS (1), /* shift_reg. */
948 COSTS_N_INSNS (1), /* arith_shift. */
949 COSTS_N_INSNS (2), /* arith_shift_reg. */
950 0, /* log_shift. */
951 COSTS_N_INSNS (1), /* log_shift_reg. */
952 COSTS_N_INSNS (1), /* extend. */
953 COSTS_N_INSNS (2), /* extend_arith. */
954 COSTS_N_INSNS (1), /* bfi. */
955 COSTS_N_INSNS (1), /* bfx. */
956 0, /* clz. */
957 0, /* rev. */
958 0, /* non_exec. */
959 true /* non_exec_costs_exec. */
960 },
961 {
962 /* MULT SImode */
963 {
964 COSTS_N_INSNS (3), /* simple. */
965 COSTS_N_INSNS (3), /* flag_setting. */
966 COSTS_N_INSNS (2), /* extend. */
967 COSTS_N_INSNS (3), /* add. */
968 COSTS_N_INSNS (2), /* extend_add. */
969 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
970 },
971 /* MULT DImode */
972 {
973 0, /* simple (N/A). */
974 0, /* flag_setting (N/A). */
975 COSTS_N_INSNS (4), /* extend. */
976 0, /* add (N/A). */
977 COSTS_N_INSNS (4), /* extend_add. */
978 0 /* idiv (N/A). */
979 }
980 },
981 /* LD/ST */
982 {
983 COSTS_N_INSNS (2), /* load. */
984 COSTS_N_INSNS (2), /* load_sign_extend. */
985 COSTS_N_INSNS (2), /* ldrd. */
986 COSTS_N_INSNS (2), /* ldm_1st. */
987 1, /* ldm_regs_per_insn_1st. */
988 2, /* ldm_regs_per_insn_subsequent. */
989 COSTS_N_INSNS (5), /* loadf. */
990 COSTS_N_INSNS (5), /* loadd. */
991 COSTS_N_INSNS (1), /* load_unaligned. */
992 COSTS_N_INSNS (2), /* store. */
993 COSTS_N_INSNS (2), /* strd. */
994 COSTS_N_INSNS (2), /* stm_1st. */
995 1, /* stm_regs_per_insn_1st. */
996 2, /* stm_regs_per_insn_subsequent. */
997 COSTS_N_INSNS (1), /* storef. */
998 COSTS_N_INSNS (1), /* stored. */
999 COSTS_N_INSNS (1) /* store_unaligned. */
1000 },
1001 {
1002 /* FP SFmode */
1003 {
1004 COSTS_N_INSNS (14), /* div. */
1005 COSTS_N_INSNS (4), /* mult. */
1006 COSTS_N_INSNS (7), /* mult_addsub. */
1007 COSTS_N_INSNS (30), /* fma. */
1008 COSTS_N_INSNS (3), /* addsub. */
1009 COSTS_N_INSNS (1), /* fpconst. */
1010 COSTS_N_INSNS (1), /* neg. */
1011 COSTS_N_INSNS (3), /* compare. */
1012 COSTS_N_INSNS (3), /* widen. */
1013 COSTS_N_INSNS (3), /* narrow. */
1014 COSTS_N_INSNS (3), /* toint. */
1015 COSTS_N_INSNS (3), /* fromint. */
1016 COSTS_N_INSNS (3) /* roundint. */
1017 },
1018 /* FP DFmode */
1019 {
1020 COSTS_N_INSNS (24), /* div. */
1021 COSTS_N_INSNS (5), /* mult. */
1022 COSTS_N_INSNS (8), /* mult_addsub. */
1023 COSTS_N_INSNS (30), /* fma. */
1024 COSTS_N_INSNS (3), /* addsub. */
1025 COSTS_N_INSNS (1), /* fpconst. */
1026 COSTS_N_INSNS (1), /* neg. */
1027 COSTS_N_INSNS (3), /* compare. */
1028 COSTS_N_INSNS (3), /* widen. */
1029 COSTS_N_INSNS (3), /* narrow. */
1030 COSTS_N_INSNS (3), /* toint. */
1031 COSTS_N_INSNS (3), /* fromint. */
1032 COSTS_N_INSNS (3) /* roundint. */
1033 }
1034 },
1035 /* Vector */
1036 {
1037 COSTS_N_INSNS (1) /* alu. */
1038 }
1039 };
1040
1041 const struct cpu_cost_table cortexa8_extra_costs =
1042 {
1043 /* ALU */
1044 {
1045 0, /* arith. */
1046 0, /* logical. */
1047 COSTS_N_INSNS (1), /* shift. */
1048 0, /* shift_reg. */
1049 COSTS_N_INSNS (1), /* arith_shift. */
1050 0, /* arith_shift_reg. */
1051 COSTS_N_INSNS (1), /* log_shift. */
1052 0, /* log_shift_reg. */
1053 0, /* extend. */
1054 0, /* extend_arith. */
1055 0, /* bfi. */
1056 0, /* bfx. */
1057 0, /* clz. */
1058 0, /* rev. */
1059 0, /* non_exec. */
1060 true /* non_exec_costs_exec. */
1061 },
1062 {
1063 /* MULT SImode */
1064 {
1065 COSTS_N_INSNS (1), /* simple. */
1066 COSTS_N_INSNS (1), /* flag_setting. */
1067 COSTS_N_INSNS (1), /* extend. */
1068 COSTS_N_INSNS (1), /* add. */
1069 COSTS_N_INSNS (1), /* extend_add. */
1070 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1071 },
1072 /* MULT DImode */
1073 {
1074 0, /* simple (N/A). */
1075 0, /* flag_setting (N/A). */
1076 COSTS_N_INSNS (2), /* extend. */
1077 0, /* add (N/A). */
1078 COSTS_N_INSNS (2), /* extend_add. */
1079 0 /* idiv (N/A). */
1080 }
1081 },
1082 /* LD/ST */
1083 {
1084 COSTS_N_INSNS (1), /* load. */
1085 COSTS_N_INSNS (1), /* load_sign_extend. */
1086 COSTS_N_INSNS (1), /* ldrd. */
1087 COSTS_N_INSNS (1), /* ldm_1st. */
1088 1, /* ldm_regs_per_insn_1st. */
1089 2, /* ldm_regs_per_insn_subsequent. */
1090 COSTS_N_INSNS (1), /* loadf. */
1091 COSTS_N_INSNS (1), /* loadd. */
1092 COSTS_N_INSNS (1), /* load_unaligned. */
1093 COSTS_N_INSNS (1), /* store. */
1094 COSTS_N_INSNS (1), /* strd. */
1095 COSTS_N_INSNS (1), /* stm_1st. */
1096 1, /* stm_regs_per_insn_1st. */
1097 2, /* stm_regs_per_insn_subsequent. */
1098 COSTS_N_INSNS (1), /* storef. */
1099 COSTS_N_INSNS (1), /* stored. */
1100 COSTS_N_INSNS (1) /* store_unaligned. */
1101 },
1102 {
1103 /* FP SFmode */
1104 {
1105 COSTS_N_INSNS (36), /* div. */
1106 COSTS_N_INSNS (11), /* mult. */
1107 COSTS_N_INSNS (20), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (9), /* addsub. */
1110 COSTS_N_INSNS (3), /* fpconst. */
1111 COSTS_N_INSNS (3), /* neg. */
1112 COSTS_N_INSNS (6), /* compare. */
1113 COSTS_N_INSNS (4), /* widen. */
1114 COSTS_N_INSNS (4), /* narrow. */
1115 COSTS_N_INSNS (8), /* toint. */
1116 COSTS_N_INSNS (8), /* fromint. */
1117 COSTS_N_INSNS (8) /* roundint. */
1118 },
1119 /* FP DFmode */
1120 {
1121 COSTS_N_INSNS (64), /* div. */
1122 COSTS_N_INSNS (16), /* mult. */
1123 COSTS_N_INSNS (25), /* mult_addsub. */
1124 COSTS_N_INSNS (30), /* fma. */
1125 COSTS_N_INSNS (9), /* addsub. */
1126 COSTS_N_INSNS (3), /* fpconst. */
1127 COSTS_N_INSNS (3), /* neg. */
1128 COSTS_N_INSNS (6), /* compare. */
1129 COSTS_N_INSNS (6), /* widen. */
1130 COSTS_N_INSNS (6), /* narrow. */
1131 COSTS_N_INSNS (8), /* toint. */
1132 COSTS_N_INSNS (8), /* fromint. */
1133 COSTS_N_INSNS (8) /* roundint. */
1134 }
1135 },
1136 /* Vector */
1137 {
1138 COSTS_N_INSNS (1) /* alu. */
1139 }
1140 };
1141
1142 const struct cpu_cost_table cortexa5_extra_costs =
1143 {
1144 /* ALU */
1145 {
1146 0, /* arith. */
1147 0, /* logical. */
1148 COSTS_N_INSNS (1), /* shift. */
1149 COSTS_N_INSNS (1), /* shift_reg. */
1150 COSTS_N_INSNS (1), /* arith_shift. */
1151 COSTS_N_INSNS (1), /* arith_shift_reg. */
1152 COSTS_N_INSNS (1), /* log_shift. */
1153 COSTS_N_INSNS (1), /* log_shift_reg. */
1154 COSTS_N_INSNS (1), /* extend. */
1155 COSTS_N_INSNS (1), /* extend_arith. */
1156 COSTS_N_INSNS (1), /* bfi. */
1157 COSTS_N_INSNS (1), /* bfx. */
1158 COSTS_N_INSNS (1), /* clz. */
1159 COSTS_N_INSNS (1), /* rev. */
1160 0, /* non_exec. */
1161 true /* non_exec_costs_exec. */
1162 },
1163
1164 {
1165 /* MULT SImode */
1166 {
1167 0, /* simple. */
1168 COSTS_N_INSNS (1), /* flag_setting. */
1169 COSTS_N_INSNS (1), /* extend. */
1170 COSTS_N_INSNS (1), /* add. */
1171 COSTS_N_INSNS (1), /* extend_add. */
1172 COSTS_N_INSNS (7) /* idiv. */
1173 },
1174 /* MULT DImode */
1175 {
1176 0, /* simple (N/A). */
1177 0, /* flag_setting (N/A). */
1178 COSTS_N_INSNS (1), /* extend. */
1179 0, /* add. */
1180 COSTS_N_INSNS (2), /* extend_add. */
1181 0 /* idiv (N/A). */
1182 }
1183 },
1184 /* LD/ST */
1185 {
1186 COSTS_N_INSNS (1), /* load. */
1187 COSTS_N_INSNS (1), /* load_sign_extend. */
1188 COSTS_N_INSNS (6), /* ldrd. */
1189 COSTS_N_INSNS (1), /* ldm_1st. */
1190 1, /* ldm_regs_per_insn_1st. */
1191 2, /* ldm_regs_per_insn_subsequent. */
1192 COSTS_N_INSNS (2), /* loadf. */
1193 COSTS_N_INSNS (4), /* loadd. */
1194 COSTS_N_INSNS (1), /* load_unaligned. */
1195 COSTS_N_INSNS (1), /* store. */
1196 COSTS_N_INSNS (3), /* strd. */
1197 COSTS_N_INSNS (1), /* stm_1st. */
1198 1, /* stm_regs_per_insn_1st. */
1199 2, /* stm_regs_per_insn_subsequent. */
1200 COSTS_N_INSNS (2), /* storef. */
1201 COSTS_N_INSNS (2), /* stored. */
1202 COSTS_N_INSNS (1) /* store_unaligned. */
1203 },
1204 {
1205 /* FP SFmode */
1206 {
1207 COSTS_N_INSNS (15), /* div. */
1208 COSTS_N_INSNS (3), /* mult. */
1209 COSTS_N_INSNS (7), /* mult_addsub. */
1210 COSTS_N_INSNS (7), /* fma. */
1211 COSTS_N_INSNS (3), /* addsub. */
1212 COSTS_N_INSNS (3), /* fpconst. */
1213 COSTS_N_INSNS (3), /* neg. */
1214 COSTS_N_INSNS (3), /* compare. */
1215 COSTS_N_INSNS (3), /* widen. */
1216 COSTS_N_INSNS (3), /* narrow. */
1217 COSTS_N_INSNS (3), /* toint. */
1218 COSTS_N_INSNS (3), /* fromint. */
1219 COSTS_N_INSNS (3) /* roundint. */
1220 },
1221 /* FP DFmode */
1222 {
1223 COSTS_N_INSNS (30), /* div. */
1224 COSTS_N_INSNS (6), /* mult. */
1225 COSTS_N_INSNS (10), /* mult_addsub. */
1226 COSTS_N_INSNS (7), /* fma. */
1227 COSTS_N_INSNS (3), /* addsub. */
1228 COSTS_N_INSNS (3), /* fpconst. */
1229 COSTS_N_INSNS (3), /* neg. */
1230 COSTS_N_INSNS (3), /* compare. */
1231 COSTS_N_INSNS (3), /* widen. */
1232 COSTS_N_INSNS (3), /* narrow. */
1233 COSTS_N_INSNS (3), /* toint. */
1234 COSTS_N_INSNS (3), /* fromint. */
1235 COSTS_N_INSNS (3) /* roundint. */
1236 }
1237 },
1238 /* Vector */
1239 {
1240 COSTS_N_INSNS (1) /* alu. */
1241 }
1242 };
1243
1244
1245 const struct cpu_cost_table cortexa7_extra_costs =
1246 {
1247 /* ALU */
1248 {
1249 0, /* arith. */
1250 0, /* logical. */
1251 COSTS_N_INSNS (1), /* shift. */
1252 COSTS_N_INSNS (1), /* shift_reg. */
1253 COSTS_N_INSNS (1), /* arith_shift. */
1254 COSTS_N_INSNS (1), /* arith_shift_reg. */
1255 COSTS_N_INSNS (1), /* log_shift. */
1256 COSTS_N_INSNS (1), /* log_shift_reg. */
1257 COSTS_N_INSNS (1), /* extend. */
1258 COSTS_N_INSNS (1), /* extend_arith. */
1259 COSTS_N_INSNS (1), /* bfi. */
1260 COSTS_N_INSNS (1), /* bfx. */
1261 COSTS_N_INSNS (1), /* clz. */
1262 COSTS_N_INSNS (1), /* rev. */
1263 0, /* non_exec. */
1264 true /* non_exec_costs_exec. */
1265 },
1266
1267 {
1268 /* MULT SImode */
1269 {
1270 0, /* simple. */
1271 COSTS_N_INSNS (1), /* flag_setting. */
1272 COSTS_N_INSNS (1), /* extend. */
1273 COSTS_N_INSNS (1), /* add. */
1274 COSTS_N_INSNS (1), /* extend_add. */
1275 COSTS_N_INSNS (7) /* idiv. */
1276 },
1277 /* MULT DImode */
1278 {
1279 0, /* simple (N/A). */
1280 0, /* flag_setting (N/A). */
1281 COSTS_N_INSNS (1), /* extend. */
1282 0, /* add. */
1283 COSTS_N_INSNS (2), /* extend_add. */
1284 0 /* idiv (N/A). */
1285 }
1286 },
1287 /* LD/ST */
1288 {
1289 COSTS_N_INSNS (1), /* load. */
1290 COSTS_N_INSNS (1), /* load_sign_extend. */
1291 COSTS_N_INSNS (3), /* ldrd. */
1292 COSTS_N_INSNS (1), /* ldm_1st. */
1293 1, /* ldm_regs_per_insn_1st. */
1294 2, /* ldm_regs_per_insn_subsequent. */
1295 COSTS_N_INSNS (2), /* loadf. */
1296 COSTS_N_INSNS (2), /* loadd. */
1297 COSTS_N_INSNS (1), /* load_unaligned. */
1298 COSTS_N_INSNS (1), /* store. */
1299 COSTS_N_INSNS (3), /* strd. */
1300 COSTS_N_INSNS (1), /* stm_1st. */
1301 1, /* stm_regs_per_insn_1st. */
1302 2, /* stm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* storef. */
1304 COSTS_N_INSNS (2), /* stored. */
1305 COSTS_N_INSNS (1) /* store_unaligned. */
1306 },
1307 {
1308 /* FP SFmode */
1309 {
1310 COSTS_N_INSNS (15), /* div. */
1311 COSTS_N_INSNS (3), /* mult. */
1312 COSTS_N_INSNS (7), /* mult_addsub. */
1313 COSTS_N_INSNS (7), /* fma. */
1314 COSTS_N_INSNS (3), /* addsub. */
1315 COSTS_N_INSNS (3), /* fpconst. */
1316 COSTS_N_INSNS (3), /* neg. */
1317 COSTS_N_INSNS (3), /* compare. */
1318 COSTS_N_INSNS (3), /* widen. */
1319 COSTS_N_INSNS (3), /* narrow. */
1320 COSTS_N_INSNS (3), /* toint. */
1321 COSTS_N_INSNS (3), /* fromint. */
1322 COSTS_N_INSNS (3) /* roundint. */
1323 },
1324 /* FP DFmode */
1325 {
1326 COSTS_N_INSNS (30), /* div. */
1327 COSTS_N_INSNS (6), /* mult. */
1328 COSTS_N_INSNS (10), /* mult_addsub. */
1329 COSTS_N_INSNS (7), /* fma. */
1330 COSTS_N_INSNS (3), /* addsub. */
1331 COSTS_N_INSNS (3), /* fpconst. */
1332 COSTS_N_INSNS (3), /* neg. */
1333 COSTS_N_INSNS (3), /* compare. */
1334 COSTS_N_INSNS (3), /* widen. */
1335 COSTS_N_INSNS (3), /* narrow. */
1336 COSTS_N_INSNS (3), /* toint. */
1337 COSTS_N_INSNS (3), /* fromint. */
1338 COSTS_N_INSNS (3) /* roundint. */
1339 }
1340 },
1341 /* Vector */
1342 {
1343 COSTS_N_INSNS (1) /* alu. */
1344 }
1345 };
1346
1347 const struct cpu_cost_table cortexa12_extra_costs =
1348 {
1349 /* ALU */
1350 {
1351 0, /* arith. */
1352 0, /* logical. */
1353 0, /* shift. */
1354 COSTS_N_INSNS (1), /* shift_reg. */
1355 COSTS_N_INSNS (1), /* arith_shift. */
1356 COSTS_N_INSNS (1), /* arith_shift_reg. */
1357 COSTS_N_INSNS (1), /* log_shift. */
1358 COSTS_N_INSNS (1), /* log_shift_reg. */
1359 0, /* extend. */
1360 COSTS_N_INSNS (1), /* extend_arith. */
1361 0, /* bfi. */
1362 COSTS_N_INSNS (1), /* bfx. */
1363 COSTS_N_INSNS (1), /* clz. */
1364 COSTS_N_INSNS (1), /* rev. */
1365 0, /* non_exec. */
1366 true /* non_exec_costs_exec. */
1367 },
1368 /* MULT SImode */
1369 {
1370 {
1371 COSTS_N_INSNS (2), /* simple. */
1372 COSTS_N_INSNS (3), /* flag_setting. */
1373 COSTS_N_INSNS (2), /* extend. */
1374 COSTS_N_INSNS (3), /* add. */
1375 COSTS_N_INSNS (2), /* extend_add. */
1376 COSTS_N_INSNS (18) /* idiv. */
1377 },
1378 /* MULT DImode */
1379 {
1380 0, /* simple (N/A). */
1381 0, /* flag_setting (N/A). */
1382 COSTS_N_INSNS (3), /* extend. */
1383 0, /* add (N/A). */
1384 COSTS_N_INSNS (3), /* extend_add. */
1385 0 /* idiv (N/A). */
1386 }
1387 },
1388 /* LD/ST */
1389 {
1390 COSTS_N_INSNS (3), /* load. */
1391 COSTS_N_INSNS (3), /* load_sign_extend. */
1392 COSTS_N_INSNS (3), /* ldrd. */
1393 COSTS_N_INSNS (3), /* ldm_1st. */
1394 1, /* ldm_regs_per_insn_1st. */
1395 2, /* ldm_regs_per_insn_subsequent. */
1396 COSTS_N_INSNS (3), /* loadf. */
1397 COSTS_N_INSNS (3), /* loadd. */
1398 0, /* load_unaligned. */
1399 0, /* store. */
1400 0, /* strd. */
1401 0, /* stm_1st. */
1402 1, /* stm_regs_per_insn_1st. */
1403 2, /* stm_regs_per_insn_subsequent. */
1404 COSTS_N_INSNS (2), /* storef. */
1405 COSTS_N_INSNS (2), /* stored. */
1406 0 /* store_unaligned. */
1407 },
1408 {
1409 /* FP SFmode */
1410 {
1411 COSTS_N_INSNS (17), /* div. */
1412 COSTS_N_INSNS (4), /* mult. */
1413 COSTS_N_INSNS (8), /* mult_addsub. */
1414 COSTS_N_INSNS (8), /* fma. */
1415 COSTS_N_INSNS (4), /* addsub. */
1416 COSTS_N_INSNS (2), /* fpconst. */
1417 COSTS_N_INSNS (2), /* neg. */
1418 COSTS_N_INSNS (2), /* compare. */
1419 COSTS_N_INSNS (4), /* widen. */
1420 COSTS_N_INSNS (4), /* narrow. */
1421 COSTS_N_INSNS (4), /* toint. */
1422 COSTS_N_INSNS (4), /* fromint. */
1423 COSTS_N_INSNS (4) /* roundint. */
1424 },
1425 /* FP DFmode */
1426 {
1427 COSTS_N_INSNS (31), /* div. */
1428 COSTS_N_INSNS (4), /* mult. */
1429 COSTS_N_INSNS (8), /* mult_addsub. */
1430 COSTS_N_INSNS (8), /* fma. */
1431 COSTS_N_INSNS (4), /* addsub. */
1432 COSTS_N_INSNS (2), /* fpconst. */
1433 COSTS_N_INSNS (2), /* neg. */
1434 COSTS_N_INSNS (2), /* compare. */
1435 COSTS_N_INSNS (4), /* widen. */
1436 COSTS_N_INSNS (4), /* narrow. */
1437 COSTS_N_INSNS (4), /* toint. */
1438 COSTS_N_INSNS (4), /* fromint. */
1439 COSTS_N_INSNS (4) /* roundint. */
1440 }
1441 },
1442 /* Vector */
1443 {
1444 COSTS_N_INSNS (1) /* alu. */
1445 }
1446 };
1447
1448 const struct cpu_cost_table cortexa15_extra_costs =
1449 {
1450 /* ALU */
1451 {
1452 0, /* arith. */
1453 0, /* logical. */
1454 0, /* shift. */
1455 0, /* shift_reg. */
1456 COSTS_N_INSNS (1), /* arith_shift. */
1457 COSTS_N_INSNS (1), /* arith_shift_reg. */
1458 COSTS_N_INSNS (1), /* log_shift. */
1459 COSTS_N_INSNS (1), /* log_shift_reg. */
1460 0, /* extend. */
1461 COSTS_N_INSNS (1), /* extend_arith. */
1462 COSTS_N_INSNS (1), /* bfi. */
1463 0, /* bfx. */
1464 0, /* clz. */
1465 0, /* rev. */
1466 0, /* non_exec. */
1467 true /* non_exec_costs_exec. */
1468 },
1469 /* MULT SImode */
1470 {
1471 {
1472 COSTS_N_INSNS (2), /* simple. */
1473 COSTS_N_INSNS (3), /* flag_setting. */
1474 COSTS_N_INSNS (2), /* extend. */
1475 COSTS_N_INSNS (2), /* add. */
1476 COSTS_N_INSNS (2), /* extend_add. */
1477 COSTS_N_INSNS (18) /* idiv. */
1478 },
1479 /* MULT DImode */
1480 {
1481 0, /* simple (N/A). */
1482 0, /* flag_setting (N/A). */
1483 COSTS_N_INSNS (3), /* extend. */
1484 0, /* add (N/A). */
1485 COSTS_N_INSNS (3), /* extend_add. */
1486 0 /* idiv (N/A). */
1487 }
1488 },
1489 /* LD/ST */
1490 {
1491 COSTS_N_INSNS (3), /* load. */
1492 COSTS_N_INSNS (3), /* load_sign_extend. */
1493 COSTS_N_INSNS (3), /* ldrd. */
1494 COSTS_N_INSNS (4), /* ldm_1st. */
1495 1, /* ldm_regs_per_insn_1st. */
1496 2, /* ldm_regs_per_insn_subsequent. */
1497 COSTS_N_INSNS (4), /* loadf. */
1498 COSTS_N_INSNS (4), /* loadd. */
1499 0, /* load_unaligned. */
1500 0, /* store. */
1501 0, /* strd. */
1502 COSTS_N_INSNS (1), /* stm_1st. */
1503 1, /* stm_regs_per_insn_1st. */
1504 2, /* stm_regs_per_insn_subsequent. */
1505 0, /* storef. */
1506 0, /* stored. */
1507 0 /* store_unaligned. */
1508 },
1509 {
1510 /* FP SFmode */
1511 {
1512 COSTS_N_INSNS (17), /* div. */
1513 COSTS_N_INSNS (4), /* mult. */
1514 COSTS_N_INSNS (8), /* mult_addsub. */
1515 COSTS_N_INSNS (8), /* fma. */
1516 COSTS_N_INSNS (4), /* addsub. */
1517 COSTS_N_INSNS (2), /* fpconst. */
1518 COSTS_N_INSNS (2), /* neg. */
1519 COSTS_N_INSNS (5), /* compare. */
1520 COSTS_N_INSNS (4), /* widen. */
1521 COSTS_N_INSNS (4), /* narrow. */
1522 COSTS_N_INSNS (4), /* toint. */
1523 COSTS_N_INSNS (4), /* fromint. */
1524 COSTS_N_INSNS (4) /* roundint. */
1525 },
1526 /* FP DFmode */
1527 {
1528 COSTS_N_INSNS (31), /* div. */
1529 COSTS_N_INSNS (4), /* mult. */
1530 COSTS_N_INSNS (8), /* mult_addsub. */
1531 COSTS_N_INSNS (8), /* fma. */
1532 COSTS_N_INSNS (4), /* addsub. */
1533 COSTS_N_INSNS (2), /* fpconst. */
1534 COSTS_N_INSNS (2), /* neg. */
1535 COSTS_N_INSNS (2), /* compare. */
1536 COSTS_N_INSNS (4), /* widen. */
1537 COSTS_N_INSNS (4), /* narrow. */
1538 COSTS_N_INSNS (4), /* toint. */
1539 COSTS_N_INSNS (4), /* fromint. */
1540 COSTS_N_INSNS (4) /* roundint. */
1541 }
1542 },
1543 /* Vector */
1544 {
1545 COSTS_N_INSNS (1) /* alu. */
1546 }
1547 };
1548
1549 const struct cpu_cost_table v7m_extra_costs =
1550 {
1551 /* ALU */
1552 {
1553 0, /* arith. */
1554 0, /* logical. */
1555 0, /* shift. */
1556 0, /* shift_reg. */
1557 0, /* arith_shift. */
1558 COSTS_N_INSNS (1), /* arith_shift_reg. */
1559 0, /* log_shift. */
1560 COSTS_N_INSNS (1), /* log_shift_reg. */
1561 0, /* extend. */
1562 COSTS_N_INSNS (1), /* extend_arith. */
1563 0, /* bfi. */
1564 0, /* bfx. */
1565 0, /* clz. */
1566 0, /* rev. */
1567 COSTS_N_INSNS (1), /* non_exec. */
1568 false /* non_exec_costs_exec. */
1569 },
1570 {
1571 /* MULT SImode */
1572 {
1573 COSTS_N_INSNS (1), /* simple. */
1574 COSTS_N_INSNS (1), /* flag_setting. */
1575 COSTS_N_INSNS (2), /* extend. */
1576 COSTS_N_INSNS (1), /* add. */
1577 COSTS_N_INSNS (3), /* extend_add. */
1578 COSTS_N_INSNS (8) /* idiv. */
1579 },
1580 /* MULT DImode */
1581 {
1582 0, /* simple (N/A). */
1583 0, /* flag_setting (N/A). */
1584 COSTS_N_INSNS (2), /* extend. */
1585 0, /* add (N/A). */
1586 COSTS_N_INSNS (3), /* extend_add. */
1587 0 /* idiv (N/A). */
1588 }
1589 },
1590 /* LD/ST */
1591 {
1592 COSTS_N_INSNS (2), /* load. */
1593 0, /* load_sign_extend. */
1594 COSTS_N_INSNS (3), /* ldrd. */
1595 COSTS_N_INSNS (2), /* ldm_1st. */
1596 1, /* ldm_regs_per_insn_1st. */
1597 1, /* ldm_regs_per_insn_subsequent. */
1598 COSTS_N_INSNS (2), /* loadf. */
1599 COSTS_N_INSNS (3), /* loadd. */
1600 COSTS_N_INSNS (1), /* load_unaligned. */
1601 COSTS_N_INSNS (2), /* store. */
1602 COSTS_N_INSNS (3), /* strd. */
1603 COSTS_N_INSNS (2), /* stm_1st. */
1604 1, /* stm_regs_per_insn_1st. */
1605 1, /* stm_regs_per_insn_subsequent. */
1606 COSTS_N_INSNS (2), /* storef. */
1607 COSTS_N_INSNS (3), /* stored. */
1608 COSTS_N_INSNS (1) /* store_unaligned. */
1609 },
1610 {
1611 /* FP SFmode */
1612 {
1613 COSTS_N_INSNS (7), /* div. */
1614 COSTS_N_INSNS (2), /* mult. */
1615 COSTS_N_INSNS (5), /* mult_addsub. */
1616 COSTS_N_INSNS (3), /* fma. */
1617 COSTS_N_INSNS (1), /* addsub. */
1618 0, /* fpconst. */
1619 0, /* neg. */
1620 0, /* compare. */
1621 0, /* widen. */
1622 0, /* narrow. */
1623 0, /* toint. */
1624 0, /* fromint. */
1625 0 /* roundint. */
1626 },
1627 /* FP DFmode */
1628 {
1629 COSTS_N_INSNS (15), /* div. */
1630 COSTS_N_INSNS (5), /* mult. */
1631 COSTS_N_INSNS (7), /* mult_addsub. */
1632 COSTS_N_INSNS (7), /* fma. */
1633 COSTS_N_INSNS (3), /* addsub. */
1634 0, /* fpconst. */
1635 0, /* neg. */
1636 0, /* compare. */
1637 0, /* widen. */
1638 0, /* narrow. */
1639 0, /* toint. */
1640 0, /* fromint. */
1641 0 /* roundint. */
1642 }
1643 },
1644 /* Vector */
1645 {
1646 COSTS_N_INSNS (1) /* alu. */
1647 }
1648 };
1649
1650 const struct tune_params arm_slowmul_tune =
1651 {
1652 arm_slowmul_rtx_costs,
1653 NULL,
1654 NULL, /* Sched adj cost. */
1655 3, /* Constant limit. */
1656 5, /* Max cond insns. */
1657 ARM_PREFETCH_NOT_BENEFICIAL,
1658 true, /* Prefer constant pool. */
1659 arm_default_branch_cost,
1660 false, /* Prefer LDRD/STRD. */
1661 {true, true}, /* Prefer non short circuit. */
1662 &arm_default_vec_cost, /* Vectorizer costs. */
1663 false, /* Prefer Neon for 64-bits bitops. */
1664 false, false, /* Prefer 32-bit encodings. */
1665 false, /* Prefer Neon for stringops. */
1666 8 /* Maximum insns to inline memset. */
1667 };
1668
1669 const struct tune_params arm_fastmul_tune =
1670 {
1671 arm_fastmul_rtx_costs,
1672 NULL,
1673 NULL, /* Sched adj cost. */
1674 1, /* Constant limit. */
1675 5, /* Max cond insns. */
1676 ARM_PREFETCH_NOT_BENEFICIAL,
1677 true, /* Prefer constant pool. */
1678 arm_default_branch_cost,
1679 false, /* Prefer LDRD/STRD. */
1680 {true, true}, /* Prefer non short circuit. */
1681 &arm_default_vec_cost, /* Vectorizer costs. */
1682 false, /* Prefer Neon for 64-bits bitops. */
1683 false, false, /* Prefer 32-bit encodings. */
1684 false, /* Prefer Neon for stringops. */
1685 8 /* Maximum insns to inline memset. */
1686 };
1687
1688 /* StrongARM has early execution of branches, so a sequence that is worth
1689 skipping is shorter. Set max_insns_skipped to a lower value. */
1690
1691 const struct tune_params arm_strongarm_tune =
1692 {
1693 arm_fastmul_rtx_costs,
1694 NULL,
1695 NULL, /* Sched adj cost. */
1696 1, /* Constant limit. */
1697 3, /* Max cond insns. */
1698 ARM_PREFETCH_NOT_BENEFICIAL,
1699 true, /* Prefer constant pool. */
1700 arm_default_branch_cost,
1701 false, /* Prefer LDRD/STRD. */
1702 {true, true}, /* Prefer non short circuit. */
1703 &arm_default_vec_cost, /* Vectorizer costs. */
1704 false, /* Prefer Neon for 64-bits bitops. */
1705 false, false, /* Prefer 32-bit encodings. */
1706 false, /* Prefer Neon for stringops. */
1707 8 /* Maximum insns to inline memset. */
1708 };
1709
1710 const struct tune_params arm_xscale_tune =
1711 {
1712 arm_xscale_rtx_costs,
1713 NULL,
1714 xscale_sched_adjust_cost,
1715 2, /* Constant limit. */
1716 3, /* Max cond insns. */
1717 ARM_PREFETCH_NOT_BENEFICIAL,
1718 true, /* Prefer constant pool. */
1719 arm_default_branch_cost,
1720 false, /* Prefer LDRD/STRD. */
1721 {true, true}, /* Prefer non short circuit. */
1722 &arm_default_vec_cost, /* Vectorizer costs. */
1723 false, /* Prefer Neon for 64-bits bitops. */
1724 false, false, /* Prefer 32-bit encodings. */
1725 false, /* Prefer Neon for stringops. */
1726 8 /* Maximum insns to inline memset. */
1727 };
1728
1729 const struct tune_params arm_9e_tune =
1730 {
1731 arm_9e_rtx_costs,
1732 NULL,
1733 NULL, /* Sched adj cost. */
1734 1, /* Constant limit. */
1735 5, /* Max cond insns. */
1736 ARM_PREFETCH_NOT_BENEFICIAL,
1737 true, /* Prefer constant pool. */
1738 arm_default_branch_cost,
1739 false, /* Prefer LDRD/STRD. */
1740 {true, true}, /* Prefer non short circuit. */
1741 &arm_default_vec_cost, /* Vectorizer costs. */
1742 false, /* Prefer Neon for 64-bits bitops. */
1743 false, false, /* Prefer 32-bit encodings. */
1744 false, /* Prefer Neon for stringops. */
1745 8 /* Maximum insns to inline memset. */
1746 };
1747
1748 const struct tune_params arm_v6t2_tune =
1749 {
1750 arm_9e_rtx_costs,
1751 NULL,
1752 NULL, /* Sched adj cost. */
1753 1, /* Constant limit. */
1754 5, /* Max cond insns. */
1755 ARM_PREFETCH_NOT_BENEFICIAL,
1756 false, /* Prefer constant pool. */
1757 arm_default_branch_cost,
1758 false, /* Prefer LDRD/STRD. */
1759 {true, true}, /* Prefer non short circuit. */
1760 &arm_default_vec_cost, /* Vectorizer costs. */
1761 false, /* Prefer Neon for 64-bits bitops. */
1762 false, false, /* Prefer 32-bit encodings. */
1763 false, /* Prefer Neon for stringops. */
1764 8 /* Maximum insns to inline memset. */
1765 };
1766
1767 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1768 const struct tune_params arm_cortex_tune =
1769 {
1770 arm_9e_rtx_costs,
1771 &generic_extra_costs,
1772 NULL, /* Sched adj cost. */
1773 1, /* Constant limit. */
1774 5, /* Max cond insns. */
1775 ARM_PREFETCH_NOT_BENEFICIAL,
1776 false, /* Prefer constant pool. */
1777 arm_default_branch_cost,
1778 false, /* Prefer LDRD/STRD. */
1779 {true, true}, /* Prefer non short circuit. */
1780 &arm_default_vec_cost, /* Vectorizer costs. */
1781 false, /* Prefer Neon for 64-bits bitops. */
1782 false, false, /* Prefer 32-bit encodings. */
1783 false, /* Prefer Neon for stringops. */
1784 8 /* Maximum insns to inline memset. */
1785 };
1786
1787 const struct tune_params arm_cortex_a8_tune =
1788 {
1789 arm_9e_rtx_costs,
1790 &cortexa8_extra_costs,
1791 NULL, /* Sched adj cost. */
1792 1, /* Constant limit. */
1793 5, /* Max cond insns. */
1794 ARM_PREFETCH_NOT_BENEFICIAL,
1795 false, /* Prefer constant pool. */
1796 arm_default_branch_cost,
1797 false, /* Prefer LDRD/STRD. */
1798 {true, true}, /* Prefer non short circuit. */
1799 &arm_default_vec_cost, /* Vectorizer costs. */
1800 false, /* Prefer Neon for 64-bits bitops. */
1801 false, false, /* Prefer 32-bit encodings. */
1802 true, /* Prefer Neon for stringops. */
1803 8 /* Maximum insns to inline memset. */
1804 };
1805
1806 const struct tune_params arm_cortex_a7_tune =
1807 {
1808 arm_9e_rtx_costs,
1809 &cortexa7_extra_costs,
1810 NULL,
1811 1, /* Constant limit. */
1812 5, /* Max cond insns. */
1813 ARM_PREFETCH_NOT_BENEFICIAL,
1814 false, /* Prefer constant pool. */
1815 arm_default_branch_cost,
1816 false, /* Prefer LDRD/STRD. */
1817 {true, true}, /* Prefer non short circuit. */
1818 &arm_default_vec_cost, /* Vectorizer costs. */
1819 false, /* Prefer Neon for 64-bits bitops. */
1820 false, false, /* Prefer 32-bit encodings. */
1821 true, /* Prefer Neon for stringops. */
1822 8 /* Maximum insns to inline memset. */
1823 };
1824
1825 const struct tune_params arm_cortex_a15_tune =
1826 {
1827 arm_9e_rtx_costs,
1828 &cortexa15_extra_costs,
1829 NULL, /* Sched adj cost. */
1830 1, /* Constant limit. */
1831 2, /* Max cond insns. */
1832 ARM_PREFETCH_NOT_BENEFICIAL,
1833 false, /* Prefer constant pool. */
1834 arm_default_branch_cost,
1835 true, /* Prefer LDRD/STRD. */
1836 {true, true}, /* Prefer non short circuit. */
1837 &arm_default_vec_cost, /* Vectorizer costs. */
1838 false, /* Prefer Neon for 64-bits bitops. */
1839 true, true, /* Prefer 32-bit encodings. */
1840 true, /* Prefer Neon for stringops. */
1841 8 /* Maximum insns to inline memset. */
1842 };
1843
1844 const struct tune_params arm_cortex_a53_tune =
1845 {
1846 arm_9e_rtx_costs,
1847 &cortexa53_extra_costs,
1848 NULL, /* Scheduler cost adjustment. */
1849 1, /* Constant limit. */
1850 5, /* Max cond insns. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 false, /* Prefer constant pool. */
1853 arm_default_branch_cost,
1854 false, /* Prefer LDRD/STRD. */
1855 {true, true}, /* Prefer non short circuit. */
1856 &arm_default_vec_cost, /* Vectorizer costs. */
1857 false, /* Prefer Neon for 64-bits bitops. */
1858 false, false, /* Prefer 32-bit encodings. */
1859 false, /* Prefer Neon for stringops. */
1860 8 /* Maximum insns to inline memset. */
1861 };
1862
1863 const struct tune_params arm_cortex_a57_tune =
1864 {
1865 arm_9e_rtx_costs,
1866 &cortexa57_extra_costs,
1867 NULL, /* Scheduler cost adjustment. */
1868 1, /* Constant limit. */
1869 2, /* Max cond insns. */
1870 ARM_PREFETCH_NOT_BENEFICIAL,
1871 false, /* Prefer constant pool. */
1872 arm_default_branch_cost,
1873 true, /* Prefer LDRD/STRD. */
1874 {true, true}, /* Prefer non short circuit. */
1875 &arm_default_vec_cost, /* Vectorizer costs. */
1876 false, /* Prefer Neon for 64-bits bitops. */
1877 true, true, /* Prefer 32-bit encodings. */
1878 false, /* Prefer Neon for stringops. */
1879 8 /* Maximum insns to inline memset. */
1880 };
1881
1882 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1883 less appealing. Set max_insns_skipped to a low value. */
1884
1885 const struct tune_params arm_cortex_a5_tune =
1886 {
1887 arm_9e_rtx_costs,
1888 &cortexa5_extra_costs,
1889 NULL, /* Sched adj cost. */
1890 1, /* Constant limit. */
1891 1, /* Max cond insns. */
1892 ARM_PREFETCH_NOT_BENEFICIAL,
1893 false, /* Prefer constant pool. */
1894 arm_cortex_a5_branch_cost,
1895 false, /* Prefer LDRD/STRD. */
1896 {false, false}, /* Prefer non short circuit. */
1897 &arm_default_vec_cost, /* Vectorizer costs. */
1898 false, /* Prefer Neon for 64-bits bitops. */
1899 false, false, /* Prefer 32-bit encodings. */
1900 true, /* Prefer Neon for stringops. */
1901 8 /* Maximum insns to inline memset. */
1902 };
1903
1904 const struct tune_params arm_cortex_a9_tune =
1905 {
1906 arm_9e_rtx_costs,
1907 &cortexa9_extra_costs,
1908 cortex_a9_sched_adjust_cost,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 ARM_PREFETCH_BENEFICIAL(4,32,32),
1912 false, /* Prefer constant pool. */
1913 arm_default_branch_cost,
1914 false, /* Prefer LDRD/STRD. */
1915 {true, true}, /* Prefer non short circuit. */
1916 &arm_default_vec_cost, /* Vectorizer costs. */
1917 false, /* Prefer Neon for 64-bits bitops. */
1918 false, false, /* Prefer 32-bit encodings. */
1919 false, /* Prefer Neon for stringops. */
1920 8 /* Maximum insns to inline memset. */
1921 };
1922
1923 const struct tune_params arm_cortex_a12_tune =
1924 {
1925 arm_9e_rtx_costs,
1926 &cortexa12_extra_costs,
1927 NULL,
1928 1, /* Constant limit. */
1929 5, /* Max cond insns. */
1930 ARM_PREFETCH_BENEFICIAL(4,32,32),
1931 false, /* Prefer constant pool. */
1932 arm_default_branch_cost,
1933 true, /* Prefer LDRD/STRD. */
1934 {true, true}, /* Prefer non short circuit. */
1935 &arm_default_vec_cost, /* Vectorizer costs. */
1936 false, /* Prefer Neon for 64-bits bitops. */
1937 false, false, /* Prefer 32-bit encodings. */
1938 true, /* Prefer Neon for stringops. */
1939 8 /* Maximum insns to inline memset. */
1940 };
1941
1942 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
1943 single cycle, so the pair costs two cycles. An LDR from the constant pool
1944 likewise takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
1945 loads/stores can be pipelined together, saving one cycle), and may also
1946 improve icache utilisation. Hence we prefer the constant pool for such
1947 processors. */
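/* (Illustrative sketch, not compiler output: a 32-bit constant such as
   0x12345678 can be materialised either with a MOVW/MOVT pair,
       movw  r0, #0x5678
       movt  r0, #0x1234
   or with a PC-relative literal-pool load,
       ldr   r0, .LC0
   Both forms cost two cycles on such cores, so the pool is preferred here
   for the pipelining and icache reasons described above.)  */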
1948
1949 const struct tune_params arm_v7m_tune =
1950 {
1951 arm_9e_rtx_costs,
1952 &v7m_extra_costs,
1953 NULL, /* Sched adj cost. */
1954 1, /* Constant limit. */
1955 2, /* Max cond insns. */
1956 ARM_PREFETCH_NOT_BENEFICIAL,
1957 true, /* Prefer constant pool. */
1958 arm_cortex_m_branch_cost,
1959 false, /* Prefer LDRD/STRD. */
1960 {false, false}, /* Prefer non short circuit. */
1961 &arm_default_vec_cost, /* Vectorizer costs. */
1962 false, /* Prefer Neon for 64-bits bitops. */
1963 false, false, /* Prefer 32-bit encodings. */
1964 false, /* Prefer Neon for stringops. */
1965 8 /* Maximum insns to inline memset. */
1966 };
1967
1968 /* Cortex-M7 tuning. */
1969
1970 const struct tune_params arm_cortex_m7_tune =
1971 {
1972 arm_9e_rtx_costs,
1973 &v7m_extra_costs,
1974 NULL, /* Sched adj cost. */
1975 0, /* Constant limit. */
1976 0, /* Max cond insns. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 true, /* Prefer constant pool. */
1979 arm_cortex_m_branch_cost,
1980 false, /* Prefer LDRD/STRD. */
1981 {true, true}, /* Prefer non short circuit. */
1982 &arm_default_vec_cost, /* Vectorizer costs. */
1983 false, /* Prefer Neon for 64-bits bitops. */
1984 false, false, /* Prefer 32-bit encodings. */
1985 false, /* Prefer Neon for stringops. */
1986 8 /* Maximum insns to inline memset. */
1987 };
1988
1989 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1990 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1991 const struct tune_params arm_v6m_tune =
1992 {
1993 arm_9e_rtx_costs,
1994 NULL,
1995 NULL, /* Sched adj cost. */
1996 1, /* Constant limit. */
1997 5, /* Max cond insns. */
1998 ARM_PREFETCH_NOT_BENEFICIAL,
1999 false, /* Prefer constant pool. */
2000 arm_default_branch_cost,
2001 false, /* Prefer LDRD/STRD. */
2002 {false, false}, /* Prefer non short circuit. */
2003 &arm_default_vec_cost, /* Vectorizer costs. */
2004 false, /* Prefer Neon for 64-bits bitops. */
2005 false, false, /* Prefer 32-bit encodings. */
2006 false, /* Prefer Neon for stringops. */
2007 8 /* Maximum insns to inline memset. */
2008 };
2009
2010 const struct tune_params arm_fa726te_tune =
2011 {
2012 arm_9e_rtx_costs,
2013 NULL,
2014 fa726te_sched_adjust_cost,
2015 1, /* Constant limit. */
2016 5, /* Max cond insns. */
2017 ARM_PREFETCH_NOT_BENEFICIAL,
2018 true, /* Prefer constant pool. */
2019 arm_default_branch_cost,
2020 false, /* Prefer LDRD/STRD. */
2021 {true, true}, /* Prefer non short circuit. */
2022 &arm_default_vec_cost, /* Vectorizer costs. */
2023 false, /* Prefer Neon for 64-bits bitops. */
2024 false, false, /* Prefer 32-bit encodings. */
2025 false, /* Prefer Neon for stringops. */
2026 8 /* Maximum insns to inline memset. */
2027 };
2028
2029
2030 /* Not all of these give usefully different compilation alternatives,
2031 but there is no simple way of generalizing them. */
2032 static const struct processors all_cores[] =
2033 {
2034 /* ARM Cores */
2035 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2036 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2037 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2038 #include "arm-cores.def"
2039 #undef ARM_CORE
2040 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2041 };
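/* For example (the entry is shown schematically; the real definitions
   live in arm-cores.def), a line of the form
       ARM_CORE ("cortex-a15", cortexa15, cortexa15, 7A, FLAGS, cortex_a15)
   expands through the macro above into
       {"cortex-a15", cortexa15, "7A", BASE_ARCH_7A,
        FLAGS | FL_FOR_ARCH7A, &arm_cortex_a15_tune},  */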
2042
2043 static const struct processors all_architectures[] =
2044 {
2045 /* ARM Architectures */
2046 /* We don't specify tuning costs here as it will be figured out
2047 from the core. */
2048
2049 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2050 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2051 #include "arm-arches.def"
2052 #undef ARM_ARCH
2053 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2054 };
2055
2056
2057 /* These are populated as command-line arguments are processed, or NULL
2058 if not specified. */
2059 static const struct processors *arm_selected_arch;
2060 static const struct processors *arm_selected_cpu;
2061 static const struct processors *arm_selected_tune;
2062
2063 /* The name of the preprocessor macro to define for this architecture. */
2064
2065 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2066
2067 /* Available values for -mfpu=. */
2068
2069 static const struct arm_fpu_desc all_fpus[] =
2070 {
2071 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2072 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2073 #include "arm-fpus.def"
2074 #undef ARM_FPU
2075 };
2076
2077
2078 /* Supported TLS relocations. */
2079
2080 enum tls_reloc {
2081 TLS_GD32,
2082 TLS_LDM32,
2083 TLS_LDO32,
2084 TLS_IE32,
2085 TLS_LE32,
2086 TLS_DESCSEQ /* GNU scheme */
2087 };
2088
2089 /* The maximum number of insns to be used when loading a constant. */
2090 inline static int
2091 arm_constant_limit (bool size_p)
2092 {
2093 return size_p ? 1 : current_tune->constant_limit;
2094 }
2095
2096 /* Emit an insn that's a simple single-set. Both the operands must be known
2097 to be valid. */
2098 inline static rtx_insn *
2099 emit_set_insn (rtx x, rtx y)
2100 {
2101 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2102 }
2103
2104 /* Return the number of bits set in VALUE. */
2105 static unsigned
2106 bit_count (unsigned long value)
2107 {
2108 unsigned long count = 0;
2109
2110 while (value)
2111 {
2112 count++;
2113 value &= value - 1; /* Clear the least-significant set bit. */
2114 }
2115
2116 return count;
2117 }
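/* For example, bit_count (0x13) returns 3: 10011 -> 10010 -> 10000 -> 0,
   one set bit cleared per iteration.  */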
2118
2119 typedef struct
2120 {
2121 machine_mode mode;
2122 const char *name;
2123 } arm_fixed_mode_set;
2124
2125 /* A small helper for setting fixed-point libfuncs. */
2126
2127 static void
2128 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2129 const char *funcname, const char *modename,
2130 int num_suffix)
2131 {
2132 char buffer[50];
2133
2134 if (num_suffix == 0)
2135 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2136 else
2137 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2138
2139 set_optab_libfunc (optable, mode, buffer);
2140 }
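/* For instance, the first call in the loop below,
       arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3),
   builds and registers the libcall name "__gnu_addqq3".  */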
2141
2142 static void
2143 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2144 machine_mode from, const char *funcname,
2145 const char *toname, const char *fromname)
2146 {
2147 char buffer[50];
2148 const char *maybe_suffix_2 = "";
2149
2150 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2151 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2152 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2153 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2154 maybe_suffix_2 = "2";
2155
2156 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2157 maybe_suffix_2);
2158
2159 set_conv_libfunc (optable, to, from, buffer);
2160 }
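/* Likewise, arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
   "fract", "hq", "qq") registers "__gnu_fractqqhq2" for the QQmode to
   HQmode conversion; the "2" suffix applies because both modes are
   signed fractional modes.  */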
2161
2162 /* Set up library functions unique to ARM. */
2163
2164 static void
2165 arm_init_libfuncs (void)
2166 {
2167 /* For Linux, we have access to kernel support for atomic operations. */
2168 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2169 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2170
2171 /* There are no special library functions unless we are using the
2172 ARM BPABI. */
2173 if (!TARGET_BPABI)
2174 return;
2175
2176 /* The functions below are described in Section 4 of the "Run-Time
2177 ABI for the ARM architecture", Version 1.0. */
2178
2179 /* Double-precision floating-point arithmetic. Table 2. */
2180 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2181 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2182 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2183 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2184 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2185
2186 /* Double-precision comparisons. Table 3. */
2187 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2188 set_optab_libfunc (ne_optab, DFmode, NULL);
2189 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2190 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2191 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2192 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2193 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2194
2195 /* Single-precision floating-point arithmetic. Table 4. */
2196 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2197 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2198 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2199 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2200 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2201
2202 /* Single-precision comparisons. Table 5. */
2203 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2204 set_optab_libfunc (ne_optab, SFmode, NULL);
2205 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2206 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2207 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2208 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2209 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2210
2211 /* Floating-point to integer conversions. Table 6. */
2212 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2213 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2214 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2215 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2216 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2217 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2218 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2219 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2220
2221 /* Conversions between floating types. Table 7. */
2222 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2223 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2224
2225 /* Integer to floating-point conversions. Table 8. */
2226 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2227 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2228 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2229 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2230 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2231 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2232 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2233 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2234
2235 /* Long long. Table 9. */
2236 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2237 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2238 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2239 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2240 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2241 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2242 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2243 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2244
2245 /* Integer (32/32->32) division. \S 4.3.1. */
2246 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2247 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2248
2249 /* The divmod functions are designed so that they can be used for
2250 plain division, even though they return both the quotient and the
2251 remainder. The quotient is returned in the usual location (i.e.,
2252 r0 for SImode, {r0, r1} for DImode), just as would be expected
2253 for an ordinary division routine. Because the AAPCS calling
2254 conventions specify that all of { r0, r1, r2, r3 } are
2255 call-clobbered registers, there is no need to tell the compiler
2256 explicitly that those registers are clobbered by these
2257 routines. */
2258 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2259 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2260
2261 /* For SImode division the ABI provides div-without-mod routines,
2262 which are faster. */
2263 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2264 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2265
2266 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2267 divmod libcalls instead. */
2268 set_optab_libfunc (smod_optab, DImode, NULL);
2269 set_optab_libfunc (umod_optab, DImode, NULL);
2270 set_optab_libfunc (smod_optab, SImode, NULL);
2271 set_optab_libfunc (umod_optab, SImode, NULL);
2272
2273 /* Half-precision float operations. The compiler handles all operations
2274 with NULL libfuncs by converting to SFmode. */
2275 switch (arm_fp16_format)
2276 {
2277 case ARM_FP16_FORMAT_IEEE:
2278 case ARM_FP16_FORMAT_ALTERNATIVE:
2279
2280 /* Conversions. */
2281 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2282 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2283 ? "__gnu_f2h_ieee"
2284 : "__gnu_f2h_alternative"));
2285 set_conv_libfunc (sext_optab, SFmode, HFmode,
2286 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2287 ? "__gnu_h2f_ieee"
2288 : "__gnu_h2f_alternative"));
2289
2290 /* Arithmetic. */
2291 set_optab_libfunc (add_optab, HFmode, NULL);
2292 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2293 set_optab_libfunc (smul_optab, HFmode, NULL);
2294 set_optab_libfunc (neg_optab, HFmode, NULL);
2295 set_optab_libfunc (sub_optab, HFmode, NULL);
2296
2297 /* Comparisons. */
2298 set_optab_libfunc (eq_optab, HFmode, NULL);
2299 set_optab_libfunc (ne_optab, HFmode, NULL);
2300 set_optab_libfunc (lt_optab, HFmode, NULL);
2301 set_optab_libfunc (le_optab, HFmode, NULL);
2302 set_optab_libfunc (ge_optab, HFmode, NULL);
2303 set_optab_libfunc (gt_optab, HFmode, NULL);
2304 set_optab_libfunc (unord_optab, HFmode, NULL);
2305 break;
2306
2307 default:
2308 break;
2309 }
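  /* In effect, an HFmode operation such as a + b is now expanded by
     widening both operands with the __gnu_h2f_* routine registered
     above, performing the arithmetic in SFmode, and narrowing the
     result back with the matching __gnu_f2h_* routine.  */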
2310
2311 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2312 {
2313 const arm_fixed_mode_set fixed_arith_modes[] =
2314 {
2315 { QQmode, "qq" },
2316 { UQQmode, "uqq" },
2317 { HQmode, "hq" },
2318 { UHQmode, "uhq" },
2319 { SQmode, "sq" },
2320 { USQmode, "usq" },
2321 { DQmode, "dq" },
2322 { UDQmode, "udq" },
2323 { TQmode, "tq" },
2324 { UTQmode, "utq" },
2325 { HAmode, "ha" },
2326 { UHAmode, "uha" },
2327 { SAmode, "sa" },
2328 { USAmode, "usa" },
2329 { DAmode, "da" },
2330 { UDAmode, "uda" },
2331 { TAmode, "ta" },
2332 { UTAmode, "uta" }
2333 };
2334 const arm_fixed_mode_set fixed_conv_modes[] =
2335 {
2336 { QQmode, "qq" },
2337 { UQQmode, "uqq" },
2338 { HQmode, "hq" },
2339 { UHQmode, "uhq" },
2340 { SQmode, "sq" },
2341 { USQmode, "usq" },
2342 { DQmode, "dq" },
2343 { UDQmode, "udq" },
2344 { TQmode, "tq" },
2345 { UTQmode, "utq" },
2346 { HAmode, "ha" },
2347 { UHAmode, "uha" },
2348 { SAmode, "sa" },
2349 { USAmode, "usa" },
2350 { DAmode, "da" },
2351 { UDAmode, "uda" },
2352 { TAmode, "ta" },
2353 { UTAmode, "uta" },
2354 { QImode, "qi" },
2355 { HImode, "hi" },
2356 { SImode, "si" },
2357 { DImode, "di" },
2358 { TImode, "ti" },
2359 { SFmode, "sf" },
2360 { DFmode, "df" }
2361 };
2362 unsigned int i, j;
2363
2364 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2365 {
2366 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2367 "add", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2369 "ssadd", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2371 "usadd", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2373 "sub", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2375 "sssub", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2377 "ussub", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2379 "mul", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2381 "ssmul", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2383 "usmul", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2385 "div", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2387 "udiv", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2389 "ssdiv", fixed_arith_modes[i].name, 3);
2390 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2391 "usdiv", fixed_arith_modes[i].name, 3);
2392 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2393 "neg", fixed_arith_modes[i].name, 2);
2394 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2395 "ssneg", fixed_arith_modes[i].name, 2);
2396 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2397 "usneg", fixed_arith_modes[i].name, 2);
2398 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2399 "ashl", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2401 "ashr", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2403 "lshr", fixed_arith_modes[i].name, 3);
2404 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2405 "ssashl", fixed_arith_modes[i].name, 3);
2406 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2407 "usashl", fixed_arith_modes[i].name, 3);
2408 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2409 "cmp", fixed_arith_modes[i].name, 2);
2410 }
2411
2412 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2413 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2414 {
2415 if (i == j
2416 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2417 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2418 continue;
2419
2420 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2421 fixed_conv_modes[j].mode, "fract",
2422 fixed_conv_modes[i].name,
2423 fixed_conv_modes[j].name);
2424 arm_set_fixed_conv_libfunc (satfract_optab,
2425 fixed_conv_modes[i].mode,
2426 fixed_conv_modes[j].mode, "satfract",
2427 fixed_conv_modes[i].name,
2428 fixed_conv_modes[j].name);
2429 arm_set_fixed_conv_libfunc (fractuns_optab,
2430 fixed_conv_modes[i].mode,
2431 fixed_conv_modes[j].mode, "fractuns",
2432 fixed_conv_modes[i].name,
2433 fixed_conv_modes[j].name);
2434 arm_set_fixed_conv_libfunc (satfractuns_optab,
2435 fixed_conv_modes[i].mode,
2436 fixed_conv_modes[j].mode, "satfractuns",
2437 fixed_conv_modes[i].name,
2438 fixed_conv_modes[j].name);
2439 }
2440 }
2441
2442 if (TARGET_AAPCS_BASED)
2443 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2444 }
2445
2446 /* On AAPCS systems, this is the "struct __va_list". */
2447 static GTY(()) tree va_list_type;
2448
2449 /* Return the type to use as __builtin_va_list. */
2450 static tree
2451 arm_build_builtin_va_list (void)
2452 {
2453 tree va_list_name;
2454 tree ap_field;
2455
2456 if (!TARGET_AAPCS_BASED)
2457 return std_build_builtin_va_list ();
2458
2459 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2460 defined as:
2461
2462 struct __va_list
2463 {
2464 void *__ap;
2465 };
2466
2467 The C Library ABI further reinforces this definition in \S
2468 4.1.
2469
2470 We must follow this definition exactly. The structure tag
2471 name is visible in C++ mangled names, and thus forms a part
2472 of the ABI. The field name may be used by people who
2473 #include <stdarg.h>. */
2474 /* Create the type. */
2475 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2476 /* Give it the required name. */
2477 va_list_name = build_decl (BUILTINS_LOCATION,
2478 TYPE_DECL,
2479 get_identifier ("__va_list"),
2480 va_list_type);
2481 DECL_ARTIFICIAL (va_list_name) = 1;
2482 TYPE_NAME (va_list_type) = va_list_name;
2483 TYPE_STUB_DECL (va_list_type) = va_list_name;
2484 /* Create the __ap field. */
2485 ap_field = build_decl (BUILTINS_LOCATION,
2486 FIELD_DECL,
2487 get_identifier ("__ap"),
2488 ptr_type_node);
2489 DECL_ARTIFICIAL (ap_field) = 1;
2490 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2491 TYPE_FIELDS (va_list_type) = ap_field;
2492 /* Compute its layout. */
2493 layout_type (va_list_type);
2494
2495 return va_list_type;
2496 }
2497
2498 /* Return an expression of type "void *" pointing to the next
2499 available argument in a variable-argument list. VALIST is the
2500 user-level va_list object, of type __builtin_va_list. */
2501 static tree
2502 arm_extract_valist_ptr (tree valist)
2503 {
2504 if (TREE_TYPE (valist) == error_mark_node)
2505 return error_mark_node;
2506
2507 /* On an AAPCS target, the pointer is stored within "struct
2508 va_list". */
2509 if (TARGET_AAPCS_BASED)
2510 {
2511 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2512 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2513 valist, ap_field, NULL_TREE);
2514 }
2515
2516 return valist;
2517 }
2518
2519 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2520 static void
2521 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2522 {
2523 valist = arm_extract_valist_ptr (valist);
2524 std_expand_builtin_va_start (valist, nextarg);
2525 }
2526
2527 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2528 static tree
2529 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2530 gimple_seq *post_p)
2531 {
2532 valist = arm_extract_valist_ptr (valist);
2533 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2534 }
2535
2536 /* Fix up any incompatible options that the user has specified. */
2537 static void
2538 arm_option_override (void)
2539 {
2540 if (global_options_set.x_arm_arch_option)
2541 arm_selected_arch = &all_architectures[arm_arch_option];
2542
2543 if (global_options_set.x_arm_cpu_option)
2544 {
2545 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2546 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2547 }
2548
2549 if (global_options_set.x_arm_tune_option)
2550 arm_selected_tune = &all_cores[(int) arm_tune_option];
2551
2552 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2553 SUBTARGET_OVERRIDE_OPTIONS;
2554 #endif
2555
2556 if (arm_selected_arch)
2557 {
2558 if (arm_selected_cpu)
2559 {
2560 /* Check for conflict between mcpu and march. */
2561 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2562 {
2563 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2564 arm_selected_cpu->name, arm_selected_arch->name);
2565 /* -march wins for code generation.
2566 -mcpu wins for default tuning. */
2567 if (!arm_selected_tune)
2568 arm_selected_tune = arm_selected_cpu;
2569
2570 arm_selected_cpu = arm_selected_arch;
2571 }
2572 else
2573 /* -mcpu wins. */
2574 arm_selected_arch = NULL;
2575 }
2576 else
2577 /* Pick a CPU based on the architecture. */
2578 arm_selected_cpu = arm_selected_arch;
2579 }
2580
2581 /* If the user did not specify a processor, choose one for them. */
2582 if (!arm_selected_cpu)
2583 {
2584 const struct processors * sel;
2585 unsigned int sought;
2586
2587 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2588 if (!arm_selected_cpu->name)
2589 {
2590 #ifdef SUBTARGET_CPU_DEFAULT
2591 /* Use the subtarget default CPU if none was specified by
2592 configure. */
2593 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2594 #endif
2595 /* Default to ARM6. */
2596 if (!arm_selected_cpu->name)
2597 arm_selected_cpu = &all_cores[arm6];
2598 }
2599
2600 sel = arm_selected_cpu;
2601 insn_flags = sel->flags;
2602
2603 /* Now check to see if the user has specified some command line
2604 switches that require certain abilities from the cpu. */
2605 sought = 0;
2606
2607 if (TARGET_INTERWORK || TARGET_THUMB)
2608 {
2609 sought |= (FL_THUMB | FL_MODE32);
2610
2611 /* There are no ARM processors that support both APCS-26 and
2612 interworking. Therefore we force FL_MODE26 to be removed
2613 from insn_flags here (if it was set), so that the search
2614 below will always be able to find a compatible processor. */
2615 insn_flags &= ~FL_MODE26;
2616 }
2617
2618 if (sought != 0 && ((sought & insn_flags) != sought))
2619 {
2620 /* Try to locate a CPU type that supports all of the abilities
2621 of the default CPU, plus the extra abilities requested by
2622 the user. */
2623 for (sel = all_cores; sel->name != NULL; sel++)
2624 if ((sel->flags & sought) == (sought | insn_flags))
2625 break;
2626
2627 if (sel->name == NULL)
2628 {
2629 unsigned current_bit_count = 0;
2630 const struct processors * best_fit = NULL;
2631
2632 /* Ideally we would like to issue an error message here
2633 saying that it was not possible to find a CPU compatible
2634 with the default CPU, but which also supports the command
2635 line options specified by the programmer, and so they
2636 ought to use the -mcpu=<name> command line option to
2637 override the default CPU type.
2638
2639 If we cannot find a cpu that has both the
2640 characteristics of the default cpu and the given
2641 command line options, we scan the array again looking
2642 for a best match. */
2643 for (sel = all_cores; sel->name != NULL; sel++)
2644 if ((sel->flags & sought) == sought)
2645 {
2646 unsigned count;
2647
2648 count = bit_count (sel->flags & insn_flags);
2649
2650 if (count >= current_bit_count)
2651 {
2652 best_fit = sel;
2653 current_bit_count = count;
2654 }
2655 }
2656
2657 gcc_assert (best_fit);
2658 sel = best_fit;
2659 }
2660
2661 arm_selected_cpu = sel;
2662 }
2663 }
2664
2665 gcc_assert (arm_selected_cpu);
2666 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2667 if (!arm_selected_tune)
2668 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2669
2670 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2671 insn_flags = arm_selected_cpu->flags;
2672 arm_base_arch = arm_selected_cpu->base_arch;
2673
2674 arm_tune = arm_selected_tune->core;
2675 tune_flags = arm_selected_tune->flags;
2676 current_tune = arm_selected_tune->tune;
2677
2678 /* Make sure that the processor choice does not conflict with any of the
2679 other command line choices. */
2680 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2681 error ("target CPU does not support ARM mode");
2682
2683 /* BPABI targets use linker tricks to allow interworking on cores
2684 without thumb support. */
2685 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2686 {
2687 warning (0, "target CPU does not support interworking" );
2688 target_flags &= ~MASK_INTERWORK;
2689 }
2690
2691 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2692 {
2693 warning (0, "target CPU does not support THUMB instructions");
2694 target_flags &= ~MASK_THUMB;
2695 }
2696
2697 if (TARGET_APCS_FRAME && TARGET_THUMB)
2698 {
2699 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2700 target_flags &= ~MASK_APCS_FRAME;
2701 }
2702
2703 /* Callee super interworking implies thumb interworking. Adding
2704 this to the flags here simplifies the logic elsewhere. */
2705 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2706 target_flags |= MASK_INTERWORK;
2707
2708 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
2709 from here, where no function is currently being compiled. */
2710 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2711 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2712
2713 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2714 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2715
2716 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2717 {
2718 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2719 target_flags |= MASK_APCS_FRAME;
2720 }
2721
2722 if (TARGET_POKE_FUNCTION_NAME)
2723 target_flags |= MASK_APCS_FRAME;
2724
2725 if (TARGET_APCS_REENT && flag_pic)
2726 error ("-fpic and -mapcs-reent are incompatible");
2727
2728 if (TARGET_APCS_REENT)
2729 warning (0, "APCS reentrant code not supported. Ignored");
2730
2731 /* If this target is normally configured to use APCS frames, warn if they
2732 are turned off and debugging is turned on. */
2733 if (TARGET_ARM
2734 && write_symbols != NO_DEBUG
2735 && !TARGET_APCS_FRAME
2736 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2737 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2738
2739 if (TARGET_APCS_FLOAT)
2740 warning (0, "passing floating point arguments in fp regs not yet supported");
2741
2742 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2743 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2744 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2745 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2746 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2747 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2748 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2749 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2750 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2751 arm_arch6m = arm_arch6 && !arm_arch_notm;
2752 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2753 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2754 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2755 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2756 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2757
2758 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2759 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2760 thumb_code = TARGET_ARM == 0;
2761 thumb1_code = TARGET_THUMB1 != 0;
2762 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2763 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2764 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2765 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2766 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2767 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2768 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2769 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2770 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2771 if (arm_restrict_it == 2)
2772 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2773
2774 if (!TARGET_THUMB2)
2775 arm_restrict_it = 0;
2776
2777 /* If we are not using the default (ARM mode) section anchor offset
2778 ranges, then set the correct ranges now. */
2779 if (TARGET_THUMB1)
2780 {
2781 /* Thumb-1 LDR instructions cannot have negative offsets.
2782 Permissible positive offset ranges are 5-bit (for byte loads),
2783 6-bit (for halfword loads), or 7-bit (for word loads).
2784 Empirical results suggest a 7-bit anchor range gives the best
2785 overall code size. */
2786 targetm.min_anchor_offset = 0;
2787 targetm.max_anchor_offset = 127;
2788 }
2789 else if (TARGET_THUMB2)
2790 {
2791 /* The minimum is set such that the total size of the block
2792 for a particular anchor is 248 + 1 + 4095 bytes, which is
2793 divisible by eight, ensuring natural spacing of anchors. */
2794 targetm.min_anchor_offset = -248;
2795 targetm.max_anchor_offset = 4095;
2796 }
2797
2798 /* V5 code we generate is completely interworking capable, so we turn off
2799 TARGET_INTERWORK here to avoid many tests later on. */
2800
2801 /* XXX However, we must pass the right pre-processor defines to CPP
2802 or GLD can get confused. This is a hack. */
2803 if (TARGET_INTERWORK)
2804 arm_cpp_interwork = 1;
2805
2806 if (arm_arch5)
2807 target_flags &= ~MASK_INTERWORK;
2808
2809 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2810 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2811
2812 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2813 error ("iwmmxt abi requires an iwmmxt capable cpu");
2814
2815 if (!global_options_set.x_arm_fpu_index)
2816 {
2817 const char *target_fpu_name;
2818 bool ok;
2819
2820 #ifdef FPUTYPE_DEFAULT
2821 target_fpu_name = FPUTYPE_DEFAULT;
2822 #else
2823 target_fpu_name = "vfp";
2824 #endif
2825
2826 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2827 CL_TARGET);
2828 gcc_assert (ok);
2829 }
2830
2831 arm_fpu_desc = &all_fpus[arm_fpu_index];
2832
2833 switch (arm_fpu_desc->model)
2834 {
2835 case ARM_FP_MODEL_VFP:
2836 arm_fpu_attr = FPU_VFP;
2837 break;
2838
2839 default:
2840 gcc_unreachable();
2841 }
2842
2843 if (TARGET_AAPCS_BASED)
2844 {
2845 if (TARGET_CALLER_INTERWORKING)
2846 error ("AAPCS does not support -mcaller-super-interworking");
2847 else
2848 if (TARGET_CALLEE_INTERWORKING)
2849 error ("AAPCS does not support -mcallee-super-interworking");
2850 }
2851
2852 /* iWMMXt and NEON are incompatible. */
2853 if (TARGET_IWMMXT && TARGET_NEON)
2854 error ("iWMMXt and NEON are incompatible");
2855
2856 /* iWMMXt unsupported under Thumb mode. */
2857 if (TARGET_THUMB && TARGET_IWMMXT)
2858 error ("iWMMXt unsupported under Thumb mode");
2859
2860 /* __fp16 support currently assumes the core has ldrh. */
2861 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2862 sorry ("__fp16 and no ldrh");
2863
2864 /* If soft-float is specified then don't use FPU. */
2865 if (TARGET_SOFT_FLOAT)
2866 arm_fpu_attr = FPU_NONE;
2867
2868 if (TARGET_AAPCS_BASED)
2869 {
2870 if (arm_abi == ARM_ABI_IWMMXT)
2871 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2872 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2873 && TARGET_HARD_FLOAT
2874 && TARGET_VFP)
2875 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2876 else
2877 arm_pcs_default = ARM_PCS_AAPCS;
2878 }
2879 else
2880 {
2881 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2882 sorry ("-mfloat-abi=hard and VFP");
2883
2884 if (arm_abi == ARM_ABI_APCS)
2885 arm_pcs_default = ARM_PCS_APCS;
2886 else
2887 arm_pcs_default = ARM_PCS_ATPCS;
2888 }
2889
2890 /* For arm2/3 there is no need to do any scheduling if we are doing
2891 software floating-point. */
2892 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2893 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2894
2895 /* Use the cp15 method if it is available. */
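  /* (TP_CP15 reads the thread pointer directly from the coprocessor,
     roughly "mrc p15, 0, rN, c13, c0, 3", whereas TP_SOFT falls back to
     calling the __aeabi_read_tp helper.)  */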
2896 if (target_thread_pointer == TP_AUTO)
2897 {
2898 if (arm_arch6k && !TARGET_THUMB1)
2899 target_thread_pointer = TP_CP15;
2900 else
2901 target_thread_pointer = TP_SOFT;
2902 }
2903
2904 if (TARGET_HARD_TP && TARGET_THUMB1)
2905 error ("can not use -mtp=cp15 with 16-bit Thumb");
2906
2907 /* Override the default structure alignment for AAPCS ABI. */
2908 if (!global_options_set.x_arm_structure_size_boundary)
2909 {
2910 if (TARGET_AAPCS_BASED)
2911 arm_structure_size_boundary = 8;
2912 }
2913 else
2914 {
2915 if (arm_structure_size_boundary != 8
2916 && arm_structure_size_boundary != 32
2917 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2918 {
2919 if (ARM_DOUBLEWORD_ALIGN)
2920 warning (0,
2921 "structure size boundary can only be set to 8, 32 or 64");
2922 else
2923 warning (0, "structure size boundary can only be set to 8 or 32");
2924 arm_structure_size_boundary
2925 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2926 }
2927 }
2928
2929 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2930 {
2931 error ("RTP PIC is incompatible with Thumb");
2932 flag_pic = 0;
2933 }
2934
2935 /* If stack checking is disabled, we can use r10 as the PIC register,
2936 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2937 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2938 {
2939 if (TARGET_VXWORKS_RTP)
2940 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2941 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2942 }
2943
2944 if (flag_pic && TARGET_VXWORKS_RTP)
2945 arm_pic_register = 9;
2946
2947 if (arm_pic_register_string != NULL)
2948 {
2949 int pic_register = decode_reg_name (arm_pic_register_string);
2950
2951 if (!flag_pic)
2952 warning (0, "-mpic-register= is useless without -fpic");
2953
2954 /* Prevent the user from choosing an obviously stupid PIC register. */
2955 else if (pic_register < 0 || call_used_regs[pic_register]
2956 || pic_register == HARD_FRAME_POINTER_REGNUM
2957 || pic_register == STACK_POINTER_REGNUM
2958 || pic_register >= PC_REGNUM
2959 || (TARGET_VXWORKS_RTP
2960 && (unsigned int) pic_register != arm_pic_register))
2961 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2962 else
2963 arm_pic_register = pic_register;
2964 }
2965
2966 if (TARGET_VXWORKS_RTP
2967 && !global_options_set.x_arm_pic_data_is_text_relative)
2968 arm_pic_data_is_text_relative = 0;
2969
2970 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2971 if (fix_cm3_ldrd == 2)
2972 {
2973 if (arm_selected_cpu->core == cortexm3)
2974 fix_cm3_ldrd = 1;
2975 else
2976 fix_cm3_ldrd = 0;
2977 }
2978
2979 /* Enable -munaligned-access by default for
2980 - all ARMv6 architecture-based processors
2981 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2982 - ARMv8 architecture-based processors.
2983
2984 Disable -munaligned-access by default for
2985 - all pre-ARMv6 architecture-based processors
2986 - ARMv6-M architecture-based processors. */
2987
2988 if (unaligned_access == 2)
2989 {
2990 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2991 unaligned_access = 1;
2992 else
2993 unaligned_access = 0;
2994 }
2995 else if (unaligned_access == 1
2996 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2997 {
2998 warning (0, "target CPU does not support unaligned accesses");
2999 unaligned_access = 0;
3000 }
3001
3002 if (TARGET_THUMB1 && flag_schedule_insns)
3003 {
3004 /* Don't warn since it's on by default in -O2. */
3005 flag_schedule_insns = 0;
3006 }
3007
3008 if (optimize_size)
3009 {
3010 /* If optimizing for size, bump the number of instructions that we
3011 are prepared to conditionally execute (even on a StrongARM). */
3012 max_insns_skipped = 6;
3013
3014 /* For THUMB2, we limit the conditional sequence to one IT block. */
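      /* An IT block can cover up to four conditional instructions;
	 MAX_INSN_PER_IT_BLOCK caps the sequence accordingly (and to a
	 single insn when -mrestrict-it is in effect).  */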
3015 if (TARGET_THUMB2)
3016 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3017 }
3018 else
3019 max_insns_skipped = current_tune->max_insns_skipped;
3020
3021 /* Hot/Cold partitioning is not currently supported, since we can't
3022 handle literal pool placement in that case. */
3023 if (flag_reorder_blocks_and_partition)
3024 {
3025 inform (input_location,
3026 "-freorder-blocks-and-partition not supported on this architecture");
3027 flag_reorder_blocks_and_partition = 0;
3028 flag_reorder_blocks = 1;
3029 }
3030
3031 if (flag_pic)
3032 /* Hoisting PIC address calculations more aggressively provides a small,
3033 but measurable, size reduction for PIC code. Therefore, we decrease
3034 the bar for unrestricted expression hoisting to the cost of PIC address
3035 calculation, which is 2 instructions. */
3036 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3037 global_options.x_param_values,
3038 global_options_set.x_param_values);
3039
3040 /* ARM EABI defaults to strict volatile bitfields. */
3041 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3042 && abi_version_at_least(2))
3043 flag_strict_volatile_bitfields = 1;
3044
3045 /* Enable software prefetching at -O3 for CPUs that have prefetch support and
3046 for which we have deemed it beneficial (num_prefetch_slots set to 1 or more). */
3047 if (flag_prefetch_loop_arrays < 0
3048 && HAVE_prefetch
3049 && optimize >= 3
3050 && current_tune->num_prefetch_slots > 0)
3051 flag_prefetch_loop_arrays = 1;
3052
3053 /* Set up parameters to be used in the prefetching algorithm. Do not override
3054 the defaults unless we are tuning for a core for which we have researched values. */
3055 if (current_tune->num_prefetch_slots > 0)
3056 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3057 current_tune->num_prefetch_slots,
3058 global_options.x_param_values,
3059 global_options_set.x_param_values);
3060 if (current_tune->l1_cache_line_size >= 0)
3061 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3062 current_tune->l1_cache_line_size,
3063 global_options.x_param_values,
3064 global_options_set.x_param_values);
3065 if (current_tune->l1_cache_size >= 0)
3066 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3067 current_tune->l1_cache_size,
3068 global_options.x_param_values,
3069 global_options_set.x_param_values);
3070
3071 /* Use Neon rather than core registers to perform 64-bit
3072 operations. */
3073 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3074 if (use_neon_for_64bits == 1)
3075 prefer_neon_for_64bits = true;
3076
3077 /* Use the alternative scheduling-pressure algorithm by default. */
3078 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3079 global_options.x_param_values,
3080 global_options_set.x_param_values);
3081
3082 /* Disable shrink-wrap when optimizing function for size, since it tends to
3083 generate additional returns. */
3084 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3085 flag_shrink_wrap = false;
3086 /* TBD: Dwarf info for apcs frame is not handled yet. */
3087 if (TARGET_APCS_FRAME)
3088 flag_shrink_wrap = false;
3089
3090 /* We only support -mslow-flash-data on armv7-m targets. */
3091 if (target_slow_flash_data
3092 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3093 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3094 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3095
3096 /* Currently, for slow flash data, we just disable literal pools. */
3097 if (target_slow_flash_data)
3098 arm_disable_literal_pool = true;
3099
3100 /* Thumb2 inline assembly code should always use unified syntax.
3101 This will apply to ARM and Thumb1 eventually. */
3102 if (TARGET_THUMB2)
3103 inline_asm_unified = 1;
3104
3105 /* Disable scheduling fusion by default if this is not an armv7 processor
3106 or it doesn't prefer ldrd/strd. */
3107 if (flag_schedule_fusion == 2
3108 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3109 flag_schedule_fusion = 0;
3110
3111 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3112 - epilogue_insns - does not accurately model the corresponding insns
3113 emitted in the asm file. In particular, see the comment in thumb_exit
3114 'Find out how many of the (return) argument registers we can corrupt'.
3115 As a consequence, the epilogue may clobber registers without fipa-ra
3116 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3117 TODO: Accurately model clobbers for epilogue_insns and reenable
3118 fipa-ra. */
3119 if (TARGET_THUMB1)
3120 flag_ipa_ra = 0;
3121
3122 /* Register global variables with the garbage collector. */
3123 arm_add_gc_roots ();
3124 }
3125
3126 static void
3127 arm_add_gc_roots (void)
3128 {
3129 gcc_obstack_init(&minipool_obstack);
3130 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3131 }
3132 \f
3133 /* A table of known ARM exception types.
3134 For use with the interrupt function attribute. */
3135
3136 typedef struct
3137 {
3138 const char *const arg;
3139 const unsigned long return_value;
3140 }
3141 isr_attribute_arg;
3142
3143 static const isr_attribute_arg isr_attribute_args [] =
3144 {
3145 { "IRQ", ARM_FT_ISR },
3146 { "irq", ARM_FT_ISR },
3147 { "FIQ", ARM_FT_FIQ },
3148 { "fiq", ARM_FT_FIQ },
3149 { "ABORT", ARM_FT_ISR },
3150 { "abort", ARM_FT_ISR },
3151 { "ABORT", ARM_FT_ISR },
3152 { "abort", ARM_FT_ISR },
3153 { "UNDEF", ARM_FT_EXCEPTION },
3154 { "undef", ARM_FT_EXCEPTION },
3155 { "SWI", ARM_FT_EXCEPTION },
3156 { "swi", ARM_FT_EXCEPTION },
3157 { NULL, ARM_FT_NORMAL }
3158 };
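/* A handler would typically be declared with one of the strings above,
   e.g. (function name purely illustrative):
       void uart_isr (void) __attribute__ ((interrupt ("IRQ")));  */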
3159
3160 /* Returns the (interrupt) function type of the current
3161 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3162
3163 static unsigned long
3164 arm_isr_value (tree argument)
3165 {
3166 const isr_attribute_arg * ptr;
3167 const char * arg;
3168
3169 if (!arm_arch_notm)
3170 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3171
3172 /* No argument - default to IRQ. */
3173 if (argument == NULL_TREE)
3174 return ARM_FT_ISR;
3175
3176 /* Get the value of the argument. */
3177 if (TREE_VALUE (argument) == NULL_TREE
3178 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3179 return ARM_FT_UNKNOWN;
3180
3181 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3182
3183 /* Check it against the list of known arguments. */
3184 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3185 if (streq (arg, ptr->arg))
3186 return ptr->return_value;
3187
3188 /* An unrecognized interrupt type. */
3189 return ARM_FT_UNKNOWN;
3190 }
3191
3192 /* Computes the type of the current function. */
3193
3194 static unsigned long
3195 arm_compute_func_type (void)
3196 {
3197 unsigned long type = ARM_FT_UNKNOWN;
3198 tree a;
3199 tree attr;
3200
3201 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3202
3203 /* Decide if the current function is volatile. Such functions
3204 never return, and many memory cycles can be saved by not storing
3205 register values that will never be needed again. This optimization
3206 was added to speed up context switching in a kernel application. */
3207 if (optimize > 0
3208 && (TREE_NOTHROW (current_function_decl)
3209 || !(flag_unwind_tables
3210 || (flag_exceptions
3211 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3212 && TREE_THIS_VOLATILE (current_function_decl))
3213 type |= ARM_FT_VOLATILE;
3214
3215 if (cfun->static_chain_decl != NULL)
3216 type |= ARM_FT_NESTED;
3217
3218 attr = DECL_ATTRIBUTES (current_function_decl);
3219
3220 a = lookup_attribute ("naked", attr);
3221 if (a != NULL_TREE)
3222 type |= ARM_FT_NAKED;
3223
3224 a = lookup_attribute ("isr", attr);
3225 if (a == NULL_TREE)
3226 a = lookup_attribute ("interrupt", attr);
3227
3228 if (a == NULL_TREE)
3229 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3230 else
3231 type |= arm_isr_value (TREE_VALUE (a));
3232
3233 return type;
3234 }
3235
3236 /* Returns the type of the current function. */
3237
3238 unsigned long
3239 arm_current_func_type (void)
3240 {
3241 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3242 cfun->machine->func_type = arm_compute_func_type ();
3243
3244 return cfun->machine->func_type;
3245 }
3246
3247 bool
3248 arm_allocate_stack_slots_for_args (void)
3249 {
3250 /* Naked functions should not allocate stack slots for arguments. */
3251 return !IS_NAKED (arm_current_func_type ());
3252 }
3253
3254 static bool
3255 arm_warn_func_return (tree decl)
3256 {
3257 /* Naked functions are implemented entirely in assembly, including the
3258 return sequence, so suppress warnings about this. */
3259 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3260 }
3261
3262 \f
3263 /* Output assembler code for a block containing the constant parts
3264 of a trampoline, leaving space for the variable parts.
3265
3266 On the ARM, (if r8 is the static chain regnum, and remembering that
3267 referencing pc adds an offset of 8) the trampoline looks like:
3268 ldr r8, [pc, #0]
3269 ldr pc, [pc]
3270 .word static chain value
3271 .word function's address
3272 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3273
3274 static void
3275 arm_asm_trampoline_template (FILE *f)
3276 {
3277 if (TARGET_ARM)
3278 {
3279 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3280 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3281 }
3282 else if (TARGET_THUMB2)
3283 {
3284 /* The Thumb-2 trampoline is similar to the ARM implementation.
3285 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3286 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3287 STATIC_CHAIN_REGNUM, PC_REGNUM);
3288 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3289 }
3290 else
3291 {
3292 ASM_OUTPUT_ALIGN (f, 2);
3293 fprintf (f, "\t.code\t16\n");
3294 fprintf (f, ".Ltrampoline_start:\n");
3295 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3296 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3297 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3298 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3299 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3300 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3301 }
3302 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3303 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3304 }
3305
3306 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3307
3308 static void
3309 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3310 {
3311 rtx fnaddr, mem, a_tramp;
3312
3313 emit_block_move (m_tramp, assemble_trampoline_template (),
3314 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3315
3316 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3317 emit_move_insn (mem, chain_value);
3318
3319 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3320 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3321 emit_move_insn (mem, fnaddr);
3322
3323 a_tramp = XEXP (m_tramp, 0);
3324 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3325 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3326 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3327 }
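
/* A sketch of the layout these offsets assume, derived from the template
   above: in ARM and Thumb-2 mode the two load instructions occupy 8 bytes,
   so the static chain value lives at offset 8 and the target address at
   offset 12; in 16-bit Thumb mode the six halfword instructions occupy 12
   bytes, so the two words land at offsets 12 and 16 instead.  */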
3328
3329 /* Thumb trampolines should be entered in thumb mode, so set
3330 the bottom bit of the address. */
3331
3332 static rtx
3333 arm_trampoline_adjust_address (rtx addr)
3334 {
3335 if (TARGET_THUMB)
3336 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3337 NULL, 0, OPTAB_LIB_WIDEN);
3338 return addr;
3339 }
3340 \f
3341 /* Return 1 if it is possible to return using a single instruction.
3342 If SIBLING is non-null, this is a test for a return before a sibling
3343 call. SIBLING is the call insn, so we can examine its register usage. */
3344
3345 int
3346 use_return_insn (int iscond, rtx sibling)
3347 {
3348 int regno;
3349 unsigned int func_type;
3350 unsigned long saved_int_regs;
3351 unsigned HOST_WIDE_INT stack_adjust;
3352 arm_stack_offsets *offsets;
3353
3354 /* Never use a return instruction before reload has run. */
3355 if (!reload_completed)
3356 return 0;
3357
3358 func_type = arm_current_func_type ();
3359
3360 /* Naked, volatile and stack alignment functions need special
3361 consideration. */
3362 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3363 return 0;
3364
3365 /* So do interrupt functions that use the frame pointer and Thumb
3366 interrupt functions. */
3367 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3368 return 0;
3369
3370 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3371 && !optimize_function_for_size_p (cfun))
3372 return 0;
3373
3374 offsets = arm_get_frame_offsets ();
3375 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3376
3377 /* As do variadic functions. */
3378 if (crtl->args.pretend_args_size
3379 || cfun->machine->uses_anonymous_args
3380 /* Or if the function calls __builtin_eh_return () */
3381 || crtl->calls_eh_return
3382 /* Or if the function calls alloca */
3383 || cfun->calls_alloca
3384 /* Or if there is a stack adjustment. However, if the stack pointer
3385 is saved on the stack, we can use a pre-incrementing stack load. */
3386 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3387 && stack_adjust == 4)))
3388 return 0;
3389
3390 saved_int_regs = offsets->saved_regs_mask;
3391
3392 /* Unfortunately, the insn
3393
3394 ldmib sp, {..., sp, ...}
3395
3396 triggers a bug on most SA-110 based devices, such that the stack
3397 pointer won't be correctly restored if the instruction takes a
3398 page fault. We work around this problem by popping r3 along with
3399 the other registers, since that is never slower than executing
3400 another instruction.
3401
3402 We test for !arm_arch5 here, because code for any architecture
3403 less than this could potentially be run on one of the buggy
3404 chips. */
3405 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3406 {
3407 /* Validate that r3 is a call-clobbered register (always true in
3408 the default abi) ... */
3409 if (!call_used_regs[3])
3410 return 0;
3411
3412 /* ... that it isn't being used for a return value ... */
3413 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3414 return 0;
3415
3416 /* ... or for a tail-call argument ... */
3417 if (sibling)
3418 {
3419 gcc_assert (CALL_P (sibling));
3420
3421 if (find_regno_fusage (sibling, USE, 3))
3422 return 0;
3423 }
3424
3425 /* ... and that there are no call-saved registers in r0-r2
3426 (always true in the default ABI). */
3427 if (saved_int_regs & 0x7)
3428 return 0;
3429 }
3430
3431 /* Can't be done if interworking with Thumb, and any registers have been
3432 stacked. */
3433 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3434 return 0;
3435
3436 /* On StrongARM, conditional returns are expensive if they aren't
3437 taken and multiple registers have been stacked. */
3438 if (iscond && arm_tune_strongarm)
3439 {
3440 /* Conditional return when just the LR is stored is a simple
3441 conditional-load instruction, that's not expensive. */
3442 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3443 return 0;
3444
3445 if (flag_pic
3446 && arm_pic_register != INVALID_REGNUM
3447 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3448 return 0;
3449 }
3450
3451 /* If there are saved registers but the LR isn't saved, then we need
3452 two instructions for the return. */
3453 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3454 return 0;
3455
3456 /* Can't be done if any of the VFP regs are pushed,
3457 since this also requires an insn. */
3458 if (TARGET_HARD_FLOAT && TARGET_VFP)
3459 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3460 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3461 return 0;
3462
3463 if (TARGET_REALLY_IWMMXT)
3464 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3465 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3466 return 0;
3467
3468 return 1;
3469 }
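
/* For instance, a frame that saves {r4, lr} with no residual stack
   adjustment can typically return with a single "ldmfd sp!, {r4, pc}", so
   the checks above succeed; if r4 were saved without lr, restoring r4 and
   returning would take two instructions, which is why the LR test above
   rejects such frames.  */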
3470
3471 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3472 shrink-wrapping if possible. This is the case if we need to emit a
3473 prologue, which we can test by looking at the offsets. */
3474 bool
3475 use_simple_return_p (void)
3476 {
3477 arm_stack_offsets *offsets;
3478
3479 offsets = arm_get_frame_offsets ();
3480 return offsets->outgoing_args != 0;
3481 }
3482
3483 /* Return TRUE if int I is a valid immediate ARM constant. */
3484
3485 int
3486 const_ok_for_arm (HOST_WIDE_INT i)
3487 {
3488 int lowbit;
3489
3490 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3491 be all zero, or all one. */
3492 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3493 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3494 != ((~(unsigned HOST_WIDE_INT) 0)
3495 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3496 return FALSE;
3497
3498 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3499
3500 /* Fast return for 0 and small values. We must do this for zero, since
3501 the code below can't handle that one case. */
3502 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3503 return TRUE;
3504
3505 /* Get the number of trailing zeros. */
3506 lowbit = ffs((int) i) - 1;
3507
3508 /* Only even shifts are allowed in ARM mode so round down to the
3509 nearest even number. */
3510 if (TARGET_ARM)
3511 lowbit &= ~1;
3512
3513 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3514 return TRUE;
3515
3516 if (TARGET_ARM)
3517 {
3518 /* Allow rotated constants in ARM mode. */
3519 if (lowbit <= 4
3520 && ((i & ~0xc000003f) == 0
3521 || (i & ~0xf000000f) == 0
3522 || (i & ~0xfc000003) == 0))
3523 return TRUE;
3524 }
3525 else
3526 {
3527 HOST_WIDE_INT v;
3528
3529 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3530 v = i & 0xff;
3531 v |= v << 16;
3532 if (i == v || i == (v | (v << 8)))
3533 return TRUE;
3534
3535 /* Allow repeated pattern 0xXY00XY00. */
3536 v = i & 0xff00;
3537 v |= v << 16;
3538 if (i == v)
3539 return TRUE;
3540 }
3541
3542 return FALSE;
3543 }
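
/* Worked examples of the rules above: 0x000000ff and 0xff000000 are valid
   (an 8-bit value, possibly rotated by an even amount), whereas 0x00000101
   is not, since its set bits span nine positions and no rotation of an
   8-bit field can cover them.  In Thumb-2 the replicated patterns also
   qualify, e.g. 0x00ff00ff matches the 0x00XY00XY form.  */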
3544
3545 /* Return true if I is a valid constant for the operation CODE. */
3546 int
3547 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3548 {
3549 if (const_ok_for_arm (i))
3550 return 1;
3551
3552 switch (code)
3553 {
3554 case SET:
3555 /* See if we can use movw. */
3556 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3557 return 1;
3558 else
3559 /* Otherwise, try mvn. */
3560 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3561
3562 case PLUS:
3563 /* See if we can use addw or subw. */
3564 if (TARGET_THUMB2
3565 && ((i & 0xfffff000) == 0
3566 || ((-i) & 0xfffff000) == 0))
3567 return 1;
3568 /* else fall through. */
3569
3570 case COMPARE:
3571 case EQ:
3572 case NE:
3573 case GT:
3574 case LE:
3575 case LT:
3576 case GE:
3577 case GEU:
3578 case LTU:
3579 case GTU:
3580 case LEU:
3581 case UNORDERED:
3582 case ORDERED:
3583 case UNEQ:
3584 case UNGE:
3585 case UNLT:
3586 case UNGT:
3587 case UNLE:
3588 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3589
3590 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3591 case XOR:
3592 return 0;
3593
3594 case IOR:
3595 if (TARGET_THUMB2)
3596 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3597 return 0;
3598
3599 case AND:
3600 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3601
3602 default:
3603 gcc_unreachable ();
3604 }
3605 }
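
/* For instance, SET of 0xffffff00 is not itself a valid immediate, but its
   complement 0x000000ff is, so a single MVN suffices; likewise PLUS of -1
   is accepted because the negated value 1 is valid and the addition can be
   recast as a subtraction.  */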
3606
3607 /* Return true if I is a valid di mode constant for the operation CODE. */
3608 int
3609 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3610 {
3611 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3612 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3613 rtx hi = GEN_INT (hi_val);
3614 rtx lo = GEN_INT (lo_val);
3615
3616 if (TARGET_THUMB1)
3617 return 0;
3618
3619 switch (code)
3620 {
3621 case AND:
3622 case IOR:
3623 case XOR:
3624 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3625 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3626 case PLUS:
3627 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3628
3629 default:
3630 return 0;
3631 }
3632 }
3633
3634 /* Emit a sequence of insns to handle a large constant.
3635 CODE is the code of the operation required, it can be any of SET, PLUS,
3636 IOR, AND, XOR, MINUS;
3637 MODE is the mode in which the operation is being performed;
3638 VAL is the integer to operate on;
3639 SOURCE is the other operand (a register, or a null-pointer for SET);
3640 SUBTARGETS means it is safe to create scratch registers if that will
3641 either produce a simpler sequence, or we will want to cse the values.
3642 Return value is the number of insns emitted. */
3643
3644 /* ??? Tweak this for thumb2. */
3645 int
3646 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3647 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3648 {
3649 rtx cond;
3650
3651 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3652 cond = COND_EXEC_TEST (PATTERN (insn));
3653 else
3654 cond = NULL_RTX;
3655
3656 if (subtargets || code == SET
3657 || (REG_P (target) && REG_P (source)
3658 && REGNO (target) != REGNO (source)))
3659 {
3660 /* After arm_reorg has been called, we can't fix up expensive
3661 constants by pushing them into memory so we must synthesize
3662 them in-line, regardless of the cost. This is only likely to
3663 be more costly on chips that have load delay slots and we are
3664 compiling without running the scheduler (so no splitting
3665 occurred before the final instruction emission).
3666
3667 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3668 */
3669 if (!cfun->machine->after_arm_reorg
3670 && !cond
3671 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3672 1, 0)
3673 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3674 + (code != SET))))
3675 {
3676 if (code == SET)
3677 {
3678 /* Currently SET is the only monadic value for CODE, all
3679 the rest are dyadic. */
3680 if (TARGET_USE_MOVT)
3681 arm_emit_movpair (target, GEN_INT (val));
3682 else
3683 emit_set_insn (target, GEN_INT (val));
3684
3685 return 1;
3686 }
3687 else
3688 {
3689 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3690
3691 if (TARGET_USE_MOVT)
3692 arm_emit_movpair (temp, GEN_INT (val));
3693 else
3694 emit_set_insn (temp, GEN_INT (val));
3695
3696 /* For MINUS, the constant is the minuend (we compute TEMP - SOURCE),
3697 since we never have subtraction of a constant. */
3698 if (code == MINUS)
3699 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3700 else
3701 emit_set_insn (target,
3702 gen_rtx_fmt_ee (code, mode, source, temp));
3703 return 2;
3704 }
3705 }
3706 }
3707
3708 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3709 1);
3710 }
3711
3712 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3713 ARM/THUMB2 immediates, and add up to VAL.
3714 The function return value gives the number of insns required. */
3715 static int
3716 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3717 struct four_ints *return_sequence)
3718 {
3719 int best_consecutive_zeros = 0;
3720 int i;
3721 int best_start = 0;
3722 int insns1, insns2;
3723 struct four_ints tmp_sequence;
3724
3725 /* If we aren't targeting ARM, the best place to start is always at
3726 the bottom, otherwise look more closely. */
3727 if (TARGET_ARM)
3728 {
3729 for (i = 0; i < 32; i += 2)
3730 {
3731 int consecutive_zeros = 0;
3732
3733 if (!(val & (3 << i)))
3734 {
3735 while ((i < 32) && !(val & (3 << i)))
3736 {
3737 consecutive_zeros += 2;
3738 i += 2;
3739 }
3740 if (consecutive_zeros > best_consecutive_zeros)
3741 {
3742 best_consecutive_zeros = consecutive_zeros;
3743 best_start = i - consecutive_zeros;
3744 }
3745 i -= 2;
3746 }
3747 }
3748 }
3749
3750 /* So long as it won't require any more insns to do so, it's
3751 desirable to emit a small constant (in bits 0...9) in the last
3752 insn. This way there is more chance that it can be combined with
3753 a later addressing insn to form a pre-indexed load or store
3754 operation. Consider:
3755
3756 *((volatile int *)0xe0000100) = 1;
3757 *((volatile int *)0xe0000110) = 2;
3758
3759 We want this to wind up as:
3760
3761 mov rA, #0xe0000000
3762 mov rB, #1
3763 str rB, [rA, #0x100]
3764 mov rB, #2
3765 str rB, [rA, #0x110]
3766
3767 rather than having to synthesize both large constants from scratch.
3768
3769 Therefore, we calculate how many insns would be required to emit
3770 the constant starting from `best_start', and also starting from
3771 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3772 yield a shorter sequence, we may as well use zero. */
3773 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3774 if (best_start != 0
3775 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3776 {
3777 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3778 if (insns2 <= insns1)
3779 {
3780 *return_sequence = tmp_sequence;
3781 insns1 = insns2;
3782 }
3783 }
3784
3785 return insns1;
3786 }
3787
3788 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3789 static int
3790 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3791 struct four_ints *return_sequence, int i)
3792 {
3793 int remainder = val & 0xffffffff;
3794 int insns = 0;
3795
3796 /* Try and find a way of doing the job in either two or three
3797 instructions.
3798
3799 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3800 location. We start at position I. This may be the MSB, or
3801 optimal_immediate_sequence may have positioned it at the largest block
3802 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3803 wrapping around to the top of the word when we drop off the bottom.
3804 In the worst case this code should produce no more than four insns.
3805
3806 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3807 constants, shifted to any arbitrary location. We should always start
3808 at the MSB. */
3809 do
3810 {
3811 int end;
3812 unsigned int b1, b2, b3, b4;
3813 unsigned HOST_WIDE_INT result;
3814 int loc;
3815
3816 gcc_assert (insns < 4);
3817
3818 if (i <= 0)
3819 i += 32;
3820
3821 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3822 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3823 {
3824 loc = i;
3825 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3826 /* We can use addw/subw for the last 12 bits. */
3827 result = remainder;
3828 else
3829 {
3830 /* Use an 8-bit shifted/rotated immediate. */
3831 end = i - 8;
3832 if (end < 0)
3833 end += 32;
3834 result = remainder & ((0x0ff << end)
3835 | ((i < end) ? (0xff >> (32 - end))
3836 : 0));
3837 i -= 8;
3838 }
3839 }
3840 else
3841 {
3842 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3843 arbitrary shifts. */
3844 i -= TARGET_ARM ? 2 : 1;
3845 continue;
3846 }
3847
3848 /* Next, see if we can do a better job with a thumb2 replicated
3849 constant.
3850
3851 We do it this way around to catch the cases like 0x01F001E0 where
3852 two 8-bit immediates would work, but a replicated constant would
3853 make it worse.
3854
3855 TODO: 16-bit constants that don't clear all the bits, but still win.
3856 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3857 if (TARGET_THUMB2)
3858 {
3859 b1 = (remainder & 0xff000000) >> 24;
3860 b2 = (remainder & 0x00ff0000) >> 16;
3861 b3 = (remainder & 0x0000ff00) >> 8;
3862 b4 = remainder & 0xff;
3863
3864 if (loc > 24)
3865 {
3866 /* The 8-bit immediate already found clears b1 (and maybe b2),
3867 but must leave b3 and b4 alone. */
3868
3869 /* First try to find a 32-bit replicated constant that clears
3870 almost everything. We can assume that we can't do it in one,
3871 or else we wouldn't be here. */
3872 unsigned int tmp = b1 & b2 & b3 & b4;
3873 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3874 + (tmp << 24);
3875 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3876 + (tmp == b3) + (tmp == b4);
3877 if (tmp
3878 && (matching_bytes >= 3
3879 || (matching_bytes == 2
3880 && const_ok_for_op (remainder & ~tmp2, code))))
3881 {
3882 /* At least 3 of the bytes match, and the fourth has at
3883 least as many bits set, or two of the bytes match
3884 and it will only require one more insn to finish. */
3885 result = tmp2;
3886 i = tmp != b1 ? 32
3887 : tmp != b2 ? 24
3888 : tmp != b3 ? 16
3889 : 8;
3890 }
3891
3892 /* Second, try to find a 16-bit replicated constant that can
3893 leave three of the bytes clear. If b2 or b4 is already
3894 zero, then we can. If the 8-bit from above would not
3895 clear b2 anyway, then we still win. */
3896 else if (b1 == b3 && (!b2 || !b4
3897 || (remainder & 0x00ff0000 & ~result)))
3898 {
3899 result = remainder & 0xff00ff00;
3900 i = 24;
3901 }
3902 }
3903 else if (loc > 16)
3904 {
3905 /* The 8-bit immediate already found clears b2 (and maybe b3)
3906 and we don't get here unless b1 is already clear, but it will
3907 leave b4 unchanged. */
3908
3909 /* If we can clear b2 and b4 at once, then we win, since the
3910 8-bits couldn't possibly reach that far. */
3911 if (b2 == b4)
3912 {
3913 result = remainder & 0x00ff00ff;
3914 i = 16;
3915 }
3916 }
3917 }
3918
3919 return_sequence->i[insns++] = result;
3920 remainder &= ~result;
3921
3922 if (code == SET || code == MINUS)
3923 code = PLUS;
3924 }
3925 while (remainder);
3926
3927 return insns;
3928 }
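
/* A concrete ARM-mode example: for VAL == 0x00ff00ff the loop above peels
   off the two 8-bit fields 0x00ff0000 and 0x000000ff, so two insns are
   reported; in Thumb-2 the replicated-constant path covers the whole value
   in a single step, since 0x00ff00ff matches the 0x00XY00XY form.  */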
3929
3930 /* Emit an instruction with the indicated PATTERN. If COND is
3931 non-NULL, conditionalize the execution of the instruction on COND
3932 being true. */
3933
3934 static void
3935 emit_constant_insn (rtx cond, rtx pattern)
3936 {
3937 if (cond)
3938 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3939 emit_insn (pattern);
3940 }
3941
3942 /* As above, but extra parameter GENERATE which, if clear, suppresses
3943 RTL generation. */
3944
3945 static int
3946 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3947 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3948 int generate)
3949 {
3950 int can_invert = 0;
3951 int can_negate = 0;
3952 int final_invert = 0;
3953 int i;
3954 int set_sign_bit_copies = 0;
3955 int clear_sign_bit_copies = 0;
3956 int clear_zero_bit_copies = 0;
3957 int set_zero_bit_copies = 0;
3958 int insns = 0, neg_insns, inv_insns;
3959 unsigned HOST_WIDE_INT temp1, temp2;
3960 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3961 struct four_ints *immediates;
3962 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3963
3964 /* Find out which operations are safe for a given CODE. Also do a quick
3965 check for degenerate cases; these can occur when DImode operations
3966 are split. */
3967 switch (code)
3968 {
3969 case SET:
3970 can_invert = 1;
3971 break;
3972
3973 case PLUS:
3974 can_negate = 1;
3975 break;
3976
3977 case IOR:
3978 if (remainder == 0xffffffff)
3979 {
3980 if (generate)
3981 emit_constant_insn (cond,
3982 gen_rtx_SET (VOIDmode, target,
3983 GEN_INT (ARM_SIGN_EXTEND (val))));
3984 return 1;
3985 }
3986
3987 if (remainder == 0)
3988 {
3989 if (reload_completed && rtx_equal_p (target, source))
3990 return 0;
3991
3992 if (generate)
3993 emit_constant_insn (cond,
3994 gen_rtx_SET (VOIDmode, target, source));
3995 return 1;
3996 }
3997 break;
3998
3999 case AND:
4000 if (remainder == 0)
4001 {
4002 if (generate)
4003 emit_constant_insn (cond,
4004 gen_rtx_SET (VOIDmode, target, const0_rtx));
4005 return 1;
4006 }
4007 if (remainder == 0xffffffff)
4008 {
4009 if (reload_completed && rtx_equal_p (target, source))
4010 return 0;
4011 if (generate)
4012 emit_constant_insn (cond,
4013 gen_rtx_SET (VOIDmode, target, source));
4014 return 1;
4015 }
4016 can_invert = 1;
4017 break;
4018
4019 case XOR:
4020 if (remainder == 0)
4021 {
4022 if (reload_completed && rtx_equal_p (target, source))
4023 return 0;
4024 if (generate)
4025 emit_constant_insn (cond,
4026 gen_rtx_SET (VOIDmode, target, source));
4027 return 1;
4028 }
4029
4030 if (remainder == 0xffffffff)
4031 {
4032 if (generate)
4033 emit_constant_insn (cond,
4034 gen_rtx_SET (VOIDmode, target,
4035 gen_rtx_NOT (mode, source)));
4036 return 1;
4037 }
4038 final_invert = 1;
4039 break;
4040
4041 case MINUS:
4042 /* We treat MINUS as (val - source), since (source - val) is always
4043 passed as (source + (-val)). */
4044 if (remainder == 0)
4045 {
4046 if (generate)
4047 emit_constant_insn (cond,
4048 gen_rtx_SET (VOIDmode, target,
4049 gen_rtx_NEG (mode, source)));
4050 return 1;
4051 }
4052 if (const_ok_for_arm (val))
4053 {
4054 if (generate)
4055 emit_constant_insn (cond,
4056 gen_rtx_SET (VOIDmode, target,
4057 gen_rtx_MINUS (mode, GEN_INT (val),
4058 source)));
4059 return 1;
4060 }
4061
4062 break;
4063
4064 default:
4065 gcc_unreachable ();
4066 }
4067
4068 /* If we can do it in one insn get out quickly. */
4069 if (const_ok_for_op (val, code))
4070 {
4071 if (generate)
4072 emit_constant_insn (cond,
4073 gen_rtx_SET (VOIDmode, target,
4074 (source
4075 ? gen_rtx_fmt_ee (code, mode, source,
4076 GEN_INT (val))
4077 : GEN_INT (val))));
4078 return 1;
4079 }
4080
4081 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4082 insn. */
4083 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4084 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4085 {
4086 if (generate)
4087 {
4088 if (mode == SImode && i == 16)
4089 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4090 smaller insn. */
4091 emit_constant_insn (cond,
4092 gen_zero_extendhisi2
4093 (target, gen_lowpart (HImode, source)));
4094 else
4095 /* The extzv pattern only supports SImode, but we can coerce the operands
4096 into that mode. */
4097 emit_constant_insn (cond,
4098 gen_extzv_t2 (gen_lowpart (SImode, target),
4099 gen_lowpart (SImode, source),
4100 GEN_INT (i), const0_rtx));
4101 }
4102
4103 return 1;
4104 }
4105
4106 /* Calculate a few attributes that may be useful for specific
4107 optimizations. */
4108 /* Count number of leading zeros. */
4109 for (i = 31; i >= 0; i--)
4110 {
4111 if ((remainder & (1 << i)) == 0)
4112 clear_sign_bit_copies++;
4113 else
4114 break;
4115 }
4116
4117 /* Count number of leading 1's. */
4118 for (i = 31; i >= 0; i--)
4119 {
4120 if ((remainder & (1 << i)) != 0)
4121 set_sign_bit_copies++;
4122 else
4123 break;
4124 }
4125
4126 /* Count number of trailing zero's. */
4127 for (i = 0; i <= 31; i++)
4128 {
4129 if ((remainder & (1 << i)) == 0)
4130 clear_zero_bit_copies++;
4131 else
4132 break;
4133 }
4134
4135 /* Count number of trailing 1's. */
4136 for (i = 0; i <= 31; i++)
4137 {
4138 if ((remainder & (1 << i)) != 0)
4139 set_zero_bit_copies++;
4140 else
4141 break;
4142 }
4143
4144 switch (code)
4145 {
4146 case SET:
4147 /* See if we can do this by sign_extending a constant that is known
4148 to be negative. This is a good way of doing it, since the shift
4149 may well merge into a subsequent insn. */
4150 if (set_sign_bit_copies > 1)
4151 {
4152 if (const_ok_for_arm
4153 (temp1 = ARM_SIGN_EXTEND (remainder
4154 << (set_sign_bit_copies - 1))))
4155 {
4156 if (generate)
4157 {
4158 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4159 emit_constant_insn (cond,
4160 gen_rtx_SET (VOIDmode, new_src,
4161 GEN_INT (temp1)));
4162 emit_constant_insn (cond,
4163 gen_ashrsi3 (target, new_src,
4164 GEN_INT (set_sign_bit_copies - 1)));
4165 }
4166 return 2;
4167 }
4168 /* For an inverted constant, we will need to set the low bits,
4169 these will be shifted out of harm's way. */
4170 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4171 if (const_ok_for_arm (~temp1))
4172 {
4173 if (generate)
4174 {
4175 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4176 emit_constant_insn (cond,
4177 gen_rtx_SET (VOIDmode, new_src,
4178 GEN_INT (temp1)));
4179 emit_constant_insn (cond,
4180 gen_ashrsi3 (target, new_src,
4181 GEN_INT (set_sign_bit_copies - 1)));
4182 }
4183 return 2;
4184 }
4185 }
4186
4187 /* See if we can calculate the value as the difference between two
4188 valid immediates. */
4189 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4190 {
4191 int topshift = clear_sign_bit_copies & ~1;
4192
4193 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4194 & (0xff000000 >> topshift));
4195
4196 /* If temp1 is zero, then that means the 9 most significant
4197 bits of remainder were 1 and we've caused it to overflow.
4198 When topshift is 0 we don't need to do anything since we
4199 can borrow from 'bit 32'. */
4200 if (temp1 == 0 && topshift != 0)
4201 temp1 = 0x80000000 >> (topshift - 1);
4202
4203 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4204
4205 if (const_ok_for_arm (temp2))
4206 {
4207 if (generate)
4208 {
4209 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4210 emit_constant_insn (cond,
4211 gen_rtx_SET (VOIDmode, new_src,
4212 GEN_INT (temp1)));
4213 emit_constant_insn (cond,
4214 gen_addsi3 (target, new_src,
4215 GEN_INT (-temp2)));
4216 }
4217
4218 return 2;
4219 }
4220 }
4221
4222 /* See if we can generate this by setting the bottom (or the top)
4223 16 bits, and then shifting these into the other half of the
4224 word. We only look for the simplest cases, to do more would cost
4225 too much. Be careful, however, not to generate this when the
4226 alternative would take fewer insns. */
4227 if (val & 0xffff0000)
4228 {
4229 temp1 = remainder & 0xffff0000;
4230 temp2 = remainder & 0x0000ffff;
4231
4232 /* Overlaps outside this range are best done using other methods. */
4233 for (i = 9; i < 24; i++)
4234 {
4235 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4236 && !const_ok_for_arm (temp2))
4237 {
4238 rtx new_src = (subtargets
4239 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4240 : target);
4241 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4242 source, subtargets, generate);
4243 source = new_src;
4244 if (generate)
4245 emit_constant_insn
4246 (cond,
4247 gen_rtx_SET
4248 (VOIDmode, target,
4249 gen_rtx_IOR (mode,
4250 gen_rtx_ASHIFT (mode, source,
4251 GEN_INT (i)),
4252 source)));
4253 return insns + 1;
4254 }
4255 }
4256
4257 /* Don't duplicate cases already considered. */
4258 for (i = 17; i < 24; i++)
4259 {
4260 if (((temp1 | (temp1 >> i)) == remainder)
4261 && !const_ok_for_arm (temp1))
4262 {
4263 rtx new_src = (subtargets
4264 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4265 : target);
4266 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4267 source, subtargets, generate);
4268 source = new_src;
4269 if (generate)
4270 emit_constant_insn
4271 (cond,
4272 gen_rtx_SET (VOIDmode, target,
4273 gen_rtx_IOR
4274 (mode,
4275 gen_rtx_LSHIFTRT (mode, source,
4276 GEN_INT (i)),
4277 source)));
4278 return insns + 1;
4279 }
4280 }
4281 }
4282 break;
4283
4284 case IOR:
4285 case XOR:
4286 /* If we have IOR or XOR, and the constant can be loaded in a
4287 single instruction, and we can find a temporary to put it in,
4288 then this can be done in two instructions instead of 3-4. */
4289 if (subtargets
4290 /* TARGET can't be NULL if SUBTARGETS is 0 */
4291 || (reload_completed && !reg_mentioned_p (target, source)))
4292 {
4293 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4294 {
4295 if (generate)
4296 {
4297 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4298
4299 emit_constant_insn (cond,
4300 gen_rtx_SET (VOIDmode, sub,
4301 GEN_INT (val)));
4302 emit_constant_insn (cond,
4303 gen_rtx_SET (VOIDmode, target,
4304 gen_rtx_fmt_ee (code, mode,
4305 source, sub)));
4306 }
4307 return 2;
4308 }
4309 }
4310
4311 if (code == XOR)
4312 break;
4313
4314 /* Convert.
4315 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4316 followed by 0s, e.g. 0xfff00000)
4317 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4318
4319 This can be done in 2 instructions by using shifts with mov or mvn.
4320 e.g. for
4321 x = x | 0xfff00000;
4322 we generate:
4323 mvn r0, r0, asl #12
4324 mvn r0, r0, lsr #12 */
4325 if (set_sign_bit_copies > 8
4326 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4327 {
4328 if (generate)
4329 {
4330 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4331 rtx shift = GEN_INT (set_sign_bit_copies);
4332
4333 emit_constant_insn
4334 (cond,
4335 gen_rtx_SET (VOIDmode, sub,
4336 gen_rtx_NOT (mode,
4337 gen_rtx_ASHIFT (mode,
4338 source,
4339 shift))));
4340 emit_constant_insn
4341 (cond,
4342 gen_rtx_SET (VOIDmode, target,
4343 gen_rtx_NOT (mode,
4344 gen_rtx_LSHIFTRT (mode, sub,
4345 shift))));
4346 }
4347 return 2;
4348 }
4349
4350 /* Convert
4351 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4352 to
4353 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4354
4355 E.g. for r0 = r0 | 0xfff
4356 mvn r0, r0, lsr #12
4357 mvn r0, r0, asl #12
4358
4359 */
4360 if (set_zero_bit_copies > 8
4361 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4362 {
4363 if (generate)
4364 {
4365 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4366 rtx shift = GEN_INT (set_zero_bit_copies);
4367
4368 emit_constant_insn
4369 (cond,
4370 gen_rtx_SET (VOIDmode, sub,
4371 gen_rtx_NOT (mode,
4372 gen_rtx_LSHIFTRT (mode,
4373 source,
4374 shift))));
4375 emit_constant_insn
4376 (cond,
4377 gen_rtx_SET (VOIDmode, target,
4378 gen_rtx_NOT (mode,
4379 gen_rtx_ASHIFT (mode, sub,
4380 shift))));
4381 }
4382 return 2;
4383 }
4384
4385 /* This will never be reached for Thumb2 because orn is a valid
4386 instruction. This is for Thumb1 and the ARM 32 bit cases.
4387
4388 x = y | constant (such that ~constant is a valid constant)
4389 Transform this to
4390 x = ~(~y & ~constant).
4391 */
4392 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4393 {
4394 if (generate)
4395 {
4396 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4397 emit_constant_insn (cond,
4398 gen_rtx_SET (VOIDmode, sub,
4399 gen_rtx_NOT (mode, source)));
4400 source = sub;
4401 if (subtargets)
4402 sub = gen_reg_rtx (mode);
4403 emit_constant_insn (cond,
4404 gen_rtx_SET (VOIDmode, sub,
4405 gen_rtx_AND (mode, source,
4406 GEN_INT (temp1))));
4407 emit_constant_insn (cond,
4408 gen_rtx_SET (VOIDmode, target,
4409 gen_rtx_NOT (mode, sub)));
4410 }
4411 return 3;
4412 }
4413 break;
4414
4415 case AND:
4416 /* See if two shifts will do 2 or more insns' worth of work. */
4417 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4418 {
4419 HOST_WIDE_INT shift_mask = ((0xffffffff
4420 << (32 - clear_sign_bit_copies))
4421 & 0xffffffff);
4422
4423 if ((remainder | shift_mask) != 0xffffffff)
4424 {
4425 if (generate)
4426 {
4427 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4428 insns = arm_gen_constant (AND, mode, cond,
4429 remainder | shift_mask,
4430 new_src, source, subtargets, 1);
4431 source = new_src;
4432 }
4433 else
4434 {
4435 rtx targ = subtargets ? NULL_RTX : target;
4436 insns = arm_gen_constant (AND, mode, cond,
4437 remainder | shift_mask,
4438 targ, source, subtargets, 0);
4439 }
4440 }
4441
4442 if (generate)
4443 {
4444 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4445 rtx shift = GEN_INT (clear_sign_bit_copies);
4446
4447 emit_insn (gen_ashlsi3 (new_src, source, shift));
4448 emit_insn (gen_lshrsi3 (target, new_src, shift));
4449 }
4450
4451 return insns + 2;
4452 }
4453
4454 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4455 {
4456 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4457
4458 if ((remainder | shift_mask) != 0xffffffff)
4459 {
4460 if (generate)
4461 {
4462 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4463
4464 insns = arm_gen_constant (AND, mode, cond,
4465 remainder | shift_mask,
4466 new_src, source, subtargets, 1);
4467 source = new_src;
4468 }
4469 else
4470 {
4471 rtx targ = subtargets ? NULL_RTX : target;
4472
4473 insns = arm_gen_constant (AND, mode, cond,
4474 remainder | shift_mask,
4475 targ, source, subtargets, 0);
4476 }
4477 }
4478
4479 if (generate)
4480 {
4481 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4482 rtx shift = GEN_INT (clear_zero_bit_copies);
4483
4484 emit_insn (gen_lshrsi3 (new_src, source, shift));
4485 emit_insn (gen_ashlsi3 (target, new_src, shift));
4486 }
4487
4488 return insns + 2;
4489 }
4490
4491 break;
4492
4493 default:
4494 break;
4495 }
4496
4497 /* Calculate what the instruction sequences would be if we generated it
4498 normally, negated, or inverted. */
4499 if (code == AND)
4500 /* AND cannot be split into multiple insns, so invert and use BIC. */
4501 insns = 99;
4502 else
4503 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4504
4505 if (can_negate)
4506 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4507 &neg_immediates);
4508 else
4509 neg_insns = 99;
4510
4511 if (can_invert || final_invert)
4512 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4513 &inv_immediates);
4514 else
4515 inv_insns = 99;
4516
4517 immediates = &pos_immediates;
4518
4519 /* Is the negated immediate sequence more efficient? */
4520 if (neg_insns < insns && neg_insns <= inv_insns)
4521 {
4522 insns = neg_insns;
4523 immediates = &neg_immediates;
4524 }
4525 else
4526 can_negate = 0;
4527
4528 /* Is the inverted immediate sequence more efficient?
4529 We must allow for an extra NOT instruction for XOR operations, although
4530 there is some chance that the final 'mvn' will get optimized later. */
4531 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4532 {
4533 insns = inv_insns;
4534 immediates = &inv_immediates;
4535 }
4536 else
4537 {
4538 can_invert = 0;
4539 final_invert = 0;
4540 }
4541
4542 /* Now output the chosen sequence as instructions. */
4543 if (generate)
4544 {
4545 for (i = 0; i < insns; i++)
4546 {
4547 rtx new_src, temp1_rtx;
4548
4549 temp1 = immediates->i[i];
4550
4551 if (code == SET || code == MINUS)
4552 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4553 else if ((final_invert || i < (insns - 1)) && subtargets)
4554 new_src = gen_reg_rtx (mode);
4555 else
4556 new_src = target;
4557
4558 if (can_invert)
4559 temp1 = ~temp1;
4560 else if (can_negate)
4561 temp1 = -temp1;
4562
4563 temp1 = trunc_int_for_mode (temp1, mode);
4564 temp1_rtx = GEN_INT (temp1);
4565
4566 if (code == SET)
4567 ;
4568 else if (code == MINUS)
4569 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4570 else
4571 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4572
4573 emit_constant_insn (cond,
4574 gen_rtx_SET (VOIDmode, new_src,
4575 temp1_rtx));
4576 source = new_src;
4577
4578 if (code == SET)
4579 {
4580 can_negate = can_invert;
4581 can_invert = 0;
4582 code = PLUS;
4583 }
4584 else if (code == MINUS)
4585 code = PLUS;
4586 }
4587 }
4588
4589 if (final_invert)
4590 {
4591 if (generate)
4592 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4593 gen_rtx_NOT (mode, source)));
4594 insns++;
4595 }
4596
4597 return insns;
4598 }
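
/* A sketch of the inverted path above: for (x & 0xf0f0ffff) neither the
   mask nor its complement is a single valid immediate, so the complement
   0x0f0f0000 is split into 0x0f000000 and 0x000f0000 and the operation is
   emitted as two AND insns with inverted immediates, which the output
   patterns can print as BICs.  */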
4599
4600 /* Canonicalize a comparison so that we are more likely to recognize it.
4601 This can be done for a few constant compares, where we can make the
4602 immediate value easier to load. */
4603
4604 static void
4605 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4606 bool op0_preserve_value)
4607 {
4608 machine_mode mode;
4609 unsigned HOST_WIDE_INT i, maxval;
4610
4611 mode = GET_MODE (*op0);
4612 if (mode == VOIDmode)
4613 mode = GET_MODE (*op1);
4614
4615 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4616
4617 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4618 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4619 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4620 for GTU/LEU in Thumb mode. */
4621 if (mode == DImode)
4622 {
4623
4624 if (*code == GT || *code == LE
4625 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4626 {
4627 /* Missing comparison. First try to use an available
4628 comparison. */
4629 if (CONST_INT_P (*op1))
4630 {
4631 i = INTVAL (*op1);
4632 switch (*code)
4633 {
4634 case GT:
4635 case LE:
4636 if (i != maxval
4637 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4638 {
4639 *op1 = GEN_INT (i + 1);
4640 *code = *code == GT ? GE : LT;
4641 return;
4642 }
4643 break;
4644 case GTU:
4645 case LEU:
4646 if (i != ~((unsigned HOST_WIDE_INT) 0)
4647 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4648 {
4649 *op1 = GEN_INT (i + 1);
4650 *code = *code == GTU ? GEU : LTU;
4651 return;
4652 }
4653 break;
4654 default:
4655 gcc_unreachable ();
4656 }
4657 }
4658
4659 /* If that did not work, reverse the condition. */
4660 if (!op0_preserve_value)
4661 {
4662 std::swap (*op0, *op1);
4663 *code = (int)swap_condition ((enum rtx_code)*code);
4664 }
4665 }
4666 return;
4667 }
4668
4669 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4670 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4671 to facilitate possible combining with a cmp into 'ands'. */
4672 if (mode == SImode
4673 && GET_CODE (*op0) == ZERO_EXTEND
4674 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4675 && GET_MODE (XEXP (*op0, 0)) == QImode
4676 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4677 && subreg_lowpart_p (XEXP (*op0, 0))
4678 && *op1 == const0_rtx)
4679 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4680 GEN_INT (255));
4681
4682 /* Comparisons smaller than DImode. Only adjust comparisons against
4683 an out-of-range constant. */
4684 if (!CONST_INT_P (*op1)
4685 || const_ok_for_arm (INTVAL (*op1))
4686 || const_ok_for_arm (- INTVAL (*op1)))
4687 return;
4688
4689 i = INTVAL (*op1);
4690
4691 switch (*code)
4692 {
4693 case EQ:
4694 case NE:
4695 return;
4696
4697 case GT:
4698 case LE:
4699 if (i != maxval
4700 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4701 {
4702 *op1 = GEN_INT (i + 1);
4703 *code = *code == GT ? GE : LT;
4704 return;
4705 }
4706 break;
4707
4708 case GE:
4709 case LT:
4710 if (i != ~maxval
4711 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4712 {
4713 *op1 = GEN_INT (i - 1);
4714 *code = *code == GE ? GT : LE;
4715 return;
4716 }
4717 break;
4718
4719 case GTU:
4720 case LEU:
4721 if (i != ~((unsigned HOST_WIDE_INT) 0)
4722 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4723 {
4724 *op1 = GEN_INT (i + 1);
4725 *code = *code == GTU ? GEU : LTU;
4726 return;
4727 }
4728 break;
4729
4730 case GEU:
4731 case LTU:
4732 if (i != 0
4733 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4734 {
4735 *op1 = GEN_INT (i - 1);
4736 *code = *code == GEU ? GTU : LEU;
4737 return;
4738 }
4739 break;
4740
4741 default:
4742 gcc_unreachable ();
4743 }
4744 }
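
/* Example of the adjustment above: 0x00ffffff is not a valid immediate
   (nor is its negation), but 0x01000000 is, so a comparison (x > 0xffffff)
   is rewritten as (x >= 0x1000000) and needs only a single CMP.  */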
4745
4746
4747 /* Define how to find the value returned by a function. */
4748
4749 static rtx
4750 arm_function_value(const_tree type, const_tree func,
4751 bool outgoing ATTRIBUTE_UNUSED)
4752 {
4753 machine_mode mode;
4754 int unsignedp ATTRIBUTE_UNUSED;
4755 rtx r ATTRIBUTE_UNUSED;
4756
4757 mode = TYPE_MODE (type);
4758
4759 if (TARGET_AAPCS_BASED)
4760 return aapcs_allocate_return_reg (mode, type, func);
4761
4762 /* Promote integer types. */
4763 if (INTEGRAL_TYPE_P (type))
4764 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4765
4766 /* Promotes small structs returned in a register to full-word size
4767 for big-endian AAPCS. */
4768 if (arm_return_in_msb (type))
4769 {
4770 HOST_WIDE_INT size = int_size_in_bytes (type);
4771 if (size % UNITS_PER_WORD != 0)
4772 {
4773 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4774 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4775 }
4776 }
4777
4778 return arm_libcall_value_1 (mode);
4779 }
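
/* For instance, on a big-endian AAPCS target a 3-byte structure returned
   in a register is widened here to a full word (SImode), so that, per
   arm_return_in_msb, the value sits in the most significant bytes of r0
   and the padding falls at the least significant end.  */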
4780
4781 /* libcall hashtable helpers. */
4782
4783 struct libcall_hasher : typed_noop_remove <rtx_def>
4784 {
4785 typedef rtx_def value_type;
4786 typedef rtx_def compare_type;
4787 static inline hashval_t hash (const value_type *);
4788 static inline bool equal (const value_type *, const compare_type *);
4789 static inline void remove (value_type *);
4790 };
4791
4792 inline bool
4793 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4794 {
4795 return rtx_equal_p (p1, p2);
4796 }
4797
4798 inline hashval_t
4799 libcall_hasher::hash (const value_type *p1)
4800 {
4801 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4802 }
4803
4804 typedef hash_table<libcall_hasher> libcall_table_type;
4805
4806 static void
4807 add_libcall (libcall_table_type *htab, rtx libcall)
4808 {
4809 *htab->find_slot (libcall, INSERT) = libcall;
4810 }
4811
4812 static bool
4813 arm_libcall_uses_aapcs_base (const_rtx libcall)
4814 {
4815 static bool init_done = false;
4816 static libcall_table_type *libcall_htab = NULL;
4817
4818 if (!init_done)
4819 {
4820 init_done = true;
4821
4822 libcall_htab = new libcall_table_type (31);
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4825 add_libcall (libcall_htab,
4826 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4827 add_libcall (libcall_htab,
4828 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4829 add_libcall (libcall_htab,
4830 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4831
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4834 add_libcall (libcall_htab,
4835 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4836 add_libcall (libcall_htab,
4837 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4838 add_libcall (libcall_htab,
4839 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4840
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4851 add_libcall (libcall_htab,
4852 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4853 add_libcall (libcall_htab,
4854 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4855 add_libcall (libcall_htab,
4856 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4857
4858 /* Values from double-precision helper functions are returned in core
4859 registers if the selected core only supports single-precision
4860 arithmetic, even if we are using the hard-float ABI. The same is
4861 true for single-precision helpers, but we will never be using the
4862 hard-float ABI on a CPU which doesn't support single-precision
4863 operations in hardware. */
4864 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4871 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4872 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4873 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4874 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4875 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4876 SFmode));
4877 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4878 DFmode));
4879 }
4880
4881 return libcall && libcall_htab->find (libcall) != NULL;
4882 }
4883
4884 static rtx
4885 arm_libcall_value_1 (machine_mode mode)
4886 {
4887 if (TARGET_AAPCS_BASED)
4888 return aapcs_libcall_value (mode);
4889 else if (TARGET_IWMMXT_ABI
4890 && arm_vector_mode_supported_p (mode))
4891 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4892 else
4893 return gen_rtx_REG (mode, ARG_REGISTER (1));
4894 }
4895
4896 /* Define how to find the value returned by a library function
4897 assuming the value has mode MODE. */
4898
4899 static rtx
4900 arm_libcall_value (machine_mode mode, const_rtx libcall)
4901 {
4902 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4903 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4904 {
4905 /* The following libcalls return their result in integer registers,
4906 even though they return a floating point value. */
4907 if (arm_libcall_uses_aapcs_base (libcall))
4908 return gen_rtx_REG (mode, ARG_REGISTER(1));
4909
4910 }
4911
4912 return arm_libcall_value_1 (mode);
4913 }
4914
4915 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4916
4917 static bool
4918 arm_function_value_regno_p (const unsigned int regno)
4919 {
4920 if (regno == ARG_REGISTER (1)
4921 || (TARGET_32BIT
4922 && TARGET_AAPCS_BASED
4923 && TARGET_VFP
4924 && TARGET_HARD_FLOAT
4925 && regno == FIRST_VFP_REGNUM)
4926 || (TARGET_IWMMXT_ABI
4927 && regno == FIRST_IWMMXT_REGNUM))
4928 return true;
4929
4930 return false;
4931 }
4932
4933 /* Determine the amount of memory needed to store the possible return
4934 registers of an untyped call. */
4935 int
4936 arm_apply_result_size (void)
4937 {
4938 int size = 16;
4939
4940 if (TARGET_32BIT)
4941 {
4942 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4943 size += 32;
4944 if (TARGET_IWMMXT_ABI)
4945 size += 8;
4946 }
4947
4948 return size;
4949 }
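
/* That is, 16 bytes for r0-r3 on any target, growing to 16 + 32 + 8 = 56
   bytes when both the VFP hard-float ABI and the iWMMXt ABI contribute
   return registers.  */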
4950
4951 /* Decide whether TYPE should be returned in memory (true)
4952 or in a register (false). FNTYPE is the type of the function making
4953 the call. */
4954 static bool
4955 arm_return_in_memory (const_tree type, const_tree fntype)
4956 {
4957 HOST_WIDE_INT size;
4958
4959 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4960
4961 if (TARGET_AAPCS_BASED)
4962 {
4963 /* Simple, non-aggregate types (i.e. not including vectors and
4964 complex) are always returned in a register (or registers).
4965 We don't care about which register here, so we can short-cut
4966 some of the detail. */
4967 if (!AGGREGATE_TYPE_P (type)
4968 && TREE_CODE (type) != VECTOR_TYPE
4969 && TREE_CODE (type) != COMPLEX_TYPE)
4970 return false;
4971
4972 /* Any return value that is no larger than one word can be
4973 returned in r0. */
4974 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4975 return false;
4976
4977 /* Check any available co-processors to see if they accept the
4978 type as a register candidate (VFP, for example, can return
4979 some aggregates in consecutive registers). These aren't
4980 available if the call is variadic. */
4981 if (aapcs_select_return_coproc (type, fntype) >= 0)
4982 return false;
4983
4984 /* Vector values should be returned using ARM registers, not
4985 memory (unless they're over 16 bytes, which will break since
4986 we only have four call-clobbered registers to play with). */
4987 if (TREE_CODE (type) == VECTOR_TYPE)
4988 return (size < 0 || size > (4 * UNITS_PER_WORD));
4989
4990 /* The rest go in memory. */
4991 return true;
4992 }
4993
4994 if (TREE_CODE (type) == VECTOR_TYPE)
4995 return (size < 0 || size > (4 * UNITS_PER_WORD));
4996
4997 if (!AGGREGATE_TYPE_P (type) &&
4998 (TREE_CODE (type) != VECTOR_TYPE))
4999 /* All simple types are returned in registers. */
5000 return false;
5001
5002 if (arm_abi != ARM_ABI_APCS)
5003 {
5004 /* ATPCS and later return aggregate types in memory only if they are
5005 larger than a word (or are variable size). */
5006 return (size < 0 || size > UNITS_PER_WORD);
5007 }
5008
5009 /* For the arm-wince targets we choose to be compatible with Microsoft's
5010 ARM and Thumb compilers, which always return aggregates in memory. */
5011 #ifndef ARM_WINCE
5012 /* All structures/unions bigger than one word are returned in memory.
5013 Also catch the case where int_size_in_bytes returns -1. In this case
5014 the aggregate is either huge or of variable size, and in either case
5015 we will want to return it via memory and not in a register. */
5016 if (size < 0 || size > UNITS_PER_WORD)
5017 return true;
5018
5019 if (TREE_CODE (type) == RECORD_TYPE)
5020 {
5021 tree field;
5022
5023 /* For a struct the APCS says that we only return in a register
5024 if the type is 'integer like' and every addressable element
5025 has an offset of zero. For practical purposes this means
5026 that the structure can have at most one non bit-field element
5027 and that this element must be the first one in the structure. */
5028
5029 /* Find the first field, ignoring non FIELD_DECL things which will
5030 have been created by C++. */
5031 for (field = TYPE_FIELDS (type);
5032 field && TREE_CODE (field) != FIELD_DECL;
5033 field = DECL_CHAIN (field))
5034 continue;
5035
5036 if (field == NULL)
5037 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5038
5039 /* Check that the first field is valid for returning in a register. */
5040
5041 /* ... Floats are not allowed */
5042 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5043 return true;
5044
5045 /* ... Aggregates that are not themselves valid for returning in
5046 a register are not allowed. */
5047 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5048 return true;
5049
5050 /* Now check the remaining fields, if any. Only bitfields are allowed,
5051 since they are not addressable. */
5052 for (field = DECL_CHAIN (field);
5053 field;
5054 field = DECL_CHAIN (field))
5055 {
5056 if (TREE_CODE (field) != FIELD_DECL)
5057 continue;
5058
5059 if (!DECL_BIT_FIELD_TYPE (field))
5060 return true;
5061 }
5062
5063 return false;
5064 }
5065
5066 if (TREE_CODE (type) == UNION_TYPE)
5067 {
5068 tree field;
5069
5070 /* Unions can be returned in registers if every element is
5071 integral, or can be returned in an integer register. */
5072 for (field = TYPE_FIELDS (type);
5073 field;
5074 field = DECL_CHAIN (field))
5075 {
5076 if (TREE_CODE (field) != FIELD_DECL)
5077 continue;
5078
5079 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5080 return true;
5081
5082 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5083 return true;
5084 }
5085
5086 return false;
5087 }
5088 #endif /* not ARM_WINCE */
5089
5090 /* Return all other types in memory. */
5091 return true;
5092 }
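
/* Illustrative cases, assuming the usual ABI configurations: under AAPCS a
   struct { int a; int b; } is 8 bytes, larger than one word and not a
   co-processor candidate, so it is returned in memory, whereas under the
   VFP variant a struct { double x; double y; } can come back in VFP
   registers via the co-processor check above.  Under the old APCS rules a
   struct { float x; } goes to memory because its first field is a float,
   while a struct { int x; } is returned in a register.  */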
5093
5094 const struct pcs_attribute_arg
5095 {
5096 const char *arg;
5097 enum arm_pcs value;
5098 } pcs_attribute_args[] =
5099 {
5100 {"aapcs", ARM_PCS_AAPCS},
5101 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5102 #if 0
5103 /* We could recognize these, but changes would be needed elsewhere
5104 * to implement them. */
5105 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5106 {"atpcs", ARM_PCS_ATPCS},
5107 {"apcs", ARM_PCS_APCS},
5108 #endif
5109 {NULL, ARM_PCS_UNKNOWN}
5110 };
5111
5112 static enum arm_pcs
5113 arm_pcs_from_attribute (tree attr)
5114 {
5115 const struct pcs_attribute_arg *ptr;
5116 const char *arg;
5117
5118 /* Get the value of the argument. */
5119 if (TREE_VALUE (attr) == NULL_TREE
5120 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5121 return ARM_PCS_UNKNOWN;
5122
5123 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5124
5125 /* Check it against the list of known arguments. */
5126 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5127 if (streq (arg, ptr->arg))
5128 return ptr->value;
5129
5130 /* An unrecognized PCS variant. */
5131 return ARM_PCS_UNKNOWN;
5132 }
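
/* For example (hypothetical declaration, not taken from this file):

       double f (double) __attribute__ ((pcs ("aapcs")));

   maps to ARM_PCS_AAPCS through the table above, while a string absent
   from pcs_attribute_args (e.g. "apcs", currently disabled there) yields
   ARM_PCS_UNKNOWN here.  */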
5133
5134 /* Get the PCS variant to use for this call. TYPE is the function's type
5135 specification, DECL is the specific declaration. DECL may be null if
5136 the call could be indirect or if this is a library call. */
5137 static enum arm_pcs
5138 arm_get_pcs_model (const_tree type, const_tree decl)
5139 {
5140 bool user_convention = false;
5141 enum arm_pcs user_pcs = arm_pcs_default;
5142 tree attr;
5143
5144 gcc_assert (type);
5145
5146 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5147 if (attr)
5148 {
5149 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5150 user_convention = true;
5151 }
5152
5153 if (TARGET_AAPCS_BASED)
5154 {
5155 /* Detect varargs functions. These always use the base rules
5156 (no argument is ever a candidate for a co-processor
5157 register). */
5158 bool base_rules = stdarg_p (type);
5159
5160 if (user_convention)
5161 {
5162 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5163 sorry ("non-AAPCS derived PCS variant");
5164 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5165 error ("variadic functions must use the base AAPCS variant");
5166 }
5167
5168 if (base_rules)
5169 return ARM_PCS_AAPCS;
5170 else if (user_convention)
5171 return user_pcs;
5172 else if (decl && flag_unit_at_a_time)
5173 {
5174 /* Local functions never leak outside this compilation unit,
5175 so we are free to use whatever conventions are
5176 appropriate. */
5177 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5178 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5179 if (i && i->local)
5180 return ARM_PCS_AAPCS_LOCAL;
5181 }
5182 }
5183 else if (user_convention && user_pcs != arm_pcs_default)
5184 sorry ("PCS variant");
5185
5186 /* For everything else we use the target's default. */
5187 return arm_pcs_default;
5188 }
5189
5190
5191 static void
5192 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5193 const_tree fntype ATTRIBUTE_UNUSED,
5194 rtx libcall ATTRIBUTE_UNUSED,
5195 const_tree fndecl ATTRIBUTE_UNUSED)
5196 {
5197 /* Record the unallocated VFP registers. */
5198 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5199 pcum->aapcs_vfp_reg_alloc = 0;
5200 }
5201
5202 /* Walk down the type tree of TYPE counting consecutive base elements.
5203 If *MODEP is VOIDmode, then set it to the first valid floating point
5204 type. If a non-floating point type is found, or if a floating point
5205 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5206 otherwise return the count in the sub-tree. */
5207 static int
5208 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5209 {
5210 machine_mode mode;
5211 HOST_WIDE_INT size;
5212
5213 switch (TREE_CODE (type))
5214 {
5215 case REAL_TYPE:
5216 mode = TYPE_MODE (type);
5217 if (mode != DFmode && mode != SFmode)
5218 return -1;
5219
5220 if (*modep == VOIDmode)
5221 *modep = mode;
5222
5223 if (*modep == mode)
5224 return 1;
5225
5226 break;
5227
5228 case COMPLEX_TYPE:
5229 mode = TYPE_MODE (TREE_TYPE (type));
5230 if (mode != DFmode && mode != SFmode)
5231 return -1;
5232
5233 if (*modep == VOIDmode)
5234 *modep = mode;
5235
5236 if (*modep == mode)
5237 return 2;
5238
5239 break;
5240
5241 case VECTOR_TYPE:
5242 /* Use V2SImode and V4SImode as representatives of all 64-bit
5243 and 128-bit vector types, whether or not those modes are
5244 supported with the present options. */
5245 size = int_size_in_bytes (type);
5246 switch (size)
5247 {
5248 case 8:
5249 mode = V2SImode;
5250 break;
5251 case 16:
5252 mode = V4SImode;
5253 break;
5254 default:
5255 return -1;
5256 }
5257
5258 if (*modep == VOIDmode)
5259 *modep = mode;
5260
5261 /* Vector modes are considered to be opaque: two vectors are
5262 equivalent for the purposes of being homogeneous aggregates
5263 if they are the same size. */
5264 if (*modep == mode)
5265 return 1;
5266
5267 break;
5268
5269 case ARRAY_TYPE:
5270 {
5271 int count;
5272 tree index = TYPE_DOMAIN (type);
5273
5274 /* Can't handle incomplete types nor sizes that are not
5275 fixed. */
5276 if (!COMPLETE_TYPE_P (type)
5277 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5278 return -1;
5279
5280 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5281 if (count == -1
5282 || !index
5283 || !TYPE_MAX_VALUE (index)
5284 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5285 || !TYPE_MIN_VALUE (index)
5286 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5287 || count < 0)
5288 return -1;
5289
5290 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5291 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5292
5293 /* There must be no padding. */
5294 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5295 return -1;
5296
5297 return count;
5298 }
5299
5300 case RECORD_TYPE:
5301 {
5302 int count = 0;
5303 int sub_count;
5304 tree field;
5305
5306 /* Can't handle incomplete types nor sizes that are not
5307 fixed. */
5308 if (!COMPLETE_TYPE_P (type)
5309 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5310 return -1;
5311
5312 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5313 {
5314 if (TREE_CODE (field) != FIELD_DECL)
5315 continue;
5316
5317 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5318 if (sub_count < 0)
5319 return -1;
5320 count += sub_count;
5321 }
5322
5323 /* There must be no padding. */
5324 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5325 return -1;
5326
5327 return count;
5328 }
5329
5330 case UNION_TYPE:
5331 case QUAL_UNION_TYPE:
5332 {
5333 /* These aren't very interesting except in a degenerate case. */
5334 int count = 0;
5335 int sub_count;
5336 tree field;
5337
5338 /* Can't handle incomplete types nor sizes that are not
5339 fixed. */
5340 if (!COMPLETE_TYPE_P (type)
5341 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5342 return -1;
5343
5344 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5345 {
5346 if (TREE_CODE (field) != FIELD_DECL)
5347 continue;
5348
5349 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5350 if (sub_count < 0)
5351 return -1;
5352 count = count > sub_count ? count : sub_count;
5353 }
5354
5355 /* There must be no padding. */
5356 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5357 return -1;
5358
5359 return count;
5360 }
5361
5362 default:
5363 break;
5364 }
5365
5366 return -1;
5367 }
5368
5369 /* Return true if PCS_VARIANT should use VFP registers. */
5370 static bool
5371 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5372 {
5373 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5374 {
5375 static bool seen_thumb1_vfp = false;
5376
5377 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5378 {
5379 sorry ("Thumb-1 hard-float VFP ABI");
5380 /* sorry() is not immediately fatal, so only display this once. */
5381 seen_thumb1_vfp = true;
5382 }
5383
5384 return true;
5385 }
5386
5387 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5388 return false;
5389
5390 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5391 (TARGET_VFP_DOUBLE || !is_double));
5392 }
5393
5394 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5395 suitable for passing or returning in VFP registers for the PCS
5396 variant selected. If it is, then *BASE_MODE is updated to contain
5397 a machine mode describing each element of the argument's type and
5398 *COUNT to hold the number of such elements. */
5399 static bool
5400 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5401 machine_mode mode, const_tree type,
5402 machine_mode *base_mode, int *count)
5403 {
5404 machine_mode new_mode = VOIDmode;
5405
5406 /* If we have the type information, prefer that to working things
5407 out from the mode. */
5408 if (type)
5409 {
5410 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5411
5412 if (ag_count > 0 && ag_count <= 4)
5413 *count = ag_count;
5414 else
5415 return false;
5416 }
5417 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5418 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5419 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5420 {
5421 *count = 1;
5422 new_mode = mode;
5423 }
5424 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5425 {
5426 *count = 2;
5427 new_mode = (mode == DCmode ? DFmode : SFmode);
5428 }
5429 else
5430 return false;
5431
5432
5433 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5434 return false;
5435
5436 *base_mode = new_mode;
5437 return true;
5438 }
5439
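/* Return true if a value of mode MODE (and type TYPE, if known) can be
   returned in VFP registers when using PCS_VARIANT. */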
5440 static bool
5441 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5442 machine_mode mode, const_tree type)
5443 {
5444 int count ATTRIBUTE_UNUSED;
5445 machine_mode ag_mode ATTRIBUTE_UNUSED;
5446
5447 if (!use_vfp_abi (pcs_variant, false))
5448 return false;
5449 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5450 &ag_mode, &count);
5451 }
5452
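/* Return true if an argument of mode MODE (and type TYPE, if known) is
   a candidate for passing in VFP registers; if so, record the element
   mode and count in PCUM. */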
5453 static bool
5454 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5455 const_tree type)
5456 {
5457 if (!use_vfp_abi (pcum->pcs_variant, false))
5458 return false;
5459
5460 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5461 &pcum->aapcs_vfp_rmode,
5462 &pcum->aapcs_vfp_rcount);
5463 }
5464
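/* Try to allocate VFP registers for the current argument. On success,
   set PCUM->aapcs_reg to the register (or PARALLEL of registers) that
   will hold it and return true; otherwise return false. */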
5465 static bool
5466 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5467 const_tree type ATTRIBUTE_UNUSED)
5468 {
5469 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5470 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5471 int regno;
5472
5473 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5474 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5475 {
5476 pcum->aapcs_vfp_reg_alloc = mask << regno;
5477 if (mode == BLKmode
5478 || (mode == TImode && ! TARGET_NEON)
5479 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5480 {
5481 int i;
5482 int rcount = pcum->aapcs_vfp_rcount;
5483 int rshift = shift;
5484 machine_mode rmode = pcum->aapcs_vfp_rmode;
5485 rtx par;
5486 if (!TARGET_NEON)
5487 {
5488 /* Avoid using unsupported vector modes. */
5489 if (rmode == V2SImode)
5490 rmode = DImode;
5491 else if (rmode == V4SImode)
5492 {
5493 rmode = DImode;
5494 rcount *= 2;
5495 rshift /= 2;
5496 }
5497 }
5498 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5499 for (i = 0; i < rcount; i++)
5500 {
5501 rtx tmp = gen_rtx_REG (rmode,
5502 FIRST_VFP_REGNUM + regno + i * rshift);
5503 tmp = gen_rtx_EXPR_LIST
5504 (VOIDmode, tmp,
5505 GEN_INT (i * GET_MODE_SIZE (rmode)));
5506 XVECEXP (par, 0, i) = tmp;
5507 }
5508
5509 pcum->aapcs_reg = par;
5510 }
5511 else
5512 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5513 return true;
5514 }
5515 return false;
5516 }
5517
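/* Return an RTX describing where a value of MODE (and type TYPE, if
   known) is returned in VFP registers: either a single register, or a
   PARALLEL for aggregates and vector modes that are not directly
   supported. Return NULL if the VFP ABI is not being used. */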
5518 static rtx
5519 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
5520 machine_mode mode,
5521 const_tree type ATTRIBUTE_UNUSED)
5522 {
5523 if (!use_vfp_abi (pcs_variant, false))
5524 return NULL;
5525
5526 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5527 {
5528 int count;
5529 machine_mode ag_mode;
5530 int i;
5531 rtx par;
5532 int shift;
5533
5534 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5535 &ag_mode, &count);
5536
5537 if (!TARGET_NEON)
5538 {
5539 if (ag_mode == V2SImode)
5540 ag_mode = DImode;
5541 else if (ag_mode == V4SImode)
5542 {
5543 ag_mode = DImode;
5544 count *= 2;
5545 }
5546 }
5547 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5548 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5549 for (i = 0; i < count; i++)
5550 {
5551 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5552 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5553 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5554 XVECEXP (par, 0, i) = tmp;
5555 }
5556
5557 return par;
5558 }
5559
5560 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5561 }
5562
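/* Mark the VFP registers allocated to the current argument as used and
   clear the pending allocation. */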
5563 static void
5564 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
5565 machine_mode mode ATTRIBUTE_UNUSED,
5566 const_tree type ATTRIBUTE_UNUSED)
5567 {
5568 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5569 pcum->aapcs_vfp_reg_alloc = 0;
5570 return;
5571 }
5572
5573 #define AAPCS_CP(X) \
5574 { \
5575 aapcs_ ## X ## _cum_init, \
5576 aapcs_ ## X ## _is_call_candidate, \
5577 aapcs_ ## X ## _allocate, \
5578 aapcs_ ## X ## _is_return_candidate, \
5579 aapcs_ ## X ## _allocate_return_reg, \
5580 aapcs_ ## X ## _advance \
5581 }
5582
5583 /* Table of co-processors that can be used to pass arguments in
5584 registers. Ideally no argument should be a candidate for more than
5585 one co-processor table entry, but the table is processed in order
5586 and stops after the first match. If that entry then fails to put
5587 the argument into a co-processor register, the argument will go on
5588 the stack. */
5589 static struct
5590 {
5591 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5592 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5593
5594 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5595 BLKmode) is a candidate for this co-processor's registers; this
5596 function should ignore any position-dependent state in
5597 CUMULATIVE_ARGS and only use call-type dependent information. */
5598 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5599
5600 /* Return true if the argument does get a co-processor register; it
5601 should set aapcs_reg to an RTX of the register allocated as is
5602 required for a return from FUNCTION_ARG. */
5603 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5604
5605 /* Return true if a result of mode MODE (or type TYPE if MODE is
5606 BLKmode) can be returned in this co-processor's registers. */
5607 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5608
5609 /* Allocate and return an RTX element to hold the return type of a
5610 call; this routine must not fail and will only be called if
5611 is_return_candidate returned true with the same parameters. */
5612 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5613
5614 /* Finish processing this argument and prepare to start processing
5615 the next one. */
5616 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5617 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5618 {
5619 AAPCS_CP(vfp)
5620 };
5621
5622 #undef AAPCS_CP
5623
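/* Return the index of the first co-processor slot that accepts this
   argument as a call candidate, or -1 if none does. */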
5624 static int
5625 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5626 const_tree type)
5627 {
5628 int i;
5629
5630 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5631 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5632 return i;
5633
5634 return -1;
5635 }
5636
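/* Return the index of the co-processor slot that will return a value
   of TYPE for a function with type (or decl) FNTYPE, or -1 if the
   value is returned in core registers. */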
5637 static int
5638 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5639 {
5640 /* We aren't passed a decl, so we can't check that a call is local.
5641 However, it isn't clear that that would be a win anyway, since it
5642 might limit some tail-calling opportunities. */
5643 enum arm_pcs pcs_variant;
5644
5645 if (fntype)
5646 {
5647 const_tree fndecl = NULL_TREE;
5648
5649 if (TREE_CODE (fntype) == FUNCTION_DECL)
5650 {
5651 fndecl = fntype;
5652 fntype = TREE_TYPE (fntype);
5653 }
5654
5655 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5656 }
5657 else
5658 pcs_variant = arm_pcs_default;
5659
5660 if (pcs_variant != ARM_PCS_AAPCS)
5661 {
5662 int i;
5663
5664 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5665 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5666 TYPE_MODE (type),
5667 type))
5668 return i;
5669 }
5670 return -1;
5671 }
5672
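/* Return an RTX describing where a value of MODE and TYPE is returned
   by a function with type (or decl) FNTYPE, applying the AAPCS rules
   for co-processor and core registers. */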
5673 static rtx
5674 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5675 const_tree fntype)
5676 {
5677 /* We aren't passed a decl, so we can't check that a call is local.
5678 However, it isn't clear that that would be a win anyway, since it
5679 might limit some tail-calling opportunities. */
5680 enum arm_pcs pcs_variant;
5681 int unsignedp ATTRIBUTE_UNUSED;
5682
5683 if (fntype)
5684 {
5685 const_tree fndecl = NULL_TREE;
5686
5687 if (TREE_CODE (fntype) == FUNCTION_DECL)
5688 {
5689 fndecl = fntype;
5690 fntype = TREE_TYPE (fntype);
5691 }
5692
5693 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5694 }
5695 else
5696 pcs_variant = arm_pcs_default;
5697
5698 /* Promote integer types. */
5699 if (type && INTEGRAL_TYPE_P (type))
5700 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5701
5702 if (pcs_variant != ARM_PCS_AAPCS)
5703 {
5704 int i;
5705
5706 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5707 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5708 type))
5709 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5710 mode, type);
5711 }
5712
5713 /* Promote small structs returned in a register to full-word size
5714 for big-endian AAPCS. */
5715 if (type && arm_return_in_msb (type))
5716 {
5717 HOST_WIDE_INT size = int_size_in_bytes (type);
5718 if (size % UNITS_PER_WORD != 0)
5719 {
5720 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5721 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5722 }
5723 }
5724
5725 return gen_rtx_REG (mode, R0_REGNUM);
5726 }
5727
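/* Return the register in which a libcall result of MODE is returned
   under the AAPCS. Small fixed-point results are widened to SImode on
   big-endian targets. */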
5728 static rtx
5729 aapcs_libcall_value (machine_mode mode)
5730 {
5731 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5732 && GET_MODE_SIZE (mode) <= 4)
5733 mode = SImode;
5734
5735 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5736 }
5737
5738 /* Lay out a function argument using the AAPCS rules. The rule
5739 numbers referred to here are those in the AAPCS. */
5740 static void
5741 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5742 const_tree type, bool named)
5743 {
5744 int nregs, nregs2;
5745 int ncrn;
5746
5747 /* We only need to do this once per argument. */
5748 if (pcum->aapcs_arg_processed)
5749 return;
5750
5751 pcum->aapcs_arg_processed = true;
5752
5753 /* Special case: if named is false then we are handling an incoming
5754 anonymous argument which is on the stack. */
5755 if (!named)
5756 return;
5757
5758 /* Is this a potential co-processor register candidate? */
5759 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5760 {
5761 int slot = aapcs_select_call_coproc (pcum, mode, type);
5762 pcum->aapcs_cprc_slot = slot;
5763
5764 /* We don't have to apply any of the rules from part B of the
5765 preparation phase, these are handled elsewhere in the
5766 compiler. */
5767
5768 if (slot >= 0)
5769 {
5770 /* A Co-processor register candidate goes either in its own
5771 class of registers or on the stack. */
5772 if (!pcum->aapcs_cprc_failed[slot])
5773 {
5774 /* C1.cp - Try to allocate the argument to co-processor
5775 registers. */
5776 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5777 return;
5778
5779 /* C2.cp - Put the argument on the stack and note that we
5780 can't assign any more candidates in this slot. We also
5781 need to note that we have allocated stack space, so that
5782 we won't later try to split a non-cprc candidate between
5783 core registers and the stack. */
5784 pcum->aapcs_cprc_failed[slot] = true;
5785 pcum->can_split = false;
5786 }
5787
5788 /* We didn't get a register, so this argument goes on the
5789 stack. */
5790 gcc_assert (pcum->can_split == false);
5791 return;
5792 }
5793 }
5794
5795 /* C3 - For double-word aligned arguments, round the NCRN up to the
5796 next even number. */
5797 ncrn = pcum->aapcs_ncrn;
5798 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5799 ncrn++;
5800
5801 nregs = ARM_NUM_REGS2(mode, type);
5802
5803 /* Sigh, this test should really assert that nregs > 0, but a GCC
5804 extension allows empty structs and then gives them zero size; it
5805 then allows such a structure to be passed by value. For some of
5806 the code below we have to pretend that such an argument has
5807 non-zero size so that we 'locate' it correctly either in
5808 registers or on the stack. */
5809 gcc_assert (nregs >= 0);
5810
5811 nregs2 = nregs ? nregs : 1;
5812
5813 /* C4 - Argument fits entirely in core registers. */
5814 if (ncrn + nregs2 <= NUM_ARG_REGS)
5815 {
5816 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5817 pcum->aapcs_next_ncrn = ncrn + nregs;
5818 return;
5819 }
5820
5821 /* C5 - Some core registers left and there are no arguments already
5822 on the stack: split this argument between the remaining core
5823 registers and the stack. */
5824 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5825 {
5826 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5827 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5828 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5829 return;
5830 }
5831
5832 /* C6 - NCRN is set to 4. */
5833 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5834
5835 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5836 return;
5837 }
5838
5839 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5840 for a call to a function whose data type is FNTYPE.
5841 For a library call, FNTYPE is NULL. */
5842 void
5843 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5844 rtx libname,
5845 tree fndecl)
5846 {
5847 /* Determine the calling convention (PCS variant) in use. */
5848 if (fntype)
5849 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5850 else
5851 pcum->pcs_variant = arm_pcs_default;
5852
5853 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5854 {
5855 if (arm_libcall_uses_aapcs_base (libname))
5856 pcum->pcs_variant = ARM_PCS_AAPCS;
5857
5858 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5859 pcum->aapcs_reg = NULL_RTX;
5860 pcum->aapcs_partial = 0;
5861 pcum->aapcs_arg_processed = false;
5862 pcum->aapcs_cprc_slot = -1;
5863 pcum->can_split = true;
5864
5865 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5866 {
5867 int i;
5868
5869 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5870 {
5871 pcum->aapcs_cprc_failed[i] = false;
5872 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5873 }
5874 }
5875 return;
5876 }
5877
5878 /* Legacy ABIs */
5879
5880 /* On the ARM, the offset starts at 0. */
5881 pcum->nregs = 0;
5882 pcum->iwmmxt_nregs = 0;
5883 pcum->can_split = true;
5884
5885 /* Varargs vectors are treated the same as long long.
5886 named_count avoids having to change the way arm handles 'named'. */
5887 pcum->named_count = 0;
5888 pcum->nargs = 0;
5889
5890 if (TARGET_REALLY_IWMMXT && fntype)
5891 {
5892 tree fn_arg;
5893
5894 for (fn_arg = TYPE_ARG_TYPES (fntype);
5895 fn_arg;
5896 fn_arg = TREE_CHAIN (fn_arg))
5897 pcum->named_count += 1;
5898
5899 if (! pcum->named_count)
5900 pcum->named_count = INT_MAX;
5901 }
5902 }
5903
5904 /* Return true if we use LRA instead of the reload pass. */
5905 static bool
5906 arm_lra_p (void)
5907 {
5908 return arm_lra_flag;
5909 }
5910
5911 /* Return true if mode/type need doubleword alignment. */
5912 static bool
5913 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5914 {
5915 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5916 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5917 }
5918
5919
5920 /* Determine where to put an argument to a function.
5921 Value is zero to push the argument on the stack,
5922 or a hard register in which to store the argument.
5923
5924 MODE is the argument's machine mode.
5925 TYPE is the data type of the argument (as a tree).
5926 This is null for libcalls where that information may
5927 not be available.
5928 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5929 the preceding args and about the function being called.
5930 NAMED is nonzero if this argument is a named parameter
5931 (otherwise it is an extra parameter matching an ellipsis).
5932
5933 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5934 other arguments are passed on the stack. If (NAMED == 0) (which happens
5935 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5936 defined), say it is passed on the stack (function_prologue will
5937 indeed make it pass on the stack if necessary). */
5938
5939 static rtx
5940 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5941 const_tree type, bool named)
5942 {
5943 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5944 int nregs;
5945
5946 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5947 a call insn (op3 of a call_value insn). */
5948 if (mode == VOIDmode)
5949 return const0_rtx;
5950
5951 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5952 {
5953 aapcs_layout_arg (pcum, mode, type, named);
5954 return pcum->aapcs_reg;
5955 }
5956
5957 /* Varargs vectors are treated the same as long long.
5958 named_count avoids having to change the way arm handles 'named'. */
5959 if (TARGET_IWMMXT_ABI
5960 && arm_vector_mode_supported_p (mode)
5961 && pcum->named_count > pcum->nargs + 1)
5962 {
5963 if (pcum->iwmmxt_nregs <= 9)
5964 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5965 else
5966 {
5967 pcum->can_split = false;
5968 return NULL_RTX;
5969 }
5970 }
5971
5972 /* Put doubleword aligned quantities in even register pairs. */
5973 if (pcum->nregs & 1
5974 && ARM_DOUBLEWORD_ALIGN
5975 && arm_needs_doubleword_align (mode, type))
5976 pcum->nregs++;
5977
5978 /* Only allow splitting an arg between regs and memory if all preceding
5979 args were allocated to regs. For args passed by reference we only count
5980 the reference pointer. */
5981 if (pcum->can_split)
5982 nregs = 1;
5983 else
5984 nregs = ARM_NUM_REGS2 (mode, type);
5985
5986 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5987 return NULL_RTX;
5988
5989 return gen_rtx_REG (mode, pcum->nregs);
5990 }
5991
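/* Return the alignment boundary, in bits, for an argument of MODE and
   TYPE. */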
5992 static unsigned int
5993 arm_function_arg_boundary (machine_mode mode, const_tree type)
5994 {
5995 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5996 ? DOUBLEWORD_ALIGNMENT
5997 : PARM_BOUNDARY);
5998 }
5999
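/* Return the number of bytes of an argument of MODE and TYPE that are
   passed in registers when the rest of the argument is passed on the
   stack, or zero if the argument is passed entirely in registers or
   entirely on the stack. */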
6000 static int
6001 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6002 tree type, bool named)
6003 {
6004 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6005 int nregs = pcum->nregs;
6006
6007 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6008 {
6009 aapcs_layout_arg (pcum, mode, type, named);
6010 return pcum->aapcs_partial;
6011 }
6012
6013 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6014 return 0;
6015
6016 if (NUM_ARG_REGS > nregs
6017 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6018 && pcum->can_split)
6019 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6020
6021 return 0;
6022 }
6023
6024 /* Update the data in PCUM to advance over an argument
6025 of mode MODE and data type TYPE.
6026 (TYPE is null for libcalls where that information may not be available.) */
6027
6028 static void
6029 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6030 const_tree type, bool named)
6031 {
6032 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6033
6034 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6035 {
6036 aapcs_layout_arg (pcum, mode, type, named);
6037
6038 if (pcum->aapcs_cprc_slot >= 0)
6039 {
6040 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6041 type);
6042 pcum->aapcs_cprc_slot = -1;
6043 }
6044
6045 /* Generic stuff. */
6046 pcum->aapcs_arg_processed = false;
6047 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6048 pcum->aapcs_reg = NULL_RTX;
6049 pcum->aapcs_partial = 0;
6050 }
6051 else
6052 {
6053 pcum->nargs += 1;
6054 if (arm_vector_mode_supported_p (mode)
6055 && pcum->named_count > pcum->nargs
6056 && TARGET_IWMMXT_ABI)
6057 pcum->iwmmxt_nregs += 1;
6058 else
6059 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6060 }
6061 }
6062
6063 /* Variable sized types are passed by reference. This is a GCC
6064 extension to the ARM ABI. */
6065
6066 static bool
6067 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6068 machine_mode mode ATTRIBUTE_UNUSED,
6069 const_tree type, bool named ATTRIBUTE_UNUSED)
6070 {
6071 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6072 }
6073 \f
6074 /* Encode the current state of the #pragma [no_]long_calls. */
6075 typedef enum
6076 {
6077 OFF, /* No #pragma [no_]long_calls is in effect. */
6078 LONG, /* #pragma long_calls is in effect. */
6079 SHORT /* #pragma no_long_calls is in effect. */
6080 } arm_pragma_enum;
6081
6082 static arm_pragma_enum arm_pragma_long_calls = OFF;
6083
6084 void
6085 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6086 {
6087 arm_pragma_long_calls = LONG;
6088 }
6089
6090 void
6091 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6092 {
6093 arm_pragma_long_calls = SHORT;
6094 }
6095
6096 void
6097 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6098 {
6099 arm_pragma_long_calls = OFF;
6100 }
6101 \f
6102 /* Handle an attribute requiring a FUNCTION_DECL;
6103 arguments as in struct attribute_spec.handler. */
6104 static tree
6105 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6106 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6107 {
6108 if (TREE_CODE (*node) != FUNCTION_DECL)
6109 {
6110 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6111 name);
6112 *no_add_attrs = true;
6113 }
6114
6115 return NULL_TREE;
6116 }
6117
6118 /* Handle an "interrupt" or "isr" attribute;
6119 arguments as in struct attribute_spec.handler. */
6120 static tree
6121 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6122 bool *no_add_attrs)
6123 {
6124 if (DECL_P (*node))
6125 {
6126 if (TREE_CODE (*node) != FUNCTION_DECL)
6127 {
6128 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6129 name);
6130 *no_add_attrs = true;
6131 }
6132 /* FIXME: the argument if any is checked for type attributes;
6133 should it be checked for decl ones? */
6134 }
6135 else
6136 {
6137 if (TREE_CODE (*node) == FUNCTION_TYPE
6138 || TREE_CODE (*node) == METHOD_TYPE)
6139 {
6140 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6141 {
6142 warning (OPT_Wattributes, "%qE attribute ignored",
6143 name);
6144 *no_add_attrs = true;
6145 }
6146 }
6147 else if (TREE_CODE (*node) == POINTER_TYPE
6148 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6149 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6150 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6151 {
6152 *node = build_variant_type_copy (*node);
6153 TREE_TYPE (*node) = build_type_attribute_variant
6154 (TREE_TYPE (*node),
6155 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6156 *no_add_attrs = true;
6157 }
6158 else
6159 {
6160 /* Possibly pass this attribute on from the type to a decl. */
6161 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6162 | (int) ATTR_FLAG_FUNCTION_NEXT
6163 | (int) ATTR_FLAG_ARRAY_NEXT))
6164 {
6165 *no_add_attrs = true;
6166 return tree_cons (name, args, NULL_TREE);
6167 }
6168 else
6169 {
6170 warning (OPT_Wattributes, "%qE attribute ignored",
6171 name);
6172 }
6173 }
6174 }
6175
6176 return NULL_TREE;
6177 }
6178
6179 /* Handle a "pcs" attribute; arguments as in struct
6180 attribute_spec.handler. */
6181 static tree
6182 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6183 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6184 {
6185 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6186 {
6187 warning (OPT_Wattributes, "%qE attribute ignored", name);
6188 *no_add_attrs = true;
6189 }
6190 return NULL_TREE;
6191 }
6192
6193 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6194 /* Handle the "notshared" attribute. This attribute is another way of
6195 requesting hidden visibility. ARM's compiler supports
6196 "__declspec(notshared)"; we support the same thing via an
6197 attribute. */
6198
6199 static tree
6200 arm_handle_notshared_attribute (tree *node,
6201 tree name ATTRIBUTE_UNUSED,
6202 tree args ATTRIBUTE_UNUSED,
6203 int flags ATTRIBUTE_UNUSED,
6204 bool *no_add_attrs)
6205 {
6206 tree decl = TYPE_NAME (*node);
6207
6208 if (decl)
6209 {
6210 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6211 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6212 *no_add_attrs = false;
6213 }
6214 return NULL_TREE;
6215 }
6216 #endif
6217
6218 /* Return 0 if the attributes for two types are incompatible, 1 if they
6219 are compatible, and 2 if they are nearly compatible (which causes a
6220 warning to be generated). */
6221 static int
6222 arm_comp_type_attributes (const_tree type1, const_tree type2)
6223 {
6224 int l1, l2, s1, s2;
6225
6226 /* Check for mismatch of non-default calling convention. */
6227 if (TREE_CODE (type1) != FUNCTION_TYPE)
6228 return 1;
6229
6230 /* Check for mismatched call attributes. */
6231 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6232 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6233 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6234 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6235
6236 /* Only bother to check if an attribute is defined. */
6237 if (l1 | l2 | s1 | s2)
6238 {
6239 /* If one type has an attribute, the other must have the same attribute. */
6240 if ((l1 != l2) || (s1 != s2))
6241 return 0;
6242
6243 /* Disallow mixed attributes. */
6244 if ((l1 & s2) || (l2 & s1))
6245 return 0;
6246 }
6247
6248 /* Check for mismatched ISR attribute. */
6249 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6250 if (! l1)
6251 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6252 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6253 if (! l2)
6254 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6255 if (l1 != l2)
6256 return 0;
6257
6258 return 1;
6259 }
6260
6261 /* Assigns default attributes to newly defined type. This is used to
6262 set short_call/long_call attributes for function types of
6263 functions defined inside corresponding #pragma scopes. */
6264 static void
6265 arm_set_default_type_attributes (tree type)
6266 {
6267 /* Add __attribute__ ((long_call)) to all functions, when
6268 inside #pragma long_calls or __attribute__ ((short_call)),
6269 when inside #pragma no_long_calls. */
6270 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6271 {
6272 tree type_attr_list, attr_name;
6273 type_attr_list = TYPE_ATTRIBUTES (type);
6274
6275 if (arm_pragma_long_calls == LONG)
6276 attr_name = get_identifier ("long_call");
6277 else if (arm_pragma_long_calls == SHORT)
6278 attr_name = get_identifier ("short_call");
6279 else
6280 return;
6281
6282 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6283 TYPE_ATTRIBUTES (type) = type_attr_list;
6284 }
6285 }
6286 \f
6287 /* Return true if DECL is known to be linked into section SECTION. */
6288
6289 static bool
6290 arm_function_in_section_p (tree decl, section *section)
6291 {
6292 /* We can only be certain about functions defined in the same
6293 compilation unit. */
6294 if (!TREE_STATIC (decl))
6295 return false;
6296
6297 /* Make sure that SYMBOL always binds to the definition in this
6298 compilation unit. */
6299 if (!targetm.binds_local_p (decl))
6300 return false;
6301
6302 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6303 if (!DECL_SECTION_NAME (decl))
6304 {
6305 /* Make sure that we will not create a unique section for DECL. */
6306 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6307 return false;
6308 }
6309
6310 return function_section (decl) == section;
6311 }
6312
6313 /* Return nonzero if a 32-bit "long_call" should be generated for
6314 a call from the current function to DECL. We generate a long_call
6315 if the function:
6316
6317 a. has an __attribute__ ((long_call))
6318 or b. is within the scope of a #pragma long_calls
6319 or c. the -mlong-calls command line switch has been specified
6320
6321 However we do not generate a long call if the function:
6322
6323 d. has an __attribute__ ((short_call))
6324 or e. is inside the scope of a #pragma no_long_calls
6325 or f. is defined in the same section as the current function. */
6326
6327 bool
6328 arm_is_long_call_p (tree decl)
6329 {
6330 tree attrs;
6331
6332 if (!decl)
6333 return TARGET_LONG_CALLS;
6334
6335 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6336 if (lookup_attribute ("short_call", attrs))
6337 return false;
6338
6339 /* For "f", be conservative, and only cater for cases in which the
6340 whole of the current function is placed in the same section. */
6341 if (!flag_reorder_blocks_and_partition
6342 && TREE_CODE (decl) == FUNCTION_DECL
6343 && arm_function_in_section_p (decl, current_function_section ()))
6344 return false;
6345
6346 if (lookup_attribute ("long_call", attrs))
6347 return true;
6348
6349 return TARGET_LONG_CALLS;
6350 }
6351
6352 /* Return nonzero if it is ok to make a tail-call to DECL. */
6353 static bool
6354 arm_function_ok_for_sibcall (tree decl, tree exp)
6355 {
6356 unsigned long func_type;
6357
6358 if (cfun->machine->sibcall_blocked)
6359 return false;
6360
6361 /* Never tailcall something if we are generating code for Thumb-1. */
6362 if (TARGET_THUMB1)
6363 return false;
6364
6365 /* The PIC register is live on entry to VxWorks PLT entries, so we
6366 must make the call before restoring the PIC register. */
6367 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6368 return false;
6369
6370 /* If we are interworking and the function is not declared static
6371 then we can't tail-call it unless we know that it exists in this
6372 compilation unit (since it might be a Thumb routine). */
6373 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6374 && !TREE_ASM_WRITTEN (decl))
6375 return false;
6376
6377 func_type = arm_current_func_type ();
6378 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6379 if (IS_INTERRUPT (func_type))
6380 return false;
6381
6382 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6383 {
6384 /* Check that the return value locations are the same. For
6385 example that we aren't returning a value from the sibling in
6386 a VFP register but then need to transfer it to a core
6387 register. */
6388 rtx a, b;
6389
6390 a = arm_function_value (TREE_TYPE (exp), decl, false);
6391 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6392 cfun->decl, false);
6393 if (!rtx_equal_p (a, b))
6394 return false;
6395 }
6396
6397 /* Never tailcall if function may be called with a misaligned SP. */
6398 if (IS_STACKALIGN (func_type))
6399 return false;
6400
6401 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6402 references should become a NOP. Don't convert such calls into
6403 sibling calls. */
6404 if (TARGET_AAPCS_BASED
6405 && arm_abi == ARM_ABI_AAPCS
6406 && decl
6407 && DECL_WEAK (decl))
6408 return false;
6409
6410 /* Everything else is ok. */
6411 return true;
6412 }
6413
6414 \f
6415 /* Addressing mode support functions. */
6416
6417 /* Return nonzero if X is a legitimate immediate operand when compiling
6418 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6419 int
6420 legitimate_pic_operand_p (rtx x)
6421 {
6422 if (GET_CODE (x) == SYMBOL_REF
6423 || (GET_CODE (x) == CONST
6424 && GET_CODE (XEXP (x, 0)) == PLUS
6425 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6426 return 0;
6427
6428 return 1;
6429 }
6430
6431 /* Record that the current function needs a PIC register. Initialize
6432 cfun->machine->pic_reg if we have not already done so. */
6433
6434 static void
6435 require_pic_register (void)
6436 {
6437 /* A lot of the logic here is made obscure by the fact that this
6438 routine gets called as part of the rtx cost estimation process.
6439 We don't want those calls to affect any assumptions about the real
6440 function; and further, we can't call entry_of_function() until we
6441 start the real expansion process. */
6442 if (!crtl->uses_pic_offset_table)
6443 {
6444 gcc_assert (can_create_pseudo_p ());
6445 if (arm_pic_register != INVALID_REGNUM
6446 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6447 {
6448 if (!cfun->machine->pic_reg)
6449 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6450
6451 /* Play games to avoid marking the function as needing pic
6452 if we are being called as part of the cost-estimation
6453 process. */
6454 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6455 crtl->uses_pic_offset_table = 1;
6456 }
6457 else
6458 {
6459 rtx_insn *seq, *insn;
6460
6461 if (!cfun->machine->pic_reg)
6462 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6463
6464 /* Play games to avoid marking the function as needing pic
6465 if we are being called as part of the cost-estimation
6466 process. */
6467 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6468 {
6469 crtl->uses_pic_offset_table = 1;
6470 start_sequence ();
6471
6472 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6473 && arm_pic_register > LAST_LO_REGNUM)
6474 emit_move_insn (cfun->machine->pic_reg,
6475 gen_rtx_REG (Pmode, arm_pic_register));
6476 else
6477 arm_load_pic_register (0UL);
6478
6479 seq = get_insns ();
6480 end_sequence ();
6481
6482 for (insn = seq; insn; insn = NEXT_INSN (insn))
6483 if (INSN_P (insn))
6484 INSN_LOCATION (insn) = prologue_location;
6485
6486 /* We can be called during expansion of PHI nodes, where
6487 we can't yet emit instructions directly in the final
6488 insn stream. Queue the insns on the entry edge, they will
6489 be committed after everything else is expanded. */
6490 insert_insn_on_edge (seq,
6491 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6492 }
6493 }
6494 }
6495 }
6496
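/* Legitimize a PIC reference to ORIG (a SYMBOL_REF, LABEL_REF or CONST
   expression), returning an address that is valid when generating
   position-independent code, loading through the GOT where required.
   REG, if non-null, is a register that may be used to hold the
   result. */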
6497 rtx
6498 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6499 {
6500 if (GET_CODE (orig) == SYMBOL_REF
6501 || GET_CODE (orig) == LABEL_REF)
6502 {
6503 rtx insn;
6504
6505 if (reg == 0)
6506 {
6507 gcc_assert (can_create_pseudo_p ());
6508 reg = gen_reg_rtx (Pmode);
6509 }
6510
6511 /* VxWorks does not impose a fixed gap between segments; the run-time
6512 gap can be different from the object-file gap. We therefore can't
6513 use GOTOFF unless we are absolutely sure that the symbol is in the
6514 same segment as the GOT. Unfortunately, the flexibility of linker
6515 scripts means that we can't be sure of that in general, so assume
6516 that GOTOFF is never valid on VxWorks. */
6517 if ((GET_CODE (orig) == LABEL_REF
6518 || (GET_CODE (orig) == SYMBOL_REF &&
6519 SYMBOL_REF_LOCAL_P (orig)))
6520 && NEED_GOT_RELOC
6521 && arm_pic_data_is_text_relative)
6522 insn = arm_pic_static_addr (orig, reg);
6523 else
6524 {
6525 rtx pat;
6526 rtx mem;
6527
6528 /* If this function doesn't have a pic register, create one now. */
6529 require_pic_register ();
6530
6531 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6532
6533 /* Make the MEM as close to a constant as possible. */
6534 mem = SET_SRC (pat);
6535 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6536 MEM_READONLY_P (mem) = 1;
6537 MEM_NOTRAP_P (mem) = 1;
6538
6539 insn = emit_insn (pat);
6540 }
6541
6542 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6543 by loop. */
6544 set_unique_reg_note (insn, REG_EQUAL, orig);
6545
6546 return reg;
6547 }
6548 else if (GET_CODE (orig) == CONST)
6549 {
6550 rtx base, offset;
6551
6552 if (GET_CODE (XEXP (orig, 0)) == PLUS
6553 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6554 return orig;
6555
6556 /* Handle the case where we have: const (UNSPEC_TLS). */
6557 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6558 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6559 return orig;
6560
6561 /* Handle the case where we have:
6562 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6563 CONST_INT. */
6564 if (GET_CODE (XEXP (orig, 0)) == PLUS
6565 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6566 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6567 {
6568 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6569 return orig;
6570 }
6571
6572 if (reg == 0)
6573 {
6574 gcc_assert (can_create_pseudo_p ());
6575 reg = gen_reg_rtx (Pmode);
6576 }
6577
6578 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6579
6580 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6581 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6582 base == reg ? 0 : reg);
6583
6584 if (CONST_INT_P (offset))
6585 {
6586 /* The base register doesn't really matter, we only want to
6587 test the index for the appropriate mode. */
6588 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6589 {
6590 gcc_assert (can_create_pseudo_p ());
6591 offset = force_reg (Pmode, offset);
6592 }
6593
6594 if (CONST_INT_P (offset))
6595 return plus_constant (Pmode, base, INTVAL (offset));
6596 }
6597
6598 if (GET_MODE_SIZE (mode) > 4
6599 && (GET_MODE_CLASS (mode) == MODE_INT
6600 || TARGET_SOFT_FLOAT))
6601 {
6602 emit_insn (gen_addsi3 (reg, base, offset));
6603 return reg;
6604 }
6605
6606 return gen_rtx_PLUS (Pmode, base, offset);
6607 }
6608
6609 return orig;
6610 }
6611
6612
6613 /* Find a spare register to use during the prolog of a function. */
6614
6615 static int
6616 thumb_find_work_register (unsigned long pushed_regs_mask)
6617 {
6618 int reg;
6619
6620 /* Check the argument registers first as these are call-used. The
6621 register allocation order means that sometimes r3 might be used
6622 but earlier argument registers might not, so check them all. */
6623 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6624 if (!df_regs_ever_live_p (reg))
6625 return reg;
6626
6627 /* Before going on to check the call-saved registers we can try a couple
6628 more ways of deducing that r3 is available. The first is when we are
6629 pushing anonymous arguments onto the stack and we have less than 4
6630 registers worth of fixed arguments(*). In this case r3 will be part of
6631 the variable argument list and so we can be sure that it will be
6632 pushed right at the start of the function. Hence it will be available
6633 for the rest of the prologue.
6634 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6635 if (cfun->machine->uses_anonymous_args
6636 && crtl->args.pretend_args_size > 0)
6637 return LAST_ARG_REGNUM;
6638
6639 /* The other case is when we have fixed arguments but less than 4 registers
6640 worth. In this case r3 might be used in the body of the function, but
6641 it is not being used to convey an argument into the function. In theory
6642 we could just check crtl->args.size to see how many bytes are
6643 being passed in argument registers, but it seems that it is unreliable.
6644 Sometimes it will have the value 0 when in fact arguments are being
6645 passed. (See testcase execute/20021111-1.c for an example). So we also
6646 check the args_info.nregs field as well. The problem with this field is
6647 that it makes no allowances for arguments that are passed to the
6648 function but which are not used. Hence we could miss an opportunity
6649 when a function has an unused argument in r3. But it is better to be
6650 safe than to be sorry. */
6651 if (! cfun->machine->uses_anonymous_args
6652 && crtl->args.size >= 0
6653 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6654 && (TARGET_AAPCS_BASED
6655 ? crtl->args.info.aapcs_ncrn < 4
6656 : crtl->args.info.nregs < 4))
6657 return LAST_ARG_REGNUM;
6658
6659 /* Otherwise look for a call-saved register that is going to be pushed. */
6660 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6661 if (pushed_regs_mask & (1 << reg))
6662 return reg;
6663
6664 if (TARGET_THUMB2)
6665 {
6666 /* Thumb-2 can use high regs. */
6667 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6668 if (pushed_regs_mask & (1 << reg))
6669 return reg;
6670 }
6671 /* Something went wrong - thumb_compute_save_reg_mask()
6672 should have arranged for a suitable register to be pushed. */
6673 gcc_unreachable ();
6674 }
6675
6676 static GTY(()) int pic_labelno;
6677
6678 /* Generate code to load the PIC register. On Thumb-1 a low work
6679 register is chosen from SAVED_REGS if one is needed. */
6680
6681 void
6682 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6683 {
6684 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6685
6686 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6687 return;
6688
6689 gcc_assert (flag_pic);
6690
6691 pic_reg = cfun->machine->pic_reg;
6692 if (TARGET_VXWORKS_RTP)
6693 {
6694 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6695 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6696 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6697
6698 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6699
6700 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6701 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6702 }
6703 else
6704 {
6705 /* We use an UNSPEC rather than a LABEL_REF because this label
6706 never appears in the code stream. */
6707
6708 labelno = GEN_INT (pic_labelno++);
6709 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6710 l1 = gen_rtx_CONST (VOIDmode, l1);
6711
6712 /* On the ARM the PC register contains 'dot + 8' at the time of the
6713 addition, on the Thumb it is 'dot + 4'. */
6714 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6715 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6716 UNSPEC_GOTSYM_OFF);
6717 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6718
6719 if (TARGET_32BIT)
6720 {
6721 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6722 }
6723 else /* TARGET_THUMB1 */
6724 {
6725 if (arm_pic_register != INVALID_REGNUM
6726 && REGNO (pic_reg) > LAST_LO_REGNUM)
6727 {
6728 /* We will have pushed the pic register, so we should always be
6729 able to find a work register. */
6730 pic_tmp = gen_rtx_REG (SImode,
6731 thumb_find_work_register (saved_regs));
6732 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6733 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6734 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6735 }
6736 else if (arm_pic_register != INVALID_REGNUM
6737 && arm_pic_register > LAST_LO_REGNUM
6738 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6739 {
6740 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6741 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6742 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6743 }
6744 else
6745 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6746 }
6747 }
6748
6749 /* Need to emit this whether or not we obey regdecls,
6750 since setjmp/longjmp can cause life info to screw up. */
6751 emit_use (pic_reg);
6752 }
6753
6754 /* Generate code to load the address of a static var when flag_pic is set. */
6755 static rtx
6756 arm_pic_static_addr (rtx orig, rtx reg)
6757 {
6758 rtx l1, labelno, offset_rtx, insn;
6759
6760 gcc_assert (flag_pic);
6761
6762 /* We use an UNSPEC rather than a LABEL_REF because this label
6763 never appears in the code stream. */
6764 labelno = GEN_INT (pic_labelno++);
6765 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6766 l1 = gen_rtx_CONST (VOIDmode, l1);
6767
6768 /* On the ARM the PC register contains 'dot + 8' at the time of the
6769 addition, on the Thumb it is 'dot + 4'. */
6770 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6771 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6772 UNSPEC_SYMBOL_OFFSET);
6773 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6774
6775 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6776 return insn;
6777 }
6778
6779 /* Return nonzero if X is valid as an ARM state addressing register. */
6780 static int
6781 arm_address_register_rtx_p (rtx x, int strict_p)
6782 {
6783 int regno;
6784
6785 if (!REG_P (x))
6786 return 0;
6787
6788 regno = REGNO (x);
6789
6790 if (strict_p)
6791 return ARM_REGNO_OK_FOR_BASE_P (regno);
6792
6793 return (regno <= LAST_ARM_REGNUM
6794 || regno >= FIRST_PSEUDO_REGISTER
6795 || regno == FRAME_POINTER_REGNUM
6796 || regno == ARG_POINTER_REGNUM);
6797 }
6798
6799 /* Return TRUE if this rtx is the difference of a symbol and a label,
6800 and will reduce to a PC-relative relocation in the object file.
6801 Expressions like this can be left alone when generating PIC, rather
6802 than forced through the GOT. */
6803 static int
6804 pcrel_constant_p (rtx x)
6805 {
6806 if (GET_CODE (x) == MINUS)
6807 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6808
6809 return FALSE;
6810 }
6811
6812 /* Return true if X will surely end up in an index register after next
6813 splitting pass. */
6814 static bool
6815 will_be_in_index_register (const_rtx x)
6816 {
6817 /* arm.md: calculate_pic_address will split this into a register. */
6818 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6819 }
6820
6821 /* Return nonzero if X is a valid ARM state address operand. */
6822 int
6823 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6824 int strict_p)
6825 {
6826 bool use_ldrd;
6827 enum rtx_code code = GET_CODE (x);
6828
6829 if (arm_address_register_rtx_p (x, strict_p))
6830 return 1;
6831
6832 use_ldrd = (TARGET_LDRD
6833 && (mode == DImode
6834 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6835
6836 if (code == POST_INC || code == PRE_DEC
6837 || ((code == PRE_INC || code == POST_DEC)
6838 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6839 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6840
6841 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6842 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6843 && GET_CODE (XEXP (x, 1)) == PLUS
6844 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6845 {
6846 rtx addend = XEXP (XEXP (x, 1), 1);
6847
6848 /* Don't allow ldrd post-increment by register because it's hard
6849 to fix up invalid register choices. */
6850 if (use_ldrd
6851 && GET_CODE (x) == POST_MODIFY
6852 && REG_P (addend))
6853 return 0;
6854
6855 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6856 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6857 }
6858
6859 /* After reload, constants split into minipools will have addresses
6860 from a LABEL_REF. */
6861 else if (reload_completed
6862 && (code == LABEL_REF
6863 || (code == CONST
6864 && GET_CODE (XEXP (x, 0)) == PLUS
6865 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6866 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6867 return 1;
6868
6869 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6870 return 0;
6871
6872 else if (code == PLUS)
6873 {
6874 rtx xop0 = XEXP (x, 0);
6875 rtx xop1 = XEXP (x, 1);
6876
6877 return ((arm_address_register_rtx_p (xop0, strict_p)
6878 && ((CONST_INT_P (xop1)
6879 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6880 || (!strict_p && will_be_in_index_register (xop1))))
6881 || (arm_address_register_rtx_p (xop1, strict_p)
6882 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6883 }
6884
6885 #if 0
6886 /* Reload currently can't handle MINUS, so disable this for now. */
6887 else if (GET_CODE (x) == MINUS)
6888 {
6889 rtx xop0 = XEXP (x, 0);
6890 rtx xop1 = XEXP (x, 1);
6891
6892 return (arm_address_register_rtx_p (xop0, strict_p)
6893 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6894 }
6895 #endif
6896
6897 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6898 && code == SYMBOL_REF
6899 && CONSTANT_POOL_ADDRESS_P (x)
6900 && ! (flag_pic
6901 && symbol_mentioned_p (get_pool_constant (x))
6902 && ! pcrel_constant_p (get_pool_constant (x))))
6903 return 1;
6904
6905 return 0;
6906 }
6907
6908 /* Return nonzero if X is a valid Thumb-2 address operand. */
6909 static int
6910 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6911 {
6912 bool use_ldrd;
6913 enum rtx_code code = GET_CODE (x);
6914
6915 if (arm_address_register_rtx_p (x, strict_p))
6916 return 1;
6917
6918 use_ldrd = (TARGET_LDRD
6919 && (mode == DImode
6920 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6921
6922 if (code == POST_INC || code == PRE_DEC
6923 || ((code == PRE_INC || code == POST_DEC)
6924 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6925 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6926
6927 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6928 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6929 && GET_CODE (XEXP (x, 1)) == PLUS
6930 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6931 {
6932 /* Thumb-2 only has autoincrement by constant. */
6933 rtx addend = XEXP (XEXP (x, 1), 1);
6934 HOST_WIDE_INT offset;
6935
6936 if (!CONST_INT_P (addend))
6937 return 0;
6938
6939 offset = INTVAL(addend);
6940 if (GET_MODE_SIZE (mode) <= 4)
6941 return (offset > -256 && offset < 256);
6942
6943 return (use_ldrd && offset > -1024 && offset < 1024
6944 && (offset & 3) == 0);
6945 }
6946
6947 /* After reload, constants split into minipools will have addresses
6948 from a LABEL_REF. */
6949 else if (reload_completed
6950 && (code == LABEL_REF
6951 || (code == CONST
6952 && GET_CODE (XEXP (x, 0)) == PLUS
6953 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6954 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6955 return 1;
6956
6957 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6958 return 0;
6959
6960 else if (code == PLUS)
6961 {
6962 rtx xop0 = XEXP (x, 0);
6963 rtx xop1 = XEXP (x, 1);
6964
6965 return ((arm_address_register_rtx_p (xop0, strict_p)
6966 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6967 || (!strict_p && will_be_in_index_register (xop1))))
6968 || (arm_address_register_rtx_p (xop1, strict_p)
6969 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6970 }
6971
6972 /* Normally we can assign constant values to target registers without
6973 the help of the constant pool. But there are cases where we have to
6974 use the constant pool, for example:
6975 1) assigning a label to a register;
6976 2) sign-extending an 8-bit value to 32 bits and then assigning it to
6977 a register.
6978 A constant pool access of the form:
6979 (set (reg r0) (mem (symbol_ref (".LC0"))))
6980 will cause the use of the literal pool (later, in arm_reorg).
6981 So here we mark such a form as invalid, and the compiler will then
6982 adjust it into:
6983 (set (reg r0) (symbol_ref (".LC0")))
6984 (set (reg r0) (mem (reg r0))).
6985 No extra register is required, and (mem (reg r0)) won't cause the use
6986 of literal pools. */
6987 else if (arm_disable_literal_pool && code == SYMBOL_REF
6988 && CONSTANT_POOL_ADDRESS_P (x))
6989 return 0;
6990
6991 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6992 && code == SYMBOL_REF
6993 && CONSTANT_POOL_ADDRESS_P (x)
6994 && ! (flag_pic
6995 && symbol_mentioned_p (get_pool_constant (x))
6996 && ! pcrel_constant_p (get_pool_constant (x))))
6997 return 1;
6998
6999 return 0;
7000 }
7001
7002 /* Return nonzero if INDEX is valid for an address index operand in
7003 ARM state. */
7004 static int
7005 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7006 int strict_p)
7007 {
7008 HOST_WIDE_INT range;
7009 enum rtx_code code = GET_CODE (index);
7010
7011 /* Standard coprocessor addressing modes. */
7012 if (TARGET_HARD_FLOAT
7013 && TARGET_VFP
7014 && (mode == SFmode || mode == DFmode))
7015 return (code == CONST_INT && INTVAL (index) < 1024
7016 && INTVAL (index) > -1024
7017 && (INTVAL (index) & 3) == 0);
7018
7019 /* For quad modes, we restrict the constant offset to be slightly less
7020 than what the instruction format permits. We do this because for
7021 quad mode moves, we will actually decompose them into two separate
7022 double-mode reads or writes. INDEX must therefore be a valid
7023 (double-mode) offset and so should INDEX+8. */
7024 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7025 return (code == CONST_INT
7026 && INTVAL (index) < 1016
7027 && INTVAL (index) > -1024
7028 && (INTVAL (index) & 3) == 0);
7029
7030 /* We have no such constraint on double mode offsets, so we permit the
7031 full range of the instruction format. */
7032 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7033 return (code == CONST_INT
7034 && INTVAL (index) < 1024
7035 && INTVAL (index) > -1024
7036 && (INTVAL (index) & 3) == 0);
7037
7038 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7039 return (code == CONST_INT
7040 && INTVAL (index) < 1024
7041 && INTVAL (index) > -1024
7042 && (INTVAL (index) & 3) == 0);
7043
7044 if (arm_address_register_rtx_p (index, strict_p)
7045 && (GET_MODE_SIZE (mode) <= 4))
7046 return 1;
7047
7048 if (mode == DImode || mode == DFmode)
7049 {
7050 if (code == CONST_INT)
7051 {
7052 HOST_WIDE_INT val = INTVAL (index);
7053
7054 if (TARGET_LDRD)
7055 return val > -256 && val < 256;
7056 else
7057 return val > -4096 && val < 4092;
7058 }
7059
7060 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7061 }
7062
7063 if (GET_MODE_SIZE (mode) <= 4
7064 && ! (arm_arch4
7065 && (mode == HImode
7066 || mode == HFmode
7067 || (mode == QImode && outer == SIGN_EXTEND))))
7068 {
7069 if (code == MULT)
7070 {
7071 rtx xiop0 = XEXP (index, 0);
7072 rtx xiop1 = XEXP (index, 1);
7073
7074 return ((arm_address_register_rtx_p (xiop0, strict_p)
7075 && power_of_two_operand (xiop1, SImode))
7076 || (arm_address_register_rtx_p (xiop1, strict_p)
7077 && power_of_two_operand (xiop0, SImode)));
7078 }
7079 else if (code == LSHIFTRT || code == ASHIFTRT
7080 || code == ASHIFT || code == ROTATERT)
7081 {
7082 rtx op = XEXP (index, 1);
7083
7084 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7085 && CONST_INT_P (op)
7086 && INTVAL (op) > 0
7087 && INTVAL (op) <= 31);
7088 }
7089 }
7090
7091 /* For ARM v4 we may be doing a sign-extend operation during the
7092 load. */
7093 if (arm_arch4)
7094 {
7095 if (mode == HImode
7096 || mode == HFmode
7097 || (outer == SIGN_EXTEND && mode == QImode))
7098 range = 256;
7099 else
7100 range = 4096;
7101 }
7102 else
7103 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7104
7105 return (code == CONST_INT
7106 && INTVAL (index) < range
7107 && INTVAL (index) > -range);
7108 }
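/* Illustrative examples (not from the original source) of addresses whose
   index part is accepted above for a word load in ARM state:

       ldr  r0, [r1, #4095]        @ CONST_INT within (-4096, 4096)
       ldr  r0, [r1, r2]           @ plain register index
       ldr  r0, [r1, r2, lsl #2]   @ (mult r2 4), a power-of-two scale
       ldr  r0, [r1, r2, asr #5]   @ shift by a constant in 1..31

   whereas e.g. HImode on ARMv4+ (ldrh) is limited to constant offsets
   within (-256, 256).  */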
7109
7110 /* Return true if OP is a valid index scaling factor for a Thumb-2
7111 address index operand, i.e. 1, 2, 4 or 8. */
7112 static bool
7113 thumb2_index_mul_operand (rtx op)
7114 {
7115 HOST_WIDE_INT val;
7116
7117 if (!CONST_INT_P (op))
7118 return false;
7119
7120 val = INTVAL (op);
7121 return (val == 1 || val == 2 || val == 4 || val == 8);
7122 }
7123
7124 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7125 static int
7126 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7127 {
7128 enum rtx_code code = GET_CODE (index);
7129
7130 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7131 /* Standard coprocessor addressing modes. */
7132 if (TARGET_HARD_FLOAT
7133 && TARGET_VFP
7134 && (mode == SFmode || mode == DFmode))
7135 return (code == CONST_INT && INTVAL (index) < 1024
7136 /* Thumb-2 allows only a > -256 index range for its core register
7137 load/stores. Since we allow SF/DF in core registers, we have
7138 to use the intersection of -256..4096 (core) and -1024..1024
7139 (coprocessor). */
7140 && INTVAL (index) > -256
7141 && (INTVAL (index) & 3) == 0);
7142
7143 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7144 {
7145 /* For DImode assume values will usually live in core regs
7146 and only allow LDRD addressing modes. */
7147 if (!TARGET_LDRD || mode != DImode)
7148 return (code == CONST_INT
7149 && INTVAL (index) < 1024
7150 && INTVAL (index) > -1024
7151 && (INTVAL (index) & 3) == 0);
7152 }
7153
7154 /* For quad modes, we restrict the constant offset to be slightly less
7155 than what the instruction format permits. We do this because for
7156 quad mode moves, we will actually decompose them into two separate
7157 double-mode reads or writes. INDEX must therefore be a valid
7158 (double-mode) offset and so should INDEX+8. */
7159 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7160 return (code == CONST_INT
7161 && INTVAL (index) < 1016
7162 && INTVAL (index) > -1024
7163 && (INTVAL (index) & 3) == 0);
7164
7165 /* We have no such constraint on double mode offsets, so we permit the
7166 full range of the instruction format. */
7167 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7168 return (code == CONST_INT
7169 && INTVAL (index) < 1024
7170 && INTVAL (index) > -1024
7171 && (INTVAL (index) & 3) == 0);
7172
7173 if (arm_address_register_rtx_p (index, strict_p)
7174 && (GET_MODE_SIZE (mode) <= 4))
7175 return 1;
7176
7177 if (mode == DImode || mode == DFmode)
7178 {
7179 if (code == CONST_INT)
7180 {
7181 HOST_WIDE_INT val = INTVAL (index);
7182 /* ??? Can we assume ldrd for thumb2? */
7183 /* Thumb-2 ldrd only has reg+const addressing modes. */
7184 /* ldrd supports offsets of +-1020.
7185 However the ldr fallback does not. */
7186 return val > -256 && val < 256 && (val & 3) == 0;
7187 }
7188 else
7189 return 0;
7190 }
7191
7192 if (code == MULT)
7193 {
7194 rtx xiop0 = XEXP (index, 0);
7195 rtx xiop1 = XEXP (index, 1);
7196
7197 return ((arm_address_register_rtx_p (xiop0, strict_p)
7198 && thumb2_index_mul_operand (xiop1))
7199 || (arm_address_register_rtx_p (xiop1, strict_p)
7200 && thumb2_index_mul_operand (xiop0)));
7201 }
7202 else if (code == ASHIFT)
7203 {
7204 rtx op = XEXP (index, 1);
7205
7206 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7207 && CONST_INT_P (op)
7208 && INTVAL (op) > 0
7209 && INTVAL (op) <= 3);
7210 }
7211
7212 return (code == CONST_INT
7213 && INTVAL (index) < 4096
7214 && INTVAL (index) > -256);
7215 }
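/* Illustrative examples (not from the original source) of addresses whose
   index part is accepted above for a word load in Thumb-2 state:

       ldr  r0, [r1, #4095]        @ CONST_INT within (-256, 4096)
       ldr  r0, [r1, #-255]
       ldr  r0, [r1, r2]           @ plain register index
       ldr  r0, [r1, r2, lsl #3]   @ scale limited to lsl #0..3

   Note that no shifted-register form other than LSL is accepted here.  */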
7216
7217 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7218 static int
7219 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7220 {
7221 int regno;
7222
7223 if (!REG_P (x))
7224 return 0;
7225
7226 regno = REGNO (x);
7227
7228 if (strict_p)
7229 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7230
7231 return (regno <= LAST_LO_REGNUM
7232 || regno > LAST_VIRTUAL_REGISTER
7233 || regno == FRAME_POINTER_REGNUM
7234 || (GET_MODE_SIZE (mode) >= 4
7235 && (regno == STACK_POINTER_REGNUM
7236 || regno >= FIRST_PSEUDO_REGISTER
7237 || x == hard_frame_pointer_rtx
7238 || x == arg_pointer_rtx)));
7239 }
7240
7241 /* Return nonzero if x is a legitimate index register. This is the case
7242 for any base register that can access a QImode object. */
7243 inline static int
7244 thumb1_index_register_rtx_p (rtx x, int strict_p)
7245 {
7246 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7247 }
7248
7249 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7250
7251 The AP may be eliminated to either the SP or the FP, so we use the
7252 least common denominator, e.g. SImode, and offsets from 0 to 64.
7253
7254 ??? Verify whether the above is the right approach.
7255
7256 ??? The FP may also be eliminated to the SP, so perhaps that
7257 needs special handling too.
7258
7259 ??? Look at how the mips16 port solves this problem. It probably uses
7260 better ways to solve some of these problems.
7261
7262 Although it is not incorrect, we don't accept QImode and HImode
7263 addresses based on the frame pointer or arg pointer until the
7264 reload pass starts. This is so that eliminating such addresses
7265 into stack based ones won't produce impossible code. */
7266 int
7267 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7268 {
7269 /* ??? Not clear if this is right. Experiment. */
7270 if (GET_MODE_SIZE (mode) < 4
7271 && !(reload_in_progress || reload_completed)
7272 && (reg_mentioned_p (frame_pointer_rtx, x)
7273 || reg_mentioned_p (arg_pointer_rtx, x)
7274 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7275 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7276 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7277 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7278 return 0;
7279
7280 /* Accept any base register. SP only in SImode or larger. */
7281 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7282 return 1;
7283
7284 /* This is PC relative data before arm_reorg runs. */
7285 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7286 && GET_CODE (x) == SYMBOL_REF
7287 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7288 return 1;
7289
7290 /* This is PC relative data after arm_reorg runs. */
7291 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7292 && reload_completed
7293 && (GET_CODE (x) == LABEL_REF
7294 || (GET_CODE (x) == CONST
7295 && GET_CODE (XEXP (x, 0)) == PLUS
7296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7297 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7298 return 1;
7299
7300 /* Post-inc indexing only supported for SImode and larger. */
7301 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7302 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7303 return 1;
7304
7305 else if (GET_CODE (x) == PLUS)
7306 {
7307 /* REG+REG address can be any two index registers. */
7308 /* We disallow FRAME+REG addressing since we know that FRAME
7309 will be replaced with STACK, and SP relative addressing only
7310 permits SP+OFFSET. */
7311 if (GET_MODE_SIZE (mode) <= 4
7312 && XEXP (x, 0) != frame_pointer_rtx
7313 && XEXP (x, 1) != frame_pointer_rtx
7314 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7315 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7316 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7317 return 1;
7318
7319 /* REG+const has a 5-bit offset, scaled by the access size (so 5 to 7 bits of byte offset), for non-SP registers. */
7320 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7321 || XEXP (x, 0) == arg_pointer_rtx)
7322 && CONST_INT_P (XEXP (x, 1))
7323 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7324 return 1;
7325
7326 /* REG+const has a 10-bit byte offset for SP, but only SImode and
7327 larger are supported. */
7328 /* ??? Should probably check for DI/DFmode overflow here
7329 just like GO_IF_LEGITIMATE_OFFSET does. */
7330 else if (REG_P (XEXP (x, 0))
7331 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7332 && GET_MODE_SIZE (mode) >= 4
7333 && CONST_INT_P (XEXP (x, 1))
7334 && INTVAL (XEXP (x, 1)) >= 0
7335 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7336 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7337 return 1;
7338
7339 else if (REG_P (XEXP (x, 0))
7340 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7341 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7342 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7343 && REGNO (XEXP (x, 0))
7344 <= LAST_VIRTUAL_POINTER_REGISTER))
7345 && GET_MODE_SIZE (mode) >= 4
7346 && CONST_INT_P (XEXP (x, 1))
7347 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7348 return 1;
7349 }
7350
7351 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7352 && GET_MODE_SIZE (mode) == 4
7353 && GET_CODE (x) == SYMBOL_REF
7354 && CONSTANT_POOL_ADDRESS_P (x)
7355 && ! (flag_pic
7356 && symbol_mentioned_p (get_pool_constant (x))
7357 && ! pcrel_constant_p (get_pool_constant (x))))
7358 return 1;
7359
7360 return 0;
7361 }
7362
7363 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7364 instruction of mode MODE. */
7365 int
7366 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7367 {
7368 switch (GET_MODE_SIZE (mode))
7369 {
7370 case 1:
7371 return val >= 0 && val < 32;
7372
7373 case 2:
7374 return val >= 0 && val < 64 && (val & 1) == 0;
7375
7376 default:
7377 return (val >= 0
7378 && (val + GET_MODE_SIZE (mode)) <= 128
7379 && (val & 3) == 0);
7380 }
7381 }
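/* For illustration (not from the original source), the offsets accepted
   above correspond to the Thumb-1 immediate-offset loads and stores:

       QImode:  0, 1, 2, ..., 31      (ldrb/strb, imm5)
       HImode:  0, 2, 4, ..., 62      (ldrh/strh, imm5 * 2)
       SImode:  0, 4, 8, ..., 124     (ldr/str,   imm5 * 4)

   Larger modes also take the default (word-aligned) case, with the mode
   size folded into the upper bound.  */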
7382
7383 bool
7384 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7385 {
7386 if (TARGET_ARM)
7387 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7388 else if (TARGET_THUMB2)
7389 return thumb2_legitimate_address_p (mode, x, strict_p);
7390 else /* if (TARGET_THUMB1) */
7391 return thumb1_legitimate_address_p (mode, x, strict_p);
7392 }
7393
7394 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7395
7396 Given an rtx X being reloaded into a reg required to be
7397 in class CLASS, return the class of reg to actually use.
7398 In general this is just CLASS, but for the Thumb core registers and
7399 immediate constants we prefer a LO_REGS class or a subset. */
7400
7401 static reg_class_t
7402 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7403 {
7404 if (TARGET_32BIT)
7405 return rclass;
7406 else
7407 {
7408 if (rclass == GENERAL_REGS)
7409 return LO_REGS;
7410 else
7411 return rclass;
7412 }
7413 }
7414
7415 /* Build the SYMBOL_REF for __tls_get_addr. */
7416
7417 static GTY(()) rtx tls_get_addr_libfunc;
7418
7419 static rtx
7420 get_tls_get_addr (void)
7421 {
7422 if (!tls_get_addr_libfunc)
7423 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7424 return tls_get_addr_libfunc;
7425 }
7426
7427 rtx
7428 arm_load_tp (rtx target)
7429 {
7430 if (!target)
7431 target = gen_reg_rtx (SImode);
7432
7433 if (TARGET_HARD_TP)
7434 {
7435 /* Can return in any reg. */
7436 emit_insn (gen_load_tp_hard (target));
7437 }
7438 else
7439 {
7440 /* Always returned in r0. Immediately copy the result into a pseudo,
7441 otherwise other uses of r0 (e.g. setting up function arguments) may
7442 clobber the value. */
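/* Illustrative note (an assumption, not part of the original comment):
   the soft variant is expected to expand to a helper call such as

       bl      __aeabi_read_tp     @ thread pointer returned in r0

   which is why the value is copied out of r0 straight away.  */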
7443
7444 rtx tmp;
7445
7446 emit_insn (gen_load_tp_soft ());
7447
7448 tmp = gen_rtx_REG (SImode, 0);
7449 emit_move_insn (target, tmp);
7450 }
7451 return target;
7452 }
7453
7454 static rtx
7455 load_tls_operand (rtx x, rtx reg)
7456 {
7457 rtx tmp;
7458
7459 if (reg == NULL_RTX)
7460 reg = gen_reg_rtx (SImode);
7461
7462 tmp = gen_rtx_CONST (SImode, x);
7463
7464 emit_move_insn (reg, tmp);
7465
7466 return reg;
7467 }
7468
7469 static rtx
7470 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7471 {
7472 rtx insns, label, labelno, sum;
7473
7474 gcc_assert (reloc != TLS_DESCSEQ);
7475 start_sequence ();
7476
7477 labelno = GEN_INT (pic_labelno++);
7478 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7479 label = gen_rtx_CONST (VOIDmode, label);
7480
7481 sum = gen_rtx_UNSPEC (Pmode,
7482 gen_rtvec (4, x, GEN_INT (reloc), label,
7483 GEN_INT (TARGET_ARM ? 8 : 4)),
7484 UNSPEC_TLS);
7485 reg = load_tls_operand (sum, reg);
7486
7487 if (TARGET_ARM)
7488 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7489 else
7490 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7491
7492 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7493 LCT_PURE, /* LCT_CONST? */
7494 Pmode, 1, reg, Pmode);
7495
7496 insns = get_insns ();
7497 end_sequence ();
7498
7499 return insns;
7500 }
7501
7502 static rtx
7503 arm_tls_descseq_addr (rtx x, rtx reg)
7504 {
7505 rtx labelno = GEN_INT (pic_labelno++);
7506 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7507 rtx sum = gen_rtx_UNSPEC (Pmode,
7508 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7509 gen_rtx_CONST (VOIDmode, label),
7510 GEN_INT (!TARGET_ARM)),
7511 UNSPEC_TLS);
7512 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7513
7514 emit_insn (gen_tlscall (x, labelno));
7515 if (!reg)
7516 reg = gen_reg_rtx (SImode);
7517 else
7518 gcc_assert (REGNO (reg) != 0);
7519
7520 emit_move_insn (reg, reg0);
7521
7522 return reg;
7523 }
7524
7525 rtx
7526 legitimize_tls_address (rtx x, rtx reg)
7527 {
7528 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7529 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7530
7531 switch (model)
7532 {
7533 case TLS_MODEL_GLOBAL_DYNAMIC:
7534 if (TARGET_GNU2_TLS)
7535 {
7536 reg = arm_tls_descseq_addr (x, reg);
7537
7538 tp = arm_load_tp (NULL_RTX);
7539
7540 dest = gen_rtx_PLUS (Pmode, tp, reg);
7541 }
7542 else
7543 {
7544 /* Original scheme */
7545 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7546 dest = gen_reg_rtx (Pmode);
7547 emit_libcall_block (insns, dest, ret, x);
7548 }
7549 return dest;
7550
7551 case TLS_MODEL_LOCAL_DYNAMIC:
7552 if (TARGET_GNU2_TLS)
7553 {
7554 reg = arm_tls_descseq_addr (x, reg);
7555
7556 tp = arm_load_tp (NULL_RTX);
7557
7558 dest = gen_rtx_PLUS (Pmode, tp, reg);
7559 }
7560 else
7561 {
7562 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7563
7564 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7565 share the LDM result with other LD model accesses. */
7566 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7567 UNSPEC_TLS);
7568 dest = gen_reg_rtx (Pmode);
7569 emit_libcall_block (insns, dest, ret, eqv);
7570
7571 /* Load the addend. */
7572 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7573 GEN_INT (TLS_LDO32)),
7574 UNSPEC_TLS);
7575 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7576 dest = gen_rtx_PLUS (Pmode, dest, addend);
7577 }
7578 return dest;
7579
7580 case TLS_MODEL_INITIAL_EXEC:
7581 labelno = GEN_INT (pic_labelno++);
7582 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7583 label = gen_rtx_CONST (VOIDmode, label);
7584 sum = gen_rtx_UNSPEC (Pmode,
7585 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7586 GEN_INT (TARGET_ARM ? 8 : 4)),
7587 UNSPEC_TLS);
7588 reg = load_tls_operand (sum, reg);
7589
7590 if (TARGET_ARM)
7591 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7592 else if (TARGET_THUMB2)
7593 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7594 else
7595 {
7596 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7597 emit_move_insn (reg, gen_const_mem (SImode, reg));
7598 }
7599
7600 tp = arm_load_tp (NULL_RTX);
7601
7602 return gen_rtx_PLUS (Pmode, tp, reg);
7603
7604 case TLS_MODEL_LOCAL_EXEC:
7605 tp = arm_load_tp (NULL_RTX);
7606
7607 reg = gen_rtx_UNSPEC (Pmode,
7608 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7609 UNSPEC_TLS);
7610 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7611
7612 return gen_rtx_PLUS (Pmode, tp, reg);
7613
7614 default:
7615 abort ();
7616 }
7617 }
7618
7619 /* Try machine-dependent ways of modifying an illegitimate address
7620 to be legitimate. If we find one, return the new, valid address. */
7621 rtx
7622 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7623 {
7624 if (arm_tls_referenced_p (x))
7625 {
7626 rtx addend = NULL;
7627
7628 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7629 {
7630 addend = XEXP (XEXP (x, 0), 1);
7631 x = XEXP (XEXP (x, 0), 0);
7632 }
7633
7634 if (GET_CODE (x) != SYMBOL_REF)
7635 return x;
7636
7637 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7638
7639 x = legitimize_tls_address (x, NULL_RTX);
7640
7641 if (addend)
7642 {
7643 x = gen_rtx_PLUS (SImode, x, addend);
7644 orig_x = x;
7645 }
7646 else
7647 return x;
7648 }
7649
7650 if (!TARGET_ARM)
7651 {
7652 /* TODO: legitimize_address for Thumb2. */
7653 if (TARGET_THUMB2)
7654 return x;
7655 return thumb_legitimize_address (x, orig_x, mode);
7656 }
7657
7658 if (GET_CODE (x) == PLUS)
7659 {
7660 rtx xop0 = XEXP (x, 0);
7661 rtx xop1 = XEXP (x, 1);
7662
7663 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7664 xop0 = force_reg (SImode, xop0);
7665
7666 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7667 && !symbol_mentioned_p (xop1))
7668 xop1 = force_reg (SImode, xop1);
7669
7670 if (ARM_BASE_REGISTER_RTX_P (xop0)
7671 && CONST_INT_P (xop1))
7672 {
7673 HOST_WIDE_INT n, low_n;
7674 rtx base_reg, val;
7675 n = INTVAL (xop1);
7676
7677 /* VFP addressing modes actually allow greater offsets, but for
7678 now we just stick with the lowest common denominator. */
7679 if (mode == DImode
7680 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7681 {
7682 low_n = n & 0x0f;
7683 n &= ~0x0f;
7684 if (low_n > 4)
7685 {
7686 n += 16;
7687 low_n -= 16;
7688 }
7689 }
7690 else
7691 {
7692 low_n = ((mode) == TImode ? 0
7693 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7694 n -= low_n;
7695 }
7696
7697 base_reg = gen_reg_rtx (SImode);
7698 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7699 emit_move_insn (base_reg, val);
7700 x = plus_constant (Pmode, base_reg, low_n);
7701 }
7702 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7703 x = gen_rtx_PLUS (SImode, xop0, xop1);
7704 }
7705
7706 /* XXX We don't allow MINUS any more -- see comment in
7707 arm_legitimate_address_outer_p (). */
7708 else if (GET_CODE (x) == MINUS)
7709 {
7710 rtx xop0 = XEXP (x, 0);
7711 rtx xop1 = XEXP (x, 1);
7712
7713 if (CONSTANT_P (xop0))
7714 xop0 = force_reg (SImode, xop0);
7715
7716 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7717 xop1 = force_reg (SImode, xop1);
7718
7719 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7720 x = gen_rtx_MINUS (SImode, xop0, xop1);
7721 }
7722
7723 /* Make sure to take full advantage of the pre-indexed addressing mode
7724 with absolute addresses, which often allows the base register to be
7725 shared between multiple adjacent memory references, and might even
7726 allow the minipool to be avoided entirely. */
7727 else if (CONST_INT_P (x) && optimize > 0)
7728 {
7729 unsigned int bits;
7730 HOST_WIDE_INT mask, base, index;
7731 rtx base_reg;
7732
7733 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7734 use an 8-bit index. So let's use a 12-bit index for SImode only and
7735 hope that arm_gen_constant will enable ldrb to use more bits. */
7736 bits = (mode == SImode) ? 12 : 8;
7737 mask = (1 << bits) - 1;
7738 base = INTVAL (x) & ~mask;
7739 index = INTVAL (x) & mask;
7740 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7741 {
7742 /* It'll most probably be more efficient to generate the base
7743 with more bits set and use a negative index instead. */
7744 base |= mask;
7745 index -= mask;
7746 }
7747 base_reg = force_reg (SImode, GEN_INT (base));
7748 x = plus_constant (Pmode, base_reg, index);
7749 }
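/* Worked example (illustrative, not from the original source): for an
   SImode access to the absolute address 0x20001234, bits = 12, so
   base = 0x20001000 and index = 0x234; the base is built once in a
   register and the access becomes  ldr rX, [rBASE, #0x234] , letting
   neighbouring absolute addresses reuse rBASE.  */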
7750
7751 if (flag_pic)
7752 {
7753 /* We need to find and carefully transform any SYMBOL and LABEL
7754 references; so go back to the original address expression. */
7755 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7756
7757 if (new_x != orig_x)
7758 x = new_x;
7759 }
7760
7761 return x;
7762 }
7763
7764
7765 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7766 to be legitimate. If we find one, return the new, valid address. */
7767 rtx
7768 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7769 {
7770 if (GET_CODE (x) == PLUS
7771 && CONST_INT_P (XEXP (x, 1))
7772 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7773 || INTVAL (XEXP (x, 1)) < 0))
7774 {
7775 rtx xop0 = XEXP (x, 0);
7776 rtx xop1 = XEXP (x, 1);
7777 HOST_WIDE_INT offset = INTVAL (xop1);
7778
7779 /* Try and fold the offset into a biasing of the base register and
7780 then offsetting that. Don't do this when optimizing for space
7781 since it can cause too many CSEs. */
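/* Worked example (illustrative, not from the original source): at -Os,
   an SImode access at  base + 132  (just past ldr's 0..124 range) takes
   the middle branch below: delta = 31 * 4 = 124, so the address is
   rewritten as  (base + 8) + 124 , where the +8 bias goes into a fresh
   register and #124 is a legitimate Thumb-1 ldr offset.  */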
7782 if (optimize_size && offset >= 0
7783 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7784 {
7785 HOST_WIDE_INT delta;
7786
7787 if (offset >= 256)
7788 delta = offset - (256 - GET_MODE_SIZE (mode));
7789 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7790 delta = 31 * GET_MODE_SIZE (mode);
7791 else
7792 delta = offset & (~31 * GET_MODE_SIZE (mode));
7793
7794 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7795 NULL_RTX);
7796 x = plus_constant (Pmode, xop0, delta);
7797 }
7798 else if (offset < 0 && offset > -256)
7799 /* Small negative offsets are best done with a subtract before the
7800 dereference; forcing these into a register normally takes two
7801 instructions. */
7802 x = force_operand (x, NULL_RTX);
7803 else
7804 {
7805 /* For the remaining cases, force the constant into a register. */
7806 xop1 = force_reg (SImode, xop1);
7807 x = gen_rtx_PLUS (SImode, xop0, xop1);
7808 }
7809 }
7810 else if (GET_CODE (x) == PLUS
7811 && s_register_operand (XEXP (x, 1), SImode)
7812 && !s_register_operand (XEXP (x, 0), SImode))
7813 {
7814 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7815
7816 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7817 }
7818
7819 if (flag_pic)
7820 {
7821 /* We need to find and carefully transform any SYMBOL and LABEL
7822 references; so go back to the original address expression. */
7823 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7824
7825 if (new_x != orig_x)
7826 x = new_x;
7827 }
7828
7829 return x;
7830 }
7831
7832 bool
7833 arm_legitimize_reload_address (rtx *p,
7834 machine_mode mode,
7835 int opnum, int type,
7836 int ind_levels ATTRIBUTE_UNUSED)
7837 {
7838 /* We must recognize output that we have already generated ourselves. */
7839 if (GET_CODE (*p) == PLUS
7840 && GET_CODE (XEXP (*p, 0)) == PLUS
7841 && REG_P (XEXP (XEXP (*p, 0), 0))
7842 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7843 && CONST_INT_P (XEXP (*p, 1)))
7844 {
7845 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7846 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7847 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7848 return true;
7849 }
7850
7851 if (GET_CODE (*p) == PLUS
7852 && REG_P (XEXP (*p, 0))
7853 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7854 /* If the base register is equivalent to a constant, let the generic
7855 code handle it. Otherwise we will run into problems if a future
7856 reload pass decides to rematerialize the constant. */
7857 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7858 && CONST_INT_P (XEXP (*p, 1)))
7859 {
7860 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7861 HOST_WIDE_INT low, high;
7862
7863 /* Detect coprocessor load/stores. */
7864 bool coproc_p = ((TARGET_HARD_FLOAT
7865 && TARGET_VFP
7866 && (mode == SFmode || mode == DFmode))
7867 || (TARGET_REALLY_IWMMXT
7868 && VALID_IWMMXT_REG_MODE (mode))
7869 || (TARGET_NEON
7870 && (VALID_NEON_DREG_MODE (mode)
7871 || VALID_NEON_QREG_MODE (mode))));
7872
7873 /* In some cases, bail out when the offset is not word-aligned (low two bits set). */
7874 if ((val & 0x3) != 0
7875 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7876 && (coproc_p
7877 /* For DI, and DF under soft-float: */
7878 || ((mode == DImode || mode == DFmode)
7879 /* Without ldrd, we use stm/ldm, which does not
7880 fare well with unaligned offsets. */
7881 && (! TARGET_LDRD
7882 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7883 || TARGET_THUMB2))))
7884 return false;
7885
7886 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7887 where the (reg+high) part gets turned into a reload add insn,
7888 we try to decompose the index into high/low values that can often
7889 also lead to better reload CSE.
7890 For example:
7891 ldr r0, [r2, #4100] // Offset too large
7892 ldr r1, [r2, #4104] // Offset too large
7893
7894 is best reloaded as:
7895 add t1, r2, #4096
7896 ldr r0, [t1, #4]
7897 add t2, r2, #4096
7898 ldr r1, [t2, #8]
7899
7900 which post-reload CSE can simplify in most cases to eliminate the
7901 second add instruction:
7902 add t1, r2, #4096
7903 ldr r0, [t1, #4]
7904 ldr r1, [t1, #8]
7905
7906 The idea here is that we want to split out the bits of the constant
7907 as a mask, rather than by subtracting the maximum offset that the
7908 respective type of load/store used can handle.
7909
7910 Even when the overall offset is positive, it can pay off to choose a
7911 negative low part; sometimes this yields a high-part immediate that
7912 can be constructed with fewer instructions.
7913 For example:
7914 ldr r0, [r2, #0x3FFFFC]
7915
7916 This is best reloaded as:
7917 add t1, r2, #0x400000
7918 ldr r0, [t1, #-4]
7919
7920 The trick for spotting this for a load insn with N bits of offset
7921 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7922 negative offset that makes bit N and all the bits below it zero
7923 in the remainder part.
7924
7925 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7926 to sign-magnitude addressing (i.e. a separate +/- bit, or 1's complement),
7927 which is used by most ARM load/store instructions. */
7928
7929 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7930 (((VAL) & ((1 << (N)) - 1)) \
7931 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7932 : 0)
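/* Worked example (illustrative, not part of the original comment): for the
   0x3FFFFC case above, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) takes the
   low 13 bits (0x1ffc) and computes (0x1ffc ^ 0x1000) - 0x1000 = -4; the
   remaining high part is 0x3FFFFC - (-4) = 0x400000, a single valid ARM
   immediate.  A value whose low N bits are already clear, e.g. 0x1000
   with N = 12, yields 0.  */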
7933
7934 if (coproc_p)
7935 {
7936 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7937
7938 /* NEON quad-word load/stores are made of two double-word accesses,
7939 so the valid index range is reduced by 8. Treat as 9-bit range if
7940 we go over it. */
7941 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7942 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7943 }
7944 else if (GET_MODE_SIZE (mode) == 8)
7945 {
7946 if (TARGET_LDRD)
7947 low = (TARGET_THUMB2
7948 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7949 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7950 else
7951 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7952 to access doublewords. The supported load/store offsets are
7953 -8, -4, and 4, which we try to produce here. */
7954 low = ((val & 0xf) ^ 0x8) - 0x8;
7955 }
7956 else if (GET_MODE_SIZE (mode) < 8)
7957 {
7958 /* NEON element load/stores do not have an offset. */
7959 if (TARGET_NEON_FP16 && mode == HFmode)
7960 return false;
7961
7962 if (TARGET_THUMB2)
7963 {
7964 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7965 Try the wider 12-bit range first, and re-try if the result
7966 is out of range. */
7967 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7968 if (low < -255)
7969 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7970 }
7971 else
7972 {
7973 if (mode == HImode || mode == HFmode)
7974 {
7975 if (arm_arch4)
7976 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7977 else
7978 {
7979 /* The storehi/movhi_bytes fallbacks can use only
7980 [-4094,+4094] of the full ldrb/strb index range. */
7981 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7982 if (low == 4095 || low == -4095)
7983 return false;
7984 }
7985 }
7986 else
7987 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7988 }
7989 }
7990 else
7991 return false;
7992
7993 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7994 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7995 - (unsigned HOST_WIDE_INT) 0x80000000);
7996 /* Check for overflow or zero. */
7997 if (low == 0 || high == 0 || (high + low != val))
7998 return false;
7999
8000 /* Reload the high part into a base reg; leave the low part
8001 in the mem.
8002 Note that replacing this gen_rtx_PLUS with plus_constant is
8003 wrong in this case because we rely on the
8004 (plus (plus reg c1) c2) structure being preserved so that
8005 XEXP (*p, 0) in push_reload below uses the correct term. */
8006 *p = gen_rtx_PLUS (GET_MODE (*p),
8007 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8008 GEN_INT (high)),
8009 GEN_INT (low));
8010 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8011 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8012 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8013 return true;
8014 }
8015
8016 return false;
8017 }
8018
8019 rtx
8020 thumb_legitimize_reload_address (rtx *x_p,
8021 machine_mode mode,
8022 int opnum, int type,
8023 int ind_levels ATTRIBUTE_UNUSED)
8024 {
8025 rtx x = *x_p;
8026
8027 if (GET_CODE (x) == PLUS
8028 && GET_MODE_SIZE (mode) < 4
8029 && REG_P (XEXP (x, 0))
8030 && XEXP (x, 0) == stack_pointer_rtx
8031 && CONST_INT_P (XEXP (x, 1))
8032 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8033 {
8034 rtx orig_x = x;
8035
8036 x = copy_rtx (x);
8037 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8038 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8039 return x;
8040 }
8041
8042 /* If both registers are hi-regs, then it's better to reload the
8043 entire expression rather than each register individually. That
8044 only requires one reload register rather than two. */
8045 if (GET_CODE (x) == PLUS
8046 && REG_P (XEXP (x, 0))
8047 && REG_P (XEXP (x, 1))
8048 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8049 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8050 {
8051 rtx orig_x = x;
8052
8053 x = copy_rtx (x);
8054 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8055 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8056 return x;
8057 }
8058
8059 return NULL;
8060 }
8061
8062 /* Return TRUE if X contains any TLS symbol references. */
8063
8064 bool
8065 arm_tls_referenced_p (rtx x)
8066 {
8067 if (! TARGET_HAVE_TLS)
8068 return false;
8069
8070 subrtx_iterator::array_type array;
8071 FOR_EACH_SUBRTX (iter, array, x, ALL)
8072 {
8073 const_rtx x = *iter;
8074 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8075 return true;
8076
8077 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8078 TLS offsets, not real symbol references. */
8079 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8080 iter.skip_subrtxes ();
8081 }
8082 return false;
8083 }
8084
8085 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8086
8087 On the ARM, allow any integer (invalid ones are removed later by insn
8088 patterns), nice doubles and symbol_refs that refer to the function's
8089 constant pool XXX.
8090
8091 When generating PIC, allow anything. */
8092
8093 static bool
8094 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8095 {
8096 /* At present, we have no support for Neon structure constants, so forbid
8097 them here. It might be possible to handle simple cases like 0 and -1
8098 in future. */
8099 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8100 return false;
8101
8102 return flag_pic || !label_mentioned_p (x);
8103 }
8104
8105 static bool
8106 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8107 {
8108 return (CONST_INT_P (x)
8109 || CONST_DOUBLE_P (x)
8110 || CONSTANT_ADDRESS_P (x)
8111 || flag_pic);
8112 }
8113
8114 static bool
8115 arm_legitimate_constant_p (machine_mode mode, rtx x)
8116 {
8117 return (!arm_cannot_force_const_mem (mode, x)
8118 && (TARGET_32BIT
8119 ? arm_legitimate_constant_p_1 (mode, x)
8120 : thumb_legitimate_constant_p (mode, x)));
8121 }
8122
8123 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8124
8125 static bool
8126 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8127 {
8128 rtx base, offset;
8129
8130 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8131 {
8132 split_const (x, &base, &offset);
8133 if (GET_CODE (base) == SYMBOL_REF
8134 && !offset_within_block_p (base, INTVAL (offset)))
8135 return true;
8136 }
8137 return arm_tls_referenced_p (x);
8138 }
8139 \f
8140 #define REG_OR_SUBREG_REG(X) \
8141 (REG_P (X) \
8142 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8143
8144 #define REG_OR_SUBREG_RTX(X) \
8145 (REG_P (X) ? (X) : SUBREG_REG (X))
8146
8147 static inline int
8148 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8149 {
8150 machine_mode mode = GET_MODE (x);
8151 int total, words;
8152
8153 switch (code)
8154 {
8155 case ASHIFT:
8156 case ASHIFTRT:
8157 case LSHIFTRT:
8158 case ROTATERT:
8159 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8160
8161 case PLUS:
8162 case MINUS:
8163 case COMPARE:
8164 case NEG:
8165 case NOT:
8166 return COSTS_N_INSNS (1);
8167
8168 case MULT:
8169 if (CONST_INT_P (XEXP (x, 1)))
8170 {
8171 int cycles = 0;
8172 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8173
8174 while (i)
8175 {
8176 i >>= 2;
8177 cycles++;
8178 }
8179 return COSTS_N_INSNS (2) + cycles;
8180 }
8181 return COSTS_N_INSNS (1) + 16;
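/* Worked example (illustrative, not from the original source): a multiply
   by the constant 10 (binary 1010) takes two iterations of the loop above
   (10 >> 2 = 2, then 2 >> 2 = 0), giving COSTS_N_INSNS (2) + 2.  */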
8182
8183 case SET:
8184 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8185 the mode. */
8186 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8187 return (COSTS_N_INSNS (words)
8188 + 4 * ((MEM_P (SET_SRC (x)))
8189 + MEM_P (SET_DEST (x))));
8190
8191 case CONST_INT:
8192 if (outer == SET)
8193 {
8194 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8195 return 0;
8196 if (thumb_shiftable_const (INTVAL (x)))
8197 return COSTS_N_INSNS (2);
8198 return COSTS_N_INSNS (3);
8199 }
8200 else if ((outer == PLUS || outer == COMPARE)
8201 && INTVAL (x) < 256 && INTVAL (x) > -256)
8202 return 0;
8203 else if ((outer == IOR || outer == XOR || outer == AND)
8204 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8205 return COSTS_N_INSNS (1);
8206 else if (outer == AND)
8207 {
8208 int i;
8209 /* This duplicates the tests in the andsi3 expander. */
8210 for (i = 9; i <= 31; i++)
8211 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8212 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8213 return COSTS_N_INSNS (2);
8214 }
8215 else if (outer == ASHIFT || outer == ASHIFTRT
8216 || outer == LSHIFTRT)
8217 return 0;
8218 return COSTS_N_INSNS (2);
8219
8220 case CONST:
8221 case CONST_DOUBLE:
8222 case LABEL_REF:
8223 case SYMBOL_REF:
8224 return COSTS_N_INSNS (3);
8225
8226 case UDIV:
8227 case UMOD:
8228 case DIV:
8229 case MOD:
8230 return 100;
8231
8232 case TRUNCATE:
8233 return 99;
8234
8235 case AND:
8236 case XOR:
8237 case IOR:
8238 /* XXX guess. */
8239 return 8;
8240
8241 case MEM:
8242 /* XXX another guess. */
8243 /* Memory costs quite a lot for the first word, but subsequent words
8244 load at the equivalent of a single insn each. */
8245 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8246 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8247 ? 4 : 0));
8248
8249 case IF_THEN_ELSE:
8250 /* XXX a guess. */
8251 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8252 return 14;
8253 return 2;
8254
8255 case SIGN_EXTEND:
8256 case ZERO_EXTEND:
8257 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8258 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8259
8260 if (mode == SImode)
8261 return total;
8262
8263 if (arm_arch6)
8264 return total + COSTS_N_INSNS (1);
8265
8266 /* Assume a two-shift sequence. Increase the cost slightly so
8267 we prefer actual shifts over an extend operation. */
8268 return total + 1 + COSTS_N_INSNS (2);
8269
8270 default:
8271 return 99;
8272 }
8273 }
8274
8275 static inline bool
8276 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8277 {
8278 machine_mode mode = GET_MODE (x);
8279 enum rtx_code subcode;
8280 rtx operand;
8281 enum rtx_code code = GET_CODE (x);
8282 *total = 0;
8283
8284 switch (code)
8285 {
8286 case MEM:
8287 /* Memory costs quite a lot for the first word, but subsequent words
8288 load at the equivalent of a single insn each. */
8289 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8290 return true;
8291
8292 case DIV:
8293 case MOD:
8294 case UDIV:
8295 case UMOD:
8296 if (TARGET_HARD_FLOAT && mode == SFmode)
8297 *total = COSTS_N_INSNS (2);
8298 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8299 *total = COSTS_N_INSNS (4);
8300 else
8301 *total = COSTS_N_INSNS (20);
8302 return false;
8303
8304 case ROTATE:
8305 if (REG_P (XEXP (x, 1)))
8306 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8307 else if (!CONST_INT_P (XEXP (x, 1)))
8308 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8309
8310 /* Fall through */
8311 case ROTATERT:
8312 if (mode != SImode)
8313 {
8314 *total += COSTS_N_INSNS (4);
8315 return true;
8316 }
8317
8318 /* Fall through */
8319 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8320 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8321 if (mode == DImode)
8322 {
8323 *total += COSTS_N_INSNS (3);
8324 return true;
8325 }
8326
8327 *total += COSTS_N_INSNS (1);
8328 /* Increase the cost of complex shifts because they aren't any faster,
8329 and they reduce dual-issue opportunities. */
8330 if (arm_tune_cortex_a9
8331 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8332 ++*total;
8333
8334 return true;
8335
8336 case MINUS:
8337 if (mode == DImode)
8338 {
8339 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8340 if (CONST_INT_P (XEXP (x, 0))
8341 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8342 {
8343 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8344 return true;
8345 }
8346
8347 if (CONST_INT_P (XEXP (x, 1))
8348 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8349 {
8350 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8351 return true;
8352 }
8353
8354 return false;
8355 }
8356
8357 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8358 {
8359 if (TARGET_HARD_FLOAT
8360 && (mode == SFmode
8361 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8362 {
8363 *total = COSTS_N_INSNS (1);
8364 if (CONST_DOUBLE_P (XEXP (x, 0))
8365 && arm_const_double_rtx (XEXP (x, 0)))
8366 {
8367 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8368 return true;
8369 }
8370
8371 if (CONST_DOUBLE_P (XEXP (x, 1))
8372 && arm_const_double_rtx (XEXP (x, 1)))
8373 {
8374 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8375 return true;
8376 }
8377
8378 return false;
8379 }
8380 *total = COSTS_N_INSNS (20);
8381 return false;
8382 }
8383
8384 *total = COSTS_N_INSNS (1);
8385 if (CONST_INT_P (XEXP (x, 0))
8386 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8387 {
8388 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8389 return true;
8390 }
8391
8392 subcode = GET_CODE (XEXP (x, 1));
8393 if (subcode == ASHIFT || subcode == ASHIFTRT
8394 || subcode == LSHIFTRT
8395 || subcode == ROTATE || subcode == ROTATERT)
8396 {
8397 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8398 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8399 return true;
8400 }
8401
8402 /* A shift as a part of RSB costs no more than RSB itself. */
8403 if (GET_CODE (XEXP (x, 0)) == MULT
8404 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8405 {
8406 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8407 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8408 return true;
8409 }
8410
8411 if (subcode == MULT
8412 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8413 {
8414 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8415 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8416 return true;
8417 }
8418
8419 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8420 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8421 {
8422 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8423 if (REG_P (XEXP (XEXP (x, 1), 0))
8424 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8425 *total += COSTS_N_INSNS (1);
8426
8427 return true;
8428 }
8429
8430 /* Fall through */
8431
8432 case PLUS:
8433 if (code == PLUS && arm_arch6 && mode == SImode
8434 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8435 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8436 {
8437 *total = COSTS_N_INSNS (1);
8438 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8439 0, speed);
8440 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8441 return true;
8442 }
8443
8444 /* MLA: All arguments must be registers. We filter out
8445 multiplication by a power of two, so that we fall through to
8446 the code below. */
8447 if (GET_CODE (XEXP (x, 0)) == MULT
8448 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8449 {
8450 /* The cost comes from the cost of the multiply. */
8451 return false;
8452 }
8453
8454 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8455 {
8456 if (TARGET_HARD_FLOAT
8457 && (mode == SFmode
8458 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8459 {
8460 *total = COSTS_N_INSNS (1);
8461 if (CONST_DOUBLE_P (XEXP (x, 1))
8462 && arm_const_double_rtx (XEXP (x, 1)))
8463 {
8464 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8465 return true;
8466 }
8467
8468 return false;
8469 }
8470
8471 *total = COSTS_N_INSNS (20);
8472 return false;
8473 }
8474
8475 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8476 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8477 {
8478 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8479 if (REG_P (XEXP (XEXP (x, 0), 0))
8480 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8481 *total += COSTS_N_INSNS (1);
8482 return true;
8483 }
8484
8485 /* Fall through */
8486
8487 case AND: case XOR: case IOR:
8488
8489 /* Normally the frame registers will be split into reg+const during
8490 reload, so it is a bad idea to combine them with other instructions,
8491 since then they might not be moved outside of loops. As a compromise
8492 we allow integration with ops that have a constant as their second
8493 operand. */
8494 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8495 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8496 && !CONST_INT_P (XEXP (x, 1)))
8497 *total = COSTS_N_INSNS (1);
8498
8499 if (mode == DImode)
8500 {
8501 *total += COSTS_N_INSNS (2);
8502 if (CONST_INT_P (XEXP (x, 1))
8503 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8504 {
8505 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8506 return true;
8507 }
8508
8509 return false;
8510 }
8511
8512 *total += COSTS_N_INSNS (1);
8513 if (CONST_INT_P (XEXP (x, 1))
8514 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8515 {
8516 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8517 return true;
8518 }
8519 subcode = GET_CODE (XEXP (x, 0));
8520 if (subcode == ASHIFT || subcode == ASHIFTRT
8521 || subcode == LSHIFTRT
8522 || subcode == ROTATE || subcode == ROTATERT)
8523 {
8524 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8525 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8526 return true;
8527 }
8528
8529 if (subcode == MULT
8530 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8531 {
8532 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8533 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8534 return true;
8535 }
8536
8537 if (subcode == UMIN || subcode == UMAX
8538 || subcode == SMIN || subcode == SMAX)
8539 {
8540 *total = COSTS_N_INSNS (3);
8541 return true;
8542 }
8543
8544 return false;
8545
8546 case MULT:
8547 /* This should have been handled by the CPU specific routines. */
8548 gcc_unreachable ();
8549
8550 case TRUNCATE:
8551 if (arm_arch3m && mode == SImode
8552 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8553 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8554 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8555 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8556 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8557 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8558 {
8559 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8560 return true;
8561 }
8562 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8563 return false;
8564
8565 case NEG:
8566 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8567 {
8568 if (TARGET_HARD_FLOAT
8569 && (mode == SFmode
8570 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8571 {
8572 *total = COSTS_N_INSNS (1);
8573 return false;
8574 }
8575 *total = COSTS_N_INSNS (2);
8576 return false;
8577 }
8578
8579 /* Fall through */
8580 case NOT:
8581 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8582 if (mode == SImode && code == NOT)
8583 {
8584 subcode = GET_CODE (XEXP (x, 0));
8585 if (subcode == ASHIFT || subcode == ASHIFTRT
8586 || subcode == LSHIFTRT
8587 || subcode == ROTATE || subcode == ROTATERT
8588 || (subcode == MULT
8589 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8590 {
8591 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8592 /* Register shifts cost an extra cycle. */
8593 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8594 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8595 subcode, 1, speed);
8596 return true;
8597 }
8598 }
8599
8600 return false;
8601
8602 case IF_THEN_ELSE:
8603 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8604 {
8605 *total = COSTS_N_INSNS (4);
8606 return true;
8607 }
8608
8609 operand = XEXP (x, 0);
8610
8611 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8612 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8613 && REG_P (XEXP (operand, 0))
8614 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8615 *total += COSTS_N_INSNS (1);
8616 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8617 + rtx_cost (XEXP (x, 2), code, 2, speed));
8618 return true;
8619
8620 case NE:
8621 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8622 {
8623 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8624 return true;
8625 }
8626 goto scc_insn;
8627
8628 case GE:
8629 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8630 && mode == SImode && XEXP (x, 1) == const0_rtx)
8631 {
8632 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8633 return true;
8634 }
8635 goto scc_insn;
8636
8637 case LT:
8638 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8639 && mode == SImode && XEXP (x, 1) == const0_rtx)
8640 {
8641 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8642 return true;
8643 }
8644 goto scc_insn;
8645
8646 case EQ:
8647 case GT:
8648 case LE:
8649 case GEU:
8650 case LTU:
8651 case GTU:
8652 case LEU:
8653 case UNORDERED:
8654 case ORDERED:
8655 case UNEQ:
8656 case UNGE:
8657 case UNLT:
8658 case UNGT:
8659 case UNLE:
8660 scc_insn:
8661 /* SCC insns. If the comparison has already been performed, they
8662 cost 2 instructions. Otherwise they need an additional
8663 comparison before them. */
8664 *total = COSTS_N_INSNS (2);
8665 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8666 {
8667 return true;
8668 }
8669
8670 /* Fall through */
8671 case COMPARE:
8672 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8673 {
8674 *total = 0;
8675 return true;
8676 }
8677
8678 *total += COSTS_N_INSNS (1);
8679 if (CONST_INT_P (XEXP (x, 1))
8680 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8681 {
8682 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8683 return true;
8684 }
8685
8686 subcode = GET_CODE (XEXP (x, 0));
8687 if (subcode == ASHIFT || subcode == ASHIFTRT
8688 || subcode == LSHIFTRT
8689 || subcode == ROTATE || subcode == ROTATERT)
8690 {
8691 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8692 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8693 return true;
8694 }
8695
8696 if (subcode == MULT
8697 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8698 {
8699 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8700 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8701 return true;
8702 }
8703
8704 return false;
8705
8706 case UMIN:
8707 case UMAX:
8708 case SMIN:
8709 case SMAX:
8710 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8711 if (!CONST_INT_P (XEXP (x, 1))
8712 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8713 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8714 return true;
8715
8716 case ABS:
8717 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8718 {
8719 if (TARGET_HARD_FLOAT
8720 && (mode == SFmode
8721 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8722 {
8723 *total = COSTS_N_INSNS (1);
8724 return false;
8725 }
8726 *total = COSTS_N_INSNS (20);
8727 return false;
8728 }
8729 *total = COSTS_N_INSNS (1);
8730 if (mode == DImode)
8731 *total += COSTS_N_INSNS (3);
8732 return false;
8733
8734 case SIGN_EXTEND:
8735 case ZERO_EXTEND:
8736 *total = 0;
8737 if (GET_MODE_CLASS (mode) == MODE_INT)
8738 {
8739 rtx op = XEXP (x, 0);
8740 machine_mode opmode = GET_MODE (op);
8741
8742 if (mode == DImode)
8743 *total += COSTS_N_INSNS (1);
8744
8745 if (opmode != SImode)
8746 {
8747 if (MEM_P (op))
8748 {
8749 /* If !arm_arch4, we use one of the extendhisi2_mem
8750 or movhi_bytes patterns for HImode. For a QImode
8751 sign extension, we first zero-extend from memory
8752 and then perform a shift sequence. */
8753 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8754 *total += COSTS_N_INSNS (2);
8755 }
8756 else if (arm_arch6)
8757 *total += COSTS_N_INSNS (1);
8758
8759 /* We don't have the necessary insn, so we need to perform some
8760 other operation. */
8761 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8762 /* An and with constant 255. */
8763 *total += COSTS_N_INSNS (1);
8764 else
8765 /* A shift sequence. Increase costs slightly to avoid
8766 combining two shifts into an extend operation. */
8767 *total += COSTS_N_INSNS (2) + 1;
8768 }
8769
8770 return false;
8771 }
8772
8773 switch (GET_MODE (XEXP (x, 0)))
8774 {
8775 case V8QImode:
8776 case V4HImode:
8777 case V2SImode:
8778 case V4QImode:
8779 case V2HImode:
8780 *total = COSTS_N_INSNS (1);
8781 return false;
8782
8783 default:
8784 gcc_unreachable ();
8785 }
8786 gcc_unreachable ();
8787
8788 case ZERO_EXTRACT:
8789 case SIGN_EXTRACT:
8790 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8791 return true;
8792
8793 case CONST_INT:
8794 if (const_ok_for_arm (INTVAL (x))
8795 || const_ok_for_arm (~INTVAL (x)))
8796 *total = COSTS_N_INSNS (1);
8797 else
8798 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8799 INTVAL (x), NULL_RTX,
8800 NULL_RTX, 0, 0));
8801 return true;
8802
8803 case CONST:
8804 case LABEL_REF:
8805 case SYMBOL_REF:
8806 *total = COSTS_N_INSNS (3);
8807 return true;
8808
8809 case HIGH:
8810 *total = COSTS_N_INSNS (1);
8811 return true;
8812
8813 case LO_SUM:
8814 *total = COSTS_N_INSNS (1);
8815 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8816 return true;
8817
8818 case CONST_DOUBLE:
8819 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8820 && (mode == SFmode || !TARGET_VFP_SINGLE))
8821 *total = COSTS_N_INSNS (1);
8822 else
8823 *total = COSTS_N_INSNS (4);
8824 return true;
8825
8826 case SET:
8827 /* The vec_extract patterns accept memory operands that require an
8828 address reload. Account for the cost of that reload to give the
8829 auto-inc-dec pass an incentive to try to replace them. */
8830 if (TARGET_NEON && MEM_P (SET_DEST (x))
8831 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8832 {
8833 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8834 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8835 *total += COSTS_N_INSNS (1);
8836 return true;
8837 }
8838 /* Likewise for the vec_set patterns. */
8839 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8840 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8841 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8842 {
8843 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8844 *total = rtx_cost (mem, code, 0, speed);
8845 if (!neon_vector_mem_operand (mem, 2, true))
8846 *total += COSTS_N_INSNS (1);
8847 return true;
8848 }
8849 return false;
8850
8851 case UNSPEC:
8852 /* We make this cost as high as our memory cost so that it can
8853 be hoisted out of loops. */
8854 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8855 {
8856 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8857 }
8858 return true;
8859
8860 case CONST_VECTOR:
8861 if (TARGET_NEON
8862 && TARGET_HARD_FLOAT
8863 && outer == SET
8864 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8865 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8866 *total = COSTS_N_INSNS (1);
8867 else
8868 *total = COSTS_N_INSNS (4);
8869 return true;
8870
8871 default:
8872 *total = COSTS_N_INSNS (4);
8873 return false;
8874 }
8875 }
8876
8877 /* Estimate the size cost of Thumb-1 instructions.
8878 For now most of the code is copied from thumb1_rtx_costs. We need more
8879 fine-grained tuning when we have more related test cases. */
8880 static inline int
8881 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8882 {
8883 machine_mode mode = GET_MODE (x);
8884 int words;
8885
8886 switch (code)
8887 {
8888 case ASHIFT:
8889 case ASHIFTRT:
8890 case LSHIFTRT:
8891 case ROTATERT:
8892 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8893
8894 case PLUS:
8895 case MINUS:
8896 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8897 shiftsub1 patterns generated by RTL expansion, especially when
8898 expanding multiplication. */
8899 if ((GET_CODE (XEXP (x, 0)) == MULT
8900 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8901 || (GET_CODE (XEXP (x, 1)) == MULT
8902 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8903 return COSTS_N_INSNS (2);
8904 /* Deliberately fall through for ordinary RTXes. */
8905 case COMPARE:
8906 case NEG:
8907 case NOT:
8908 return COSTS_N_INSNS (1);
8909
8910 case MULT:
8911 if (CONST_INT_P (XEXP (x, 1)))
8912 {
8913 /* The Thumb-1 mul instruction can't operate on a constant; we must
8914 load it into a register first. */
8915 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8916 /* For the targets which have a very small and high-latency multiply
8917 unit, we prefer to synthesize the mult with up to 5 instructions,
8918 giving a good balance between size and performance. */
8919 if (arm_arch6m && arm_m_profile_small_mul)
8920 return COSTS_N_INSNS (5);
8921 else
8922 return COSTS_N_INSNS (1) + const_size;
8923 }
8924 return COSTS_N_INSNS (1);
8925
8926 case SET:
8927 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8928 the mode. */
8929 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8930 return COSTS_N_INSNS (words)
8931 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8932 || satisfies_constraint_K (SET_SRC (x))
8933 /* thumb1_movdi_insn. */
8934 || ((words > 1) && MEM_P (SET_SRC (x))));
8935
8936 case CONST_INT:
8937 if (outer == SET)
8938 {
8939 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8940 return COSTS_N_INSNS (1);
8941 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8942 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8943 return COSTS_N_INSNS (2);
8944 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8945 if (thumb_shiftable_const (INTVAL (x)))
8946 return COSTS_N_INSNS (2);
8947 return COSTS_N_INSNS (3);
8948 }
8949 else if ((outer == PLUS || outer == COMPARE)
8950 && INTVAL (x) < 256 && INTVAL (x) > -256)
8951 return 0;
8952 else if ((outer == IOR || outer == XOR || outer == AND)
8953 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8954 return COSTS_N_INSNS (1);
8955 else if (outer == AND)
8956 {
8957 int i;
8958 /* This duplicates the tests in the andsi3 expander. */
8959 for (i = 9; i <= 31; i++)
8960 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8961 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8962 return COSTS_N_INSNS (2);
8963 }
8964 else if (outer == ASHIFT || outer == ASHIFTRT
8965 || outer == LSHIFTRT)
8966 return 0;
8967 return COSTS_N_INSNS (2);
8968
8969 case CONST:
8970 case CONST_DOUBLE:
8971 case LABEL_REF:
8972 case SYMBOL_REF:
8973 return COSTS_N_INSNS (3);
8974
8975 case UDIV:
8976 case UMOD:
8977 case DIV:
8978 case MOD:
8979 return 100;
8980
8981 case TRUNCATE:
8982 return 99;
8983
8984 case AND:
8985 case XOR:
8986 case IOR:
8987 return COSTS_N_INSNS (1);
8988
8989 case MEM:
8990 return (COSTS_N_INSNS (1)
8991 + COSTS_N_INSNS (1)
8992 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8993 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8994 ? COSTS_N_INSNS (1) : 0));
8995
8996 case IF_THEN_ELSE:
8997 /* XXX a guess. */
8998 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8999 return 14;
9000 return 2;
9001
9002 case ZERO_EXTEND:
9003 /* XXX still guessing. */
9004 switch (GET_MODE (XEXP (x, 0)))
9005 {
9006 case QImode:
9007 return (1 + (mode == DImode ? 4 : 0)
9008 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9009
9010 case HImode:
9011 return (4 + (mode == DImode ? 4 : 0)
9012 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9013
9014 case SImode:
9015 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9016
9017 default:
9018 return 99;
9019 }
9020
9021 default:
9022 return 99;
9023 }
9024 }
9025
9026 /* RTX costs when optimizing for size. */
9027 static bool
9028 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9029 int *total)
9030 {
9031 machine_mode mode = GET_MODE (x);
9032 if (TARGET_THUMB1)
9033 {
9034 *total = thumb1_size_rtx_costs (x, code, outer_code);
9035 return true;
9036 }
9037
9038 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9039 switch (code)
9040 {
9041 case MEM:
9042 /* A memory access costs 1 insn if the mode is small, or the address is
9043 a single register, otherwise it costs one insn per word. */
9044 if (REG_P (XEXP (x, 0)))
9045 *total = COSTS_N_INSNS (1);
9046 else if (flag_pic
9047 && GET_CODE (XEXP (x, 0)) == PLUS
9048 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9049 /* This will be split into two instructions.
9050 See arm.md:calculate_pic_address. */
9051 *total = COSTS_N_INSNS (2);
9052 else
9053 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9054 return true;
9055
9056 case DIV:
9057 case MOD:
9058 case UDIV:
9059 case UMOD:
9060 /* Needs a libcall, so it costs about this. */
9061 *total = COSTS_N_INSNS (2);
9062 return false;
9063
9064 case ROTATE:
9065 if (mode == SImode && REG_P (XEXP (x, 1)))
9066 {
9067 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9068 return true;
9069 }
9070 /* Fall through */
9071 case ROTATERT:
9072 case ASHIFT:
9073 case LSHIFTRT:
9074 case ASHIFTRT:
9075 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9076 {
9077 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9078 return true;
9079 }
9080 else if (mode == SImode)
9081 {
9082 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9083 /* Slightly disparage register shifts, but not by much. */
9084 if (!CONST_INT_P (XEXP (x, 1)))
9085 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9086 return true;
9087 }
9088
9089 /* Needs a libcall. */
9090 *total = COSTS_N_INSNS (2);
9091 return false;
9092
9093 case MINUS:
9094 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9095 && (mode == SFmode || !TARGET_VFP_SINGLE))
9096 {
9097 *total = COSTS_N_INSNS (1);
9098 return false;
9099 }
9100
9101 if (mode == SImode)
9102 {
9103 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9104 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9105
9106 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9107 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9108 || subcode1 == ROTATE || subcode1 == ROTATERT
9109 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9110 || subcode1 == ASHIFTRT)
9111 {
9112 /* It's just the cost of the two operands. */
9113 *total = 0;
9114 return false;
9115 }
9116
9117 *total = COSTS_N_INSNS (1);
9118 return false;
9119 }
9120
9121 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9122 return false;
9123
9124 case PLUS:
9125 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9126 && (mode == SFmode || !TARGET_VFP_SINGLE))
9127 {
9128 *total = COSTS_N_INSNS (1);
9129 return false;
9130 }
9131
9132 /* A shift as a part of ADD costs nothing. */
9133 if (GET_CODE (XEXP (x, 0)) == MULT
9134 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9135 {
9136 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9137 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9138 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9139 return true;
9140 }
9141
9142 /* Fall through */
9143 case AND: case XOR: case IOR:
9144 if (mode == SImode)
9145 {
9146 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9147
9148 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9149 || subcode == LSHIFTRT || subcode == ASHIFTRT
9150 || (code == AND && subcode == NOT))
9151 {
9152 /* It's just the cost of the two operands. */
9153 *total = 0;
9154 return false;
9155 }
9156 }
9157
9158 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9159 return false;
9160
9161 case MULT:
9162 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9163 return false;
9164
9165 case NEG:
9166 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9167 && (mode == SFmode || !TARGET_VFP_SINGLE))
9168 {
9169 *total = COSTS_N_INSNS (1);
9170 return false;
9171 }
9172
9173 /* Fall through */
9174 case NOT:
9175 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9176
9177 return false;
9178
9179 case IF_THEN_ELSE:
9180 *total = 0;
9181 return false;
9182
9183 case COMPARE:
9184 if (cc_register (XEXP (x, 0), VOIDmode))
9185 *total = 0;
9186 else
9187 *total = COSTS_N_INSNS (1);
9188 return false;
9189
9190 case ABS:
9191 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9192 && (mode == SFmode || !TARGET_VFP_SINGLE))
9193 *total = COSTS_N_INSNS (1);
9194 else
9195 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9196 return false;
9197
9198 case SIGN_EXTEND:
9199 case ZERO_EXTEND:
9200 return arm_rtx_costs_1 (x, outer_code, total, 0);
9201
9202 case CONST_INT:
9203 if (const_ok_for_arm (INTVAL (x)))
9204 /* A multiplication by a constant requires another instruction
9205 to load the constant into a register. */
9206 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9207 ? 1 : 0);
9208 else if (const_ok_for_arm (~INTVAL (x)))
9209 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9210 else if (const_ok_for_arm (-INTVAL (x)))
9211 {
9212 if (outer_code == COMPARE || outer_code == PLUS
9213 || outer_code == MINUS)
9214 *total = 0;
9215 else
9216 *total = COSTS_N_INSNS (1);
9217 }
9218 else
9219 *total = COSTS_N_INSNS (2);
9220 return true;
9221
9222 case CONST:
9223 case LABEL_REF:
9224 case SYMBOL_REF:
9225 *total = COSTS_N_INSNS (2);
9226 return true;
9227
9228 case CONST_DOUBLE:
9229 *total = COSTS_N_INSNS (4);
9230 return true;
9231
9232 case CONST_VECTOR:
9233 if (TARGET_NEON
9234 && TARGET_HARD_FLOAT
9235 && outer_code == SET
9236 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9237 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9238 *total = COSTS_N_INSNS (1);
9239 else
9240 *total = COSTS_N_INSNS (4);
9241 return true;
9242
9243 case HIGH:
9244 case LO_SUM:
9245 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9246 cost of these slightly. */
9247 *total = COSTS_N_INSNS (1) + 1;
9248 return true;
9249
9250 case SET:
9251 return false;
9252
9253 default:
9254 if (mode != VOIDmode)
9255 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9256 else
9257 *total = COSTS_N_INSNS (4); /* Who knows? */
9258 return false;
9259 }
9260 }
9261
9262 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9263 operand, then return the operand that is being shifted. If the shift
9264 is not by a constant, then set SHIFT_REG to point to the operand.
9265 Return NULL if OP is not a shifter operand. */
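/* For example (illustrative only): for (ashift (reg A) (const_int 2)) or
   (mult (reg A) (const_int 4)) this returns (reg A) and leaves SHIFT_REG
   untouched, whereas for (ashift (reg A) (reg B)) it returns (reg A) and
   sets *SHIFT_REG to (reg B).  */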
9266 static rtx
9267 shifter_op_p (rtx op, rtx *shift_reg)
9268 {
9269 enum rtx_code code = GET_CODE (op);
9270
9271 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9272 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9273 return XEXP (op, 0);
9274 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9275 return XEXP (op, 0);
9276 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9277 || code == ASHIFTRT)
9278 {
9279 if (!CONST_INT_P (XEXP (op, 1)))
9280 *shift_reg = XEXP (op, 1);
9281 return XEXP (op, 0);
9282 }
9283
9284 return NULL;
9285 }
9286
9287 static bool
9288 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9289 {
9290 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9291 gcc_assert (GET_CODE (x) == UNSPEC);
9292
9293 switch (XINT (x, 1))
9294 {
9295 case UNSPEC_UNALIGNED_LOAD:
9296 /* We can only do unaligned loads into the integer unit, and we can't
9297 use LDM or LDRD. */
9298 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9299 if (speed_p)
9300 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9301 + extra_cost->ldst.load_unaligned);
9302
9303 #ifdef NOT_YET
9304 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9305 ADDR_SPACE_GENERIC, speed_p);
9306 #endif
9307 return true;
9308
9309 case UNSPEC_UNALIGNED_STORE:
9310 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9311 if (speed_p)
9312 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9313 + extra_cost->ldst.store_unaligned);
9314
9315 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9316 #ifdef NOT_YET
9317 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9318 ADDR_SPACE_GENERIC, speed_p);
9319 #endif
9320 return true;
9321
9322 case UNSPEC_VRINTZ:
9323 case UNSPEC_VRINTP:
9324 case UNSPEC_VRINTM:
9325 case UNSPEC_VRINTR:
9326 case UNSPEC_VRINTX:
9327 case UNSPEC_VRINTA:
9328 *cost = COSTS_N_INSNS (1);
9329 if (speed_p)
9330 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9331
9332 return true;
9333 default:
9334 *cost = COSTS_N_INSNS (2);
9335 break;
9336 }
9337 return false;
9338 }
9339
9340 /* Cost of a libcall. We assume one insn per argument, an amount for the
9341 call (one insn for -Os) and then one for processing the result. */
9342 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
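/* For example (purely illustrative), a two-operand libcall such as a
   DImode division is costed as LIBCALL_COST (2), i.e. COSTS_N_INSNS (20)
   when optimizing for speed and COSTS_N_INSNS (4) when optimizing for
   size.  */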
9343
9344 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9345 do \
9346 { \
9347 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9348 if (shift_op != NULL \
9349 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9350 { \
9351 if (shift_reg) \
9352 { \
9353 if (speed_p) \
9354 *cost += extra_cost->alu.arith_shift_reg; \
9355 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9356 } \
9357 else if (speed_p) \
9358 *cost += extra_cost->alu.arith_shift; \
9359 \
9360 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9361 + rtx_cost (XEXP (x, 1 - IDX), \
9362 OP, 1, speed_p)); \
9363 return true; \
9364 } \
9365 } \
9366 while (0);
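/* Illustrative note: the macro above matches narrow-mode expressions such
   as (plus:HI (ashift:HI (reg) (const_int 1)) (reg)), charging an
   arithmetic-with-shift operation plus the costs of the shifted operand
   and of the other operand of the PLUS or MINUS.  */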
9367
9368 /* RTX costs. Make an estimate of the cost of executing the operation
9369 X, which is contained within an operation with code OUTER_CODE.
9370 SPEED_P indicates whether the cost desired is the performance cost,
9371 or the size cost. The estimate is stored in COST and the return
9372 value is TRUE if the cost calculation is final, or FALSE if the
9373 caller should recurse through the operands of X to add additional
9374 costs.
9375
9376 We currently make no attempt to model the size savings of Thumb-2
9377 16-bit instructions. At the normal points in compilation where
9378 this code is called we have no measure of whether the condition
9379 flags are live or not, and thus no realistic way to determine what
9380 the size will eventually be. */
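/* Illustrative example of that convention: for a simple SImode PLUS of
   two registers the code below sets *COST to COSTS_N_INSNS (1), adds
   extra_cost->alu.arith when SPEED_P, and returns FALSE so that the
   generic costing code adds the costs of the two register operands.  */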
9381 static bool
9382 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9383 const struct cpu_cost_table *extra_cost,
9384 int *cost, bool speed_p)
9385 {
9386 machine_mode mode = GET_MODE (x);
9387
9388 if (TARGET_THUMB1)
9389 {
9390 if (speed_p)
9391 *cost = thumb1_rtx_costs (x, code, outer_code);
9392 else
9393 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9394 return true;
9395 }
9396
9397 switch (code)
9398 {
9399 case SET:
9400 *cost = 0;
9401 /* SET RTXs don't have a mode so we get it from the destination. */
9402 mode = GET_MODE (SET_DEST (x));
9403
9404 if (REG_P (SET_SRC (x))
9405 && REG_P (SET_DEST (x)))
9406 {
9407 /* Assume that most copies can be done with a single insn,
9408 unless we don't have HW FP, in which case everything
9409 larger than word mode will require two insns. */
9410 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9411 && GET_MODE_SIZE (mode) > 4)
9412 || mode == DImode)
9413 ? 2 : 1);
9414 /* Conditional register moves can be encoded
9415 in 16 bits in Thumb mode. */
9416 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9417 *cost >>= 1;
9418
9419 return true;
9420 }
9421
9422 if (CONST_INT_P (SET_SRC (x)))
9423 {
9424 /* Handle CONST_INT here, since the value doesn't have a mode
9425 and we would otherwise be unable to work out the true cost. */
9426 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9427 outer_code = SET;
9428 /* Slightly lower the cost of setting a core reg to a constant.
9429 This helps break up chains and allows for better scheduling. */
9430 if (REG_P (SET_DEST (x))
9431 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9432 *cost -= 1;
9433 x = SET_SRC (x);
9434 /* Moves of an immediate in the range [0, 255] can be encoded in
9435 16 bits in Thumb mode. */
9436 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9437 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9438 *cost >>= 1;
9439 goto const_int_cost;
9440 }
9441
9442 return false;
9443
9444 case MEM:
9445 /* A memory access costs 1 insn if the mode is small, or the address is
9446 a single register, otherwise it costs one insn per word. */
9447 if (REG_P (XEXP (x, 0)))
9448 *cost = COSTS_N_INSNS (1);
9449 else if (flag_pic
9450 && GET_CODE (XEXP (x, 0)) == PLUS
9451 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9452 /* This will be split into two instructions.
9453 See arm.md:calculate_pic_address. */
9454 *cost = COSTS_N_INSNS (2);
9455 else
9456 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9457
9458 /* For speed optimizations, add the costs of the address and
9459 accessing memory. */
9460 if (speed_p)
9461 #ifdef NOT_YET
9462 *cost += (extra_cost->ldst.load
9463 + arm_address_cost (XEXP (x, 0), mode,
9464 ADDR_SPACE_GENERIC, speed_p));
9465 #else
9466 *cost += extra_cost->ldst.load;
9467 #endif
9468 return true;
9469
9470 case PARALLEL:
9471 {
9472 /* Calculations of LDM costs are complex. We assume an initial cost
9473 (ldm_1st) which will load the number of registers mentioned in
9474 ldm_regs_per_insn_1st registers; then each additional
9475 ldm_regs_per_insn_subsequent registers cost one more insn. The
9476 formula for N regs is thus:
9477
9478 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9479 + ldm_regs_per_insn_subsequent - 1)
9480 / ldm_regs_per_insn_subsequent).
9481
9482 Additional costs may also be added for addressing. A similar
9483 formula is used for STM. */
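/* Worked example (illustrative): with ldm_regs_per_insn_1st == 2,
   ldm_regs_per_insn_subsequent == 2 and N == 5 registers, the formula
   above gives ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */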
9484
9485 bool is_ldm = load_multiple_operation (x, SImode);
9486 bool is_stm = store_multiple_operation (x, SImode);
9487
9488 *cost = COSTS_N_INSNS (1);
9489
9490 if (is_ldm || is_stm)
9491 {
9492 if (speed_p)
9493 {
9494 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9495 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9496 ? extra_cost->ldst.ldm_regs_per_insn_1st
9497 : extra_cost->ldst.stm_regs_per_insn_1st;
9498 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9499 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9500 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9501
9502 *cost += regs_per_insn_1st
9503 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9504 + regs_per_insn_sub - 1)
9505 / regs_per_insn_sub);
9506 return true;
9507 }
9508
9509 }
9510 return false;
9511 }
9512 case DIV:
9513 case UDIV:
9514 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9515 && (mode == SFmode || !TARGET_VFP_SINGLE))
9516 *cost = COSTS_N_INSNS (speed_p
9517 ? extra_cost->fp[mode != SFmode].div : 1);
9518 else if (mode == SImode && TARGET_IDIV)
9519 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9520 else
9521 *cost = LIBCALL_COST (2);
9522 return false; /* All arguments must be in registers. */
9523
9524 case MOD:
9525 case UMOD:
9526 *cost = LIBCALL_COST (2);
9527 return false; /* All arguments must be in registers. */
9528
9529 case ROTATE:
9530 if (mode == SImode && REG_P (XEXP (x, 1)))
9531 {
9532 *cost = (COSTS_N_INSNS (2)
9533 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9534 if (speed_p)
9535 *cost += extra_cost->alu.shift_reg;
9536 return true;
9537 }
9538 /* Fall through */
9539 case ROTATERT:
9540 case ASHIFT:
9541 case LSHIFTRT:
9542 case ASHIFTRT:
9543 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9544 {
9545 *cost = (COSTS_N_INSNS (3)
9546 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9547 if (speed_p)
9548 *cost += 2 * extra_cost->alu.shift;
9549 return true;
9550 }
9551 else if (mode == SImode)
9552 {
9553 *cost = (COSTS_N_INSNS (1)
9554 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9555 /* Slightly disparage register shifts at -Os, but not by much. */
9556 if (!CONST_INT_P (XEXP (x, 1)))
9557 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9558 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9559 return true;
9560 }
9561 else if (GET_MODE_CLASS (mode) == MODE_INT
9562 && GET_MODE_SIZE (mode) < 4)
9563 {
9564 if (code == ASHIFT)
9565 {
9566 *cost = (COSTS_N_INSNS (1)
9567 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9568 /* Slightly disparage register shifts at -Os, but not by
9569 much. */
9570 if (!CONST_INT_P (XEXP (x, 1)))
9571 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9572 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9573 }
9574 else if (code == LSHIFTRT || code == ASHIFTRT)
9575 {
9576 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9577 {
9578 /* Can use SBFX/UBFX. */
9579 *cost = COSTS_N_INSNS (1);
9580 if (speed_p)
9581 *cost += extra_cost->alu.bfx;
9582 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9583 }
9584 else
9585 {
9586 *cost = COSTS_N_INSNS (2);
9587 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9588 if (speed_p)
9589 {
9590 if (CONST_INT_P (XEXP (x, 1)))
9591 *cost += 2 * extra_cost->alu.shift;
9592 else
9593 *cost += (extra_cost->alu.shift
9594 + extra_cost->alu.shift_reg);
9595 }
9596 else
9597 /* Slightly disparage register shifts. */
9598 *cost += !CONST_INT_P (XEXP (x, 1));
9599 }
9600 }
9601 else /* Rotates. */
9602 {
9603 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9604 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9605 if (speed_p)
9606 {
9607 if (CONST_INT_P (XEXP (x, 1)))
9608 *cost += (2 * extra_cost->alu.shift
9609 + extra_cost->alu.log_shift);
9610 else
9611 *cost += (extra_cost->alu.shift
9612 + extra_cost->alu.shift_reg
9613 + extra_cost->alu.log_shift_reg);
9614 }
9615 }
9616 return true;
9617 }
9618
9619 *cost = LIBCALL_COST (2);
9620 return false;
9621
9622 case BSWAP:
9623 if (arm_arch6)
9624 {
9625 if (mode == SImode)
9626 {
9627 *cost = COSTS_N_INSNS (1);
9628 if (speed_p)
9629 *cost += extra_cost->alu.rev;
9630
9631 return false;
9632 }
9633 }
9634 else
9635 {
9636 /* No rev instruction available. Look at arm_legacy_rev
9637 and thumb_legacy_rev for the form of RTL used then. */
9638 if (TARGET_THUMB)
9639 {
9640 *cost = COSTS_N_INSNS (10);
9641
9642 if (speed_p)
9643 {
9644 *cost += 6 * extra_cost->alu.shift;
9645 *cost += 3 * extra_cost->alu.logical;
9646 }
9647 }
9648 else
9649 {
9650 *cost = COSTS_N_INSNS (5);
9651
9652 if (speed_p)
9653 {
9654 *cost += 2 * extra_cost->alu.shift;
9655 *cost += extra_cost->alu.arith_shift;
9656 *cost += 2 * extra_cost->alu.logical;
9657 }
9658 }
9659 return true;
9660 }
9661 return false;
9662
9663 case MINUS:
9664 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9665 && (mode == SFmode || !TARGET_VFP_SINGLE))
9666 {
9667 *cost = COSTS_N_INSNS (1);
9668 if (GET_CODE (XEXP (x, 0)) == MULT
9669 || GET_CODE (XEXP (x, 1)) == MULT)
9670 {
9671 rtx mul_op0, mul_op1, sub_op;
9672
9673 if (speed_p)
9674 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9675
9676 if (GET_CODE (XEXP (x, 0)) == MULT)
9677 {
9678 mul_op0 = XEXP (XEXP (x, 0), 0);
9679 mul_op1 = XEXP (XEXP (x, 0), 1);
9680 sub_op = XEXP (x, 1);
9681 }
9682 else
9683 {
9684 mul_op0 = XEXP (XEXP (x, 1), 0);
9685 mul_op1 = XEXP (XEXP (x, 1), 1);
9686 sub_op = XEXP (x, 0);
9687 }
9688
9689 /* The first operand of the multiply may be optionally
9690 negated. */
9691 if (GET_CODE (mul_op0) == NEG)
9692 mul_op0 = XEXP (mul_op0, 0);
9693
9694 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9695 + rtx_cost (mul_op1, code, 0, speed_p)
9696 + rtx_cost (sub_op, code, 0, speed_p));
9697
9698 return true;
9699 }
9700
9701 if (speed_p)
9702 *cost += extra_cost->fp[mode != SFmode].addsub;
9703 return false;
9704 }
9705
9706 if (mode == SImode)
9707 {
9708 rtx shift_by_reg = NULL;
9709 rtx shift_op;
9710 rtx non_shift_op;
9711
9712 *cost = COSTS_N_INSNS (1);
9713
9714 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9715 if (shift_op == NULL)
9716 {
9717 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9718 non_shift_op = XEXP (x, 0);
9719 }
9720 else
9721 non_shift_op = XEXP (x, 1);
9722
9723 if (shift_op != NULL)
9724 {
9725 if (shift_by_reg != NULL)
9726 {
9727 if (speed_p)
9728 *cost += extra_cost->alu.arith_shift_reg;
9729 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9730 }
9731 else if (speed_p)
9732 *cost += extra_cost->alu.arith_shift;
9733
9734 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9735 + rtx_cost (non_shift_op, code, 0, speed_p));
9736 return true;
9737 }
9738
9739 if (arm_arch_thumb2
9740 && GET_CODE (XEXP (x, 1)) == MULT)
9741 {
9742 /* MLS. */
9743 if (speed_p)
9744 *cost += extra_cost->mult[0].add;
9745 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9746 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9747 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9748 return true;
9749 }
9750
9751 if (CONST_INT_P (XEXP (x, 0)))
9752 {
9753 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9754 INTVAL (XEXP (x, 0)), NULL_RTX,
9755 NULL_RTX, 1, 0);
9756 *cost = COSTS_N_INSNS (insns);
9757 if (speed_p)
9758 *cost += insns * extra_cost->alu.arith;
9759 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9760 return true;
9761 }
9762 else if (speed_p)
9763 *cost += extra_cost->alu.arith;
9764
9765 return false;
9766 }
9767
9768 if (GET_MODE_CLASS (mode) == MODE_INT
9769 && GET_MODE_SIZE (mode) < 4)
9770 {
9771 rtx shift_op, shift_reg;
9772 shift_reg = NULL;
9773
9774 /* We check both sides of the MINUS for shifter operands since,
9775 unlike PLUS, it's not commutative. */
9776
9777 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9778 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9779
9780 /* Slightly disparage, as we might need to widen the result. */
9781 *cost = 1 + COSTS_N_INSNS (1);
9782 if (speed_p)
9783 *cost += extra_cost->alu.arith;
9784
9785 if (CONST_INT_P (XEXP (x, 0)))
9786 {
9787 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9788 return true;
9789 }
9790
9791 return false;
9792 }
9793
9794 if (mode == DImode)
9795 {
9796 *cost = COSTS_N_INSNS (2);
9797
9798 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9799 {
9800 rtx op1 = XEXP (x, 1);
9801
9802 if (speed_p)
9803 *cost += 2 * extra_cost->alu.arith;
9804
9805 if (GET_CODE (op1) == ZERO_EXTEND)
9806 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9807 else
9808 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9809 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9810 0, speed_p);
9811 return true;
9812 }
9813 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9814 {
9815 if (speed_p)
9816 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9817 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9818 0, speed_p)
9819 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9820 return true;
9821 }
9822 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9823 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9824 {
9825 if (speed_p)
9826 *cost += (extra_cost->alu.arith
9827 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9828 ? extra_cost->alu.arith
9829 : extra_cost->alu.arith_shift));
9830 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9831 + rtx_cost (XEXP (XEXP (x, 1), 0),
9832 GET_CODE (XEXP (x, 1)), 0, speed_p));
9833 return true;
9834 }
9835
9836 if (speed_p)
9837 *cost += 2 * extra_cost->alu.arith;
9838 return false;
9839 }
9840
9841 /* Vector mode? */
9842
9843 *cost = LIBCALL_COST (2);
9844 return false;
9845
9846 case PLUS:
9847 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9848 && (mode == SFmode || !TARGET_VFP_SINGLE))
9849 {
9850 *cost = COSTS_N_INSNS (1);
9851 if (GET_CODE (XEXP (x, 0)) == MULT)
9852 {
9853 rtx mul_op0, mul_op1, add_op;
9854
9855 if (speed_p)
9856 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9857
9858 mul_op0 = XEXP (XEXP (x, 0), 0);
9859 mul_op1 = XEXP (XEXP (x, 0), 1);
9860 add_op = XEXP (x, 1);
9861
9862 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9863 + rtx_cost (mul_op1, code, 0, speed_p)
9864 + rtx_cost (add_op, code, 0, speed_p));
9865
9866 return true;
9867 }
9868
9869 if (speed_p)
9870 *cost += extra_cost->fp[mode != SFmode].addsub;
9871 return false;
9872 }
9873 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9874 {
9875 *cost = LIBCALL_COST (2);
9876 return false;
9877 }
9878
9879 /* Narrow modes can be synthesized in SImode, but the range
9880 of useful sub-operations is limited. Check for shift operations
9881 on one of the operands. Only left shifts can be used in the
9882 narrow modes. */
9883 if (GET_MODE_CLASS (mode) == MODE_INT
9884 && GET_MODE_SIZE (mode) < 4)
9885 {
9886 rtx shift_op, shift_reg;
9887 shift_reg = NULL;
9888
9889 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9890
9891 if (CONST_INT_P (XEXP (x, 1)))
9892 {
9893 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9894 INTVAL (XEXP (x, 1)), NULL_RTX,
9895 NULL_RTX, 1, 0);
9896 *cost = COSTS_N_INSNS (insns);
9897 if (speed_p)
9898 *cost += insns * extra_cost->alu.arith;
9899 /* Slightly penalize a narrow operation as the result may
9900 need widening. */
9901 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9902 return true;
9903 }
9904
9905 /* Slightly penalize a narrow operation as the result may
9906 need widening. */
9907 *cost = 1 + COSTS_N_INSNS (1);
9908 if (speed_p)
9909 *cost += extra_cost->alu.arith;
9910
9911 return false;
9912 }
9913
9914 if (mode == SImode)
9915 {
9916 rtx shift_op, shift_reg;
9917
9918 *cost = COSTS_N_INSNS (1);
9919 if (TARGET_INT_SIMD
9920 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9921 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9922 {
9923 /* UXTA[BH] or SXTA[BH]. */
9924 if (speed_p)
9925 *cost += extra_cost->alu.extend_arith;
9926 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9927 speed_p)
9928 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9929 return true;
9930 }
9931
9932 shift_reg = NULL;
9933 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9934 if (shift_op != NULL)
9935 {
9936 if (shift_reg)
9937 {
9938 if (speed_p)
9939 *cost += extra_cost->alu.arith_shift_reg;
9940 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9941 }
9942 else if (speed_p)
9943 *cost += extra_cost->alu.arith_shift;
9944
9945 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9946 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9947 return true;
9948 }
9949 if (GET_CODE (XEXP (x, 0)) == MULT)
9950 {
9951 rtx mul_op = XEXP (x, 0);
9952
9953 *cost = COSTS_N_INSNS (1);
9954
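/* Illustrative note: the (deliberately long) test below recognizes the
   accumulate forms that SMLA[BT][BT] handles directly, i.e. a PLUS of a
   multiply whose operands are each either a SIGN_EXTEND or an arithmetic
   right shift by 16 selecting the top half-word.  */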
9955 if (TARGET_DSP_MULTIPLY
9956 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9957 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9958 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9961 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9962 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9963 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9964 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9965 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9966 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9967 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9968 == 16))))))
9969 {
9970 /* SMLA[BT][BT]. */
9971 if (speed_p)
9972 *cost += extra_cost->mult[0].extend_add;
9973 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9976 SIGN_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9978 return true;
9979 }
9980
9981 if (speed_p)
9982 *cost += extra_cost->mult[0].add;
9983 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9984 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9985 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9986 return true;
9987 }
9988 if (CONST_INT_P (XEXP (x, 1)))
9989 {
9990 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9991 INTVAL (XEXP (x, 1)), NULL_RTX,
9992 NULL_RTX, 1, 0);
9993 *cost = COSTS_N_INSNS (insns);
9994 if (speed_p)
9995 *cost += insns * extra_cost->alu.arith;
9996 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9997 return true;
9998 }
9999 else if (speed_p)
10000 *cost += extra_cost->alu.arith;
10001
10002 return false;
10003 }
10004
10005 if (mode == DImode)
10006 {
10007 if (arm_arch3m
10008 && GET_CODE (XEXP (x, 0)) == MULT
10009 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10010 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10011 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10012 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10013 {
10014 *cost = COSTS_N_INSNS (1);
10015 if (speed_p)
10016 *cost += extra_cost->mult[1].extend_add;
10017 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10018 ZERO_EXTEND, 0, speed_p)
10019 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10020 ZERO_EXTEND, 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10022 return true;
10023 }
10024
10025 *cost = COSTS_N_INSNS (2);
10026
10027 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10028 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10029 {
10030 if (speed_p)
10031 *cost += (extra_cost->alu.arith
10032 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10033 ? extra_cost->alu.arith
10034 : extra_cost->alu.arith_shift));
10035
10036 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10037 speed_p)
10038 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10039 return true;
10040 }
10041
10042 if (speed_p)
10043 *cost += 2 * extra_cost->alu.arith;
10044 return false;
10045 }
10046
10047 /* Vector mode? */
10048 *cost = LIBCALL_COST (2);
10049 return false;
10050 case IOR:
10051 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10052 {
10053 *cost = COSTS_N_INSNS (1);
10054 if (speed_p)
10055 *cost += extra_cost->alu.rev;
10056
10057 return true;
10058 }
10059 /* Fall through. */
10060 case AND: case XOR:
10061 if (mode == SImode)
10062 {
10063 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10064 rtx op0 = XEXP (x, 0);
10065 rtx shift_op, shift_reg;
10066
10067 *cost = COSTS_N_INSNS (1);
10068
10069 if (subcode == NOT
10070 && (code == AND
10071 || (code == IOR && TARGET_THUMB2)))
10072 op0 = XEXP (op0, 0);
10073
10074 shift_reg = NULL;
10075 shift_op = shifter_op_p (op0, &shift_reg);
10076 if (shift_op != NULL)
10077 {
10078 if (shift_reg)
10079 {
10080 if (speed_p)
10081 *cost += extra_cost->alu.log_shift_reg;
10082 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10083 }
10084 else if (speed_p)
10085 *cost += extra_cost->alu.log_shift;
10086
10087 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10088 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10089 return true;
10090 }
10091
10092 if (CONST_INT_P (XEXP (x, 1)))
10093 {
10094 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10095 INTVAL (XEXP (x, 1)), NULL_RTX,
10096 NULL_RTX, 1, 0);
10097
10098 *cost = COSTS_N_INSNS (insns);
10099 if (speed_p)
10100 *cost += insns * extra_cost->alu.logical;
10101 *cost += rtx_cost (op0, code, 0, speed_p);
10102 return true;
10103 }
10104
10105 if (speed_p)
10106 *cost += extra_cost->alu.logical;
10107 *cost += (rtx_cost (op0, code, 0, speed_p)
10108 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10109 return true;
10110 }
10111
10112 if (mode == DImode)
10113 {
10114 rtx op0 = XEXP (x, 0);
10115 enum rtx_code subcode = GET_CODE (op0);
10116
10117 *cost = COSTS_N_INSNS (2);
10118
10119 if (subcode == NOT
10120 && (code == AND
10121 || (code == IOR && TARGET_THUMB2)))
10122 op0 = XEXP (op0, 0);
10123
10124 if (GET_CODE (op0) == ZERO_EXTEND)
10125 {
10126 if (speed_p)
10127 *cost += 2 * extra_cost->alu.logical;
10128
10129 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10130 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10131 return true;
10132 }
10133 else if (GET_CODE (op0) == SIGN_EXTEND)
10134 {
10135 if (speed_p)
10136 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10137
10138 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10139 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10140 return true;
10141 }
10142
10143 if (speed_p)
10144 *cost += 2 * extra_cost->alu.logical;
10145
10146 return true;
10147 }
10148 /* Vector mode? */
10149
10150 *cost = LIBCALL_COST (2);
10151 return false;
10152
10153 case MULT:
10154 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10155 && (mode == SFmode || !TARGET_VFP_SINGLE))
10156 {
10157 rtx op0 = XEXP (x, 0);
10158
10159 *cost = COSTS_N_INSNS (1);
10160
10161 if (GET_CODE (op0) == NEG)
10162 op0 = XEXP (op0, 0);
10163
10164 if (speed_p)
10165 *cost += extra_cost->fp[mode != SFmode].mult;
10166
10167 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10168 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10169 return true;
10170 }
10171 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10172 {
10173 *cost = LIBCALL_COST (2);
10174 return false;
10175 }
10176
10177 if (mode == SImode)
10178 {
10179 *cost = COSTS_N_INSNS (1);
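/* Illustrative note: the test below recognizes 16x16->32 multiplies in
   which each operand is either a SIGN_EXTEND or an arithmetic right
   shift by 16 selecting the top half-word, i.e. the forms that
   SMUL[TB][TB] can consume directly.  */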
10180 if (TARGET_DSP_MULTIPLY
10181 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10182 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10183 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10185 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10186 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10187 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10188 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10189 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10190 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10191 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10192 && (INTVAL (XEXP (XEXP (x, 1), 1))
10193 == 16))))))
10194 {
10195 /* SMUL[TB][TB]. */
10196 if (speed_p)
10197 *cost += extra_cost->mult[0].extend;
10198 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10199 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10200 return true;
10201 }
10202 if (speed_p)
10203 *cost += extra_cost->mult[0].simple;
10204 return false;
10205 }
10206
10207 if (mode == DImode)
10208 {
10209 if (arm_arch3m
10210 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10211 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10212 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10213 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10214 {
10215 *cost = COSTS_N_INSNS (1);
10216 if (speed_p)
10217 *cost += extra_cost->mult[1].extend;
10218 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10219 ZERO_EXTEND, 0, speed_p)
10220 + rtx_cost (XEXP (XEXP (x, 1), 0),
10221 ZERO_EXTEND, 0, speed_p));
10222 return true;
10223 }
10224
10225 *cost = LIBCALL_COST (2);
10226 return false;
10227 }
10228
10229 /* Vector mode? */
10230 *cost = LIBCALL_COST (2);
10231 return false;
10232
10233 case NEG:
10234 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10235 && (mode == SFmode || !TARGET_VFP_SINGLE))
10236 {
10237 *cost = COSTS_N_INSNS (1);
10238 if (speed_p)
10239 *cost += extra_cost->fp[mode != SFmode].neg;
10240
10241 return false;
10242 }
10243 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10244 {
10245 *cost = LIBCALL_COST (1);
10246 return false;
10247 }
10248
10249 if (mode == SImode)
10250 {
10251 if (GET_CODE (XEXP (x, 0)) == ABS)
10252 {
10253 *cost = COSTS_N_INSNS (2);
10254 /* Assume the non-flag-changing variant. */
10255 if (speed_p)
10256 *cost += (extra_cost->alu.log_shift
10257 + extra_cost->alu.arith_shift);
10258 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10259 return true;
10260 }
10261
10262 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10263 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10264 {
10265 *cost = COSTS_N_INSNS (2);
10266 /* No extra cost for MOV imm and MVN imm. */
10267 /* If the comparison op is using the flags, there's no further
10268 cost, otherwise we need to add the cost of the comparison. */
10269 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10270 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10271 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10272 {
10273 *cost += (COSTS_N_INSNS (1)
10274 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10275 speed_p)
10276 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10277 speed_p));
10278 if (speed_p)
10279 *cost += extra_cost->alu.arith;
10280 }
10281 return true;
10282 }
10283 *cost = COSTS_N_INSNS (1);
10284 if (speed_p)
10285 *cost += extra_cost->alu.arith;
10286 return false;
10287 }
10288
10289 if (GET_MODE_CLASS (mode) == MODE_INT
10290 && GET_MODE_SIZE (mode) < 4)
10291 {
10292 /* Slightly disparage, as we might need an extend operation. */
10293 *cost = 1 + COSTS_N_INSNS (1);
10294 if (speed_p)
10295 *cost += extra_cost->alu.arith;
10296 return false;
10297 }
10298
10299 if (mode == DImode)
10300 {
10301 *cost = COSTS_N_INSNS (2);
10302 if (speed_p)
10303 *cost += 2 * extra_cost->alu.arith;
10304 return false;
10305 }
10306
10307 /* Vector mode? */
10308 *cost = LIBCALL_COST (1);
10309 return false;
10310
10311 case NOT:
10312 if (mode == SImode)
10313 {
10314 rtx shift_op;
10315 rtx shift_reg = NULL;
10316
10317 *cost = COSTS_N_INSNS (1);
10318 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10319
10320 if (shift_op)
10321 {
10322 if (shift_reg != NULL)
10323 {
10324 if (speed_p)
10325 *cost += extra_cost->alu.log_shift_reg;
10326 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10327 }
10328 else if (speed_p)
10329 *cost += extra_cost->alu.log_shift;
10330 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10331 return true;
10332 }
10333
10334 if (speed_p)
10335 *cost += extra_cost->alu.logical;
10336 return false;
10337 }
10338 if (mode == DImode)
10339 {
10340 *cost = COSTS_N_INSNS (2);
10341 return false;
10342 }
10343
10344 /* Vector mode? */
10345
10346 *cost += LIBCALL_COST (1);
10347 return false;
10348
10349 case IF_THEN_ELSE:
10350 {
10351 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10352 {
10353 *cost = COSTS_N_INSNS (4);
10354 return true;
10355 }
10356 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10357 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10358
10359 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10360 /* Assume that if one arm of the if_then_else is a register, it will
10361 be tied with the result and the conditional insn will be
10362 eliminated. */
10363 if (REG_P (XEXP (x, 1)))
10364 *cost += op2cost;
10365 else if (REG_P (XEXP (x, 2)))
10366 *cost += op1cost;
10367 else
10368 {
10369 if (speed_p)
10370 {
10371 if (extra_cost->alu.non_exec_costs_exec)
10372 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10373 else
10374 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10375 }
10376 else
10377 *cost += op1cost + op2cost;
10378 }
10379 }
10380 return true;
10381
10382 case COMPARE:
10383 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10384 *cost = 0;
10385 else
10386 {
10387 machine_mode op0mode;
10388 /* We'll mostly assume that the cost of a compare is the cost of the
10389 LHS. However, there are some notable exceptions. */
10390
10391 /* Floating point compares are never done as side-effects. */
10392 op0mode = GET_MODE (XEXP (x, 0));
10393 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10394 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10395 {
10396 *cost = COSTS_N_INSNS (1);
10397 if (speed_p)
10398 *cost += extra_cost->fp[op0mode != SFmode].compare;
10399
10400 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10401 {
10402 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10403 return true;
10404 }
10405
10406 return false;
10407 }
10408 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10409 {
10410 *cost = LIBCALL_COST (2);
10411 return false;
10412 }
10413
10414 /* DImode compares normally take two insns. */
10415 if (op0mode == DImode)
10416 {
10417 *cost = COSTS_N_INSNS (2);
10418 if (speed_p)
10419 *cost += 2 * extra_cost->alu.arith;
10420 return false;
10421 }
10422
10423 if (op0mode == SImode)
10424 {
10425 rtx shift_op;
10426 rtx shift_reg;
10427
10428 if (XEXP (x, 1) == const0_rtx
10429 && !(REG_P (XEXP (x, 0))
10430 || (GET_CODE (XEXP (x, 0)) == SUBREG
10431 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10432 {
10433 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10434
10435 /* Multiply operations that set the flags are often
10436 significantly more expensive. */
10437 if (speed_p
10438 && GET_CODE (XEXP (x, 0)) == MULT
10439 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10440 *cost += extra_cost->mult[0].flag_setting;
10441
10442 if (speed_p
10443 && GET_CODE (XEXP (x, 0)) == PLUS
10444 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10445 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10446 0), 1), mode))
10447 *cost += extra_cost->mult[0].flag_setting;
10448 return true;
10449 }
10450
10451 shift_reg = NULL;
10452 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10453 if (shift_op != NULL)
10454 {
10455 *cost = COSTS_N_INSNS (1);
10456 if (shift_reg != NULL)
10457 {
10458 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10459 if (speed_p)
10460 *cost += extra_cost->alu.arith_shift_reg;
10461 }
10462 else if (speed_p)
10463 *cost += extra_cost->alu.arith_shift;
10464 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10465 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10466 return true;
10467 }
10468
10469 *cost = COSTS_N_INSNS (1);
10470 if (speed_p)
10471 *cost += extra_cost->alu.arith;
10472 if (CONST_INT_P (XEXP (x, 1))
10473 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10474 {
10475 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10476 return true;
10477 }
10478 return false;
10479 }
10480
10481 /* Vector mode? */
10482
10483 *cost = LIBCALL_COST (2);
10484 return false;
10485 }
10486 return true;
10487
10488 case EQ:
10489 case NE:
10490 case LT:
10491 case LE:
10492 case GT:
10493 case GE:
10494 case LTU:
10495 case LEU:
10496 case GEU:
10497 case GTU:
10498 case ORDERED:
10499 case UNORDERED:
10500 case UNEQ:
10501 case UNLE:
10502 case UNLT:
10503 case UNGE:
10504 case UNGT:
10505 case LTGT:
10506 if (outer_code == SET)
10507 {
10508 /* Is it a store-flag operation? */
10509 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10510 && XEXP (x, 1) == const0_rtx)
10511 {
10512 /* Thumb also needs an IT insn. */
10513 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10514 return true;
10515 }
10516 if (XEXP (x, 1) == const0_rtx)
10517 {
10518 switch (code)
10519 {
10520 case LT:
10521 /* LSR Rd, Rn, #31. */
10522 *cost = COSTS_N_INSNS (1);
10523 if (speed_p)
10524 *cost += extra_cost->alu.shift;
10525 break;
10526
10527 case EQ:
10528 /* RSBS T1, Rn, #0
10529 ADC Rd, Rn, T1. */
10530
10531 case NE:
10532 /* SUBS T1, Rn, #1
10533 SBC Rd, Rn, T1. */
10534 *cost = COSTS_N_INSNS (2);
10535 break;
10536
10537 case LE:
10538 /* RSBS T1, Rn, Rn, LSR #31
10539 ADC Rd, Rn, T1. */
10540 *cost = COSTS_N_INSNS (2);
10541 if (speed_p)
10542 *cost += extra_cost->alu.arith_shift;
10543 break;
10544
10545 case GT:
10546 /* RSB Rd, Rn, Rn, ASR #1
10547 LSR Rd, Rd, #31. */
10548 *cost = COSTS_N_INSNS (2);
10549 if (speed_p)
10550 *cost += (extra_cost->alu.arith_shift
10551 + extra_cost->alu.shift);
10552 break;
10553
10554 case GE:
10555 /* ASR Rd, Rn, #31
10556 ADD Rd, Rn, #1. */
10557 *cost = COSTS_N_INSNS (2);
10558 if (speed_p)
10559 *cost += extra_cost->alu.shift;
10560 break;
10561
10562 default:
10563 /* Remaining cases are either meaningless or would take
10564 three insns anyway. */
10565 *cost = COSTS_N_INSNS (3);
10566 break;
10567 }
10568 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10569 return true;
10570 }
10571 else
10572 {
10573 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10574 if (CONST_INT_P (XEXP (x, 1))
10575 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10576 {
10577 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10578 return true;
10579 }
10580
10581 return false;
10582 }
10583 }
10584 /* Not directly inside a set. If it involves the condition code
10585 register it must be the condition for a branch, cond_exec or
10586 I_T_E operation. Since the comparison is performed elsewhere
10587 this is just the control part which has no additional
10588 cost. */
10589 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10590 && XEXP (x, 1) == const0_rtx)
10591 {
10592 *cost = 0;
10593 return true;
10594 }
10595 return false;
10596
10597 case ABS:
10598 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10599 && (mode == SFmode || !TARGET_VFP_SINGLE))
10600 {
10601 *cost = COSTS_N_INSNS (1);
10602 if (speed_p)
10603 *cost += extra_cost->fp[mode != SFmode].neg;
10604
10605 return false;
10606 }
10607 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10608 {
10609 *cost = LIBCALL_COST (1);
10610 return false;
10611 }
10612
10613 if (mode == SImode)
10614 {
10615 *cost = COSTS_N_INSNS (1);
10616 if (speed_p)
10617 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10618 return false;
10619 }
10620 /* Vector mode? */
10621 *cost = LIBCALL_COST (1);
10622 return false;
10623
10624 case SIGN_EXTEND:
10625 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10626 && MEM_P (XEXP (x, 0)))
10627 {
10628 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10629
10630 if (mode == DImode)
10631 *cost += COSTS_N_INSNS (1);
10632
10633 if (!speed_p)
10634 return true;
10635
10636 if (GET_MODE (XEXP (x, 0)) == SImode)
10637 *cost += extra_cost->ldst.load;
10638 else
10639 *cost += extra_cost->ldst.load_sign_extend;
10640
10641 if (mode == DImode)
10642 *cost += extra_cost->alu.shift;
10643
10644 return true;
10645 }
10646
10647 /* Widening from less than 32 bits requires an extend operation. */
10648 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10649 {
10650 /* We have SXTB/SXTH. */
10651 *cost = COSTS_N_INSNS (1);
10652 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10653 if (speed_p)
10654 *cost += extra_cost->alu.extend;
10655 }
10656 else if (GET_MODE (XEXP (x, 0)) != SImode)
10657 {
10658 /* Needs two shifts. */
10659 *cost = COSTS_N_INSNS (2);
10660 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10661 if (speed_p)
10662 *cost += 2 * extra_cost->alu.shift;
10663 }
10664
10665 /* Widening beyond 32 bits requires one more insn. */
10666 if (mode == DImode)
10667 {
10668 *cost += COSTS_N_INSNS (1);
10669 if (speed_p)
10670 *cost += extra_cost->alu.shift;
10671 }
10672
10673 return true;
10674
10675 case ZERO_EXTEND:
10676 if ((arm_arch4
10677 || GET_MODE (XEXP (x, 0)) == SImode
10678 || GET_MODE (XEXP (x, 0)) == QImode)
10679 && MEM_P (XEXP (x, 0)))
10680 {
10681 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10682
10683 if (mode == DImode)
10684 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10685
10686 return true;
10687 }
10688
10689 /* Widening from less than 32 bits requires an extend operation. */
10690 if (GET_MODE (XEXP (x, 0)) == QImode)
10691 {
10692 /* UXTB can be a shorter instruction in Thumb2, but it might
10693 be slower than the AND Rd, Rn, #255 alternative. When
10694 optimizing for speed it should never be slower to use
10695 AND, and we don't really model 16-bit vs 32-bit insns
10696 here. */
10697 *cost = COSTS_N_INSNS (1);
10698 if (speed_p)
10699 *cost += extra_cost->alu.logical;
10700 }
10701 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10702 {
10703 /* We have UXTB/UXTH. */
10704 *cost = COSTS_N_INSNS (1);
10705 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10706 if (speed_p)
10707 *cost += extra_cost->alu.extend;
10708 }
10709 else if (GET_MODE (XEXP (x, 0)) != SImode)
10710 {
10711 /* Needs two shifts. It's marginally preferable to use
10712 shifts rather than two BIC instructions as the second
10713 shift may merge with a subsequent insn as a shifter
10714 op. */
10715 *cost = COSTS_N_INSNS (2);
10716 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10717 if (speed_p)
10718 *cost += 2 * extra_cost->alu.shift;
10719 }
10720 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10721 *cost = COSTS_N_INSNS (1);
10722
10723 /* Widening beyond 32 bits requires one more insn. */
10724 if (mode == DImode)
10725 {
10726 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10727 }
10728
10729 return true;
10730
10731 case CONST_INT:
10732 *cost = 0;
10733 /* CONST_INT has no mode, so we cannot tell for sure how many
10734 insns are really going to be needed. The best we can do is
10735 look at the value passed. If it fits in SImode, then assume
10736 that's the mode it will be used for. Otherwise assume it
10737 will be used in DImode. */
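/* E.g. (illustrative): 0x1ffffffff does not fit in SImode, so it is
   treated as a DImode constant and costed below as its low and high
   32-bit halves.  */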
10738 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10739 mode = SImode;
10740 else
10741 mode = DImode;
10742
10743 /* Avoid blowing up in arm_gen_constant (). */
10744 if (!(outer_code == PLUS
10745 || outer_code == AND
10746 || outer_code == IOR
10747 || outer_code == XOR
10748 || outer_code == MINUS))
10749 outer_code = SET;
10750
10751 const_int_cost:
10752 if (mode == SImode)
10753 {
10754 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10755 INTVAL (x), NULL, NULL,
10756 0, 0));
10757 /* Extra costs? */
10758 }
10759 else
10760 {
10761 *cost += COSTS_N_INSNS (arm_gen_constant
10762 (outer_code, SImode, NULL,
10763 trunc_int_for_mode (INTVAL (x), SImode),
10764 NULL, NULL, 0, 0)
10765 + arm_gen_constant (outer_code, SImode, NULL,
10766 INTVAL (x) >> 32, NULL,
10767 NULL, 0, 0));
10768 /* Extra costs? */
10769 }
10770
10771 return true;
10772
10773 case CONST:
10774 case LABEL_REF:
10775 case SYMBOL_REF:
10776 if (speed_p)
10777 {
10778 if (arm_arch_thumb2 && !flag_pic)
10779 *cost = COSTS_N_INSNS (2);
10780 else
10781 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10782 }
10783 else
10784 *cost = COSTS_N_INSNS (2);
10785
10786 if (flag_pic)
10787 {
10788 *cost += COSTS_N_INSNS (1);
10789 if (speed_p)
10790 *cost += extra_cost->alu.arith;
10791 }
10792
10793 return true;
10794
10795 case CONST_FIXED:
10796 *cost = COSTS_N_INSNS (4);
10797 /* Fixme. */
10798 return true;
10799
10800 case CONST_DOUBLE:
10801 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10802 && (mode == SFmode || !TARGET_VFP_SINGLE))
10803 {
10804 if (vfp3_const_double_rtx (x))
10805 {
10806 *cost = COSTS_N_INSNS (1);
10807 if (speed_p)
10808 *cost += extra_cost->fp[mode == DFmode].fpconst;
10809 return true;
10810 }
10811
10812 if (speed_p)
10813 {
10814 *cost = COSTS_N_INSNS (1);
10815 if (mode == DFmode)
10816 *cost += extra_cost->ldst.loadd;
10817 else
10818 *cost += extra_cost->ldst.loadf;
10819 }
10820 else
10821 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10822
10823 return true;
10824 }
10825 *cost = COSTS_N_INSNS (4);
10826 return true;
10827
10828 case CONST_VECTOR:
10829 /* Fixme. */
10830 if (TARGET_NEON
10831 && TARGET_HARD_FLOAT
10832 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10833 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10834 *cost = COSTS_N_INSNS (1);
10835 else
10836 *cost = COSTS_N_INSNS (4);
10837 return true;
10838
10839 case HIGH:
10840 case LO_SUM:
10841 *cost = COSTS_N_INSNS (1);
10842 /* When optimizing for size, we prefer constant pool entries to
10843 MOVW/MOVT pairs, so bump the cost of these slightly. */
10844 if (!speed_p)
10845 *cost += 1;
10846 return true;
10847
10848 case CLZ:
10849 *cost = COSTS_N_INSNS (1);
10850 if (speed_p)
10851 *cost += extra_cost->alu.clz;
10852 return false;
10853
10854 case SMIN:
10855 if (XEXP (x, 1) == const0_rtx)
10856 {
10857 *cost = COSTS_N_INSNS (1);
10858 if (speed_p)
10859 *cost += extra_cost->alu.log_shift;
10860 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10861 return true;
10862 }
10863 /* Fall through. */
10864 case SMAX:
10865 case UMIN:
10866 case UMAX:
10867 *cost = COSTS_N_INSNS (2);
10868 return false;
10869
10870 case TRUNCATE:
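/* Illustrative note: the pattern matched below is the high word of a
   widening 32x32->64 multiply, e.g. (truncate:SI (ashiftrt:DI (mult:DI
   (sign_extend:DI (reg)) (sign_extend:DI (reg))) (const_int 32))),
   essentially a SMULL/UMULL of which only the high result register is
   used.  */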
10871 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10872 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10873 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10874 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10875 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10876 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10877 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10878 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10879 == ZERO_EXTEND))))
10880 {
10881 *cost = COSTS_N_INSNS (1);
10882 if (speed_p)
10883 *cost += extra_cost->mult[1].extend;
10884 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10885 speed_p)
10886 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10887 0, speed_p));
10888 return true;
10889 }
10890 *cost = LIBCALL_COST (1);
10891 return false;
10892
10893 case UNSPEC:
10894 return arm_unspec_cost (x, outer_code, speed_p, cost);
10895
10896 case PC:
10897 /* Reading the PC is like reading any other register. Writing it
10898 is more expensive, but we take that into account elsewhere. */
10899 *cost = 0;
10900 return true;
10901
10902 case ZERO_EXTRACT:
10903 /* TODO: Simple zero_extract of bottom bits using AND. */
10904 /* Fall through. */
10905 case SIGN_EXTRACT:
10906 if (arm_arch6
10907 && mode == SImode
10908 && CONST_INT_P (XEXP (x, 1))
10909 && CONST_INT_P (XEXP (x, 2)))
10910 {
10911 *cost = COSTS_N_INSNS (1);
10912 if (speed_p)
10913 *cost += extra_cost->alu.bfx;
10914 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10915 return true;
10916 }
10917 /* Without UBFX/SBFX, need to resort to shift operations. */
10918 *cost = COSTS_N_INSNS (2);
10919 if (speed_p)
10920 *cost += 2 * extra_cost->alu.shift;
10921 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10922 return true;
10923
10924 case FLOAT_EXTEND:
10925 if (TARGET_HARD_FLOAT)
10926 {
10927 *cost = COSTS_N_INSNS (1);
10928 if (speed_p)
10929 *cost += extra_cost->fp[mode == DFmode].widen;
10930 if (!TARGET_FPU_ARMV8
10931 && GET_MODE (XEXP (x, 0)) == HFmode)
10932 {
10933 /* Pre v8, widening HF->DF is a two-step process, first
10934 widening to SFmode. */
10935 *cost += COSTS_N_INSNS (1);
10936 if (speed_p)
10937 *cost += extra_cost->fp[0].widen;
10938 }
10939 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10940 return true;
10941 }
10942
10943 *cost = LIBCALL_COST (1);
10944 return false;
10945
10946 case FLOAT_TRUNCATE:
10947 if (TARGET_HARD_FLOAT)
10948 {
10949 *cost = COSTS_N_INSNS (1);
10950 if (speed_p)
10951 *cost += extra_cost->fp[mode == DFmode].narrow;
10952 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10953 return true;
10954 /* Vector modes? */
10955 }
10956 *cost = LIBCALL_COST (1);
10957 return false;
10958
10959 case FMA:
10960 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10961 {
10962 rtx op0 = XEXP (x, 0);
10963 rtx op1 = XEXP (x, 1);
10964 rtx op2 = XEXP (x, 2);
10965
10966 *cost = COSTS_N_INSNS (1);
10967
10968 /* vfms or vfnma. */
10969 if (GET_CODE (op0) == NEG)
10970 op0 = XEXP (op0, 0);
10971
10972 /* vfnms or vfnma. */
10973 if (GET_CODE (op2) == NEG)
10974 op2 = XEXP (op2, 0);
10975
10976 *cost += rtx_cost (op0, FMA, 0, speed_p);
10977 *cost += rtx_cost (op1, FMA, 1, speed_p);
10978 *cost += rtx_cost (op2, FMA, 2, speed_p);
10979
10980 if (speed_p)
10981 *cost += extra_cost->fp[mode == DFmode].fma;
10982
10983 return true;
10984 }
10985
10986 *cost = LIBCALL_COST (3);
10987 return false;
10988
10989 case FIX:
10990 case UNSIGNED_FIX:
10991 if (TARGET_HARD_FLOAT)
10992 {
10993 if (GET_MODE_CLASS (mode) == MODE_INT)
10994 {
10995 *cost = COSTS_N_INSNS (1);
10996 if (speed_p)
10997 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10998 /* Strip off the 'cost' of rounding towards zero. */
10999 if (GET_CODE (XEXP (x, 0)) == FIX)
11000 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11001 else
11002 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11003 /* ??? Increase the cost to deal with transferring from
11004 FP -> CORE registers? */
11005 return true;
11006 }
11007 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11008 && TARGET_FPU_ARMV8)
11009 {
11010 *cost = COSTS_N_INSNS (1);
11011 if (speed_p)
11012 *cost += extra_cost->fp[mode == DFmode].roundint;
11013 return false;
11014 }
11015 /* Vector costs? */
11016 }
11017 *cost = LIBCALL_COST (1);
11018 return false;
11019
11020 case FLOAT:
11021 case UNSIGNED_FLOAT:
11022 if (TARGET_HARD_FLOAT)
11023 {
11024 /* ??? Increase the cost to deal with transferring from CORE
11025 -> FP registers? */
11026 *cost = COSTS_N_INSNS (1);
11027 if (speed_p)
11028 *cost += extra_cost->fp[mode == DFmode].fromint;
11029 return false;
11030 }
11031 *cost = LIBCALL_COST (1);
11032 return false;
11033
11034 case CALL:
11035 *cost = COSTS_N_INSNS (1);
11036 return true;
11037
11038 case ASM_OPERANDS:
11039 {
11040 /* Just a guess: the number of instructions in the asm
11041 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11042 though (see PR60663). */
11043 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11044 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11045
11046 *cost = COSTS_N_INSNS (asm_length + num_operands);
11047 return true;
11048 }
11049 default:
11050 if (mode != VOIDmode)
11051 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11052 else
11053 *cost = COSTS_N_INSNS (4); /* Who knows? */
11054 return false;
11055 }
11056 }
11057
11058 #undef HANDLE_NARROW_SHIFT_ARITH
11059
11060 /* Top-level RTX cost function; dispatches to the size or speed cost implementations as appropriate for the current tuning. */
11061 static bool
11062 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11063 int *total, bool speed)
11064 {
11065 bool result;
11066
11067 if (TARGET_OLD_RTX_COSTS
11068 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11069 {
11070 /* Old way. (Deprecated.) */
11071 if (!speed)
11072 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11073 (enum rtx_code) outer_code, total);
11074 else
11075 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11076 (enum rtx_code) outer_code, total,
11077 speed);
11078 }
11079 else
11080 {
11081 /* New way. */
11082 if (current_tune->insn_extra_cost)
11083 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11084 (enum rtx_code) outer_code,
11085 current_tune->insn_extra_cost,
11086 total, speed);
11087 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11088 && current_tune->insn_extra_cost != NULL */
11089 else
11090 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11091 (enum rtx_code) outer_code,
11092 &generic_extra_costs, total, speed);
11093 }
11094
11095 if (dump_file && (dump_flags & TDF_DETAILS))
11096 {
11097 print_rtl_single (dump_file, x);
11098 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11099 *total, result ? "final" : "partial");
11100 }
11101 return result;
11102 }
11103
11104 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11105 supported on any "slowmul" cores, so it can be ignored. */
11106
11107 static bool
11108 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11109 int *total, bool speed)
11110 {
11111 machine_mode mode = GET_MODE (x);
11112
11113 if (TARGET_THUMB)
11114 {
11115 *total = thumb1_rtx_costs (x, code, outer_code);
11116 return true;
11117 }
11118
11119 switch (code)
11120 {
11121 case MULT:
11122 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11123 || mode == DImode)
11124 {
11125 *total = COSTS_N_INSNS (20);
11126 return false;
11127 }
11128
11129 if (CONST_INT_P (XEXP (x, 1)))
11130 {
11131 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11132 & (unsigned HOST_WIDE_INT) 0xffffffff);
11133 int cost, const_ok = const_ok_for_arm (i);
11134 int j, booth_unit_size;
11135
11136 /* Tune as appropriate. */
11137 cost = const_ok ? 4 : 8;
11138 booth_unit_size = 2;
11139 for (j = 0; i && j < 32; j += booth_unit_size)
11140 {
11141 i >>= booth_unit_size;
11142 cost++;
11143 }
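	    /* For example, with this 2-bit Booth step a small constant such
	       as 0x5 adds two iterations to COST, while one using all 32 bits
	       (e.g. 0xffffffff) adds sixteen.  */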
11144
11145 *total = COSTS_N_INSNS (cost);
11146 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11147 return true;
11148 }
11149
11150 *total = COSTS_N_INSNS (20);
11151 return false;
11152
11153 default:
11154 return arm_rtx_costs_1 (x, outer_code, total, speed);
11155 }
11156 }
11157
11158
11159 /* RTX cost for cores with a fast multiply unit (M variants). */
11160
11161 static bool
11162 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11163 int *total, bool speed)
11164 {
11165 machine_mode mode = GET_MODE (x);
11166
11167 if (TARGET_THUMB1)
11168 {
11169 *total = thumb1_rtx_costs (x, code, outer_code);
11170 return true;
11171 }
11172
11173 /* ??? Should Thumb-2 use different costs? */
11174 switch (code)
11175 {
11176 case MULT:
11177 /* There is no point basing this on the tuning, since it is always the
11178 fast variant if it exists at all. */
11179 if (mode == DImode
11180 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11181 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11182 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11183 {
11184 *total = COSTS_N_INSNS (2);
11185 return false;
11186 }
11187
11188
11189 if (mode == DImode)
11190 {
11191 *total = COSTS_N_INSNS (5);
11192 return false;
11193 }
11194
11195 if (CONST_INT_P (XEXP (x, 1)))
11196 {
11197 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11198 & (unsigned HOST_WIDE_INT) 0xffffffff);
11199 int cost, const_ok = const_ok_for_arm (i);
11200 int j, booth_unit_size;
11201
11202 /* Tune as appropriate. */
11203 cost = const_ok ? 4 : 8;
11204 booth_unit_size = 8;
11205 for (j = 0; i && j < 32; j += booth_unit_size)
11206 {
11207 i >>= booth_unit_size;
11208 cost++;
11209 }
11210
11211 *total = COSTS_N_INSNS (cost);
11212 return false;
11213 }
11214
11215 if (mode == SImode)
11216 {
11217 *total = COSTS_N_INSNS (4);
11218 return false;
11219 }
11220
11221 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11222 {
11223 if (TARGET_HARD_FLOAT
11224 && (mode == SFmode
11225 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11226 {
11227 *total = COSTS_N_INSNS (1);
11228 return false;
11229 }
11230 }
11231
11232 /* Requires a lib call */
11233 *total = COSTS_N_INSNS (20);
11234 return false;
11235
11236 default:
11237 return arm_rtx_costs_1 (x, outer_code, total, speed);
11238 }
11239 }
11240
11241
11242 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11243 so it can be ignored. */
11244
11245 static bool
11246 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11247 int *total, bool speed)
11248 {
11249 machine_mode mode = GET_MODE (x);
11250
11251 if (TARGET_THUMB)
11252 {
11253 *total = thumb1_rtx_costs (x, code, outer_code);
11254 return true;
11255 }
11256
11257 switch (code)
11258 {
11259 case COMPARE:
11260 if (GET_CODE (XEXP (x, 0)) != MULT)
11261 return arm_rtx_costs_1 (x, outer_code, total, speed);
11262
11263 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11264 will stall until the multiplication is complete. */
11265 *total = COSTS_N_INSNS (3);
11266 return false;
11267
11268 case MULT:
11269 /* There is no point basing this on the tuning, since it is always the
11270 fast variant if it exists at all. */
11271 if (mode == DImode
11272 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11273 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11274 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11275 {
11276 *total = COSTS_N_INSNS (2);
11277 return false;
11278 }
11279
11280
11281 if (mode == DImode)
11282 {
11283 *total = COSTS_N_INSNS (5);
11284 return false;
11285 }
11286
11287 if (CONST_INT_P (XEXP (x, 1)))
11288 {
11289 /* If operand 1 is a constant we can more accurately
11290 calculate the cost of the multiply. The multiplier can
11291 retire 15 bits on the first cycle and a further 12 on the
11292 second. We do, of course, have to load the constant into
11293 a register first. */
11294 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11295 /* There's a general overhead of one cycle. */
11296 int cost = 1;
11297 unsigned HOST_WIDE_INT masked_const;
11298
11299 if (i & 0x80000000)
11300 i = ~i;
11301
11302 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11303
11304 masked_const = i & 0xffff8000;
11305 if (masked_const != 0)
11306 {
11307 cost++;
11308 masked_const = i & 0xf8000000;
11309 if (masked_const != 0)
11310 cost++;
11311 }
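	  /* As an illustration: after the conditional inversion above, a
	     constant that fits in the low 15 bits leaves COST at 1, one that
	     fits in the low 27 bits costs 2, and anything wider costs 3.  */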
11312 *total = COSTS_N_INSNS (cost);
11313 return false;
11314 }
11315
11316 if (mode == SImode)
11317 {
11318 *total = COSTS_N_INSNS (3);
11319 return false;
11320 }
11321
11322 /* Requires a lib call */
11323 *total = COSTS_N_INSNS (20);
11324 return false;
11325
11326 default:
11327 return arm_rtx_costs_1 (x, outer_code, total, speed);
11328 }
11329 }
11330
11331
11332 /* RTX costs for 9e (and later) cores. */
11333
11334 static bool
11335 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11336 int *total, bool speed)
11337 {
11338 machine_mode mode = GET_MODE (x);
11339
11340 if (TARGET_THUMB1)
11341 {
11342 switch (code)
11343 {
11344 case MULT:
11345 /* Small multiply: 32 cycles for an integer multiply inst. */
11346 if (arm_arch6m && arm_m_profile_small_mul)
11347 *total = COSTS_N_INSNS (32);
11348 else
11349 *total = COSTS_N_INSNS (3);
11350 return true;
11351
11352 default:
11353 *total = thumb1_rtx_costs (x, code, outer_code);
11354 return true;
11355 }
11356 }
11357
11358 switch (code)
11359 {
11360 case MULT:
11361 /* There is no point basing this on the tuning, since it is always the
11362 fast variant if it exists at all. */
11363 if (mode == DImode
11364 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11365 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11366 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11367 {
11368 *total = COSTS_N_INSNS (2);
11369 return false;
11370 }
11371
11372
11373 if (mode == DImode)
11374 {
11375 *total = COSTS_N_INSNS (5);
11376 return false;
11377 }
11378
11379 if (mode == SImode)
11380 {
11381 *total = COSTS_N_INSNS (2);
11382 return false;
11383 }
11384
11385 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11386 {
11387 if (TARGET_HARD_FLOAT
11388 && (mode == SFmode
11389 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11390 {
11391 *total = COSTS_N_INSNS (1);
11392 return false;
11393 }
11394 }
11395
11396 *total = COSTS_N_INSNS (20);
11397 return false;
11398
11399 default:
11400 return arm_rtx_costs_1 (x, outer_code, total, speed);
11401 }
11402 }
11403 /* All address computations that can be done are free, but rtx cost returns
11404 the same for practically all of them. So we weight the different types
11405 of address here in the following order (most preferred first):
11406 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
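/* For instance, (post_inc (reg)) weighs 0, (plus (reg) (const_int 4))
   weighs 2, a bare (reg) weighs 6 and a (mem ...) address weighs 10.  */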
11407 static inline int
11408 arm_arm_address_cost (rtx x)
11409 {
11410 enum rtx_code c = GET_CODE (x);
11411
11412 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11413 return 0;
11414 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11415 return 10;
11416
11417 if (c == PLUS)
11418 {
11419 if (CONST_INT_P (XEXP (x, 1)))
11420 return 2;
11421
11422 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11423 return 3;
11424
11425 return 4;
11426 }
11427
11428 return 6;
11429 }
11430
11431 static inline int
11432 arm_thumb_address_cost (rtx x)
11433 {
11434 enum rtx_code c = GET_CODE (x);
11435
11436 if (c == REG)
11437 return 1;
11438 if (c == PLUS
11439 && REG_P (XEXP (x, 0))
11440 && CONST_INT_P (XEXP (x, 1)))
11441 return 1;
11442
11443 return 2;
11444 }
11445
11446 static int
11447 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11448 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11449 {
11450 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11451 }
11452
11453 /* Adjust cost hook for XScale. */
11454 static bool
11455 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11456 {
11457 /* Some true dependencies can have a higher cost depending
11458 on precisely how certain input operands are used. */
11459 if (REG_NOTE_KIND(link) == 0
11460 && recog_memoized (insn) >= 0
11461 && recog_memoized (dep) >= 0)
11462 {
11463 int shift_opnum = get_attr_shift (insn);
11464 enum attr_type attr_type = get_attr_type (dep);
11465
11466 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11467 operand for INSN. If we have a shifted input operand and the
11468 instruction we depend on is another ALU instruction, then we may
11469 have to account for an additional stall. */
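	  /* A hypothetical example: if DEP is "mov r2, r3, lsl #1" and INSN
	     is "add r0, r1, r2, lsl #2", then DEP writes the shifted operand
	     r2 of INSN and the dependency cost is set to 2 below.  */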
11470 if (shift_opnum != 0
11471 && (attr_type == TYPE_ALU_SHIFT_IMM
11472 || attr_type == TYPE_ALUS_SHIFT_IMM
11473 || attr_type == TYPE_LOGIC_SHIFT_IMM
11474 || attr_type == TYPE_LOGICS_SHIFT_IMM
11475 || attr_type == TYPE_ALU_SHIFT_REG
11476 || attr_type == TYPE_ALUS_SHIFT_REG
11477 || attr_type == TYPE_LOGIC_SHIFT_REG
11478 || attr_type == TYPE_LOGICS_SHIFT_REG
11479 || attr_type == TYPE_MOV_SHIFT
11480 || attr_type == TYPE_MVN_SHIFT
11481 || attr_type == TYPE_MOV_SHIFT_REG
11482 || attr_type == TYPE_MVN_SHIFT_REG))
11483 {
11484 rtx shifted_operand;
11485 int opno;
11486
11487 /* Get the shifted operand. */
11488 extract_insn (insn);
11489 shifted_operand = recog_data.operand[shift_opnum];
11490
11491 /* Iterate over all the operands in DEP. If we write an operand
11492 that overlaps with SHIFTED_OPERAND, then we have to increase the
11493 cost of this dependency. */
11494 extract_insn (dep);
11495 preprocess_constraints (dep);
11496 for (opno = 0; opno < recog_data.n_operands; opno++)
11497 {
11498 /* We can ignore strict inputs. */
11499 if (recog_data.operand_type[opno] == OP_IN)
11500 continue;
11501
11502 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11503 shifted_operand))
11504 {
11505 *cost = 2;
11506 return false;
11507 }
11508 }
11509 }
11510 }
11511 return true;
11512 }
11513
11514 /* Adjust cost hook for Cortex A9. */
11515 static bool
11516 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11517 {
11518 switch (REG_NOTE_KIND (link))
11519 {
11520 case REG_DEP_ANTI:
11521 *cost = 0;
11522 return false;
11523
11524 case REG_DEP_TRUE:
11525 case REG_DEP_OUTPUT:
11526 if (recog_memoized (insn) >= 0
11527 && recog_memoized (dep) >= 0)
11528 {
11529 if (GET_CODE (PATTERN (insn)) == SET)
11530 {
11531 if (GET_MODE_CLASS
11532 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11533 || GET_MODE_CLASS
11534 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11535 {
11536 enum attr_type attr_type_insn = get_attr_type (insn);
11537 enum attr_type attr_type_dep = get_attr_type (dep);
11538
11539 /* By default all dependencies of the form
11540 s0 = s0 <op> s1
11541 s0 = s0 <op> s2
11542 have an extra latency of 1 cycle because
11543 of the input and output dependency in this
11544 case. However, this gets modeled as a true
11545 dependency, hence all these checks. */
11546 if (REG_P (SET_DEST (PATTERN (insn)))
11547 && REG_P (SET_DEST (PATTERN (dep)))
11548 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11549 SET_DEST (PATTERN (dep))))
11550 {
11551 /* FMACS is a special case where the dependent
11552 instruction can be issued 3 cycles before
11553 the normal latency in case of an output
11554 dependency. */
11555 if ((attr_type_insn == TYPE_FMACS
11556 || attr_type_insn == TYPE_FMACD)
11557 && (attr_type_dep == TYPE_FMACS
11558 || attr_type_dep == TYPE_FMACD))
11559 {
11560 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11561 *cost = insn_default_latency (dep) - 3;
11562 else
11563 *cost = insn_default_latency (dep);
11564 return false;
11565 }
11566 else
11567 {
11568 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11569 *cost = insn_default_latency (dep) + 1;
11570 else
11571 *cost = insn_default_latency (dep);
11572 }
11573 return false;
11574 }
11575 }
11576 }
11577 }
11578 break;
11579
11580 default:
11581 gcc_unreachable ();
11582 }
11583
11584 return true;
11585 }
11586
11587 /* Adjust cost hook for FA726TE. */
11588 static bool
11589 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11590 {
11591 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11592 has a penalty of 3. */
11593 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11594 && recog_memoized (insn) >= 0
11595 && recog_memoized (dep) >= 0
11596 && get_attr_conds (dep) == CONDS_SET)
11597 {
11598 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11599 if (get_attr_conds (insn) == CONDS_USE
11600 && get_attr_type (insn) != TYPE_BRANCH)
11601 {
11602 *cost = 3;
11603 return false;
11604 }
11605
11606 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11607 || get_attr_conds (insn) == CONDS_USE)
11608 {
11609 *cost = 0;
11610 return false;
11611 }
11612 }
11613
11614 return true;
11615 }
11616
11617 /* Implement TARGET_REGISTER_MOVE_COST.
11618
11619 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11620 it is typically more expensive than a single memory access. We set
11621 the cost to less than two memory accesses so that floating
11622 point to integer conversion does not go through memory. */
11623
11624 int
11625 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11626 reg_class_t from, reg_class_t to)
11627 {
11628 if (TARGET_32BIT)
11629 {
11630 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11631 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11632 return 15;
11633 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11634 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11635 return 4;
11636 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11637 return 20;
11638 else
11639 return 2;
11640 }
11641 else
11642 {
11643 if (from == HI_REGS || to == HI_REGS)
11644 return 4;
11645 else
11646 return 2;
11647 }
11648 }
11649
11650 /* Implement TARGET_MEMORY_MOVE_COST. */
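/* For example, when TARGET_32BIT the cost is a flat 10; for Thumb-1 an
   SImode (4-byte) move costs 8 to or from LO_REGS and 16 for any other
   class, while sub-word moves cost 8 regardless of class.  */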
11651
11652 int
11653 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11654 bool in ATTRIBUTE_UNUSED)
11655 {
11656 if (TARGET_32BIT)
11657 return 10;
11658 else
11659 {
11660 if (GET_MODE_SIZE (mode) < 4)
11661 return 8;
11662 else
11663 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11664 }
11665 }
11666
11667 /* Vectorizer cost model implementation. */
11668
11669 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11670 static int
11671 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11672 tree vectype,
11673 int misalign ATTRIBUTE_UNUSED)
11674 {
11675 unsigned elements;
11676
11677 switch (type_of_cost)
11678 {
11679 case scalar_stmt:
11680 return current_tune->vec_costs->scalar_stmt_cost;
11681
11682 case scalar_load:
11683 return current_tune->vec_costs->scalar_load_cost;
11684
11685 case scalar_store:
11686 return current_tune->vec_costs->scalar_store_cost;
11687
11688 case vector_stmt:
11689 return current_tune->vec_costs->vec_stmt_cost;
11690
11691 case vector_load:
11692 return current_tune->vec_costs->vec_align_load_cost;
11693
11694 case vector_store:
11695 return current_tune->vec_costs->vec_store_cost;
11696
11697 case vec_to_scalar:
11698 return current_tune->vec_costs->vec_to_scalar_cost;
11699
11700 case scalar_to_vec:
11701 return current_tune->vec_costs->scalar_to_vec_cost;
11702
11703 case unaligned_load:
11704 return current_tune->vec_costs->vec_unalign_load_cost;
11705
11706 case unaligned_store:
11707 return current_tune->vec_costs->vec_unalign_store_cost;
11708
11709 case cond_branch_taken:
11710 return current_tune->vec_costs->cond_taken_branch_cost;
11711
11712 case cond_branch_not_taken:
11713 return current_tune->vec_costs->cond_not_taken_branch_cost;
11714
11715 case vec_perm:
11716 case vec_promote_demote:
11717 return current_tune->vec_costs->vec_stmt_cost;
11718
11719 case vec_construct:
11720 elements = TYPE_VECTOR_SUBPARTS (vectype);
11721 return elements / 2 + 1;
11722
11723 default:
11724 gcc_unreachable ();
11725 }
11726 }
11727
11728 /* Implement targetm.vectorize.add_stmt_cost. */
11729
11730 static unsigned
11731 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11732 struct _stmt_vec_info *stmt_info, int misalign,
11733 enum vect_cost_model_location where)
11734 {
11735 unsigned *cost = (unsigned *) data;
11736 unsigned retval = 0;
11737
11738 if (flag_vect_cost_model)
11739 {
11740 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11741 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11742
11743 /* Statements in an inner loop relative to the loop being
11744 vectorized are weighted more heavily. The value here is
11745 arbitrary and could potentially be improved with analysis. */
11746 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11747 count *= 50; /* FIXME. */
11748
11749 retval = (unsigned) (count * stmt_cost);
11750 cost[where] += retval;
11751 }
11752
11753 return retval;
11754 }
11755
11756 /* Return true if and only if this insn can dual-issue only as older. */
11757 static bool
11758 cortexa7_older_only (rtx_insn *insn)
11759 {
11760 if (recog_memoized (insn) < 0)
11761 return false;
11762
11763 switch (get_attr_type (insn))
11764 {
11765 case TYPE_ALU_DSP_REG:
11766 case TYPE_ALU_SREG:
11767 case TYPE_ALUS_SREG:
11768 case TYPE_LOGIC_REG:
11769 case TYPE_LOGICS_REG:
11770 case TYPE_ADC_REG:
11771 case TYPE_ADCS_REG:
11772 case TYPE_ADR:
11773 case TYPE_BFM:
11774 case TYPE_REV:
11775 case TYPE_MVN_REG:
11776 case TYPE_SHIFT_IMM:
11777 case TYPE_SHIFT_REG:
11778 case TYPE_LOAD_BYTE:
11779 case TYPE_LOAD1:
11780 case TYPE_STORE1:
11781 case TYPE_FFARITHS:
11782 case TYPE_FADDS:
11783 case TYPE_FFARITHD:
11784 case TYPE_FADDD:
11785 case TYPE_FMOV:
11786 case TYPE_F_CVT:
11787 case TYPE_FCMPS:
11788 case TYPE_FCMPD:
11789 case TYPE_FCONSTS:
11790 case TYPE_FCONSTD:
11791 case TYPE_FMULS:
11792 case TYPE_FMACS:
11793 case TYPE_FMULD:
11794 case TYPE_FMACD:
11795 case TYPE_FDIVS:
11796 case TYPE_FDIVD:
11797 case TYPE_F_MRC:
11798 case TYPE_F_MRRC:
11799 case TYPE_F_FLAG:
11800 case TYPE_F_LOADS:
11801 case TYPE_F_STORES:
11802 return true;
11803 default:
11804 return false;
11805 }
11806 }
11807
11808 /* Return true if and only if this insn can dual-issue as younger. */
11809 static bool
11810 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11811 {
11812 if (recog_memoized (insn) < 0)
11813 {
11814 if (verbose > 5)
11815 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11816 return false;
11817 }
11818
11819 switch (get_attr_type (insn))
11820 {
11821 case TYPE_ALU_IMM:
11822 case TYPE_ALUS_IMM:
11823 case TYPE_LOGIC_IMM:
11824 case TYPE_LOGICS_IMM:
11825 case TYPE_EXTEND:
11826 case TYPE_MVN_IMM:
11827 case TYPE_MOV_IMM:
11828 case TYPE_MOV_REG:
11829 case TYPE_MOV_SHIFT:
11830 case TYPE_MOV_SHIFT_REG:
11831 case TYPE_BRANCH:
11832 case TYPE_CALL:
11833 return true;
11834 default:
11835 return false;
11836 }
11837 }
11838
11839
11840 /* Look for an instruction that can dual issue only as an older
11841 instruction, and move it in front of any instructions that can
11842 dual-issue as younger, while preserving the relative order of all
11843 other instructions in the ready list. This is a heuristic to help
11844 dual-issue in later cycles, by postponing issue of more flexible
11845 instructions. This heuristic may affect dual issue opportunities
11846 in the current cycle. */
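/* As a worked example, if the ready list in issue order is
   { A (younger), X, B (older-only) }, then B is hoisted and the list
   becomes { B, A, X }, postponing the more flexible A.  */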
11847 static void
11848 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11849 int *n_readyp, int clock)
11850 {
11851 int i;
11852 int first_older_only = -1, first_younger = -1;
11853
11854 if (verbose > 5)
11855 fprintf (file,
11856 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11857 clock,
11858 *n_readyp);
11859
11860 /* Traverse the ready list from the head (the instruction to issue
11861 first), looking for the first instruction that can issue as
11862 younger and the first instruction that can dual-issue only as
11863 older. */
11864 for (i = *n_readyp - 1; i >= 0; i--)
11865 {
11866 rtx_insn *insn = ready[i];
11867 if (cortexa7_older_only (insn))
11868 {
11869 first_older_only = i;
11870 if (verbose > 5)
11871 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11872 break;
11873 }
11874 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11875 first_younger = i;
11876 }
11877
11878 /* Nothing to reorder because either no younger insn found or insn
11879 that can dual-issue only as older appears before any insn that
11880 can dual-issue as younger. */
11881 if (first_younger == -1)
11882 {
11883 if (verbose > 5)
11884 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11885 return;
11886 }
11887
11888 /* Nothing to reorder because no older-only insn in the ready list. */
11889 if (first_older_only == -1)
11890 {
11891 if (verbose > 5)
11892 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11893 return;
11894 }
11895
11896 /* Move first_older_only insn before first_younger. */
11897 if (verbose > 5)
11898 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11899 INSN_UID (ready[first_older_only]),
11900 INSN_UID (ready[first_younger]));
11901 rtx_insn *first_older_only_insn = ready[first_older_only];
11902 for (i = first_older_only; i < first_younger; i++)
11903 {
11904 ready[i] = ready[i+1];
11905 }
11906
11907 ready[i] = first_older_only_insn;
11908 return;
11909 }
11910
11911 /* Implement TARGET_SCHED_REORDER. */
11912 static int
11913 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11914 int clock)
11915 {
11916 switch (arm_tune)
11917 {
11918 case cortexa7:
11919 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11920 break;
11921 default:
11922 /* Do nothing for other cores. */
11923 break;
11924 }
11925
11926 return arm_issue_rate ();
11927 }
11928
11929 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11930 It corrects the value of COST based on the relationship between
11931 INSN and DEP through the dependence LINK. It returns the new
11932 value. There is a per-core adjust_cost hook to adjust scheduler costs
11933 and the per-core hook can choose to completely override the generic
11934 adjust_cost function. Only put bits of code into arm_adjust_cost that
11935 are common across all cores. */
11936 static int
11937 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11938 {
11939 rtx i_pat, d_pat;
11940
11941 /* When generating Thumb-1 code, we want to place flag-setting operations
11942 close to a conditional branch which depends on them, so that we can
11943 omit the comparison. */
11944 if (TARGET_THUMB1
11945 && REG_NOTE_KIND (link) == 0
11946 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11947 && recog_memoized (dep) >= 0
11948 && get_attr_conds (dep) == CONDS_SET)
11949 return 0;
11950
11951 if (current_tune->sched_adjust_cost != NULL)
11952 {
11953 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11954 return cost;
11955 }
11956
11957 /* XXX Is this strictly true? */
11958 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11959 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11960 return 0;
11961
11962 /* Call insns don't incur a stall, even if they follow a load. */
11963 if (REG_NOTE_KIND (link) == 0
11964 && CALL_P (insn))
11965 return 1;
11966
11967 if ((i_pat = single_set (insn)) != NULL
11968 && MEM_P (SET_SRC (i_pat))
11969 && (d_pat = single_set (dep)) != NULL
11970 && MEM_P (SET_DEST (d_pat)))
11971 {
11972 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11973 /* This is a load after a store; there is no conflict if the load reads
11974 from a cached area. Assume that loads from the stack, and from the
11975 constant pool are cached, and that others will miss. This is a
11976 hack. */
11977
11978 if ((GET_CODE (src_mem) == SYMBOL_REF
11979 && CONSTANT_POOL_ADDRESS_P (src_mem))
11980 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11981 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11982 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11983 return 1;
11984 }
11985
11986 return cost;
11987 }
11988
11989 int
11990 arm_max_conditional_execute (void)
11991 {
11992 return max_insns_skipped;
11993 }
11994
11995 static int
11996 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11997 {
11998 if (TARGET_32BIT)
11999 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12000 else
12001 return (optimize > 0) ? 2 : 0;
12002 }
12003
12004 static int
12005 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12006 {
12007 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12008 }
12009
12010 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12011 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12012 sequences of non-executed instructions in IT blocks probably take the same
12013 amount of time as executed instructions (and the IT instruction itself takes
12014 space in icache). This function was experimentally determined to give good
12015 results on a popular embedded benchmark. */
12016
12017 static int
12018 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12019 {
12020 return (TARGET_32BIT && speed_p) ? 1
12021 : arm_default_branch_cost (speed_p, predictable_p);
12022 }
12023
12024 static bool fp_consts_inited = false;
12025
12026 static REAL_VALUE_TYPE value_fp0;
12027
12028 static void
12029 init_fp_table (void)
12030 {
12031 REAL_VALUE_TYPE r;
12032
12033 r = REAL_VALUE_ATOF ("0", DFmode);
12034 value_fp0 = r;
12035 fp_consts_inited = true;
12036 }
12037
12038 /* Return TRUE if rtx X is a valid immediate FP constant. */
12039 int
12040 arm_const_double_rtx (rtx x)
12041 {
12042 REAL_VALUE_TYPE r;
12043
12044 if (!fp_consts_inited)
12045 init_fp_table ();
12046
12047 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12048 if (REAL_VALUE_MINUS_ZERO (r))
12049 return 0;
12050
12051 if (REAL_VALUES_EQUAL (r, value_fp0))
12052 return 1;
12053
12054 return 0;
12055 }
12056
12057 /* VFPv3 has a fairly wide range of representable immediates, formed from
12058 "quarter-precision" floating-point values. These can be evaluated using this
12059 formula (with ^ for exponentiation):
12060
12061 -1^s * n * 2^-r
12062
12063 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12064 16 <= n <= 31 and 0 <= r <= 7.
12065
12066 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12067
12068 - A (most-significant) is the sign bit.
12069 - BCD are the exponent (encoded as r XOR 3).
12070 - EFGH are the mantissa (encoded as n - 16).
12071 */
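/* As a worked example, 1.0 is represented with s = 0, n = 16, r = 4
   (i.e. 16 * 2^-4), giving the index byte
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70.  */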
12072
12073 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12074 fconst[sd] instruction, or -1 if X isn't suitable. */
12075 static int
12076 vfp3_const_double_index (rtx x)
12077 {
12078 REAL_VALUE_TYPE r, m;
12079 int sign, exponent;
12080 unsigned HOST_WIDE_INT mantissa, mant_hi;
12081 unsigned HOST_WIDE_INT mask;
12082 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12083 bool fail;
12084
12085 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12086 return -1;
12087
12088 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12089
12090 /* We can't represent these things, so detect them first. */
12091 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12092 return -1;
12093
12094 /* Extract sign, exponent and mantissa. */
12095 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12096 r = real_value_abs (&r);
12097 exponent = REAL_EXP (&r);
12098 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12099 highest (sign) bit, with a fixed binary point at bit point_pos.
12100 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12101 bits for the mantissa, this may fail (low bits would be lost). */
12102 real_ldexp (&m, &r, point_pos - exponent);
12103 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12104 mantissa = w.elt (0);
12105 mant_hi = w.elt (1);
12106
12107 /* If there are bits set in the low part of the mantissa, we can't
12108 represent this value. */
12109 if (mantissa != 0)
12110 return -1;
12111
12112 /* Now make it so that mantissa contains the most-significant bits, and move
12113 the point_pos to indicate that the least-significant bits have been
12114 discarded. */
12115 point_pos -= HOST_BITS_PER_WIDE_INT;
12116 mantissa = mant_hi;
12117
12118 /* We can permit four significant bits of mantissa only, plus a high bit
12119 which is always 1. */
12120 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12121 if ((mantissa & mask) != 0)
12122 return -1;
12123
12124 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12125 mantissa >>= point_pos - 5;
12126
12127 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12128 floating-point immediate zero with Neon using an integer-zero load, but
12129 that case is handled elsewhere.) */
12130 if (mantissa == 0)
12131 return -1;
12132
12133 gcc_assert (mantissa >= 16 && mantissa <= 31);
12134
12135 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12136 normalized significands are in the range [1, 2). (Our mantissa is shifted
12137 left 4 places at this point relative to normalized IEEE754 values). GCC
12138 internally uses [0.5, 1) (see real.c), so the exponent returned from
12139 REAL_EXP must be altered. */
12140 exponent = 5 - exponent;
12141
12142 if (exponent < 0 || exponent > 7)
12143 return -1;
12144
12145 /* Sign, mantissa and exponent are now in the correct form to plug into the
12146 formula described in the comment above. */
12147 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12148 }
12149
12150 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12151 int
12152 vfp3_const_double_rtx (rtx x)
12153 {
12154 if (!TARGET_VFP3)
12155 return 0;
12156
12157 return vfp3_const_double_index (x) != -1;
12158 }
12159
12160 /* Recognize immediates which can be used in various Neon instructions. Legal
12161 immediates are described by the following table (for VMVN variants, the
12162 bitwise inverse of the constant shown is recognized. In either case, VMOV
12163 is output and the correct instruction to use for a given constant is chosen
12164 by the assembler). The constant shown is replicated across all elements of
12165 the destination vector.
12166
12167 insn elems variant constant (binary)
12168 ---- ----- ------- -----------------
12169 vmov i32 0 00000000 00000000 00000000 abcdefgh
12170 vmov i32 1 00000000 00000000 abcdefgh 00000000
12171 vmov i32 2 00000000 abcdefgh 00000000 00000000
12172 vmov i32 3 abcdefgh 00000000 00000000 00000000
12173 vmov i16 4 00000000 abcdefgh
12174 vmov i16 5 abcdefgh 00000000
12175 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12176 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12177 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12178 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12179 vmvn i16 10 00000000 abcdefgh
12180 vmvn i16 11 abcdefgh 00000000
12181 vmov i32 12 00000000 00000000 abcdefgh 11111111
12182 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12183 vmov i32 14 00000000 abcdefgh 11111111 11111111
12184 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12185 vmov i8 16 abcdefgh
12186 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12187 eeeeeeee ffffffff gggggggg hhhhhhhh
12188 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12189 vmov f32 19 00000000 00000000 00000000 00000000
12190
12191 For case 18, B = !b. Representable values are exactly those accepted by
12192 vfp3_const_double_index, but are output as floating-point numbers rather
12193 than indices.
12194
12195 For case 19, we will change it to vmov.i32 when assembling.
12196
12197 Variants 0-5 (inclusive) may also be used as immediates for the second
12198 operand of VORR/VBIC instructions.
12199
12200 The INVERSE argument causes the bitwise inverse of the given operand to be
12201 recognized instead (used for recognizing legal immediates for the VAND/VORN
12202 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12203 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12204 output, rather than the real insns vbic/vorr).
12205
12206 INVERSE makes no difference to the recognition of float vectors.
12207
12208 The return value is the variant of immediate as shown in the above table, or
12209 -1 if the given value doesn't match any of the listed patterns.
12210 */
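/* For example, a V4SI vector with every element equal to 0x000000ab is
   variant 0 (vmov.i32), one with every element equal to 0xffffffab is
   variant 6 (vmvn.i32), and a vector of identical bytes such as 0x2a is
   variant 16 (vmov.i8).  */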
12211 static int
12212 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12213 rtx *modconst, int *elementwidth)
12214 {
12215 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12216 matches = 1; \
12217 for (i = 0; i < idx; i += (STRIDE)) \
12218 if (!(TEST)) \
12219 matches = 0; \
12220 if (matches) \
12221 { \
12222 immtype = (CLASS); \
12223 elsize = (ELSIZE); \
12224 break; \
12225 }
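/* CHECK compares the byte image of the constant against the pattern for
   one variant from the table above; on a match it records the variant in
   IMMTYPE and the element size in ELSIZE, then leaves the do {} while (0)
   block below.  */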
12226
12227 unsigned int i, elsize = 0, idx = 0, n_elts;
12228 unsigned int innersize;
12229 unsigned char bytes[16];
12230 int immtype = -1, matches;
12231 unsigned int invmask = inverse ? 0xff : 0;
12232 bool vector = GET_CODE (op) == CONST_VECTOR;
12233
12234 if (vector)
12235 {
12236 n_elts = CONST_VECTOR_NUNITS (op);
12237 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12238 }
12239 else
12240 {
12241 n_elts = 1;
12242 if (mode == VOIDmode)
12243 mode = DImode;
12244 innersize = GET_MODE_SIZE (mode);
12245 }
12246
12247 /* Vectors of float constants. */
12248 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12249 {
12250 rtx el0 = CONST_VECTOR_ELT (op, 0);
12251 REAL_VALUE_TYPE r0;
12252
12253 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12254 return -1;
12255
12256 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12257
12258 for (i = 1; i < n_elts; i++)
12259 {
12260 rtx elt = CONST_VECTOR_ELT (op, i);
12261 REAL_VALUE_TYPE re;
12262
12263 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12264
12265 if (!REAL_VALUES_EQUAL (r0, re))
12266 return -1;
12267 }
12268
12269 if (modconst)
12270 *modconst = CONST_VECTOR_ELT (op, 0);
12271
12272 if (elementwidth)
12273 *elementwidth = 0;
12274
12275 if (el0 == CONST0_RTX (GET_MODE (el0)))
12276 return 19;
12277 else
12278 return 18;
12279 }
12280
12281 /* Splat vector constant out into a byte vector. */
12282 for (i = 0; i < n_elts; i++)
12283 {
12284 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12285 unsigned HOST_WIDE_INT elpart;
12286 unsigned int part, parts;
12287
12288 if (CONST_INT_P (el))
12289 {
12290 elpart = INTVAL (el);
12291 parts = 1;
12292 }
12293 else if (CONST_DOUBLE_P (el))
12294 {
12295 elpart = CONST_DOUBLE_LOW (el);
12296 parts = 2;
12297 }
12298 else
12299 gcc_unreachable ();
12300
12301 for (part = 0; part < parts; part++)
12302 {
12303 unsigned int byte;
12304 for (byte = 0; byte < innersize; byte++)
12305 {
12306 bytes[idx++] = (elpart & 0xff) ^ invmask;
12307 elpart >>= BITS_PER_UNIT;
12308 }
12309 if (CONST_DOUBLE_P (el))
12310 elpart = CONST_DOUBLE_HIGH (el);
12311 }
12312 }
12313
12314 /* Sanity check. */
12315 gcc_assert (idx == GET_MODE_SIZE (mode));
12316
12317 do
12318 {
12319 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12320 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12321
12322 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12323 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12324
12325 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12326 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12327
12328 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12329 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12330
12331 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12332
12333 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12334
12335 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12336 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12337
12338 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12339 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12340
12341 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12342 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12343
12344 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12345 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12346
12347 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12348
12349 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12350
12351 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12352 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12353
12354 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12355 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12356
12357 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12358 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12359
12360 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12361 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12362
12363 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12364
12365 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12366 && bytes[i] == bytes[(i + 8) % idx]);
12367 }
12368 while (0);
12369
12370 if (immtype == -1)
12371 return -1;
12372
12373 if (elementwidth)
12374 *elementwidth = elsize;
12375
12376 if (modconst)
12377 {
12378 unsigned HOST_WIDE_INT imm = 0;
12379
12380 /* Un-invert bytes of recognized vector, if necessary. */
12381 if (invmask != 0)
12382 for (i = 0; i < idx; i++)
12383 bytes[i] ^= invmask;
12384
12385 if (immtype == 17)
12386 {
12387 /* FIXME: Broken on 32-bit H_W_I hosts. */
12388 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12389
12390 for (i = 0; i < 8; i++)
12391 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12392 << (i * BITS_PER_UNIT);
12393
12394 *modconst = GEN_INT (imm);
12395 }
12396 else
12397 {
12398 unsigned HOST_WIDE_INT imm = 0;
12399
12400 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12401 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12402
12403 *modconst = GEN_INT (imm);
12404 }
12405 }
12406
12407 return immtype;
12408 #undef CHECK
12409 }
12410
12411 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12412 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12413 float elements), and a modified constant (whatever should be output for a
12414 VMOV) in *MODCONST. */
12415
12416 int
12417 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12418 rtx *modconst, int *elementwidth)
12419 {
12420 rtx tmpconst;
12421 int tmpwidth;
12422 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12423
12424 if (retval == -1)
12425 return 0;
12426
12427 if (modconst)
12428 *modconst = tmpconst;
12429
12430 if (elementwidth)
12431 *elementwidth = tmpwidth;
12432
12433 return 1;
12434 }
12435
12436 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12437 the immediate is valid, write a constant suitable for using as an operand
12438 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12439 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12440
12441 int
12442 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12443 rtx *modconst, int *elementwidth)
12444 {
12445 rtx tmpconst;
12446 int tmpwidth;
12447 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12448
12449 if (retval < 0 || retval > 5)
12450 return 0;
12451
12452 if (modconst)
12453 *modconst = tmpconst;
12454
12455 if (elementwidth)
12456 *elementwidth = tmpwidth;
12457
12458 return 1;
12459 }
12460
12461 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12462 the immediate is valid, write a constant suitable for using as an operand
12463 to VSHR/VSHL to *MODCONST and the corresponding element width to
12464 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12465 which have different limits on the immediate. */
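/* For example, with V8HImode (16-bit elements) a left-shift immediate
   must be in the range 0-15, while a right-shift immediate must be in
   the range 1-16.  */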
12466
12467 int
12468 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12469 rtx *modconst, int *elementwidth,
12470 bool isleftshift)
12471 {
12472 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12473 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12474 unsigned HOST_WIDE_INT last_elt = 0;
12475 unsigned HOST_WIDE_INT maxshift;
12476
12477 /* Split vector constant out into a byte vector. */
12478 for (i = 0; i < n_elts; i++)
12479 {
12480 rtx el = CONST_VECTOR_ELT (op, i);
12481 unsigned HOST_WIDE_INT elpart;
12482
12483 if (CONST_INT_P (el))
12484 elpart = INTVAL (el);
12485 else if (CONST_DOUBLE_P (el))
12486 return 0;
12487 else
12488 gcc_unreachable ();
12489
12490 if (i != 0 && elpart != last_elt)
12491 return 0;
12492
12493 last_elt = elpart;
12494 }
12495
12496 /* Shift less than element size. */
12497 maxshift = innersize * 8;
12498
12499 if (isleftshift)
12500 {
12501 /* Left shift immediate value can be from 0 to <size>-1. */
12502 if (last_elt >= maxshift)
12503 return 0;
12504 }
12505 else
12506 {
12507 /* Right shift immediate value can be from 1 to <size>. */
12508 if (last_elt == 0 || last_elt > maxshift)
12509 return 0;
12510 }
12511
12512 if (elementwidth)
12513 *elementwidth = innersize * 8;
12514
12515 if (modconst)
12516 *modconst = CONST_VECTOR_ELT (op, 0);
12517
12518 return 1;
12519 }
12520
12521 /* Return a string suitable for output of Neon immediate logic operation
12522 MNEM. */
12523
12524 char *
12525 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12526 int inverse, int quad)
12527 {
12528 int width, is_valid;
12529 static char templ[40];
12530
12531 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12532
12533 gcc_assert (is_valid != 0);
12534
12535 if (quad)
12536 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12537 else
12538 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12539
12540 return templ;
12541 }
12542
12543 /* Return a string suitable for output of Neon immediate shift operation
12544 (VSHR or VSHL) MNEM. */
12545
12546 char *
12547 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12548 machine_mode mode, int quad,
12549 bool isleftshift)
12550 {
12551 int width, is_valid;
12552 static char templ[40];
12553
12554 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12555 gcc_assert (is_valid != 0);
12556
12557 if (quad)
12558 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12559 else
12560 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12561
12562 return templ;
12563 }
12564
12565 /* Output a sequence of pairwise operations to implement a reduction.
12566 NOTE: We do "too much work" here, because pairwise operations work on two
12567 registers-worth of operands in one go. Unfortunately I don't think we can
12568 exploit those extra calculations to do the full operation in fewer steps.
12569 Although all vector elements of the result but the first are ignored, we
12570 actually calculate the same result in each of the elements. An alternative
12571 such as initially loading a vector with zero to use as each of the second
12572 operands would use up an additional register and take an extra instruction,
12573 for no particular gain. */
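/* For instance, reducing a V4SI vector takes two pairwise steps
   (i = 2, then i = 1), and a V8QI vector takes three.  */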
12574
12575 void
12576 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12577 rtx (*reduc) (rtx, rtx, rtx))
12578 {
12579 machine_mode inner = GET_MODE_INNER (mode);
12580 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12581 rtx tmpsum = op1;
12582
12583 for (i = parts / 2; i >= 1; i /= 2)
12584 {
12585 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12586 emit_insn (reduc (dest, tmpsum, tmpsum));
12587 tmpsum = dest;
12588 }
12589 }
12590
12591 /* If VALS is a vector constant that can be loaded into a register
12592 using VDUP, generate instructions to do so and return an RTX to
12593 assign to the register. Otherwise return NULL_RTX. */
12594
12595 static rtx
12596 neon_vdup_constant (rtx vals)
12597 {
12598 machine_mode mode = GET_MODE (vals);
12599 machine_mode inner_mode = GET_MODE_INNER (mode);
12600 int n_elts = GET_MODE_NUNITS (mode);
12601 bool all_same = true;
12602 rtx x;
12603 int i;
12604
12605 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12606 return NULL_RTX;
12607
12608 for (i = 0; i < n_elts; ++i)
12609 {
12610 x = XVECEXP (vals, 0, i);
12611 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12612 all_same = false;
12613 }
12614
12615 if (!all_same)
12616 /* The elements are not all the same. We could handle repeating
12617 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12618 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12619 vdup.i16). */
12620 return NULL_RTX;
12621
12622 /* We can load this constant by using VDUP and a constant in a
12623 single ARM register. This will be cheaper than a vector
12624 load. */
12625
12626 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12627 return gen_rtx_VEC_DUPLICATE (mode, x);
12628 }
12629
12630 /* Generate code to load VALS, which is a PARALLEL containing only
12631 constants (for vec_init) or CONST_VECTOR, efficiently into a
12632 register. Returns an RTX to copy into the register, or NULL_RTX
12633 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12634
12635 rtx
12636 neon_make_constant (rtx vals)
12637 {
12638 machine_mode mode = GET_MODE (vals);
12639 rtx target;
12640 rtx const_vec = NULL_RTX;
12641 int n_elts = GET_MODE_NUNITS (mode);
12642 int n_const = 0;
12643 int i;
12644
12645 if (GET_CODE (vals) == CONST_VECTOR)
12646 const_vec = vals;
12647 else if (GET_CODE (vals) == PARALLEL)
12648 {
12649 /* A CONST_VECTOR must contain only CONST_INTs and
12650 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12651 Only store valid constants in a CONST_VECTOR. */
12652 for (i = 0; i < n_elts; ++i)
12653 {
12654 rtx x = XVECEXP (vals, 0, i);
12655 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12656 n_const++;
12657 }
12658 if (n_const == n_elts)
12659 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12660 }
12661 else
12662 gcc_unreachable ();
12663
12664 if (const_vec != NULL
12665 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12666 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12667 return const_vec;
12668 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12669 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12670 pipeline cycle; creating the constant takes one or two ARM
12671 pipeline cycles. */
12672 return target;
12673 else if (const_vec != NULL_RTX)
12674 /* Load from constant pool. On Cortex-A8 this takes two cycles
12675 (for either double or quad vectors). We can not take advantage
12676 of single-cycle VLD1 because we need a PC-relative addressing
12677 mode. */
12678 return const_vec;
12679 else
12680 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12681 We can not construct an initializer. */
12682 return NULL_RTX;
12683 }
12684
12685 /* Initialize vector TARGET to VALS. */
12686
12687 void
12688 neon_expand_vector_init (rtx target, rtx vals)
12689 {
12690 machine_mode mode = GET_MODE (target);
12691 machine_mode inner_mode = GET_MODE_INNER (mode);
12692 int n_elts = GET_MODE_NUNITS (mode);
12693 int n_var = 0, one_var = -1;
12694 bool all_same = true;
12695 rtx x, mem;
12696 int i;
12697
12698 for (i = 0; i < n_elts; ++i)
12699 {
12700 x = XVECEXP (vals, 0, i);
12701 if (!CONSTANT_P (x))
12702 ++n_var, one_var = i;
12703
12704 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12705 all_same = false;
12706 }
12707
12708 if (n_var == 0)
12709 {
12710 rtx constant = neon_make_constant (vals);
12711 if (constant != NULL_RTX)
12712 {
12713 emit_move_insn (target, constant);
12714 return;
12715 }
12716 }
12717
12718 /* Splat a single non-constant element if we can. */
12719 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12720 {
12721 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12722 emit_insn (gen_rtx_SET (VOIDmode, target,
12723 gen_rtx_VEC_DUPLICATE (mode, x)));
12724 return;
12725 }
12726
12727 /* One field is non-constant. Load constant then overwrite varying
12728 field. This is more efficient than using the stack. */
12729 if (n_var == 1)
12730 {
12731 rtx copy = copy_rtx (vals);
12732 rtx index = GEN_INT (one_var);
12733
12734 /* Load constant part of vector, substitute neighboring value for
12735 varying element. */
12736 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12737 neon_expand_vector_init (target, copy);
12738
12739 /* Insert variable. */
12740 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12741 switch (mode)
12742 {
12743 case V8QImode:
12744 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12745 break;
12746 case V16QImode:
12747 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12748 break;
12749 case V4HImode:
12750 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12751 break;
12752 case V8HImode:
12753 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12754 break;
12755 case V2SImode:
12756 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12757 break;
12758 case V4SImode:
12759 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12760 break;
12761 case V2SFmode:
12762 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12763 break;
12764 case V4SFmode:
12765 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12766 break;
12767 case V2DImode:
12768 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12769 break;
12770 default:
12771 gcc_unreachable ();
12772 }
12773 return;
12774 }
12775
12776 /* Construct the vector in memory one field at a time
12777 and load the whole vector. */
12778 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12779 for (i = 0; i < n_elts; i++)
12780 emit_move_insn (adjust_address_nv (mem, inner_mode,
12781 i * GET_MODE_SIZE (inner_mode)),
12782 XVECEXP (vals, 0, i));
12783 emit_move_insn (target, mem);
12784 }
12785
12786 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12787 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12788 reported source locations are bogus. */
12789
12790 static void
12791 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12792 const char *err)
12793 {
12794 HOST_WIDE_INT lane;
12795
12796 gcc_assert (CONST_INT_P (operand));
12797
12798 lane = INTVAL (operand);
12799
12800 if (lane < low || lane >= high)
12801 error (err);
12802 }
12803
12804 /* Bounds-check lanes. */
12805
12806 void
12807 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12808 {
12809 bounds_check (operand, low, high, "lane out of range");
12810 }
12811
12812 /* Bounds-check constants. */
12813
12814 void
12815 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12816 {
12817 bounds_check (operand, low, high, "constant out of range");
12818 }
12819
12820 HOST_WIDE_INT
12821 neon_element_bits (machine_mode mode)
12822 {
12823 if (mode == DImode)
12824 return GET_MODE_BITSIZE (mode);
12825 else
12826 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12827 }
12828
12829 \f
12830 /* Predicates for `match_operand' and `match_operator'. */
12831
12832 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12833 WB is true if full writeback address modes are allowed and is false
12834 if limited writeback address modes (POST_INC and PRE_DEC) are
12835 allowed. */
12836
12837 int
12838 arm_coproc_mem_operand (rtx op, bool wb)
12839 {
12840 rtx ind;
12841
12842 /* Reject eliminable registers. */
12843 if (! (reload_in_progress || reload_completed || lra_in_progress)
12844 && ( reg_mentioned_p (frame_pointer_rtx, op)
12845 || reg_mentioned_p (arg_pointer_rtx, op)
12846 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12847 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12848 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12849 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12850 return FALSE;
12851
12852 /* Constants are converted into offsets from labels. */
12853 if (!MEM_P (op))
12854 return FALSE;
12855
12856 ind = XEXP (op, 0);
12857
12858 if (reload_completed
12859 && (GET_CODE (ind) == LABEL_REF
12860 || (GET_CODE (ind) == CONST
12861 && GET_CODE (XEXP (ind, 0)) == PLUS
12862 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12863 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12864 return TRUE;
12865
12866 /* Match: (mem (reg)). */
12867 if (REG_P (ind))
12868 return arm_address_register_rtx_p (ind, 0);
12869
12870 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12871 acceptable in any case (subject to verification by
12872 arm_address_register_rtx_p). We need WB to be true to accept
12873 PRE_INC and POST_DEC. */
12874 if (GET_CODE (ind) == POST_INC
12875 || GET_CODE (ind) == PRE_DEC
12876 || (wb
12877 && (GET_CODE (ind) == PRE_INC
12878 || GET_CODE (ind) == POST_DEC)))
12879 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12880
12881 if (wb
12882 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12883 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12884 && GET_CODE (XEXP (ind, 1)) == PLUS
12885 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12886 ind = XEXP (ind, 1);
12887
12888 /* Match:
12889 (plus (reg)
12890 (const)). */
12891 if (GET_CODE (ind) == PLUS
12892 && REG_P (XEXP (ind, 0))
12893 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12894 && CONST_INT_P (XEXP (ind, 1))
12895 && INTVAL (XEXP (ind, 1)) > -1024
12896 && INTVAL (XEXP (ind, 1)) < 1024
12897 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12898 return TRUE;
12899
12900 return FALSE;
12901 }
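
/* Worked example (added for clarity; the ranges follow directly from the
   checks above): with WB false the forms accepted include

     (mem (reg rN))
     (mem (post_inc (reg rN)))  and  (mem (pre_dec (reg rN)))
     (mem (plus (reg rN) (const_int imm)))
         with imm a multiple of 4 in the range -1020 .. +1020.

   PRE_INC, POST_DEC, POST_MODIFY and PRE_MODIFY addresses additionally
   require WB to be true.  */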
12902
12903 /* Return TRUE if OP is a memory operand from or to which we can load or
12904 store a vector. TYPE is one of the following values:
12905 0 - Vector load/store (vldr)
12906 1 - Core registers (ldm)
12907 2 - Element/structure loads (vld1)
12908 */
12909 int
12910 neon_vector_mem_operand (rtx op, int type, bool strict)
12911 {
12912 rtx ind;
12913
12914 /* Reject eliminable registers. */
12915 if (! (reload_in_progress || reload_completed)
12916 && ( reg_mentioned_p (frame_pointer_rtx, op)
12917 || reg_mentioned_p (arg_pointer_rtx, op)
12918 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12919 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12920 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12921 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12922 return !strict;
12923
12924 /* Constants are converted into offsets from labels. */
12925 if (!MEM_P (op))
12926 return FALSE;
12927
12928 ind = XEXP (op, 0);
12929
12930 if (reload_completed
12931 && (GET_CODE (ind) == LABEL_REF
12932 || (GET_CODE (ind) == CONST
12933 && GET_CODE (XEXP (ind, 0)) == PLUS
12934 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12935 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12936 return TRUE;
12937
12938 /* Match: (mem (reg)). */
12939 if (REG_P (ind))
12940 return arm_address_register_rtx_p (ind, 0);
12941
12942 /* Allow post-increment with Neon registers. */
12943 if ((type != 1 && GET_CODE (ind) == POST_INC)
12944 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12945 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12946
12947 /* Allow post-increment by register for VLDn. */
12948 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12949 && GET_CODE (XEXP (ind, 1)) == PLUS
12950 && REG_P (XEXP (XEXP (ind, 1), 1)))
12951 return true;
12952
12953 /* Match:
12954 (plus (reg)
12955 (const)). */
12956 if (type == 0
12957 && GET_CODE (ind) == PLUS
12958 && REG_P (XEXP (ind, 0))
12959 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12960 && CONST_INT_P (XEXP (ind, 1))
12961 && INTVAL (XEXP (ind, 1)) > -1024
12962 /* For quad modes, we restrict the constant offset to be slightly less
12963 than what the instruction format permits. We have no such constraint
12964 on double mode offsets. (This must match arm_legitimate_index_p.) */
12965 && (INTVAL (XEXP (ind, 1))
12966 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12967 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12968 return TRUE;
12969
12970 return FALSE;
12971 }
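
/* Worked example (added for clarity): for TYPE 0 the constant-offset form
   above accepts word-aligned offsets from -1020 up to +1020 for double-word
   (D-register) modes, but only up to +1012 for quad-word modes, matching
   the restriction in arm_legitimate_index_p.  */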
12972
12973 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12974 type. */
12975 int
12976 neon_struct_mem_operand (rtx op)
12977 {
12978 rtx ind;
12979
12980 /* Reject eliminable registers. */
12981 if (! (reload_in_progress || reload_completed)
12982 && ( reg_mentioned_p (frame_pointer_rtx, op)
12983 || reg_mentioned_p (arg_pointer_rtx, op)
12984 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12985 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12986 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12987 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12988 return FALSE;
12989
12990 /* Constants are converted into offsets from labels. */
12991 if (!MEM_P (op))
12992 return FALSE;
12993
12994 ind = XEXP (op, 0);
12995
12996 if (reload_completed
12997 && (GET_CODE (ind) == LABEL_REF
12998 || (GET_CODE (ind) == CONST
12999 && GET_CODE (XEXP (ind, 0)) == PLUS
13000 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13001 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13002 return TRUE;
13003
13004 /* Match: (mem (reg)). */
13005 if (REG_P (ind))
13006 return arm_address_register_rtx_p (ind, 0);
13007
13008 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13009 if (GET_CODE (ind) == POST_INC
13010 || GET_CODE (ind) == PRE_DEC)
13011 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13012
13013 return FALSE;
13014 }
13015
13016 /* Return true if X is a register that will be eliminated later on. */
13017 int
13018 arm_eliminable_register (rtx x)
13019 {
13020 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13021 || REGNO (x) == ARG_POINTER_REGNUM
13022 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13023 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13024 }
13025
13026 /* Return GENERAL_REGS if a scratch register is required to reload X
13027 to/from coprocessor registers. Otherwise return NO_REGS. */
13028
13029 enum reg_class
13030 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13031 {
13032 if (mode == HFmode)
13033 {
13034 if (!TARGET_NEON_FP16)
13035 return GENERAL_REGS;
13036 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13037 return NO_REGS;
13038 return GENERAL_REGS;
13039 }
13040
13041 /* The neon move patterns handle all legitimate vector and struct
13042 addresses. */
13043 if (TARGET_NEON
13044 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13045 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13046 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13047 || VALID_NEON_STRUCT_MODE (mode)))
13048 return NO_REGS;
13049
13050 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13051 return NO_REGS;
13052
13053 return GENERAL_REGS;
13054 }
13055
13056 /* Values which must be returned in the most-significant end of the return
13057 register. */
13058
13059 static bool
13060 arm_return_in_msb (const_tree valtype)
13061 {
13062 return (TARGET_AAPCS_BASED
13063 && BYTES_BIG_ENDIAN
13064 && (AGGREGATE_TYPE_P (valtype)
13065 || TREE_CODE (valtype) == COMPLEX_TYPE
13066 || FIXED_POINT_TYPE_P (valtype)));
13067 }
13068
13069 /* Return TRUE if X references a SYMBOL_REF. */
13070 int
13071 symbol_mentioned_p (rtx x)
13072 {
13073 const char * fmt;
13074 int i;
13075
13076 if (GET_CODE (x) == SYMBOL_REF)
13077 return 1;
13078
13079 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13080 are constant offsets, not symbols. */
13081 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13082 return 0;
13083
13084 fmt = GET_RTX_FORMAT (GET_CODE (x));
13085
13086 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13087 {
13088 if (fmt[i] == 'E')
13089 {
13090 int j;
13091
13092 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13093 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13094 return 1;
13095 }
13096 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13097 return 1;
13098 }
13099
13100 return 0;
13101 }
13102
13103 /* Return TRUE if X references a LABEL_REF. */
13104 int
13105 label_mentioned_p (rtx x)
13106 {
13107 const char * fmt;
13108 int i;
13109
13110 if (GET_CODE (x) == LABEL_REF)
13111 return 1;
13112
13113 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13114 instruction, but they are constant offsets, not symbols. */
13115 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13116 return 0;
13117
13118 fmt = GET_RTX_FORMAT (GET_CODE (x));
13119 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13120 {
13121 if (fmt[i] == 'E')
13122 {
13123 int j;
13124
13125 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13126 if (label_mentioned_p (XVECEXP (x, i, j)))
13127 return 1;
13128 }
13129 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13130 return 1;
13131 }
13132
13133 return 0;
13134 }
13135
13136 int
13137 tls_mentioned_p (rtx x)
13138 {
13139 switch (GET_CODE (x))
13140 {
13141 case CONST:
13142 return tls_mentioned_p (XEXP (x, 0));
13143
13144 case UNSPEC:
13145 if (XINT (x, 1) == UNSPEC_TLS)
13146 return 1;
13147
13148 default:
13149 return 0;
13150 }
13151 }
13152
13153 /* Must not copy any rtx that uses a pc-relative address. */
13154
13155 static bool
13156 arm_cannot_copy_insn_p (rtx_insn *insn)
13157 {
13158 /* The tls call insn cannot be copied, as it is paired with a data
13159 word. */
13160 if (recog_memoized (insn) == CODE_FOR_tlscall)
13161 return true;
13162
13163 subrtx_iterator::array_type array;
13164 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13165 {
13166 const_rtx x = *iter;
13167 if (GET_CODE (x) == UNSPEC
13168 && (XINT (x, 1) == UNSPEC_PIC_BASE
13169 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13170 return true;
13171 }
13172 return false;
13173 }
13174
13175 enum rtx_code
13176 minmax_code (rtx x)
13177 {
13178 enum rtx_code code = GET_CODE (x);
13179
13180 switch (code)
13181 {
13182 case SMAX:
13183 return GE;
13184 case SMIN:
13185 return LE;
13186 case UMIN:
13187 return LEU;
13188 case UMAX:
13189 return GEU;
13190 default:
13191 gcc_unreachable ();
13192 }
13193 }
13194
13195 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13196
13197 bool
13198 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13199 int *mask, bool *signed_sat)
13200 {
13201 /* The high bound must be a power of two minus one. */
13202 int log = exact_log2 (INTVAL (hi_bound) + 1);
13203 if (log == -1)
13204 return false;
13205
13206 /* The low bound is either zero (for usat) or one less than the
13207 negation of the high bound (for ssat). */
13208 if (INTVAL (lo_bound) == 0)
13209 {
13210 if (mask)
13211 *mask = log;
13212 if (signed_sat)
13213 *signed_sat = false;
13214
13215 return true;
13216 }
13217
13218 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13219 {
13220 if (mask)
13221 *mask = log + 1;
13222 if (signed_sat)
13223 *signed_sat = true;
13224
13225 return true;
13226 }
13227
13228 return false;
13229 }
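
/* Worked examples (added for clarity; both follow from the code above):
     bounds 0 .. 255     ->  *mask = 8, *signed_sat = false  (usat-style)
     bounds -128 .. 127  ->  *mask = 8, *signed_sat = true   (ssat-style)
   In the signed case exact_log2 (127 + 1) is 7 and the mask is log + 1.  */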
13230
13231 /* Return 1 if memory locations are adjacent. */
13232 int
13233 adjacent_mem_locations (rtx a, rtx b)
13234 {
13235 /* We don't guarantee to preserve the order of these memory refs. */
13236 if (volatile_refs_p (a) || volatile_refs_p (b))
13237 return 0;
13238
13239 if ((REG_P (XEXP (a, 0))
13240 || (GET_CODE (XEXP (a, 0)) == PLUS
13241 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13242 && (REG_P (XEXP (b, 0))
13243 || (GET_CODE (XEXP (b, 0)) == PLUS
13244 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13245 {
13246 HOST_WIDE_INT val0 = 0, val1 = 0;
13247 rtx reg0, reg1;
13248 int val_diff;
13249
13250 if (GET_CODE (XEXP (a, 0)) == PLUS)
13251 {
13252 reg0 = XEXP (XEXP (a, 0), 0);
13253 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13254 }
13255 else
13256 reg0 = XEXP (a, 0);
13257
13258 if (GET_CODE (XEXP (b, 0)) == PLUS)
13259 {
13260 reg1 = XEXP (XEXP (b, 0), 0);
13261 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13262 }
13263 else
13264 reg1 = XEXP (b, 0);
13265
13266 /* Don't accept any offset that will require multiple
13267 instructions to handle, since this would cause the
13268 arith_adjacentmem pattern to output an overlong sequence. */
13269 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13270 return 0;
13271
13272 /* Don't allow an eliminable register: register elimination can make
13273 the offset too large. */
13274 if (arm_eliminable_register (reg0))
13275 return 0;
13276
13277 val_diff = val1 - val0;
13278
13279 if (arm_ld_sched)
13280 {
13281 /* If the target has load delay slots, then there's no benefit
13282 to using an ldm instruction unless the offset is zero and
13283 we are optimizing for size. */
13284 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13285 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13286 && (val_diff == 4 || val_diff == -4));
13287 }
13288
13289 return ((REGNO (reg0) == REGNO (reg1))
13290 && (val_diff == 4 || val_diff == -4));
13291 }
13292
13293 return 0;
13294 }
13295
13296 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13297 for load operations, false for store operations. CONSECUTIVE is true
13298 if the register numbers in the operation must be consecutive in the register
13299 bank. RETURN_PC is true if the value is to be loaded into the PC.
13300 The pattern we are trying to match for load is:
13301 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13302 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13303 :
13304 :
13305 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13306 ]
13307 where
13308 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13309 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13310 3. If consecutive is TRUE, then for kth register being loaded,
13311 REGNO (R_dk) = REGNO (R_d0) + k.
13312 The pattern for store is similar. */
13313 bool
13314 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13315 bool consecutive, bool return_pc)
13316 {
13317 HOST_WIDE_INT count = XVECLEN (op, 0);
13318 rtx reg, mem, addr;
13319 unsigned regno;
13320 unsigned first_regno;
13321 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13322 rtx elt;
13323 bool addr_reg_in_reglist = false;
13324 bool update = false;
13325 int reg_increment;
13326 int offset_adj;
13327 int regs_per_val;
13328
13329 /* If not in SImode, then registers must be consecutive
13330 (e.g., VLDM instructions for DFmode). */
13331 gcc_assert ((mode == SImode) || consecutive);
13332 /* Setting return_pc for stores is illegal. */
13333 gcc_assert (!return_pc || load);
13334
13335 /* Set up the increments and the regs per val based on the mode. */
13336 reg_increment = GET_MODE_SIZE (mode);
13337 regs_per_val = reg_increment / 4;
13338 offset_adj = return_pc ? 1 : 0;
13339
13340 if (count <= 1
13341 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13342 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13343 return false;
13344
13345 /* Check if this is a write-back. */
13346 elt = XVECEXP (op, 0, offset_adj);
13347 if (GET_CODE (SET_SRC (elt)) == PLUS)
13348 {
13349 i++;
13350 base = 1;
13351 update = true;
13352
13353 /* The offset adjustment must be the number of registers being
13354 popped times the size of a single register. */
13355 if (!REG_P (SET_DEST (elt))
13356 || !REG_P (XEXP (SET_SRC (elt), 0))
13357 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13358 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13359 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13360 ((count - 1 - offset_adj) * reg_increment))
13361 return false;
13362 }
13363
13364 i = i + offset_adj;
13365 base = base + offset_adj;
13366 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13367 success depends on the type: VLDM can do just one reg,
13368 LDM must do at least two. */
13369 if ((count <= i) && (mode == SImode))
13370 return false;
13371
13372 elt = XVECEXP (op, 0, i - 1);
13373 if (GET_CODE (elt) != SET)
13374 return false;
13375
13376 if (load)
13377 {
13378 reg = SET_DEST (elt);
13379 mem = SET_SRC (elt);
13380 }
13381 else
13382 {
13383 reg = SET_SRC (elt);
13384 mem = SET_DEST (elt);
13385 }
13386
13387 if (!REG_P (reg) || !MEM_P (mem))
13388 return false;
13389
13390 regno = REGNO (reg);
13391 first_regno = regno;
13392 addr = XEXP (mem, 0);
13393 if (GET_CODE (addr) == PLUS)
13394 {
13395 if (!CONST_INT_P (XEXP (addr, 1)))
13396 return false;
13397
13398 offset = INTVAL (XEXP (addr, 1));
13399 addr = XEXP (addr, 0);
13400 }
13401
13402 if (!REG_P (addr))
13403 return false;
13404
13405 /* Don't allow SP to be loaded unless it is also the base register. It
13406 guarantees that SP is reset correctly when an LDM instruction
13407 is interrupted. Otherwise, we might end up with a corrupt stack. */
13408 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13409 return false;
13410
13411 for (; i < count; i++)
13412 {
13413 elt = XVECEXP (op, 0, i);
13414 if (GET_CODE (elt) != SET)
13415 return false;
13416
13417 if (load)
13418 {
13419 reg = SET_DEST (elt);
13420 mem = SET_SRC (elt);
13421 }
13422 else
13423 {
13424 reg = SET_SRC (elt);
13425 mem = SET_DEST (elt);
13426 }
13427
13428 if (!REG_P (reg)
13429 || GET_MODE (reg) != mode
13430 || REGNO (reg) <= regno
13431 || (consecutive
13432 && (REGNO (reg) !=
13433 (unsigned int) (first_regno + regs_per_val * (i - base))))
13434 /* Don't allow SP to be loaded unless it is also the base register. It
13435 guarantees that SP is reset correctly when an LDM instruction
13436 is interrupted. Otherwise, we might end up with a corrupt stack. */
13437 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13438 || !MEM_P (mem)
13439 || GET_MODE (mem) != mode
13440 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13441 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13442 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13443 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13444 offset + (i - base) * reg_increment))
13445 && (!REG_P (XEXP (mem, 0))
13446 || offset + (i - base) * reg_increment != 0)))
13447 return false;
13448
13449 regno = REGNO (reg);
13450 if (regno == REGNO (addr))
13451 addr_reg_in_reglist = true;
13452 }
13453
13454 if (load)
13455 {
13456 if (update && addr_reg_in_reglist)
13457 return false;
13458
13459 /* For Thumb-1, the address register is always modified, either by
13460 write-back or by an explicit load. If the pattern does not describe an
13461 update, then the address register must be in the list of loaded registers. */
13462 if (TARGET_THUMB1)
13463 return update || addr_reg_in_reglist;
13464 }
13465
13466 return true;
13467 }
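
/* Illustrative example (added for clarity; register numbers are arbitrary):
   a two-word LDM from r3 with no writeback would be matched as

     (parallel [(set (reg:SI r4) (mem:SI (reg:SI r3)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r3)
                                                  (const_int 4))))])

   i.e. offset 0 for the first element, ascending destination register
   numbers, and consecutive offsets in steps of the register size, as
   required by the checks above.  */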
13468
13469 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13470 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13471 instruction. ADD_OFFSET is nonzero if the base address register needs
13472 to be modified with an add instruction before we can use it. */
13473
13474 static bool
13475 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13476 int nops, HOST_WIDE_INT add_offset)
13477 {
13478 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13479 if the offset isn't small enough. The reason 2 ldrs are faster
13480 is because these ARMs are able to do more than one cache access
13481 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13482 whilst the ARM8 has a double bandwidth cache. This means that
13483 these cores can do both an instruction fetch and a data fetch in
13484 a single cycle, so the trick of calculating the address into a
13485 scratch register (one of the result regs) and then doing a load
13486 multiple actually becomes slower (and no smaller in code size).
13487 That is the transformation
13488
13489 ldr rd1, [rbase + offset]
13490 ldr rd2, [rbase + offset + 4]
13491
13492 to
13493
13494 add rd1, rbase, offset
13495 ldmia rd1, {rd1, rd2}
13496
13497 produces worse code -- '3 cycles + any stalls on rd2' instead of
13498 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13499 access per cycle, the first sequence could never complete in less
13500 than 6 cycles, whereas the ldm sequence would only take 5 and
13501 would make better use of sequential accesses if not hitting the
13502 cache.
13503
13504 We cheat here and test 'arm_ld_sched' which we currently know to
13505 only be true for the ARM8, ARM9 and StrongARM. If this ever
13506 changes, then the test below needs to be reworked. */
13507 if (nops == 2 && arm_ld_sched && add_offset != 0)
13508 return false;
13509
13510 /* XScale has load-store double instructions, but they have stricter
13511 alignment requirements than load-store multiple, so we cannot
13512 use them.
13513
13514 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13515 the pipeline until completion.
13516
13517 NREGS CYCLES
13518 1 3
13519 2 4
13520 3 5
13521 4 6
13522
13523 An ldr instruction takes 1-3 cycles, but does not block the
13524 pipeline.
13525
13526 NREGS CYCLES
13527 1 1-3
13528 2 2-6
13529 3 3-9
13530 4 4-12
13531
13532 Best case ldr will always win. However, the more ldr instructions
13533 we issue, the less likely we are to be able to schedule them well.
13534 Using ldr instructions also increases code size.
13535
13536 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13537 for counts of 3 or 4 regs. */
13538 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13539 return false;
13540 return true;
13541 }
13542
13543 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13544 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13545 an array ORDER which describes the sequence to use when accessing the
13546 offsets that produces an ascending order. In this sequence, each
13547 offset must be larger by exactly 4 than the previous one. ORDER[0]
13548 must have been filled in with the lowest offset by the caller.
13549 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13550 we use to verify that ORDER produces an ascending order of registers.
13551 Return true if it was possible to construct such an order, false if
13552 not. */
13553
13554 static bool
13555 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13556 int *unsorted_regs)
13557 {
13558 int i;
13559 for (i = 1; i < nops; i++)
13560 {
13561 int j;
13562
13563 order[i] = order[i - 1];
13564 for (j = 0; j < nops; j++)
13565 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13566 {
13567 /* We must find exactly one offset that is higher than the
13568 previous one by 4. */
13569 if (order[i] != order[i - 1])
13570 return false;
13571 order[i] = j;
13572 }
13573 if (order[i] == order[i - 1])
13574 return false;
13575 /* The register numbers must be ascending. */
13576 if (unsorted_regs != NULL
13577 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13578 return false;
13579 }
13580 return true;
13581 }
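
/* Worked example (added for clarity): with UNSORTED_OFFSETS = {4, 12, 0, 8}
   the caller sets ORDER[0] = 2 (the index of offset 0); the loop above then
   produces ORDER = {2, 0, 3, 1}, i.e. offsets 0, 4, 8, 12 in ascending
   order, each exactly 4 larger than its predecessor.  */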
13582
13583 /* Used to determine in a peephole whether a sequence of load
13584 instructions can be changed into a load-multiple instruction.
13585 NOPS is the number of separate load instructions we are examining. The
13586 first NOPS entries in OPERANDS are the destination registers, the
13587 next NOPS entries are memory operands. If this function is
13588 successful, *BASE is set to the common base register of the memory
13589 accesses; *LOAD_OFFSET is set to the first memory location's offset
13590 from that base register.
13591 REGS is an array filled in with the destination register numbers.
13592 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13593 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13594 the sequence of registers in REGS matches the loads from ascending memory
13595 locations, and the function verifies that the register numbers are
13596 themselves ascending. If CHECK_REGS is false, the register numbers
13597 are stored in the order they are found in the operands. */
13598 static int
13599 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13600 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13601 {
13602 int unsorted_regs[MAX_LDM_STM_OPS];
13603 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13604 int order[MAX_LDM_STM_OPS];
13605 rtx base_reg_rtx = NULL;
13606 int base_reg = -1;
13607 int i, ldm_case;
13608
13609 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13610 easily extended if required. */
13611 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13612
13613 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13614
13615 /* Loop over the operands and check that the memory references are
13616 suitable (i.e. immediate offsets from the same base register). At
13617 the same time, extract the target register, and the memory
13618 offsets. */
13619 for (i = 0; i < nops; i++)
13620 {
13621 rtx reg;
13622 rtx offset;
13623
13624 /* Convert a subreg of a mem into the mem itself. */
13625 if (GET_CODE (operands[nops + i]) == SUBREG)
13626 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13627
13628 gcc_assert (MEM_P (operands[nops + i]));
13629
13630 /* Don't reorder volatile memory references; it doesn't seem worth
13631 looking for the case where the order is ok anyway. */
13632 if (MEM_VOLATILE_P (operands[nops + i]))
13633 return 0;
13634
13635 offset = const0_rtx;
13636
13637 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13638 || (GET_CODE (reg) == SUBREG
13639 && REG_P (reg = SUBREG_REG (reg))))
13640 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13641 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13642 || (GET_CODE (reg) == SUBREG
13643 && REG_P (reg = SUBREG_REG (reg))))
13644 && (CONST_INT_P (offset
13645 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13646 {
13647 if (i == 0)
13648 {
13649 base_reg = REGNO (reg);
13650 base_reg_rtx = reg;
13651 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13652 return 0;
13653 }
13654 else if (base_reg != (int) REGNO (reg))
13655 /* Not addressed from the same base register. */
13656 return 0;
13657
13658 unsorted_regs[i] = (REG_P (operands[i])
13659 ? REGNO (operands[i])
13660 : REGNO (SUBREG_REG (operands[i])));
13661
13662 /* If it isn't an integer register, or if it overwrites the
13663 base register but isn't the last insn in the list, then
13664 we can't do this. */
13665 if (unsorted_regs[i] < 0
13666 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13667 || unsorted_regs[i] > 14
13668 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13669 return 0;
13670
13671 /* Don't allow SP to be loaded unless it is also the base
13672 register. It guarantees that SP is reset correctly when
13673 an LDM instruction is interrupted. Otherwise, we might
13674 end up with a corrupt stack. */
13675 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13676 return 0;
13677
13678 unsorted_offsets[i] = INTVAL (offset);
13679 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13680 order[0] = i;
13681 }
13682 else
13683 /* Not a suitable memory address. */
13684 return 0;
13685 }
13686
13687 /* All the useful information has now been extracted from the
13688 operands into unsorted_regs and unsorted_offsets; additionally,
13689 order[0] has been set to the lowest offset in the list. Sort
13690 the offsets into order, verifying that they are adjacent, and
13691 check that the register numbers are ascending. */
13692 if (!compute_offset_order (nops, unsorted_offsets, order,
13693 check_regs ? unsorted_regs : NULL))
13694 return 0;
13695
13696 if (saved_order)
13697 memcpy (saved_order, order, sizeof order);
13698
13699 if (base)
13700 {
13701 *base = base_reg;
13702
13703 for (i = 0; i < nops; i++)
13704 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13705
13706 *load_offset = unsorted_offsets[order[0]];
13707 }
13708
13709 if (TARGET_THUMB1
13710 && !peep2_reg_dead_p (nops, base_reg_rtx))
13711 return 0;
13712
13713 if (unsorted_offsets[order[0]] == 0)
13714 ldm_case = 1; /* ldmia */
13715 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13716 ldm_case = 2; /* ldmib */
13717 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13718 ldm_case = 3; /* ldmda */
13719 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13720 ldm_case = 4; /* ldmdb */
13721 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13722 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13723 ldm_case = 5;
13724 else
13725 return 0;
13726
13727 if (!multiple_operation_profitable_p (false, nops,
13728 ldm_case == 5
13729 ? unsorted_offsets[order[0]] : 0))
13730 return 0;
13731
13732 return ldm_case;
13733 }
13734
13735 /* Used to determine in a peephole whether a sequence of store instructions can
13736 be changed into a store-multiple instruction.
13737 NOPS is the number of separate store instructions we are examining.
13738 NOPS_TOTAL is the total number of instructions recognized by the peephole
13739 pattern.
13740 The first NOPS entries in OPERANDS are the source registers, the next
13741 NOPS entries are memory operands. If this function is successful, *BASE is
13742 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13743 to the first memory location's offset from that base register. REGS is an
13744 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13745 likewise filled with the corresponding rtx's.
13746 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13747 numbers to an ascending order of stores.
13748 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13749 from ascending memory locations, and the function verifies that the register
13750 numbers are themselves ascending. If CHECK_REGS is false, the register
13751 numbers are stored in the order they are found in the operands. */
13752 static int
13753 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13754 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13755 HOST_WIDE_INT *load_offset, bool check_regs)
13756 {
13757 int unsorted_regs[MAX_LDM_STM_OPS];
13758 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13759 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13760 int order[MAX_LDM_STM_OPS];
13761 int base_reg = -1;
13762 rtx base_reg_rtx = NULL;
13763 int i, stm_case;
13764
13765 /* Write back of base register is currently only supported for Thumb 1. */
13766 int base_writeback = TARGET_THUMB1;
13767
13768 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13769 easily extended if required. */
13770 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13771
13772 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13773
13774 /* Loop over the operands and check that the memory references are
13775 suitable (i.e. immediate offsets from the same base register). At
13776 the same time, extract the target register, and the memory
13777 offsets. */
13778 for (i = 0; i < nops; i++)
13779 {
13780 rtx reg;
13781 rtx offset;
13782
13783 /* Convert a subreg of a mem into the mem itself. */
13784 if (GET_CODE (operands[nops + i]) == SUBREG)
13785 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13786
13787 gcc_assert (MEM_P (operands[nops + i]));
13788
13789 /* Don't reorder volatile memory references; it doesn't seem worth
13790 looking for the case where the order is ok anyway. */
13791 if (MEM_VOLATILE_P (operands[nops + i]))
13792 return 0;
13793
13794 offset = const0_rtx;
13795
13796 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13797 || (GET_CODE (reg) == SUBREG
13798 && REG_P (reg = SUBREG_REG (reg))))
13799 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13800 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13801 || (GET_CODE (reg) == SUBREG
13802 && REG_P (reg = SUBREG_REG (reg))))
13803 && (CONST_INT_P (offset
13804 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13805 {
13806 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13807 ? operands[i] : SUBREG_REG (operands[i]));
13808 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13809
13810 if (i == 0)
13811 {
13812 base_reg = REGNO (reg);
13813 base_reg_rtx = reg;
13814 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13815 return 0;
13816 }
13817 else if (base_reg != (int) REGNO (reg))
13818 /* Not addressed from the same base register. */
13819 return 0;
13820
13821 /* If it isn't an integer register, then we can't do this. */
13822 if (unsorted_regs[i] < 0
13823 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13824 /* The effects are unpredictable if the base register is
13825 both updated and stored. */
13826 || (base_writeback && unsorted_regs[i] == base_reg)
13827 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13828 || unsorted_regs[i] > 14)
13829 return 0;
13830
13831 unsorted_offsets[i] = INTVAL (offset);
13832 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13833 order[0] = i;
13834 }
13835 else
13836 /* Not a suitable memory address. */
13837 return 0;
13838 }
13839
13840 /* All the useful information has now been extracted from the
13841 operands into unsorted_regs and unsorted_offsets; additionally,
13842 order[0] has been set to the lowest offset in the list. Sort
13843 the offsets into order, verifying that they are adjacent, and
13844 check that the register numbers are ascending. */
13845 if (!compute_offset_order (nops, unsorted_offsets, order,
13846 check_regs ? unsorted_regs : NULL))
13847 return 0;
13848
13849 if (saved_order)
13850 memcpy (saved_order, order, sizeof order);
13851
13852 if (base)
13853 {
13854 *base = base_reg;
13855
13856 for (i = 0; i < nops; i++)
13857 {
13858 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13859 if (reg_rtxs)
13860 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13861 }
13862
13863 *load_offset = unsorted_offsets[order[0]];
13864 }
13865
13866 if (TARGET_THUMB1
13867 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13868 return 0;
13869
13870 if (unsorted_offsets[order[0]] == 0)
13871 stm_case = 1; /* stmia */
13872 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13873 stm_case = 2; /* stmib */
13874 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13875 stm_case = 3; /* stmda */
13876 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13877 stm_case = 4; /* stmdb */
13878 else
13879 return 0;
13880
13881 if (!multiple_operation_profitable_p (false, nops, 0))
13882 return 0;
13883
13884 return stm_case;
13885 }
13886 \f
13887 /* Routines for use in generating RTL. */
13888
13889 /* Generate a load-multiple instruction. COUNT is the number of loads in
13890 the instruction; REGS and MEMS are arrays containing the operands.
13891 BASEREG is the base register to be used in addressing the memory operands.
13892 WBACK_OFFSET is nonzero if the instruction should update the base
13893 register. */
13894
13895 static rtx
13896 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13897 HOST_WIDE_INT wback_offset)
13898 {
13899 int i = 0, j;
13900 rtx result;
13901
13902 if (!multiple_operation_profitable_p (false, count, 0))
13903 {
13904 rtx seq;
13905
13906 start_sequence ();
13907
13908 for (i = 0; i < count; i++)
13909 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13910
13911 if (wback_offset != 0)
13912 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13913
13914 seq = get_insns ();
13915 end_sequence ();
13916
13917 return seq;
13918 }
13919
13920 result = gen_rtx_PARALLEL (VOIDmode,
13921 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13922 if (wback_offset != 0)
13923 {
13924 XVECEXP (result, 0, 0)
13925 = gen_rtx_SET (VOIDmode, basereg,
13926 plus_constant (Pmode, basereg, wback_offset));
13927 i = 1;
13928 count++;
13929 }
13930
13931 for (j = 0; i < count; i++, j++)
13932 XVECEXP (result, 0, i)
13933 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13934
13935 return result;
13936 }
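
/* Illustrative example (added for clarity; register numbers are arbitrary):
   for COUNT = 2, REGS = {4, 5} and WBACK_OFFSET = 8, the routine above
   builds, when a multiple operation is profitable,

     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) MEMS[0])
                (set (reg:SI 5) MEMS[1])])

   and otherwise falls back to a plain sequence of single loads followed by
   the base-register update.  */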
13937
13938 /* Generate a store-multiple instruction. COUNT is the number of stores in
13939 the instruction; REGS and MEMS are arrays containing the operands.
13940 BASEREG is the base register to be used in addressing the memory operands.
13941 WBACK_OFFSET is nonzero if the instruction should update the base
13942 register. */
13943
13944 static rtx
13945 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13946 HOST_WIDE_INT wback_offset)
13947 {
13948 int i = 0, j;
13949 rtx result;
13950
13951 if (GET_CODE (basereg) == PLUS)
13952 basereg = XEXP (basereg, 0);
13953
13954 if (!multiple_operation_profitable_p (false, count, 0))
13955 {
13956 rtx seq;
13957
13958 start_sequence ();
13959
13960 for (i = 0; i < count; i++)
13961 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13962
13963 if (wback_offset != 0)
13964 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13965
13966 seq = get_insns ();
13967 end_sequence ();
13968
13969 return seq;
13970 }
13971
13972 result = gen_rtx_PARALLEL (VOIDmode,
13973 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13974 if (wback_offset != 0)
13975 {
13976 XVECEXP (result, 0, 0)
13977 = gen_rtx_SET (VOIDmode, basereg,
13978 plus_constant (Pmode, basereg, wback_offset));
13979 i = 1;
13980 count++;
13981 }
13982
13983 for (j = 0; i < count; i++, j++)
13984 XVECEXP (result, 0, i)
13985 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13986
13987 return result;
13988 }
13989
13990 /* Generate either a load-multiple or a store-multiple instruction. This
13991 function can be used in situations where we can start with a single MEM
13992 rtx and adjust its address upwards.
13993 COUNT is the number of operations in the instruction, not counting a
13994 possible update of the base register. REGS is an array containing the
13995 register operands.
13996 BASEREG is the base register to be used in addressing the memory operands,
13997 which are constructed from BASEMEM.
13998 WRITE_BACK specifies whether the generated instruction should include an
13999 update of the base register.
14000 OFFSETP is used to pass an offset to and from this function; this offset
14001 is not used when constructing the address (instead BASEMEM should have an
14002 appropriate offset in its address); it is used only for setting
14003 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14004
14005 static rtx
14006 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14007 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14008 {
14009 rtx mems[MAX_LDM_STM_OPS];
14010 HOST_WIDE_INT offset = *offsetp;
14011 int i;
14012
14013 gcc_assert (count <= MAX_LDM_STM_OPS);
14014
14015 if (GET_CODE (basereg) == PLUS)
14016 basereg = XEXP (basereg, 0);
14017
14018 for (i = 0; i < count; i++)
14019 {
14020 rtx addr = plus_constant (Pmode, basereg, i * 4);
14021 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14022 offset += 4;
14023 }
14024
14025 if (write_back)
14026 *offsetp = offset;
14027
14028 if (is_load)
14029 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14030 write_back ? 4 * count : 0);
14031 else
14032 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14033 write_back ? 4 * count : 0);
14034 }
14035
14036 rtx
14037 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14038 rtx basemem, HOST_WIDE_INT *offsetp)
14039 {
14040 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14041 offsetp);
14042 }
14043
14044 rtx
14045 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14046 rtx basemem, HOST_WIDE_INT *offsetp)
14047 {
14048 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14049 offsetp);
14050 }
14051
14052 /* Called from a peephole2 expander to turn a sequence of loads into an
14053 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14054 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14055 is true if we can reorder the registers because their subsequent uses
14056 are commutative.
14057 Returns true iff we could generate a new instruction. */
14058
14059 bool
14060 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14061 {
14062 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14063 rtx mems[MAX_LDM_STM_OPS];
14064 int i, j, base_reg;
14065 rtx base_reg_rtx;
14066 HOST_WIDE_INT offset;
14067 int write_back = FALSE;
14068 int ldm_case;
14069 rtx addr;
14070
14071 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14072 &base_reg, &offset, !sort_regs);
14073
14074 if (ldm_case == 0)
14075 return false;
14076
14077 if (sort_regs)
14078 for (i = 0; i < nops - 1; i++)
14079 for (j = i + 1; j < nops; j++)
14080 if (regs[i] > regs[j])
14081 {
14082 int t = regs[i];
14083 regs[i] = regs[j];
14084 regs[j] = t;
14085 }
14086 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14087
14088 if (TARGET_THUMB1)
14089 {
14090 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14091 gcc_assert (ldm_case == 1 || ldm_case == 5);
14092 write_back = TRUE;
14093 }
14094
14095 if (ldm_case == 5)
14096 {
14097 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14098 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14099 offset = 0;
14100 if (!TARGET_THUMB1)
14101 {
14102 base_reg = regs[0];
14103 base_reg_rtx = newbase;
14104 }
14105 }
14106
14107 for (i = 0; i < nops; i++)
14108 {
14109 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14110 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14111 SImode, addr, 0);
14112 }
14113 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14114 write_back ? offset + i * 4 : 0));
14115 return true;
14116 }
14117
14118 /* Called from a peephole2 expander to turn a sequence of stores into an
14119 STM instruction. OPERANDS are the operands found by the peephole matcher;
14120 NOPS indicates how many separate stores we are trying to combine.
14121 Returns true iff we could generate a new instruction. */
14122
14123 bool
14124 gen_stm_seq (rtx *operands, int nops)
14125 {
14126 int i;
14127 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14128 rtx mems[MAX_LDM_STM_OPS];
14129 int base_reg;
14130 rtx base_reg_rtx;
14131 HOST_WIDE_INT offset;
14132 int write_back = FALSE;
14133 int stm_case;
14134 rtx addr;
14135 bool base_reg_dies;
14136
14137 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14138 mem_order, &base_reg, &offset, true);
14139
14140 if (stm_case == 0)
14141 return false;
14142
14143 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14144
14145 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14146 if (TARGET_THUMB1)
14147 {
14148 gcc_assert (base_reg_dies);
14149 write_back = TRUE;
14150 }
14151
14152 if (stm_case == 5)
14153 {
14154 gcc_assert (base_reg_dies);
14155 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14156 offset = 0;
14157 }
14158
14159 addr = plus_constant (Pmode, base_reg_rtx, offset);
14160
14161 for (i = 0; i < nops; i++)
14162 {
14163 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14164 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14165 SImode, addr, 0);
14166 }
14167 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14168 write_back ? offset + i * 4 : 0));
14169 return true;
14170 }
14171
14172 /* Called from a peephole2 expander to turn a sequence of stores that are
14173 preceded by constant loads into an STM instruction. OPERANDS are the
14174 operands found by the peephole matcher; NOPS indicates how many
14175 separate stores we are trying to combine; there are 2 * NOPS
14176 instructions in the peephole.
14177 Returns true iff we could generate a new instruction. */
14178
14179 bool
14180 gen_const_stm_seq (rtx *operands, int nops)
14181 {
14182 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14183 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14184 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14185 rtx mems[MAX_LDM_STM_OPS];
14186 int base_reg;
14187 rtx base_reg_rtx;
14188 HOST_WIDE_INT offset;
14189 int write_back = FALSE;
14190 int stm_case;
14191 rtx addr;
14192 bool base_reg_dies;
14193 int i, j;
14194 HARD_REG_SET allocated;
14195
14196 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14197 mem_order, &base_reg, &offset, false);
14198
14199 if (stm_case == 0)
14200 return false;
14201
14202 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14203
14204 /* If the same register is used more than once, try to find a free
14205 register. */
14206 CLEAR_HARD_REG_SET (allocated);
14207 for (i = 0; i < nops; i++)
14208 {
14209 for (j = i + 1; j < nops; j++)
14210 if (regs[i] == regs[j])
14211 {
14212 rtx t = peep2_find_free_register (0, nops * 2,
14213 TARGET_THUMB1 ? "l" : "r",
14214 SImode, &allocated);
14215 if (t == NULL_RTX)
14216 return false;
14217 reg_rtxs[i] = t;
14218 regs[i] = REGNO (t);
14219 }
14220 }
14221
14222 /* Compute an ordering that maps the register numbers to an ascending
14223 sequence. */
14224 reg_order[0] = 0;
14225 for (i = 0; i < nops; i++)
14226 if (regs[i] < regs[reg_order[0]])
14227 reg_order[0] = i;
14228
14229 for (i = 1; i < nops; i++)
14230 {
14231 int this_order = reg_order[i - 1];
14232 for (j = 0; j < nops; j++)
14233 if (regs[j] > regs[reg_order[i - 1]]
14234 && (this_order == reg_order[i - 1]
14235 || regs[j] < regs[this_order]))
14236 this_order = j;
14237 reg_order[i] = this_order;
14238 }
14239
14240 /* Ensure that registers that must be live after the instruction end
14241 up with the correct value. */
14242 for (i = 0; i < nops; i++)
14243 {
14244 int this_order = reg_order[i];
14245 if ((this_order != mem_order[i]
14246 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14247 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14248 return false;
14249 }
14250
14251 /* Load the constants. */
14252 for (i = 0; i < nops; i++)
14253 {
14254 rtx op = operands[2 * nops + mem_order[i]];
14255 sorted_regs[i] = regs[reg_order[i]];
14256 emit_move_insn (reg_rtxs[reg_order[i]], op);
14257 }
14258
14259 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14260
14261 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14262 if (TARGET_THUMB1)
14263 {
14264 gcc_assert (base_reg_dies);
14265 write_back = TRUE;
14266 }
14267
14268 if (stm_case == 5)
14269 {
14270 gcc_assert (base_reg_dies);
14271 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14272 offset = 0;
14273 }
14274
14275 addr = plus_constant (Pmode, base_reg_rtx, offset);
14276
14277 for (i = 0; i < nops; i++)
14278 {
14279 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14280 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14281 SImode, addr, 0);
14282 }
14283 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14284 write_back ? offset + i * 4 : 0));
14285 return true;
14286 }
14287
14288 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14289 unaligned copies on processors which support unaligned semantics for those
14290 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14291 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14292 An interleave factor of 1 (the minimum) will perform no interleaving.
14293 Load/store multiple are used for aligned addresses where possible. */
14294
14295 static void
14296 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14297 HOST_WIDE_INT length,
14298 unsigned int interleave_factor)
14299 {
14300 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14301 int *regnos = XALLOCAVEC (int, interleave_factor);
14302 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14303 HOST_WIDE_INT i, j;
14304 HOST_WIDE_INT remaining = length, words;
14305 rtx halfword_tmp = NULL, byte_tmp = NULL;
14306 rtx dst, src;
14307 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14308 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14309 HOST_WIDE_INT srcoffset, dstoffset;
14310 HOST_WIDE_INT src_autoinc, dst_autoinc;
14311 rtx mem, addr;
14312
14313 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14314
14315 /* Use hard registers if we have aligned source or destination so we can use
14316 load/store multiple with contiguous registers. */
14317 if (dst_aligned || src_aligned)
14318 for (i = 0; i < interleave_factor; i++)
14319 regs[i] = gen_rtx_REG (SImode, i);
14320 else
14321 for (i = 0; i < interleave_factor; i++)
14322 regs[i] = gen_reg_rtx (SImode);
14323
14324 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14325 src = copy_addr_to_reg (XEXP (srcbase, 0));
14326
14327 srcoffset = dstoffset = 0;
14328
14329 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14330 For copying the last bytes we want to subtract this offset again. */
14331 src_autoinc = dst_autoinc = 0;
14332
14333 for (i = 0; i < interleave_factor; i++)
14334 regnos[i] = i;
14335
14336 /* Copy BLOCK_SIZE_BYTES chunks. */
14337
14338 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14339 {
14340 /* Load words. */
14341 if (src_aligned && interleave_factor > 1)
14342 {
14343 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14344 TRUE, srcbase, &srcoffset));
14345 src_autoinc += UNITS_PER_WORD * interleave_factor;
14346 }
14347 else
14348 {
14349 for (j = 0; j < interleave_factor; j++)
14350 {
14351 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14352 - src_autoinc));
14353 mem = adjust_automodify_address (srcbase, SImode, addr,
14354 srcoffset + j * UNITS_PER_WORD);
14355 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14356 }
14357 srcoffset += block_size_bytes;
14358 }
14359
14360 /* Store words. */
14361 if (dst_aligned && interleave_factor > 1)
14362 {
14363 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14364 TRUE, dstbase, &dstoffset));
14365 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14366 }
14367 else
14368 {
14369 for (j = 0; j < interleave_factor; j++)
14370 {
14371 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14372 - dst_autoinc));
14373 mem = adjust_automodify_address (dstbase, SImode, addr,
14374 dstoffset + j * UNITS_PER_WORD);
14375 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14376 }
14377 dstoffset += block_size_bytes;
14378 }
14379
14380 remaining -= block_size_bytes;
14381 }
14382
14383 /* Copy any whole words left (note these aren't interleaved with any
14384 subsequent halfword/byte load/stores in the interests of simplicity). */
14385
14386 words = remaining / UNITS_PER_WORD;
14387
14388 gcc_assert (words < interleave_factor);
14389
14390 if (src_aligned && words > 1)
14391 {
14392 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14393 &srcoffset));
14394 src_autoinc += UNITS_PER_WORD * words;
14395 }
14396 else
14397 {
14398 for (j = 0; j < words; j++)
14399 {
14400 addr = plus_constant (Pmode, src,
14401 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14402 mem = adjust_automodify_address (srcbase, SImode, addr,
14403 srcoffset + j * UNITS_PER_WORD);
14404 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14405 }
14406 srcoffset += words * UNITS_PER_WORD;
14407 }
14408
14409 if (dst_aligned && words > 1)
14410 {
14411 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14412 &dstoffset));
14413 dst_autoinc += words * UNITS_PER_WORD;
14414 }
14415 else
14416 {
14417 for (j = 0; j < words; j++)
14418 {
14419 addr = plus_constant (Pmode, dst,
14420 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14421 mem = adjust_automodify_address (dstbase, SImode, addr,
14422 dstoffset + j * UNITS_PER_WORD);
14423 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14424 }
14425 dstoffset += words * UNITS_PER_WORD;
14426 }
14427
14428 remaining -= words * UNITS_PER_WORD;
14429
14430 gcc_assert (remaining < 4);
14431
14432 /* Copy a halfword if necessary. */
14433
14434 if (remaining >= 2)
14435 {
14436 halfword_tmp = gen_reg_rtx (SImode);
14437
14438 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14439 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14440 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14441
14442 /* Either write out immediately, or delay until we've loaded the last
14443 byte, depending on interleave factor. */
14444 if (interleave_factor == 1)
14445 {
14446 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14447 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14448 emit_insn (gen_unaligned_storehi (mem,
14449 gen_lowpart (HImode, halfword_tmp)));
14450 halfword_tmp = NULL;
14451 dstoffset += 2;
14452 }
14453
14454 remaining -= 2;
14455 srcoffset += 2;
14456 }
14457
14458 gcc_assert (remaining < 2);
14459
14460 /* Copy last byte. */
14461
14462 if ((remaining & 1) != 0)
14463 {
14464 byte_tmp = gen_reg_rtx (SImode);
14465
14466 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14467 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14468 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14469
14470 if (interleave_factor == 1)
14471 {
14472 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14473 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14474 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14475 byte_tmp = NULL;
14476 dstoffset++;
14477 }
14478
14479 remaining--;
14480 srcoffset++;
14481 }
14482
14483 /* Store last halfword if we haven't done so already. */
14484
14485 if (halfword_tmp)
14486 {
14487 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14488 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14489 emit_insn (gen_unaligned_storehi (mem,
14490 gen_lowpart (HImode, halfword_tmp)));
14491 dstoffset += 2;
14492 }
14493
14494 /* Likewise for last byte. */
14495
14496 if (byte_tmp)
14497 {
14498 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14499 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14500 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14501 dstoffset++;
14502 }
14503
14504 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14505 }
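
/* Worked example (added for clarity): copying LENGTH = 11 bytes with
   INTERLEAVE_FACTOR = 2 processes one 8-byte block (two word loads followed
   by two word stores, or ldm/stm when the corresponding side is aligned);
   no whole words then remain, so the tail is finished with one halfword
   copy and one byte copy.  */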
14506
14507 /* From mips_adjust_block_mem:
14508
14509 Helper function for doing a loop-based block operation on memory
14510 reference MEM. Each iteration of the loop will operate on LENGTH
14511 bytes of MEM.
14512
14513 Create a new base register for use within the loop and point it to
14514 the start of MEM. Create a new memory reference that uses this
14515 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14516
14517 static void
14518 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14519 rtx *loop_mem)
14520 {
14521 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14522
14523 /* Although the new mem does not refer to a known location,
14524 it does keep up to LENGTH bytes of alignment. */
14525 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14526 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14527 }
14528
14529 /* From mips_block_move_loop:
14530
14531 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14532 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14533 the memory regions do not overlap. */
14534
14535 static void
14536 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14537 unsigned int interleave_factor,
14538 HOST_WIDE_INT bytes_per_iter)
14539 {
14540 rtx src_reg, dest_reg, final_src, test;
14541 HOST_WIDE_INT leftover;
14542
14543 leftover = length % bytes_per_iter;
14544 length -= leftover;
14545
14546 /* Create registers and memory references for use within the loop. */
14547 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14548 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14549
14550 /* Calculate the value that SRC_REG should have after the last iteration of
14551 the loop. */
14552 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14553 0, 0, OPTAB_WIDEN);
14554
14555 /* Emit the start of the loop. */
14556 rtx_code_label *label = gen_label_rtx ();
14557 emit_label (label);
14558
14559 /* Emit the loop body. */
14560 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14561 interleave_factor);
14562
14563 /* Move on to the next block. */
14564 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14565 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14566
14567 /* Emit the loop condition. */
14568 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14569 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14570
14571 /* Mop up any left-over bytes. */
14572 if (leftover)
14573 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14574 }
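
/* Worked example (added for clarity): for LENGTH = 100 and BYTES_PER_ITER
   = 16 the code above peels LEFTOVER = 4 bytes, emits a loop that copies
   96 bytes in six 16-byte iterations, and then copies the remaining 4
   bytes with a straight (non-looping) sequence.  */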
14575
14576 /* Emit a block move when either the source or destination is unaligned (not
14577 aligned to a four-byte boundary). This may need further tuning depending on
14578 core type, optimize_size setting, etc. */
14579
14580 static int
14581 arm_movmemqi_unaligned (rtx *operands)
14582 {
14583 HOST_WIDE_INT length = INTVAL (operands[2]);
14584
14585 if (optimize_size)
14586 {
14587 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14588 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14589 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14590 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14591 or dst_aligned though: allow more interleaving in those cases since the
14592 resulting code can be smaller. */
14593 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14594 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14595
14596 if (length > 12)
14597 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14598 interleave_factor, bytes_per_iter);
14599 else
14600 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14601 interleave_factor);
14602 }
14603 else
14604 {
14605 /* Note that the loop created by arm_block_move_unaligned_loop may be
14606 subject to loop unrolling, which makes tuning this condition a little
14607 redundant. */
14608 if (length > 32)
14609 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14610 else
14611 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14612 }
14613
14614 return 1;
14615 }
14616
14617 int
14618 arm_gen_movmemqi (rtx *operands)
14619 {
14620 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14621 HOST_WIDE_INT srcoffset, dstoffset;
14622 int i;
14623 rtx src, dst, srcbase, dstbase;
14624 rtx part_bytes_reg = NULL;
14625 rtx mem;
14626
14627 if (!CONST_INT_P (operands[2])
14628 || !CONST_INT_P (operands[3])
14629 || INTVAL (operands[2]) > 64)
14630 return 0;
14631
14632 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14633 return arm_movmemqi_unaligned (operands);
14634
14635 if (INTVAL (operands[3]) & 3)
14636 return 0;
14637
14638 dstbase = operands[0];
14639 srcbase = operands[1];
14640
14641 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14642 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14643
14644 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14645 out_words_to_go = INTVAL (operands[2]) / 4;
14646 last_bytes = INTVAL (operands[2]) & 3;
14647 dstoffset = srcoffset = 0;
14648
14649 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14650 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14651
14652 for (i = 0; in_words_to_go >= 2; i+=4)
14653 {
14654 if (in_words_to_go > 4)
14655 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14656 TRUE, srcbase, &srcoffset));
14657 else
14658 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14659 src, FALSE, srcbase,
14660 &srcoffset));
14661
14662 if (out_words_to_go)
14663 {
14664 if (out_words_to_go > 4)
14665 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14666 TRUE, dstbase, &dstoffset));
14667 else if (out_words_to_go != 1)
14668 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14669 out_words_to_go, dst,
14670 (last_bytes == 0
14671 ? FALSE : TRUE),
14672 dstbase, &dstoffset));
14673 else
14674 {
14675 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14676 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14677 if (last_bytes != 0)
14678 {
14679 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14680 dstoffset += 4;
14681 }
14682 }
14683 }
14684
14685 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14686 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14687 }
14688
14689 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14690 if (out_words_to_go)
14691 {
14692 rtx sreg;
14693
14694 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14695 sreg = copy_to_reg (mem);
14696
14697 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14698 emit_move_insn (mem, sreg);
14699 in_words_to_go--;
14700
14701 gcc_assert (!in_words_to_go); /* Sanity check */
14702 }
14703
14704 if (in_words_to_go)
14705 {
14706 gcc_assert (in_words_to_go > 0);
14707
14708 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14709 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14710 }
14711
14712 gcc_assert (!last_bytes || part_bytes_reg);
14713
14714 if (BYTES_BIG_ENDIAN && last_bytes)
14715 {
14716 rtx tmp = gen_reg_rtx (SImode);
14717
14718 /* The bytes we want are in the top end of the word. */
14719 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14720 GEN_INT (8 * (4 - last_bytes))));
14721 part_bytes_reg = tmp;
14722
14723 while (last_bytes)
14724 {
14725 mem = adjust_automodify_address (dstbase, QImode,
14726 plus_constant (Pmode, dst,
14727 last_bytes - 1),
14728 dstoffset + last_bytes - 1);
14729 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14730
14731 if (--last_bytes)
14732 {
14733 tmp = gen_reg_rtx (SImode);
14734 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14735 part_bytes_reg = tmp;
14736 }
14737 }
14738
14739 }
14740 else
14741 {
14742 if (last_bytes > 1)
14743 {
14744 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14745 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14746 last_bytes -= 2;
14747 if (last_bytes)
14748 {
14749 rtx tmp = gen_reg_rtx (SImode);
14750 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14751 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14752 part_bytes_reg = tmp;
14753 dstoffset += 2;
14754 }
14755 }
14756
14757 if (last_bytes)
14758 {
14759 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14760 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14761 }
14762 }
14763
14764 return 1;
14765 }
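/* As a rough illustration (the exact instructions depend on the target):
   a 14-byte copy with word-aligned operands gives in_words_to_go == 4,
   out_words_to_go == 3 and last_bytes == 2, so the expansion is a single
   load-multiple of four words, a store-multiple of three words with
   write-back of the destination pointer, and finally a halfword store
   taken from the low end of the fourth loaded word (on a little-endian
   target).  */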
14766
14767 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14768 by mode size. */
14769 inline static rtx
14770 next_consecutive_mem (rtx mem)
14771 {
14772 machine_mode mode = GET_MODE (mem);
14773 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14774 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14775
14776 return adjust_automodify_address (mem, mode, addr, offset);
14777 }
14778
14779 /* Copy using LDRD/STRD instructions whenever possible.
14780 Returns true upon success. */
14781 bool
14782 gen_movmem_ldrd_strd (rtx *operands)
14783 {
14784 unsigned HOST_WIDE_INT len;
14785 HOST_WIDE_INT align;
14786 rtx src, dst, base;
14787 rtx reg0;
14788 bool src_aligned, dst_aligned;
14789 bool src_volatile, dst_volatile;
14790
14791 gcc_assert (CONST_INT_P (operands[2]));
14792 gcc_assert (CONST_INT_P (operands[3]));
14793
14794 len = UINTVAL (operands[2]);
14795 if (len > 64)
14796 return false;
14797
14798 /* Maximum alignment we can assume for both src and dst buffers. */
14799 align = INTVAL (operands[3]);
14800
14801 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14802 return false;
14803
14804 /* Place src and dst addresses in registers
14805 and update the corresponding mem rtx. */
14806 dst = operands[0];
14807 dst_volatile = MEM_VOLATILE_P (dst);
14808 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14809 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14810 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14811
14812 src = operands[1];
14813 src_volatile = MEM_VOLATILE_P (src);
14814 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14815 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14816 src = adjust_automodify_address (src, VOIDmode, base, 0);
14817
14818 if (!unaligned_access && !(src_aligned && dst_aligned))
14819 return false;
14820
14821 if (src_volatile || dst_volatile)
14822 return false;
14823
14824 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14825 if (!(dst_aligned || src_aligned))
14826 return arm_gen_movmemqi (operands);
14827
14828 src = adjust_address (src, DImode, 0);
14829 dst = adjust_address (dst, DImode, 0);
14830 while (len >= 8)
14831 {
14832 len -= 8;
14833 reg0 = gen_reg_rtx (DImode);
14834 if (src_aligned)
14835 emit_move_insn (reg0, src);
14836 else
14837 emit_insn (gen_unaligned_loaddi (reg0, src));
14838
14839 if (dst_aligned)
14840 emit_move_insn (dst, reg0);
14841 else
14842 emit_insn (gen_unaligned_storedi (dst, reg0));
14843
14844 src = next_consecutive_mem (src);
14845 dst = next_consecutive_mem (dst);
14846 }
14847
14848 gcc_assert (len < 8);
14849 if (len >= 4)
14850 {
14851 /* At least a word but less than a doubleword left to copy. Copy a word. */
14852 reg0 = gen_reg_rtx (SImode);
14853 src = adjust_address (src, SImode, 0);
14854 dst = adjust_address (dst, SImode, 0);
14855 if (src_aligned)
14856 emit_move_insn (reg0, src);
14857 else
14858 emit_insn (gen_unaligned_loadsi (reg0, src));
14859
14860 if (dst_aligned)
14861 emit_move_insn (dst, reg0);
14862 else
14863 emit_insn (gen_unaligned_storesi (dst, reg0));
14864
14865 src = next_consecutive_mem (src);
14866 dst = next_consecutive_mem (dst);
14867 len -= 4;
14868 }
14869
14870 if (len == 0)
14871 return true;
14872
14873 /* Copy the remaining bytes. */
14874 if (len >= 2)
14875 {
14876 dst = adjust_address (dst, HImode, 0);
14877 src = adjust_address (src, HImode, 0);
14878 reg0 = gen_reg_rtx (SImode);
14879 if (src_aligned)
14880 emit_insn (gen_zero_extendhisi2 (reg0, src));
14881 else
14882 emit_insn (gen_unaligned_loadhiu (reg0, src));
14883
14884 if (dst_aligned)
14885 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14886 else
14887 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14888
14889 src = next_consecutive_mem (src);
14890 dst = next_consecutive_mem (dst);
14891 if (len == 2)
14892 return true;
14893 }
14894
14895 dst = adjust_address (dst, QImode, 0);
14896 src = adjust_address (src, QImode, 0);
14897 reg0 = gen_reg_rtx (QImode);
14898 emit_move_insn (reg0, src);
14899 emit_move_insn (dst, reg0);
14900 return true;
14901 }
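/* For example, a 15-byte copy with both buffers word-aligned is expanded
   as one DImode move (typically an LDRD/STRD pair), one SImode move, one
   halfword move and one final byte move: 8 + 4 + 2 + 1 == 15.  */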
14902
14903 /* Select a dominance comparison mode if possible for a test of the general
14904 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14905 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14906 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14907 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14908 In all cases OP will be either EQ or NE, but we don't need to know which
14909 here. If we are unable to support a dominance comparison we return
14910 CC mode. This will then fail to match for the RTL expressions that
14911 generate this call. */
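/* For example (register numbers here are arbitrary), the test
      (eq (reg r0) (const_int 0)) || (le (reg r1) (const_int 0))
   with COND_OR == DOM_CC_X_OR_Y selects CC_DLEmode, since a true EQ
   implies a true LE for the same operands.  */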
14912 machine_mode
14913 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14914 {
14915 enum rtx_code cond1, cond2;
14916 int swapped = 0;
14917
14918 /* Currently we will probably get the wrong result if the individual
14919 comparisons are not simple. This also ensures that it is safe to
14920 reverse a comparison if necessary. */
14921 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14922 != CCmode)
14923 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14924 != CCmode))
14925 return CCmode;
14926
14927 /* The if_then_else variant of this tests the second condition if the
14928 first passes, but is true if the first fails. Reverse the first
14929 condition to get a true "inclusive-or" expression. */
14930 if (cond_or == DOM_CC_NX_OR_Y)
14931 cond1 = reverse_condition (cond1);
14932
14933 /* If the comparisons are not equal, and one doesn't dominate the other,
14934 then we can't do this. */
14935 if (cond1 != cond2
14936 && !comparison_dominates_p (cond1, cond2)
14937 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14938 return CCmode;
14939
14940 if (swapped)
14941 std::swap (cond1, cond2);
14942
14943 switch (cond1)
14944 {
14945 case EQ:
14946 if (cond_or == DOM_CC_X_AND_Y)
14947 return CC_DEQmode;
14948
14949 switch (cond2)
14950 {
14951 case EQ: return CC_DEQmode;
14952 case LE: return CC_DLEmode;
14953 case LEU: return CC_DLEUmode;
14954 case GE: return CC_DGEmode;
14955 case GEU: return CC_DGEUmode;
14956 default: gcc_unreachable ();
14957 }
14958
14959 case LT:
14960 if (cond_or == DOM_CC_X_AND_Y)
14961 return CC_DLTmode;
14962
14963 switch (cond2)
14964 {
14965 case LT:
14966 return CC_DLTmode;
14967 case LE:
14968 return CC_DLEmode;
14969 case NE:
14970 return CC_DNEmode;
14971 default:
14972 gcc_unreachable ();
14973 }
14974
14975 case GT:
14976 if (cond_or == DOM_CC_X_AND_Y)
14977 return CC_DGTmode;
14978
14979 switch (cond2)
14980 {
14981 case GT:
14982 return CC_DGTmode;
14983 case GE:
14984 return CC_DGEmode;
14985 case NE:
14986 return CC_DNEmode;
14987 default:
14988 gcc_unreachable ();
14989 }
14990
14991 case LTU:
14992 if (cond_or == DOM_CC_X_AND_Y)
14993 return CC_DLTUmode;
14994
14995 switch (cond2)
14996 {
14997 case LTU:
14998 return CC_DLTUmode;
14999 case LEU:
15000 return CC_DLEUmode;
15001 case NE:
15002 return CC_DNEmode;
15003 default:
15004 gcc_unreachable ();
15005 }
15006
15007 case GTU:
15008 if (cond_or == DOM_CC_X_AND_Y)
15009 return CC_DGTUmode;
15010
15011 switch (cond2)
15012 {
15013 case GTU:
15014 return CC_DGTUmode;
15015 case GEU:
15016 return CC_DGEUmode;
15017 case NE:
15018 return CC_DNEmode;
15019 default:
15020 gcc_unreachable ();
15021 }
15022
15023 /* The remaining cases only occur when both comparisons are the
15024 same. */
15025 case NE:
15026 gcc_assert (cond1 == cond2);
15027 return CC_DNEmode;
15028
15029 case LE:
15030 gcc_assert (cond1 == cond2);
15031 return CC_DLEmode;
15032
15033 case GE:
15034 gcc_assert (cond1 == cond2);
15035 return CC_DGEmode;
15036
15037 case LEU:
15038 gcc_assert (cond1 == cond2);
15039 return CC_DLEUmode;
15040
15041 case GEU:
15042 gcc_assert (cond1 == cond2);
15043 return CC_DGEUmode;
15044
15045 default:
15046 gcc_unreachable ();
15047 }
15048 }
15049
15050 machine_mode
15051 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15052 {
15053 /* All floating point compares return CCFP if it is an equality
15054 comparison, and CCFPE otherwise. */
15055 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15056 {
15057 switch (op)
15058 {
15059 case EQ:
15060 case NE:
15061 case UNORDERED:
15062 case ORDERED:
15063 case UNLT:
15064 case UNLE:
15065 case UNGT:
15066 case UNGE:
15067 case UNEQ:
15068 case LTGT:
15069 return CCFPmode;
15070
15071 case LT:
15072 case LE:
15073 case GT:
15074 case GE:
15075 return CCFPEmode;
15076
15077 default:
15078 gcc_unreachable ();
15079 }
15080 }
15081
15082 /* A compare with a shifted operand. Because of canonicalization, the
15083 comparison will have to be swapped when we emit the assembler. */
15084 if (GET_MODE (y) == SImode
15085 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15086 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15087 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15088 || GET_CODE (x) == ROTATERT))
15089 return CC_SWPmode;
15090
15091 /* This operation is performed swapped, but since we only rely on the Z
15092 flag we don't need an additional mode. */
15093 if (GET_MODE (y) == SImode
15094 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15095 && GET_CODE (x) == NEG
15096 && (op == EQ || op == NE))
15097 return CC_Zmode;
15098
15099 /* This is a special case that is used by combine to allow a
15100 comparison of a shifted byte load to be split into a zero-extend
15101 followed by a comparison of the shifted integer (only valid for
15102 equalities and unsigned inequalities). */
15103 if (GET_MODE (x) == SImode
15104 && GET_CODE (x) == ASHIFT
15105 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15106 && GET_CODE (XEXP (x, 0)) == SUBREG
15107 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15108 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15109 && (op == EQ || op == NE
15110 || op == GEU || op == GTU || op == LTU || op == LEU)
15111 && CONST_INT_P (y))
15112 return CC_Zmode;
15113
15114 /* A construct for a conditional compare: if the false arm contains
15115 0, then both conditions must be true; otherwise either condition
15116 must be true. Not all conditions are possible, so CCmode is
15117 returned if it can't be done. */
15118 if (GET_CODE (x) == IF_THEN_ELSE
15119 && (XEXP (x, 2) == const0_rtx
15120 || XEXP (x, 2) == const1_rtx)
15121 && COMPARISON_P (XEXP (x, 0))
15122 && COMPARISON_P (XEXP (x, 1)))
15123 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15124 INTVAL (XEXP (x, 2)));
15125
15126 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15127 if (GET_CODE (x) == AND
15128 && (op == EQ || op == NE)
15129 && COMPARISON_P (XEXP (x, 0))
15130 && COMPARISON_P (XEXP (x, 1)))
15131 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15132 DOM_CC_X_AND_Y);
15133
15134 if (GET_CODE (x) == IOR
15135 && (op == EQ || op == NE)
15136 && COMPARISON_P (XEXP (x, 0))
15137 && COMPARISON_P (XEXP (x, 1)))
15138 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15139 DOM_CC_X_OR_Y);
15140
15141 /* An operation (on Thumb) where we want to test for a single bit.
15142 This is done by shifting that bit up into the top bit of a
15143 scratch register; we can then branch on the sign bit. */
15144 if (TARGET_THUMB1
15145 && GET_MODE (x) == SImode
15146 && (op == EQ || op == NE)
15147 && GET_CODE (x) == ZERO_EXTRACT
15148 && XEXP (x, 1) == const1_rtx)
15149 return CC_Nmode;
15150
15151 /* For an operation that sets the condition codes as a side-effect, the
15152 V flag is not set correctly, so we can only use comparisons where
15153 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15154 instead.) */
15155 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15156 if (GET_MODE (x) == SImode
15157 && y == const0_rtx
15158 && (op == EQ || op == NE || op == LT || op == GE)
15159 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15160 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15161 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15162 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15163 || GET_CODE (x) == LSHIFTRT
15164 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15165 || GET_CODE (x) == ROTATERT
15166 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15167 return CC_NOOVmode;
15168
15169 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15170 return CC_Zmode;
15171
15172 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15173 && GET_CODE (x) == PLUS
15174 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15175 return CC_Cmode;
15176
15177 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15178 {
15179 switch (op)
15180 {
15181 case EQ:
15182 case NE:
15183 /* A DImode comparison against zero can be implemented by
15184 or'ing the two halves together. */
15185 if (y == const0_rtx)
15186 return CC_Zmode;
15187
15188 /* We can do an equality test in three Thumb instructions. */
15189 if (!TARGET_32BIT)
15190 return CC_Zmode;
15191
15192 /* FALLTHROUGH */
15193
15194 case LTU:
15195 case LEU:
15196 case GTU:
15197 case GEU:
15198 /* DImode unsigned comparisons can be implemented by cmp +
15199 cmpeq without a scratch register. Not worth doing in
15200 Thumb-2. */
15201 if (TARGET_32BIT)
15202 return CC_CZmode;
15203
15204 /* FALLTHROUGH */
15205
15206 case LT:
15207 case LE:
15208 case GT:
15209 case GE:
15210 /* DImode signed and unsigned comparisons can be implemented
15211 by cmp + sbcs with a scratch register, but that does not
15212 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15213 gcc_assert (op != EQ && op != NE);
15214 return CC_NCVmode;
15215
15216 default:
15217 gcc_unreachable ();
15218 }
15219 }
15220
15221 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15222 return GET_MODE (x);
15223
15224 return CCmode;
15225 }
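/* For instance, the canonical carry test for unsigned addition overflow,
      (ltu (plus (reg r0) (reg r1)) (reg r1)),
   matches the PLUS case above and gets CC_Cmode, while an equality test
   against a negated register, (eq (neg (reg r1)) (reg r0)), only needs
   the Z flag and gets CC_Zmode.  (Register numbers are illustrative.)  */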
15226
15227 /* X and Y are two things to compare using CODE. Emit the compare insn and
15228 return the rtx for register 0 in the proper mode. FP means this is a
15229 floating point compare: I don't think that it is needed on the arm. */
15230 rtx
15231 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15232 {
15233 machine_mode mode;
15234 rtx cc_reg;
15235 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15236
15237 /* We might have X as a constant, Y as a register because of the predicates
15238 used for cmpdi. If so, force X to a register here. */
15239 if (dimode_comparison && !REG_P (x))
15240 x = force_reg (DImode, x);
15241
15242 mode = SELECT_CC_MODE (code, x, y);
15243 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15244
15245 if (dimode_comparison
15246 && mode != CC_CZmode)
15247 {
15248 rtx clobber, set;
15249
15250 /* To compare two non-zero values for equality, XOR them and
15251 then compare against zero. Not used for ARM mode; there
15252 CC_CZmode is cheaper. */
15253 if (mode == CC_Zmode && y != const0_rtx)
15254 {
15255 gcc_assert (!reload_completed);
15256 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15257 y = const0_rtx;
15258 }
15259
15260 /* A scratch register is required. */
15261 if (reload_completed)
15262 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15263 else
15264 scratch = gen_rtx_SCRATCH (SImode);
15265
15266 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15267 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15269 }
15270 else
15271 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15272
15273 return cc_reg;
15274 }
15275
15276 /* Generate a sequence of insns that will generate the correct return
15277 address mask depending on the physical architecture that the program
15278 is running on. */
15279 rtx
15280 arm_gen_return_addr_mask (void)
15281 {
15282 rtx reg = gen_reg_rtx (Pmode);
15283
15284 emit_insn (gen_return_addr_mask (reg));
15285 return reg;
15286 }
15287
15288 void
15289 arm_reload_in_hi (rtx *operands)
15290 {
15291 rtx ref = operands[1];
15292 rtx base, scratch;
15293 HOST_WIDE_INT offset = 0;
15294
15295 if (GET_CODE (ref) == SUBREG)
15296 {
15297 offset = SUBREG_BYTE (ref);
15298 ref = SUBREG_REG (ref);
15299 }
15300
15301 if (REG_P (ref))
15302 {
15303 /* We have a pseudo which has been spilt onto the stack; there
15304 are two cases here: the first where there is a simple
15305 stack-slot replacement and a second where the stack-slot is
15306 out of range, or is used as a subreg. */
15307 if (reg_equiv_mem (REGNO (ref)))
15308 {
15309 ref = reg_equiv_mem (REGNO (ref));
15310 base = find_replacement (&XEXP (ref, 0));
15311 }
15312 else
15313 /* The slot is out of range, or was dressed up in a SUBREG. */
15314 base = reg_equiv_address (REGNO (ref));
15315 }
15316 else
15317 base = find_replacement (&XEXP (ref, 0));
15318
15319 /* Handle the case where the address is too complex to be offset by 1. */
15320 if (GET_CODE (base) == MINUS
15321 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15322 {
15323 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15324
15325 emit_set_insn (base_plus, base);
15326 base = base_plus;
15327 }
15328 else if (GET_CODE (base) == PLUS)
15329 {
15330 /* The addend must be CONST_INT, or we would have dealt with it above. */
15331 HOST_WIDE_INT hi, lo;
15332
15333 offset += INTVAL (XEXP (base, 1));
15334 base = XEXP (base, 0);
15335
15336 /* Rework the address into a legal sequence of insns. */
15337 /* Valid range for lo is -4095 -> 4095 */
15338 lo = (offset >= 0
15339 ? (offset & 0xfff)
15340 : -((-offset) & 0xfff));
15341
15342 /* Corner case, if lo is the max offset then we would be out of range
15343 once we have added the additional 1 below, so bump the msb into the
15344 pre-loading insn(s). */
15345 if (lo == 4095)
15346 lo &= 0x7ff;
15347
15348 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15349 ^ (HOST_WIDE_INT) 0x80000000)
15350 - (HOST_WIDE_INT) 0x80000000);
15351
15352 gcc_assert (hi + lo == offset);
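      /* Worked example (values for illustration only): offset == 0x1234
	 splits into lo == 0x234 and hi == 0x1000.  For the corner case
	 offset == 4095, lo is reduced to 0x7ff and hi becomes 0x800, so
	 that both offset and offset + 1 stay within the +/-4095 range of
	 the byte loads below.  */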
15353
15354 if (hi != 0)
15355 {
15356 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15357
15358 /* Get the base address; addsi3 knows how to handle constants
15359 that require more than one insn. */
15360 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15361 base = base_plus;
15362 offset = lo;
15363 }
15364 }
15365
15366 /* Operands[2] may overlap operands[0] (though it won't overlap
15367 operands[1]), that's why we asked for a DImode reg -- so we can
15368 use the bit that does not overlap. */
15369 if (REGNO (operands[2]) == REGNO (operands[0]))
15370 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15371 else
15372 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15373
15374 emit_insn (gen_zero_extendqisi2 (scratch,
15375 gen_rtx_MEM (QImode,
15376 plus_constant (Pmode, base,
15377 offset))));
15378 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15379 gen_rtx_MEM (QImode,
15380 plus_constant (Pmode, base,
15381 offset + 1))));
15382 if (!BYTES_BIG_ENDIAN)
15383 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15384 gen_rtx_IOR (SImode,
15385 gen_rtx_ASHIFT
15386 (SImode,
15387 gen_rtx_SUBREG (SImode, operands[0], 0),
15388 GEN_INT (8)),
15389 scratch));
15390 else
15391 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15392 gen_rtx_IOR (SImode,
15393 gen_rtx_ASHIFT (SImode, scratch,
15394 GEN_INT (8)),
15395 gen_rtx_SUBREG (SImode, operands[0], 0)));
15396 }
15397
15398 /* Handle storing a half-word to memory during reload by synthesizing as two
15399 byte stores. Take care not to clobber the input values until after we
15400 have moved them somewhere safe. This code assumes that if the DImode
15401 scratch in operands[2] overlaps either the input value or output address
15402 in some way, then that value must die in this insn (we absolutely need
15403 two scratch registers for some corner cases). */
15404 void
15405 arm_reload_out_hi (rtx *operands)
15406 {
15407 rtx ref = operands[0];
15408 rtx outval = operands[1];
15409 rtx base, scratch;
15410 HOST_WIDE_INT offset = 0;
15411
15412 if (GET_CODE (ref) == SUBREG)
15413 {
15414 offset = SUBREG_BYTE (ref);
15415 ref = SUBREG_REG (ref);
15416 }
15417
15418 if (REG_P (ref))
15419 {
15420 /* We have a pseudo which has been spilt onto the stack; there
15421 are two cases here: the first where there is a simple
15422 stack-slot replacement and a second where the stack-slot is
15423 out of range, or is used as a subreg. */
15424 if (reg_equiv_mem (REGNO (ref)))
15425 {
15426 ref = reg_equiv_mem (REGNO (ref));
15427 base = find_replacement (&XEXP (ref, 0));
15428 }
15429 else
15430 /* The slot is out of range, or was dressed up in a SUBREG. */
15431 base = reg_equiv_address (REGNO (ref));
15432 }
15433 else
15434 base = find_replacement (&XEXP (ref, 0));
15435
15436 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15437
15438 /* Handle the case where the address is too complex to be offset by 1. */
15439 if (GET_CODE (base) == MINUS
15440 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15441 {
15442 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15443
15444 /* Be careful not to destroy OUTVAL. */
15445 if (reg_overlap_mentioned_p (base_plus, outval))
15446 {
15447 /* Updating base_plus might destroy outval, see if we can
15448 swap the scratch and base_plus. */
15449 if (!reg_overlap_mentioned_p (scratch, outval))
15450 std::swap (scratch, base_plus);
15451 else
15452 {
15453 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15454
15455 /* Be conservative and copy OUTVAL into the scratch now,
15456 this should only be necessary if outval is a subreg
15457 of something larger than a word. */
15458 /* XXX Might this clobber base? I can't see how it can,
15459 since scratch is known to overlap with OUTVAL, and
15460 must be wider than a word. */
15461 emit_insn (gen_movhi (scratch_hi, outval));
15462 outval = scratch_hi;
15463 }
15464 }
15465
15466 emit_set_insn (base_plus, base);
15467 base = base_plus;
15468 }
15469 else if (GET_CODE (base) == PLUS)
15470 {
15471 /* The addend must be CONST_INT, or we would have dealt with it above. */
15472 HOST_WIDE_INT hi, lo;
15473
15474 offset += INTVAL (XEXP (base, 1));
15475 base = XEXP (base, 0);
15476
15477 /* Rework the address into a legal sequence of insns. */
15478 /* Valid range for lo is -4095 -> 4095 */
15479 lo = (offset >= 0
15480 ? (offset & 0xfff)
15481 : -((-offset) & 0xfff));
15482
15483 /* Corner case, if lo is the max offset then we would be out of range
15484 once we have added the additional 1 below, so bump the msb into the
15485 pre-loading insn(s). */
15486 if (lo == 4095)
15487 lo &= 0x7ff;
15488
15489 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15490 ^ (HOST_WIDE_INT) 0x80000000)
15491 - (HOST_WIDE_INT) 0x80000000);
15492
15493 gcc_assert (hi + lo == offset);
15494
15495 if (hi != 0)
15496 {
15497 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15498
15499 /* Be careful not to destroy OUTVAL. */
15500 if (reg_overlap_mentioned_p (base_plus, outval))
15501 {
15502 /* Updating base_plus might destroy outval, see if we
15503 can swap the scratch and base_plus. */
15504 if (!reg_overlap_mentioned_p (scratch, outval))
15505 std::swap (scratch, base_plus);
15506 else
15507 {
15508 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15509
15510 /* Be conservative and copy outval into scratch now,
15511 this should only be necessary if outval is a
15512 subreg of something larger than a word. */
15513 /* XXX Might this clobber base? I can't see how it
15514 can, since scratch is known to overlap with
15515 outval. */
15516 emit_insn (gen_movhi (scratch_hi, outval));
15517 outval = scratch_hi;
15518 }
15519 }
15520
15521 /* Get the base address; addsi3 knows how to handle constants
15522 that require more than one insn. */
15523 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15524 base = base_plus;
15525 offset = lo;
15526 }
15527 }
15528
15529 if (BYTES_BIG_ENDIAN)
15530 {
15531 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15532 plus_constant (Pmode, base,
15533 offset + 1)),
15534 gen_lowpart (QImode, outval)));
15535 emit_insn (gen_lshrsi3 (scratch,
15536 gen_rtx_SUBREG (SImode, outval, 0),
15537 GEN_INT (8)));
15538 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15539 offset)),
15540 gen_lowpart (QImode, scratch)));
15541 }
15542 else
15543 {
15544 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15545 offset)),
15546 gen_lowpart (QImode, outval)));
15547 emit_insn (gen_lshrsi3 (scratch,
15548 gen_rtx_SUBREG (SImode, outval, 0),
15549 GEN_INT (8)));
15550 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15551 plus_constant (Pmode, base,
15552 offset + 1)),
15553 gen_lowpart (QImode, scratch)));
15554 }
15555 }
15556
15557 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15558 (padded to the size of a word) should be passed in a register. */
15559
15560 static bool
15561 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15562 {
15563 if (TARGET_AAPCS_BASED)
15564 return must_pass_in_stack_var_size (mode, type);
15565 else
15566 return must_pass_in_stack_var_size_or_pad (mode, type);
15567 }
15568
15569
15570 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15571 Return true if an argument passed on the stack should be padded upwards,
15572 i.e. if the least-significant byte has useful data.
15573 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15574 aggregate types are placed in the lowest memory address. */
15575
15576 bool
15577 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15578 {
15579 if (!TARGET_AAPCS_BASED)
15580 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15581
15582 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15583 return false;
15584
15585 return true;
15586 }
15587
15588
15589 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15590 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15591 register has useful data, and return the opposite if the most
15592 significant byte does. */
15593
15594 bool
15595 arm_pad_reg_upward (machine_mode mode,
15596 tree type, int first ATTRIBUTE_UNUSED)
15597 {
15598 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15599 {
15600 /* For AAPCS, small aggregates, small fixed-point types,
15601 and small complex types are always padded upwards. */
15602 if (type)
15603 {
15604 if ((AGGREGATE_TYPE_P (type)
15605 || TREE_CODE (type) == COMPLEX_TYPE
15606 || FIXED_POINT_TYPE_P (type))
15607 && int_size_in_bytes (type) <= 4)
15608 return true;
15609 }
15610 else
15611 {
15612 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15613 && GET_MODE_SIZE (mode) <= 4)
15614 return true;
15615 }
15616 }
15617
15618 /* Otherwise, use default padding. */
15619 return !BYTES_BIG_ENDIAN;
15620 }
15621
15622 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15623 assuming that the address in the base register is word aligned. */
15624 bool
15625 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15626 {
15627 HOST_WIDE_INT max_offset;
15628
15629 /* Offset must be a multiple of 4 in Thumb mode. */
15630 if (TARGET_THUMB2 && ((offset & 3) != 0))
15631 return false;
15632
15633 if (TARGET_THUMB2)
15634 max_offset = 1020;
15635 else if (TARGET_ARM)
15636 max_offset = 255;
15637 else
15638 return false;
15639
15640 return ((offset <= max_offset) && (offset >= -max_offset));
15641 }
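/* For example, an offset of 256 is accepted in Thumb-2 (a multiple of 4
   and within +/-1020) but rejected in ARM state (beyond +/-255), while an
   offset of 6 is accepted in ARM state but rejected in Thumb-2 because it
   is not a multiple of 4.  */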
15642
15643 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15644 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15645 Assumes that the address in the base register RN is word aligned. Pattern
15646 guarantees that both memory accesses use the same base register,
15647 the offsets are constants within the range, and the gap between the offsets is 4.
15648 If reload is complete then check that the registers are legal. WBACK indicates whether
15649 address is updated. LOAD indicates whether memory access is load or store. */
15650 bool
15651 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15652 bool wback, bool load)
15653 {
15654 unsigned int t, t2, n;
15655
15656 if (!reload_completed)
15657 return true;
15658
15659 if (!offset_ok_for_ldrd_strd (offset))
15660 return false;
15661
15662 t = REGNO (rt);
15663 t2 = REGNO (rt2);
15664 n = REGNO (rn);
15665
15666 if ((TARGET_THUMB2)
15667 && ((wback && (n == t || n == t2))
15668 || (t == SP_REGNUM)
15669 || (t == PC_REGNUM)
15670 || (t2 == SP_REGNUM)
15671 || (t2 == PC_REGNUM)
15672 || (!load && (n == PC_REGNUM))
15673 || (load && (t == t2))
15674 /* Triggers Cortex-M3 LDRD errata. */
15675 || (!wback && load && fix_cm3_ldrd && (n == t))))
15676 return false;
15677
15678 if ((TARGET_ARM)
15679 && ((wback && (n == t || n == t2))
15680 || (t2 == PC_REGNUM)
15681 || (t % 2 != 0) /* First destination register is not even. */
15682 || (t2 != t + 1)
15683 /* PC can be used as base register (for offset addressing only),
15684 but it is deprecated. */
15685 || (n == PC_REGNUM)))
15686 return false;
15687
15688 return true;
15689 }
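/* For example, once reload has completed, an ARM-state LDRD of r4, r5 from
   [r6, #8] passes these checks, whereas using r5, r6 as the register pair
   is rejected because the first register must be even and the second must
   be the next consecutive register.  */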
15690
15691 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15692 operand MEM's address contains an immediate offset from the base
15693 register and has no side effects, in which case it sets BASE and
15694 OFFSET accordingly. */
15695 static bool
15696 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15697 {
15698 rtx addr;
15699
15700 gcc_assert (base != NULL && offset != NULL);
15701
15702 /* TODO: Handle more general memory operand patterns, such as
15703 PRE_DEC and PRE_INC. */
15704
15705 if (side_effects_p (mem))
15706 return false;
15707
15708 /* Can't deal with subregs. */
15709 if (GET_CODE (mem) == SUBREG)
15710 return false;
15711
15712 gcc_assert (MEM_P (mem));
15713
15714 *offset = const0_rtx;
15715
15716 addr = XEXP (mem, 0);
15717
15718 /* If addr isn't valid for DImode, then we can't handle it. */
15719 if (!arm_legitimate_address_p (DImode, addr,
15720 reload_in_progress || reload_completed))
15721 return false;
15722
15723 if (REG_P (addr))
15724 {
15725 *base = addr;
15726 return true;
15727 }
15728 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15729 {
15730 *base = XEXP (addr, 0);
15731 *offset = XEXP (addr, 1);
15732 return (REG_P (*base) && CONST_INT_P (*offset));
15733 }
15734
15735 return false;
15736 }
15737
15738 /* Called from a peephole2 to replace two word-size accesses with a
15739 single LDRD/STRD instruction. Returns true iff we can generate a
15740 new instruction sequence. That is, both accesses use the same base
15741 register and the gap between constant offsets is 4. This function
15742 may reorder its operands to match ldrd/strd RTL templates.
15743 OPERANDS are the operands found by the peephole matcher;
15744 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15745 corresponding memory operands. LOAD indicates whether the access
15746 is a load or a store. CONST_STORE indicates a store of constant
15747 integer values held in OPERANDS[4,5], and assumes that the pattern
15748 is four insns long, for the purpose of checking dead registers.
15749 COMMUTE indicates that register operands may be reordered. */
15750 bool
15751 gen_operands_ldrd_strd (rtx *operands, bool load,
15752 bool const_store, bool commute)
15753 {
15754 int nops = 2;
15755 HOST_WIDE_INT offsets[2], offset;
15756 rtx base = NULL_RTX;
15757 rtx cur_base, cur_offset, tmp;
15758 int i, gap;
15759 HARD_REG_SET regset;
15760
15761 gcc_assert (!const_store || !load);
15762 /* Check that the memory references are immediate offsets from the
15763 same base register. Extract the base register, the destination
15764 registers, and the corresponding memory offsets. */
15765 for (i = 0; i < nops; i++)
15766 {
15767 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15768 return false;
15769
15770 if (i == 0)
15771 base = cur_base;
15772 else if (REGNO (base) != REGNO (cur_base))
15773 return false;
15774
15775 offsets[i] = INTVAL (cur_offset);
15776 if (GET_CODE (operands[i]) == SUBREG)
15777 {
15778 tmp = SUBREG_REG (operands[i]);
15779 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15780 operands[i] = tmp;
15781 }
15782 }
15783
15784 /* Make sure there is no dependency between the individual loads. */
15785 if (load && REGNO (operands[0]) == REGNO (base))
15786 return false; /* RAW */
15787
15788 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15789 return false; /* WAW */
15790
15791 /* If the same input register is used in both stores
15792 when storing different constants, try to find a free register.
15793 For example, the code
15794 mov r0, 0
15795 str r0, [r2]
15796 mov r0, 1
15797 str r0, [r2, #4]
15798 can be transformed into
15799 mov r1, 0
15800 strd r1, r0, [r2]
15801 in Thumb mode assuming that r1 is free. */
15802 if (const_store
15803 && REGNO (operands[0]) == REGNO (operands[1])
15804 && INTVAL (operands[4]) != INTVAL (operands[5]))
15805 {
15806 if (TARGET_THUMB2)
15807 {
15808 CLEAR_HARD_REG_SET (regset);
15809 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15810 if (tmp == NULL_RTX)
15811 return false;
15812
15813 /* Use the new register in the first load to ensure that
15814 if the original input register is not dead after peephole,
15815 then it will have the correct constant value. */
15816 operands[0] = tmp;
15817 }
15818 else if (TARGET_ARM)
15819 {
15820 return false;
15821 int regno = REGNO (operands[0]);
15822 if (!peep2_reg_dead_p (4, operands[0]))
15823 {
15824 /* When the input register is even and is not dead after the
15825 pattern, it has to hold the second constant but we cannot
15826 form a legal STRD in ARM mode with this register as the second
15827 register. */
15828 if (regno % 2 == 0)
15829 return false;
15830
15831 /* Is regno-1 free? */
15832 SET_HARD_REG_SET (regset);
15833 CLEAR_HARD_REG_BIT(regset, regno - 1);
15834 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15835 if (tmp == NULL_RTX)
15836 return false;
15837
15838 operands[0] = tmp;
15839 }
15840 else
15841 {
15842 /* Find a DImode register. */
15843 CLEAR_HARD_REG_SET (regset);
15844 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15845 if (tmp != NULL_RTX)
15846 {
15847 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15848 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15849 }
15850 else
15851 {
15852 /* Can we use the input register to form a DI register? */
15853 SET_HARD_REG_SET (regset);
15854 CLEAR_HARD_REG_BIT(regset,
15855 regno % 2 == 0 ? regno + 1 : regno - 1);
15856 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15857 if (tmp == NULL_RTX)
15858 return false;
15859 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15860 }
15861 }
15862
15863 gcc_assert (operands[0] != NULL_RTX);
15864 gcc_assert (operands[1] != NULL_RTX);
15865 gcc_assert (REGNO (operands[0]) % 2 == 0);
15866 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15867 }
15868 }
15869
15870 /* Make sure the instructions are ordered with lower memory access first. */
15871 if (offsets[0] > offsets[1])
15872 {
15873 gap = offsets[0] - offsets[1];
15874 offset = offsets[1];
15875
15876 /* Swap the instructions such that lower memory is accessed first. */
15877 std::swap (operands[0], operands[1]);
15878 std::swap (operands[2], operands[3]);
15879 if (const_store)
15880 std::swap (operands[4], operands[5]);
15881 }
15882 else
15883 {
15884 gap = offsets[1] - offsets[0];
15885 offset = offsets[0];
15886 }
15887
15888 /* Make sure accesses are to consecutive memory locations. */
15889 if (gap != 4)
15890 return false;
15891
15892 /* Make sure we generate legal instructions. */
15893 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15894 false, load))
15895 return true;
15896
15897 /* In Thumb state, where the registers are almost unconstrained, there
15898 is little hope of fixing it. */
15899 if (TARGET_THUMB2)
15900 return false;
15901
15902 if (load && commute)
15903 {
15904 /* Try reordering registers. */
15905 std::swap (operands[0], operands[1]);
15906 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15907 false, load))
15908 return true;
15909 }
15910
15911 if (const_store)
15912 {
15913 /* If input registers are dead after this pattern, they can be
15914 reordered or replaced by other registers that are free in the
15915 current pattern. */
15916 if (!peep2_reg_dead_p (4, operands[0])
15917 || !peep2_reg_dead_p (4, operands[1]))
15918 return false;
15919
15920 /* Try to reorder the input registers. */
15921 /* For example, the code
15922 mov r0, 0
15923 mov r1, 1
15924 str r1, [r2]
15925 str r0, [r2, #4]
15926 can be transformed into
15927 mov r1, 0
15928 mov r0, 1
15929 strd r0, [r2]
15930 */
15931 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15932 false, false))
15933 {
15934 std::swap (operands[0], operands[1]);
15935 return true;
15936 }
15937
15938 /* Try to find a free DI register. */
15939 CLEAR_HARD_REG_SET (regset);
15940 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15941 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15942 while (true)
15943 {
15944 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15945 if (tmp == NULL_RTX)
15946 return false;
15947
15948 /* DREG must be an even-numbered register in DImode.
15949 Split it into SI registers. */
15950 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15951 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15952 gcc_assert (operands[0] != NULL_RTX);
15953 gcc_assert (operands[1] != NULL_RTX);
15954 gcc_assert (REGNO (operands[0]) % 2 == 0);
15955 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15956
15957 return (operands_ok_ldrd_strd (operands[0], operands[1],
15958 base, offset,
15959 false, load));
15960 }
15961 }
15962
15963 return false;
15964 }
15965
15966
15967
15968 \f
15969 /* Print a symbolic form of X to the debug file, F. */
15970 static void
15971 arm_print_value (FILE *f, rtx x)
15972 {
15973 switch (GET_CODE (x))
15974 {
15975 case CONST_INT:
15976 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15977 return;
15978
15979 case CONST_DOUBLE:
15980 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15981 return;
15982
15983 case CONST_VECTOR:
15984 {
15985 int i;
15986
15987 fprintf (f, "<");
15988 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15989 {
15990 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15991 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15992 fputc (',', f);
15993 }
15994 fprintf (f, ">");
15995 }
15996 return;
15997
15998 case CONST_STRING:
15999 fprintf (f, "\"%s\"", XSTR (x, 0));
16000 return;
16001
16002 case SYMBOL_REF:
16003 fprintf (f, "`%s'", XSTR (x, 0));
16004 return;
16005
16006 case LABEL_REF:
16007 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16008 return;
16009
16010 case CONST:
16011 arm_print_value (f, XEXP (x, 0));
16012 return;
16013
16014 case PLUS:
16015 arm_print_value (f, XEXP (x, 0));
16016 fprintf (f, "+");
16017 arm_print_value (f, XEXP (x, 1));
16018 return;
16019
16020 case PC:
16021 fprintf (f, "pc");
16022 return;
16023
16024 default:
16025 fprintf (f, "????");
16026 return;
16027 }
16028 }
16029 \f
16030 /* Routines for manipulation of the constant pool. */
16031
16032 /* Arm instructions cannot load a large constant directly into a
16033 register; they have to come from a pc relative load. The constant
16034 must therefore be placed in the addressable range of the pc
16035 relative load. Depending on the precise pc relative load
16036 instruction the range is somewhere between 256 bytes and 4k. This
16037 means that we often have to dump a constant inside a function, and
16038 generate code to branch around it.
16039
16040 It is important to minimize this, since the branches will slow
16041 things down and make the code larger.
16042
16043 Normally we can hide the table after an existing unconditional
16044 branch so that there is no interruption of the flow, but in the
16045 worst case the code looks like this:
16046
16047 ldr rn, L1
16048 ...
16049 b L2
16050 align
16051 L1: .long value
16052 L2:
16053 ...
16054
16055 ldr rn, L3
16056 ...
16057 b L4
16058 align
16059 L3: .long value
16060 L4:
16061 ...
16062
16063 We fix this by performing a scan after scheduling, which notices
16064 which instructions need to have their operands fetched from the
16065 constant table and builds the table.
16066
16067 The algorithm starts by building a table of all the constants that
16068 need fixing up and all the natural barriers in the function (places
16069 where a constant table can be dropped without breaking the flow).
16070 For each fixup we note how far the pc-relative replacement will be
16071 able to reach and the offset of the instruction into the function.
16072
16073 Having built the table we then group the fixes together to form
16074 tables that are as large as possible (subject to addressing
16075 constraints) and emit each table of constants after the last
16076 barrier that is within range of all the instructions in the group.
16077 If a group does not contain a barrier, then we forcibly create one
16078 by inserting a jump instruction into the flow. Once the table has
16079 been inserted, the insns are then modified to reference the
16080 relevant entry in the pool.
16081
16082 Possible enhancements to the algorithm (not implemented) are:
16083
16084 1) For some processors and object formats, there may be benefit in
16085 aligning the pools to the start of cache lines; this alignment
16086 would need to be taken into account when calculating addressability
16087 of a pool. */
16088
16089 /* These typedefs are located at the start of this file, so that
16090 they can be used in the prototypes there. This comment is to
16091 remind readers of that fact so that the following structures
16092 can be understood more easily.
16093
16094 typedef struct minipool_node Mnode;
16095 typedef struct minipool_fixup Mfix; */
16096
16097 struct minipool_node
16098 {
16099 /* Doubly linked chain of entries. */
16100 Mnode * next;
16101 Mnode * prev;
16102 /* The maximum offset into the code at which this entry can be placed. While
16103 pushing fixes for forward references, all entries are sorted in order
16104 of increasing max_address. */
16105 HOST_WIDE_INT max_address;
16106 /* Similarly for an entry inserted for a backwards ref. */
16107 HOST_WIDE_INT min_address;
16108 /* The number of fixes referencing this entry. This can become zero
16109 if we "unpush" an entry. In this case we ignore the entry when we
16110 come to emit the code. */
16111 int refcount;
16112 /* The offset from the start of the minipool. */
16113 HOST_WIDE_INT offset;
16114 /* The value in table. */
16115 rtx value;
16116 /* The mode of value. */
16117 machine_mode mode;
16118 /* The size of the value. With iWMMXt enabled
16119 sizes > 4 also imply an alignment of 8 bytes. */
16120 int fix_size;
16121 };
16122
16123 struct minipool_fixup
16124 {
16125 Mfix * next;
16126 rtx_insn * insn;
16127 HOST_WIDE_INT address;
16128 rtx * loc;
16129 machine_mode mode;
16130 int fix_size;
16131 rtx value;
16132 Mnode * minipool;
16133 HOST_WIDE_INT forwards;
16134 HOST_WIDE_INT backwards;
16135 };
16136
16137 /* Fixes less than a word need padding out to a word boundary. */
16138 #define MINIPOOL_FIX_SIZE(mode) \
16139 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
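/* So, for example, MINIPOOL_FIX_SIZE (HImode) is 4, while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */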
16140
16141 static Mnode * minipool_vector_head;
16142 static Mnode * minipool_vector_tail;
16143 static rtx_code_label *minipool_vector_label;
16144 static int minipool_pad;
16145
16146 /* The linked list of all minipool fixes required for this function. */
16147 Mfix * minipool_fix_head;
16148 Mfix * minipool_fix_tail;
16149 /* The fix entry for the current minipool, once it has been placed. */
16150 Mfix * minipool_barrier;
16151
16152 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16153 #define JUMP_TABLES_IN_TEXT_SECTION 0
16154 #endif
16155
16156 static HOST_WIDE_INT
16157 get_jump_table_size (rtx_jump_table_data *insn)
16158 {
16159 /* ADDR_VECs only take room if read-only data goes into the text
16160 section. */
16161 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16162 {
16163 rtx body = PATTERN (insn);
16164 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16165 HOST_WIDE_INT size;
16166 HOST_WIDE_INT modesize;
16167
16168 modesize = GET_MODE_SIZE (GET_MODE (body));
16169 size = modesize * XVECLEN (body, elt);
16170 switch (modesize)
16171 {
16172 case 1:
16173 /* Round up size of TBB table to a halfword boundary. */
16174 size = (size + 1) & ~(HOST_WIDE_INT)1;
16175 break;
16176 case 2:
16177 /* No padding necessary for TBH. */
16178 break;
16179 case 4:
16180 /* Add two bytes for alignment on Thumb. */
16181 if (TARGET_THUMB)
16182 size += 2;
16183 break;
16184 default:
16185 gcc_unreachable ();
16186 }
16187 return size;
16188 }
16189
16190 return 0;
16191 }
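/* For example, a five-entry ADDR_DIFF_VEC in QImode (a TBB table) counts
   as 6 bytes after rounding up to a halfword, and a three-entry SImode
   table counts as 14 bytes on Thumb (12 bytes of data plus 2 bytes of
   alignment).  */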
16192
16193 /* Return the maximum amount of padding that will be inserted before
16194 label LABEL. */
16195
16196 static HOST_WIDE_INT
16197 get_label_padding (rtx label)
16198 {
16199 HOST_WIDE_INT align, min_insn_size;
16200
16201 align = 1 << label_to_alignment (label);
16202 min_insn_size = TARGET_THUMB ? 2 : 4;
16203 return align > min_insn_size ? align - min_insn_size : 0;
16204 }
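/* For example, a label aligned to an 8-byte boundary can be preceded by up
   to 6 bytes of padding on Thumb (where the minimum instruction size is 2)
   and up to 4 bytes on ARM.  */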
16205
16206 /* Move a minipool fix MP from its current location to before MAX_MP.
16207 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16208 constraints may need updating. */
16209 static Mnode *
16210 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16211 HOST_WIDE_INT max_address)
16212 {
16213 /* The code below assumes these are different. */
16214 gcc_assert (mp != max_mp);
16215
16216 if (max_mp == NULL)
16217 {
16218 if (max_address < mp->max_address)
16219 mp->max_address = max_address;
16220 }
16221 else
16222 {
16223 if (max_address > max_mp->max_address - mp->fix_size)
16224 mp->max_address = max_mp->max_address - mp->fix_size;
16225 else
16226 mp->max_address = max_address;
16227
16228 /* Unlink MP from its current position. Since max_mp is non-null,
16229 mp->prev must be non-null. */
16230 mp->prev->next = mp->next;
16231 if (mp->next != NULL)
16232 mp->next->prev = mp->prev;
16233 else
16234 minipool_vector_tail = mp->prev;
16235
16236 /* Re-insert it before MAX_MP. */
16237 mp->next = max_mp;
16238 mp->prev = max_mp->prev;
16239 max_mp->prev = mp;
16240
16241 if (mp->prev != NULL)
16242 mp->prev->next = mp;
16243 else
16244 minipool_vector_head = mp;
16245 }
16246
16247 /* Save the new entry. */
16248 max_mp = mp;
16249
16250 /* Scan over the preceding entries and adjust their addresses as
16251 required. */
16252 while (mp->prev != NULL
16253 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16254 {
16255 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16256 mp = mp->prev;
16257 }
16258
16259 return max_mp;
16260 }
16261
16262 /* Add a constant to the minipool for a forward reference. Returns the
16263 node added or NULL if the constant will not fit in this pool. */
16264 static Mnode *
16265 add_minipool_forward_ref (Mfix *fix)
16266 {
16267 /* If set, max_mp is the first pool_entry that has a lower
16268 constraint than the one we are trying to add. */
16269 Mnode * max_mp = NULL;
16270 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16271 Mnode * mp;
16272
16273 /* If the minipool starts before the end of FIX->INSN then this FIX
16274 can not be placed into the current pool. Furthermore, adding the
16275 new constant pool entry may cause the pool to start FIX_SIZE bytes
16276 earlier. */
16277 if (minipool_vector_head &&
16278 (fix->address + get_attr_length (fix->insn)
16279 >= minipool_vector_head->max_address - fix->fix_size))
16280 return NULL;
16281
16282 /* Scan the pool to see if a constant with the same value has
16283 already been added. While we are doing this, also note the
16284 location where we must insert the constant if it doesn't already
16285 exist. */
16286 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16287 {
16288 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16289 && fix->mode == mp->mode
16290 && (!LABEL_P (fix->value)
16291 || (CODE_LABEL_NUMBER (fix->value)
16292 == CODE_LABEL_NUMBER (mp->value)))
16293 && rtx_equal_p (fix->value, mp->value))
16294 {
16295 /* More than one fix references this entry. */
16296 mp->refcount++;
16297 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16298 }
16299
16300 /* Note the insertion point if necessary. */
16301 if (max_mp == NULL
16302 && mp->max_address > max_address)
16303 max_mp = mp;
16304
16305 /* If we are inserting an 8-byte aligned quantity and
16306 we have not already found an insertion point, then
16307 make sure that all such 8-byte aligned quantities are
16308 placed at the start of the pool. */
16309 if (ARM_DOUBLEWORD_ALIGN
16310 && max_mp == NULL
16311 && fix->fix_size >= 8
16312 && mp->fix_size < 8)
16313 {
16314 max_mp = mp;
16315 max_address = mp->max_address;
16316 }
16317 }
16318
16319 /* The value is not currently in the minipool, so we need to create
16320 a new entry for it. If MAX_MP is NULL, the entry will be put on
16321 the end of the list since the placement is less constrained than
16322 any existing entry. Otherwise, we insert the new fix before
16323 MAX_MP and, if necessary, adjust the constraints on the other
16324 entries. */
16325 mp = XNEW (Mnode);
16326 mp->fix_size = fix->fix_size;
16327 mp->mode = fix->mode;
16328 mp->value = fix->value;
16329 mp->refcount = 1;
16330 /* Not yet required for a backwards ref. */
16331 mp->min_address = -65536;
16332
16333 if (max_mp == NULL)
16334 {
16335 mp->max_address = max_address;
16336 mp->next = NULL;
16337 mp->prev = minipool_vector_tail;
16338
16339 if (mp->prev == NULL)
16340 {
16341 minipool_vector_head = mp;
16342 minipool_vector_label = gen_label_rtx ();
16343 }
16344 else
16345 mp->prev->next = mp;
16346
16347 minipool_vector_tail = mp;
16348 }
16349 else
16350 {
16351 if (max_address > max_mp->max_address - mp->fix_size)
16352 mp->max_address = max_mp->max_address - mp->fix_size;
16353 else
16354 mp->max_address = max_address;
16355
16356 mp->next = max_mp;
16357 mp->prev = max_mp->prev;
16358 max_mp->prev = mp;
16359 if (mp->prev != NULL)
16360 mp->prev->next = mp;
16361 else
16362 minipool_vector_head = mp;
16363 }
16364
16365 /* Save the new entry. */
16366 max_mp = mp;
16367
16368 /* Scan over the preceding entries and adjust their addresses as
16369 required. */
16370 while (mp->prev != NULL
16371 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16372 {
16373 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16374 mp = mp->prev;
16375 }
16376
16377 return max_mp;
16378 }
16379
16380 static Mnode *
16381 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16382 HOST_WIDE_INT min_address)
16383 {
16384 HOST_WIDE_INT offset;
16385
16386 /* The code below assumes these are different. */
16387 gcc_assert (mp != min_mp);
16388
16389 if (min_mp == NULL)
16390 {
16391 if (min_address > mp->min_address)
16392 mp->min_address = min_address;
16393 }
16394 else
16395 {
16396 /* We will adjust this below if it is too loose. */
16397 mp->min_address = min_address;
16398
16399 /* Unlink MP from its current position. Since min_mp is non-null,
16400 mp->next must be non-null. */
16401 mp->next->prev = mp->prev;
16402 if (mp->prev != NULL)
16403 mp->prev->next = mp->next;
16404 else
16405 minipool_vector_head = mp->next;
16406
16407 /* Reinsert it after MIN_MP. */
16408 mp->prev = min_mp;
16409 mp->next = min_mp->next;
16410 min_mp->next = mp;
16411 if (mp->next != NULL)
16412 mp->next->prev = mp;
16413 else
16414 minipool_vector_tail = mp;
16415 }
16416
16417 min_mp = mp;
16418
16419 offset = 0;
16420 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16421 {
16422 mp->offset = offset;
16423 if (mp->refcount > 0)
16424 offset += mp->fix_size;
16425
16426 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16427 mp->next->min_address = mp->min_address + mp->fix_size;
16428 }
16429
16430 return min_mp;
16431 }
16432
16433 /* Add a constant to the minipool for a backward reference. Returns the
16434 node added or NULL if the constant will not fit in this pool.
16435
16436 Note that the code for insertion for a backwards reference can be
16437 somewhat confusing because the calculated offsets for each fix do
16438 not take into account the size of the pool (which is still under
16439 construction). */
16440 static Mnode *
16441 add_minipool_backward_ref (Mfix *fix)
16442 {
16443 /* If set, min_mp is the last pool_entry that has a lower constraint
16444 than the one we are trying to add. */
16445 Mnode *min_mp = NULL;
16446 /* This can be negative, since it is only a constraint. */
16447 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16448 Mnode *mp;
16449
16450 /* If we can't reach the current pool from this insn, or if we can't
16451 insert this entry at the end of the pool without pushing other
16452 fixes out of range, then we don't try. This ensures that we
16453 can't fail later on. */
16454 if (min_address >= minipool_barrier->address
16455 || (minipool_vector_tail->min_address + fix->fix_size
16456 >= minipool_barrier->address))
16457 return NULL;
16458
16459 /* Scan the pool to see if a constant with the same value has
16460 already been added. While we are doing this, also note the
16461 location where we must insert the constant if it doesn't already
16462 exist. */
16463 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16464 {
16465 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16466 && fix->mode == mp->mode
16467 && (!LABEL_P (fix->value)
16468 || (CODE_LABEL_NUMBER (fix->value)
16469 == CODE_LABEL_NUMBER (mp->value)))
16470 && rtx_equal_p (fix->value, mp->value)
16471 /* Check that there is enough slack to move this entry to the
16472 end of the table (this is conservative). */
16473 && (mp->max_address
16474 > (minipool_barrier->address
16475 + minipool_vector_tail->offset
16476 + minipool_vector_tail->fix_size)))
16477 {
16478 mp->refcount++;
16479 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16480 }
16481
16482 if (min_mp != NULL)
16483 mp->min_address += fix->fix_size;
16484 else
16485 {
16486 /* Note the insertion point if necessary. */
16487 if (mp->min_address < min_address)
16488 {
16489 /* For now, we do not allow the insertion of nodes that require
16490 8-byte alignment anywhere but at the start of the pool. */
16491 if (ARM_DOUBLEWORD_ALIGN
16492 && fix->fix_size >= 8 && mp->fix_size < 8)
16493 return NULL;
16494 else
16495 min_mp = mp;
16496 }
16497 else if (mp->max_address
16498 < minipool_barrier->address + mp->offset + fix->fix_size)
16499 {
16500 /* Inserting before this entry would push the fix beyond
16501 its maximum address (which can happen if we have
16502 re-located a forwards fix); force the new fix to come
16503 after it. */
16504 if (ARM_DOUBLEWORD_ALIGN
16505 && fix->fix_size >= 8 && mp->fix_size < 8)
16506 return NULL;
16507 else
16508 {
16509 min_mp = mp;
16510 min_address = mp->min_address + fix->fix_size;
16511 }
16512 }
16513 /* Do not insert a non-8-byte aligned quantity before 8-byte
16514 aligned quantities. */
16515 else if (ARM_DOUBLEWORD_ALIGN
16516 && fix->fix_size < 8
16517 && mp->fix_size >= 8)
16518 {
16519 min_mp = mp;
16520 min_address = mp->min_address + fix->fix_size;
16521 }
16522 }
16523 }
16524
16525 /* We need to create a new entry. */
16526 mp = XNEW (Mnode);
16527 mp->fix_size = fix->fix_size;
16528 mp->mode = fix->mode;
16529 mp->value = fix->value;
16530 mp->refcount = 1;
16531 mp->max_address = minipool_barrier->address + 65536;
16532
16533 mp->min_address = min_address;
16534
16535 if (min_mp == NULL)
16536 {
16537 mp->prev = NULL;
16538 mp->next = minipool_vector_head;
16539
16540 if (mp->next == NULL)
16541 {
16542 minipool_vector_tail = mp;
16543 minipool_vector_label = gen_label_rtx ();
16544 }
16545 else
16546 mp->next->prev = mp;
16547
16548 minipool_vector_head = mp;
16549 }
16550 else
16551 {
16552 mp->next = min_mp->next;
16553 mp->prev = min_mp;
16554 min_mp->next = mp;
16555
16556 if (mp->next != NULL)
16557 mp->next->prev = mp;
16558 else
16559 minipool_vector_tail = mp;
16560 }
16561
16562 /* Save the new entry. */
16563 min_mp = mp;
16564
16565 if (mp->prev)
16566 mp = mp->prev;
16567 else
16568 mp->offset = 0;
16569
16570 /* Scan over the following entries and adjust their offsets. */
16571 while (mp->next != NULL)
16572 {
16573 if (mp->next->min_address < mp->min_address + mp->fix_size)
16574 mp->next->min_address = mp->min_address + mp->fix_size;
16575
16576 if (mp->refcount)
16577 mp->next->offset = mp->offset + mp->fix_size;
16578 else
16579 mp->next->offset = mp->offset;
16580
16581 mp = mp->next;
16582 }
16583
16584 return min_mp;
16585 }
16586
16587 static void
16588 assign_minipool_offsets (Mfix *barrier)
16589 {
16590 HOST_WIDE_INT offset = 0;
16591 Mnode *mp;
16592
16593 minipool_barrier = barrier;
16594
16595 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16596 {
16597 mp->offset = offset;
16598
16599 if (mp->refcount > 0)
16600 offset += mp->fix_size;
16601 }
16602 }
16603
16604 /* Output the literal table. */
16605 static void
16606 dump_minipool (rtx_insn *scan)
16607 {
16608 Mnode * mp;
16609 Mnode * nmp;
16610 int align64 = 0;
16611
16612 if (ARM_DOUBLEWORD_ALIGN)
16613 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16614 if (mp->refcount > 0 && mp->fix_size >= 8)
16615 {
16616 align64 = 1;
16617 break;
16618 }
16619
16620 if (dump_file)
16621 fprintf (dump_file,
16622 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16623 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16624
16625 scan = emit_label_after (gen_label_rtx (), scan);
16626 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16627 scan = emit_label_after (minipool_vector_label, scan);
16628
16629 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16630 {
16631 if (mp->refcount > 0)
16632 {
16633 if (dump_file)
16634 {
16635 fprintf (dump_file,
16636 ";; Offset %u, min %ld, max %ld ",
16637 (unsigned) mp->offset, (unsigned long) mp->min_address,
16638 (unsigned long) mp->max_address);
16639 arm_print_value (dump_file, mp->value);
16640 fputc ('\n', dump_file);
16641 }
16642
16643 switch (GET_MODE_SIZE (mp->mode))
16644 {
16645 #ifdef HAVE_consttable_1
16646 case 1:
16647 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16648 break;
16649
16650 #endif
16651 #ifdef HAVE_consttable_2
16652 case 2:
16653 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16654 break;
16655
16656 #endif
16657 #ifdef HAVE_consttable_4
16658 case 4:
16659 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16660 break;
16661
16662 #endif
16663 #ifdef HAVE_consttable_8
16664 case 8:
16665 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16666 break;
16667
16668 #endif
16669 #ifdef HAVE_consttable_16
16670 case 16:
16671 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16672 break;
16673
16674 #endif
16675 default:
16676 gcc_unreachable ();
16677 }
16678 }
16679
16680 nmp = mp->next;
16681 free (mp);
16682 }
16683
16684 minipool_vector_head = minipool_vector_tail = NULL;
16685 scan = emit_insn_after (gen_consttable_end (), scan);
16686 scan = emit_barrier_after (scan);
16687 }
16688
16689 /* Return the cost of forcibly inserting a barrier after INSN. */
16690 static int
16691 arm_barrier_cost (rtx insn)
16692 {
16693 /* Basing the location of the pool on the loop depth is preferable,
16694 but at the moment, the basic block information seems to be
16695 corrupt by this stage of the compilation. */
16696 int base_cost = 50;
16697 rtx next = next_nonnote_insn (insn);
16698
16699 if (next != NULL && LABEL_P (next))
16700 base_cost -= 20;
16701
16702 switch (GET_CODE (insn))
16703 {
16704 case CODE_LABEL:
16705 /* It will always be better to place the table before the label, rather
16706 than after it. */
16707 return 50;
16708
16709 case INSN:
16710 case CALL_INSN:
16711 return base_cost;
16712
16713 case JUMP_INSN:
16714 return base_cost - 10;
16715
16716 default:
16717 return base_cost + 10;
16718 }
16719 }
16720
16721 /* Find the best place in the insn stream in the range
16722 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16723 Create the barrier by inserting a jump and add a new fix entry for
16724 it. */
16725 static Mfix *
16726 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16727 {
16728 HOST_WIDE_INT count = 0;
16729 rtx_barrier *barrier;
16730 rtx_insn *from = fix->insn;
16731 /* The instruction after which we will insert the jump. */
16732 rtx_insn *selected = NULL;
16733 int selected_cost;
16734 /* The address at which the jump instruction will be placed. */
16735 HOST_WIDE_INT selected_address;
16736 Mfix * new_fix;
16737 HOST_WIDE_INT max_count = max_address - fix->address;
16738 rtx_code_label *label = gen_label_rtx ();
16739
16740 selected_cost = arm_barrier_cost (from);
16741 selected_address = fix->address;
16742
16743 while (from && count < max_count)
16744 {
16745 rtx_jump_table_data *tmp;
16746 int new_cost;
16747
16748 /* This code shouldn't have been called if there was a natural barrier
16749 within range. */
16750 gcc_assert (!BARRIER_P (from));
16751
16752 /* Count the length of this insn. This must stay in sync with the
16753 code that pushes minipool fixes. */
16754 if (LABEL_P (from))
16755 count += get_label_padding (from);
16756 else
16757 count += get_attr_length (from);
16758
16759 /* If there is a jump table, add its length. */
16760 if (tablejump_p (from, NULL, &tmp))
16761 {
16762 count += get_jump_table_size (tmp);
16763
16764 /* Jump tables aren't in a basic block, so base the cost on
16765 the dispatch insn. If we select this location, we will
16766 still put the pool after the table. */
16767 new_cost = arm_barrier_cost (from);
16768
16769 if (count < max_count
16770 && (!selected || new_cost <= selected_cost))
16771 {
16772 selected = tmp;
16773 selected_cost = new_cost;
16774 selected_address = fix->address + count;
16775 }
16776
16777 /* Continue after the dispatch table. */
16778 from = NEXT_INSN (tmp);
16779 continue;
16780 }
16781
16782 new_cost = arm_barrier_cost (from);
16783
16784 if (count < max_count
16785 && (!selected || new_cost <= selected_cost))
16786 {
16787 selected = from;
16788 selected_cost = new_cost;
16789 selected_address = fix->address + count;
16790 }
16791
16792 from = NEXT_INSN (from);
16793 }
16794
16795 /* Make sure that we found a place to insert the jump. */
16796 gcc_assert (selected);
16797
16798 /* Make sure we do not split a call and its corresponding
16799 CALL_ARG_LOCATION note. */
16800 if (CALL_P (selected))
16801 {
16802 rtx_insn *next = NEXT_INSN (selected);
16803 if (next && NOTE_P (next)
16804 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16805 selected = next;
16806 }
16807
16808 /* Create a new JUMP_INSN that branches around a barrier. */
16809 from = emit_jump_insn_after (gen_jump (label), selected);
16810 JUMP_LABEL (from) = label;
16811 barrier = emit_barrier_after (from);
16812 emit_label_after (label, barrier);
16813
16814 /* Create a minipool barrier entry for the new barrier. */
16815 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16816 new_fix->insn = barrier;
16817 new_fix->address = selected_address;
16818 new_fix->next = fix->next;
16819 fix->next = new_fix;
16820
16821 return new_fix;
16822 }
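
/* Editor's note: an illustrative, self-contained sketch (hypothetical
   standalone helper) of the placement scan above.  Among the positions whose
   cumulative length stays within the budget, the last one with the lowest
   cost wins; the "<=" test means later candidates beat earlier ones on ties,
   matching the loop above.  */
static int
example_pick_barrier_position (const long *insn_length, const int *insn_cost,
                               int n_insns, long budget)
{
  long count = 0;
  int best = -1;
  int best_cost = 0;
  int i;

  for (i = 0; i < n_insns && count < budget; i++)
    {
      count += insn_length[i];
      if (count < budget && (best < 0 || insn_cost[i] <= best_cost))
        {
          best = i;
          best_cost = insn_cost[i];
        }
    }
  return best;  /* index of the insn after which the jump and barrier go  */
}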
16823
16824 /* Record that there is a natural barrier in the insn stream at
16825 ADDRESS. */
16826 static void
16827 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16828 {
16829 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16830
16831 fix->insn = insn;
16832 fix->address = address;
16833
16834 fix->next = NULL;
16835 if (minipool_fix_head != NULL)
16836 minipool_fix_tail->next = fix;
16837 else
16838 minipool_fix_head = fix;
16839
16840 minipool_fix_tail = fix;
16841 }
16842
16843 /* Record INSN, which will need fixing up to load a value from the
16844 minipool. ADDRESS is the offset of the insn from the start of the
16845 function; LOC is a pointer to the part of the insn which requires
16846 fixing; VALUE is the constant that must be loaded, which is of type
16847 MODE. */
16848 static void
16849 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16850 machine_mode mode, rtx value)
16851 {
16852 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16853
16854 fix->insn = insn;
16855 fix->address = address;
16856 fix->loc = loc;
16857 fix->mode = mode;
16858 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16859 fix->value = value;
16860 fix->forwards = get_attr_pool_range (insn);
16861 fix->backwards = get_attr_neg_pool_range (insn);
16862 fix->minipool = NULL;
16863
16864 /* If an insn doesn't have a range defined for it, then it isn't
16865 expecting to be reworked by this code. Better to stop now than
16866 to generate duff assembly code. */
16867 gcc_assert (fix->forwards || fix->backwards);
16868
16869 /* If an entry requires 8-byte alignment then assume all constant pools
16870 require 4 bytes of padding. Trying to do this later on a per-pool
16871 basis is awkward because existing pool entries have to be modified. */
16872 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16873 minipool_pad = 4;
16874
16875 if (dump_file)
16876 {
16877 fprintf (dump_file,
16878 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16879 GET_MODE_NAME (mode),
16880 INSN_UID (insn), (unsigned long) address,
16881 -1 * (long)fix->backwards, (long)fix->forwards);
16882 arm_print_value (dump_file, fix->value);
16883 fprintf (dump_file, "\n");
16884 }
16885
16886 /* Add it to the chain of fixes. */
16887 fix->next = NULL;
16888
16889 if (minipool_fix_head != NULL)
16890 minipool_fix_tail->next = fix;
16891 else
16892 minipool_fix_head = fix;
16893
16894 minipool_fix_tail = fix;
16895 }
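
/* Editor's note: an illustrative sketch (hypothetical helper, not part of
   this file).  The pool_range/neg_pool_range attributes recorded above define
   the window of addresses in which a pool serving this fix may be dumped;
   add_minipool_forward_ref and add_minipool_backward_ref work against the
   two ends of this window.  */
static void
example_fix_window (long address, long forwards, long backwards,
                    long *min_pool_address, long *max_pool_address)
{
  /* Backward-reference limit, as used above.  */
  *min_pool_address = address - backwards;
  /* Forward-reference limit; the real code additionally reserves alignment
     slack (see minipool_pad above) within this range.  */
  *max_pool_address = address + forwards;
}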
16896
16897 /* Return the maximum allowed cost of synthesizing a 64-bit constant
16898 inline, in insns. Returning 99 means we always want the value
16899 synthesized rather than placed in a literal pool. */
16900 int
16901 arm_max_const_double_inline_cost ()
16902 {
16903 /* Let the value get synthesized to avoid the use of literal pools. */
16904 if (arm_disable_literal_pool)
16905 return 99;
16906
16907 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16908 }
16909
16910 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16911 Returns the number of insns needed, or 99 if we don't know how to
16912 do it. */
16913 int
16914 arm_const_double_inline_cost (rtx val)
16915 {
16916 rtx lowpart, highpart;
16917 machine_mode mode;
16918
16919 mode = GET_MODE (val);
16920
16921 if (mode == VOIDmode)
16922 mode = DImode;
16923
16924 gcc_assert (GET_MODE_SIZE (mode) == 8);
16925
16926 lowpart = gen_lowpart (SImode, val);
16927 highpart = gen_highpart_mode (SImode, mode, val);
16928
16929 gcc_assert (CONST_INT_P (lowpart));
16930 gcc_assert (CONST_INT_P (highpart));
16931
16932 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16933 NULL_RTX, NULL_RTX, 0, 0)
16934 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16935 NULL_RTX, NULL_RTX, 0, 0));
16936 }
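
/* Editor's note: an illustrative, standalone sketch (hypothetical helper) of
   the cost model above: the 64-bit value is split into its two 32-bit halves
   and the per-half synthesis costs are added.  cost32 stands in for
   arm_gen_constant.  */
static int
example_const64_inline_cost (unsigned long long val,
                             int (*cost32) (unsigned int))
{
  unsigned int low = (unsigned int) (val & 0xffffffffu);
  unsigned int high = (unsigned int) (val >> 32);

  return cost32 (low) + cost32 (high);
}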
16937
16938 /* Cost of loading a SImode constant. */
16939 static inline int
16940 arm_const_inline_cost (enum rtx_code code, rtx val)
16941 {
16942 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16943 NULL_RTX, NULL_RTX, 1, 0);
16944 }
16945
16946 /* Return true if it is worthwhile to split a 64-bit constant into two
16947 32-bit operations. This is the case if optimizing for size, or
16948 if we have load delay slots, or if one 32-bit part can be done with
16949 a single data operation. */
16950 bool
16951 arm_const_double_by_parts (rtx val)
16952 {
16953 machine_mode mode = GET_MODE (val);
16954 rtx part;
16955
16956 if (optimize_size || arm_ld_sched)
16957 return true;
16958
16959 if (mode == VOIDmode)
16960 mode = DImode;
16961
16962 part = gen_highpart_mode (SImode, mode, val);
16963
16964 gcc_assert (CONST_INT_P (part));
16965
16966 if (const_ok_for_arm (INTVAL (part))
16967 || const_ok_for_arm (~INTVAL (part)))
16968 return true;
16969
16970 part = gen_lowpart (SImode, val);
16971
16972 gcc_assert (CONST_INT_P (part));
16973
16974 if (const_ok_for_arm (INTVAL (part))
16975 || const_ok_for_arm (~INTVAL (part)))
16976 return true;
16977
16978 return false;
16979 }
16980
16981 /* Return true if it is possible to inline both the high and low parts
16982 of a 64-bit constant into 32-bit data processing instructions. */
16983 bool
16984 arm_const_double_by_immediates (rtx val)
16985 {
16986 machine_mode mode = GET_MODE (val);
16987 rtx part;
16988
16989 if (mode == VOIDmode)
16990 mode = DImode;
16991
16992 part = gen_highpart_mode (SImode, mode, val);
16993
16994 gcc_assert (CONST_INT_P (part));
16995
16996 if (!const_ok_for_arm (INTVAL (part)))
16997 return false;
16998
16999 part = gen_lowpart (SImode, val);
17000
17001 gcc_assert (CONST_INT_P (part));
17002
17003 if (!const_ok_for_arm (INTVAL (part)))
17004 return false;
17005
17006 return true;
17007 }
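
/* Editor's note: an illustrative, standalone sketch (hypothetical helper, not
   the compiler's const_ok_for_arm) of the property being tested above: a
   32-bit value fits an ARM data-processing immediate when some even rotation
   of it fits in 8 bits.  */
static int
example_arm_immediate_p (unsigned int val)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate VAL left by ROT bits (ROT == 0 handled separately to avoid
         an undefined 32-bit shift).  */
      unsigned int rotated = rot ? ((val << rot) | (val >> (32 - rot))) : val;
      if ((rotated & ~0xffu) == 0)
        return 1;
    }
  return 0;
}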
17008
17009 /* Scan INSN and note any of its operands that need fixing.
17010 If DO_PUSHES is false we do not actually push any of the fixups
17011 needed. */
17012 static void
17013 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17014 {
17015 int opno;
17016
17017 extract_constrain_insn (insn);
17018
17019 if (recog_data.n_alternatives == 0)
17020 return;
17021
17022 /* Fill in recog_op_alt with information about the constraints of
17023 this insn. */
17024 preprocess_constraints (insn);
17025
17026 const operand_alternative *op_alt = which_op_alt ();
17027 for (opno = 0; opno < recog_data.n_operands; opno++)
17028 {
17029 /* Things we need to fix can only occur in inputs. */
17030 if (recog_data.operand_type[opno] != OP_IN)
17031 continue;
17032
17033 /* If this alternative is a memory reference, then any mention
17034 of constants in this alternative is really to fool reload
17035 into allowing us to accept one there. We need to fix them up
17036 now so that we output the right code. */
17037 if (op_alt[opno].memory_ok)
17038 {
17039 rtx op = recog_data.operand[opno];
17040
17041 if (CONSTANT_P (op))
17042 {
17043 if (do_pushes)
17044 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17045 recog_data.operand_mode[opno], op);
17046 }
17047 else if (MEM_P (op)
17048 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17049 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17050 {
17051 if (do_pushes)
17052 {
17053 rtx cop = avoid_constant_pool_reference (op);
17054
17055 /* Casting the address of something to a mode narrower
17056 than a word can cause avoid_constant_pool_reference()
17057 to return the pool reference itself. That's no good to
17058 us here. Let's just hope that we can use the
17059 constant pool value directly. */
17060 if (op == cop)
17061 cop = get_pool_constant (XEXP (op, 0));
17062
17063 push_minipool_fix (insn, address,
17064 recog_data.operand_loc[opno],
17065 recog_data.operand_mode[opno], cop);
17066 }
17067
17068 }
17069 }
17070 }
17071
17072 return;
17073 }
17074
17075 /* Rewrite move insn into subtract of 0 if the condition codes will
17076 be useful in the next conditional jump insn. */
17077
17078 static void
17079 thumb1_reorg (void)
17080 {
17081 basic_block bb;
17082
17083 FOR_EACH_BB_FN (bb, cfun)
17084 {
17085 rtx dest, src;
17086 rtx pat, op0, set = NULL;
17087 rtx_insn *prev, *insn = BB_END (bb);
17088 bool insn_clobbered = false;
17089
17090 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17091 insn = PREV_INSN (insn);
17092
17093 /* Find the last cbranchsi4_insn in basic block BB. */
17094 if (insn == BB_HEAD (bb)
17095 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17096 continue;
17097
17098 /* Get the register with which we are comparing. */
17099 pat = PATTERN (insn);
17100 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17101
17102 /* Find the first flag setting insn before INSN in basic block BB. */
17103 gcc_assert (insn != BB_HEAD (bb));
17104 for (prev = PREV_INSN (insn);
17105 (!insn_clobbered
17106 && prev != BB_HEAD (bb)
17107 && (NOTE_P (prev)
17108 || DEBUG_INSN_P (prev)
17109 || ((set = single_set (prev)) != NULL
17110 && get_attr_conds (prev) == CONDS_NOCOND)));
17111 prev = PREV_INSN (prev))
17112 {
17113 if (reg_set_p (op0, prev))
17114 insn_clobbered = true;
17115 }
17116
17117 /* Skip if op0 is clobbered by an insn other than prev. */
17118 if (insn_clobbered)
17119 continue;
17120
17121 if (!set)
17122 continue;
17123
17124 dest = SET_DEST (set);
17125 src = SET_SRC (set);
17126 if (!low_register_operand (dest, SImode)
17127 || !low_register_operand (src, SImode))
17128 continue;
17129
17130 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17131 in INSN. Both src and dest of the move insn are checked. */
17132 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17133 {
17134 dest = copy_rtx (dest);
17135 src = copy_rtx (src);
17136 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17137 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17138 INSN_CODE (prev) = -1;
17139 /* Set test register in INSN to dest. */
17140 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17141 INSN_CODE (insn) = -1;
17142 }
17143 }
17144 }
17145
17146 /* Convert instructions to their cc-clobbering variant if possible, since
17147 that allows us to use smaller encodings. */
17148
17149 static void
17150 thumb2_reorg (void)
17151 {
17152 basic_block bb;
17153 regset_head live;
17154
17155 INIT_REG_SET (&live);
17156
17157 /* We are freeing block_for_insn in the toplev to keep compatibility
17158 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17159 compute_bb_for_insn ();
17160 df_analyze ();
17161
17162 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17163
17164 FOR_EACH_BB_FN (bb, cfun)
17165 {
17166 if (current_tune->disparage_flag_setting_t16_encodings
17167 && optimize_bb_for_speed_p (bb))
17168 continue;
17169
17170 rtx_insn *insn;
17171 Convert_Action action = SKIP;
17172 Convert_Action action_for_partial_flag_setting
17173 = (current_tune->disparage_partial_flag_setting_t16_encodings
17174 && optimize_bb_for_speed_p (bb))
17175 ? SKIP : CONV;
17176
17177 COPY_REG_SET (&live, DF_LR_OUT (bb));
17178 df_simulate_initialize_backwards (bb, &live);
17179 FOR_BB_INSNS_REVERSE (bb, insn)
17180 {
17181 if (NONJUMP_INSN_P (insn)
17182 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17183 && GET_CODE (PATTERN (insn)) == SET)
17184 {
17185 action = SKIP;
17186 rtx pat = PATTERN (insn);
17187 rtx dst = XEXP (pat, 0);
17188 rtx src = XEXP (pat, 1);
17189 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17190
17191 if (!OBJECT_P (src))
17192 op0 = XEXP (src, 0);
17193
17194 if (BINARY_P (src))
17195 op1 = XEXP (src, 1);
17196
17197 if (low_register_operand (dst, SImode))
17198 {
17199 switch (GET_CODE (src))
17200 {
17201 case PLUS:
17202 /* Adding two registers and storing the result
17203 in the first source is already a 16-bit
17204 operation. */
17205 if (rtx_equal_p (dst, op0)
17206 && register_operand (op1, SImode))
17207 break;
17208
17209 if (low_register_operand (op0, SImode))
17210 {
17211 /* ADDS <Rd>,<Rn>,<Rm> */
17212 if (low_register_operand (op1, SImode))
17213 action = CONV;
17214 /* ADDS <Rdn>,#<imm8> */
17215 /* SUBS <Rdn>,#<imm8> */
17216 else if (rtx_equal_p (dst, op0)
17217 && CONST_INT_P (op1)
17218 && IN_RANGE (INTVAL (op1), -255, 255))
17219 action = CONV;
17220 /* ADDS <Rd>,<Rn>,#<imm3> */
17221 /* SUBS <Rd>,<Rn>,#<imm3> */
17222 else if (CONST_INT_P (op1)
17223 && IN_RANGE (INTVAL (op1), -7, 7))
17224 action = CONV;
17225 }
17226 /* ADCS <Rd>, <Rn> */
17227 else if (GET_CODE (XEXP (src, 0)) == PLUS
17228 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17229 && low_register_operand (XEXP (XEXP (src, 0), 1),
17230 SImode)
17231 && COMPARISON_P (op1)
17232 && cc_register (XEXP (op1, 0), VOIDmode)
17233 && maybe_get_arm_condition_code (op1) == ARM_CS
17234 && XEXP (op1, 1) == const0_rtx)
17235 action = CONV;
17236 break;
17237
17238 case MINUS:
17239 /* RSBS <Rd>,<Rn>,#0
17240 Not handled here: see NEG below. */
17241 /* SUBS <Rd>,<Rn>,#<imm3>
17242 SUBS <Rdn>,#<imm8>
17243 Not handled here: see PLUS above. */
17244 /* SUBS <Rd>,<Rn>,<Rm> */
17245 if (low_register_operand (op0, SImode)
17246 && low_register_operand (op1, SImode))
17247 action = CONV;
17248 break;
17249
17250 case MULT:
17251 /* MULS <Rdm>,<Rn>,<Rdm>
17252 As an exception to the rule, this is only used
17253 when optimizing for size since MULS is slow on all
17254 known implementations. We do not even want to use
17255 MULS in cold code, if optimizing for speed, so we
17256 test the global flag here. */
17257 if (!optimize_size)
17258 break;
17259 /* else fall through. */
17260 case AND:
17261 case IOR:
17262 case XOR:
17263 /* ANDS <Rdn>,<Rm> */
17264 if (rtx_equal_p (dst, op0)
17265 && low_register_operand (op1, SImode))
17266 action = action_for_partial_flag_setting;
17267 else if (rtx_equal_p (dst, op1)
17268 && low_register_operand (op0, SImode))
17269 action = action_for_partial_flag_setting == SKIP
17270 ? SKIP : SWAP_CONV;
17271 break;
17272
17273 case ASHIFTRT:
17274 case ASHIFT:
17275 case LSHIFTRT:
17276 /* ASRS <Rdn>,<Rm> */
17277 /* LSRS <Rdn>,<Rm> */
17278 /* LSLS <Rdn>,<Rm> */
17279 if (rtx_equal_p (dst, op0)
17280 && low_register_operand (op1, SImode))
17281 action = action_for_partial_flag_setting;
17282 /* ASRS <Rd>,<Rm>,#<imm5> */
17283 /* LSRS <Rd>,<Rm>,#<imm5> */
17284 /* LSLS <Rd>,<Rm>,#<imm5> */
17285 else if (low_register_operand (op0, SImode)
17286 && CONST_INT_P (op1)
17287 && IN_RANGE (INTVAL (op1), 0, 31))
17288 action = action_for_partial_flag_setting;
17289 break;
17290
17291 case ROTATERT:
17292 /* RORS <Rdn>,<Rm> */
17293 if (rtx_equal_p (dst, op0)
17294 && low_register_operand (op1, SImode))
17295 action = action_for_partial_flag_setting;
17296 break;
17297
17298 case NOT:
17299 /* MVNS <Rd>,<Rm> */
17300 if (low_register_operand (op0, SImode))
17301 action = action_for_partial_flag_setting;
17302 break;
17303
17304 case NEG:
17305 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17306 if (low_register_operand (op0, SImode))
17307 action = CONV;
17308 break;
17309
17310 case CONST_INT:
17311 /* MOVS <Rd>,#<imm8> */
17312 if (CONST_INT_P (src)
17313 && IN_RANGE (INTVAL (src), 0, 255))
17314 action = action_for_partial_flag_setting;
17315 break;
17316
17317 case REG:
17318 /* MOVS and MOV<c> with registers have different
17319 encodings, so are not relevant here. */
17320 break;
17321
17322 default:
17323 break;
17324 }
17325 }
17326
17327 if (action != SKIP)
17328 {
17329 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17330 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17331 rtvec vec;
17332
17333 if (action == SWAP_CONV)
17334 {
17335 src = copy_rtx (src);
17336 XEXP (src, 0) = op1;
17337 XEXP (src, 1) = op0;
17338 pat = gen_rtx_SET (VOIDmode, dst, src);
17339 vec = gen_rtvec (2, pat, clobber);
17340 }
17341 else /* action == CONV */
17342 vec = gen_rtvec (2, pat, clobber);
17343
17344 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17345 INSN_CODE (insn) = -1;
17346 }
17347 }
17348
17349 if (NONDEBUG_INSN_P (insn))
17350 df_simulate_one_insn_backwards (bb, insn, &live);
17351 }
17352 }
17353
17354 CLEAR_REG_SET (&live);
17355 }
17356
17357 /* GCC puts the pool in the wrong place for ARM, since we can only
17358 load addresses a limited distance around the pc. We do some
17359 special munging to move the constant pool values to the correct
17360 point in the code. */
17361 static void
17362 arm_reorg (void)
17363 {
17364 rtx_insn *insn;
17365 HOST_WIDE_INT address = 0;
17366 Mfix * fix;
17367
17368 if (TARGET_THUMB1)
17369 thumb1_reorg ();
17370 else if (TARGET_THUMB2)
17371 thumb2_reorg ();
17372
17373 /* Ensure all insns that must be split have been split at this point.
17374 Otherwise, the pool placement code below may compute incorrect
17375 insn lengths. Note that when optimizing, all insns have already
17376 been split at this point. */
17377 if (!optimize)
17378 split_all_insns_noflow ();
17379
17380 minipool_fix_head = minipool_fix_tail = NULL;
17381
17382 /* The first insn must always be a note, or the code below won't
17383 scan it properly. */
17384 insn = get_insns ();
17385 gcc_assert (NOTE_P (insn));
17386 minipool_pad = 0;
17387
17388 /* Scan all the insns and record the operands that will need fixing. */
17389 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17390 {
17391 if (BARRIER_P (insn))
17392 push_minipool_barrier (insn, address);
17393 else if (INSN_P (insn))
17394 {
17395 rtx_jump_table_data *table;
17396
17397 note_invalid_constants (insn, address, true);
17398 address += get_attr_length (insn);
17399
17400 /* If the insn is a vector jump, add the size of the table
17401 and skip the table. */
17402 if (tablejump_p (insn, NULL, &table))
17403 {
17404 address += get_jump_table_size (table);
17405 insn = table;
17406 }
17407 }
17408 else if (LABEL_P (insn))
17409 /* Add the worst-case padding due to alignment. We don't add
17410 the _current_ padding because the minipool insertions
17411 themselves might change it. */
17412 address += get_label_padding (insn);
17413 }
17414
17415 fix = minipool_fix_head;
17416
17417 /* Now scan the fixups and perform the required changes. */
17418 while (fix)
17419 {
17420 Mfix * ftmp;
17421 Mfix * fdel;
17422 Mfix * last_added_fix;
17423 Mfix * last_barrier = NULL;
17424 Mfix * this_fix;
17425
17426 /* Skip any further barriers before the next fix. */
17427 while (fix && BARRIER_P (fix->insn))
17428 fix = fix->next;
17429
17430 /* No more fixes. */
17431 if (fix == NULL)
17432 break;
17433
17434 last_added_fix = NULL;
17435
17436 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17437 {
17438 if (BARRIER_P (ftmp->insn))
17439 {
17440 if (ftmp->address >= minipool_vector_head->max_address)
17441 break;
17442
17443 last_barrier = ftmp;
17444 }
17445 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17446 break;
17447
17448 last_added_fix = ftmp; /* Keep track of the last fix added. */
17449 }
17450
17451 /* If we found a barrier, drop back to that; any fixes that we
17452 could have reached but come after the barrier will now go in
17453 the next mini-pool. */
17454 if (last_barrier != NULL)
17455 {
17456 /* Reduce the refcount for those fixes that won't go into this
17457 pool after all. */
17458 for (fdel = last_barrier->next;
17459 fdel && fdel != ftmp;
17460 fdel = fdel->next)
17461 {
17462 fdel->minipool->refcount--;
17463 fdel->minipool = NULL;
17464 }
17465
17466 ftmp = last_barrier;
17467 }
17468 else
17469 {
17470 /* ftmp is the first fix that we can't fit into this pool and
17471 there are no natural barriers that we could use. Insert a
17472 new barrier in the code somewhere between the previous
17473 fix and this one, and arrange to jump around it. */
17474 HOST_WIDE_INT max_address;
17475
17476 /* The last item on the list of fixes must be a barrier, so
17477 we can never run off the end of the list of fixes without
17478 last_barrier being set. */
17479 gcc_assert (ftmp);
17480
17481 max_address = minipool_vector_head->max_address;
17482 /* Check that there isn't another fix that is in range that
17483 we couldn't fit into this pool because the pool was
17484 already too large: we need to put the pool before such an
17485 instruction. The pool itself may come just after the
17486 fix because create_fix_barrier also allows space for a
17487 jump instruction. */
17488 if (ftmp->address < max_address)
17489 max_address = ftmp->address + 1;
17490
17491 last_barrier = create_fix_barrier (last_added_fix, max_address);
17492 }
17493
17494 assign_minipool_offsets (last_barrier);
17495
17496 while (ftmp)
17497 {
17498 if (!BARRIER_P (ftmp->insn)
17499 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17500 == NULL))
17501 break;
17502
17503 ftmp = ftmp->next;
17504 }
17505
17506 /* Scan over the fixes we have identified for this pool, fixing them
17507 up and adding the constants to the pool itself. */
17508 for (this_fix = fix; this_fix && ftmp != this_fix;
17509 this_fix = this_fix->next)
17510 if (!BARRIER_P (this_fix->insn))
17511 {
17512 rtx addr
17513 = plus_constant (Pmode,
17514 gen_rtx_LABEL_REF (VOIDmode,
17515 minipool_vector_label),
17516 this_fix->minipool->offset);
17517 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17518 }
17519
17520 dump_minipool (last_barrier->insn);
17521 fix = ftmp;
17522 }
17523
17524 /* From now on we must synthesize any constants that we can't handle
17525 directly. This can happen if the RTL gets split during final
17526 instruction generation. */
17527 cfun->machine->after_arm_reorg = 1;
17528
17529 /* Free the minipool memory. */
17530 obstack_free (&minipool_obstack, minipool_startobj);
17531 }
17532 \f
17533 /* Routines to output assembly language. */
17534
17535 /* Return the string representation of the real value passed in. */
17536 static const char *
17537 fp_const_from_val (REAL_VALUE_TYPE *r)
17538 {
17539 if (!fp_consts_inited)
17540 init_fp_table ();
17541
17542 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17543 return "0";
17544 }
17545
17546 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17547 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17548 insn is in the list, and UPDATE is true iff the list contains an
17549 explicit update of the base register. */
17550 void
17551 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17552 bool update)
17553 {
17554 int i;
17555 char pattern[100];
17556 int offset;
17557 const char *conditional;
17558 int num_saves = XVECLEN (operands[0], 0);
17559 unsigned int regno;
17560 unsigned int regno_base = REGNO (operands[1]);
17561
17562 offset = 0;
17563 offset += update ? 1 : 0;
17564 offset += return_pc ? 1 : 0;
17565
17566 /* Is the base register in the list? */
17567 for (i = offset; i < num_saves; i++)
17568 {
17569 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17570 /* If SP is in the list, then the base register must be SP. */
17571 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17572 /* If base register is in the list, there must be no explicit update. */
17573 if (regno == regno_base)
17574 gcc_assert (!update);
17575 }
17576
17577 conditional = reverse ? "%?%D0" : "%?%d0";
17578 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17579 {
17580 /* Output pop (not ldmfd) because it has a shorter encoding. */
17581 gcc_assert (update);
17582 sprintf (pattern, "pop%s\t{", conditional);
17583 }
17584 else
17585 {
17586 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17587 It's just a convention; their semantics are identical. */
17588 if (regno_base == SP_REGNUM)
17589 sprintf (pattern, "ldm%sfd\t", conditional);
17590 else if (TARGET_UNIFIED_ASM)
17591 sprintf (pattern, "ldmia%s\t", conditional);
17592 else
17593 sprintf (pattern, "ldm%sia\t", conditional);
17594
17595 strcat (pattern, reg_names[regno_base]);
17596 if (update)
17597 strcat (pattern, "!, {");
17598 else
17599 strcat (pattern, ", {");
17600 }
17601
17602 /* Output the first destination register. */
17603 strcat (pattern,
17604 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17605
17606 /* Output the rest of the destination registers. */
17607 for (i = offset + 1; i < num_saves; i++)
17608 {
17609 strcat (pattern, ", ");
17610 strcat (pattern,
17611 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17612 }
17613
17614 strcat (pattern, "}");
17615
17616 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17617 strcat (pattern, "^");
17618
17619 output_asm_insn (pattern, &cond);
17620 }
17621
17622
17623 /* Output the assembly for a store multiple. */
17624
17625 const char *
17626 vfp_output_vstmd (rtx * operands)
17627 {
17628 char pattern[100];
17629 int p;
17630 int base;
17631 int i;
17632 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17633 ? XEXP (operands[0], 0)
17634 : XEXP (XEXP (operands[0], 0), 0);
17635 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17636
17637 if (push_p)
17638 strcpy (pattern, "vpush%?.64\t{%P1");
17639 else
17640 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17641
17642 p = strlen (pattern);
17643
17644 gcc_assert (REG_P (operands[1]));
17645
17646 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17647 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17648 {
17649 p += sprintf (&pattern[p], ", d%d", base + i);
17650 }
17651 strcpy (&pattern[p], "}");
17652
17653 output_asm_insn (pattern, operands);
17654 return "";
17655 }
17656
17657
17658 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17659 number of bytes pushed. */
17660
17661 static int
17662 vfp_emit_fstmd (int base_reg, int count)
17663 {
17664 rtx par;
17665 rtx dwarf;
17666 rtx tmp, reg;
17667 int i;
17668
17669 /* Work around an ARM10 VFPr1 bug: data corruption can occur when exactly
17670 two register pairs are stored by a store-multiple insn. We avoid this
17671 by pushing an extra pair. */
17672 if (count == 2 && !arm_arch6)
17673 {
17674 if (base_reg == LAST_VFP_REGNUM - 3)
17675 base_reg -= 2;
17676 count++;
17677 }
17678
17679 /* FSTMD may not store more than 16 doubleword registers at once. Split
17680 larger stores into multiple parts (up to a maximum of two, in
17681 practice). */
17682 if (count > 16)
17683 {
17684 int saved;
17685 /* NOTE: base_reg is an internal register number, so each D register
17686 counts as 2. */
17687 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17688 saved += vfp_emit_fstmd (base_reg, 16);
17689 return saved;
17690 }
17691
17692 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17693 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17694
17695 reg = gen_rtx_REG (DFmode, base_reg);
17696 base_reg += 2;
17697
17698 XVECEXP (par, 0, 0)
17699 = gen_rtx_SET (VOIDmode,
17700 gen_frame_mem
17701 (BLKmode,
17702 gen_rtx_PRE_MODIFY (Pmode,
17703 stack_pointer_rtx,
17704 plus_constant
17705 (Pmode, stack_pointer_rtx,
17706 - (count * 8)))
17707 ),
17708 gen_rtx_UNSPEC (BLKmode,
17709 gen_rtvec (1, reg),
17710 UNSPEC_PUSH_MULT));
17711
17712 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17713 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17714 RTX_FRAME_RELATED_P (tmp) = 1;
17715 XVECEXP (dwarf, 0, 0) = tmp;
17716
17717 tmp = gen_rtx_SET (VOIDmode,
17718 gen_frame_mem (DFmode, stack_pointer_rtx),
17719 reg);
17720 RTX_FRAME_RELATED_P (tmp) = 1;
17721 XVECEXP (dwarf, 0, 1) = tmp;
17722
17723 for (i = 1; i < count; i++)
17724 {
17725 reg = gen_rtx_REG (DFmode, base_reg);
17726 base_reg += 2;
17727 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17728
17729 tmp = gen_rtx_SET (VOIDmode,
17730 gen_frame_mem (DFmode,
17731 plus_constant (Pmode,
17732 stack_pointer_rtx,
17733 i * 8)),
17734 reg);
17735 RTX_FRAME_RELATED_P (tmp) = 1;
17736 XVECEXP (dwarf, 0, i + 1) = tmp;
17737 }
17738
17739 par = emit_insn (par);
17740 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17741 RTX_FRAME_RELATED_P (par) = 1;
17742
17743 return count * 8;
17744 }
17745
17746 /* Emit a call instruction with pattern PAT. ADDR is the address of
17747 the call target. */
17748
17749 void
17750 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17751 {
17752 rtx insn;
17753
17754 insn = emit_call_insn (pat);
17755
17756 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17757 If the call might use such an entry, add a use of the PIC register
17758 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17759 if (TARGET_VXWORKS_RTP
17760 && flag_pic
17761 && !sibcall
17762 && GET_CODE (addr) == SYMBOL_REF
17763 && (SYMBOL_REF_DECL (addr)
17764 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17765 : !SYMBOL_REF_LOCAL_P (addr)))
17766 {
17767 require_pic_register ();
17768 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17769 }
17770
17771 if (TARGET_AAPCS_BASED)
17772 {
17773 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17774 linker. We need to add an IP clobber to allow setting
17775 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17776 is not needed since it's a fixed register. */
17777 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17778 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17779 }
17780 }
17781
17782 /* Output a 'call' insn. */
17783 const char *
17784 output_call (rtx *operands)
17785 {
17786 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17787
17788 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17789 if (REGNO (operands[0]) == LR_REGNUM)
17790 {
17791 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17792 output_asm_insn ("mov%?\t%0, %|lr", operands);
17793 }
17794
17795 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17796
17797 if (TARGET_INTERWORK || arm_arch4t)
17798 output_asm_insn ("bx%?\t%0", operands);
17799 else
17800 output_asm_insn ("mov%?\t%|pc, %0", operands);
17801
17802 return "";
17803 }
17804
17805 /* Output a 'call' insn whose target is a reference in memory. This is
17806 disabled for ARMv5, where we prefer a blx instead, because otherwise
17807 there's a significant performance overhead. */
17808 const char *
17809 output_call_mem (rtx *operands)
17810 {
17811 gcc_assert (!arm_arch5);
17812 if (TARGET_INTERWORK)
17813 {
17814 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17815 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17816 output_asm_insn ("bx%?\t%|ip", operands);
17817 }
17818 else if (regno_use_in (LR_REGNUM, operands[0]))
17819 {
17820 /* LR is used in the memory address. We load the address in the
17821 first instruction. It's safe to use IP as the target of the
17822 load since the call will kill it anyway. */
17823 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17824 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17825 if (arm_arch4t)
17826 output_asm_insn ("bx%?\t%|ip", operands);
17827 else
17828 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17829 }
17830 else
17831 {
17832 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17833 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17834 }
17835
17836 return "";
17837 }
17838
17839
17840 /* Output a move from arm registers to arm registers of a long double.
17841 OPERANDS[0] is the destination.
17842 OPERANDS[1] is the source. */
17843 const char *
17844 output_mov_long_double_arm_from_arm (rtx *operands)
17845 {
17846 /* We have to be careful here because the two might overlap. */
17847 int dest_start = REGNO (operands[0]);
17848 int src_start = REGNO (operands[1]);
17849 rtx ops[2];
17850 int i;
17851
17852 if (dest_start < src_start)
17853 {
17854 for (i = 0; i < 3; i++)
17855 {
17856 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17857 ops[1] = gen_rtx_REG (SImode, src_start + i);
17858 output_asm_insn ("mov%?\t%0, %1", ops);
17859 }
17860 }
17861 else
17862 {
17863 for (i = 2; i >= 0; i--)
17864 {
17865 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17866 ops[1] = gen_rtx_REG (SImode, src_start + i);
17867 output_asm_insn ("mov%?\t%0, %1", ops);
17868 }
17869 }
17870
17871 return "";
17872 }
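
/* Editor's note: an illustrative, standalone sketch (hypothetical helper) of
   the ordering rule used above.  When the source and destination register
   ranges may overlap, copy upwards if the destination starts below the source
   and downwards otherwise, so no word is overwritten before it is read.  */
static void
example_copy_possibly_overlapping (int *dst, const int *src, int n)
{
  int i;

  if (dst < src)
    for (i = 0; i < n; i++)
      dst[i] = src[i];
  else
    for (i = n - 1; i >= 0; i--)
      dst[i] = src[i];
}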
17873
17874 void
17875 arm_emit_movpair (rtx dest, rtx src)
17876 {
17877 /* If the src is an immediate, simplify it. */
17878 if (CONST_INT_P (src))
17879 {
17880 HOST_WIDE_INT val = INTVAL (src);
17881 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17882 if ((val >> 16) & 0x0000ffff)
17883 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17884 GEN_INT (16)),
17885 GEN_INT ((val >> 16) & 0x0000ffff));
17886 return;
17887 }
17888 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17889 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17890 }
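
/* Editor's note: an illustrative, standalone sketch (hypothetical helper) of
   the constant split performed above for a movw/movt pair: the low 16 bits
   are written first, and the high 16 bits only when they are non-zero.  */
static void
example_split_for_movw_movt (unsigned int val,
                             unsigned int *low16, unsigned int *high16)
{
  *low16 = val & 0xffffu;            /* movw operand  */
  *high16 = (val >> 16) & 0xffffu;   /* movt operand; skipped when zero  */
}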
17891
17892 /* Output a move between double words. It must be REG<-MEM
17893 or MEM<-REG. */
17894 const char *
17895 output_move_double (rtx *operands, bool emit, int *count)
17896 {
17897 enum rtx_code code0 = GET_CODE (operands[0]);
17898 enum rtx_code code1 = GET_CODE (operands[1]);
17899 rtx otherops[3];
17900 if (count)
17901 *count = 1;
17902
17903 /* The only case when this might happen is when
17904 you are looking at the length of a DImode instruction
17905 that has an invalid constant in it. */
17906 if (code0 == REG && code1 != MEM)
17907 {
17908 gcc_assert (!emit);
17909 *count = 2;
17910 return "";
17911 }
17912
17913 if (code0 == REG)
17914 {
17915 unsigned int reg0 = REGNO (operands[0]);
17916
17917 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17918
17919 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17920
17921 switch (GET_CODE (XEXP (operands[1], 0)))
17922 {
17923 case REG:
17924
17925 if (emit)
17926 {
17927 if (TARGET_LDRD
17928 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17929 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17930 else
17931 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17932 }
17933 break;
17934
17935 case PRE_INC:
17936 gcc_assert (TARGET_LDRD);
17937 if (emit)
17938 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17939 break;
17940
17941 case PRE_DEC:
17942 if (emit)
17943 {
17944 if (TARGET_LDRD)
17945 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17946 else
17947 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17948 }
17949 break;
17950
17951 case POST_INC:
17952 if (emit)
17953 {
17954 if (TARGET_LDRD)
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17956 else
17957 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17958 }
17959 break;
17960
17961 case POST_DEC:
17962 gcc_assert (TARGET_LDRD);
17963 if (emit)
17964 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17965 break;
17966
17967 case PRE_MODIFY:
17968 case POST_MODIFY:
17969 /* Autoincrement addressing modes should never have overlapping
17970 base and destination registers, and overlapping index registers
17971 are already prohibited, so this doesn't need to worry about
17972 fix_cm3_ldrd. */
17973 otherops[0] = operands[0];
17974 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17975 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17976
17977 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17978 {
17979 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17980 {
17981 /* Registers overlap so split out the increment. */
17982 if (emit)
17983 {
17984 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17985 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17986 }
17987 if (count)
17988 *count = 2;
17989 }
17990 else
17991 {
17992 /* Use a single insn if we can.
17993 FIXME: IWMMXT allows offsets larger than ldrd can
17994 handle, fix these up with a pair of ldr. */
17995 if (TARGET_THUMB2
17996 || !CONST_INT_P (otherops[2])
17997 || (INTVAL (otherops[2]) > -256
17998 && INTVAL (otherops[2]) < 256))
17999 {
18000 if (emit)
18001 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18002 }
18003 else
18004 {
18005 if (emit)
18006 {
18007 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18008 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18009 }
18010 if (count)
18011 *count = 2;
18012
18013 }
18014 }
18015 }
18016 else
18017 {
18018 /* Use a single insn if we can.
18019 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18020 fix these up with a pair of ldr. */
18021 if (TARGET_THUMB2
18022 || !CONST_INT_P (otherops[2])
18023 || (INTVAL (otherops[2]) > -256
18024 && INTVAL (otherops[2]) < 256))
18025 {
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18028 }
18029 else
18030 {
18031 if (emit)
18032 {
18033 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18034 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18035 }
18036 if (count)
18037 *count = 2;
18038 }
18039 }
18040 break;
18041
18042 case LABEL_REF:
18043 case CONST:
18044 /* We might be able to use ldrd %0, %1 here. However the range is
18045 different to ldr/adr, and it is broken on some ARMv7-M
18046 implementations. */
18047 /* Use the second register of the pair to avoid problematic
18048 overlap. */
18049 otherops[1] = operands[1];
18050 if (emit)
18051 output_asm_insn ("adr%?\t%0, %1", otherops);
18052 operands[1] = otherops[0];
18053 if (emit)
18054 {
18055 if (TARGET_LDRD)
18056 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18057 else
18058 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18059 }
18060
18061 if (count)
18062 *count = 2;
18063 break;
18064
18065 /* ??? This needs checking for thumb2. */
18066 default:
18067 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18068 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18069 {
18070 otherops[0] = operands[0];
18071 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18072 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18073
18074 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18075 {
18076 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18077 {
18078 switch ((int) INTVAL (otherops[2]))
18079 {
18080 case -8:
18081 if (emit)
18082 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18083 return "";
18084 case -4:
18085 if (TARGET_THUMB2)
18086 break;
18087 if (emit)
18088 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18089 return "";
18090 case 4:
18091 if (TARGET_THUMB2)
18092 break;
18093 if (emit)
18094 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18095 return "";
18096 }
18097 }
18098 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18099 operands[1] = otherops[0];
18100 if (TARGET_LDRD
18101 && (REG_P (otherops[2])
18102 || TARGET_THUMB2
18103 || (CONST_INT_P (otherops[2])
18104 && INTVAL (otherops[2]) > -256
18105 && INTVAL (otherops[2]) < 256)))
18106 {
18107 if (reg_overlap_mentioned_p (operands[0],
18108 otherops[2]))
18109 {
18110 /* Swap base and index registers over to
18111 avoid a conflict. */
18112 std::swap (otherops[1], otherops[2]);
18113 }
18114 /* If both registers conflict, it will usually
18115 have been fixed by a splitter. */
18116 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18117 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18118 {
18119 if (emit)
18120 {
18121 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18122 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18123 }
18124 if (count)
18125 *count = 2;
18126 }
18127 else
18128 {
18129 otherops[0] = operands[0];
18130 if (emit)
18131 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18132 }
18133 return "";
18134 }
18135
18136 if (CONST_INT_P (otherops[2]))
18137 {
18138 if (emit)
18139 {
18140 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18141 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18142 else
18143 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18144 }
18145 }
18146 else
18147 {
18148 if (emit)
18149 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18150 }
18151 }
18152 else
18153 {
18154 if (emit)
18155 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18156 }
18157
18158 if (count)
18159 *count = 2;
18160
18161 if (TARGET_LDRD)
18162 return "ldr%(d%)\t%0, [%1]";
18163
18164 return "ldm%(ia%)\t%1, %M0";
18165 }
18166 else
18167 {
18168 otherops[1] = adjust_address (operands[1], SImode, 4);
18169 /* Take care of overlapping base/data reg. */
18170 if (reg_mentioned_p (operands[0], operands[1]))
18171 {
18172 if (emit)
18173 {
18174 output_asm_insn ("ldr%?\t%0, %1", otherops);
18175 output_asm_insn ("ldr%?\t%0, %1", operands);
18176 }
18177 if (count)
18178 *count = 2;
18179
18180 }
18181 else
18182 {
18183 if (emit)
18184 {
18185 output_asm_insn ("ldr%?\t%0, %1", operands);
18186 output_asm_insn ("ldr%?\t%0, %1", otherops);
18187 }
18188 if (count)
18189 *count = 2;
18190 }
18191 }
18192 }
18193 }
18194 else
18195 {
18196 /* Constraints should ensure this. */
18197 gcc_assert (code0 == MEM && code1 == REG);
18198 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18199 || (TARGET_ARM && TARGET_LDRD));
18200
18201 switch (GET_CODE (XEXP (operands[0], 0)))
18202 {
18203 case REG:
18204 if (emit)
18205 {
18206 if (TARGET_LDRD)
18207 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18208 else
18209 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18210 }
18211 break;
18212
18213 case PRE_INC:
18214 gcc_assert (TARGET_LDRD);
18215 if (emit)
18216 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18217 break;
18218
18219 case PRE_DEC:
18220 if (emit)
18221 {
18222 if (TARGET_LDRD)
18223 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18224 else
18225 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18226 }
18227 break;
18228
18229 case POST_INC:
18230 if (emit)
18231 {
18232 if (TARGET_LDRD)
18233 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18234 else
18235 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18236 }
18237 break;
18238
18239 case POST_DEC:
18240 gcc_assert (TARGET_LDRD);
18241 if (emit)
18242 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18243 break;
18244
18245 case PRE_MODIFY:
18246 case POST_MODIFY:
18247 otherops[0] = operands[1];
18248 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18249 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18250
18251 /* IWMMXT allows offsets larger than strd can handle;
18252 fix these up with a pair of str. */
18253 if (!TARGET_THUMB2
18254 && CONST_INT_P (otherops[2])
18255 && (INTVAL(otherops[2]) <= -256
18256 || INTVAL(otherops[2]) >= 256))
18257 {
18258 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18259 {
18260 if (emit)
18261 {
18262 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18263 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18264 }
18265 if (count)
18266 *count = 2;
18267 }
18268 else
18269 {
18270 if (emit)
18271 {
18272 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18273 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18274 }
18275 if (count)
18276 *count = 2;
18277 }
18278 }
18279 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18280 {
18281 if (emit)
18282 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18283 }
18284 else
18285 {
18286 if (emit)
18287 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18288 }
18289 break;
18290
18291 case PLUS:
18292 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18293 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18294 {
18295 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18296 {
18297 case -8:
18298 if (emit)
18299 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18300 return "";
18301
18302 case -4:
18303 if (TARGET_THUMB2)
18304 break;
18305 if (emit)
18306 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18307 return "";
18308
18309 case 4:
18310 if (TARGET_THUMB2)
18311 break;
18312 if (emit)
18313 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18314 return "";
18315 }
18316 }
18317 if (TARGET_LDRD
18318 && (REG_P (otherops[2])
18319 || TARGET_THUMB2
18320 || (CONST_INT_P (otherops[2])
18321 && INTVAL (otherops[2]) > -256
18322 && INTVAL (otherops[2]) < 256)))
18323 {
18324 otherops[0] = operands[1];
18325 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18326 if (emit)
18327 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18328 return "";
18329 }
18330 /* Fall through */
18331
18332 default:
18333 otherops[0] = adjust_address (operands[0], SImode, 4);
18334 otherops[1] = operands[1];
18335 if (emit)
18336 {
18337 output_asm_insn ("str%?\t%1, %0", operands);
18338 output_asm_insn ("str%?\t%H1, %0", otherops);
18339 }
18340 if (count)
18341 *count = 2;
18342 }
18343 }
18344
18345 return "";
18346 }
18347
18348 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18349 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18350
18351 const char *
18352 output_move_quad (rtx *operands)
18353 {
18354 if (REG_P (operands[0]))
18355 {
18356 /* Load, or reg->reg move. */
18357
18358 if (MEM_P (operands[1]))
18359 {
18360 switch (GET_CODE (XEXP (operands[1], 0)))
18361 {
18362 case REG:
18363 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18364 break;
18365
18366 case LABEL_REF:
18367 case CONST:
18368 output_asm_insn ("adr%?\t%0, %1", operands);
18369 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18370 break;
18371
18372 default:
18373 gcc_unreachable ();
18374 }
18375 }
18376 else
18377 {
18378 rtx ops[2];
18379 int dest, src, i;
18380
18381 gcc_assert (REG_P (operands[1]));
18382
18383 dest = REGNO (operands[0]);
18384 src = REGNO (operands[1]);
18385
18386 /* This seems pretty dumb, but hopefully GCC won't try to do it
18387 very often. */
18388 if (dest < src)
18389 for (i = 0; i < 4; i++)
18390 {
18391 ops[0] = gen_rtx_REG (SImode, dest + i);
18392 ops[1] = gen_rtx_REG (SImode, src + i);
18393 output_asm_insn ("mov%?\t%0, %1", ops);
18394 }
18395 else
18396 for (i = 3; i >= 0; i--)
18397 {
18398 ops[0] = gen_rtx_REG (SImode, dest + i);
18399 ops[1] = gen_rtx_REG (SImode, src + i);
18400 output_asm_insn ("mov%?\t%0, %1", ops);
18401 }
18402 }
18403 }
18404 else
18405 {
18406 gcc_assert (MEM_P (operands[0]));
18407 gcc_assert (REG_P (operands[1]));
18408 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18409
18410 switch (GET_CODE (XEXP (operands[0], 0)))
18411 {
18412 case REG:
18413 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18414 break;
18415
18416 default:
18417 gcc_unreachable ();
18418 }
18419 }
18420
18421 return "";
18422 }
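
/* A brief illustration of the register-to-register case handled above
   (register numbers invented for the example): moving a quad-word value
   from r4-r7 into r2-r5 copies r4->r2, r5->r3, ... in ascending order,
   because the destination starts below the source; moving it the other
   way copies in descending order, so that no source register is
   overwritten before it has been read.  */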
18423
18424 /* Output a VFP load or store instruction. */
18425
18426 const char *
18427 output_move_vfp (rtx *operands)
18428 {
18429 rtx reg, mem, addr, ops[2];
18430 int load = REG_P (operands[0]);
18431 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18432 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18433 const char *templ;
18434 char buff[50];
18435 machine_mode mode;
18436
18437 reg = operands[!load];
18438 mem = operands[load];
18439
18440 mode = GET_MODE (reg);
18441
18442 gcc_assert (REG_P (reg));
18443 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18444 gcc_assert (mode == SFmode
18445 || mode == DFmode
18446 || mode == SImode
18447 || mode == DImode
18448 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18449 gcc_assert (MEM_P (mem));
18450
18451 addr = XEXP (mem, 0);
18452
18453 switch (GET_CODE (addr))
18454 {
18455 case PRE_DEC:
18456 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18457 ops[0] = XEXP (addr, 0);
18458 ops[1] = reg;
18459 break;
18460
18461 case POST_INC:
18462 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18463 ops[0] = XEXP (addr, 0);
18464 ops[1] = reg;
18465 break;
18466
18467 default:
18468 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18469 ops[0] = reg;
18470 ops[1] = mem;
18471 break;
18472 }
18473
18474 sprintf (buff, templ,
18475 load ? "ld" : "st",
18476 dp ? "64" : "32",
18477 dp ? "P" : "",
18478 integer_p ? "\t%@ int" : "");
18479 output_asm_insn (buff, ops);
18480
18481 return "";
18482 }
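
/* Rough examples of the templates assembled above (illustrative only;
   the register allocation shown is invented):

     DFmode load, plain address:	vldr%?.64 %P0, %1     ->  vldr.64  d7, [r3]
     SFmode store, post-increment:	vstmia%?.32 %0!, {%1} ->  vstmia.32 r2!, {s15}

   The trailing "%@ int" is appended as an assembler comment when the
   value being moved is an integer mode held in a VFP register.  */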
18483
18484 /* Output a Neon double-word or quad-word load or store, or a load
18485 or store for larger structure modes.
18486
18487 WARNING: The ordering of elements is weird in big-endian mode,
18488 because the EABI requires that vectors stored in memory appear
18489    as though they were stored by a VSTM instruction.
18490 GCC RTL defines element ordering based on in-memory order.
18491 This can be different from the architectural ordering of elements
18492 within a NEON register. The intrinsics defined in arm_neon.h use the
18493 NEON register element ordering, not the GCC RTL element ordering.
18494
18495    For example, the in-memory ordering of a big-endian quadword
18496 vector with 16-bit elements when stored from register pair {d0,d1}
18497 will be (lowest address first, d0[N] is NEON register element N):
18498
18499 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18500
18501 When necessary, quadword registers (dN, dN+1) are moved to ARM
18502 registers from rN in the order:
18503
18504 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18505
18506 So that STM/LDM can be used on vectors in ARM registers, and the
18507 same memory layout will result as if VSTM/VLDM were used.
18508
18509 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18510 possible, which allows use of appropriate alignment tags.
18511 Note that the choice of "64" is independent of the actual vector
18512 element size; this size simply ensures that the behavior is
18513 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18514
18515 Due to limitations of those instructions, use of VST1.64/VLD1.64
18516 is not possible if:
18517 - the address contains PRE_DEC, or
18518 - the mode refers to more than 4 double-word registers
18519
18520 In those cases, it would be possible to replace VSTM/VLDM by a
18521 sequence of instructions; this is not currently implemented since
18522 this is not certain to actually improve performance. */
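
/* As a concrete (illustrative) example of the preference described above,
   a quad-word store to the address held in rN may be emitted as

     vst1.64	{d0, d1}, [rN]

   rather than

     vstmia	rN, {d0, d1}

   Both forms leave the same memory image in either endianness, but the
   vst1.64 form can carry an alignment hint on its address operand.  */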
18523
18524 const char *
18525 output_move_neon (rtx *operands)
18526 {
18527 rtx reg, mem, addr, ops[2];
18528 int regno, nregs, load = REG_P (operands[0]);
18529 const char *templ;
18530 char buff[50];
18531 machine_mode mode;
18532
18533 reg = operands[!load];
18534 mem = operands[load];
18535
18536 mode = GET_MODE (reg);
18537
18538 gcc_assert (REG_P (reg));
18539 regno = REGNO (reg);
18540 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18541 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18542 || NEON_REGNO_OK_FOR_QUAD (regno));
18543 gcc_assert (VALID_NEON_DREG_MODE (mode)
18544 || VALID_NEON_QREG_MODE (mode)
18545 || VALID_NEON_STRUCT_MODE (mode));
18546 gcc_assert (MEM_P (mem));
18547
18548 addr = XEXP (mem, 0);
18549
18550 /* Strip off const from addresses like (const (plus (...))). */
18551 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18552 addr = XEXP (addr, 0);
18553
18554 switch (GET_CODE (addr))
18555 {
18556 case POST_INC:
18557 /* We have to use vldm / vstm for too-large modes. */
18558 if (nregs > 4)
18559 {
18560 templ = "v%smia%%?\t%%0!, %%h1";
18561 ops[0] = XEXP (addr, 0);
18562 }
18563 else
18564 {
18565 templ = "v%s1.64\t%%h1, %%A0";
18566 ops[0] = mem;
18567 }
18568 ops[1] = reg;
18569 break;
18570
18571 case PRE_DEC:
18572 /* We have to use vldm / vstm in this case, since there is no
18573 pre-decrement form of the vld1 / vst1 instructions. */
18574 templ = "v%smdb%%?\t%%0!, %%h1";
18575 ops[0] = XEXP (addr, 0);
18576 ops[1] = reg;
18577 break;
18578
18579 case POST_MODIFY:
18580 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18581 gcc_unreachable ();
18582
18583 case REG:
18584 /* We have to use vldm / vstm for too-large modes. */
18585 if (nregs > 1)
18586 {
18587 if (nregs > 4)
18588 templ = "v%smia%%?\t%%m0, %%h1";
18589 else
18590 templ = "v%s1.64\t%%h1, %%A0";
18591
18592 ops[0] = mem;
18593 ops[1] = reg;
18594 break;
18595 }
18596 /* Fall through. */
18597 case LABEL_REF:
18598 case PLUS:
18599 {
18600 int i;
18601 int overlap = -1;
18602 for (i = 0; i < nregs; i++)
18603 {
18604 /* We're only using DImode here because it's a convenient size. */
18605 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18606 ops[1] = adjust_address (mem, DImode, 8 * i);
18607 if (reg_overlap_mentioned_p (ops[0], mem))
18608 {
18609 gcc_assert (overlap == -1);
18610 overlap = i;
18611 }
18612 else
18613 {
18614 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18615 output_asm_insn (buff, ops);
18616 }
18617 }
18618 if (overlap != -1)
18619 {
18620 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18621 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18622 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18623 output_asm_insn (buff, ops);
18624 }
18625
18626 return "";
18627 }
18628
18629 default:
18630 gcc_unreachable ();
18631 }
18632
18633 sprintf (buff, templ, load ? "ld" : "st");
18634 output_asm_insn (buff, ops);
18635
18636 return "";
18637 }
18638
18639 /* Compute and return the length of neon_mov<mode>, where <mode> is
18640 one of VSTRUCT modes: EI, OI, CI or XI. */
18641 int
18642 arm_attr_length_move_neon (rtx_insn *insn)
18643 {
18644 rtx reg, mem, addr;
18645 int load;
18646 machine_mode mode;
18647
18648 extract_insn_cached (insn);
18649
18650 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18651 {
18652 mode = GET_MODE (recog_data.operand[0]);
18653 switch (mode)
18654 {
18655 case EImode:
18656 case OImode:
18657 return 8;
18658 case CImode:
18659 return 12;
18660 case XImode:
18661 return 16;
18662 default:
18663 gcc_unreachable ();
18664 }
18665 }
18666
18667 load = REG_P (recog_data.operand[0]);
18668 reg = recog_data.operand[!load];
18669 mem = recog_data.operand[load];
18670
18671 gcc_assert (MEM_P (mem));
18672
18673 mode = GET_MODE (reg);
18674 addr = XEXP (mem, 0);
18675
18676 /* Strip off const from addresses like (const (plus (...))). */
18677 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18678 addr = XEXP (addr, 0);
18679
18680 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18681 {
18682 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18683 return insns * 4;
18684 }
18685 else
18686 return 4;
18687 }
18688
18689 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18690 return zero. */
18691
18692 int
18693 arm_address_offset_is_imm (rtx_insn *insn)
18694 {
18695 rtx mem, addr;
18696
18697 extract_insn_cached (insn);
18698
18699 if (REG_P (recog_data.operand[0]))
18700 return 0;
18701
18702 mem = recog_data.operand[0];
18703
18704 gcc_assert (MEM_P (mem));
18705
18706 addr = XEXP (mem, 0);
18707
18708 if (REG_P (addr)
18709 || (GET_CODE (addr) == PLUS
18710 && REG_P (XEXP (addr, 0))
18711 && CONST_INT_P (XEXP (addr, 1))))
18712 return 1;
18713 else
18714 return 0;
18715 }
18716
18717 /* Output an ADD r, s, #n where n may be too big for one instruction.
18718    If N is zero and the source and destination registers are the same,
	 output nothing.  */
18719 const char *
18720 output_add_immediate (rtx *operands)
18721 {
18722 HOST_WIDE_INT n = INTVAL (operands[2]);
18723
18724 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18725 {
18726 if (n < 0)
18727 output_multi_immediate (operands,
18728 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18729 -n);
18730 else
18731 output_multi_immediate (operands,
18732 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18733 n);
18734 }
18735
18736 return "";
18737 }
18738
18739 /* Output a multiple immediate operation.
18740 OPERANDS is the vector of operands referred to in the output patterns.
18741 INSTR1 is the output pattern to use for the first constant.
18742 INSTR2 is the output pattern to use for subsequent constants.
18743 IMMED_OP is the index of the constant slot in OPERANDS.
18744 N is the constant value. */
18745 static const char *
18746 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18747 int immed_op, HOST_WIDE_INT n)
18748 {
18749 #if HOST_BITS_PER_WIDE_INT > 32
18750 n &= 0xffffffff;
18751 #endif
18752
18753 if (n == 0)
18754 {
18755 /* Quick and easy output. */
18756 operands[immed_op] = const0_rtx;
18757 output_asm_insn (instr1, operands);
18758 }
18759 else
18760 {
18761 int i;
18762 const char * instr = instr1;
18763
18764 /* Note that n is never zero here (which would give no output). */
18765 for (i = 0; i < 32; i += 2)
18766 {
18767 if (n & (3 << i))
18768 {
18769 operands[immed_op] = GEN_INT (n & (255 << i));
18770 output_asm_insn (instr, operands);
18771 instr = instr2;
18772 i += 6;
18773 }
18774 }
18775 }
18776
18777 return "";
18778 }
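
/* A worked example (illustrative) of the splitting loop above, as used by
   output_add_immediate: for N = 0x0001f001 the first pair of set bits is
   found at position 0, so "add rd, rn, #1" is emitted; the scan resumes
   eight bits further on and finds bits 12-16, emitting
   "add rd, rd, #0x1f000".  Each chunk is an 8-bit value at an even bit
   position, i.e. a valid ARM immediate.  */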
18779
18780 /* Return the name of a shifter operation. */
18781 static const char *
18782 arm_shift_nmem(enum rtx_code code)
18783 {
18784 switch (code)
18785 {
18786 case ASHIFT:
18787 return ARM_LSL_NAME;
18788
18789 case ASHIFTRT:
18790 return "asr";
18791
18792 case LSHIFTRT:
18793 return "lsr";
18794
18795 case ROTATERT:
18796 return "ror";
18797
18798 default:
18799 abort();
18800 }
18801 }
18802
18803 /* Return the appropriate ARM instruction for the operation code.
18804 The returned result should not be overwritten. OP is the rtx of the
18805 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18806 was shifted. */
18807 const char *
18808 arithmetic_instr (rtx op, int shift_first_arg)
18809 {
18810 switch (GET_CODE (op))
18811 {
18812 case PLUS:
18813 return "add";
18814
18815 case MINUS:
18816 return shift_first_arg ? "rsb" : "sub";
18817
18818 case IOR:
18819 return "orr";
18820
18821 case XOR:
18822 return "eor";
18823
18824 case AND:
18825 return "and";
18826
18827 case ASHIFT:
18828 case ASHIFTRT:
18829 case LSHIFTRT:
18830 case ROTATERT:
18831 return arm_shift_nmem(GET_CODE(op));
18832
18833 default:
18834 gcc_unreachable ();
18835 }
18836 }
18837
18838 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18839 for the operation code. The returned result should not be overwritten.
18840 OP is the rtx code of the shift.
18841    On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18842    holds the constant shift amount.  */
18843 static const char *
18844 shift_op (rtx op, HOST_WIDE_INT *amountp)
18845 {
18846 const char * mnem;
18847 enum rtx_code code = GET_CODE (op);
18848
18849 switch (code)
18850 {
18851 case ROTATE:
18852 if (!CONST_INT_P (XEXP (op, 1)))
18853 {
18854 output_operand_lossage ("invalid shift operand");
18855 return NULL;
18856 }
18857
18858 code = ROTATERT;
18859 *amountp = 32 - INTVAL (XEXP (op, 1));
18860 mnem = "ror";
18861 break;
18862
18863 case ASHIFT:
18864 case ASHIFTRT:
18865 case LSHIFTRT:
18866 case ROTATERT:
18867 mnem = arm_shift_nmem(code);
18868 if (CONST_INT_P (XEXP (op, 1)))
18869 {
18870 *amountp = INTVAL (XEXP (op, 1));
18871 }
18872 else if (REG_P (XEXP (op, 1)))
18873 {
18874 *amountp = -1;
18875 return mnem;
18876 }
18877 else
18878 {
18879 output_operand_lossage ("invalid shift operand");
18880 return NULL;
18881 }
18882 break;
18883
18884 case MULT:
18885 /* We never have to worry about the amount being other than a
18886 power of 2, since this case can never be reloaded from a reg. */
18887 if (!CONST_INT_P (XEXP (op, 1)))
18888 {
18889 output_operand_lossage ("invalid shift operand");
18890 return NULL;
18891 }
18892
18893 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18894
18895 /* Amount must be a power of two. */
18896 if (*amountp & (*amountp - 1))
18897 {
18898 output_operand_lossage ("invalid shift operand");
18899 return NULL;
18900 }
18901
18902 *amountp = int_log2 (*amountp);
18903 return ARM_LSL_NAME;
18904
18905 default:
18906 output_operand_lossage ("invalid shift operand");
18907 return NULL;
18908 }
18909
18910 /* This is not 100% correct, but follows from the desire to merge
18911 multiplication by a power of 2 with the recognizer for a
18912 shift. >=32 is not a valid shift for "lsl", so we must try and
18913 output a shift that produces the correct arithmetical result.
18914 Using lsr #32 is identical except for the fact that the carry bit
18915 is not set correctly if we set the flags; but we never use the
18916 carry bit from such an operation, so we can ignore that. */
18917 if (code == ROTATERT)
18918 /* Rotate is just modulo 32. */
18919 *amountp &= 31;
18920 else if (*amountp != (*amountp & 31))
18921 {
18922 if (code == ASHIFT)
18923 mnem = "lsr";
18924 *amountp = 32;
18925 }
18926
18927 /* Shifts of 0 are no-ops. */
18928 if (*amountp == 0)
18929 return NULL;
18930
18931 return mnem;
18932 }
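
/* For example (illustrative), the operand (mult:SI (reg) (const_int 8)) is
   printed with the mnemonic returned here (ARM_LSL_NAME) and *AMOUNTP set
   to int_log2 (8) == 3, i.e. a left shift by three.  */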
18933
18934 /* Return the shift count (base-2 logarithm) of the power of two POWER.  */
18935
18936 static HOST_WIDE_INT
18937 int_log2 (HOST_WIDE_INT power)
18938 {
18939 HOST_WIDE_INT shift = 0;
18940
18941 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18942 {
18943 gcc_assert (shift <= 31);
18944 shift++;
18945 }
18946
18947 return shift;
18948 }
18949
18950 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18951 because /bin/as is horribly restrictive. The judgement about
18952 whether or not each character is 'printable' (and can be output as
18953 is) or not (and must be printed with an octal escape) must be made
18954 with reference to the *host* character set -- the situation is
18955 similar to that discussed in the comments above pp_c_char in
18956 c-pretty-print.c. */
18957
18958 #define MAX_ASCII_LEN 51
18959
18960 void
18961 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18962 {
18963 int i;
18964 int len_so_far = 0;
18965
18966 fputs ("\t.ascii\t\"", stream);
18967
18968 for (i = 0; i < len; i++)
18969 {
18970 int c = p[i];
18971
18972 if (len_so_far >= MAX_ASCII_LEN)
18973 {
18974 fputs ("\"\n\t.ascii\t\"", stream);
18975 len_so_far = 0;
18976 }
18977
18978 if (ISPRINT (c))
18979 {
18980 if (c == '\\' || c == '\"')
18981 {
18982 putc ('\\', stream);
18983 len_so_far++;
18984 }
18985 putc (c, stream);
18986 len_so_far++;
18987 }
18988 else
18989 {
18990 fprintf (stream, "\\%03o", c);
18991 len_so_far += 4;
18992 }
18993 }
18994
18995 fputs ("\"\n", stream);
18996 }
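
/* Example (illustrative): for the four-byte input "a\"b\n" this routine
   emits

	.ascii	"a\"b\012"

   i.e. quote and backslash are backslash-escaped and the non-printable
   newline becomes a three-digit octal escape.  Output longer than
   MAX_ASCII_LEN characters is split across several .ascii directives.  */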
18997 \f
18998 /* Compute the register save mask for registers 0 through 12
18999 inclusive. This code is used by arm_compute_save_reg_mask. */
19000
19001 static unsigned long
19002 arm_compute_save_reg0_reg12_mask (void)
19003 {
19004 unsigned long func_type = arm_current_func_type ();
19005 unsigned long save_reg_mask = 0;
19006 unsigned int reg;
19007
19008 if (IS_INTERRUPT (func_type))
19009 {
19010 unsigned int max_reg;
19011 /* Interrupt functions must not corrupt any registers,
19012 even call clobbered ones. If this is a leaf function
19013 we can just examine the registers used by the RTL, but
19014 otherwise we have to assume that whatever function is
19015 called might clobber anything, and so we have to save
19016 all the call-clobbered registers as well. */
19017 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19018 /* FIQ handlers have registers r8 - r12 banked, so
19019 	 we only need to check r0 - r7.  Normal ISRs only
19020 	 bank r13 and r14, so we must check up to r12.
19021 r13 is the stack pointer which is always preserved,
19022 so we do not need to consider it here. */
19023 max_reg = 7;
19024 else
19025 max_reg = 12;
19026
19027 for (reg = 0; reg <= max_reg; reg++)
19028 if (df_regs_ever_live_p (reg)
19029 || (! crtl->is_leaf && call_used_regs[reg]))
19030 save_reg_mask |= (1 << reg);
19031
19032 /* Also save the pic base register if necessary. */
19033 if (flag_pic
19034 && !TARGET_SINGLE_PIC_BASE
19035 && arm_pic_register != INVALID_REGNUM
19036 && crtl->uses_pic_offset_table)
19037 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19038 }
19039 else if (IS_VOLATILE(func_type))
19040 {
19041 /* For noreturn functions we historically omitted register saves
19042 	 altogether.  However, this really messes up debugging.  As a
19043 	 compromise, save just the frame pointers.  Combined with the link
19044 register saved elsewhere this should be sufficient to get
19045 a backtrace. */
19046 if (frame_pointer_needed)
19047 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19048 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19049 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19050 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19051 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19052 }
19053 else
19054 {
19055 /* In the normal case we only need to save those registers
19056 which are call saved and which are used by this function. */
19057 for (reg = 0; reg <= 11; reg++)
19058 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19059 save_reg_mask |= (1 << reg);
19060
19061 /* Handle the frame pointer as a special case. */
19062 if (frame_pointer_needed)
19063 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19064
19065 /* If we aren't loading the PIC register,
19066 don't stack it even though it may be live. */
19067 if (flag_pic
19068 && !TARGET_SINGLE_PIC_BASE
19069 && arm_pic_register != INVALID_REGNUM
19070 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19071 || crtl->uses_pic_offset_table))
19072 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19073
19074 /* The prologue will copy SP into R0, so save it. */
19075 if (IS_STACKALIGN (func_type))
19076 save_reg_mask |= 1;
19077 }
19078
19079 /* Save registers so the exception handler can modify them. */
19080 if (crtl->calls_eh_return)
19081 {
19082 unsigned int i;
19083
19084 for (i = 0; ; i++)
19085 {
19086 reg = EH_RETURN_DATA_REGNO (i);
19087 if (reg == INVALID_REGNUM)
19088 break;
19089 save_reg_mask |= 1 << reg;
19090 }
19091 }
19092
19093 return save_reg_mask;
19094 }
19095
19096 /* Return true if r3 is live at the start of the function. */
19097
19098 static bool
19099 arm_r3_live_at_start_p (void)
19100 {
19101 /* Just look at cfg info, which is still close enough to correct at this
19102 point. This gives false positives for broken functions that might use
19103 uninitialized data that happens to be allocated in r3, but who cares? */
19104 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19105 }
19106
19107 /* Compute the number of bytes used to store the static chain register on the
19108 stack, above the stack frame. We need to know this accurately to get the
19109 alignment of the rest of the stack frame correct. */
19110
19111 static int
19112 arm_compute_static_chain_stack_bytes (void)
19113 {
19114 /* See the defining assertion in arm_expand_prologue. */
19115 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19116 && IS_NESTED (arm_current_func_type ())
19117 && arm_r3_live_at_start_p ()
19118 && crtl->args.pretend_args_size == 0)
19119 return 4;
19120
19121 return 0;
19122 }
19123
19124 /* Compute a bit mask of which registers need to be
19125 saved on the stack for the current function.
19126 This is used by arm_get_frame_offsets, which may add extra registers. */
19127
19128 static unsigned long
19129 arm_compute_save_reg_mask (void)
19130 {
19131 unsigned int save_reg_mask = 0;
19132 unsigned long func_type = arm_current_func_type ();
19133 unsigned int reg;
19134
19135 if (IS_NAKED (func_type))
19136 /* This should never really happen. */
19137 return 0;
19138
19139 /* If we are creating a stack frame, then we must save the frame pointer,
19140 IP (which will hold the old stack pointer), LR and the PC. */
19141 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19142 save_reg_mask |=
19143 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19144 | (1 << IP_REGNUM)
19145 | (1 << LR_REGNUM)
19146 | (1 << PC_REGNUM);
19147
19148 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19149
19150 /* Decide if we need to save the link register.
19151 Interrupt routines have their own banked link register,
19152 so they never need to save it.
19153 Otherwise if we do not use the link register we do not need to save
19154 it. If we are pushing other registers onto the stack however, we
19155 can save an instruction in the epilogue by pushing the link register
19156 now and then popping it back into the PC. This incurs extra memory
19157 accesses though, so we only do it when optimizing for size, and only
19158 if we know that we will not need a fancy return sequence. */
19159 if (df_regs_ever_live_p (LR_REGNUM)
19160 || (save_reg_mask
19161 && optimize_size
19162 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19163 && !crtl->calls_eh_return))
19164 save_reg_mask |= 1 << LR_REGNUM;
19165
19166 if (cfun->machine->lr_save_eliminated)
19167 save_reg_mask &= ~ (1 << LR_REGNUM);
19168
19169 if (TARGET_REALLY_IWMMXT
19170 && ((bit_count (save_reg_mask)
19171 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19172 arm_compute_static_chain_stack_bytes())
19173 ) % 2) != 0)
19174 {
19175 /* The total number of registers that are going to be pushed
19176 onto the stack is odd. We need to ensure that the stack
19177 is 64-bit aligned before we start to save iWMMXt registers,
19178 and also before we start to create locals. (A local variable
19179 might be a double or long long which we will load/store using
19180 an iWMMXt instruction). Therefore we need to push another
19181 ARM register, so that the stack will be 64-bit aligned. We
19182 	 try to avoid using the arg registers (r0 - r3) as they might be
19183 used to pass values in a tail call. */
19184 for (reg = 4; reg <= 12; reg++)
19185 if ((save_reg_mask & (1 << reg)) == 0)
19186 break;
19187
19188 if (reg <= 12)
19189 save_reg_mask |= (1 << reg);
19190 else
19191 {
19192 cfun->machine->sibcall_blocked = 1;
19193 save_reg_mask |= (1 << 3);
19194 }
19195 }
19196
19197 /* We may need to push an additional register for use initializing the
19198 PIC base register. */
19199 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19200 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19201 {
19202 reg = thumb_find_work_register (1 << 4);
19203 if (!call_used_regs[reg])
19204 save_reg_mask |= (1 << reg);
19205 }
19206
19207 return save_reg_mask;
19208 }
19209
19210
19211 /* Compute a bit mask of which registers need to be
19212 saved on the stack for the current function. */
19213 static unsigned long
19214 thumb1_compute_save_reg_mask (void)
19215 {
19216 unsigned long mask;
19217 unsigned reg;
19218
19219 mask = 0;
19220 for (reg = 0; reg < 12; reg ++)
19221 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19222 mask |= 1 << reg;
19223
19224 if (flag_pic
19225 && !TARGET_SINGLE_PIC_BASE
19226 && arm_pic_register != INVALID_REGNUM
19227 && crtl->uses_pic_offset_table)
19228 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19229
19230 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19231 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19232 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19233
19234 /* LR will also be pushed if any lo regs are pushed. */
19235 if (mask & 0xff || thumb_force_lr_save ())
19236 mask |= (1 << LR_REGNUM);
19237
19238 /* Make sure we have a low work register if we need one.
19239 We will need one if we are going to push a high register,
19240 but we are not currently intending to push a low register. */
19241 if ((mask & 0xff) == 0
19242 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19243 {
19244 /* Use thumb_find_work_register to choose which register
19245 we will use. If the register is live then we will
19246 have to push it. Use LAST_LO_REGNUM as our fallback
19247 choice for the register to select. */
19248 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19249 /* Make sure the register returned by thumb_find_work_register is
19250 not part of the return value. */
19251 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19252 reg = LAST_LO_REGNUM;
19253
19254 if (! call_used_regs[reg])
19255 mask |= 1 << reg;
19256 }
19257
19258 /* The 504 below is 8 bytes less than 512 because there are two possible
19259 alignment words. We can't tell here if they will be present or not so we
19260 have to play it safe and assume that they are. */
19261 if ((CALLER_INTERWORKING_SLOT_SIZE +
19262 ROUND_UP_WORD (get_frame_size ()) +
19263 crtl->outgoing_args_size) >= 504)
19264 {
19265 /* This is the same as the code in thumb1_expand_prologue() which
19266 determines which register to use for stack decrement. */
19267 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19268 if (mask & (1 << reg))
19269 break;
19270
19271 if (reg > LAST_LO_REGNUM)
19272 {
19273 /* Make sure we have a register available for stack decrement. */
19274 mask |= 1 << LAST_LO_REGNUM;
19275 }
19276 }
19277
19278 return mask;
19279 }
19280
19281
19282 /* Return the number of bytes required to save VFP registers. */
19283 static int
19284 arm_get_vfp_saved_size (void)
19285 {
19286 unsigned int regno;
19287 int count;
19288 int saved;
19289
19290 saved = 0;
19291 /* Space for saved VFP registers. */
19292 if (TARGET_HARD_FLOAT && TARGET_VFP)
19293 {
19294 count = 0;
19295 for (regno = FIRST_VFP_REGNUM;
19296 regno < LAST_VFP_REGNUM;
19297 regno += 2)
19298 {
19299 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19300 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19301 {
19302 if (count > 0)
19303 {
19304 /* Workaround ARM10 VFPr1 bug. */
19305 if (count == 2 && !arm_arch6)
19306 count++;
19307 saved += count * 8;
19308 }
19309 count = 0;
19310 }
19311 else
19312 count++;
19313 }
19314 if (count > 0)
19315 {
19316 if (count == 2 && !arm_arch6)
19317 count++;
19318 saved += count * 8;
19319 }
19320 }
19321 return saved;
19322 }
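
/* For example (illustrative): on a pre-ARMv6 core a contiguous block of
   exactly two live D registers is accounted as three (24 bytes rather
   than 16) because of the ARM10 VFPr1 workaround applied above.  */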
19323
19324
19325 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19326 everything bar the final return instruction. If simple_return is true,
19327    then do not output the epilogue, because it has already been emitted in RTL.  */
19328 const char *
19329 output_return_instruction (rtx operand, bool really_return, bool reverse,
19330 bool simple_return)
19331 {
19332 char conditional[10];
19333 char instr[100];
19334 unsigned reg;
19335 unsigned long live_regs_mask;
19336 unsigned long func_type;
19337 arm_stack_offsets *offsets;
19338
19339 func_type = arm_current_func_type ();
19340
19341 if (IS_NAKED (func_type))
19342 return "";
19343
19344 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19345 {
19346 /* If this function was declared non-returning, and we have
19347 found a tail call, then we have to trust that the called
19348 function won't return. */
19349 if (really_return)
19350 {
19351 rtx ops[2];
19352
19353 /* Otherwise, trap an attempted return by aborting. */
19354 ops[0] = operand;
19355 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19356 : "abort");
19357 assemble_external_libcall (ops[1]);
19358 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19359 }
19360
19361 return "";
19362 }
19363
19364 gcc_assert (!cfun->calls_alloca || really_return);
19365
19366 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19367
19368 cfun->machine->return_used_this_function = 1;
19369
19370 offsets = arm_get_frame_offsets ();
19371 live_regs_mask = offsets->saved_regs_mask;
19372
19373 if (!simple_return && live_regs_mask)
19374 {
19375 const char * return_reg;
19376
19377 /* If we do not have any special requirements for function exit
19378 (e.g. interworking) then we can load the return address
19379 directly into the PC. Otherwise we must load it into LR. */
19380 if (really_return
19381 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19382 return_reg = reg_names[PC_REGNUM];
19383 else
19384 return_reg = reg_names[LR_REGNUM];
19385
19386 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19387 {
19388 /* There are three possible reasons for the IP register
19389 	     being saved: 1) a stack frame was created, in which case
19390 IP contains the old stack pointer, or 2) an ISR routine
19391 corrupted it, or 3) it was saved to align the stack on
19392 iWMMXt. In case 1, restore IP into SP, otherwise just
19393 restore IP. */
19394 if (frame_pointer_needed)
19395 {
19396 live_regs_mask &= ~ (1 << IP_REGNUM);
19397 live_regs_mask |= (1 << SP_REGNUM);
19398 }
19399 else
19400 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19401 }
19402
19403 /* On some ARM architectures it is faster to use LDR rather than
19404 LDM to load a single register. On other architectures, the
19405 cost is the same. In 26 bit mode, or for exception handlers,
19406 we have to use LDM to load the PC so that the CPSR is also
19407 restored. */
19408 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19409 if (live_regs_mask == (1U << reg))
19410 break;
19411
19412 if (reg <= LAST_ARM_REGNUM
19413 && (reg != LR_REGNUM
19414 || ! really_return
19415 || ! IS_INTERRUPT (func_type)))
19416 {
19417 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19418 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19419 }
19420 else
19421 {
19422 char *p;
19423 int first = 1;
19424
19425 /* Generate the load multiple instruction to restore the
19426 registers. Note we can get here, even if
19427 frame_pointer_needed is true, but only if sp already
19428 points to the base of the saved core registers. */
19429 if (live_regs_mask & (1 << SP_REGNUM))
19430 {
19431 unsigned HOST_WIDE_INT stack_adjust;
19432
19433 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19434 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19435
19436 if (stack_adjust && arm_arch5 && TARGET_ARM)
19437 if (TARGET_UNIFIED_ASM)
19438 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19439 else
19440 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19441 else
19442 {
19443 /* If we can't use ldmib (SA110 bug),
19444 then try to pop r3 instead. */
19445 if (stack_adjust)
19446 live_regs_mask |= 1 << 3;
19447
19448 if (TARGET_UNIFIED_ASM)
19449 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19450 else
19451 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19452 }
19453 }
19454 else
19455 if (TARGET_UNIFIED_ASM)
19456 sprintf (instr, "pop%s\t{", conditional);
19457 else
19458 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19459
19460 p = instr + strlen (instr);
19461
19462 for (reg = 0; reg <= SP_REGNUM; reg++)
19463 if (live_regs_mask & (1 << reg))
19464 {
19465 int l = strlen (reg_names[reg]);
19466
19467 if (first)
19468 first = 0;
19469 else
19470 {
19471 memcpy (p, ", ", 2);
19472 p += 2;
19473 }
19474
19475 memcpy (p, "%|", 2);
19476 memcpy (p + 2, reg_names[reg], l);
19477 p += l + 2;
19478 }
19479
19480 if (live_regs_mask & (1 << LR_REGNUM))
19481 {
19482 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19483 /* If returning from an interrupt, restore the CPSR. */
19484 if (IS_INTERRUPT (func_type))
19485 strcat (p, "^");
19486 }
19487 else
19488 strcpy (p, "}");
19489 }
19490
19491 output_asm_insn (instr, & operand);
19492
19493 /* See if we need to generate an extra instruction to
19494 perform the actual function return. */
19495 if (really_return
19496 && func_type != ARM_FT_INTERWORKED
19497 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19498 {
19499 /* The return has already been handled
19500 by loading the LR into the PC. */
19501 return "";
19502 }
19503 }
19504
19505 if (really_return)
19506 {
19507 switch ((int) ARM_FUNC_TYPE (func_type))
19508 {
19509 case ARM_FT_ISR:
19510 case ARM_FT_FIQ:
19511 /* ??? This is wrong for unified assembly syntax. */
19512 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19513 break;
19514
19515 case ARM_FT_INTERWORKED:
19516 sprintf (instr, "bx%s\t%%|lr", conditional);
19517 break;
19518
19519 case ARM_FT_EXCEPTION:
19520 /* ??? This is wrong for unified assembly syntax. */
19521 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19522 break;
19523
19524 default:
19525 /* Use bx if it's available. */
19526 if (arm_arch5 || arm_arch4t)
19527 sprintf (instr, "bx%s\t%%|lr", conditional);
19528 else
19529 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19530 break;
19531 }
19532
19533 output_asm_insn (instr, & operand);
19534 }
19535
19536 return "";
19537 }
19538
19539 /* Write the function name into the code section, directly preceding
19540 the function prologue.
19541
19542 Code will be output similar to this:
19543 t0
19544 .ascii "arm_poke_function_name", 0
19545 .align
19546 t1
19547 .word 0xff000000 + (t1 - t0)
19548 arm_poke_function_name
19549 mov ip, sp
19550 stmfd sp!, {fp, ip, lr, pc}
19551 sub fp, ip, #4
19552
19553 When performing a stack backtrace, code can inspect the value
19554 of 'pc' stored at 'fp' + 0. If the trace function then looks
19555 at location pc - 12 and the top 8 bits are set, then we know
19556 that there is a function name embedded immediately preceding this
19557    location, whose length is given by ((pc[-3]) & ~0xff000000).
19558
19559 We assume that pc is declared as a pointer to an unsigned long.
19560
19561 It is of no benefit to output the function name if we are assembling
19562 a leaf function. These function types will not contain a stack
19563    backtrace structure, so it is not possible to determine the
19564 function name. */
19565 void
19566 arm_poke_function_name (FILE *stream, const char *name)
19567 {
19568 unsigned long alignlength;
19569 unsigned long length;
19570 rtx x;
19571
19572 length = strlen (name) + 1;
19573 alignlength = ROUND_UP_WORD (length);
19574
19575 ASM_OUTPUT_ASCII (stream, name, length);
19576 ASM_OUTPUT_ALIGN (stream, 2);
19577 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19578 assemble_aligned_integer (UNITS_PER_WORD, x);
19579 }
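
/* A debugger-side sketch (illustrative only, not part of GCC) of how the
   embedded name can be recovered, with PC being the saved pc value loaded
   from fp + 0 and treated as a pointer to unsigned long, as assumed in the
   comment above:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
	 unsigned long len = marker & ~0xff000000ul;
	 const char *name = (const char *) &pc[-3] - len;
       }

   Here LEN is the padded length (t1 - t0), so NAME points at the start of
   the NUL-terminated string emitted by the .ascii directive above.  */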
19580
19581 /* Place some comments into the assembler stream
19582 describing the current function. */
19583 static void
19584 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19585 {
19586 unsigned long func_type;
19587
19588 /* ??? Do we want to print some of the below anyway? */
19589 if (TARGET_THUMB1)
19590 return;
19591
19592 /* Sanity check. */
19593 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19594
19595 func_type = arm_current_func_type ();
19596
19597 switch ((int) ARM_FUNC_TYPE (func_type))
19598 {
19599 default:
19600 case ARM_FT_NORMAL:
19601 break;
19602 case ARM_FT_INTERWORKED:
19603 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19604 break;
19605 case ARM_FT_ISR:
19606 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19607 break;
19608 case ARM_FT_FIQ:
19609 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19610 break;
19611 case ARM_FT_EXCEPTION:
19612 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19613 break;
19614 }
19615
19616 if (IS_NAKED (func_type))
19617 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19618
19619 if (IS_VOLATILE (func_type))
19620 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19621
19622 if (IS_NESTED (func_type))
19623 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19624 if (IS_STACKALIGN (func_type))
19625 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19626
19627 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19628 crtl->args.size,
19629 crtl->args.pretend_args_size, frame_size);
19630
19631 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19632 frame_pointer_needed,
19633 cfun->machine->uses_anonymous_args);
19634
19635 if (cfun->machine->lr_save_eliminated)
19636 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19637
19638 if (crtl->calls_eh_return)
19639 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19640
19641 }
19642
19643 static void
19644 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19645 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19646 {
19647 arm_stack_offsets *offsets;
19648
19649 if (TARGET_THUMB1)
19650 {
19651 int regno;
19652
19653 /* Emit any call-via-reg trampolines that are needed for v4t support
19654 of call_reg and call_value_reg type insns. */
19655 for (regno = 0; regno < LR_REGNUM; regno++)
19656 {
19657 rtx label = cfun->machine->call_via[regno];
19658
19659 if (label != NULL)
19660 {
19661 switch_to_section (function_section (current_function_decl));
19662 targetm.asm_out.internal_label (asm_out_file, "L",
19663 CODE_LABEL_NUMBER (label));
19664 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19665 }
19666 }
19667
19668 /* ??? Probably not safe to set this here, since it assumes that a
19669 function will be emitted as assembly immediately after we generate
19670 RTL for it. This does not happen for inline functions. */
19671 cfun->machine->return_used_this_function = 0;
19672 }
19673 else /* TARGET_32BIT */
19674 {
19675 /* We need to take into account any stack-frame rounding. */
19676 offsets = arm_get_frame_offsets ();
19677
19678 gcc_assert (!use_return_insn (FALSE, NULL)
19679 || (cfun->machine->return_used_this_function != 0)
19680 || offsets->saved_regs == offsets->outgoing_args
19681 || frame_pointer_needed);
19682 }
19683 }
19684
19685 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19686    STR and STRD.  If an even number of registers are being pushed, an
19687    STRD pattern is created for each register pair.  If an
19688 odd number of registers are pushed, emit an initial STR followed by
19689 as many STRD instructions as are needed. This works best when the
19690 stack is initially 64-bit aligned (the normal case), since it
19691 ensures that each STRD is also 64-bit aligned. */
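
/* For illustration (a hand-worked sketch, register numbers invented): with
   SAVED_REGS_MASK covering {r4, r5, r6} the patterns built here correspond
   roughly to

     str	r4, [sp, #-12]!
     strd	r5, r6, [sp, #4]

   i.e. the single odd register claims all of the stack space with
   writeback, and the following pair is stored dword-aligned above it.  */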
19692 static void
19693 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19694 {
19695 int num_regs = 0;
19696 int i;
19697 int regno;
19698 rtx par = NULL_RTX;
19699 rtx dwarf = NULL_RTX;
19700 rtx tmp;
19701 bool first = true;
19702
19703 num_regs = bit_count (saved_regs_mask);
19704
19705 /* Must be at least one register to save, and can't save SP or PC. */
19706 gcc_assert (num_regs > 0 && num_regs <= 14);
19707 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19708 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19709
19710 /* Create sequence for DWARF info. All the frame-related data for
19711 debugging is held in this wrapper. */
19712 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19713
19714 /* Describe the stack adjustment. */
19715 tmp = gen_rtx_SET (VOIDmode,
19716 stack_pointer_rtx,
19717 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19718 RTX_FRAME_RELATED_P (tmp) = 1;
19719 XVECEXP (dwarf, 0, 0) = tmp;
19720
19721 /* Find the first register. */
19722 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19723 ;
19724
19725 i = 0;
19726
19727   /* If there's an odd number of registers to push, start off by
19728 pushing a single register. This ensures that subsequent strd
19729 operations are dword aligned (assuming that SP was originally
19730 64-bit aligned). */
19731 if ((num_regs & 1) != 0)
19732 {
19733 rtx reg, mem, insn;
19734
19735 reg = gen_rtx_REG (SImode, regno);
19736 if (num_regs == 1)
19737 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19738 stack_pointer_rtx));
19739 else
19740 mem = gen_frame_mem (Pmode,
19741 gen_rtx_PRE_MODIFY
19742 (Pmode, stack_pointer_rtx,
19743 plus_constant (Pmode, stack_pointer_rtx,
19744 -4 * num_regs)));
19745
19746 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19747 RTX_FRAME_RELATED_P (tmp) = 1;
19748 insn = emit_insn (tmp);
19749 RTX_FRAME_RELATED_P (insn) = 1;
19750 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19751 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19752 reg);
19753 RTX_FRAME_RELATED_P (tmp) = 1;
19754 i++;
19755 regno++;
19756 XVECEXP (dwarf, 0, i) = tmp;
19757 first = false;
19758 }
19759
19760 while (i < num_regs)
19761 if (saved_regs_mask & (1 << regno))
19762 {
19763 rtx reg1, reg2, mem1, mem2;
19764 rtx tmp0, tmp1, tmp2;
19765 int regno2;
19766
19767 /* Find the register to pair with this one. */
19768 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19769 regno2++)
19770 ;
19771
19772 reg1 = gen_rtx_REG (SImode, regno);
19773 reg2 = gen_rtx_REG (SImode, regno2);
19774
19775 if (first)
19776 {
19777 rtx insn;
19778
19779 first = false;
19780 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19781 stack_pointer_rtx,
19782 -4 * num_regs));
19783 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19784 stack_pointer_rtx,
19785 -4 * (num_regs - 1)));
19786 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19787 plus_constant (Pmode, stack_pointer_rtx,
19788 -4 * (num_regs)));
19789 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19790 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19791 RTX_FRAME_RELATED_P (tmp0) = 1;
19792 RTX_FRAME_RELATED_P (tmp1) = 1;
19793 RTX_FRAME_RELATED_P (tmp2) = 1;
19794 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19795 XVECEXP (par, 0, 0) = tmp0;
19796 XVECEXP (par, 0, 1) = tmp1;
19797 XVECEXP (par, 0, 2) = tmp2;
19798 insn = emit_insn (par);
19799 RTX_FRAME_RELATED_P (insn) = 1;
19800 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19801 }
19802 else
19803 {
19804 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19805 stack_pointer_rtx,
19806 4 * i));
19807 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19808 stack_pointer_rtx,
19809 4 * (i + 1)));
19810 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19811 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19812 RTX_FRAME_RELATED_P (tmp1) = 1;
19813 RTX_FRAME_RELATED_P (tmp2) = 1;
19814 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19815 XVECEXP (par, 0, 0) = tmp1;
19816 XVECEXP (par, 0, 1) = tmp2;
19817 emit_insn (par);
19818 }
19819
19820 /* Create unwind information. This is an approximation. */
19821 tmp1 = gen_rtx_SET (VOIDmode,
19822 gen_frame_mem (Pmode,
19823 plus_constant (Pmode,
19824 stack_pointer_rtx,
19825 4 * i)),
19826 reg1);
19827 tmp2 = gen_rtx_SET (VOIDmode,
19828 gen_frame_mem (Pmode,
19829 plus_constant (Pmode,
19830 stack_pointer_rtx,
19831 4 * (i + 1))),
19832 reg2);
19833
19834 RTX_FRAME_RELATED_P (tmp1) = 1;
19835 RTX_FRAME_RELATED_P (tmp2) = 1;
19836 XVECEXP (dwarf, 0, i + 1) = tmp1;
19837 XVECEXP (dwarf, 0, i + 2) = tmp2;
19838 i += 2;
19839 regno = regno2 + 1;
19840 }
19841 else
19842 regno++;
19843
19844 return;
19845 }
19846
19847 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19848 whenever possible, otherwise it emits single-word stores. The first store
19849 also allocates stack space for all saved registers, using writeback with
19850 post-addressing mode. All other stores use offset addressing. If no STRD
19851 can be emitted, this function emits a sequence of single-word stores,
19852    and not an STM as before, because single-word stores give the scheduler more
19853    freedom and can be turned into an STM by peephole optimizations.  */
19854 static void
19855 arm_emit_strd_push (unsigned long saved_regs_mask)
19856 {
19857 int num_regs = 0;
19858 int i, j, dwarf_index = 0;
19859 int offset = 0;
19860 rtx dwarf = NULL_RTX;
19861 rtx insn = NULL_RTX;
19862 rtx tmp, mem;
19863
19864   /* TODO: More efficient code can be emitted by changing the
19865 layout, e.g., first push all pairs that can use STRD to keep the
19866 stack aligned, and then push all other registers. */
19867 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19868 if (saved_regs_mask & (1 << i))
19869 num_regs++;
19870
19871 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19872 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19873 gcc_assert (num_regs > 0);
19874
19875 /* Create sequence for DWARF info. */
19876 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19877
19878 /* For dwarf info, we generate explicit stack update. */
19879 tmp = gen_rtx_SET (VOIDmode,
19880 stack_pointer_rtx,
19881 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19882 RTX_FRAME_RELATED_P (tmp) = 1;
19883 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19884
19885 /* Save registers. */
19886 offset = - 4 * num_regs;
19887 j = 0;
19888 while (j <= LAST_ARM_REGNUM)
19889 if (saved_regs_mask & (1 << j))
19890 {
19891 if ((j % 2 == 0)
19892 && (saved_regs_mask & (1 << (j + 1))))
19893 {
19894 	  /* The current register and the following register form a register pair for
19895 which STRD can be generated. */
19896 if (offset < 0)
19897 {
19898 /* Allocate stack space for all saved registers. */
19899 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19900 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19901 mem = gen_frame_mem (DImode, tmp);
19902 offset = 0;
19903 }
19904 else if (offset > 0)
19905 mem = gen_frame_mem (DImode,
19906 plus_constant (Pmode,
19907 stack_pointer_rtx,
19908 offset));
19909 else
19910 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19911
19912 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19913 RTX_FRAME_RELATED_P (tmp) = 1;
19914 tmp = emit_insn (tmp);
19915
19916 /* Record the first store insn. */
19917 if (dwarf_index == 1)
19918 insn = tmp;
19919
19920 /* Generate dwarf info. */
19921 mem = gen_frame_mem (SImode,
19922 plus_constant (Pmode,
19923 stack_pointer_rtx,
19924 offset));
19925 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19926 RTX_FRAME_RELATED_P (tmp) = 1;
19927 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19928
19929 mem = gen_frame_mem (SImode,
19930 plus_constant (Pmode,
19931 stack_pointer_rtx,
19932 offset + 4));
19933 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19934 RTX_FRAME_RELATED_P (tmp) = 1;
19935 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19936
19937 offset += 8;
19938 j += 2;
19939 }
19940 else
19941 {
19942 /* Emit a single word store. */
19943 if (offset < 0)
19944 {
19945 /* Allocate stack space for all saved registers. */
19946 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19947 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19948 mem = gen_frame_mem (SImode, tmp);
19949 offset = 0;
19950 }
19951 else if (offset > 0)
19952 mem = gen_frame_mem (SImode,
19953 plus_constant (Pmode,
19954 stack_pointer_rtx,
19955 offset));
19956 else
19957 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19958
19959 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19960 RTX_FRAME_RELATED_P (tmp) = 1;
19961 tmp = emit_insn (tmp);
19962
19963 /* Record the first store insn. */
19964 if (dwarf_index == 1)
19965 insn = tmp;
19966
19967 /* Generate dwarf info. */
19968 mem = gen_frame_mem (SImode,
19969 plus_constant(Pmode,
19970 stack_pointer_rtx,
19971 offset));
19972 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19973 RTX_FRAME_RELATED_P (tmp) = 1;
19974 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19975
19976 offset += 4;
19977 j += 1;
19978 }
19979 }
19980 else
19981 j++;
19982
19983 /* Attach dwarf info to the first insn we generate. */
19984 gcc_assert (insn != NULL_RTX);
19985 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19986 RTX_FRAME_RELATED_P (insn) = 1;
19987 }
19988
19989 /* Generate and emit an insn that we will recognize as a push_multi.
19990 Unfortunately, since this insn does not reflect very well the actual
19991 semantics of the operation, we need to annotate the insn for the benefit
19992 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19993 MASK for registers that should be annotated for DWARF2 frame unwind
19994 information. */
19995 static rtx
19996 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19997 {
19998 int num_regs = 0;
19999 int num_dwarf_regs = 0;
20000 int i, j;
20001 rtx par;
20002 rtx dwarf;
20003 int dwarf_par_index;
20004 rtx tmp, reg;
20005
20006 /* We don't record the PC in the dwarf frame information. */
20007 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20008
20009 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20010 {
20011 if (mask & (1 << i))
20012 num_regs++;
20013 if (dwarf_regs_mask & (1 << i))
20014 num_dwarf_regs++;
20015 }
20016
20017 gcc_assert (num_regs && num_regs <= 16);
20018 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20019
20020 /* For the body of the insn we are going to generate an UNSPEC in
20021 parallel with several USEs. This allows the insn to be recognized
20022 by the push_multi pattern in the arm.md file.
20023
20024 The body of the insn looks something like this:
20025
20026 (parallel [
20027 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20028 (const_int:SI <num>)))
20029 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20030 (use (reg:SI XX))
20031 (use (reg:SI YY))
20032 ...
20033 ])
20034
20035 For the frame note however, we try to be more explicit and actually
20036 show each register being stored into the stack frame, plus a (single)
20037 decrement of the stack pointer. We do it this way in order to be
20038 friendly to the stack unwinding code, which only wants to see a single
20039 stack decrement per instruction. The RTL we generate for the note looks
20040 something like this:
20041
20042 (sequence [
20043 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20044 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20045 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20046 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20047 ...
20048 ])
20049
20050      FIXME: In an ideal world the PRE_MODIFY would not exist and
20051 instead we'd have a parallel expression detailing all
20052 the stores to the various memory addresses so that debug
20053 information is more up-to-date. Remember however while writing
20054 this to take care of the constraints with the push instruction.
20055
20056 Note also that this has to be taken care of for the VFP registers.
20057
20058 For more see PR43399. */
20059
20060 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20061 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20062 dwarf_par_index = 1;
20063
20064 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20065 {
20066 if (mask & (1 << i))
20067 {
20068 reg = gen_rtx_REG (SImode, i);
20069
20070 XVECEXP (par, 0, 0)
20071 = gen_rtx_SET (VOIDmode,
20072 gen_frame_mem
20073 (BLKmode,
20074 gen_rtx_PRE_MODIFY (Pmode,
20075 stack_pointer_rtx,
20076 plus_constant
20077 (Pmode, stack_pointer_rtx,
20078 -4 * num_regs))
20079 ),
20080 gen_rtx_UNSPEC (BLKmode,
20081 gen_rtvec (1, reg),
20082 UNSPEC_PUSH_MULT));
20083
20084 if (dwarf_regs_mask & (1 << i))
20085 {
20086 tmp = gen_rtx_SET (VOIDmode,
20087 gen_frame_mem (SImode, stack_pointer_rtx),
20088 reg);
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20091 }
20092
20093 break;
20094 }
20095 }
20096
20097 for (j = 1, i++; j < num_regs; i++)
20098 {
20099 if (mask & (1 << i))
20100 {
20101 reg = gen_rtx_REG (SImode, i);
20102
20103 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20104
20105 if (dwarf_regs_mask & (1 << i))
20106 {
20107 tmp
20108 = gen_rtx_SET (VOIDmode,
20109 gen_frame_mem
20110 (SImode,
20111 plus_constant (Pmode, stack_pointer_rtx,
20112 4 * j)),
20113 reg);
20114 RTX_FRAME_RELATED_P (tmp) = 1;
20115 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20116 }
20117
20118 j++;
20119 }
20120 }
20121
20122 par = emit_insn (par);
20123
20124 tmp = gen_rtx_SET (VOIDmode,
20125 stack_pointer_rtx,
20126 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20127 RTX_FRAME_RELATED_P (tmp) = 1;
20128 XVECEXP (dwarf, 0, 0) = tmp;
20129
20130 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20131
20132 return par;
20133 }
20134
20135 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20136 SIZE is the offset to be adjusted.
20137 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20138 static void
20139 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20140 {
20141 rtx dwarf;
20142
20143 RTX_FRAME_RELATED_P (insn) = 1;
20144 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20145 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20146 }
20147
20148 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20149 SAVED_REGS_MASK shows which registers need to be restored.
20150
20151 Unfortunately, since this insn does not reflect very well the actual
20152 semantics of the operation, we need to annotate the insn for the benefit
20153 of DWARF2 frame unwind information. */
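
/* A sketch of the PARALLEL built below for SAVED_REGS_MASK = {r4, r5, pc}
   (illustrative; real RTL carries register numbers and machine modes):

     (parallel [(return)
		(set (reg sp) (plus (reg sp) (const_int 12)))
		(set (reg r4) (mem (reg sp)))
		(set (reg r5) (mem (plus (reg sp) (const_int 4))))
		(set (reg pc) (mem (plus (reg sp) (const_int 8))))])

   together with REG_CFA_RESTORE notes for r4 and r5; the PC is deliberately
   left out of the DWARF information.  */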
20154 static void
20155 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20156 {
20157 int num_regs = 0;
20158 int i, j;
20159 rtx par;
20160 rtx dwarf = NULL_RTX;
20161 rtx tmp, reg;
20162 bool return_in_pc;
20163 int offset_adj;
20164 int emit_update;
20165
20166 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20167 offset_adj = return_in_pc ? 1 : 0;
20168 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20169 if (saved_regs_mask & (1 << i))
20170 num_regs++;
20171
20172 gcc_assert (num_regs && num_regs <= 16);
20173
20174   /* If SP is in the reglist, then we don't emit an SP update insn.  */
20175 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20176
20177 /* The parallel needs to hold num_regs SETs
20178 and one SET for the stack update. */
20179 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20180
20181 if (return_in_pc)
20182 {
20183 tmp = ret_rtx;
20184 XVECEXP (par, 0, 0) = tmp;
20185 }
20186
20187 if (emit_update)
20188 {
20189 /* Increment the stack pointer, based on there being
20190 num_regs 4-byte registers to restore. */
20191 tmp = gen_rtx_SET (VOIDmode,
20192 stack_pointer_rtx,
20193 plus_constant (Pmode,
20194 stack_pointer_rtx,
20195 4 * num_regs));
20196 RTX_FRAME_RELATED_P (tmp) = 1;
20197 XVECEXP (par, 0, offset_adj) = tmp;
20198 }
20199
20200 /* Now restore every reg, which may include PC. */
20201 for (j = 0, i = 0; j < num_regs; i++)
20202 if (saved_regs_mask & (1 << i))
20203 {
20204 reg = gen_rtx_REG (SImode, i);
20205 if ((num_regs == 1) && emit_update && !return_in_pc)
20206 {
20207 /* Emit single load with writeback. */
20208 tmp = gen_frame_mem (SImode,
20209 gen_rtx_POST_INC (Pmode,
20210 stack_pointer_rtx));
20211 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20212 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20213 return;
20214 }
20215
20216 tmp = gen_rtx_SET (VOIDmode,
20217 reg,
20218 gen_frame_mem
20219 (SImode,
20220 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20221 RTX_FRAME_RELATED_P (tmp) = 1;
20222 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20223
20224 /* We need to maintain a sequence for DWARF info too. As dwarf info
20225 should not have PC, skip PC. */
20226 if (i != PC_REGNUM)
20227 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20228
20229 j++;
20230 }
20231
20232 if (return_in_pc)
20233 par = emit_jump_insn (par);
20234 else
20235 par = emit_insn (par);
20236
20237 REG_NOTES (par) = dwarf;
20238 if (!return_in_pc)
20239 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20240 stack_pointer_rtx, stack_pointer_rtx);
20241 }
20242
20243 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20244 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20245
20246 Unfortunately, since this insn does not reflect the actual semantics of
20247 the operation very well, we need to annotate the insn for the benefit
20248 of DWARF2 frame unwind information. */
20249 static void
20250 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20251 {
20252 int i, j;
20253 rtx par;
20254 rtx dwarf = NULL_RTX;
20255 rtx tmp, reg;
20256
20257 gcc_assert (num_regs && num_regs <= 32);
20258
20259 /* Work around the ARM10 VFPr1 bug. */
20260 if (num_regs == 2 && !arm_arch6)
20261 {
20262 if (first_reg == 15)
20263 first_reg--;
20264
20265 num_regs++;
20266 }
20267
20268 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20269 there could be up to 32 D-registers to restore.
20270 If there are more than 16 D-registers, make two recursive calls,
20271 each of which emits one pop_multi instruction. */
20272 if (num_regs > 16)
20273 {
20274 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20275 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20276 return;
20277 }
20278
20279 /* The parallel needs to hold num_regs SETs
20280 and one SET for the stack update. */
20281 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20282
20283 /* Increment the stack pointer, based on there being
20284 num_regs 8-byte registers to restore. */
20285 tmp = gen_rtx_SET (VOIDmode,
20286 base_reg,
20287 plus_constant (Pmode, base_reg, 8 * num_regs));
20288 RTX_FRAME_RELATED_P (tmp) = 1;
20289 XVECEXP (par, 0, 0) = tmp;
20290
20291 /* Now show every reg that will be restored, using a SET for each. */
20292 for (j = 0, i=first_reg; j < num_regs; i += 2)
20293 {
20294 reg = gen_rtx_REG (DFmode, i);
20295
20296 tmp = gen_rtx_SET (VOIDmode,
20297 reg,
20298 gen_frame_mem
20299 (DFmode,
20300 plus_constant (Pmode, base_reg, 8 * j)));
20301 RTX_FRAME_RELATED_P (tmp) = 1;
20302 XVECEXP (par, 0, j + 1) = tmp;
20303
20304 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20305
20306 j++;
20307 }
20308
20309 par = emit_insn (par);
20310 REG_NOTES (par) = dwarf;
20311
20312 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20313 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20314 {
20315 RTX_FRAME_RELATED_P (par) = 1;
20316 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20317 }
20318 else
20319 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20320 base_reg, base_reg);
20321 }
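/* Editor's note: an illustrative sketch (not GCC code) of the recursion above.
   A single pop_multi can restore at most 16 D-registers, so a larger request
   is split into a first pop of 16 followed by a second pop of the remainder,
   the base register having been advanced by 8 * 16 bytes by the first pop's
   stack update.  Names below are placeholders.  */
static void
model_vfp_pop_split (int num_regs, int *count_first, int *count_second)
{
  if (num_regs > 16)
    {
      *count_first = 16;
      *count_second = num_regs - 16;
    }
  else
    {
      *count_first = num_regs;
      *count_second = 0;                /* a single pop_multi suffices */
    }
}
/* Example: restoring 24 consecutive D-registers becomes one pop of 16
   (adjusting the base by 128 bytes) followed by one pop of the remaining 8.  */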
20322
20323 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20324 even number of registers is being popped, multiple LDRD patterns are created
20325 for all register pairs. If an odd number of registers is popped, the last
20326 register is loaded using an LDR pattern. */
20327 static void
20328 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20329 {
20330 int num_regs = 0;
20331 int i, j;
20332 rtx par = NULL_RTX;
20333 rtx dwarf = NULL_RTX;
20334 rtx tmp, reg, tmp1;
20335 bool return_in_pc;
20336
20337 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20338 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20339 if (saved_regs_mask & (1 << i))
20340 num_regs++;
20341
20342 gcc_assert (num_regs && num_regs <= 16);
20343
20344 /* We cannot generate an LDRD for PC. Hence, reduce the count if PC is
20345 to be popped. So, if num_regs is even, it will now become odd,
20346 and we can generate a pop with PC. If num_regs is odd, it will now
20347 become even, and an LDR with return can be generated for PC. */
20348 if (return_in_pc)
20349 num_regs--;
20350
20351 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20352
20353 /* Var j iterates over all the registers to collect those that are set in
20354 saved_regs_mask. Var i gives the index of a saved register in the stack
20355 frame. A PARALLEL RTX of a register pair is created here, so that the
20356 pattern for LDRD can be matched. As PC is always the last register to be
20357 popped, and we have already decremented num_regs if PC is present, we
20358 don't have to worry about PC in this loop. */
20359 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20360 if (saved_regs_mask & (1 << j))
20361 {
20362 /* Create RTX for memory load. */
20363 reg = gen_rtx_REG (SImode, j);
20364 tmp = gen_rtx_SET (SImode,
20365 reg,
20366 gen_frame_mem (SImode,
20367 plus_constant (Pmode,
20368 stack_pointer_rtx, 4 * i)));
20369 RTX_FRAME_RELATED_P (tmp) = 1;
20370
20371 if (i % 2 == 0)
20372 {
20373 /* When saved-register index (i) is even, the RTX to be emitted is
20374 yet to be created. Hence create it first. The LDRD pattern we
20375 are generating is :
20376 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20377 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20378 where target registers need not be consecutive. */
20379 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20380 dwarf = NULL_RTX;
20381 }
20382
20383 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20384 added as the 0th element; if i is odd, reg_i is added as the 1st element
20385 of the LDRD pattern shown above. */
20386 XVECEXP (par, 0, (i % 2)) = tmp;
20387 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20388
20389 if ((i % 2) == 1)
20390 {
20391 /* When the saved-register index (i) is odd, RTXs for both registers of
20392 the LDRD pattern given above have been generated, and the pattern
20393 can be emitted now. */
20394 par = emit_insn (par);
20395 REG_NOTES (par) = dwarf;
20396 RTX_FRAME_RELATED_P (par) = 1;
20397 }
20398
20399 i++;
20400 }
20401
20402 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20403 the number of registers is even AND return_in_pc is true, the last register
20404 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20405 and then use LDR with post-increment. */
20406
20407 /* Increment the stack pointer, based on there being
20408 num_regs 4-byte registers to restore. */
20409 tmp = gen_rtx_SET (VOIDmode,
20410 stack_pointer_rtx,
20411 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20412 RTX_FRAME_RELATED_P (tmp) = 1;
20413 tmp = emit_insn (tmp);
20414 if (!return_in_pc)
20415 {
20416 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20417 stack_pointer_rtx, stack_pointer_rtx);
20418 }
20419
20420 dwarf = NULL_RTX;
20421
20422 if (((num_regs % 2) == 1 && !return_in_pc)
20423 || ((num_regs % 2) == 0 && return_in_pc))
20424 {
20425 /* Scan for the single register to be popped. Skip until the saved
20426 register is found. */
20427 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20428
20429 /* Gen LDR with post increment here. */
20430 tmp1 = gen_rtx_MEM (SImode,
20431 gen_rtx_POST_INC (SImode,
20432 stack_pointer_rtx));
20433 set_mem_alias_set (tmp1, get_frame_alias_set ());
20434
20435 reg = gen_rtx_REG (SImode, j);
20436 tmp = gen_rtx_SET (SImode, reg, tmp1);
20437 RTX_FRAME_RELATED_P (tmp) = 1;
20438 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20439
20440 if (return_in_pc)
20441 {
20442 /* If return_in_pc, j must be PC_REGNUM. */
20443 gcc_assert (j == PC_REGNUM);
20444 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20445 XVECEXP (par, 0, 0) = ret_rtx;
20446 XVECEXP (par, 0, 1) = tmp;
20447 par = emit_jump_insn (par);
20448 }
20449 else
20450 {
20451 par = emit_insn (tmp);
20452 REG_NOTES (par) = dwarf;
20453 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20454 stack_pointer_rtx, stack_pointer_rtx);
20455 }
20456
20457 }
20458 else if ((num_regs % 2) == 1 && return_in_pc)
20459 {
20460 /* There are 2 registers to be popped. So, generate the pattern
20461 pop_multiple_with_stack_update_and_return to pop into PC. */
20462 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20463 }
20464
20465 return;
20466 }
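/* Editor's note: a standalone sketch (not GCC code) of the pairing performed
   by thumb2_emit_ldrd_pop above.  Thumb-2 LDRD does not require consecutive
   register numbers, so saved registers are simply paired two-by-two in the
   order they appear in the mask; an odd leftover is later loaded with a
   single LDR.  The return-in-PC special case handled above is omitted here,
   and the helper name is illustrative only.  */
static int
model_thumb2_ldrd_pairs (unsigned long mask, int pairs[8][2], int *leftover)
{
  int reg, n = 0, npairs;
  int order[16];

  for (reg = 0; reg < 16; reg++)
    if (mask & (1UL << reg))
      order[n++] = reg;                 /* registers in pop order */

  for (npairs = 0; 2 * npairs + 1 < n; npairs++)
    {
      pairs[npairs][0] = order[2 * npairs];
      pairs[npairs][1] = order[2 * npairs + 1];
    }
  *leftover = (n & 1) ? order[n - 1] : -1;
  return npairs;
}
/* Example: a mask with r4, r6, r9 and lr set yields the LDRD pairs (r4, r6)
   and (r9, lr); a mask with r4, r5, r7 yields (r4, r5), leaving r7 for the
   single LDR with post-increment.  */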
20467
20468 /* LDRD in ARM mode needs consecutive registers as operands. This function
20469 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20470 offset addressing and then generates one separate stack update. This provides
20471 more scheduling freedom, compared to writeback on every load. However,
20472 if the function returns using a load into PC directly
20473 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20474 before the last load. TODO: Add a peephole optimization to recognize
20475 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add a
20476 peephole optimization to merge the load at stack-offset zero
20477 with the stack update instruction using load with writeback
20478 in post-index addressing mode. */
20479 static void
20480 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20481 {
20482 int j = 0;
20483 int offset = 0;
20484 rtx par = NULL_RTX;
20485 rtx dwarf = NULL_RTX;
20486 rtx tmp, mem;
20487
20488 /* Restore saved registers. */
20489 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20490 j = 0;
20491 while (j <= LAST_ARM_REGNUM)
20492 if (saved_regs_mask & (1 << j))
20493 {
20494 if ((j % 2) == 0
20495 && (saved_regs_mask & (1 << (j + 1)))
20496 && (j + 1) != PC_REGNUM)
20497 {
20498 /* The current register and the next register form a register pair for
20499 which LDRD can be generated. PC is always the last register popped,
20500 and we handle it separately. */
20501 if (offset > 0)
20502 mem = gen_frame_mem (DImode,
20503 plus_constant (Pmode,
20504 stack_pointer_rtx,
20505 offset));
20506 else
20507 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20508
20509 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20510 tmp = emit_insn (tmp);
20511 RTX_FRAME_RELATED_P (tmp) = 1;
20512
20513 /* Generate dwarf info. */
20514
20515 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20516 gen_rtx_REG (SImode, j),
20517 NULL_RTX);
20518 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20519 gen_rtx_REG (SImode, j + 1),
20520 dwarf);
20521
20522 REG_NOTES (tmp) = dwarf;
20523
20524 offset += 8;
20525 j += 2;
20526 }
20527 else if (j != PC_REGNUM)
20528 {
20529 /* Emit a single word load. */
20530 if (offset > 0)
20531 mem = gen_frame_mem (SImode,
20532 plus_constant (Pmode,
20533 stack_pointer_rtx,
20534 offset));
20535 else
20536 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20537
20538 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20539 tmp = emit_insn (tmp);
20540 RTX_FRAME_RELATED_P (tmp) = 1;
20541
20542 /* Generate dwarf info. */
20543 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20544 gen_rtx_REG (SImode, j),
20545 NULL_RTX);
20546
20547 offset += 4;
20548 j += 1;
20549 }
20550 else /* j == PC_REGNUM */
20551 j++;
20552 }
20553 else
20554 j++;
20555
20556 /* Update the stack. */
20557 if (offset > 0)
20558 {
20559 tmp = gen_rtx_SET (Pmode,
20560 stack_pointer_rtx,
20561 plus_constant (Pmode,
20562 stack_pointer_rtx,
20563 offset));
20564 tmp = emit_insn (tmp);
20565 arm_add_cfa_adjust_cfa_note (tmp, offset,
20566 stack_pointer_rtx, stack_pointer_rtx);
20567 offset = 0;
20568 }
20569
20570 if (saved_regs_mask & (1 << PC_REGNUM))
20571 {
20572 /* Only PC is to be popped. */
20573 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20574 XVECEXP (par, 0, 0) = ret_rtx;
20575 tmp = gen_rtx_SET (SImode,
20576 gen_rtx_REG (SImode, PC_REGNUM),
20577 gen_frame_mem (SImode,
20578 gen_rtx_POST_INC (SImode,
20579 stack_pointer_rtx)));
20580 RTX_FRAME_RELATED_P (tmp) = 1;
20581 XVECEXP (par, 0, 1) = tmp;
20582 par = emit_jump_insn (par);
20583
20584 /* Generate dwarf info. */
20585 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20586 gen_rtx_REG (SImode, PC_REGNUM),
20587 NULL_RTX);
20588 REG_NOTES (par) = dwarf;
20589 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20590 stack_pointer_rtx, stack_pointer_rtx);
20591 }
20592 }
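/* Editor's note: a standalone sketch (not GCC code) of the pairing rule used
   by arm_emit_ldrd_pop above.  ARM-mode LDRD needs an even/odd consecutive
   register pair, so register J is fused with J+1 only when J is even, J+1 is
   also saved and J+1 is not PC; otherwise a single-word load is used.  The
   helper records each saved register's sp-relative slot (entries for other
   registers are left untouched) and returns the size of the final stack
   adjustment; PC handling is omitted.  */
static int
model_arm_ldrd_layout (unsigned long mask, int slot_offset[16], int uses_ldrd[16])
{
  int j = 0, offset = 0;

  while (j < 15)                        /* reg 15 (PC) is popped separately */
    if (mask & (1UL << j))
      {
        if ((j % 2) == 0 && (mask & (1UL << (j + 1))) && (j + 1) != 15)
          {
            slot_offset[j] = offset;         uses_ldrd[j] = 1;
            slot_offset[j + 1] = offset + 4; uses_ldrd[j + 1] = 1;
            offset += 8;
            j += 2;
          }
        else
          {
            slot_offset[j] = offset;         uses_ldrd[j] = 0;
            offset += 4;
            j += 1;
          }
      }
    else
      j++;
  return offset;
}
/* Example: for r4, r5, r6 and r8 this gives an LDRD of (r4, r5) at sp+0, a
   single load of r6 at sp+8, a single load of r8 at sp+12, and a final
   "add sp, sp, #16".  */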
20593
20594 /* Calculate the size of the return value that is passed in registers. */
20595 static unsigned
20596 arm_size_return_regs (void)
20597 {
20598 machine_mode mode;
20599
20600 if (crtl->return_rtx != 0)
20601 mode = GET_MODE (crtl->return_rtx);
20602 else
20603 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20604
20605 return GET_MODE_SIZE (mode);
20606 }
20607
20608 /* Return true if the current function needs to save/restore LR. */
20609 static bool
20610 thumb_force_lr_save (void)
20611 {
20612 return !cfun->machine->lr_save_eliminated
20613 && (!leaf_function_p ()
20614 || thumb_far_jump_used_p ()
20615 || df_regs_ever_live_p (LR_REGNUM));
20616 }
20617
20618 /* We do not know whether r3 will be available, because an
20619 indirect tail call is happening in this
20620 particular case. */
20621 static bool
20622 is_indirect_tailcall_p (rtx call)
20623 {
20624 rtx pat = PATTERN (call);
20625
20626 /* Indirect tail call. */
20627 pat = XVECEXP (pat, 0, 0);
20628 if (GET_CODE (pat) == SET)
20629 pat = SET_SRC (pat);
20630
20631 pat = XEXP (XEXP (pat, 0), 0);
20632 return REG_P (pat);
20633 }
20634
20635 /* Return true if r3 is used by any of the tail call insns in the
20636 current function. */
20637 static bool
20638 any_sibcall_could_use_r3 (void)
20639 {
20640 edge_iterator ei;
20641 edge e;
20642
20643 if (!crtl->tail_call_emit)
20644 return false;
20645 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20646 if (e->flags & EDGE_SIBCALL)
20647 {
20648 rtx call = BB_END (e->src);
20649 if (!CALL_P (call))
20650 call = prev_nonnote_nondebug_insn (call);
20651 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20652 if (find_regno_fusage (call, USE, 3)
20653 || is_indirect_tailcall_p (call))
20654 return true;
20655 }
20656 return false;
20657 }
20658
20659
20660 /* Compute the distance from register FROM to register TO.
20661 These can be the arg pointer (26), the soft frame pointer (25),
20662 the stack pointer (13) or the hard frame pointer (11).
20663 In thumb mode r7 is used as the soft frame pointer, if needed.
20664 Typical stack layout looks like this:
20665
20666 old stack pointer -> | |
20667 ----
20668 | | \
20669 | | saved arguments for
20670 | | vararg functions
20671 | | /
20672 --
20673 hard FP & arg pointer -> | | \
20674 | | stack
20675 | | frame
20676 | | /
20677 --
20678 | | \
20679 | | call saved
20680 | | registers
20681 soft frame pointer -> | | /
20682 --
20683 | | \
20684 | | local
20685 | | variables
20686 locals base pointer -> | | /
20687 --
20688 | | \
20689 | | outgoing
20690 | | arguments
20691 current stack pointer -> | | /
20692 --
20693
20694 For a given function some or all of these stack components
20695 may not be needed, giving rise to the possibility of
20696 eliminating some of the registers.
20697
20698 The values returned by this function must reflect the behavior
20699 of arm_expand_prologue() and arm_compute_save_reg_mask().
20700
20701 The sign of the number returned reflects the direction of stack
20702 growth, so the values are positive for all eliminations except
20703 from the soft frame pointer to the hard frame pointer.
20704
20705 SFP may point just inside the local variables block to ensure correct
20706 alignment. */
20707
20708
20709 /* Calculate stack offsets. These are used to calculate register elimination
20710 offsets and in prologue/epilogue code. Also calculates which registers
20711 should be saved. */
20712
20713 static arm_stack_offsets *
20714 arm_get_frame_offsets (void)
20715 {
20716 struct arm_stack_offsets *offsets;
20717 unsigned long func_type;
20718 int leaf;
20719 int saved;
20720 int core_saved;
20721 HOST_WIDE_INT frame_size;
20722 int i;
20723
20724 offsets = &cfun->machine->stack_offsets;
20725
20726 /* We need to know if we are a leaf function. Unfortunately, it
20727 is possible to be called after start_sequence has been called,
20728 which causes get_insns to return the insns for the sequence,
20729 not the function, which will cause leaf_function_p to return
20730 the incorrect result.
20731
20732 However, we only need to know about leaf functions once reload has completed, and the
20733 frame size cannot be changed after that time, so we can safely
20734 use the cached value. */
20735
20736 if (reload_completed)
20737 return offsets;
20738
20739 /* Initially this is the size of the local variables. It will be translated
20740 into an offset once we have determined the size of the preceding data. */
20741 frame_size = ROUND_UP_WORD (get_frame_size ());
20742
20743 leaf = leaf_function_p ();
20744
20745 /* Space for variadic functions. */
20746 offsets->saved_args = crtl->args.pretend_args_size;
20747
20748 /* In Thumb mode this is incorrect, but never used. */
20749 offsets->frame
20750 = (offsets->saved_args
20751 + arm_compute_static_chain_stack_bytes ()
20752 + (frame_pointer_needed ? 4 : 0));
20753
20754 if (TARGET_32BIT)
20755 {
20756 unsigned int regno;
20757
20758 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20759 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20760 saved = core_saved;
20761
20762 /* We know that SP will be doubleword aligned on entry, and we must
20763 preserve that condition at any subroutine call. We also require the
20764 soft frame pointer to be doubleword aligned. */
20765
20766 if (TARGET_REALLY_IWMMXT)
20767 {
20768 /* Check for the call-saved iWMMXt registers. */
20769 for (regno = FIRST_IWMMXT_REGNUM;
20770 regno <= LAST_IWMMXT_REGNUM;
20771 regno++)
20772 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20773 saved += 8;
20774 }
20775
20776 func_type = arm_current_func_type ();
20777 /* Space for saved VFP registers. */
20778 if (! IS_VOLATILE (func_type)
20779 && TARGET_HARD_FLOAT && TARGET_VFP)
20780 saved += arm_get_vfp_saved_size ();
20781 }
20782 else /* TARGET_THUMB1 */
20783 {
20784 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20785 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20786 saved = core_saved;
20787 if (TARGET_BACKTRACE)
20788 saved += 16;
20789 }
20790
20791 /* Saved registers include the stack frame. */
20792 offsets->saved_regs
20793 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20794 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20795
20796 /* A leaf function does not need any stack alignment if it has nothing
20797 on the stack. */
20798 if (leaf && frame_size == 0
20799 /* However if it calls alloca(), we have a dynamically allocated
20800 block of BIGGEST_ALIGNMENT on the stack, so stack alignment is still needed. */
20801 && ! cfun->calls_alloca)
20802 {
20803 offsets->outgoing_args = offsets->soft_frame;
20804 offsets->locals_base = offsets->soft_frame;
20805 return offsets;
20806 }
20807
20808 /* Ensure SFP has the correct alignment. */
20809 if (ARM_DOUBLEWORD_ALIGN
20810 && (offsets->soft_frame & 7))
20811 {
20812 offsets->soft_frame += 4;
20813 /* Try to align the stack by pushing an extra reg. Don't bother doing this
20814 when there is a stack frame, as the alignment will be rolled into
20815 the normal stack adjustment. */
20816 if (frame_size + crtl->outgoing_args_size == 0)
20817 {
20818 int reg = -1;
20819
20820 /* Register r3 is caller-saved. Normally it does not need to be
20821 saved on entry by the prologue. However if we choose to save
20822 it for padding then we may confuse the compiler into thinking
20823 a prologue sequence is required when in fact it is not. This
20824 will occur when shrink-wrapping if r3 is used as a scratch
20825 register and there are no other callee-saved writes.
20826
20827 This situation can be avoided when other callee-saved registers
20828 are available and r3 is not mandatory if we choose a callee-saved
20829 register for padding. */
20830 bool prefer_callee_reg_p = false;
20831
20832 /* If it is safe to use r3, then do so. This sometimes
20833 generates better code on Thumb-2 by avoiding the need to
20834 use 32-bit push/pop instructions. */
20835 if (! any_sibcall_could_use_r3 ()
20836 && arm_size_return_regs () <= 12
20837 && (offsets->saved_regs_mask & (1 << 3)) == 0
20838 && (TARGET_THUMB2
20839 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20840 {
20841 reg = 3;
20842 if (!TARGET_THUMB2)
20843 prefer_callee_reg_p = true;
20844 }
20845 if (reg == -1
20846 || prefer_callee_reg_p)
20847 {
20848 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20849 {
20850 /* Avoid fixed registers; they may be changed at
20851 arbitrary times so it's unsafe to restore them
20852 during the epilogue. */
20853 if (!fixed_regs[i]
20854 && (offsets->saved_regs_mask & (1 << i)) == 0)
20855 {
20856 reg = i;
20857 break;
20858 }
20859 }
20860 }
20861
20862 if (reg != -1)
20863 {
20864 offsets->saved_regs += 4;
20865 offsets->saved_regs_mask |= (1 << reg);
20866 }
20867 }
20868 }
20869
20870 offsets->locals_base = offsets->soft_frame + frame_size;
20871 offsets->outgoing_args = (offsets->locals_base
20872 + crtl->outgoing_args_size);
20873
20874 if (ARM_DOUBLEWORD_ALIGN)
20875 {
20876 /* Ensure SP remains doubleword aligned. */
20877 if (offsets->outgoing_args & 7)
20878 offsets->outgoing_args += 4;
20879 gcc_assert (!(offsets->outgoing_args & 7));
20880 }
20881
20882 return offsets;
20883 }
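/* Editor's note: a toy model (not GCC code) of the layering computed by
   arm_get_frame_offsets above:
     saved_args -> saved_regs -> soft_frame -> locals_base -> outgoing_args,
   with the soft frame pointer and the final stack pointer kept doubleword
   aligned.  Interworking slots, iWMMXt saves and the r3-padding heuristic are
   deliberately left out; all names below are illustrative.  */
struct toy_offsets
{
  int saved_args, saved_regs, soft_frame, locals_base, outgoing_args;
};

static struct toy_offsets
model_frame_offsets (int pretend_args, int static_chain_bytes,
                     int core_saved_bytes, int vfp_saved_bytes,
                     int frame_size, int outgoing_size)
{
  struct toy_offsets o;

  o.saved_args = pretend_args;
  o.saved_regs = (o.saved_args + static_chain_bytes
                  + core_saved_bytes + vfp_saved_bytes);
  o.soft_frame = o.saved_regs;
  if (o.soft_frame & 7)                 /* keep SFP doubleword aligned */
    o.soft_frame += 4;
  o.locals_base = o.soft_frame + frame_size;
  o.outgoing_args = o.locals_base + outgoing_size;
  if (o.outgoing_args & 7)              /* keep SP doubleword aligned */
    o.outgoing_args += 4;
  return o;
}
/* Example: no pretend args, five core registers saved (20 bytes), 16 bytes of
   locals and 8 bytes of outgoing arguments gives soft_frame = 24,
   locals_base = 40 and outgoing_args = 48.  */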
20884
20885
20886 /* Calculate the relative offsets for the different stack pointers. Positive
20887 offsets are in the direction of stack growth. */
20888
20889 HOST_WIDE_INT
20890 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20891 {
20892 arm_stack_offsets *offsets;
20893
20894 offsets = arm_get_frame_offsets ();
20895
20896 /* OK, now we have enough information to compute the distances.
20897 There must be an entry in these switch tables for each pair
20898 of registers in ELIMINABLE_REGS, even if some of the entries
20899 seem to be redundant or useless. */
20900 switch (from)
20901 {
20902 case ARG_POINTER_REGNUM:
20903 switch (to)
20904 {
20905 case THUMB_HARD_FRAME_POINTER_REGNUM:
20906 return 0;
20907
20908 case FRAME_POINTER_REGNUM:
20909 /* This is the reverse of the soft frame pointer
20910 to hard frame pointer elimination below. */
20911 return offsets->soft_frame - offsets->saved_args;
20912
20913 case ARM_HARD_FRAME_POINTER_REGNUM:
20914 /* This is only non-zero in the case where the static chain register
20915 is stored above the frame. */
20916 return offsets->frame - offsets->saved_args - 4;
20917
20918 case STACK_POINTER_REGNUM:
20919 /* If nothing has been pushed on the stack at all
20920 then this will return -4. This *is* correct! */
20921 return offsets->outgoing_args - (offsets->saved_args + 4);
20922
20923 default:
20924 gcc_unreachable ();
20925 }
20926 gcc_unreachable ();
20927
20928 case FRAME_POINTER_REGNUM:
20929 switch (to)
20930 {
20931 case THUMB_HARD_FRAME_POINTER_REGNUM:
20932 return 0;
20933
20934 case ARM_HARD_FRAME_POINTER_REGNUM:
20935 /* The hard frame pointer points to the top entry in the
20936 stack frame. The soft frame pointer points to the bottom entry
20937 in the stack frame. If there is no stack frame at all,
20938 then they are identical. */
20939
20940 return offsets->frame - offsets->soft_frame;
20941
20942 case STACK_POINTER_REGNUM:
20943 return offsets->outgoing_args - offsets->soft_frame;
20944
20945 default:
20946 gcc_unreachable ();
20947 }
20948 gcc_unreachable ();
20949
20950 default:
20951 /* You cannot eliminate from the stack pointer.
20952 In theory you could eliminate from the hard frame
20953 pointer to the stack pointer, but this will never
20954 happen, since if a stack frame is not needed the
20955 hard frame pointer will never be used. */
20956 gcc_unreachable ();
20957 }
20958 }
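/* Editor's note: an illustrative sketch (not GCC code) of the two most common
   eliminations above, written as plain differences over the toy offsets from
   the previous sketch.  The "+ 4" mirrors the ARG_POINTER case above, whose
   comment notes that an entirely empty frame correctly yields -4.  */
static int
model_arg_to_sp_offset (int saved_args, int outgoing_args)
{
  return outgoing_args - (saved_args + 4);
}

static int
model_frame_to_sp_offset (int soft_frame, int outgoing_args)
{
  return outgoing_args - soft_frame;
}
/* Example: with saved_args = 0, soft_frame = 24 and outgoing_args = 48 these
   return 44 and 24; with an entirely empty frame the first returns -4.  */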
20959
20960 /* Given FROM and TO register numbers, say whether this elimination is
20961 allowed. Frame pointer elimination is automatically handled.
20962
20963 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20964 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20965 pointer, we must eliminate FRAME_POINTER_REGNUM into
20966 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20967 ARG_POINTER_REGNUM. */
20968
20969 bool
20970 arm_can_eliminate (const int from, const int to)
20971 {
20972 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20973 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20974 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20975 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20976 true);
20977 }
20978
20979 /* Emit RTL to save coprocessor registers on function entry. Returns the
20980 number of bytes pushed. */
20981
20982 static int
20983 arm_save_coproc_regs(void)
20984 {
20985 int saved_size = 0;
20986 unsigned reg;
20987 unsigned start_reg;
20988 rtx insn;
20989
20990 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20991 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20992 {
20993 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20994 insn = gen_rtx_MEM (V2SImode, insn);
20995 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20996 RTX_FRAME_RELATED_P (insn) = 1;
20997 saved_size += 8;
20998 }
20999
21000 if (TARGET_HARD_FLOAT && TARGET_VFP)
21001 {
21002 start_reg = FIRST_VFP_REGNUM;
21003
21004 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21005 {
21006 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21007 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21008 {
21009 if (start_reg != reg)
21010 saved_size += vfp_emit_fstmd (start_reg,
21011 (reg - start_reg) / 2);
21012 start_reg = reg + 2;
21013 }
21014 }
21015 if (start_reg != reg)
21016 saved_size += vfp_emit_fstmd (start_reg,
21017 (reg - start_reg) / 2);
21018 }
21019 return saved_size;
21020 }
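/* Editor's note: a standalone sketch (not GCC code) of the grouping performed
   by the VFP loop in arm_save_coproc_regs above.  One store-multiple is
   emitted per maximal run of live call-saved D-registers; liveness is
   abstracted here into a plain LIVE[] array of flags, and the helper name is
   illustrative only.  */
static int
model_vfp_save_runs (const int live[], int num_d_regs,
                     int run_start[], int run_len[])
{
  int d, start = 0, nruns = 0;

  for (d = 0; d < num_d_regs; d++)
    if (!live[d])
      {
        if (start != d)                 /* flush the pending run */
          {
            run_start[nruns] = start;
            run_len[nruns] = d - start;
            nruns++;
          }
        start = d + 1;
      }
  if (start != num_d_regs)              /* trailing run */
    {
      run_start[nruns] = start;
      run_len[nruns] = num_d_regs - start;
      nruns++;
    }
  return nruns;
}
/* Example: with only d8-d11 and d14-d15 live this yields the runs (8, 4) and
   (14, 2), i.e. two vfp_emit_fstmd calls in the prologue.  */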
21021
21022
21023 /* Set the Thumb frame pointer from the stack pointer. */
21024
21025 static void
21026 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21027 {
21028 HOST_WIDE_INT amount;
21029 rtx insn, dwarf;
21030
21031 amount = offsets->outgoing_args - offsets->locals_base;
21032 if (amount < 1024)
21033 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21034 stack_pointer_rtx, GEN_INT (amount)));
21035 else
21036 {
21037 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21038 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21039 expects the first two operands to be the same. */
21040 if (TARGET_THUMB2)
21041 {
21042 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21043 stack_pointer_rtx,
21044 hard_frame_pointer_rtx));
21045 }
21046 else
21047 {
21048 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21049 hard_frame_pointer_rtx,
21050 stack_pointer_rtx));
21051 }
21052 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21053 plus_constant (Pmode, stack_pointer_rtx, amount));
21054 RTX_FRAME_RELATED_P (dwarf) = 1;
21055 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21056 }
21057
21058 RTX_FRAME_RELATED_P (insn) = 1;
21059 }
21060
21061 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21062 function. */
21063 void
21064 arm_expand_prologue (void)
21065 {
21066 rtx amount;
21067 rtx insn;
21068 rtx ip_rtx;
21069 unsigned long live_regs_mask;
21070 unsigned long func_type;
21071 int fp_offset = 0;
21072 int saved_pretend_args = 0;
21073 int saved_regs = 0;
21074 unsigned HOST_WIDE_INT args_to_push;
21075 arm_stack_offsets *offsets;
21076
21077 func_type = arm_current_func_type ();
21078
21079 /* Naked functions don't have prologues. */
21080 if (IS_NAKED (func_type))
21081 return;
21082
21083 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21084 args_to_push = crtl->args.pretend_args_size;
21085
21086 /* Compute which register we will have to save onto the stack. */
21087 offsets = arm_get_frame_offsets ();
21088 live_regs_mask = offsets->saved_regs_mask;
21089
21090 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21091
21092 if (IS_STACKALIGN (func_type))
21093 {
21094 rtx r0, r1;
21095
21096 /* Handle a word-aligned stack pointer. We generate the following:
21097
21098 mov r0, sp
21099 bic r1, r0, #7
21100 mov sp, r1
21101 <save and restore r0 in normal prologue/epilogue>
21102 mov sp, r0
21103 bx lr
21104
21105 The unwinder doesn't need to know about the stack realignment.
21106 Just tell it we saved SP in r0. */
21107 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21108
21109 r0 = gen_rtx_REG (SImode, 0);
21110 r1 = gen_rtx_REG (SImode, 1);
21111
21112 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21113 RTX_FRAME_RELATED_P (insn) = 1;
21114 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21115
21116 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21117
21118 /* ??? The CFA changes here, which may cause GDB to conclude that it
21119 has entered a different function. That said, the unwind info is
21120 correct, individually, before and after this instruction because
21121 we've described the save of SP, which will override the default
21122 handling of SP as restoring from the CFA. */
21123 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21124 }
21125
21126 /* For APCS frames, if the IP register is clobbered
21127 when creating the frame, save that register in a special
21128 way. */
21129 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21130 {
21131 if (IS_INTERRUPT (func_type))
21132 {
21133 /* Interrupt functions must not corrupt any registers.
21134 Creating a frame pointer, however, corrupts the IP
21135 register, so we must push it first. */
21136 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21137
21138 /* Do not set RTX_FRAME_RELATED_P on this insn.
21139 The dwarf stack unwinding code only wants to see one
21140 stack decrement per function, and this is not it. If
21141 this instruction is labeled as being part of the frame
21142 creation sequence then dwarf2out_frame_debug_expr will
21143 die when it encounters the assignment of IP to FP
21144 later on, since the use of SP here establishes SP as
21145 the CFA register and not IP.
21146
21147 Anyway this instruction is not really part of the stack
21148 frame creation although it is part of the prologue. */
21149 }
21150 else if (IS_NESTED (func_type))
21151 {
21152 /* The static chain register is the same as the IP register
21153 used as a scratch register during stack frame creation.
21154 To get around this we need to find somewhere to store IP
21155 whilst the frame is being created. We try the following
21156 places in order:
21157
21158 1. The last argument register r3 if it is available.
21159 2. A slot on the stack above the frame if there are no
21160 arguments to push onto the stack.
21161 3. Register r3 again, after pushing the argument registers
21162 onto the stack, if this is a varargs function.
21163 4. The last slot on the stack created for the arguments to
21164 push, if this isn't a varargs function.
21165
21166 Note - we only need to tell the dwarf2 backend about the SP
21167 adjustment in the second variant; the static chain register
21168 doesn't need to be unwound, as it doesn't contain a value
21169 inherited from the caller. */
21170
21171 if (!arm_r3_live_at_start_p ())
21172 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21173 else if (args_to_push == 0)
21174 {
21175 rtx addr, dwarf;
21176
21177 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21178 saved_regs += 4;
21179
21180 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21181 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21182 fp_offset = 4;
21183
21184 /* Just tell the dwarf backend that we adjusted SP. */
21185 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21186 plus_constant (Pmode, stack_pointer_rtx,
21187 -fp_offset));
21188 RTX_FRAME_RELATED_P (insn) = 1;
21189 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21190 }
21191 else
21192 {
21193 /* Store the args on the stack. */
21194 if (cfun->machine->uses_anonymous_args)
21195 {
21196 insn
21197 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21198 (0xf0 >> (args_to_push / 4)) & 0xf);
21199 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21200 saved_pretend_args = 1;
21201 }
21202 else
21203 {
21204 rtx addr, dwarf;
21205
21206 if (args_to_push == 4)
21207 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21208 else
21209 addr
21210 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21211 plus_constant (Pmode,
21212 stack_pointer_rtx,
21213 -args_to_push));
21214
21215 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21216
21217 /* Just tell the dwarf backend that we adjusted SP. */
21218 dwarf
21219 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21220 plus_constant (Pmode, stack_pointer_rtx,
21221 -args_to_push));
21222 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21223 }
21224
21225 RTX_FRAME_RELATED_P (insn) = 1;
21226 fp_offset = args_to_push;
21227 args_to_push = 0;
21228 }
21229 }
21230
21231 insn = emit_set_insn (ip_rtx,
21232 plus_constant (Pmode, stack_pointer_rtx,
21233 fp_offset));
21234 RTX_FRAME_RELATED_P (insn) = 1;
21235 }
21236
21237 if (args_to_push)
21238 {
21239 /* Push the argument registers, or reserve space for them. */
21240 if (cfun->machine->uses_anonymous_args)
21241 insn = emit_multi_reg_push
21242 ((0xf0 >> (args_to_push / 4)) & 0xf,
21243 (0xf0 >> (args_to_push / 4)) & 0xf);
21244 else
21245 insn = emit_insn
21246 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21247 GEN_INT (- args_to_push)));
21248 RTX_FRAME_RELATED_P (insn) = 1;
21249 }
21250
21251 /* If this is an interrupt service routine, and the link register
21252 is going to be pushed, and we're not generating an extra
21253 push of IP (needed when a frame is needed and the frame layout is APCS),
21254 subtracting four from LR now will mean that the function return
21255 can be done with a single instruction. */
21256 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21257 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21258 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21259 && TARGET_ARM)
21260 {
21261 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21262
21263 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21264 }
21265
21266 if (live_regs_mask)
21267 {
21268 unsigned long dwarf_regs_mask = live_regs_mask;
21269
21270 saved_regs += bit_count (live_regs_mask) * 4;
21271 if (optimize_size && !frame_pointer_needed
21272 && saved_regs == offsets->saved_regs - offsets->saved_args)
21273 {
21274 /* If no coprocessor registers are being pushed and we don't have
21275 to worry about a frame pointer then push extra registers to
21276 create the stack frame. This is done in a way that does not
21277 alter the frame layout, so it is independent of the epilogue. */
21278 int n;
21279 int frame;
21280 n = 0;
21281 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21282 n++;
21283 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21284 if (frame && n * 4 >= frame)
21285 {
21286 n = frame / 4;
21287 live_regs_mask |= (1 << n) - 1;
21288 saved_regs += frame;
21289 }
21290 }
21291
21292 if (TARGET_LDRD
21293 && current_tune->prefer_ldrd_strd
21294 && !optimize_function_for_size_p (cfun))
21295 {
21296 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21297 if (TARGET_THUMB2)
21298 thumb2_emit_strd_push (live_regs_mask);
21299 else if (TARGET_ARM
21300 && !TARGET_APCS_FRAME
21301 && !IS_INTERRUPT (func_type))
21302 arm_emit_strd_push (live_regs_mask);
21303 else
21304 {
21305 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21306 RTX_FRAME_RELATED_P (insn) = 1;
21307 }
21308 }
21309 else
21310 {
21311 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21312 RTX_FRAME_RELATED_P (insn) = 1;
21313 }
21314 }
21315
21316 if (! IS_VOLATILE (func_type))
21317 saved_regs += arm_save_coproc_regs ();
21318
21319 if (frame_pointer_needed && TARGET_ARM)
21320 {
21321 /* Create the new frame pointer. */
21322 if (TARGET_APCS_FRAME)
21323 {
21324 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21325 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21326 RTX_FRAME_RELATED_P (insn) = 1;
21327
21328 if (IS_NESTED (func_type))
21329 {
21330 /* Recover the static chain register. */
21331 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21332 insn = gen_rtx_REG (SImode, 3);
21333 else
21334 {
21335 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21336 insn = gen_frame_mem (SImode, insn);
21337 }
21338 emit_set_insn (ip_rtx, insn);
21339 /* Add a USE to stop propagate_one_insn() from barfing. */
21340 emit_insn (gen_force_register_use (ip_rtx));
21341 }
21342 }
21343 else
21344 {
21345 insn = GEN_INT (saved_regs - 4);
21346 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21347 stack_pointer_rtx, insn));
21348 RTX_FRAME_RELATED_P (insn) = 1;
21349 }
21350 }
21351
21352 if (flag_stack_usage_info)
21353 current_function_static_stack_size
21354 = offsets->outgoing_args - offsets->saved_args;
21355
21356 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21357 {
21358 /* This add can produce multiple insns for a large constant, so we
21359 need to get tricky. */
21360 rtx_insn *last = get_last_insn ();
21361
21362 amount = GEN_INT (offsets->saved_args + saved_regs
21363 - offsets->outgoing_args);
21364
21365 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21366 amount));
21367 do
21368 {
21369 last = last ? NEXT_INSN (last) : get_insns ();
21370 RTX_FRAME_RELATED_P (last) = 1;
21371 }
21372 while (last != insn);
21373
21374 /* If the frame pointer is needed, emit a special barrier that
21375 will prevent the scheduler from moving stores to the frame
21376 before the stack adjustment. */
21377 if (frame_pointer_needed)
21378 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21379 hard_frame_pointer_rtx));
21380 }
21381
21382
21383 if (frame_pointer_needed && TARGET_THUMB2)
21384 thumb_set_frame_pointer (offsets);
21385
21386 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21387 {
21388 unsigned long mask;
21389
21390 mask = live_regs_mask;
21391 mask &= THUMB2_WORK_REGS;
21392 if (!IS_NESTED (func_type))
21393 mask |= (1 << IP_REGNUM);
21394 arm_load_pic_register (mask);
21395 }
21396
21397 /* If we are profiling, make sure no instructions are scheduled before
21398 the call to mcount. Similarly if the user has requested no
21399 scheduling in the prolog. Similarly if we want non-call exceptions
21400 using the EABI unwinder, to prevent faulting instructions from being
21401 swapped with a stack adjustment. */
21402 if (crtl->profile || !TARGET_SCHED_PROLOG
21403 || (arm_except_unwind_info (&global_options) == UI_TARGET
21404 && cfun->can_throw_non_call_exceptions))
21405 emit_insn (gen_blockage ());
21406
21407 /* If the link register is being kept alive, with the return address in it,
21408 then make sure that it does not get reused by the ce2 pass. */
21409 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21410 cfun->machine->lr_save_eliminated = 1;
21411 }
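/* Editor's note: a standalone illustration (not GCC code) of the register
   mask used above when pushing the tail of r0-r3 for variadic functions,
   (0xf0 >> (args_to_push / 4)) & 0xf.  */
static unsigned
model_anon_args_push_mask (unsigned args_to_push /* bytes, multiple of 4 */)
{
  return (0xf0u >> (args_to_push / 4)) & 0xfu;
}
/* args_to_push = 4  -> 0x8  (push {r3})
   args_to_push = 8  -> 0xc  (push {r2, r3})
   args_to_push = 12 -> 0xe  (push {r1, r2, r3})
   args_to_push = 16 -> 0xf  (push {r0, r1, r2, r3})  */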
21412 \f
21413 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21414 static void
21415 arm_print_condition (FILE *stream)
21416 {
21417 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21418 {
21419 /* Branch conversion is not implemented for Thumb-2. */
21420 if (TARGET_THUMB)
21421 {
21422 output_operand_lossage ("predicated Thumb instruction");
21423 return;
21424 }
21425 if (current_insn_predicate != NULL)
21426 {
21427 output_operand_lossage
21428 ("predicated instruction in conditional sequence");
21429 return;
21430 }
21431
21432 fputs (arm_condition_codes[arm_current_cc], stream);
21433 }
21434 else if (current_insn_predicate)
21435 {
21436 enum arm_cond_code code;
21437
21438 if (TARGET_THUMB1)
21439 {
21440 output_operand_lossage ("predicated Thumb instruction");
21441 return;
21442 }
21443
21444 code = get_arm_condition_code (current_insn_predicate);
21445 fputs (arm_condition_codes[code], stream);
21446 }
21447 }
21448
21449
21450 /* Globally reserved letters: acln
21451 Punctuation letters currently used: @_|?().!#
21452 Lower case letters currently used: bcdefhimpqtvwxyz
21453 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21454 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21455
21456 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21457
21458 If CODE is 'd', then X is a condition operand and the instruction
21459 should only be executed if the condition is true.
21460 If CODE is 'D', then X is a condition operand and the instruction
21461 should only be executed if the condition is false: however, if the mode
21462 of the comparison is CCFPEmode, then always execute the instruction -- we
21463 do this because in these circumstances !GE does not necessarily imply LT;
21464 in these cases the instruction pattern will take care to make sure that
21465 an instruction containing %d will follow, thereby undoing the effects of
21466 doing this instruction unconditionally.
21467 If CODE is 'N' then X is a floating point operand that must be negated
21468 before output.
21469 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21470 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21471 static void
21472 arm_print_operand (FILE *stream, rtx x, int code)
21473 {
21474 switch (code)
21475 {
21476 case '@':
21477 fputs (ASM_COMMENT_START, stream);
21478 return;
21479
21480 case '_':
21481 fputs (user_label_prefix, stream);
21482 return;
21483
21484 case '|':
21485 fputs (REGISTER_PREFIX, stream);
21486 return;
21487
21488 case '?':
21489 arm_print_condition (stream);
21490 return;
21491
21492 case '(':
21493 /* Nothing in unified syntax, otherwise the current condition code. */
21494 if (!TARGET_UNIFIED_ASM)
21495 arm_print_condition (stream);
21496 break;
21497
21498 case ')':
21499 /* The current condition code in unified syntax, otherwise nothing. */
21500 if (TARGET_UNIFIED_ASM)
21501 arm_print_condition (stream);
21502 break;
21503
21504 case '.':
21505 /* The current condition code for a condition code setting instruction.
21506 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21507 if (TARGET_UNIFIED_ASM)
21508 {
21509 fputc('s', stream);
21510 arm_print_condition (stream);
21511 }
21512 else
21513 {
21514 arm_print_condition (stream);
21515 fputc('s', stream);
21516 }
21517 return;
21518
21519 case '!':
21520 /* If the instruction is conditionally executed then print
21521 the current condition code, otherwise print 's'. */
21522 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21523 if (current_insn_predicate)
21524 arm_print_condition (stream);
21525 else
21526 fputc('s', stream);
21527 break;
21528
21529 /* %# is a "break" sequence. It doesn't output anything, but is used to
21530 separate e.g. operand numbers from following text, if that text consists
21531 of further digits which we don't want to be part of the operand
21532 number. */
21533 case '#':
21534 return;
21535
21536 case 'N':
21537 {
21538 REAL_VALUE_TYPE r;
21539 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21540 r = real_value_negate (&r);
21541 fprintf (stream, "%s", fp_const_from_val (&r));
21542 }
21543 return;
21544
21545 /* An integer or symbol address without a preceding # sign. */
21546 case 'c':
21547 switch (GET_CODE (x))
21548 {
21549 case CONST_INT:
21550 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21551 break;
21552
21553 case SYMBOL_REF:
21554 output_addr_const (stream, x);
21555 break;
21556
21557 case CONST:
21558 if (GET_CODE (XEXP (x, 0)) == PLUS
21559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21560 {
21561 output_addr_const (stream, x);
21562 break;
21563 }
21564 /* Fall through. */
21565
21566 default:
21567 output_operand_lossage ("Unsupported operand for code '%c'", code);
21568 }
21569 return;
21570
21571 /* An integer that we want to print in HEX. */
21572 case 'x':
21573 switch (GET_CODE (x))
21574 {
21575 case CONST_INT:
21576 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21577 break;
21578
21579 default:
21580 output_operand_lossage ("Unsupported operand for code '%c'", code);
21581 }
21582 return;
21583
21584 case 'B':
21585 if (CONST_INT_P (x))
21586 {
21587 HOST_WIDE_INT val;
21588 val = ARM_SIGN_EXTEND (~INTVAL (x));
21589 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21590 }
21591 else
21592 {
21593 putc ('~', stream);
21594 output_addr_const (stream, x);
21595 }
21596 return;
21597
21598 case 'b':
21599 /* Print the log2 of a CONST_INT. */
21600 {
21601 HOST_WIDE_INT val;
21602
21603 if (!CONST_INT_P (x)
21604 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21605 output_operand_lossage ("Unsupported operand for code '%c'", code);
21606 else
21607 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21608 }
21609 return;
21610
21611 case 'L':
21612 /* The low 16 bits of an immediate constant. */
21613 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21614 return;
21615
21616 case 'i':
21617 fprintf (stream, "%s", arithmetic_instr (x, 1));
21618 return;
21619
21620 case 'I':
21621 fprintf (stream, "%s", arithmetic_instr (x, 0));
21622 return;
21623
21624 case 'S':
21625 {
21626 HOST_WIDE_INT val;
21627 const char *shift;
21628
21629 shift = shift_op (x, &val);
21630
21631 if (shift)
21632 {
21633 fprintf (stream, ", %s ", shift);
21634 if (val == -1)
21635 arm_print_operand (stream, XEXP (x, 1), 0);
21636 else
21637 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21638 }
21639 }
21640 return;
21641
21642 /* An explanation of the 'Q', 'R' and 'H' register operands:
21643
21644 In a pair of registers containing a DI or DF value the 'Q'
21645 operand returns the register number of the register containing
21646 the least significant part of the value. The 'R' operand returns
21647 the register number of the register containing the most
21648 significant part of the value.
21649
21650 The 'H' operand returns the higher of the two register numbers.
21651 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21652 same as the 'Q' operand, since the most significant part of the
21653 value is held in the lower number register. The reverse is true
21654 on systems where WORDS_BIG_ENDIAN is false.
21655
21656 The purpose of these operands is to distinguish between cases
21657 where the endian-ness of the values is important (for example
21658 when they are added together), and cases where the endian-ness
21659 is irrelevant, but the order of register operations is important.
21660 For example when loading a value from memory into a register
21661 pair, the endian-ness does not matter. Provided that the value
21662 from the lower memory address is put into the lower numbered
21663 register, and the value from the higher address is put into the
21664 higher numbered register, the load will work regardless of whether
21665 the value being loaded is big-wordian or little-wordian. The
21666 order of the two register loads can matter however, if the address
21667 of the memory location is actually held in one of the registers
21668 being overwritten by the load.
21669
21670 The 'Q' and 'R' constraints are also available for 64-bit
21671 constants. */
21672 case 'Q':
21673 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21674 {
21675 rtx part = gen_lowpart (SImode, x);
21676 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21677 return;
21678 }
21679
21680 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21681 {
21682 output_operand_lossage ("invalid operand for code '%c'", code);
21683 return;
21684 }
21685
21686 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21687 return;
21688
21689 case 'R':
21690 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21691 {
21692 machine_mode mode = GET_MODE (x);
21693 rtx part;
21694
21695 if (mode == VOIDmode)
21696 mode = DImode;
21697 part = gen_highpart_mode (SImode, mode, x);
21698 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21699 return;
21700 }
21701
21702 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21703 {
21704 output_operand_lossage ("invalid operand for code '%c'", code);
21705 return;
21706 }
21707
21708 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21709 return;
21710
21711 case 'H':
21712 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21713 {
21714 output_operand_lossage ("invalid operand for code '%c'", code);
21715 return;
21716 }
21717
21718 asm_fprintf (stream, "%r", REGNO (x) + 1);
21719 return;
21720
21721 case 'J':
21722 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21723 {
21724 output_operand_lossage ("invalid operand for code '%c'", code);
21725 return;
21726 }
21727
21728 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21729 return;
21730
21731 case 'K':
21732 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21733 {
21734 output_operand_lossage ("invalid operand for code '%c'", code);
21735 return;
21736 }
21737
21738 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21739 return;
21740
21741 case 'm':
21742 asm_fprintf (stream, "%r",
21743 REG_P (XEXP (x, 0))
21744 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21745 return;
21746
21747 case 'M':
21748 asm_fprintf (stream, "{%r-%r}",
21749 REGNO (x),
21750 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21751 return;
21752
21753 /* Like 'M', but writing doubleword vector registers, for use by Neon
21754 insns. */
21755 case 'h':
21756 {
21757 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21758 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21759 if (numregs == 1)
21760 asm_fprintf (stream, "{d%d}", regno);
21761 else
21762 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21763 }
21764 return;
21765
21766 case 'd':
21767 /* CONST_TRUE_RTX means always -- that's the default. */
21768 if (x == const_true_rtx)
21769 return;
21770
21771 if (!COMPARISON_P (x))
21772 {
21773 output_operand_lossage ("invalid operand for code '%c'", code);
21774 return;
21775 }
21776
21777 fputs (arm_condition_codes[get_arm_condition_code (x)],
21778 stream);
21779 return;
21780
21781 case 'D':
21782 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21783 want to do that. */
21784 if (x == const_true_rtx)
21785 {
21786 output_operand_lossage ("instruction never executed");
21787 return;
21788 }
21789 if (!COMPARISON_P (x))
21790 {
21791 output_operand_lossage ("invalid operand for code '%c'", code);
21792 return;
21793 }
21794
21795 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21796 (get_arm_condition_code (x))],
21797 stream);
21798 return;
21799
21800 case 's':
21801 case 'V':
21802 case 'W':
21803 case 'X':
21804 case 'Y':
21805 case 'Z':
21806 /* Former Maverick support, removed after GCC-4.7. */
21807 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21808 return;
21809
21810 case 'U':
21811 if (!REG_P (x)
21812 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21813 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21814 /* Bad value for wCG register number. */
21815 {
21816 output_operand_lossage ("invalid operand for code '%c'", code);
21817 return;
21818 }
21819
21820 else
21821 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21822 return;
21823
21824 /* Print an iWMMXt control register name. */
21825 case 'w':
21826 if (!CONST_INT_P (x)
21827 || INTVAL (x) < 0
21828 || INTVAL (x) >= 16)
21829 /* Bad value for wC register number. */
21830 {
21831 output_operand_lossage ("invalid operand for code '%c'", code);
21832 return;
21833 }
21834
21835 else
21836 {
21837 static const char * wc_reg_names [16] =
21838 {
21839 "wCID", "wCon", "wCSSF", "wCASF",
21840 "wC4", "wC5", "wC6", "wC7",
21841 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21842 "wC12", "wC13", "wC14", "wC15"
21843 };
21844
21845 fputs (wc_reg_names [INTVAL (x)], stream);
21846 }
21847 return;
21848
21849 /* Print the high single-precision register of a VFP double-precision
21850 register. */
21851 case 'p':
21852 {
21853 machine_mode mode = GET_MODE (x);
21854 int regno;
21855
21856 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21857 {
21858 output_operand_lossage ("invalid operand for code '%c'", code);
21859 return;
21860 }
21861
21862 regno = REGNO (x);
21863 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21864 {
21865 output_operand_lossage ("invalid operand for code '%c'", code);
21866 return;
21867 }
21868
21869 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21870 }
21871 return;
21872
21873 /* Print a VFP/Neon double precision or quad precision register name. */
21874 case 'P':
21875 case 'q':
21876 {
21877 machine_mode mode = GET_MODE (x);
21878 int is_quad = (code == 'q');
21879 int regno;
21880
21881 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21882 {
21883 output_operand_lossage ("invalid operand for code '%c'", code);
21884 return;
21885 }
21886
21887 if (!REG_P (x)
21888 || !IS_VFP_REGNUM (REGNO (x)))
21889 {
21890 output_operand_lossage ("invalid operand for code '%c'", code);
21891 return;
21892 }
21893
21894 regno = REGNO (x);
21895 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21896 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21897 {
21898 output_operand_lossage ("invalid operand for code '%c'", code);
21899 return;
21900 }
21901
21902 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21903 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21904 }
21905 return;
21906
21907 /* These two codes print the low/high doubleword register of a Neon quad
21908 register, respectively. For pair-structure types, can also print
21909 low/high quadword registers. */
21910 case 'e':
21911 case 'f':
21912 {
21913 machine_mode mode = GET_MODE (x);
21914 int regno;
21915
21916 if ((GET_MODE_SIZE (mode) != 16
21917 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21918 {
21919 output_operand_lossage ("invalid operand for code '%c'", code);
21920 return;
21921 }
21922
21923 regno = REGNO (x);
21924 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21925 {
21926 output_operand_lossage ("invalid operand for code '%c'", code);
21927 return;
21928 }
21929
21930 if (GET_MODE_SIZE (mode) == 16)
21931 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21932 + (code == 'f' ? 1 : 0));
21933 else
21934 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21935 + (code == 'f' ? 1 : 0));
21936 }
21937 return;
21938
21939 /* Print a VFPv3 floating-point constant, represented as an integer
21940 index. */
21941 case 'G':
21942 {
21943 int index = vfp3_const_double_index (x);
21944 gcc_assert (index != -1);
21945 fprintf (stream, "%d", index);
21946 }
21947 return;
21948
21949 /* Print bits representing opcode features for Neon.
21950
21951 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21952 and polynomials as unsigned.
21953
21954 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21955
21956 Bit 2 is 1 for rounding functions, 0 otherwise. */
21957
21958 /* Identify the type as 's', 'u', 'p' or 'f'. */
21959 case 'T':
21960 {
21961 HOST_WIDE_INT bits = INTVAL (x);
21962 fputc ("uspf"[bits & 3], stream);
21963 }
21964 return;
21965
21966 /* Likewise, but signed and unsigned integers are both 'i'. */
21967 case 'F':
21968 {
21969 HOST_WIDE_INT bits = INTVAL (x);
21970 fputc ("iipf"[bits & 3], stream);
21971 }
21972 return;
21973
21974 /* As for 'T', but emit 'u' instead of 'p'. */
21975 case 't':
21976 {
21977 HOST_WIDE_INT bits = INTVAL (x);
21978 fputc ("usuf"[bits & 3], stream);
21979 }
21980 return;
21981
21982 /* Bit 2: rounding (vs none). */
21983 case 'O':
21984 {
21985 HOST_WIDE_INT bits = INTVAL (x);
21986 fputs ((bits & 4) != 0 ? "r" : "", stream);
21987 }
21988 return;
21989
21990 /* Memory operand for vld1/vst1 instruction. */
21991 case 'A':
21992 {
21993 rtx addr;
21994 bool postinc = FALSE;
21995 rtx postinc_reg = NULL;
21996 unsigned align, memsize, align_bits;
21997
21998 gcc_assert (MEM_P (x));
21999 addr = XEXP (x, 0);
22000 if (GET_CODE (addr) == POST_INC)
22001 {
22002 postinc = 1;
22003 addr = XEXP (addr, 0);
22004 }
22005 if (GET_CODE (addr) == POST_MODIFY)
22006 {
22007 postinc_reg = XEXP( XEXP (addr, 1), 1);
22008 addr = XEXP (addr, 0);
22009 }
22010 asm_fprintf (stream, "[%r", REGNO (addr));
22011
22012 /* We know the alignment of this access, so we can emit a hint in the
22013 instruction (for some alignments) as an aid to the memory subsystem
22014 of the target. */
22015 align = MEM_ALIGN (x) >> 3;
22016 memsize = MEM_SIZE (x);
22017
22018 /* Only certain alignment specifiers are supported by the hardware. */
22019 if (memsize == 32 && (align % 32) == 0)
22020 align_bits = 256;
22021 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22022 align_bits = 128;
22023 else if (memsize >= 8 && (align % 8) == 0)
22024 align_bits = 64;
22025 else
22026 align_bits = 0;
22027
22028 if (align_bits != 0)
22029 asm_fprintf (stream, ":%d", align_bits);
22030
22031 asm_fprintf (stream, "]");
22032
22033 if (postinc)
22034 fputs("!", stream);
22035 if (postinc_reg)
22036 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22037 }
22038 return;
22039
22040 case 'C':
22041 {
22042 rtx addr;
22043
22044 gcc_assert (MEM_P (x));
22045 addr = XEXP (x, 0);
22046 gcc_assert (REG_P (addr));
22047 asm_fprintf (stream, "[%r]", REGNO (addr));
22048 }
22049 return;
22050
22051 /* Translate an S register number into a D register number and element index. */
22052 case 'y':
22053 {
22054 machine_mode mode = GET_MODE (x);
22055 int regno;
22056
22057 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22058 {
22059 output_operand_lossage ("invalid operand for code '%c'", code);
22060 return;
22061 }
22062
22063 regno = REGNO (x);
22064 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22065 {
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22068 }
22069
22070 regno = regno - FIRST_VFP_REGNUM;
22071 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22072 }
22073 return;
22074
22075 case 'v':
22076 gcc_assert (CONST_DOUBLE_P (x));
22077 int result;
22078 result = vfp3_const_double_for_fract_bits (x);
22079 if (result == 0)
22080 result = vfp3_const_double_for_bits (x);
22081 fprintf (stream, "#%d", result);
22082 return;
22083
22084 /* Register specifier for vld1.16/vst1.16. Translate the S register
22085 number into a D register number and element index. */
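/* For example, a 16-bit value in s1 is printed as d0[2], i.e. lane 2 of
   the overlapping D register.  */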
22086 case 'z':
22087 {
22088 machine_mode mode = GET_MODE (x);
22089 int regno;
22090
22091 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22092 {
22093 output_operand_lossage ("invalid operand for code '%c'", code);
22094 return;
22095 }
22096
22097 regno = REGNO (x);
22098 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22099 {
22100 output_operand_lossage ("invalid operand for code '%c'", code);
22101 return;
22102 }
22103
22104 regno = regno - FIRST_VFP_REGNUM;
22105 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22106 }
22107 return;
22108
22109 default:
22110 if (x == 0)
22111 {
22112 output_operand_lossage ("missing operand");
22113 return;
22114 }
22115
22116 switch (GET_CODE (x))
22117 {
22118 case REG:
22119 asm_fprintf (stream, "%r", REGNO (x));
22120 break;
22121
22122 case MEM:
22123 output_memory_reference_mode = GET_MODE (x);
22124 output_address (XEXP (x, 0));
22125 break;
22126
22127 case CONST_DOUBLE:
22128 {
22129 char fpstr[20];
22130 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22131 sizeof (fpstr), 0, 1);
22132 fprintf (stream, "#%s", fpstr);
22133 }
22134 break;
22135
22136 default:
22137 gcc_assert (GET_CODE (x) != NEG);
22138 fputc ('#', stream);
22139 if (GET_CODE (x) == HIGH)
22140 {
22141 fputs (":lower16:", stream);
22142 x = XEXP (x, 0);
22143 }
22144
22145 output_addr_const (stream, x);
22146 break;
22147 }
22148 }
22149 }
22150 \f
22151 /* Target hook for printing a memory address. */
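/* A few representative cases, assuming a base register r3 and SImode
   accesses:
     (reg r3)                      -> "[r3]"
     (plus (reg r3) (const_int 8)) -> "[r3, #8]"
     (post_inc (reg r3))           -> "[r3], #4"  */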
22152 static void
22153 arm_print_operand_address (FILE *stream, rtx x)
22154 {
22155 if (TARGET_32BIT)
22156 {
22157 int is_minus = GET_CODE (x) == MINUS;
22158
22159 if (REG_P (x))
22160 asm_fprintf (stream, "[%r]", REGNO (x));
22161 else if (GET_CODE (x) == PLUS || is_minus)
22162 {
22163 rtx base = XEXP (x, 0);
22164 rtx index = XEXP (x, 1);
22165 HOST_WIDE_INT offset = 0;
22166 if (!REG_P (base)
22167 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22168 {
22169 /* Ensure that BASE is a register. */
22170 /* (one of them must be). */
22171 /* Also ensure the SP is not used as an index register. */
22172 std::swap (base, index);
22173 }
22174 switch (GET_CODE (index))
22175 {
22176 case CONST_INT:
22177 offset = INTVAL (index);
22178 if (is_minus)
22179 offset = -offset;
22180 asm_fprintf (stream, "[%r, #%wd]",
22181 REGNO (base), offset);
22182 break;
22183
22184 case REG:
22185 asm_fprintf (stream, "[%r, %s%r]",
22186 REGNO (base), is_minus ? "-" : "",
22187 REGNO (index));
22188 break;
22189
22190 case MULT:
22191 case ASHIFTRT:
22192 case LSHIFTRT:
22193 case ASHIFT:
22194 case ROTATERT:
22195 {
22196 asm_fprintf (stream, "[%r, %s%r",
22197 REGNO (base), is_minus ? "-" : "",
22198 REGNO (XEXP (index, 0)));
22199 arm_print_operand (stream, index, 'S');
22200 fputs ("]", stream);
22201 break;
22202 }
22203
22204 default:
22205 gcc_unreachable ();
22206 }
22207 }
22208 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22209 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22210 {
22211 extern machine_mode output_memory_reference_mode;
22212
22213 gcc_assert (REG_P (XEXP (x, 0)));
22214
22215 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22216 asm_fprintf (stream, "[%r, #%s%d]!",
22217 REGNO (XEXP (x, 0)),
22218 GET_CODE (x) == PRE_DEC ? "-" : "",
22219 GET_MODE_SIZE (output_memory_reference_mode));
22220 else
22221 asm_fprintf (stream, "[%r], #%s%d",
22222 REGNO (XEXP (x, 0)),
22223 GET_CODE (x) == POST_DEC ? "-" : "",
22224 GET_MODE_SIZE (output_memory_reference_mode));
22225 }
22226 else if (GET_CODE (x) == PRE_MODIFY)
22227 {
22228 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22229 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22230 asm_fprintf (stream, "#%wd]!",
22231 INTVAL (XEXP (XEXP (x, 1), 1)));
22232 else
22233 asm_fprintf (stream, "%r]!",
22234 REGNO (XEXP (XEXP (x, 1), 1)));
22235 }
22236 else if (GET_CODE (x) == POST_MODIFY)
22237 {
22238 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22239 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22240 asm_fprintf (stream, "#%wd",
22241 INTVAL (XEXP (XEXP (x, 1), 1)));
22242 else
22243 asm_fprintf (stream, "%r",
22244 REGNO (XEXP (XEXP (x, 1), 1)));
22245 }
22246 else output_addr_const (stream, x);
22247 }
22248 else
22249 {
22250 if (REG_P (x))
22251 asm_fprintf (stream, "[%r]", REGNO (x));
22252 else if (GET_CODE (x) == POST_INC)
22253 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22254 else if (GET_CODE (x) == PLUS)
22255 {
22256 gcc_assert (REG_P (XEXP (x, 0)));
22257 if (CONST_INT_P (XEXP (x, 1)))
22258 asm_fprintf (stream, "[%r, #%wd]",
22259 REGNO (XEXP (x, 0)),
22260 INTVAL (XEXP (x, 1)));
22261 else
22262 asm_fprintf (stream, "[%r, %r]",
22263 REGNO (XEXP (x, 0)),
22264 REGNO (XEXP (x, 1)));
22265 }
22266 else
22267 output_addr_const (stream, x);
22268 }
22269 }
22270 \f
22271 /* Target hook for indicating whether a punctuation character for
22272 TARGET_PRINT_OPERAND is valid. */
22273 static bool
22274 arm_print_operand_punct_valid_p (unsigned char code)
22275 {
22276 return (code == '@' || code == '|' || code == '.'
22277 || code == '(' || code == ')' || code == '#'
22278 || (TARGET_32BIT && (code == '?'))
22279 || (TARGET_THUMB2 && (code == '!'))
22280 || (TARGET_THUMB && (code == '_')));
22281 }
22282 \f
22283 /* Target hook for assembling integer objects. The ARM version needs to
22284 handle word-sized values specially. */
22285 static bool
22286 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22287 {
22288 machine_mode mode;
22289
22290 if (size == UNITS_PER_WORD && aligned_p)
22291 {
22292 fputs ("\t.word\t", asm_out_file);
22293 output_addr_const (asm_out_file, x);
22294
22295 /* Mark symbols as position independent. We only do this in the
22296 .text segment, not in the .data segment. */
22297 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22298 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22299 {
22300 /* See legitimize_pic_address for an explanation of the
22301 TARGET_VXWORKS_RTP check. */
22302 if (!arm_pic_data_is_text_relative
22303 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22304 fputs ("(GOT)", asm_out_file);
22305 else
22306 fputs ("(GOTOFF)", asm_out_file);
22307 }
22308 fputc ('\n', asm_out_file);
22309 return true;
22310 }
22311
22312 mode = GET_MODE (x);
22313
22314 if (arm_vector_mode_supported_p (mode))
22315 {
22316 int i, units;
22317
22318 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22319
22320 units = CONST_VECTOR_NUNITS (x);
22321 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22322
22323 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22324 for (i = 0; i < units; i++)
22325 {
22326 rtx elt = CONST_VECTOR_ELT (x, i);
22327 assemble_integer
22328 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22329 }
22330 else
22331 for (i = 0; i < units; i++)
22332 {
22333 rtx elt = CONST_VECTOR_ELT (x, i);
22334 REAL_VALUE_TYPE rval;
22335
22336 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22337
22338 assemble_real
22339 (rval, GET_MODE_INNER (mode),
22340 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22341 }
22342
22343 return true;
22344 }
22345
22346 return default_assemble_integer (x, size, aligned_p);
22347 }
22348
22349 static void
22350 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22351 {
22352 section *s;
22353
22354 if (!TARGET_AAPCS_BASED)
22355 {
22356 (is_ctor ?
22357 default_named_section_asm_out_constructor
22358 : default_named_section_asm_out_destructor) (symbol, priority);
22359 return;
22360 }
22361
22362 /* Put these in the .init_array section, using a special relocation. */
22363 if (priority != DEFAULT_INIT_PRIORITY)
22364 {
22365 char buf[18];
22366 sprintf (buf, "%s.%.5u",
22367 is_ctor ? ".init_array" : ".fini_array",
22368 priority);
22369 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22370 }
22371 else if (is_ctor)
22372 s = ctors_section;
22373 else
22374 s = dtors_section;
22375
22376 switch_to_section (s);
22377 assemble_align (POINTER_SIZE);
22378 fputs ("\t.word\t", asm_out_file);
22379 output_addr_const (asm_out_file, symbol);
22380 fputs ("(target1)\n", asm_out_file);
22381 }
22382
22383 /* Add a function to the list of static constructors. */
22384
22385 static void
22386 arm_elf_asm_constructor (rtx symbol, int priority)
22387 {
22388 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22389 }
22390
22391 /* Add a function to the list of static destructors. */
22392
22393 static void
22394 arm_elf_asm_destructor (rtx symbol, int priority)
22395 {
22396 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22397 }
22398 \f
22399 /* A finite state machine takes care of noticing whether or not instructions
22400 can be conditionally executed, and thus decrease execution time and code
22401 size by deleting branch instructions. The fsm is controlled by
22402 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22403
22404 /* The states of the fsm controlling condition codes are:
22405 0: normal, do nothing special
22406 1: make ASM_OUTPUT_OPCODE not output this instruction
22407 2: make ASM_OUTPUT_OPCODE not output this instruction
22408 3: make instructions conditional
22409 4: make instructions conditional
22410
22411 State transitions (state->state by whom under condition):
22412 0 -> 1 final_prescan_insn if the `target' is a label
22413 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22414 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22415 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22416 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22417 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22418 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22419 (the target insn is arm_target_insn).
22420
22421 If the jump clobbers the conditions then we use states 2 and 4.
22422
22423 A similar thing can be done with conditional return insns.
22424
22425 XXX In case the `target' is an unconditional branch, this conditionalising
22426 of the instructions always reduces code size, but not always execution
22427 time. But then, I want to reduce the code size to somewhere near what
22428 /bin/cc produces. */
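/* A hypothetical sketch of the basic transformation (register names and
   label chosen purely for illustration):

       cmp   r0, #0
       beq   .L1           @ final_prescan_insn: state 0 -> 1
       add   r1, r1, #1    @ ASM_OUTPUT_OPCODE: state 1 -> 3, emitted as "addne"
   .L1:                    @ internal_label: state 3 -> 0

   The branch is not output and the skipped instruction is emitted under
   the inverse condition instead.  */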
22429
22430 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22431 instructions. When a COND_EXEC instruction is seen the subsequent
22432 instructions are scanned so that multiple conditional instructions can be
22433 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22434 specify the length and true/false mask for the IT block. These will be
22435 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
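/* Worked example: two COND_EXEC insns, the first predicated on EQ and the
   second on NE, give arm_condexec_masklen == 2 and arm_condexec_mask == 1
   (binary 01), which thumb2_asm_output_opcode prints as "ite eq" in front
   of the pair.  */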
22436
22437 /* Returns the index of the ARM condition code string in
22438 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22439 COMPARISON should be an rtx like `(eq (...) (...))'. */
22440
22441 enum arm_cond_code
22442 maybe_get_arm_condition_code (rtx comparison)
22443 {
22444 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22445 enum arm_cond_code code;
22446 enum rtx_code comp_code = GET_CODE (comparison);
22447
22448 if (GET_MODE_CLASS (mode) != MODE_CC)
22449 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22450 XEXP (comparison, 1));
22451
22452 switch (mode)
22453 {
22454 case CC_DNEmode: code = ARM_NE; goto dominance;
22455 case CC_DEQmode: code = ARM_EQ; goto dominance;
22456 case CC_DGEmode: code = ARM_GE; goto dominance;
22457 case CC_DGTmode: code = ARM_GT; goto dominance;
22458 case CC_DLEmode: code = ARM_LE; goto dominance;
22459 case CC_DLTmode: code = ARM_LT; goto dominance;
22460 case CC_DGEUmode: code = ARM_CS; goto dominance;
22461 case CC_DGTUmode: code = ARM_HI; goto dominance;
22462 case CC_DLEUmode: code = ARM_LS; goto dominance;
22463 case CC_DLTUmode: code = ARM_CC;
22464
22465 dominance:
22466 if (comp_code == EQ)
22467 return ARM_INVERSE_CONDITION_CODE (code);
22468 if (comp_code == NE)
22469 return code;
22470 return ARM_NV;
22471
22472 case CC_NOOVmode:
22473 switch (comp_code)
22474 {
22475 case NE: return ARM_NE;
22476 case EQ: return ARM_EQ;
22477 case GE: return ARM_PL;
22478 case LT: return ARM_MI;
22479 default: return ARM_NV;
22480 }
22481
22482 case CC_Zmode:
22483 switch (comp_code)
22484 {
22485 case NE: return ARM_NE;
22486 case EQ: return ARM_EQ;
22487 default: return ARM_NV;
22488 }
22489
22490 case CC_Nmode:
22491 switch (comp_code)
22492 {
22493 case NE: return ARM_MI;
22494 case EQ: return ARM_PL;
22495 default: return ARM_NV;
22496 }
22497
22498 case CCFPEmode:
22499 case CCFPmode:
22500 /* We can handle all cases except UNEQ and LTGT. */
22501 switch (comp_code)
22502 {
22503 case GE: return ARM_GE;
22504 case GT: return ARM_GT;
22505 case LE: return ARM_LS;
22506 case LT: return ARM_MI;
22507 case NE: return ARM_NE;
22508 case EQ: return ARM_EQ;
22509 case ORDERED: return ARM_VC;
22510 case UNORDERED: return ARM_VS;
22511 case UNLT: return ARM_LT;
22512 case UNLE: return ARM_LE;
22513 case UNGT: return ARM_HI;
22514 case UNGE: return ARM_PL;
22515 /* UNEQ and LTGT do not have a representation. */
22516 case UNEQ: /* Fall through. */
22517 case LTGT: /* Fall through. */
22518 default: return ARM_NV;
22519 }
22520
22521 case CC_SWPmode:
22522 switch (comp_code)
22523 {
22524 case NE: return ARM_NE;
22525 case EQ: return ARM_EQ;
22526 case GE: return ARM_LE;
22527 case GT: return ARM_LT;
22528 case LE: return ARM_GE;
22529 case LT: return ARM_GT;
22530 case GEU: return ARM_LS;
22531 case GTU: return ARM_CC;
22532 case LEU: return ARM_CS;
22533 case LTU: return ARM_HI;
22534 default: return ARM_NV;
22535 }
22536
22537 case CC_Cmode:
22538 switch (comp_code)
22539 {
22540 case LTU: return ARM_CS;
22541 case GEU: return ARM_CC;
22542 default: return ARM_NV;
22543 }
22544
22545 case CC_CZmode:
22546 switch (comp_code)
22547 {
22548 case NE: return ARM_NE;
22549 case EQ: return ARM_EQ;
22550 case GEU: return ARM_CS;
22551 case GTU: return ARM_HI;
22552 case LEU: return ARM_LS;
22553 case LTU: return ARM_CC;
22554 default: return ARM_NV;
22555 }
22556
22557 case CC_NCVmode:
22558 switch (comp_code)
22559 {
22560 case GE: return ARM_GE;
22561 case LT: return ARM_LT;
22562 case GEU: return ARM_CS;
22563 case LTU: return ARM_CC;
22564 default: return ARM_NV;
22565 }
22566
22567 case CCmode:
22568 switch (comp_code)
22569 {
22570 case NE: return ARM_NE;
22571 case EQ: return ARM_EQ;
22572 case GE: return ARM_GE;
22573 case GT: return ARM_GT;
22574 case LE: return ARM_LE;
22575 case LT: return ARM_LT;
22576 case GEU: return ARM_CS;
22577 case GTU: return ARM_HI;
22578 case LEU: return ARM_LS;
22579 case LTU: return ARM_CC;
22580 default: return ARM_NV;
22581 }
22582
22583 default: gcc_unreachable ();
22584 }
22585 }
22586
22587 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22588 static enum arm_cond_code
22589 get_arm_condition_code (rtx comparison)
22590 {
22591 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22592 gcc_assert (code != ARM_NV);
22593 return code;
22594 }
22595
22596 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22597 instructions. */
22598 void
22599 thumb2_final_prescan_insn (rtx_insn *insn)
22600 {
22601 rtx_insn *first_insn = insn;
22602 rtx body = PATTERN (insn);
22603 rtx predicate;
22604 enum arm_cond_code code;
22605 int n;
22606 int mask;
22607 int max;
22608
22609 /* max_insns_skipped in the tune was already taken into account in the
22610 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22611 just emit the IT blocks as best we can. It does not make sense to split
22612 the IT blocks. */
22613 max = MAX_INSN_PER_IT_BLOCK;
22614
22615 /* Remove the previous insn from the count of insns to be output. */
22616 if (arm_condexec_count)
22617 arm_condexec_count--;
22618
22619 /* Nothing to do if we are already inside a conditional block. */
22620 if (arm_condexec_count)
22621 return;
22622
22623 if (GET_CODE (body) != COND_EXEC)
22624 return;
22625
22626 /* Conditional jumps are implemented directly. */
22627 if (JUMP_P (insn))
22628 return;
22629
22630 predicate = COND_EXEC_TEST (body);
22631 arm_current_cc = get_arm_condition_code (predicate);
22632
22633 n = get_attr_ce_count (insn);
22634 arm_condexec_count = 1;
22635 arm_condexec_mask = (1 << n) - 1;
22636 arm_condexec_masklen = n;
22637 /* See if subsequent instructions can be combined into the same block. */
22638 for (;;)
22639 {
22640 insn = next_nonnote_insn (insn);
22641
22642 /* Jumping into the middle of an IT block is illegal, so a label or
22643 barrier terminates the block. */
22644 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22645 break;
22646
22647 body = PATTERN (insn);
22648 /* USE and CLOBBER aren't really insns, so just skip them. */
22649 if (GET_CODE (body) == USE
22650 || GET_CODE (body) == CLOBBER)
22651 continue;
22652
22653 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22654 if (GET_CODE (body) != COND_EXEC)
22655 break;
22656 /* Maximum number of conditionally executed instructions in a block. */
22657 n = get_attr_ce_count (insn);
22658 if (arm_condexec_masklen + n > max)
22659 break;
22660
22661 predicate = COND_EXEC_TEST (body);
22662 code = get_arm_condition_code (predicate);
22663 mask = (1 << n) - 1;
22664 if (arm_current_cc == code)
22665 arm_condexec_mask |= (mask << arm_condexec_masklen);
22666 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22667 break;
22668
22669 arm_condexec_count++;
22670 arm_condexec_masklen += n;
22671
22672 /* A jump must be the last instruction in a conditional block. */
22673 if (JUMP_P (insn))
22674 break;
22675 }
22676 /* Restore recog_data (getting the attributes of other insns can
22677 destroy this array, but final.c assumes that it remains intact
22678 across this call). */
22679 extract_constrain_insn_cached (first_insn);
22680 }
22681
22682 void
22683 arm_final_prescan_insn (rtx_insn *insn)
22684 {
22685 /* BODY will hold the body of INSN. */
22686 rtx body = PATTERN (insn);
22687
22688 /* This will be 1 if trying to repeat the trick, and things need to be
22689 reversed if it appears to fail. */
22690 int reverse = 0;
22691
22692 /* If we start with a return insn, we only succeed if we find another one. */
22693 int seeking_return = 0;
22694 enum rtx_code return_code = UNKNOWN;
22695
22696 /* START_INSN will hold the insn from where we start looking. This is the
22697 first insn after the following code_label if REVERSE is true. */
22698 rtx_insn *start_insn = insn;
22699
22700 /* If in state 4, check if the target branch is reached, in order to
22701 change back to state 0. */
22702 if (arm_ccfsm_state == 4)
22703 {
22704 if (insn == arm_target_insn)
22705 {
22706 arm_target_insn = NULL;
22707 arm_ccfsm_state = 0;
22708 }
22709 return;
22710 }
22711
22712 /* If in state 3, it is possible to repeat the trick, if this insn is an
22713 unconditional branch to a label, and immediately following this branch
22714 is the previous target label which is only used once, and the label this
22715 branch jumps to is not too far off. */
22716 if (arm_ccfsm_state == 3)
22717 {
22718 if (simplejump_p (insn))
22719 {
22720 start_insn = next_nonnote_insn (start_insn);
22721 if (BARRIER_P (start_insn))
22722 {
22723 /* XXX Isn't this always a barrier? */
22724 start_insn = next_nonnote_insn (start_insn);
22725 }
22726 if (LABEL_P (start_insn)
22727 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22728 && LABEL_NUSES (start_insn) == 1)
22729 reverse = TRUE;
22730 else
22731 return;
22732 }
22733 else if (ANY_RETURN_P (body))
22734 {
22735 start_insn = next_nonnote_insn (start_insn);
22736 if (BARRIER_P (start_insn))
22737 start_insn = next_nonnote_insn (start_insn);
22738 if (LABEL_P (start_insn)
22739 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22740 && LABEL_NUSES (start_insn) == 1)
22741 {
22742 reverse = TRUE;
22743 seeking_return = 1;
22744 return_code = GET_CODE (body);
22745 }
22746 else
22747 return;
22748 }
22749 else
22750 return;
22751 }
22752
22753 gcc_assert (!arm_ccfsm_state || reverse);
22754 if (!JUMP_P (insn))
22755 return;
22756
22757 /* This jump might be paralleled with a clobber of the condition codes;
22758 the jump should always come first. */
22759 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22760 body = XVECEXP (body, 0, 0);
22761
22762 if (reverse
22763 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22764 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22765 {
22766 int insns_skipped;
22767 int fail = FALSE, succeed = FALSE;
22768 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22769 int then_not_else = TRUE;
22770 rtx_insn *this_insn = start_insn;
22771 rtx label = 0;
22772
22773 /* Register the insn jumped to. */
22774 if (reverse)
22775 {
22776 if (!seeking_return)
22777 label = XEXP (SET_SRC (body), 0);
22778 }
22779 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22780 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22781 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22782 {
22783 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22784 then_not_else = FALSE;
22785 }
22786 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22787 {
22788 seeking_return = 1;
22789 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22790 }
22791 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22792 {
22793 seeking_return = 1;
22794 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22795 then_not_else = FALSE;
22796 }
22797 else
22798 gcc_unreachable ();
22799
22800 /* See how many insns this branch skips, and what kind of insns. If all
22801 insns are okay, and the label or unconditional branch to the same
22802 label is not too far away, succeed. */
22803 for (insns_skipped = 0;
22804 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22805 {
22806 rtx scanbody;
22807
22808 this_insn = next_nonnote_insn (this_insn);
22809 if (!this_insn)
22810 break;
22811
22812 switch (GET_CODE (this_insn))
22813 {
22814 case CODE_LABEL:
22815 /* Succeed if it is the target label, otherwise fail since
22816 control falls in from somewhere else. */
22817 if (this_insn == label)
22818 {
22819 arm_ccfsm_state = 1;
22820 succeed = TRUE;
22821 }
22822 else
22823 fail = TRUE;
22824 break;
22825
22826 case BARRIER:
22827 /* Succeed if the following insn is the target label.
22828 Otherwise fail.
22829 If return insns are used then the last insn in a function
22830 will be a barrier. */
22831 this_insn = next_nonnote_insn (this_insn);
22832 if (this_insn && this_insn == label)
22833 {
22834 arm_ccfsm_state = 1;
22835 succeed = TRUE;
22836 }
22837 else
22838 fail = TRUE;
22839 break;
22840
22841 case CALL_INSN:
22842 /* The AAPCS says that conditional calls should not be
22843 used since they make interworking inefficient (the
22844 linker can't transform BL<cond> into BLX). That's
22845 only a problem if the machine has BLX. */
22846 if (arm_arch5)
22847 {
22848 fail = TRUE;
22849 break;
22850 }
22851
22852 /* Succeed if the following insn is the target label, or
22853 if the following two insns are a barrier and the
22854 target label. */
22855 this_insn = next_nonnote_insn (this_insn);
22856 if (this_insn && BARRIER_P (this_insn))
22857 this_insn = next_nonnote_insn (this_insn);
22858
22859 if (this_insn && this_insn == label
22860 && insns_skipped < max_insns_skipped)
22861 {
22862 arm_ccfsm_state = 1;
22863 succeed = TRUE;
22864 }
22865 else
22866 fail = TRUE;
22867 break;
22868
22869 case JUMP_INSN:
22870 /* If this is an unconditional branch to the same label, succeed.
22871 If it is to another label, do nothing. If it is conditional,
22872 fail. */
22873 /* XXX Probably, the tests for SET and the PC are
22874 unnecessary. */
22875
22876 scanbody = PATTERN (this_insn);
22877 if (GET_CODE (scanbody) == SET
22878 && GET_CODE (SET_DEST (scanbody)) == PC)
22879 {
22880 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22881 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22882 {
22883 arm_ccfsm_state = 2;
22884 succeed = TRUE;
22885 }
22886 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22887 fail = TRUE;
22888 }
22889 /* Fail if a conditional return is undesirable (e.g. on a
22890 StrongARM), but still allow this if optimizing for size. */
22891 else if (GET_CODE (scanbody) == return_code
22892 && !use_return_insn (TRUE, NULL)
22893 && !optimize_size)
22894 fail = TRUE;
22895 else if (GET_CODE (scanbody) == return_code)
22896 {
22897 arm_ccfsm_state = 2;
22898 succeed = TRUE;
22899 }
22900 else if (GET_CODE (scanbody) == PARALLEL)
22901 {
22902 switch (get_attr_conds (this_insn))
22903 {
22904 case CONDS_NOCOND:
22905 break;
22906 default:
22907 fail = TRUE;
22908 break;
22909 }
22910 }
22911 else
22912 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22913
22914 break;
22915
22916 case INSN:
22917 /* Instructions using or affecting the condition codes make it
22918 fail. */
22919 scanbody = PATTERN (this_insn);
22920 if (!(GET_CODE (scanbody) == SET
22921 || GET_CODE (scanbody) == PARALLEL)
22922 || get_attr_conds (this_insn) != CONDS_NOCOND)
22923 fail = TRUE;
22924 break;
22925
22926 default:
22927 break;
22928 }
22929 }
22930 if (succeed)
22931 {
22932 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22933 arm_target_label = CODE_LABEL_NUMBER (label);
22934 else
22935 {
22936 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22937
22938 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22939 {
22940 this_insn = next_nonnote_insn (this_insn);
22941 gcc_assert (!this_insn
22942 || (!BARRIER_P (this_insn)
22943 && !LABEL_P (this_insn)));
22944 }
22945 if (!this_insn)
22946 {
22947 /* Oh, dear! We ran off the end; give up. */
22948 extract_constrain_insn_cached (insn);
22949 arm_ccfsm_state = 0;
22950 arm_target_insn = NULL;
22951 return;
22952 }
22953 arm_target_insn = this_insn;
22954 }
22955
22956 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22957 what it was. */
22958 if (!reverse)
22959 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22960
22961 if (reverse || then_not_else)
22962 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22963 }
22964
22965 /* Restore recog_data (getting the attributes of other insns can
22966 destroy this array, but final.c assumes that it remains intact
22967 across this call). */
22968 extract_constrain_insn_cached (insn);
22969 }
22970 }
22971
22972 /* Output IT instructions. */
22973 void
22974 thumb2_asm_output_opcode (FILE * stream)
22975 {
22976 char buff[5];
22977 int n;
22978
22979 if (arm_condexec_mask)
22980 {
22981 for (n = 0; n < arm_condexec_masklen; n++)
22982 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22983 buff[n] = 0;
22984 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22985 arm_condition_codes[arm_current_cc]);
22986 arm_condexec_mask = 0;
22987 }
22988 }
22989
22990 /* Returns true if REGNO is a valid register
22991 for holding a quantity of type MODE. */
22992 int
22993 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22994 {
22995 if (GET_MODE_CLASS (mode) == MODE_CC)
22996 return (regno == CC_REGNUM
22997 || (TARGET_HARD_FLOAT && TARGET_VFP
22998 && regno == VFPCC_REGNUM));
22999
23000 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23001 return false;
23002
23003 if (TARGET_THUMB1)
23004 /* For the Thumb we only allow values bigger than SImode in
23005 registers 0 - 6, so that there is always a second low
23006 register available to hold the upper part of the value.
23007 We probably ought to ensure that the register is the
23008 start of an even numbered register pair. */
23009 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23010
23011 if (TARGET_HARD_FLOAT && TARGET_VFP
23012 && IS_VFP_REGNUM (regno))
23013 {
23014 if (mode == SFmode || mode == SImode)
23015 return VFP_REGNO_OK_FOR_SINGLE (regno);
23016
23017 if (mode == DFmode)
23018 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23019
23020 /* VFP registers can hold HFmode values, but there is no point in
23021 putting them there unless we have hardware conversion insns. */
23022 if (mode == HFmode)
23023 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23024
23025 if (TARGET_NEON)
23026 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23027 || (VALID_NEON_QREG_MODE (mode)
23028 && NEON_REGNO_OK_FOR_QUAD (regno))
23029 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23030 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23031 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23032 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23033 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23034
23035 return FALSE;
23036 }
23037
23038 if (TARGET_REALLY_IWMMXT)
23039 {
23040 if (IS_IWMMXT_GR_REGNUM (regno))
23041 return mode == SImode;
23042
23043 if (IS_IWMMXT_REGNUM (regno))
23044 return VALID_IWMMXT_REG_MODE (mode);
23045 }
23046
23047 /* We allow almost any value to be stored in the general registers.
23048 Restrict doubleword quantities to even register pairs in ARM state
23049 so that we can use ldrd. Do not allow very large Neon structure
23050 opaque modes in general registers; they would use too many. */
23051 if (regno <= LAST_ARM_REGNUM)
23052 {
23053 if (ARM_NUM_REGS (mode) > 4)
23054 return FALSE;
23055
23056 if (TARGET_THUMB2)
23057 return TRUE;
23058
23059 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23060 }
23061
23062 if (regno == FRAME_POINTER_REGNUM
23063 || regno == ARG_POINTER_REGNUM)
23064 /* We only allow integers in the fake hard registers. */
23065 return GET_MODE_CLASS (mode) == MODE_INT;
23066
23067 return FALSE;
23068 }
23069
23070 /* Implement MODES_TIEABLE_P. */
23071
23072 bool
23073 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23074 {
23075 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23076 return true;
23077
23078 /* We specifically want to allow elements of "structure" modes to
23079 be tieable to the structure. This more general condition allows
23080 other rarer situations too. */
23081 if (TARGET_NEON
23082 && (VALID_NEON_DREG_MODE (mode1)
23083 || VALID_NEON_QREG_MODE (mode1)
23084 || VALID_NEON_STRUCT_MODE (mode1))
23085 && (VALID_NEON_DREG_MODE (mode2)
23086 || VALID_NEON_QREG_MODE (mode2)
23087 || VALID_NEON_STRUCT_MODE (mode2)))
23088 return true;
23089
23090 return false;
23091 }
23092
23093 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23094 not used in arm mode. */
23095
23096 enum reg_class
23097 arm_regno_class (int regno)
23098 {
23099 if (regno == PC_REGNUM)
23100 return NO_REGS;
23101
23102 if (TARGET_THUMB1)
23103 {
23104 if (regno == STACK_POINTER_REGNUM)
23105 return STACK_REG;
23106 if (regno == CC_REGNUM)
23107 return CC_REG;
23108 if (regno < 8)
23109 return LO_REGS;
23110 return HI_REGS;
23111 }
23112
23113 if (TARGET_THUMB2 && regno < 8)
23114 return LO_REGS;
23115
23116 if ( regno <= LAST_ARM_REGNUM
23117 || regno == FRAME_POINTER_REGNUM
23118 || regno == ARG_POINTER_REGNUM)
23119 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23120
23121 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23122 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23123
23124 if (IS_VFP_REGNUM (regno))
23125 {
23126 if (regno <= D7_VFP_REGNUM)
23127 return VFP_D0_D7_REGS;
23128 else if (regno <= LAST_LO_VFP_REGNUM)
23129 return VFP_LO_REGS;
23130 else
23131 return VFP_HI_REGS;
23132 }
23133
23134 if (IS_IWMMXT_REGNUM (regno))
23135 return IWMMXT_REGS;
23136
23137 if (IS_IWMMXT_GR_REGNUM (regno))
23138 return IWMMXT_GR_REGS;
23139
23140 return NO_REGS;
23141 }
23142
23143 /* Handle a special case when computing the offset
23144 of an argument from the frame pointer. */
23145 int
23146 arm_debugger_arg_offset (int value, rtx addr)
23147 {
23148 rtx_insn *insn;
23149
23150 /* We are only interested if dbxout_parms() failed to compute the offset. */
23151 if (value != 0)
23152 return 0;
23153
23154 /* We can only cope with the case where the address is held in a register. */
23155 if (!REG_P (addr))
23156 return 0;
23157
23158 /* If we are using the frame pointer to point at the argument, then
23159 an offset of 0 is correct. */
23160 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23161 return 0;
23162
23163 /* If we are using the stack pointer to point at the
23164 argument, then an offset of 0 is correct. */
23165 /* ??? Check this is consistent with thumb2 frame layout. */
23166 if ((TARGET_THUMB || !frame_pointer_needed)
23167 && REGNO (addr) == SP_REGNUM)
23168 return 0;
23169
23170 /* Oh dear. The argument is pointed to by a register rather
23171 than being held in a register, or being stored at a known
23172 offset from the frame pointer. Since GDB only understands
23173 those two kinds of argument we must translate the address
23174 held in the register into an offset from the frame pointer.
23175 We do this by searching through the insns for the function
23176 looking to see where this register gets its value. If the
23177 register is initialized from the frame pointer plus an offset
23178 then we are in luck and we can continue, otherwise we give up.
23179
23180 This code is exercised by producing debugging information
23181 for a function with arguments like this:
23182
23183 double func (double a, double b, int c, double d) {return d;}
23184
23185 Without this code the stab for parameter 'd' will be set to
23186 an offset of 0 from the frame pointer, rather than 8. */
23187
23188 /* The if() statement says:
23189
23190 If the insn is a normal instruction
23191 and if the insn is setting the value in a register
23192 and if the register being set is the register holding the address of the argument
23193 and if the address is computed by an addition
23194 that involves adding to a register
23195 which is the frame pointer
23196 a constant integer
23197
23198 then... */
23199
23200 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23201 {
23202 if ( NONJUMP_INSN_P (insn)
23203 && GET_CODE (PATTERN (insn)) == SET
23204 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23205 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23206 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23207 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23208 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23209 )
23210 {
23211 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23212
23213 break;
23214 }
23215 }
23216
23217 if (value == 0)
23218 {
23219 debug_rtx (addr);
23220 warning (0, "unable to compute real location of stacked parameter");
23221 value = 8; /* XXX magic hack */
23222 }
23223
23224 return value;
23225 }
23226 \f
23227 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23228
23229 static const char *
23230 arm_invalid_parameter_type (const_tree t)
23231 {
23232 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23233 return N_("function parameters cannot have __fp16 type");
23234 return NULL;
23235 }
23236
23237 /* Implement TARGET_INVALID_RETURN_TYPE. */
23238
23239 static const char *
23240 arm_invalid_return_type (const_tree t)
23241 {
23242 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23243 return N_("functions cannot return __fp16 type");
23244 return NULL;
23245 }
23246
23247 /* Implement TARGET_PROMOTED_TYPE. */
23248
23249 static tree
23250 arm_promoted_type (const_tree t)
23251 {
23252 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23253 return float_type_node;
23254 return NULL_TREE;
23255 }
23256
23257 /* Implement TARGET_CONVERT_TO_TYPE.
23258 Specifically, this hook implements the peculiarity of the ARM
23259 half-precision floating-point C semantics that requires conversions between
23260 __fp16 and double to go through an intermediate conversion to float. */
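/* For instance (hypothetical user code):

     __fp16 h;
     double d = h;   performed as (double) (float) h
     h = d;          performed as (__fp16) (float) d

   i.e. each conversion between __fp16 and double is split into two steps
   through float.  */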
23261
23262 static tree
23263 arm_convert_to_type (tree type, tree expr)
23264 {
23265 tree fromtype = TREE_TYPE (expr);
23266 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23267 return NULL_TREE;
23268 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23269 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23270 return convert (type, convert (float_type_node, expr));
23271 return NULL_TREE;
23272 }
23273
23274 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23275 This simply adds HFmode as a supported mode; even though we don't
23276 implement arithmetic on this type directly, it's supported by
23277 optabs conversions, much the way the double-word arithmetic is
23278 special-cased in the default hook. */
23279
23280 static bool
23281 arm_scalar_mode_supported_p (machine_mode mode)
23282 {
23283 if (mode == HFmode)
23284 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23285 else if (ALL_FIXED_POINT_MODE_P (mode))
23286 return true;
23287 else
23288 return default_scalar_mode_supported_p (mode);
23289 }
23290
23291 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23292 void
23293 neon_reinterpret (rtx dest, rtx src)
23294 {
23295 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23296 }
23297
23298 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23299 not to early-clobber SRC registers in the process.
23300
23301 We assume that the operands described by SRC and DEST represent a
23302 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23303 number of components into which the copy has been decomposed. */
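/* Worked example (hypothetical registers): copying {d1, d2} into {d2, d3}
   overlaps and the destination starts at a higher register number, so the
   moves are emitted in reverse order, d3 <- d2 and then d2 <- d1, ensuring
   that no source register is clobbered before it has been read.  */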
23304 void
23305 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23306 {
23307 unsigned int i;
23308
23309 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23310 || REGNO (operands[0]) < REGNO (operands[1]))
23311 {
23312 for (i = 0; i < count; i++)
23313 {
23314 operands[2 * i] = dest[i];
23315 operands[2 * i + 1] = src[i];
23316 }
23317 }
23318 else
23319 {
23320 for (i = 0; i < count; i++)
23321 {
23322 operands[2 * i] = dest[count - i - 1];
23323 operands[2 * i + 1] = src[count - i - 1];
23324 }
23325 }
23326 }
23327
23328 /* Split operands into moves from op[1] + op[2] into op[0]. */
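/* For example, combining d1 and d0 (in that order) into q0 is the
   reversed-halves case handled below, so a single VSWP of d0 and d1 is
   emitted instead of two moves.  */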
23329
23330 void
23331 neon_split_vcombine (rtx operands[3])
23332 {
23333 unsigned int dest = REGNO (operands[0]);
23334 unsigned int src1 = REGNO (operands[1]);
23335 unsigned int src2 = REGNO (operands[2]);
23336 machine_mode halfmode = GET_MODE (operands[1]);
23337 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23338 rtx destlo, desthi;
23339
23340 if (src1 == dest && src2 == dest + halfregs)
23341 {
23342 /* No-op move. Can't split to nothing; emit something. */
23343 emit_note (NOTE_INSN_DELETED);
23344 return;
23345 }
23346
23347 /* Preserve register attributes for variable tracking. */
23348 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23349 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23350 GET_MODE_SIZE (halfmode));
23351
23352 /* Special case of reversed high/low parts. Use VSWP. */
23353 if (src2 == dest && src1 == dest + halfregs)
23354 {
23355 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23356 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23357 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23358 return;
23359 }
23360
23361 if (!reg_overlap_mentioned_p (operands[2], destlo))
23362 {
23363 /* Try to avoid unnecessary moves if part of the result
23364 is in the right place already. */
23365 if (src1 != dest)
23366 emit_move_insn (destlo, operands[1]);
23367 if (src2 != dest + halfregs)
23368 emit_move_insn (desthi, operands[2]);
23369 }
23370 else
23371 {
23372 if (src2 != dest + halfregs)
23373 emit_move_insn (desthi, operands[2]);
23374 if (src1 != dest)
23375 emit_move_insn (destlo, operands[1]);
23376 }
23377 }
23378 \f
23379 /* Return the number (counting from 0) of
23380 the least significant set bit in MASK. */
23381
23382 inline static int
23383 number_of_first_bit_set (unsigned mask)
23384 {
23385 return ctz_hwi (mask);
23386 }
23387
23388 /* Like emit_multi_reg_push, but allowing for a different set of
23389 registers to be described as saved. MASK is the set of registers
23390 to be saved; REAL_REGS is the set of registers to be described as
23391 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23392
23393 static rtx_insn *
23394 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23395 {
23396 unsigned long regno;
23397 rtx par[10], tmp, reg;
23398 rtx_insn *insn;
23399 int i, j;
23400
23401 /* Build the parallel of the registers actually being stored. */
23402 for (i = 0; mask; ++i, mask &= mask - 1)
23403 {
23404 regno = ctz_hwi (mask);
23405 reg = gen_rtx_REG (SImode, regno);
23406
23407 if (i == 0)
23408 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23409 else
23410 tmp = gen_rtx_USE (VOIDmode, reg);
23411
23412 par[i] = tmp;
23413 }
23414
23415 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23416 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23417 tmp = gen_frame_mem (BLKmode, tmp);
23418 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23419 par[0] = tmp;
23420
23421 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23422 insn = emit_insn (tmp);
23423
23424 /* Always build the stack adjustment note for unwind info. */
23425 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23426 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23427 par[0] = tmp;
23428
23429 /* Build the parallel of the registers recorded as saved for unwind. */
23430 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23431 {
23432 regno = ctz_hwi (real_regs);
23433 reg = gen_rtx_REG (SImode, regno);
23434
23435 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23436 tmp = gen_frame_mem (SImode, tmp);
23437 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23438 RTX_FRAME_RELATED_P (tmp) = 1;
23439 par[j + 1] = tmp;
23440 }
23441
23442 if (j == 0)
23443 tmp = par[0];
23444 else
23445 {
23446 RTX_FRAME_RELATED_P (par[0]) = 1;
23447 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23448 }
23449
23450 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23451
23452 return insn;
23453 }
23454
23455 /* Emit code to pop registers from the stack. F is the
23456 assembly file. MASK is the registers to pop. */
23457 static void
23458 thumb_pop (FILE *f, unsigned long mask)
23459 {
23460 int regno;
23461 int lo_mask = mask & 0xFF;
23462 int pushed_words = 0;
23463
23464 gcc_assert (mask);
23465
23466 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23467 {
23468 /* Special case. Do not generate a POP PC statement here, do it in
23469 thumb_exit() */
23470 thumb_exit (f, -1);
23471 return;
23472 }
23473
23474 fprintf (f, "\tpop\t{");
23475
23476 /* Look at the low registers first. */
23477 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23478 {
23479 if (lo_mask & 1)
23480 {
23481 asm_fprintf (f, "%r", regno);
23482
23483 if ((lo_mask & ~1) != 0)
23484 fprintf (f, ", ");
23485
23486 pushed_words++;
23487 }
23488 }
23489
23490 if (mask & (1 << PC_REGNUM))
23491 {
23492 /* Catch popping the PC. */
23493 if (TARGET_INTERWORK || TARGET_BACKTRACE
23494 || crtl->calls_eh_return)
23495 {
23496 /* The PC is never popped directly; instead
23497 it is popped into r3 and then BX is used. */
23498 fprintf (f, "}\n");
23499
23500 thumb_exit (f, -1);
23501
23502 return;
23503 }
23504 else
23505 {
23506 if (mask & 0xFF)
23507 fprintf (f, ", ");
23508
23509 asm_fprintf (f, "%r", PC_REGNUM);
23510 }
23511 }
23512
23513 fprintf (f, "}\n");
23514 }
23515
23516 /* Generate code to return from a thumb function.
23517 If 'reg_containing_return_addr' is -1, then the return address is
23518 actually on the stack, at the stack pointer. */
23519 static void
23520 thumb_exit (FILE *f, int reg_containing_return_addr)
23521 {
23522 unsigned regs_available_for_popping;
23523 unsigned regs_to_pop;
23524 int pops_needed;
23525 unsigned available;
23526 unsigned required;
23527 machine_mode mode;
23528 int size;
23529 int restore_a4 = FALSE;
23530
23531 /* Compute the registers we need to pop. */
23532 regs_to_pop = 0;
23533 pops_needed = 0;
23534
23535 if (reg_containing_return_addr == -1)
23536 {
23537 regs_to_pop |= 1 << LR_REGNUM;
23538 ++pops_needed;
23539 }
23540
23541 if (TARGET_BACKTRACE)
23542 {
23543 /* Restore the (ARM) frame pointer and stack pointer. */
23544 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23545 pops_needed += 2;
23546 }
23547
23548 /* If there is nothing to pop then just emit the BX instruction and
23549 return. */
23550 if (pops_needed == 0)
23551 {
23552 if (crtl->calls_eh_return)
23553 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23554
23555 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23556 return;
23557 }
23558 /* Otherwise if we are not supporting interworking and we have not created
23559 a backtrace structure and the function was not entered in ARM mode then
23560 just pop the return address straight into the PC. */
23561 else if (!TARGET_INTERWORK
23562 && !TARGET_BACKTRACE
23563 && !is_called_in_ARM_mode (current_function_decl)
23564 && !crtl->calls_eh_return)
23565 {
23566 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23567 return;
23568 }
23569
23570 /* Find out how many of the (return) argument registers we can corrupt. */
23571 regs_available_for_popping = 0;
23572
23573 /* If returning via __builtin_eh_return, the bottom three registers
23574 all contain information needed for the return. */
23575 if (crtl->calls_eh_return)
23576 size = 12;
23577 else
23578 {
23579 /* Deduce the registers used from the function's
23580 return value. This is more reliable than examining
23581 df_regs_ever_live_p () because that will be set if the register is
23582 ever used in the function, not just if the register is used
23583 to hold a return value. */
23584
23585 if (crtl->return_rtx != 0)
23586 mode = GET_MODE (crtl->return_rtx);
23587 else
23588 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23589
23590 size = GET_MODE_SIZE (mode);
23591
23592 if (size == 0)
23593 {
23594 /* In a void function we can use any argument register.
23595 In a function that returns a structure on the stack
23596 we can use the second and third argument registers. */
23597 if (mode == VOIDmode)
23598 regs_available_for_popping =
23599 (1 << ARG_REGISTER (1))
23600 | (1 << ARG_REGISTER (2))
23601 | (1 << ARG_REGISTER (3));
23602 else
23603 regs_available_for_popping =
23604 (1 << ARG_REGISTER (2))
23605 | (1 << ARG_REGISTER (3));
23606 }
23607 else if (size <= 4)
23608 regs_available_for_popping =
23609 (1 << ARG_REGISTER (2))
23610 | (1 << ARG_REGISTER (3));
23611 else if (size <= 8)
23612 regs_available_for_popping =
23613 (1 << ARG_REGISTER (3));
23614 }
23615
23616 /* Match registers to be popped with registers into which we pop them. */
23617 for (available = regs_available_for_popping,
23618 required = regs_to_pop;
23619 required != 0 && available != 0;
23620 available &= ~(available & - available),
23621 required &= ~(required & - required))
23622 -- pops_needed;
23623
23624 /* If we have any popping registers left over, remove them. */
23625 if (available > 0)
23626 regs_available_for_popping &= ~available;
23627
23628 /* Otherwise if we need another popping register we can use
23629 the fourth argument register. */
23630 else if (pops_needed)
23631 {
23632 /* If we have not found any free argument registers and
23633 reg a4 contains the return address, we must move it. */
23634 if (regs_available_for_popping == 0
23635 && reg_containing_return_addr == LAST_ARG_REGNUM)
23636 {
23637 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23638 reg_containing_return_addr = LR_REGNUM;
23639 }
23640 else if (size > 12)
23641 {
23642 /* Register a4 is being used to hold part of the return value,
23643 but we have dire need of a free, low register. */
23644 restore_a4 = TRUE;
23645
23646 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23647 }
23648
23649 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23650 {
23651 /* The fourth argument register is available. */
23652 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23653
23654 --pops_needed;
23655 }
23656 }
23657
23658 /* Pop as many registers as we can. */
23659 thumb_pop (f, regs_available_for_popping);
23660
23661 /* Process the registers we popped. */
23662 if (reg_containing_return_addr == -1)
23663 {
23664 /* The return address was popped into the lowest numbered register. */
23665 regs_to_pop &= ~(1 << LR_REGNUM);
23666
23667 reg_containing_return_addr =
23668 number_of_first_bit_set (regs_available_for_popping);
23669
23670 /* Remove this register from the mask of available registers, so that
23671 the return address will not be corrupted by further pops. */
23672 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23673 }
23674
23675 /* If we popped other registers then handle them here. */
23676 if (regs_available_for_popping)
23677 {
23678 int frame_pointer;
23679
23680 /* Work out which register currently contains the frame pointer. */
23681 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23682
23683 /* Move it into the correct place. */
23684 asm_fprintf (f, "\tmov\t%r, %r\n",
23685 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23686
23687 /* (Temporarily) remove it from the mask of popped registers. */
23688 regs_available_for_popping &= ~(1 << frame_pointer);
23689 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23690
23691 if (regs_available_for_popping)
23692 {
23693 int stack_pointer;
23694
23695 /* We popped the stack pointer as well,
23696 find the register that contains it. */
23697 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23698
23699 /* Move it into the stack register. */
23700 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23701
23702 /* At this point we have popped all necessary registers, so
23703 do not worry about restoring regs_available_for_popping
23704 to its correct value:
23705
23706 assert (pops_needed == 0)
23707 assert (regs_available_for_popping == (1 << frame_pointer))
23708 assert (regs_to_pop == (1 << STACK_POINTER)) */
23709 }
23710 else
23711 {
23712 /* Since we have just moved the popped value into the frame
23713 pointer, the popping register is available for reuse, and
23714 we know that we still have the stack pointer left to pop. */
23715 regs_available_for_popping |= (1 << frame_pointer);
23716 }
23717 }
23718
23719 /* If we still have registers left on the stack, but we no longer have
23720 any registers into which we can pop them, then we must move the return
23721 address into the link register and make available the register that
23722 contained it. */
23723 if (regs_available_for_popping == 0 && pops_needed > 0)
23724 {
23725 regs_available_for_popping |= 1 << reg_containing_return_addr;
23726
23727 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23728 reg_containing_return_addr);
23729
23730 reg_containing_return_addr = LR_REGNUM;
23731 }
23732
23733 /* If we have registers left on the stack then pop some more.
23734 We know that at most we will want to pop FP and SP. */
23735 if (pops_needed > 0)
23736 {
23737 int popped_into;
23738 int move_to;
23739
23740 thumb_pop (f, regs_available_for_popping);
23741
23742 /* We have popped either FP or SP.
23743 Move whichever one it is into the correct register. */
23744 popped_into = number_of_first_bit_set (regs_available_for_popping);
23745 move_to = number_of_first_bit_set (regs_to_pop);
23746
23747 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23748
23749 regs_to_pop &= ~(1 << move_to);
23750
23751 --pops_needed;
23752 }
23753
23754 /* If we still have not popped everything then we must have only
23755 had one register available to us and we are now popping the SP. */
23756 if (pops_needed > 0)
23757 {
23758 int popped_into;
23759
23760 thumb_pop (f, regs_available_for_popping);
23761
23762 popped_into = number_of_first_bit_set (regs_available_for_popping);
23763
23764 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23765 /*
23766 assert (regs_to_pop == (1 << STACK_POINTER))
23767 assert (pops_needed == 1)
23768 */
23769 }
23770
23771 /* If necessary restore the a4 register. */
23772 if (restore_a4)
23773 {
23774 if (reg_containing_return_addr != LR_REGNUM)
23775 {
23776 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23777 reg_containing_return_addr = LR_REGNUM;
23778 }
23779
23780 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23781 }
23782
23783 if (crtl->calls_eh_return)
23784 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23785
23786 /* Return to caller. */
23787 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23788 }
23789 \f
23790 /* Scan INSN just before assembler is output for it.
23791 For Thumb-1, we track the status of the condition codes; this
23792 information is used in the cbranchsi4_insn pattern. */
23793 void
23794 thumb1_final_prescan_insn (rtx_insn *insn)
23795 {
23796 if (flag_print_asm_name)
23797 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23798 INSN_ADDRESSES (INSN_UID (insn)));
23799 /* Don't overwrite the previous setter when we get to a cbranch. */
23800 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23801 {
23802 enum attr_conds conds;
23803
23804 if (cfun->machine->thumb1_cc_insn)
23805 {
23806 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23807 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23808 CC_STATUS_INIT;
23809 }
23810 conds = get_attr_conds (insn);
23811 if (conds == CONDS_SET)
23812 {
23813 rtx set = single_set (insn);
23814 cfun->machine->thumb1_cc_insn = insn;
23815 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23816 cfun->machine->thumb1_cc_op1 = const0_rtx;
23817 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23818 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23819 {
23820 rtx src1 = XEXP (SET_SRC (set), 1);
23821 if (src1 == const0_rtx)
23822 cfun->machine->thumb1_cc_mode = CCmode;
23823 }
23824 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23825 {
23826 /* Record the src register operand instead of dest because
23827 cprop_hardreg pass propagates src. */
23828 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23829 }
23830 }
23831 else if (conds != CONDS_NOCOND)
23832 cfun->machine->thumb1_cc_insn = NULL_RTX;
23833 }
23834
23835 /* Check if unexpected far jump is used. */
23836 if (cfun->machine->lr_save_eliminated
23837 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23838 internal_error("Unexpected thumb1 far jump");
23839 }
23840
23841 int
23842 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23843 {
23844 unsigned HOST_WIDE_INT mask = 0xff;
23845 int i;
23846
23847 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23848 if (val == 0) /* XXX */
23849 return 0;
23850
23851 for (i = 0; i < 25; i++)
23852 if ((val & (mask << i)) == val)
23853 return 1;
23854
23855 return 0;
23856 }
23857
23858 /* Returns nonzero if the current function contains,
23859 or might contain a far jump. */
23860 static int
23861 thumb_far_jump_used_p (void)
23862 {
23863 rtx_insn *insn;
23864 bool far_jump = false;
23865 unsigned int func_size = 0;
23866
23867 /* This test is only important for leaf functions. */
23868 /* assert (!leaf_function_p ()); */
23869
23870 /* If we have already decided that far jumps may be used,
23871 do not bother checking again, and always return true even if
23872 it turns out that they are not being used. Once we have made
23873 the decision that far jumps are present (and that hence the link
23874 register will be pushed onto the stack) we cannot go back on it. */
23875 if (cfun->machine->far_jump_used)
23876 return 1;
23877
23878 /* If this function is not being called from the prologue/epilogue
23879 generation code then it must be being called from the
23880 INITIAL_ELIMINATION_OFFSET macro. */
23881 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23882 {
23883 /* In this case we know that we are being asked about the elimination
23884 of the arg pointer register. If that register is not being used,
23885 then there are no arguments on the stack, and we do not have to
23886 worry that a far jump might force the prologue to push the link
23887 register, changing the stack offsets. In this case we can just
23888 return false, since the presence of far jumps in the function will
23889 not affect stack offsets.
23890
23891 If the arg pointer is live (or if it was live, but has now been
23892 eliminated and so set to dead) then we do have to test to see if
23893 the function might contain a far jump. This test can lead to some
23894 	 false negatives, since before reload is completed, the length of
23895 branch instructions is not known, so gcc defaults to returning their
23896 longest length, which in turn sets the far jump attribute to true.
23897
23898 A false negative will not result in bad code being generated, but it
23899 will result in a needless push and pop of the link register. We
23900 hope that this does not occur too often.
23901
23902 If we need doubleword stack alignment this could affect the other
23903 elimination offsets so we can't risk getting it wrong. */
23904 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23905 cfun->machine->arg_pointer_live = 1;
23906 else if (!cfun->machine->arg_pointer_live)
23907 return 0;
23908 }
23909
23910 /* We should not change far_jump_used during or after reload, as there is
23911 no chance to change stack frame layout. */
23912 if (reload_in_progress || reload_completed)
23913 return 0;
23914
23915 /* Check to see if the function contains a branch
23916 insn with the far jump attribute set. */
23917 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23918 {
23919 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23920 {
23921 far_jump = true;
23922 }
23923 func_size += get_attr_length (insn);
23924 }
23925
23926   /* The far_jump attribute is always true for thumb1 before the
23927      shorten_branch pass, so checking the attribute before that pass
23928      is not very useful.
23929
23930      The following heuristic tries to estimate more accurately whether
23931      a far jump will actually be needed.  It is very conservative, as
23932      there is no way to roll back a decision not to use far jumps.
23933
23934      Thumb1 long branch offsets range from -2048 to 2046.  In the worst
23935      case each 2-byte insn is associated with a 4-byte constant pool
23936      entry, so a function size threshold of 2048/3 is conservative enough.  */
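  /* A worked example of the threshold above: a function whose insns total
     800 bytes gives 800 * 3 = 2400 >= 2048, so far_jump_used is recorded
     and LR will be saved; a 600-byte function gives 1800 < 2048 and the
     (possibly pessimistic) far_jump attribute is ignored.  */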
23937 if (far_jump)
23938 {
23939 if ((func_size * 3) >= 2048)
23940 {
23941 /* Record the fact that we have decided that
23942 the function does use far jumps. */
23943 cfun->machine->far_jump_used = 1;
23944 return 1;
23945 }
23946 }
23947
23948 return 0;
23949 }
23950
23951 /* Return nonzero if FUNC must be entered in ARM mode. */
23952 int
23953 is_called_in_ARM_mode (tree func)
23954 {
23955 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23956
23957 /* Ignore the problem about functions whose address is taken. */
23958 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23959 return TRUE;
23960
23961 #ifdef ARM_PE
23962 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23963 #else
23964 return FALSE;
23965 #endif
23966 }
23967
23968 /* Given the stack offsets and register mask in OFFSETS, decide how
23969 many additional registers to push instead of subtracting a constant
23970 from SP. For epilogues the principle is the same except we use pop.
23971 FOR_PROLOGUE indicates which we're generating. */
23972 static int
23973 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23974 {
23975 HOST_WIDE_INT amount;
23976 unsigned long live_regs_mask = offsets->saved_regs_mask;
23977 /* Extract a mask of the ones we can give to the Thumb's push/pop
23978 instruction. */
23979 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23980 /* Then count how many other high registers will need to be pushed. */
23981 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23982 int n_free, reg_base, size;
23983
23984 if (!for_prologue && frame_pointer_needed)
23985 amount = offsets->locals_base - offsets->saved_regs;
23986 else
23987 amount = offsets->outgoing_args - offsets->saved_regs;
23988
23989 /* If the stack frame size is 512 exactly, we can save one load
23990 instruction, which should make this a win even when optimizing
23991 for speed. */
23992 if (!optimize_size && amount != 512)
23993 return 0;
23994
23995 /* Can't do this if there are high registers to push. */
23996 if (high_regs_pushed != 0)
23997 return 0;
23998
23999 /* Shouldn't do it in the prologue if no registers would normally
24000 be pushed at all. In the epilogue, also allow it if we'll have
24001 a pop insn for the PC. */
24002 if (l_mask == 0
24003 && (for_prologue
24004 || TARGET_BACKTRACE
24005 || (live_regs_mask & 1 << LR_REGNUM) == 0
24006 || TARGET_INTERWORK
24007 || crtl->args.pretend_args_size != 0))
24008 return 0;
24009
24010 /* Don't do this if thumb_expand_prologue wants to emit instructions
24011 between the push and the stack frame allocation. */
24012 if (for_prologue
24013 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24014 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24015 return 0;
24016
24017 reg_base = 0;
24018 n_free = 0;
24019 if (!for_prologue)
24020 {
24021 size = arm_size_return_regs ();
24022 reg_base = ARM_NUM_INTS (size);
24023 live_regs_mask >>= reg_base;
24024 }
24025
24026 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24027 && (for_prologue || call_used_regs[reg_base + n_free]))
24028 {
24029 live_regs_mask >>= 1;
24030 n_free++;
24031 }
24032
24033 if (n_free == 0)
24034 return 0;
24035 gcc_assert (amount / 4 * 4 == amount);
24036
24037 if (amount >= 512 && (amount - n_free * 4) < 512)
24038 return (amount - 508) / 4;
24039 if (amount <= n_free * 4)
24040 return amount / 4;
24041 return 0;
24042 }
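/* A sketch of the arithmetic above, assuming the conditions for using extra
   pushes are met: with amount == 516 and two suitable free low registers,
   516 - 2*4 = 508 < 512, so the function returns (516 - 508) / 4 == 2 and
   the remaining 508 bytes fit a single Thumb SP adjustment.  With
   amount == 8 and three free registers, 8 <= 3*4, so 8 / 4 == 2 registers
   absorb the whole frame and no SP adjustment is needed at all.  */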
24043
24044 /* The bits which aren't usefully expanded as rtl. */
24045 const char *
24046 thumb1_unexpanded_epilogue (void)
24047 {
24048 arm_stack_offsets *offsets;
24049 int regno;
24050 unsigned long live_regs_mask = 0;
24051 int high_regs_pushed = 0;
24052 int extra_pop;
24053 int had_to_push_lr;
24054 int size;
24055
24056 if (cfun->machine->return_used_this_function != 0)
24057 return "";
24058
24059 if (IS_NAKED (arm_current_func_type ()))
24060 return "";
24061
24062 offsets = arm_get_frame_offsets ();
24063 live_regs_mask = offsets->saved_regs_mask;
24064 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24065
24066   /* We can deduce the registers used for the return value from the
24067      function's return type.  This is more reliable than examining
24068      df_regs_ever_live_p () because that will be set if the register is
24069      ever used in the function, not just if it holds a return value.  */
24070 size = arm_size_return_regs ();
24071
24072 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24073 if (extra_pop > 0)
24074 {
24075 unsigned long extra_mask = (1 << extra_pop) - 1;
24076 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24077 }
24078
24079 /* The prolog may have pushed some high registers to use as
24080 work registers. e.g. the testsuite file:
24081 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24082 compiles to produce:
24083 push {r4, r5, r6, r7, lr}
24084 mov r7, r9
24085 mov r6, r8
24086 push {r6, r7}
24087 as part of the prolog. We have to undo that pushing here. */
24088
24089 if (high_regs_pushed)
24090 {
24091 unsigned long mask = live_regs_mask & 0xff;
24092 int next_hi_reg;
24093
24094 /* The available low registers depend on the size of the value we are
24095 returning. */
24096 if (size <= 12)
24097 mask |= 1 << 3;
24098 if (size <= 8)
24099 mask |= 1 << 2;
24100
24101 if (mask == 0)
24102 /* Oh dear! We have no low registers into which we can pop
24103 high registers! */
24104 internal_error
24105 ("no low registers available for popping high registers");
24106
24107 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24108 if (live_regs_mask & (1 << next_hi_reg))
24109 break;
24110
24111 while (high_regs_pushed)
24112 {
24113 /* Find lo register(s) into which the high register(s) can
24114 be popped. */
24115 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24116 {
24117 if (mask & (1 << regno))
24118 high_regs_pushed--;
24119 if (high_regs_pushed == 0)
24120 break;
24121 }
24122
24123 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24124
24125 /* Pop the values into the low register(s). */
24126 thumb_pop (asm_out_file, mask);
24127
24128 /* Move the value(s) into the high registers. */
24129 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24130 {
24131 if (mask & (1 << regno))
24132 {
24133 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24134 regno);
24135
24136 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24137 if (live_regs_mask & (1 << next_hi_reg))
24138 break;
24139 }
24140 }
24141 }
24142 live_regs_mask &= ~0x0f00;
24143 }
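  /* A sketch of the sequence the loop above emits, assuming r8 and r9 were
     saved by the prologue and r2/r3 are available low registers:

         pop  {r2, r3}
         mov  r8, r2
         mov  r9, r3

     i.e. the mirror image of the prologue sequence shown earlier.  */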
24144
24145 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24146 live_regs_mask &= 0xff;
24147
24148 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24149 {
24150 /* Pop the return address into the PC. */
24151 if (had_to_push_lr)
24152 live_regs_mask |= 1 << PC_REGNUM;
24153
24154 /* Either no argument registers were pushed or a backtrace
24155 structure was created which includes an adjusted stack
24156 pointer, so just pop everything. */
24157 if (live_regs_mask)
24158 thumb_pop (asm_out_file, live_regs_mask);
24159
24160 /* We have either just popped the return address into the
24161 	 PC or it was kept in LR for the entire function.
24162 Note that thumb_pop has already called thumb_exit if the
24163 PC was in the list. */
24164 if (!had_to_push_lr)
24165 thumb_exit (asm_out_file, LR_REGNUM);
24166 }
24167 else
24168 {
24169 /* Pop everything but the return address. */
24170 if (live_regs_mask)
24171 thumb_pop (asm_out_file, live_regs_mask);
24172
24173 if (had_to_push_lr)
24174 {
24175 if (size > 12)
24176 {
24177 /* We have no free low regs, so save one. */
24178 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24179 LAST_ARG_REGNUM);
24180 }
24181
24182 /* Get the return address into a temporary register. */
24183 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24184
24185 if (size > 12)
24186 {
24187 /* Move the return address to lr. */
24188 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24189 LAST_ARG_REGNUM);
24190 /* Restore the low register. */
24191 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24192 IP_REGNUM);
24193 regno = LR_REGNUM;
24194 }
24195 else
24196 regno = LAST_ARG_REGNUM;
24197 }
24198 else
24199 regno = LR_REGNUM;
24200
24201 /* Remove the argument registers that were pushed onto the stack. */
24202 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24203 SP_REGNUM, SP_REGNUM,
24204 crtl->args.pretend_args_size);
24205
24206 thumb_exit (asm_out_file, regno);
24207 }
24208
24209 return "";
24210 }
24211
24212 /* Functions to save and restore machine-specific function data. */
24213 static struct machine_function *
24214 arm_init_machine_status (void)
24215 {
24216 struct machine_function *machine;
24217 machine = ggc_cleared_alloc<machine_function> ();
24218
24219 #if ARM_FT_UNKNOWN != 0
24220 machine->func_type = ARM_FT_UNKNOWN;
24221 #endif
24222 return machine;
24223 }
24224
24225 /* Return an RTX indicating where the return address to the
24226 calling function can be found. */
24227 rtx
24228 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24229 {
24230 if (count != 0)
24231 return NULL_RTX;
24232
24233 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24234 }
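/* A usage note (not authoritative): __builtin_return_address (0) reaches
   this function via the RETURN_ADDR_RTX macro and yields the entry value
   of LR; any non-zero COUNT (an outer frame) returns NULL_RTX, i.e. outer
   frames are not supported here.  */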
24235
24236 /* Do anything needed before RTL is emitted for each function. */
24237 void
24238 arm_init_expanders (void)
24239 {
24240 /* Arrange to initialize and mark the machine per-function status. */
24241 init_machine_status = arm_init_machine_status;
24242
24243 /* This is to stop the combine pass optimizing away the alignment
24244 adjustment of va_arg. */
24245 /* ??? It is claimed that this should not be necessary. */
24246 if (cfun)
24247 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24248 }
24249
24250
24251 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24252    isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
24253 to point at the base of the local variables after static stack
24254 space for a function has been allocated. */
24255
24256 HOST_WIDE_INT
24257 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24258 {
24259 arm_stack_offsets *offsets;
24260
24261 offsets = arm_get_frame_offsets ();
24262
24263 switch (from)
24264 {
24265 case ARG_POINTER_REGNUM:
24266 switch (to)
24267 {
24268 case STACK_POINTER_REGNUM:
24269 return offsets->outgoing_args - offsets->saved_args;
24270
24271 case FRAME_POINTER_REGNUM:
24272 return offsets->soft_frame - offsets->saved_args;
24273
24274 case ARM_HARD_FRAME_POINTER_REGNUM:
24275 return offsets->saved_regs - offsets->saved_args;
24276
24277 case THUMB_HARD_FRAME_POINTER_REGNUM:
24278 return offsets->locals_base - offsets->saved_args;
24279
24280 default:
24281 gcc_unreachable ();
24282 }
24283 break;
24284
24285 case FRAME_POINTER_REGNUM:
24286 switch (to)
24287 {
24288 case STACK_POINTER_REGNUM:
24289 return offsets->outgoing_args - offsets->soft_frame;
24290
24291 case ARM_HARD_FRAME_POINTER_REGNUM:
24292 return offsets->saved_regs - offsets->soft_frame;
24293
24294 case THUMB_HARD_FRAME_POINTER_REGNUM:
24295 return offsets->locals_base - offsets->soft_frame;
24296
24297 default:
24298 gcc_unreachable ();
24299 }
24300 break;
24301
24302 default:
24303 gcc_unreachable ();
24304 }
24305 }
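/* A purely illustrative example (the offsets are hypothetical numbers): with
   offsets->saved_args == 0, saved_regs == 8, soft_frame == 8,
   locals_base == 24 and outgoing_args == 32, eliminating ARG_POINTER_REGNUM
   to STACK_POINTER_REGNUM yields 32 - 0 = 32, while FRAME_POINTER_REGNUM to
   THUMB_HARD_FRAME_POINTER_REGNUM yields 24 - 8 = 16.  */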
24306
24307 /* Generate the function's prologue. */
24308
24309 void
24310 thumb1_expand_prologue (void)
24311 {
24312 rtx_insn *insn;
24313
24314 HOST_WIDE_INT amount;
24315 arm_stack_offsets *offsets;
24316 unsigned long func_type;
24317 int regno;
24318 unsigned long live_regs_mask;
24319 unsigned long l_mask;
24320 unsigned high_regs_pushed = 0;
24321
24322 func_type = arm_current_func_type ();
24323
24324 /* Naked functions don't have prologues. */
24325 if (IS_NAKED (func_type))
24326 return;
24327
24328 if (IS_INTERRUPT (func_type))
24329 {
24330 error ("interrupt Service Routines cannot be coded in Thumb mode");
24331 return;
24332 }
24333
24334 if (is_called_in_ARM_mode (current_function_decl))
24335 emit_insn (gen_prologue_thumb1_interwork ());
24336
24337 offsets = arm_get_frame_offsets ();
24338 live_regs_mask = offsets->saved_regs_mask;
24339
24340 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24341 l_mask = live_regs_mask & 0x40ff;
24342 /* Then count how many other high registers will need to be pushed. */
24343 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24344
24345 if (crtl->args.pretend_args_size)
24346 {
24347 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24348
24349 if (cfun->machine->uses_anonymous_args)
24350 {
24351 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24352 unsigned long mask;
24353
24354 mask = 1ul << (LAST_ARG_REGNUM + 1);
24355 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
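	  /* A small worked example: with 8 bytes of pretend args,
	     num_pushes is 2 and, since LAST_ARG_REGNUM is r3, mask becomes
	     (1 << 4) - (1 << 2) == 0b1100, i.e. push {r2, r3}.  */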
24356
24357 insn = thumb1_emit_multi_reg_push (mask, 0);
24358 }
24359 else
24360 {
24361 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24362 stack_pointer_rtx, x));
24363 }
24364 RTX_FRAME_RELATED_P (insn) = 1;
24365 }
24366
24367 if (TARGET_BACKTRACE)
24368 {
24369 HOST_WIDE_INT offset = 0;
24370 unsigned work_register;
24371 rtx work_reg, x, arm_hfp_rtx;
24372
24373 /* We have been asked to create a stack backtrace structure.
24374 The code looks like this:
24375
24376 0 .align 2
24377 0 func:
24378 0 sub SP, #16 Reserve space for 4 registers.
24379 2 push {R7} Push low registers.
24380 4 add R7, SP, #20 Get the stack pointer before the push.
24381 6 str R7, [SP, #8] Store the stack pointer
24382 (before reserving the space).
24383 8 mov R7, PC Get hold of the start of this code + 12.
24384 10 str R7, [SP, #16] Store it.
24385 12 mov R7, FP Get hold of the current frame pointer.
24386 14 str R7, [SP, #4] Store it.
24387 16 mov R7, LR Get hold of the current return address.
24388 18 str R7, [SP, #12] Store it.
24389 20 add R7, SP, #16 Point at the start of the
24390 backtrace structure.
24391 22 mov FP, R7 Put this value into the frame pointer. */
24392
24393 work_register = thumb_find_work_register (live_regs_mask);
24394 work_reg = gen_rtx_REG (SImode, work_register);
24395 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24396
24397 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24398 stack_pointer_rtx, GEN_INT (-16)));
24399 RTX_FRAME_RELATED_P (insn) = 1;
24400
24401 if (l_mask)
24402 {
24403 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24404 RTX_FRAME_RELATED_P (insn) = 1;
24405
24406 offset = bit_count (l_mask) * UNITS_PER_WORD;
24407 }
24408
24409 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24410 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24411
24412 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24413 x = gen_frame_mem (SImode, x);
24414 emit_move_insn (x, work_reg);
24415
24416 /* Make sure that the instruction fetching the PC is in the right place
24417 to calculate "start of backtrace creation code + 12". */
24418 /* ??? The stores using the common WORK_REG ought to be enough to
24419 prevent the scheduler from doing anything weird. Failing that
24420 we could always move all of the following into an UNSPEC_VOLATILE. */
24421 if (l_mask)
24422 {
24423 x = gen_rtx_REG (SImode, PC_REGNUM);
24424 emit_move_insn (work_reg, x);
24425
24426 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24427 x = gen_frame_mem (SImode, x);
24428 emit_move_insn (x, work_reg);
24429
24430 emit_move_insn (work_reg, arm_hfp_rtx);
24431
24432 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24433 x = gen_frame_mem (SImode, x);
24434 emit_move_insn (x, work_reg);
24435 }
24436 else
24437 {
24438 emit_move_insn (work_reg, arm_hfp_rtx);
24439
24440 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24441 x = gen_frame_mem (SImode, x);
24442 emit_move_insn (x, work_reg);
24443
24444 x = gen_rtx_REG (SImode, PC_REGNUM);
24445 emit_move_insn (work_reg, x);
24446
24447 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24448 x = gen_frame_mem (SImode, x);
24449 emit_move_insn (x, work_reg);
24450 }
24451
24452 x = gen_rtx_REG (SImode, LR_REGNUM);
24453 emit_move_insn (work_reg, x);
24454
24455 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24456 x = gen_frame_mem (SImode, x);
24457 emit_move_insn (x, work_reg);
24458
24459 x = GEN_INT (offset + 12);
24460 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24461
24462 emit_move_insn (arm_hfp_rtx, work_reg);
24463 }
24464 /* Optimization: If we are not pushing any low registers but we are going
24465 to push some high registers then delay our first push. This will just
24466 be a push of LR and we can combine it with the push of the first high
24467 register. */
24468 else if ((l_mask & 0xff) != 0
24469 || (high_regs_pushed == 0 && l_mask))
24470 {
24471 unsigned long mask = l_mask;
24472 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24473 insn = thumb1_emit_multi_reg_push (mask, mask);
24474 RTX_FRAME_RELATED_P (insn) = 1;
24475 }
24476
24477 if (high_regs_pushed)
24478 {
24479 unsigned pushable_regs;
24480 unsigned next_hi_reg;
24481 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24482 : crtl->args.info.nregs;
24483 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24484
24485 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24486 if (live_regs_mask & (1 << next_hi_reg))
24487 break;
24488
24489       /* Here we need to mask out registers used for passing arguments, even
24490 	 if they could otherwise be pushed, to avoid using them to stash the
24491 	 high registers: doing so could clobber live argument values.  */
24492 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24493
24494 if (pushable_regs == 0)
24495 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24496
24497 while (high_regs_pushed > 0)
24498 {
24499 unsigned long real_regs_mask = 0;
24500
24501 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24502 {
24503 if (pushable_regs & (1 << regno))
24504 {
24505 emit_move_insn (gen_rtx_REG (SImode, regno),
24506 gen_rtx_REG (SImode, next_hi_reg));
24507
24508 high_regs_pushed --;
24509 real_regs_mask |= (1 << next_hi_reg);
24510
24511 if (high_regs_pushed)
24512 {
24513 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24514 next_hi_reg --)
24515 if (live_regs_mask & (1 << next_hi_reg))
24516 break;
24517 }
24518 else
24519 {
24520 pushable_regs &= ~((1 << regno) - 1);
24521 break;
24522 }
24523 }
24524 }
24525
24526 /* If we had to find a work register and we have not yet
24527 saved the LR then add it to the list of regs to push. */
24528 if (l_mask == (1 << LR_REGNUM))
24529 {
24530 pushable_regs |= l_mask;
24531 real_regs_mask |= l_mask;
24532 l_mask = 0;
24533 }
24534
24535 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24536 RTX_FRAME_RELATED_P (insn) = 1;
24537 }
24538 }
24539
24540 /* Load the pic register before setting the frame pointer,
24541 so we can use r7 as a temporary work register. */
24542 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24543 arm_load_pic_register (live_regs_mask);
24544
24545 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24546 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24547 stack_pointer_rtx);
24548
24549 if (flag_stack_usage_info)
24550 current_function_static_stack_size
24551 = offsets->outgoing_args - offsets->saved_args;
24552
24553 amount = offsets->outgoing_args - offsets->saved_regs;
24554 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24555 if (amount)
24556 {
24557 if (amount < 512)
24558 {
24559 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24560 GEN_INT (- amount)));
24561 RTX_FRAME_RELATED_P (insn) = 1;
24562 }
24563 else
24564 {
24565 rtx reg, dwarf;
24566
24567 /* The stack decrement is too big for an immediate value in a single
24568 insn. In theory we could issue multiple subtracts, but after
24569 three of them it becomes more space efficient to place the full
24570 value in the constant pool and load into a register. (Also the
24571 ARM debugger really likes to see only one stack decrement per
24572 function). So instead we look for a scratch register into which
24573 we can load the decrement, and then we subtract this from the
24574 stack pointer. Unfortunately on the thumb the only available
24575 scratch registers are the argument registers, and we cannot use
24576 these as they may hold arguments to the function. Instead we
24577 attempt to locate a call preserved register which is used by this
24578 function. If we can find one, then we know that it will have
24579 been pushed at the start of the prologue and so we can corrupt
24580 it now. */
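	  /* A hedged sketch of what this typically produces for, say,
	     amount == 1024 with r4 live (and therefore already pushed):

	         ldr  r4, .LCn        @ .LCn (hypothetical label) holds -1024
	         add  sp, sp, r4

	     where the constant-pool entry is emitted later by the minipool
	     machinery.  */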
24581 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24582 if (live_regs_mask & (1 << regno))
24583 break;
24584
24585 gcc_assert(regno <= LAST_LO_REGNUM);
24586
24587 reg = gen_rtx_REG (SImode, regno);
24588
24589 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24590
24591 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24592 stack_pointer_rtx, reg));
24593
24594 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24595 plus_constant (Pmode, stack_pointer_rtx,
24596 -amount));
24597 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24598 RTX_FRAME_RELATED_P (insn) = 1;
24599 }
24600 }
24601
24602 if (frame_pointer_needed)
24603 thumb_set_frame_pointer (offsets);
24604
24605 /* If we are profiling, make sure no instructions are scheduled before
24606 the call to mcount. Similarly if the user has requested no
24607 scheduling in the prolog. Similarly if we want non-call exceptions
24608 using the EABI unwinder, to prevent faulting instructions from being
24609 swapped with a stack adjustment. */
24610 if (crtl->profile || !TARGET_SCHED_PROLOG
24611 || (arm_except_unwind_info (&global_options) == UI_TARGET
24612 && cfun->can_throw_non_call_exceptions))
24613 emit_insn (gen_blockage ());
24614
24615 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24616 if (live_regs_mask & 0xff)
24617 cfun->machine->lr_save_eliminated = 0;
24618 }
24619
24620 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24621 POP instruction can be generated. LR should be replaced by PC. All
24622 the checks required are already done by USE_RETURN_INSN (). Hence,
24623 all we really need to check here is if single register is to be
24624 returned, or multiple register return. */
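/* For example, if only LR was saved, the single-register path below emits a
   return parallel that pops the saved LR slot straight into the PC
   (typically assembling to "ldr pc, [sp], #4" or "pop {pc}"); a mask of
   {r4, r5, lr} is instead rewritten to {r4, r5, pc} and handed to
   arm_emit_multi_reg_pop, which typically assembles to "pop {r4, r5, pc}".  */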
24625 void
24626 thumb2_expand_return (bool simple_return)
24627 {
24628 int i, num_regs;
24629 unsigned long saved_regs_mask;
24630 arm_stack_offsets *offsets;
24631
24632 offsets = arm_get_frame_offsets ();
24633 saved_regs_mask = offsets->saved_regs_mask;
24634
24635 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24636 if (saved_regs_mask & (1 << i))
24637 num_regs++;
24638
24639 if (!simple_return && saved_regs_mask)
24640 {
24641 if (num_regs == 1)
24642 {
24643 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24644 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24645 rtx addr = gen_rtx_MEM (SImode,
24646 gen_rtx_POST_INC (SImode,
24647 stack_pointer_rtx));
24648 set_mem_alias_set (addr, get_frame_alias_set ());
24649 XVECEXP (par, 0, 0) = ret_rtx;
24650 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24651 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24652 emit_jump_insn (par);
24653 }
24654 else
24655 {
24656 saved_regs_mask &= ~ (1 << LR_REGNUM);
24657 saved_regs_mask |= (1 << PC_REGNUM);
24658 arm_emit_multi_reg_pop (saved_regs_mask);
24659 }
24660 }
24661 else
24662 {
24663 emit_jump_insn (simple_return_rtx);
24664 }
24665 }
24666
24667 void
24668 thumb1_expand_epilogue (void)
24669 {
24670 HOST_WIDE_INT amount;
24671 arm_stack_offsets *offsets;
24672 int regno;
24673
24674   /* Naked functions don't have epilogues.  */
24675 if (IS_NAKED (arm_current_func_type ()))
24676 return;
24677
24678 offsets = arm_get_frame_offsets ();
24679 amount = offsets->outgoing_args - offsets->saved_regs;
24680
24681 if (frame_pointer_needed)
24682 {
24683 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24684 amount = offsets->locals_base - offsets->saved_regs;
24685 }
24686 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24687
24688 gcc_assert (amount >= 0);
24689 if (amount)
24690 {
24691 emit_insn (gen_blockage ());
24692
24693 if (amount < 512)
24694 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24695 GEN_INT (amount)));
24696 else
24697 {
24698 /* r3 is always free in the epilogue. */
24699 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24700
24701 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24702 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24703 }
24704 }
24705
24706 /* Emit a USE (stack_pointer_rtx), so that
24707 the stack adjustment will not be deleted. */
24708 emit_insn (gen_force_register_use (stack_pointer_rtx));
24709
24710 if (crtl->profile || !TARGET_SCHED_PROLOG)
24711 emit_insn (gen_blockage ());
24712
24713 /* Emit a clobber for each insn that will be restored in the epilogue,
24714 so that flow2 will get register lifetimes correct. */
24715 for (regno = 0; regno < 13; regno++)
24716 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24717 emit_clobber (gen_rtx_REG (SImode, regno));
24718
24719 if (! df_regs_ever_live_p (LR_REGNUM))
24720 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24721 }
24722
24723 /* Epilogue code for APCS frame. */
24724 static void
24725 arm_expand_epilogue_apcs_frame (bool really_return)
24726 {
24727 unsigned long func_type;
24728 unsigned long saved_regs_mask;
24729 int num_regs = 0;
24730 int i;
24731 int floats_from_frame = 0;
24732 arm_stack_offsets *offsets;
24733
24734 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24735 func_type = arm_current_func_type ();
24736
24737 /* Get frame offsets for ARM. */
24738 offsets = arm_get_frame_offsets ();
24739 saved_regs_mask = offsets->saved_regs_mask;
24740
24741 /* Find the offset of the floating-point save area in the frame. */
24742 floats_from_frame
24743 = (offsets->saved_args
24744 + arm_compute_static_chain_stack_bytes ()
24745 - offsets->frame);
24746
24747   /* Compute how many core registers are saved and how far away the floats are.  */
24748 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24749 if (saved_regs_mask & (1 << i))
24750 {
24751 num_regs++;
24752 floats_from_frame += 4;
24753 }
24754
24755 if (TARGET_HARD_FLOAT && TARGET_VFP)
24756 {
24757 int start_reg;
24758 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24759
24760 /* The offset is from IP_REGNUM. */
24761 int saved_size = arm_get_vfp_saved_size ();
24762 if (saved_size > 0)
24763 {
24764 rtx_insn *insn;
24765 floats_from_frame += saved_size;
24766 insn = emit_insn (gen_addsi3 (ip_rtx,
24767 hard_frame_pointer_rtx,
24768 GEN_INT (-floats_from_frame)));
24769 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24770 ip_rtx, hard_frame_pointer_rtx);
24771 }
24772
24773 /* Generate VFP register multi-pop. */
24774 start_reg = FIRST_VFP_REGNUM;
24775
24776 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24777 /* Look for a case where a reg does not need restoring. */
24778 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24779 && (!df_regs_ever_live_p (i + 1)
24780 || call_used_regs[i + 1]))
24781 {
24782 if (start_reg != i)
24783 arm_emit_vfp_multi_reg_pop (start_reg,
24784 (i - start_reg) / 2,
24785 gen_rtx_REG (SImode,
24786 IP_REGNUM));
24787 start_reg = i + 2;
24788 }
24789
24790 /* Restore the remaining regs that we have discovered (or possibly
24791 even all of them, if the conditional in the for loop never
24792 fired). */
24793 if (start_reg != i)
24794 arm_emit_vfp_multi_reg_pop (start_reg,
24795 (i - start_reg) / 2,
24796 gen_rtx_REG (SImode, IP_REGNUM));
24797 }
24798
24799 if (TARGET_IWMMXT)
24800 {
24801 /* The frame pointer is guaranteed to be non-double-word aligned, as
24802 it is set to double-word-aligned old_stack_pointer - 4. */
24803 rtx_insn *insn;
24804 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24805
24806 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24807 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24808 {
24809 rtx addr = gen_frame_mem (V2SImode,
24810 plus_constant (Pmode, hard_frame_pointer_rtx,
24811 - lrm_count * 4));
24812 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24813 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24814 gen_rtx_REG (V2SImode, i),
24815 NULL_RTX);
24816 lrm_count += 2;
24817 }
24818 }
24819
24820   /* saved_regs_mask should contain IP, which holds the old stack pointer
24821      from the time the activation record was created.  Since SP and IP are
24822      adjacent registers, we can restore the value directly into SP.  */
24823 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24824 saved_regs_mask &= ~(1 << IP_REGNUM);
24825 saved_regs_mask |= (1 << SP_REGNUM);
24826
24827 /* There are two registers left in saved_regs_mask - LR and PC. We
24828 only need to restore LR (the return address), but to
24829 save time we can load it directly into PC, unless we need a
24830 special function exit sequence, or we are not really returning. */
24831 if (really_return
24832 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24833 && !crtl->calls_eh_return)
24834 /* Delete LR from the register mask, so that LR on
24835 the stack is loaded into the PC in the register mask. */
24836 saved_regs_mask &= ~(1 << LR_REGNUM);
24837 else
24838 saved_regs_mask &= ~(1 << PC_REGNUM);
24839
24840 num_regs = bit_count (saved_regs_mask);
24841 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24842 {
24843 rtx_insn *insn;
24844 emit_insn (gen_blockage ());
24845 /* Unwind the stack to just below the saved registers. */
24846 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24847 hard_frame_pointer_rtx,
24848 GEN_INT (- 4 * num_regs)));
24849
24850 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24851 stack_pointer_rtx, hard_frame_pointer_rtx);
24852 }
24853
24854 arm_emit_multi_reg_pop (saved_regs_mask);
24855
24856 if (IS_INTERRUPT (func_type))
24857 {
24858 /* Interrupt handlers will have pushed the
24859 IP onto the stack, so restore it now. */
24860 rtx_insn *insn;
24861 rtx addr = gen_rtx_MEM (SImode,
24862 gen_rtx_POST_INC (SImode,
24863 stack_pointer_rtx));
24864 set_mem_alias_set (addr, get_frame_alias_set ());
24865 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24866 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24867 gen_rtx_REG (SImode, IP_REGNUM),
24868 NULL_RTX);
24869 }
24870
24871 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24872 return;
24873
24874 if (crtl->calls_eh_return)
24875 emit_insn (gen_addsi3 (stack_pointer_rtx,
24876 stack_pointer_rtx,
24877 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24878
24879 if (IS_STACKALIGN (func_type))
24880 /* Restore the original stack pointer. Before prologue, the stack was
24881 realigned and the original stack pointer saved in r0. For details,
24882 see comment in arm_expand_prologue. */
24883 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24884
24885 emit_jump_insn (simple_return_rtx);
24886 }
24887
24888 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24889 function is not a sibcall. */
24890 void
24891 arm_expand_epilogue (bool really_return)
24892 {
24893 unsigned long func_type;
24894 unsigned long saved_regs_mask;
24895 int num_regs = 0;
24896 int i;
24897 int amount;
24898 arm_stack_offsets *offsets;
24899
24900 func_type = arm_current_func_type ();
24901
24902   /* Naked functions don't have epilogues.  Hence, generate a return pattern
24903      and let output_return_instruction take care of any instruction emission.  */
24904 if (IS_NAKED (func_type)
24905 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24906 {
24907 if (really_return)
24908 emit_jump_insn (simple_return_rtx);
24909 return;
24910 }
24911
24912 /* If we are throwing an exception, then we really must be doing a
24913 return, so we can't tail-call. */
24914 gcc_assert (!crtl->calls_eh_return || really_return);
24915
24916 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24917 {
24918 arm_expand_epilogue_apcs_frame (really_return);
24919 return;
24920 }
24921
24922 /* Get frame offsets for ARM. */
24923 offsets = arm_get_frame_offsets ();
24924 saved_regs_mask = offsets->saved_regs_mask;
24925 num_regs = bit_count (saved_regs_mask);
24926
24927 if (frame_pointer_needed)
24928 {
24929 rtx_insn *insn;
24930 /* Restore stack pointer if necessary. */
24931 if (TARGET_ARM)
24932 {
24933           /* In ARM mode, the frame pointer points to the first saved register.
24934              Restore the stack pointer to the last saved register.  */
24935 amount = offsets->frame - offsets->saved_regs;
24936
24937 /* Force out any pending memory operations that reference stacked data
24938 before stack de-allocation occurs. */
24939 emit_insn (gen_blockage ());
24940 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24941 hard_frame_pointer_rtx,
24942 GEN_INT (amount)));
24943 arm_add_cfa_adjust_cfa_note (insn, amount,
24944 stack_pointer_rtx,
24945 hard_frame_pointer_rtx);
24946
24947 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24948 deleted. */
24949 emit_insn (gen_force_register_use (stack_pointer_rtx));
24950 }
24951 else
24952 {
24953 /* In Thumb-2 mode, the frame pointer points to the last saved
24954 register. */
24955 amount = offsets->locals_base - offsets->saved_regs;
24956 if (amount)
24957 {
24958 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24959 hard_frame_pointer_rtx,
24960 GEN_INT (amount)));
24961 arm_add_cfa_adjust_cfa_note (insn, amount,
24962 hard_frame_pointer_rtx,
24963 hard_frame_pointer_rtx);
24964 }
24965
24966 /* Force out any pending memory operations that reference stacked data
24967 before stack de-allocation occurs. */
24968 emit_insn (gen_blockage ());
24969 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24970 hard_frame_pointer_rtx));
24971 arm_add_cfa_adjust_cfa_note (insn, 0,
24972 stack_pointer_rtx,
24973 hard_frame_pointer_rtx);
24974 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24975 deleted. */
24976 emit_insn (gen_force_register_use (stack_pointer_rtx));
24977 }
24978 }
24979 else
24980 {
24981 /* Pop off outgoing args and local frame to adjust stack pointer to
24982 last saved register. */
24983 amount = offsets->outgoing_args - offsets->saved_regs;
24984 if (amount)
24985 {
24986 rtx_insn *tmp;
24987 /* Force out any pending memory operations that reference stacked data
24988 before stack de-allocation occurs. */
24989 emit_insn (gen_blockage ());
24990 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24991 stack_pointer_rtx,
24992 GEN_INT (amount)));
24993 arm_add_cfa_adjust_cfa_note (tmp, amount,
24994 stack_pointer_rtx, stack_pointer_rtx);
24995 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24996 not deleted. */
24997 emit_insn (gen_force_register_use (stack_pointer_rtx));
24998 }
24999 }
25000
25001 if (TARGET_HARD_FLOAT && TARGET_VFP)
25002 {
25003 /* Generate VFP register multi-pop. */
25004 int end_reg = LAST_VFP_REGNUM + 1;
25005
25006 /* Scan the registers in reverse order. We need to match
25007 any groupings made in the prologue and generate matching
25008 	 vldm operations.  Groups must be matched because, unlike
25009 	 pop, vldm can only handle consecutive registers.  */
25010 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25011 /* Look for a case where a reg does not need restoring. */
25012 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25013 && (!df_regs_ever_live_p (i + 1)
25014 || call_used_regs[i + 1]))
25015 {
25016 /* Restore the regs discovered so far (from reg+2 to
25017 end_reg). */
25018 if (end_reg > i + 2)
25019 arm_emit_vfp_multi_reg_pop (i + 2,
25020 (end_reg - (i + 2)) / 2,
25021 stack_pointer_rtx);
25022 end_reg = i;
25023 }
25024
25025 /* Restore the remaining regs that we have discovered (or possibly
25026 even all of them, if the conditional in the for loop never
25027 fired). */
25028 if (end_reg > i + 2)
25029 arm_emit_vfp_multi_reg_pop (i + 2,
25030 (end_reg - (i + 2)) / 2,
25031 stack_pointer_rtx);
25032 }
25033
25034 if (TARGET_IWMMXT)
25035 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25036 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25037 {
25038 rtx_insn *insn;
25039 rtx addr = gen_rtx_MEM (V2SImode,
25040 gen_rtx_POST_INC (SImode,
25041 stack_pointer_rtx));
25042 set_mem_alias_set (addr, get_frame_alias_set ());
25043 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25044 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25045 gen_rtx_REG (V2SImode, i),
25046 NULL_RTX);
25047 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25048 stack_pointer_rtx, stack_pointer_rtx);
25049 }
25050
25051 if (saved_regs_mask)
25052 {
25053 rtx insn;
25054 bool return_in_pc = false;
25055
25056 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25057 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25058 && !IS_STACKALIGN (func_type)
25059 && really_return
25060 && crtl->args.pretend_args_size == 0
25061 && saved_regs_mask & (1 << LR_REGNUM)
25062 && !crtl->calls_eh_return)
25063 {
25064 saved_regs_mask &= ~(1 << LR_REGNUM);
25065 saved_regs_mask |= (1 << PC_REGNUM);
25066 return_in_pc = true;
25067 }
25068
25069 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25070 {
25071 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25072 if (saved_regs_mask & (1 << i))
25073 {
25074 rtx addr = gen_rtx_MEM (SImode,
25075 gen_rtx_POST_INC (SImode,
25076 stack_pointer_rtx));
25077 set_mem_alias_set (addr, get_frame_alias_set ());
25078
25079 if (i == PC_REGNUM)
25080 {
25081 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25082 XVECEXP (insn, 0, 0) = ret_rtx;
25083 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25084 gen_rtx_REG (SImode, i),
25085 addr);
25086 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25087 insn = emit_jump_insn (insn);
25088 }
25089 else
25090 {
25091 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25092 addr));
25093 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25094 gen_rtx_REG (SImode, i),
25095 NULL_RTX);
25096 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25097 stack_pointer_rtx,
25098 stack_pointer_rtx);
25099 }
25100 }
25101 }
25102 else
25103 {
25104 if (TARGET_LDRD
25105 && current_tune->prefer_ldrd_strd
25106 && !optimize_function_for_size_p (cfun))
25107 {
25108 if (TARGET_THUMB2)
25109 thumb2_emit_ldrd_pop (saved_regs_mask);
25110 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25111 arm_emit_ldrd_pop (saved_regs_mask);
25112 else
25113 arm_emit_multi_reg_pop (saved_regs_mask);
25114 }
25115 else
25116 arm_emit_multi_reg_pop (saved_regs_mask);
25117 }
25118
25119 if (return_in_pc == true)
25120 return;
25121 }
25122
25123 if (crtl->args.pretend_args_size)
25124 {
25125 int i, j;
25126 rtx dwarf = NULL_RTX;
25127 rtx_insn *tmp =
25128 emit_insn (gen_addsi3 (stack_pointer_rtx,
25129 stack_pointer_rtx,
25130 GEN_INT (crtl->args.pretend_args_size)));
25131
25132 RTX_FRAME_RELATED_P (tmp) = 1;
25133
25134 if (cfun->machine->uses_anonymous_args)
25135 {
25136           /* Restore pretend args.  See arm_expand_prologue for how
25137              pretend_args are saved on the stack.  */
25138 int num_regs = crtl->args.pretend_args_size / 4;
25139 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
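          /* E.g. 8 bytes of pretend args give num_regs == 2 and a mask of
             (0xf0 >> 2) & 0xf == 0xc, i.e. CFA-restore notes for r2 and r3.  */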
25140 for (j = 0, i = 0; j < num_regs; i++)
25141 if (saved_regs_mask & (1 << i))
25142 {
25143 rtx reg = gen_rtx_REG (SImode, i);
25144 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25145 j++;
25146 }
25147 REG_NOTES (tmp) = dwarf;
25148 }
25149 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25150 stack_pointer_rtx, stack_pointer_rtx);
25151 }
25152
25153 if (!really_return)
25154 return;
25155
25156 if (crtl->calls_eh_return)
25157 emit_insn (gen_addsi3 (stack_pointer_rtx,
25158 stack_pointer_rtx,
25159 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25160
25161 if (IS_STACKALIGN (func_type))
25162 /* Restore the original stack pointer. Before prologue, the stack was
25163 realigned and the original stack pointer saved in r0. For details,
25164 see comment in arm_expand_prologue. */
25165 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25166
25167 emit_jump_insn (simple_return_rtx);
25168 }
25169
25170 /* Implementation of insn prologue_thumb1_interwork. This is the first
25171 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25172
25173 const char *
25174 thumb1_output_interwork (void)
25175 {
25176 const char * name;
25177 FILE *f = asm_out_file;
25178
25179 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25180 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25181 == SYMBOL_REF);
25182 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25183
25184 /* Generate code sequence to switch us into Thumb mode. */
25185 /* The .code 32 directive has already been emitted by
25186 ASM_DECLARE_FUNCTION_NAME. */
25187 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25188 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25189
25190 /* Generate a label, so that the debugger will notice the
25191 change in instruction sets. This label is also used by
25192 the assembler to bypass the ARM code when this function
25193 is called from a Thumb encoded function elsewhere in the
25194 same file. Hence the definition of STUB_NAME here must
25195 agree with the definition in gas/config/tc-arm.c. */
25196
25197 #define STUB_NAME ".real_start_of"
25198
25199 fprintf (f, "\t.code\t16\n");
25200 #ifdef ARM_PE
25201 if (arm_dllexport_name_p (name))
25202 name = arm_strip_name_encoding (name);
25203 #endif
25204 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25205 fprintf (f, "\t.thumb_func\n");
25206 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25207
25208 return "";
25209 }
25210
25211 /* Handle the case of a double word load into a low register from
25212 a computed memory address. The computed address may involve a
25213 register which is overwritten by the load. */
25214 const char *
25215 thumb_load_double_from_address (rtx *operands)
25216 {
25217 rtx addr;
25218 rtx base;
25219 rtx offset;
25220 rtx arg1;
25221 rtx arg2;
25222
25223 gcc_assert (REG_P (operands[0]));
25224 gcc_assert (MEM_P (operands[1]));
25225
25226 /* Get the memory address. */
25227 addr = XEXP (operands[1], 0);
25228
25229 /* Work out how the memory address is computed. */
25230 switch (GET_CODE (addr))
25231 {
25232 case REG:
25233 operands[2] = adjust_address (operands[1], SImode, 4);
25234
25235 if (REGNO (operands[0]) == REGNO (addr))
25236 {
25237 output_asm_insn ("ldr\t%H0, %2", operands);
25238 output_asm_insn ("ldr\t%0, %1", operands);
25239 }
25240 else
25241 {
25242 output_asm_insn ("ldr\t%0, %1", operands);
25243 output_asm_insn ("ldr\t%H0, %2", operands);
25244 }
25245 break;
25246
25247 case CONST:
25248 /* Compute <address> + 4 for the high order load. */
25249 operands[2] = adjust_address (operands[1], SImode, 4);
25250
25251 output_asm_insn ("ldr\t%0, %1", operands);
25252 output_asm_insn ("ldr\t%H0, %2", operands);
25253 break;
25254
25255 case PLUS:
25256 arg1 = XEXP (addr, 0);
25257 arg2 = XEXP (addr, 1);
25258
25259 if (CONSTANT_P (arg1))
25260 base = arg2, offset = arg1;
25261 else
25262 base = arg1, offset = arg2;
25263
25264 gcc_assert (REG_P (base));
25265
25266 /* Catch the case of <address> = <reg> + <reg> */
25267 if (REG_P (offset))
25268 {
25269 int reg_offset = REGNO (offset);
25270 int reg_base = REGNO (base);
25271 int reg_dest = REGNO (operands[0]);
25272
25273 /* Add the base and offset registers together into the
25274 higher destination register. */
25275 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25276 reg_dest + 1, reg_base, reg_offset);
25277
25278 /* Load the lower destination register from the address in
25279 the higher destination register. */
25280 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25281 reg_dest, reg_dest + 1);
25282
25283 /* Load the higher destination register from its own address
25284 plus 4. */
25285 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25286 reg_dest + 1, reg_dest + 1);
25287 }
25288 else
25289 {
25290 /* Compute <address> + 4 for the high order load. */
25291 operands[2] = adjust_address (operands[1], SImode, 4);
25292
25293 /* If the computed address is held in the low order register
25294 then load the high order register first, otherwise always
25295 load the low order register first. */
25296 if (REGNO (operands[0]) == REGNO (base))
25297 {
25298 output_asm_insn ("ldr\t%H0, %2", operands);
25299 output_asm_insn ("ldr\t%0, %1", operands);
25300 }
25301 else
25302 {
25303 output_asm_insn ("ldr\t%0, %1", operands);
25304 output_asm_insn ("ldr\t%H0, %2", operands);
25305 }
25306 }
25307 break;
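      /* Illustrative output for the register+register branch above, assuming
         the destination pair is r4/r5 and the address is r1 + r2:

             add  r5, r1, r2
             ldr  r4, [r5, #0]
             ldr  r5, [r5, #4]   */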
25308
25309 case LABEL_REF:
25310 /* With no registers to worry about we can just load the value
25311 directly. */
25312 operands[2] = adjust_address (operands[1], SImode, 4);
25313
25314 output_asm_insn ("ldr\t%H0, %2", operands);
25315 output_asm_insn ("ldr\t%0, %1", operands);
25316 break;
25317
25318 default:
25319 gcc_unreachable ();
25320 }
25321
25322 return "";
25323 }
25324
25325 const char *
25326 thumb_output_move_mem_multiple (int n, rtx *operands)
25327 {
25328 rtx tmp;
25329
25330 switch (n)
25331 {
25332 case 2:
25333 if (REGNO (operands[4]) > REGNO (operands[5]))
25334 {
25335 tmp = operands[4];
25336 operands[4] = operands[5];
25337 operands[5] = tmp;
25338 }
25339 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25340 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25341 break;
25342
25343 case 3:
25344 if (REGNO (operands[4]) > REGNO (operands[5]))
25345 std::swap (operands[4], operands[5]);
25346 if (REGNO (operands[5]) > REGNO (operands[6]))
25347 std::swap (operands[5], operands[6]);
25348 if (REGNO (operands[4]) > REGNO (operands[5]))
25349 std::swap (operands[4], operands[5]);
25350
25351 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25352 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25353 break;
25354
25355 default:
25356 gcc_unreachable ();
25357 }
25358
25359 return "";
25360 }
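/* For n == 3, assuming the destination and source pointers are in r0 and r1
   and the scratch registers (sorted by the swaps above) are r3, r4 and r5,
   the emitted pair is simply:

       ldmia r1!, {r3, r4, r5}
       stmia r0!, {r3, r4, r5}   */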
25361
25362 /* Output a call-via instruction for thumb state. */
25363 const char *
25364 thumb_call_via_reg (rtx reg)
25365 {
25366 int regno = REGNO (reg);
25367 rtx *labelp;
25368
25369 gcc_assert (regno < LR_REGNUM);
25370
25371 /* If we are in the normal text section we can use a single instance
25372 per compilation unit. If we are doing function sections, then we need
25373 an entry per section, since we can't rely on reachability. */
25374 if (in_section == text_section)
25375 {
25376 thumb_call_reg_needed = 1;
25377
25378 if (thumb_call_via_label[regno] == NULL)
25379 thumb_call_via_label[regno] = gen_label_rtx ();
25380 labelp = thumb_call_via_label + regno;
25381 }
25382 else
25383 {
25384 if (cfun->machine->call_via[regno] == NULL)
25385 cfun->machine->call_via[regno] = gen_label_rtx ();
25386 labelp = cfun->machine->call_via + regno;
25387 }
25388
25389 output_asm_insn ("bl\t%a0", labelp);
25390 return "";
25391 }
25392
25393 /* Routines for generating rtl. */
25394 void
25395 thumb_expand_movmemqi (rtx *operands)
25396 {
25397 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25398 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25399 HOST_WIDE_INT len = INTVAL (operands[2]);
25400 HOST_WIDE_INT offset = 0;
25401
25402 while (len >= 12)
25403 {
25404 emit_insn (gen_movmem12b (out, in, out, in));
25405 len -= 12;
25406 }
25407
25408 if (len >= 8)
25409 {
25410 emit_insn (gen_movmem8b (out, in, out, in));
25411 len -= 8;
25412 }
25413
25414 if (len >= 4)
25415 {
25416 rtx reg = gen_reg_rtx (SImode);
25417 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25418 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25419 len -= 4;
25420 offset += 4;
25421 }
25422
25423 if (len >= 2)
25424 {
25425 rtx reg = gen_reg_rtx (HImode);
25426 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25427 plus_constant (Pmode, in,
25428 offset))));
25429 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25430 offset)),
25431 reg));
25432 len -= 2;
25433 offset += 2;
25434 }
25435
25436 if (len)
25437 {
25438 rtx reg = gen_reg_rtx (QImode);
25439 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25440 plus_constant (Pmode, in,
25441 offset))));
25442 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25443 offset)),
25444 reg));
25445 }
25446 }
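/* Decomposition example: a 23-byte copy becomes one movmem12b (11 bytes
   left), one movmem8b (3 left), then a halfword move at offset 0 and a byte
   move at offset 2.  The 12- and 8-byte helpers advance the pointer
   registers themselves, so OFFSET only tracks the scalar tail.  */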
25447
25448 void
25449 thumb_reload_out_hi (rtx *operands)
25450 {
25451 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25452 }
25453
25454 /* Handle reading a half-word from memory during reload. */
25455 void
25456 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25457 {
25458 gcc_unreachable ();
25459 }
25460
25461 /* Return the length of a function name prefix
25462 that starts with the character 'c'. */
25463 static int
25464 arm_get_strip_length (int c)
25465 {
25466 switch (c)
25467 {
25468 ARM_NAME_ENCODING_LENGTHS
25469 default: return 0;
25470 }
25471 }
25472
25473 /* Return a pointer to a function's name with any
25474 and all prefix encodings stripped from it. */
25475 const char *
25476 arm_strip_name_encoding (const char *name)
25477 {
25478 int skip;
25479
25480 while ((skip = arm_get_strip_length (* name)))
25481 name += skip;
25482
25483 return name;
25484 }
25485
25486 /* If there is a '*' anywhere in the name's prefix, then
25487 emit the stripped name verbatim, otherwise prepend an
25488 underscore if leading underscores are being used. */
25489 void
25490 arm_asm_output_labelref (FILE *stream, const char *name)
25491 {
25492 int skip;
25493 int verbatim = 0;
25494
25495 while ((skip = arm_get_strip_length (* name)))
25496 {
25497 verbatim |= (*name == '*');
25498 name += skip;
25499 }
25500
25501 if (verbatim)
25502 fputs (name, stream);
25503 else
25504 asm_fprintf (stream, "%U%s", name);
25505 }
25506
25507 /* This function is used to emit an EABI tag and its associated value.
25508 We emit the numerical value of the tag in case the assembler does not
25509    support textual tags (e.g. gas prior to 2.20).  If requested we include
25510 the tag name in a comment so that anyone reading the assembler output
25511 will know which tag is being set.
25512
25513 This function is not static because arm-c.c needs it too. */
25514
25515 void
25516 arm_emit_eabi_attribute (const char *name, int num, int val)
25517 {
25518 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25519 if (flag_verbose_asm || flag_debug_asm)
25520 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25521 asm_fprintf (asm_out_file, "\n");
25522 }
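/* Sample output (assuming the ARM assembler comment character '@' and a
   -fverbose-asm compilation at -O2):

       .eabi_attribute 30, 2	@ Tag_ABI_optimization_goals   */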
25523
25524 static void
25525 arm_file_start (void)
25526 {
25527 int val;
25528
25529 if (TARGET_UNIFIED_ASM)
25530 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25531
25532 if (TARGET_BPABI)
25533 {
25534 const char *fpu_name;
25535 if (arm_selected_arch)
25536 {
25537 /* armv7ve doesn't support any extensions. */
25538 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25539 {
25540 	      /* Keep backward compatibility for assemblers
25541 which don't support armv7ve. */
25542 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25543 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25544 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25545 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25546 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25547 }
25548 else
25549 {
25550 const char* pos = strchr (arm_selected_arch->name, '+');
25551 if (pos)
25552 {
25553 char buf[15];
25554 gcc_assert (strlen (arm_selected_arch->name)
25555 <= sizeof (buf) / sizeof (*pos));
25556 strncpy (buf, arm_selected_arch->name,
25557 (pos - arm_selected_arch->name) * sizeof (*pos));
25558 buf[pos - arm_selected_arch->name] = '\0';
25559 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25560 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25561 }
25562 else
25563 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25564 }
25565 }
25566 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25567 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25568 else
25569 {
25570 const char* truncated_name
25571 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25572 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25573 }
25574
25575 if (TARGET_SOFT_FLOAT)
25576 {
25577 fpu_name = "softvfp";
25578 }
25579 else
25580 {
25581 fpu_name = arm_fpu_desc->name;
25582 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25583 {
25584 if (TARGET_HARD_FLOAT)
25585 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25586 if (TARGET_HARD_FLOAT_ABI)
25587 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25588 }
25589 }
25590 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25591
25592 /* Some of these attributes only apply when the corresponding features
25593 are used. However we don't have any easy way of figuring this out.
25594 Conservatively record the setting that would have been used. */
25595
25596 if (flag_rounding_math)
25597 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25598
25599 if (!flag_unsafe_math_optimizations)
25600 {
25601 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25602 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25603 }
25604 if (flag_signaling_nans)
25605 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25606
25607 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25608 flag_finite_math_only ? 1 : 3);
25609
25610 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25611 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25612 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25613 flag_short_enums ? 1 : 2);
25614
25615 /* Tag_ABI_optimization_goals. */
25616 if (optimize_size)
25617 val = 4;
25618 else if (optimize >= 2)
25619 val = 2;
25620 else if (optimize)
25621 val = 1;
25622 else
25623 val = 6;
25624 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25625
25626 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25627 unaligned_access);
25628
25629 if (arm_fp16_format)
25630 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25631 (int) arm_fp16_format);
25632
25633 if (arm_lang_output_object_attributes_hook)
25634 arm_lang_output_object_attributes_hook();
25635 }
25636
25637 default_file_start ();
25638 }
25639
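/* Output assembly at the end of the file: add a .note.GNU-stack section if
   needed, and emit the Thumb call-via-register veneers (a "bx" through each
   register for which one was recorded).  */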
25640 static void
25641 arm_file_end (void)
25642 {
25643 int regno;
25644
25645 if (NEED_INDICATE_EXEC_STACK)
25646 /* Add .note.GNU-stack. */
25647 file_end_indicate_exec_stack ();
25648
25649 if (! thumb_call_reg_needed)
25650 return;
25651
25652 switch_to_section (text_section);
25653 asm_fprintf (asm_out_file, "\t.code 16\n");
25654 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25655
25656 for (regno = 0; regno < LR_REGNUM; regno++)
25657 {
25658 rtx label = thumb_call_via_label[regno];
25659
25660 if (label != 0)
25661 {
25662 targetm.asm_out.internal_label (asm_out_file, "L",
25663 CODE_LABEL_NUMBER (label));
25664 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25665 }
25666 }
25667 }
25668
25669 #ifndef ARM_PE
25670 /* Symbols in the text segment can be accessed without indirecting via the
25671 constant pool; it may take an extra binary operation, but this is still
25672 faster than indirecting via memory. Don't do this when not optimizing,
25673 since we won't be calculating all of the offsets necessary to do this
25674 simplification. */
25675
25676 static void
25677 arm_encode_section_info (tree decl, rtx rtl, int first)
25678 {
25679 if (optimize > 0 && TREE_CONSTANT (decl))
25680 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25681
25682 default_encode_section_info (decl, rtl, first);
25683 }
25684 #endif /* !ARM_PE */
25685
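/* Output an internal label.  If the label is the pending target of the
   conditional-execution state machine, reset that state first.  */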
25686 static void
25687 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25688 {
25689 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25690 && !strcmp (prefix, "L"))
25691 {
25692 arm_ccfsm_state = 0;
25693 arm_target_insn = NULL;
25694 }
25695 default_internal_label (stream, prefix, labelno);
25696 }
25697
25698 /* Output code to add DELTA to the first argument, and then jump
25699 to FUNCTION. Used for C++ multiple inheritance. */
25700 static void
25701 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25702 HOST_WIDE_INT delta,
25703 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25704 tree function)
25705 {
25706 static int thunk_label = 0;
25707 char label[256];
25708 char labelpc[256];
25709 int mi_delta = delta;
25710 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25711 int shift = 0;
25712 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25713 ? 1 : 0);
25714 if (mi_delta < 0)
25715 mi_delta = - mi_delta;
25716
25717 final_start_function (emit_barrier (), file, 1);
25718
25719 if (TARGET_THUMB1)
25720 {
25721 int labelno = thunk_label++;
25722 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25723 /* Thunks are entered in ARM mode when available. */
25724 if (TARGET_THUMB1_ONLY)
25725 {
25726 /* push r3 so we can use it as a temporary. */
25727 /* TODO: Omit this save if r3 is not used. */
25728 fputs ("\tpush {r3}\n", file);
25729 fputs ("\tldr\tr3, ", file);
25730 }
25731 else
25732 {
25733 fputs ("\tldr\tr12, ", file);
25734 }
25735 assemble_name (file, label);
25736 fputc ('\n', file);
25737 if (flag_pic)
25738 {
25739 /* If we are generating PIC, the ldr instruction below loads
25740 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25741 the address of the add + 8, so we have:
25742
25743 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25744 = target + 1.
25745
25746 Note that we have "+ 1" because some versions of GNU ld
25747 don't set the low bit of the result for R_ARM_REL32
25748 relocations against thumb function symbols.
25749 On ARMv6M this is +4, not +8. */
25750 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25751 assemble_name (file, labelpc);
25752 fputs (":\n", file);
25753 if (TARGET_THUMB1_ONLY)
25754 {
25755 /* This is 2 insns after the start of the thunk, so we know it
25756 is 4-byte aligned. */
25757 fputs ("\tadd\tr3, pc, r3\n", file);
25758 fputs ("\tmov r12, r3\n", file);
25759 }
25760 else
25761 fputs ("\tadd\tr12, pc, r12\n", file);
25762 }
25763 else if (TARGET_THUMB1_ONLY)
25764 fputs ("\tmov r12, r3\n", file);
25765 }
25766 if (TARGET_THUMB1_ONLY)
25767 {
25768 if (mi_delta > 255)
25769 {
25770 fputs ("\tldr\tr3, ", file);
25771 assemble_name (file, label);
25772 fputs ("+4\n", file);
25773 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25774 mi_op, this_regno, this_regno);
25775 }
25776 else if (mi_delta != 0)
25777 {
25778 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
25779 when one of the operands is an immediate.  */
25780 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25781 mi_op, this_regno, this_regno,
25782 mi_delta);
25783 }
25784 }
25785 else
25786 {
25787 /* TODO: Use movw/movt for large constants when available. */
25788 while (mi_delta != 0)
25789 {
25790 if ((mi_delta & (3 << shift)) == 0)
25791 shift += 2;
25792 else
25793 {
25794 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25795 mi_op, this_regno, this_regno,
25796 mi_delta & (0xff << shift));
25797 mi_delta &= ~(0xff << shift);
25798 shift += 8;
25799 }
25800 }
25801 }
25802 if (TARGET_THUMB1)
25803 {
25804 if (TARGET_THUMB1_ONLY)
25805 fputs ("\tpop\t{r3}\n", file);
25806
25807 fprintf (file, "\tbx\tr12\n");
25808 ASM_OUTPUT_ALIGN (file, 2);
25809 assemble_name (file, label);
25810 fputs (":\n", file);
25811 if (flag_pic)
25812 {
25813 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25814 rtx tem = XEXP (DECL_RTL (function), 0);
25815 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25816 pipeline offset is four rather than eight. Adjust the offset
25817 accordingly. */
25818 tem = plus_constant (GET_MODE (tem), tem,
25819 TARGET_THUMB1_ONLY ? -3 : -7);
25820 tem = gen_rtx_MINUS (GET_MODE (tem),
25821 tem,
25822 gen_rtx_SYMBOL_REF (Pmode,
25823 ggc_strdup (labelpc)));
25824 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25825 }
25826 else
25827 /* Output ".word .LTHUNKn". */
25828 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25829
25830 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25831 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25832 }
25833 else
25834 {
25835 fputs ("\tb\t", file);
25836 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25837 if (NEED_PLT_RELOC)
25838 fputs ("(PLT)", file);
25839 fputc ('\n', file);
25840 }
25841
25842 final_end_function ();
25843 }
25844
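/* Output the CONST_VECTOR X as a single hexadecimal constant, with the
   highest-numbered element first.  Returns 1.  */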
25845 int
25846 arm_emit_vector_const (FILE *file, rtx x)
25847 {
25848 int i;
25849 const char * pattern;
25850
25851 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25852
25853 switch (GET_MODE (x))
25854 {
25855 case V2SImode: pattern = "%08x"; break;
25856 case V4HImode: pattern = "%04x"; break;
25857 case V8QImode: pattern = "%02x"; break;
25858 default: gcc_unreachable ();
25859 }
25860
25861 fprintf (file, "0x");
25862 for (i = CONST_VECTOR_NUNITS (x); i--;)
25863 {
25864 rtx element;
25865
25866 element = CONST_VECTOR_ELT (x, i);
25867 fprintf (file, pattern, INTVAL (element));
25868 }
25869
25870 return 1;
25871 }
25872
25873 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25874 HFmode constant pool entries are actually loaded with ldr. */
25875 void
25876 arm_emit_fp16_const (rtx c)
25877 {
25878 REAL_VALUE_TYPE r;
25879 long bits;
25880
25881 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25882 bits = real_to_target (NULL, &r, HFmode);
25883 if (WORDS_BIG_ENDIAN)
25884 assemble_zeros (2);
25885 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25886 if (!WORDS_BIG_ENDIAN)
25887 assemble_zeros (2);
25888 }
25889
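/* Output assembly to load an iWMMXt GR register.  Loads whose address offset
   is out of range are expanded via a core register temporarily saved on the
   stack.  */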
25890 const char *
25891 arm_output_load_gr (rtx *operands)
25892 {
25893 rtx reg;
25894 rtx offset;
25895 rtx wcgr;
25896 rtx sum;
25897
25898 if (!MEM_P (operands [1])
25899 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25900 || !REG_P (reg = XEXP (sum, 0))
25901 || !CONST_INT_P (offset = XEXP (sum, 1))
25902 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25903 return "wldrw%?\t%0, %1";
25904
25905 /* Fix up an out-of-range load of a GR register. */
25906 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25907 wcgr = operands[0];
25908 operands[0] = reg;
25909 output_asm_insn ("ldr%?\t%0, %1", operands);
25910
25911 operands[0] = wcgr;
25912 operands[1] = reg;
25913 output_asm_insn ("tmcr%?\t%0, %1", operands);
25914 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25915
25916 return "";
25917 }
25918
25919 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25920
25921 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25922 named arg and all anonymous args onto the stack.
25923 XXX I know the prologue shouldn't be pushing registers, but it is faster
25924 that way. */
25925
25926 static void
25927 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25928 machine_mode mode,
25929 tree type,
25930 int *pretend_size,
25931 int second_time ATTRIBUTE_UNUSED)
25932 {
25933 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25934 int nregs;
25935
25936 cfun->machine->uses_anonymous_args = 1;
25937 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25938 {
25939 nregs = pcum->aapcs_ncrn;
25940 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25941 nregs++;
25942 }
25943 else
25944 nregs = pcum->nregs;
25945
25946 if (nregs < NUM_ARG_REGS)
25947 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25948 }
25949
25950 /* We can't rely on the caller doing the proper promotion when
25951 using APCS or ATPCS. */
25952
25953 static bool
25954 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25955 {
25956 return !TARGET_AAPCS_BASED;
25957 }
25958
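/* Promote integer arguments and return values narrower than a word to
   SImode.  */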
25959 static machine_mode
25960 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25961 machine_mode mode,
25962 int *punsignedp ATTRIBUTE_UNUSED,
25963 const_tree fntype ATTRIBUTE_UNUSED,
25964 int for_return ATTRIBUTE_UNUSED)
25965 {
25966 if (GET_MODE_CLASS (mode) == MODE_INT
25967 && GET_MODE_SIZE (mode) < 4)
25968 return SImode;
25969
25970 return mode;
25971 }
25972
25973 /* AAPCS based ABIs use short enums by default. */
25974
25975 static bool
25976 arm_default_short_enums (void)
25977 {
25978 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25979 }
25980
25981
25982 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25983
25984 static bool
25985 arm_align_anon_bitfield (void)
25986 {
25987 return TARGET_AAPCS_BASED;
25988 }
25989
25990
25991 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25992
25993 static tree
25994 arm_cxx_guard_type (void)
25995 {
25996 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25997 }
25998
25999
26000 /* The EABI says test the least significant bit of a guard variable. */
26001
26002 static bool
26003 arm_cxx_guard_mask_bit (void)
26004 {
26005 return TARGET_AAPCS_BASED;
26006 }
26007
26008
26009 /* The EABI specifies that all array cookies are 8 bytes long. */
26010
26011 static tree
26012 arm_get_cookie_size (tree type)
26013 {
26014 tree size;
26015
26016 if (!TARGET_AAPCS_BASED)
26017 return default_cxx_get_cookie_size (type);
26018
26019 size = build_int_cst (sizetype, 8);
26020 return size;
26021 }
26022
26023
26024 /* The EABI says that array cookies should also contain the element size. */
26025
26026 static bool
26027 arm_cookie_has_size (void)
26028 {
26029 return TARGET_AAPCS_BASED;
26030 }
26031
26032
26033 /* The EABI says constructors and destructors should return a pointer to
26034 the object constructed/destroyed. */
26035
26036 static bool
26037 arm_cxx_cdtor_returns_this (void)
26038 {
26039 return TARGET_AAPCS_BASED;
26040 }
26041
26042 /* The EABI says that an inline function may never be the key
26043 method. */
26044
26045 static bool
26046 arm_cxx_key_method_may_be_inline (void)
26047 {
26048 return !TARGET_AAPCS_BASED;
26049 }
26050
26051 static void
26052 arm_cxx_determine_class_data_visibility (tree decl)
26053 {
26054 if (!TARGET_AAPCS_BASED
26055 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26056 return;
26057
26058 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26059 is exported. However, on systems without dynamic vague linkage,
26060 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26061 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26062 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26063 else
26064 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26065 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26066 }
26067
26068 static bool
26069 arm_cxx_class_data_always_comdat (void)
26070 {
26071 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26072 vague linkage if the class has no key function. */
26073 return !TARGET_AAPCS_BASED;
26074 }
26075
26076
26077 /* The EABI says __aeabi_atexit should be used to register static
26078 destructors. */
26079
26080 static bool
26081 arm_cxx_use_aeabi_atexit (void)
26082 {
26083 return TARGET_AAPCS_BASED;
26084 }
26085
26086
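/* Store SOURCE as the return address of the current function, either in LR
   or in its stack save slot.  SCRATCH is used to form addresses that are out
   of immediate range.  */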
26087 void
26088 arm_set_return_address (rtx source, rtx scratch)
26089 {
26090 arm_stack_offsets *offsets;
26091 HOST_WIDE_INT delta;
26092 rtx addr;
26093 unsigned long saved_regs;
26094
26095 offsets = arm_get_frame_offsets ();
26096 saved_regs = offsets->saved_regs_mask;
26097
26098 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26099 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26100 else
26101 {
26102 if (frame_pointer_needed)
26103 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26104 else
26105 {
26106 /* LR will be the first saved register. */
26107 delta = offsets->outgoing_args - (offsets->frame + 4);
26108
26109
26110 if (delta >= 4096)
26111 {
26112 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26113 GEN_INT (delta & ~4095)));
26114 addr = scratch;
26115 delta &= 4095;
26116 }
26117 else
26118 addr = stack_pointer_rtx;
26119
26120 addr = plus_constant (Pmode, addr, delta);
26121 }
26122 /* The store needs to be marked as frame related in order to prevent
26123 DSE from deleting it as dead if it is based on fp. */
26124 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26125 RTX_FRAME_RELATED_P (insn) = 1;
26126 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26127 }
26128 }
26129
26130
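/* Thumb counterpart of arm_set_return_address: store SOURCE as the return
   address, using SCRATCH for out-of-range stack offsets.  */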
26131 void
26132 thumb_set_return_address (rtx source, rtx scratch)
26133 {
26134 arm_stack_offsets *offsets;
26135 HOST_WIDE_INT delta;
26136 HOST_WIDE_INT limit;
26137 int reg;
26138 rtx addr;
26139 unsigned long mask;
26140
26141 emit_use (source);
26142
26143 offsets = arm_get_frame_offsets ();
26144 mask = offsets->saved_regs_mask;
26145 if (mask & (1 << LR_REGNUM))
26146 {
26147 limit = 1024;
26148 /* Find the saved regs. */
26149 if (frame_pointer_needed)
26150 {
26151 delta = offsets->soft_frame - offsets->saved_args;
26152 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26153 if (TARGET_THUMB1)
26154 limit = 128;
26155 }
26156 else
26157 {
26158 delta = offsets->outgoing_args - offsets->saved_args;
26159 reg = SP_REGNUM;
26160 }
26161 /* Allow for the stack frame. */
26162 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26163 delta -= 16;
26164 /* The link register is always the first saved register. */
26165 delta -= 4;
26166
26167 /* Construct the address. */
26168 addr = gen_rtx_REG (SImode, reg);
26169 if (delta > limit)
26170 {
26171 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26172 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26173 addr = scratch;
26174 }
26175 else
26176 addr = plus_constant (Pmode, addr, delta);
26177
26178 /* The store needs to be marked as frame related in order to prevent
26179 DSE from deleting it as dead if it is based on fp. */
26180 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26181 RTX_FRAME_RELATED_P (insn) = 1;
26182 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26183 }
26184 else
26185 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26186 }
26187
26188 /* Implements target hook vector_mode_supported_p. */
26189 bool
26190 arm_vector_mode_supported_p (machine_mode mode)
26191 {
26192 /* Neon also supports V2SImode, etc. listed in the clause below. */
26193 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26194 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26195 return true;
26196
26197 if ((TARGET_NEON || TARGET_IWMMXT)
26198 && ((mode == V2SImode)
26199 || (mode == V4HImode)
26200 || (mode == V8QImode)))
26201 return true;
26202
26203 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26204 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26205 || mode == V2HAmode))
26206 return true;
26207
26208 return false;
26209 }
26210
26211 /* Implements target hook array_mode_supported_p. */
26212
26213 static bool
26214 arm_array_mode_supported_p (machine_mode mode,
26215 unsigned HOST_WIDE_INT nelems)
26216 {
26217 if (TARGET_NEON
26218 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26219 && (nelems >= 2 && nelems <= 4))
26220 return true;
26221
26222 return false;
26223 }
26224
26225 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26226 registers when autovectorizing for Neon, at least until multiple vector
26227 widths are supported properly by the middle-end. */
26228
26229 static machine_mode
26230 arm_preferred_simd_mode (machine_mode mode)
26231 {
26232 if (TARGET_NEON)
26233 switch (mode)
26234 {
26235 case SFmode:
26236 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26237 case SImode:
26238 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26239 case HImode:
26240 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26241 case QImode:
26242 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26243 case DImode:
26244 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26245 return V2DImode;
26246 break;
26247
26248 default:;
26249 }
26250
26251 if (TARGET_REALLY_IWMMXT)
26252 switch (mode)
26253 {
26254 case SImode:
26255 return V2SImode;
26256 case HImode:
26257 return V4HImode;
26258 case QImode:
26259 return V8QImode;
26260
26261 default:;
26262 }
26263
26264 return word_mode;
26265 }
26266
26267 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26268
26269 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26270 using r0-r4 for function arguments, r7 for the stack frame, and not having
26271 enough left over to do doubleword arithmetic. For Thumb-2 all the
26272 potentially problematic instructions accept high registers so this is not
26273 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26274 that require many low registers. */
26275 static bool
26276 arm_class_likely_spilled_p (reg_class_t rclass)
26277 {
26278 if ((TARGET_THUMB1 && rclass == LO_REGS)
26279 || rclass == CC_REG)
26280 return true;
26281
26282 return false;
26283 }
26284
26285 /* Implements target hook small_register_classes_for_mode_p. */
26286 bool
26287 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26288 {
26289 return TARGET_THUMB1;
26290 }
26291
26292 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26293 ARM insns and therefore guarantee that the shift count is modulo 256.
26294 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26295 guarantee no particular behavior for out-of-range counts. */
26296
26297 static unsigned HOST_WIDE_INT
26298 arm_shift_truncation_mask (machine_mode mode)
26299 {
26300 return mode == SImode ? 255 : 0;
26301 }
26302
26303
26304 /* Map internal gcc register numbers to DWARF2 register numbers. */
26305
26306 unsigned int
26307 arm_dbx_register_number (unsigned int regno)
26308 {
26309 if (regno < 16)
26310 return regno;
26311
26312 if (IS_VFP_REGNUM (regno))
26313 {
26314 /* See comment in arm_dwarf_register_span. */
26315 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26316 return 64 + regno - FIRST_VFP_REGNUM;
26317 else
26318 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26319 }
26320
26321 if (IS_IWMMXT_GR_REGNUM (regno))
26322 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26323
26324 if (IS_IWMMXT_REGNUM (regno))
26325 return 112 + regno - FIRST_IWMMXT_REGNUM;
26326
26327 gcc_unreachable ();
26328 }
26329
26330 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26331 GCC models them as 64 32-bit registers, so we need to describe this to
26332 the DWARF generation code. Other registers can use the default. */
26333 static rtx
26334 arm_dwarf_register_span (rtx rtl)
26335 {
26336 machine_mode mode;
26337 unsigned regno;
26338 rtx parts[16];
26339 int nregs;
26340 int i;
26341
26342 regno = REGNO (rtl);
26343 if (!IS_VFP_REGNUM (regno))
26344 return NULL_RTX;
26345
26346 /* XXX FIXME: The EABI defines two VFP register ranges:
26347 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26348 256-287: D0-D31
26349 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26350 corresponding D register. Until GDB supports this, we shall use the
26351 legacy encodings. We also use these encodings for D0-D15 for
26352 compatibility with older debuggers. */
26353 mode = GET_MODE (rtl);
26354 if (GET_MODE_SIZE (mode) < 8)
26355 return NULL_RTX;
26356
26357 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26358 {
26359 nregs = GET_MODE_SIZE (mode) / 4;
26360 for (i = 0; i < nregs; i += 2)
26361 if (TARGET_BIG_END)
26362 {
26363 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26364 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26365 }
26366 else
26367 {
26368 parts[i] = gen_rtx_REG (SImode, regno + i);
26369 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26370 }
26371 }
26372 else
26373 {
26374 nregs = GET_MODE_SIZE (mode) / 8;
26375 for (i = 0; i < nregs; i++)
26376 parts[i] = gen_rtx_REG (DImode, regno + i);
26377 }
26378
26379 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26380 }
26381
26382 #if ARM_UNWIND_INFO
26383 /* Emit unwind directives for a store-multiple instruction or stack pointer
26384 push during alignment.
26385 These should only ever be generated by the function prologue code, so
26386 expect them to have a particular form.
26387 The store-multiple instruction sometimes pushes pc as the last register,
26388 although it should not be tracked into unwind information, or for -Os
26389 sometimes pushes some dummy registers before the first register that needs
26390 to be tracked in unwind information; such dummy registers are there just
26391 to avoid separate stack adjustment, and will not be restored in the
26392 epilogue. */
26393
26394 static void
26395 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26396 {
26397 int i;
26398 HOST_WIDE_INT offset;
26399 HOST_WIDE_INT nregs;
26400 int reg_size;
26401 unsigned reg;
26402 unsigned lastreg;
26403 unsigned padfirst = 0, padlast = 0;
26404 rtx e;
26405
26406 e = XVECEXP (p, 0, 0);
26407 gcc_assert (GET_CODE (e) == SET);
26408
26409 /* First insn will adjust the stack pointer. */
26410 gcc_assert (GET_CODE (e) == SET
26411 && REG_P (SET_DEST (e))
26412 && REGNO (SET_DEST (e)) == SP_REGNUM
26413 && GET_CODE (SET_SRC (e)) == PLUS);
26414
26415 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26416 nregs = XVECLEN (p, 0) - 1;
26417 gcc_assert (nregs);
26418
26419 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26420 if (reg < 16)
26421 {
26422 /* For -Os dummy registers can be pushed at the beginning to
26423 avoid separate stack pointer adjustment. */
26424 e = XVECEXP (p, 0, 1);
26425 e = XEXP (SET_DEST (e), 0);
26426 if (GET_CODE (e) == PLUS)
26427 padfirst = INTVAL (XEXP (e, 1));
26428 gcc_assert (padfirst == 0 || optimize_size);
26429 /* The function prologue may also push pc, but not annotate it as it is
26430 never restored. We turn this into a stack pointer adjustment. */
26431 e = XVECEXP (p, 0, nregs);
26432 e = XEXP (SET_DEST (e), 0);
26433 if (GET_CODE (e) == PLUS)
26434 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26435 else
26436 padlast = offset - 4;
26437 gcc_assert (padlast == 0 || padlast == 4);
26438 if (padlast == 4)
26439 fprintf (asm_out_file, "\t.pad #4\n");
26440 reg_size = 4;
26441 fprintf (asm_out_file, "\t.save {");
26442 }
26443 else if (IS_VFP_REGNUM (reg))
26444 {
26445 reg_size = 8;
26446 fprintf (asm_out_file, "\t.vsave {");
26447 }
26448 else
26449 /* Unknown register type. */
26450 gcc_unreachable ();
26451
26452 /* If the stack increment doesn't match the size of the saved registers,
26453 something has gone horribly wrong. */
26454 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26455
26456 offset = padfirst;
26457 lastreg = 0;
26458 /* The remaining insns will describe the stores. */
26459 for (i = 1; i <= nregs; i++)
26460 {
26461 /* Expect (set (mem <addr>) (reg)).
26462 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26463 e = XVECEXP (p, 0, i);
26464 gcc_assert (GET_CODE (e) == SET
26465 && MEM_P (SET_DEST (e))
26466 && REG_P (SET_SRC (e)));
26467
26468 reg = REGNO (SET_SRC (e));
26469 gcc_assert (reg >= lastreg);
26470
26471 if (i != 1)
26472 fprintf (asm_out_file, ", ");
26473 /* We can't use %r for vfp because we need to use the
26474 double precision register names. */
26475 if (IS_VFP_REGNUM (reg))
26476 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26477 else
26478 asm_fprintf (asm_out_file, "%r", reg);
26479
26480 #ifdef ENABLE_CHECKING
26481 /* Check that the addresses are consecutive. */
26482 e = XEXP (SET_DEST (e), 0);
26483 if (GET_CODE (e) == PLUS)
26484 gcc_assert (REG_P (XEXP (e, 0))
26485 && REGNO (XEXP (e, 0)) == SP_REGNUM
26486 && CONST_INT_P (XEXP (e, 1))
26487 && offset == INTVAL (XEXP (e, 1)));
26488 else
26489 gcc_assert (i == 1
26490 && REG_P (e)
26491 && REGNO (e) == SP_REGNUM);
26492 offset += reg_size;
26493 #endif
26494 }
26495 fprintf (asm_out_file, "}\n");
26496 if (padfirst)
26497 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26498 }
26499
26500 /* Emit unwind directives for a SET. */
26501
26502 static void
26503 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26504 {
26505 rtx e0;
26506 rtx e1;
26507 unsigned reg;
26508
26509 e0 = XEXP (p, 0);
26510 e1 = XEXP (p, 1);
26511 switch (GET_CODE (e0))
26512 {
26513 case MEM:
26514 /* Pushing a single register. */
26515 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26516 || !REG_P (XEXP (XEXP (e0, 0), 0))
26517 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26518 abort ();
26519
26520 asm_fprintf (asm_out_file, "\t.save ");
26521 if (IS_VFP_REGNUM (REGNO (e1)))
26522 asm_fprintf(asm_out_file, "{d%d}\n",
26523 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26524 else
26525 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26526 break;
26527
26528 case REG:
26529 if (REGNO (e0) == SP_REGNUM)
26530 {
26531 /* A stack increment. */
26532 if (GET_CODE (e1) != PLUS
26533 || !REG_P (XEXP (e1, 0))
26534 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26535 || !CONST_INT_P (XEXP (e1, 1)))
26536 abort ();
26537
26538 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26539 -INTVAL (XEXP (e1, 1)));
26540 }
26541 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26542 {
26543 HOST_WIDE_INT offset;
26544
26545 if (GET_CODE (e1) == PLUS)
26546 {
26547 if (!REG_P (XEXP (e1, 0))
26548 || !CONST_INT_P (XEXP (e1, 1)))
26549 abort ();
26550 reg = REGNO (XEXP (e1, 0));
26551 offset = INTVAL (XEXP (e1, 1));
26552 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26553 HARD_FRAME_POINTER_REGNUM, reg,
26554 offset);
26555 }
26556 else if (REG_P (e1))
26557 {
26558 reg = REGNO (e1);
26559 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26560 HARD_FRAME_POINTER_REGNUM, reg);
26561 }
26562 else
26563 abort ();
26564 }
26565 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26566 {
26567 /* Move from sp to reg. */
26568 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26569 }
26570 else if (GET_CODE (e1) == PLUS
26571 && REG_P (XEXP (e1, 0))
26572 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26573 && CONST_INT_P (XEXP (e1, 1)))
26574 {
26575 /* Set reg to offset from sp. */
26576 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26577 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26578 }
26579 else
26580 abort ();
26581 break;
26582
26583 default:
26584 abort ();
26585 }
26586 }
26587
26588
26589 /* Emit unwind directives for the given insn. */
26590
26591 static void
26592 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26593 {
26594 rtx note, pat;
26595 bool handled_one = false;
26596
26597 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26598 return;
26599
26600 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26601 && (TREE_NOTHROW (current_function_decl)
26602 || crtl->all_throwers_are_sibcalls))
26603 return;
26604
26605 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26606 return;
26607
26608 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26609 {
26610 switch (REG_NOTE_KIND (note))
26611 {
26612 case REG_FRAME_RELATED_EXPR:
26613 pat = XEXP (note, 0);
26614 goto found;
26615
26616 case REG_CFA_REGISTER:
26617 pat = XEXP (note, 0);
26618 if (pat == NULL)
26619 {
26620 pat = PATTERN (insn);
26621 if (GET_CODE (pat) == PARALLEL)
26622 pat = XVECEXP (pat, 0, 0);
26623 }
26624
26625 /* Only emitted for IS_STACKALIGN re-alignment. */
26626 {
26627 rtx dest, src;
26628 unsigned reg;
26629
26630 src = SET_SRC (pat);
26631 dest = SET_DEST (pat);
26632
26633 gcc_assert (src == stack_pointer_rtx);
26634 reg = REGNO (dest);
26635 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26636 reg + 0x90, reg);
26637 }
26638 handled_one = true;
26639 break;
26640
26641 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26642 to get correct dwarf information for shrink-wrap. We should not
26643 emit unwind information for it because these are used either for
26644 pretend arguments or notes to adjust sp and restore registers from
26645 stack. */
26646 case REG_CFA_DEF_CFA:
26647 case REG_CFA_ADJUST_CFA:
26648 case REG_CFA_RESTORE:
26649 return;
26650
26651 case REG_CFA_EXPRESSION:
26652 case REG_CFA_OFFSET:
26653 /* ??? Only handling here what we actually emit. */
26654 gcc_unreachable ();
26655
26656 default:
26657 break;
26658 }
26659 }
26660 if (handled_one)
26661 return;
26662 pat = PATTERN (insn);
26663 found:
26664
26665 switch (GET_CODE (pat))
26666 {
26667 case SET:
26668 arm_unwind_emit_set (asm_out_file, pat);
26669 break;
26670
26671 case SEQUENCE:
26672 /* Store multiple. */
26673 arm_unwind_emit_sequence (asm_out_file, pat);
26674 break;
26675
26676 default:
26677 abort();
26678 }
26679 }
26680
26681
26682 /* Output a reference from a function exception table to the type_info
26683 object X. The EABI specifies that the symbol should be relocated by
26684 an R_ARM_TARGET2 relocation. */
26685
26686 static bool
26687 arm_output_ttype (rtx x)
26688 {
26689 fputs ("\t.word\t", asm_out_file);
26690 output_addr_const (asm_out_file, x);
26691 /* Use special relocations for symbol references. */
26692 if (!CONST_INT_P (x))
26693 fputs ("(TARGET2)", asm_out_file);
26694 fputc ('\n', asm_out_file);
26695
26696 return TRUE;
26697 }
26698
26699 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26700
26701 static void
26702 arm_asm_emit_except_personality (rtx personality)
26703 {
26704 fputs ("\t.personality\t", asm_out_file);
26705 output_addr_const (asm_out_file, personality);
26706 fputc ('\n', asm_out_file);
26707 }
26708
26709 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26710
26711 static void
26712 arm_asm_init_sections (void)
26713 {
26714 exception_section = get_unnamed_section (0, output_section_asm_op,
26715 "\t.handlerdata");
26716 }
26717 #endif /* ARM_UNWIND_INFO */
26718
26719 /* Output unwind directives for the start/end of a function. */
26720
26721 void
26722 arm_output_fn_unwind (FILE * f, bool prologue)
26723 {
26724 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26725 return;
26726
26727 if (prologue)
26728 fputs ("\t.fnstart\n", f);
26729 else
26730 {
26731 /* If this function will never be unwound, then mark it as such.
26732 The same condition is used in arm_unwind_emit to suppress
26733 the frame annotations. */
26734 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26735 && (TREE_NOTHROW (current_function_decl)
26736 || crtl->all_throwers_are_sibcalls))
26737 fputs("\t.cantunwind\n", f);
26738
26739 fputs ("\t.fnend\n", f);
26740 }
26741 }
26742
26743 static bool
26744 arm_emit_tls_decoration (FILE *fp, rtx x)
26745 {
26746 enum tls_reloc reloc;
26747 rtx val;
26748
26749 val = XVECEXP (x, 0, 0);
26750 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26751
26752 output_addr_const (fp, val);
26753
26754 switch (reloc)
26755 {
26756 case TLS_GD32:
26757 fputs ("(tlsgd)", fp);
26758 break;
26759 case TLS_LDM32:
26760 fputs ("(tlsldm)", fp);
26761 break;
26762 case TLS_LDO32:
26763 fputs ("(tlsldo)", fp);
26764 break;
26765 case TLS_IE32:
26766 fputs ("(gottpoff)", fp);
26767 break;
26768 case TLS_LE32:
26769 fputs ("(tpoff)", fp);
26770 break;
26771 case TLS_DESCSEQ:
26772 fputs ("(tlsdesc)", fp);
26773 break;
26774 default:
26775 gcc_unreachable ();
26776 }
26777
26778 switch (reloc)
26779 {
26780 case TLS_GD32:
26781 case TLS_LDM32:
26782 case TLS_IE32:
26783 case TLS_DESCSEQ:
26784 fputs (" + (. - ", fp);
26785 output_addr_const (fp, XVECEXP (x, 0, 2));
26786 /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
26787 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26788 output_addr_const (fp, XVECEXP (x, 0, 3));
26789 fputc (')', fp);
26790 break;
26791 default:
26792 break;
26793 }
26794
26795 return TRUE;
26796 }
26797
26798 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26799
26800 static void
26801 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26802 {
26803 gcc_assert (size == 4);
26804 fputs ("\t.word\t", file);
26805 output_addr_const (file, x);
26806 fputs ("(tlsldo)", file);
26807 }
26808
26809 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26810
26811 static bool
26812 arm_output_addr_const_extra (FILE *fp, rtx x)
26813 {
26814 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26815 return arm_emit_tls_decoration (fp, x);
26816 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26817 {
26818 char label[256];
26819 int labelno = INTVAL (XVECEXP (x, 0, 0));
26820
26821 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26822 assemble_name_raw (fp, label);
26823
26824 return TRUE;
26825 }
26826 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26827 {
26828 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26829 if (GOT_PCREL)
26830 fputs ("+.", fp);
26831 fputs ("-(", fp);
26832 output_addr_const (fp, XVECEXP (x, 0, 0));
26833 fputc (')', fp);
26834 return TRUE;
26835 }
26836 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26837 {
26838 output_addr_const (fp, XVECEXP (x, 0, 0));
26839 if (GOT_PCREL)
26840 fputs ("+.", fp);
26841 fputs ("-(", fp);
26842 output_addr_const (fp, XVECEXP (x, 0, 1));
26843 fputc (')', fp);
26844 return TRUE;
26845 }
26846 else if (GET_CODE (x) == CONST_VECTOR)
26847 return arm_emit_vector_const (fp, x);
26848
26849 return FALSE;
26850 }
26851
26852 /* Output assembly for a shift instruction.
26853 SET_FLAGS determines how the instruction modifies the condition codes.
26854 0 - Do not set condition codes.
26855 1 - Set condition codes.
26856 2 - Use smallest instruction. */
26857 const char *
26858 arm_output_shift(rtx * operands, int set_flags)
26859 {
26860 char pattern[100];
26861 static const char flag_chars[3] = {'?', '.', '!'};
26862 const char *shift;
26863 HOST_WIDE_INT val;
26864 char c;
26865
26866 c = flag_chars[set_flags];
26867 if (TARGET_UNIFIED_ASM)
26868 {
26869 shift = shift_op(operands[3], &val);
26870 if (shift)
26871 {
26872 if (val != -1)
26873 operands[2] = GEN_INT(val);
26874 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26875 }
26876 else
26877 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26878 }
26879 else
26880 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26881 output_asm_insn (pattern, operands);
26882 return "";
26883 }
26884
26885 /* Output assembly for a WMMX immediate shift instruction. */
26886 const char *
26887 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26888 {
26889 int shift = INTVAL (operands[2]);
26890 char templ[50];
26891 machine_mode opmode = GET_MODE (operands[0]);
26892
26893 gcc_assert (shift >= 0);
26894
26895 /* If the shift value exceeds 63 (for the D qualifier), 31 (for the W
26896 qualifier) or 15 (for the H qualifier), the result is all zeros, except for wror and wsra.  */
26897 if (((opmode == V4HImode) && (shift > 15))
26898 || ((opmode == V2SImode) && (shift > 31))
26899 || ((opmode == DImode) && (shift > 63)))
26900 {
26901 if (wror_or_wsra)
26902 {
26903 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26904 output_asm_insn (templ, operands);
26905 if (opmode == DImode)
26906 {
26907 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26908 output_asm_insn (templ, operands);
26909 }
26910 }
26911 else
26912 {
26913 /* The destination register will contain all zeros. */
26914 sprintf (templ, "wzero\t%%0");
26915 output_asm_insn (templ, operands);
26916 }
26917 return "";
26918 }
26919
26920 if ((opmode == DImode) && (shift > 32))
26921 {
26922 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26923 output_asm_insn (templ, operands);
26924 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26925 output_asm_insn (templ, operands);
26926 }
26927 else
26928 {
26929 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26930 output_asm_insn (templ, operands);
26931 }
26932 return "";
26933 }
26934
26935 /* Output assembly for a WMMX tinsr instruction. */
26936 const char *
26937 arm_output_iwmmxt_tinsr (rtx *operands)
26938 {
26939 int mask = INTVAL (operands[3]);
26940 int i;
26941 char templ[50];
26942 int units = mode_nunits[GET_MODE (operands[0])];
26943 gcc_assert ((mask & (mask - 1)) == 0);
26944 for (i = 0; i < units; ++i)
26945 {
26946 if ((mask & 0x01) == 1)
26947 {
26948 break;
26949 }
26950 mask >>= 1;
26951 }
26952 gcc_assert (i < units);
26953 {
26954 switch (GET_MODE (operands[0]))
26955 {
26956 case V8QImode:
26957 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26958 break;
26959 case V4HImode:
26960 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26961 break;
26962 case V2SImode:
26963 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26964 break;
26965 default:
26966 gcc_unreachable ();
26967 break;
26968 }
26969 output_asm_insn (templ, operands);
26970 }
26971 return "";
26972 }
26973
26974 /* Output a Thumb-1 casesi dispatch sequence. */
26975 const char *
26976 thumb1_output_casesi (rtx *operands)
26977 {
26978 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26979
26980 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26981
26982 switch (GET_MODE(diff_vec))
26983 {
26984 case QImode:
26985 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26986 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26987 case HImode:
26988 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26989 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26990 case SImode:
26991 return "bl\t%___gnu_thumb1_case_si";
26992 default:
26993 gcc_unreachable ();
26994 }
26995 }
26996
26997 /* Output a Thumb-2 casesi instruction. */
26998 const char *
26999 thumb2_output_casesi (rtx *operands)
27000 {
27001 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27002
27003 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27004
27005 output_asm_insn ("cmp\t%0, %1", operands);
27006 output_asm_insn ("bhi\t%l3", operands);
27007 switch (GET_MODE(diff_vec))
27008 {
27009 case QImode:
27010 return "tbb\t[%|pc, %0]";
27011 case HImode:
27012 return "tbh\t[%|pc, %0, lsl #1]";
27013 case SImode:
27014 if (flag_pic)
27015 {
27016 output_asm_insn ("adr\t%4, %l2", operands);
27017 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27018 output_asm_insn ("add\t%4, %4, %5", operands);
27019 return "bx\t%4";
27020 }
27021 else
27022 {
27023 output_asm_insn ("adr\t%4, %l2", operands);
27024 return "ldr\t%|pc, [%4, %0, lsl #2]";
27025 }
27026 default:
27027 gcc_unreachable ();
27028 }
27029 }
27030
27031 /* Most ARM cores are single issue, but some newer ones can dual issue.
27032 The scheduler descriptions rely on this being correct. */
27033 static int
27034 arm_issue_rate (void)
27035 {
27036 switch (arm_tune)
27037 {
27038 case cortexa15:
27039 case cortexa57:
27040 return 3;
27041
27042 case cortexm7:
27043 case cortexr4:
27044 case cortexr4f:
27045 case cortexr5:
27046 case genericv7a:
27047 case cortexa5:
27048 case cortexa7:
27049 case cortexa8:
27050 case cortexa9:
27051 case cortexa12:
27052 case cortexa17:
27053 case cortexa53:
27054 case fa726te:
27055 case marvell_pj4:
27056 return 2;
27057
27058 default:
27059 return 1;
27060 }
27061 }
27062
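/* Return the mangled name for the ARM-specific types (__va_list,
   half-precision float and the Neon builtin types), or NULL to use the
   default mangling.  */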
27063 const char *
27064 arm_mangle_type (const_tree type)
27065 {
27066 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27067 has to be mangled as if it is in the "std" namespace. */
27068 if (TARGET_AAPCS_BASED
27069 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27070 return "St9__va_list";
27071
27072 /* Half-precision float. */
27073 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27074 return "Dh";
27075
27076 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27077 builtin type. */
27078 if (TYPE_NAME (type) != NULL)
27079 return arm_mangle_builtin_type (type);
27080
27081 /* Use the default mangling. */
27082 return NULL;
27083 }
27084
27085 /* Order of allocation of core registers for Thumb: this allocation is
27086 written over the corresponding initial entries of the array
27087 initialized with REG_ALLOC_ORDER. We allocate all low registers
27088 first. Saving and restoring a low register is usually cheaper than
27089 using a call-clobbered high register. */
27090
27091 static const int thumb_core_reg_alloc_order[] =
27092 {
27093 3, 2, 1, 0, 4, 5, 6, 7,
27094 14, 12, 8, 9, 10, 11
27095 };
27096
27097 /* Adjust register allocation order when compiling for Thumb. */
27098
27099 void
27100 arm_order_regs_for_local_alloc (void)
27101 {
27102 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27103 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27104 if (TARGET_THUMB)
27105 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27106 sizeof (thumb_core_reg_alloc_order));
27107 }
27108
27109 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27110
27111 bool
27112 arm_frame_pointer_required (void)
27113 {
27114 return (cfun->has_nonlocal_label
27115 || SUBTARGET_FRAME_POINTER_REQUIRED
27116 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27117 }
27118
27119 /* Only thumb1 can't support conditional execution, so return true if
27120 the target is not thumb1. */
27121 static bool
27122 arm_have_conditional_execution (void)
27123 {
27124 return !TARGET_THUMB1;
27125 }
27126
27127 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27128 static HOST_WIDE_INT
27129 arm_vector_alignment (const_tree type)
27130 {
27131 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27132
27133 if (TARGET_AAPCS_BASED)
27134 align = MIN (align, 64);
27135
27136 return align;
27137 }
27138
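/* Return the set of vector sizes, in bytes, for the auto-vectorizer to
   consider: 16 and 8 for Neon, unless -mvectorize-with-neon-double is
   given.  */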
27139 static unsigned int
27140 arm_autovectorize_vector_sizes (void)
27141 {
27142 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27143 }
27144
27145 static bool
27146 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27147 {
27148 /* Vectors which aren't in packed structures will not be less aligned than
27149 the natural alignment of their element type, so this is safe. */
27150 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27151 return !is_packed;
27152
27153 return default_builtin_vector_alignment_reachable (type, is_packed);
27154 }
27155
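/* Return true if a misaligned vector access of the given MODE and TYPE can
   be supported.  With Neon and unaligned access enabled, misaligned accesses
   are handled provided they are not to packed data.  */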
27156 static bool
27157 arm_builtin_support_vector_misalignment (machine_mode mode,
27158 const_tree type, int misalignment,
27159 bool is_packed)
27160 {
27161 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27162 {
27163 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27164
27165 if (is_packed)
27166 return align == 1;
27167
27168 /* If the misalignment is unknown, we should be able to handle the access
27169 so long as it is not to a member of a packed data structure. */
27170 if (misalignment == -1)
27171 return true;
27172
27173 /* Return true if the misalignment is a multiple of the natural alignment
27174 of the vector's element type. This is probably always going to be
27175 true in practice, since we've already established that this isn't a
27176 packed access. */
27177 return ((misalignment % align) == 0);
27178 }
27179
27180 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27181 is_packed);
27182 }
27183
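/* Adjust the sets of fixed and call-used registers for the selected target:
   the Thumb-1 high registers when optimizing for size, LR on Thumb-1, the
   VFP and iWMMXt register banks, the PIC register and the frame pointer.  */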
27184 static void
27185 arm_conditional_register_usage (void)
27186 {
27187 int regno;
27188
27189 if (TARGET_THUMB1 && optimize_size)
27190 {
27191 /* When optimizing for size on Thumb-1, it's better not
27192 to use the HI regs, because of the overhead of
27193 stacking them. */
27194 for (regno = FIRST_HI_REGNUM;
27195 regno <= LAST_HI_REGNUM; ++regno)
27196 fixed_regs[regno] = call_used_regs[regno] = 1;
27197 }
27198
27199 /* The link register can be clobbered by any branch insn,
27200 but we have no way to track that at present, so mark
27201 it as unavailable. */
27202 if (TARGET_THUMB1)
27203 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27204
27205 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27206 {
27207 /* VFPv3 registers are disabled when earlier VFP
27208 versions are selected due to the definition of
27209 LAST_VFP_REGNUM. */
27210 for (regno = FIRST_VFP_REGNUM;
27211 regno <= LAST_VFP_REGNUM; ++ regno)
27212 {
27213 fixed_regs[regno] = 0;
27214 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27215 || regno >= FIRST_VFP_REGNUM + 32;
27216 }
27217 }
27218
27219 if (TARGET_REALLY_IWMMXT)
27220 {
27221 regno = FIRST_IWMMXT_GR_REGNUM;
27222 /* The 2002/10/09 revision of the XScale ABI has wCG0
27223 and wCG1 as call-preserved registers. The 2002/11/21
27224 revision changed this so that all wCG registers are
27225 scratch registers. */
27226 for (regno = FIRST_IWMMXT_GR_REGNUM;
27227 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27228 fixed_regs[regno] = 0;
27229 /* The XScale ABI has wR0 - wR9 as scratch registers,
27230 the rest as call-preserved registers. */
27231 for (regno = FIRST_IWMMXT_REGNUM;
27232 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27233 {
27234 fixed_regs[regno] = 0;
27235 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27236 }
27237 }
27238
27239 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27240 {
27241 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27242 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27243 }
27244 else if (TARGET_APCS_STACK)
27245 {
27246 fixed_regs[10] = 1;
27247 call_used_regs[10] = 1;
27248 }
27249 /* -mcaller-super-interworking reserves r11 for calls to
27250 _interwork_r11_call_via_rN(). Making the register global
27251 is an easy way of ensuring that it remains valid for all
27252 calls. */
27253 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27254 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27255 {
27256 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27257 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27258 if (TARGET_CALLER_INTERWORKING)
27259 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27260 }
27261 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27262 }
27263
27264 static reg_class_t
27265 arm_preferred_rename_class (reg_class_t rclass)
27266 {
27267 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27268 using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
27269 and code size can be reduced. */
27270 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27271 return LO_REGS;
27272 else
27273 return NO_REGS;
27274 }
27275
27276 /* Compute the attribute "length" of insn "*push_multi".
27277 So this function MUST be kept in sync with that insn pattern. */
27278 int
27279 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27280 {
27281 int i, regno, hi_reg;
27282 int num_saves = XVECLEN (parallel_op, 0);
27283
27284 /* ARM mode. */
27285 if (TARGET_ARM)
27286 return 4;
27287 /* Thumb1 mode. */
27288 if (TARGET_THUMB1)
27289 return 2;
27290
27291 /* Thumb2 mode. */
27292 regno = REGNO (first_op);
27293 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27294 for (i = 1; i < num_saves && !hi_reg; i++)
27295 {
27296 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27297 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27298 }
27299
27300 if (!hi_reg)
27301 return 2;
27302 return 4;
27303 }
27304
27305 /* Compute the number of instructions emitted by output_move_double. */
27306 int
27307 arm_count_output_move_double_insns (rtx *operands)
27308 {
27309 int count;
27310 rtx ops[2];
27311 /* output_move_double may modify the operands array, so call it
27312 here on a copy of the array. */
27313 ops[0] = operands[0];
27314 ops[1] = operands[1];
27315 output_move_double (ops, false, &count);
27316 return count;
27317 }
27318
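/* If the reciprocal of the CONST_DOUBLE operand is an exact power of two,
   return its base-2 logarithm (the number of fractional bits); otherwise
   return 0.  */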
27319 int
27320 vfp3_const_double_for_fract_bits (rtx operand)
27321 {
27322 REAL_VALUE_TYPE r0;
27323
27324 if (!CONST_DOUBLE_P (operand))
27325 return 0;
27326
27327 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27328 if (exact_real_inverse (DFmode, &r0))
27329 {
27330 if (exact_real_truncate (DFmode, &r0))
27331 {
27332 HOST_WIDE_INT value = real_to_integer (&r0);
27333 value = value & 0xffffffff;
27334 if ((value != 0) && ( (value & (value - 1)) == 0))
27335 return int_log2 (value);
27336 }
27337 }
27338 return 0;
27339 }
27340
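/* As above, but without taking the reciprocal: return the base-2 logarithm
   of OPERAND if it is an exact power of two, otherwise 0.  */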
27341 int
27342 vfp3_const_double_for_bits (rtx operand)
27343 {
27344 REAL_VALUE_TYPE r0;
27345
27346 if (!CONST_DOUBLE_P (operand))
27347 return 0;
27348
27349 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27350 if (exact_real_truncate (DFmode, &r0))
27351 {
27352 HOST_WIDE_INT value = real_to_integer (&r0);
27353 value = value & 0xffffffff;
27354 if ((value != 0) && ( (value & (value - 1)) == 0))
27355 return int_log2 (value);
27356 }
27357
27358 return 0;
27359 }
27360 \f
27361 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27362
27363 static void
27364 arm_pre_atomic_barrier (enum memmodel model)
27365 {
27366 if (need_atomic_barrier_p (model, true))
27367 emit_insn (gen_memory_barrier ());
27368 }
27369
27370 static void
27371 arm_post_atomic_barrier (enum memmodel model)
27372 {
27373 if (need_atomic_barrier_p (model, false))
27374 emit_insn (gen_memory_barrier ());
27375 }
27376
27377 /* Emit the load-exclusive and store-exclusive instructions.
27378 Use acquire and release versions if necessary. */
27379
27380 static void
27381 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27382 {
27383 rtx (*gen) (rtx, rtx);
27384
27385 if (acq)
27386 {
27387 switch (mode)
27388 {
27389 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27390 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27391 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27392 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27393 default:
27394 gcc_unreachable ();
27395 }
27396 }
27397 else
27398 {
27399 switch (mode)
27400 {
27401 case QImode: gen = gen_arm_load_exclusiveqi; break;
27402 case HImode: gen = gen_arm_load_exclusivehi; break;
27403 case SImode: gen = gen_arm_load_exclusivesi; break;
27404 case DImode: gen = gen_arm_load_exclusivedi; break;
27405 default:
27406 gcc_unreachable ();
27407 }
27408 }
27409
27410 emit_insn (gen (rval, mem));
27411 }
27412
27413 static void
27414 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27415 rtx mem, bool rel)
27416 {
27417 rtx (*gen) (rtx, rtx, rtx);
27418
27419 if (rel)
27420 {
27421 switch (mode)
27422 {
27423 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27424 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27425 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27426 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27427 default:
27428 gcc_unreachable ();
27429 }
27430 }
27431 else
27432 {
27433 switch (mode)
27434 {
27435 case QImode: gen = gen_arm_store_exclusiveqi; break;
27436 case HImode: gen = gen_arm_store_exclusivehi; break;
27437 case SImode: gen = gen_arm_store_exclusivesi; break;
27438 case DImode: gen = gen_arm_store_exclusivedi; break;
27439 default:
27440 gcc_unreachable ();
27441 }
27442 }
27443
27444 emit_insn (gen (bval, rval, mem));
27445 }
27446
27447 /* Mark the previous jump instruction as unlikely. */
27448
27449 static void
27450 emit_unlikely_jump (rtx insn)
27451 {
27452 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27453
27454 insn = emit_jump_insn (insn);
27455 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27456 }
27457
27458 /* Expand a compare and swap pattern. */
27459
27460 void
27461 arm_expand_compare_and_swap (rtx operands[])
27462 {
27463 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27464 machine_mode mode;
27465 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27466
27467 bval = operands[0];
27468 rval = operands[1];
27469 mem = operands[2];
27470 oldval = operands[3];
27471 newval = operands[4];
27472 is_weak = operands[5];
27473 mod_s = operands[6];
27474 mod_f = operands[7];
27475 mode = GET_MODE (mem);
27476
27477 /* Normally the succ memory model must be stronger than fail, but in the
27478 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27479 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27480
27481 if (TARGET_HAVE_LDACQ
27482 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27483 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27484 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27485
27486 switch (mode)
27487 {
27488 case QImode:
27489 case HImode:
27490 /* For narrow modes, we're going to perform the comparison in SImode,
27491 so do the zero-extension now. */
27492 rval = gen_reg_rtx (SImode);
27493 oldval = convert_modes (SImode, mode, oldval, true);
27494 /* FALLTHRU */
27495
27496 case SImode:
27497 /* Force the value into a register if needed. We waited until after
27498 the zero-extension above to do this properly. */
27499 if (!arm_add_operand (oldval, SImode))
27500 oldval = force_reg (SImode, oldval);
27501 break;
27502
27503 case DImode:
27504 if (!cmpdi_operand (oldval, mode))
27505 oldval = force_reg (mode, oldval);
27506 break;
27507
27508 default:
27509 gcc_unreachable ();
27510 }
27511
27512 switch (mode)
27513 {
27514 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27515 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27516 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27517 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27518 default:
27519 gcc_unreachable ();
27520 }
27521
27522 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27523
27524 if (mode == QImode || mode == HImode)
27525 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27526
27527 /* In all cases, we arrange for success to be signaled by Z set.
27528 This arrangement allows for the boolean result to be used directly
27529 in a subsequent branch, post optimization. */
27530 x = gen_rtx_REG (CCmode, CC_REGNUM);
27531 x = gen_rtx_EQ (SImode, x, const0_rtx);
27532 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27533 }
27534
27535 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27536 another memory store between the load-exclusive and store-exclusive can
27537 reset the monitor from Exclusive to Open state. This means we must wait
27538 until after reload to split the pattern, lest we get a register spill in
27539 the middle of the atomic sequence. */
27540
27541 void
27542 arm_split_compare_and_swap (rtx operands[])
27543 {
27544 rtx rval, mem, oldval, newval, scratch;
27545 machine_mode mode;
27546 enum memmodel mod_s, mod_f;
27547 bool is_weak;
27548 rtx_code_label *label1, *label2;
27549 rtx x, cond;
27550
27551 rval = operands[0];
27552 mem = operands[1];
27553 oldval = operands[2];
27554 newval = operands[3];
27555 is_weak = (operands[4] != const0_rtx);
27556 mod_s = (enum memmodel) INTVAL (operands[5]);
27557 mod_f = (enum memmodel) INTVAL (operands[6]);
27558 scratch = operands[7];
27559 mode = GET_MODE (mem);
27560
27561 bool use_acquire = TARGET_HAVE_LDACQ
27562 && !(mod_s == MEMMODEL_RELAXED
27563 || mod_s == MEMMODEL_CONSUME
27564 || mod_s == MEMMODEL_RELEASE);
27565
27566 bool use_release = TARGET_HAVE_LDACQ
27567 && !(mod_s == MEMMODEL_RELAXED
27568 || mod_s == MEMMODEL_CONSUME
27569 || mod_s == MEMMODEL_ACQUIRE);
27570
27571 /* Checks whether a barrier is needed and emits one accordingly. */
27572 if (!(use_acquire || use_release))
27573 arm_pre_atomic_barrier (mod_s);
27574
27575 label1 = NULL;
27576 if (!is_weak)
27577 {
27578 label1 = gen_label_rtx ();
27579 emit_label (label1);
27580 }
27581 label2 = gen_label_rtx ();
27582
27583 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27584
27585 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27586 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27587 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27588 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27589 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27590
27591 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27592
27593 /* Weak or strong, we want EQ to be true for success, so that we
27594 match the flags that we got from the compare above. */
27595 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27596 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27597 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27598
27599 if (!is_weak)
27600 {
27601 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27602 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27603 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27604 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27605 }
27606
27607 if (mod_f != MEMMODEL_RELAXED)
27608 emit_label (label2);
27609
27610 /* Checks whether a barrier is needed and emits one accordingly. */
27611 if (!(use_acquire || use_release))
27612 arm_post_atomic_barrier (mod_s);
27613
27614 if (mod_f == MEMMODEL_RELAXED)
27615 emit_label (label2);
27616 }
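/* For comparison at the source level, the IS_WEAK operand above comes from
   the "weak" argument of __atomic_compare_exchange_n: a weak CAS may fail
   spuriously, so the splitter omits the branch back to label1 and leaves
   retrying to the caller.  A minimal sketch (names are illustrative only):

     #include <stdbool.h>

     bool
     weak_cas (int *p, int *expected, int desired)
     {
       return __atomic_compare_exchange_n (p, expected, desired,
                                           true,              // weak
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
     }  */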
27617
27618 void
27619 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27620 rtx value, rtx model_rtx, rtx cond)
27621 {
27622 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27623 machine_mode mode = GET_MODE (mem);
27624 machine_mode wmode = (mode == DImode ? DImode : SImode);
27625 rtx_code_label *label;
27626 rtx x;
27627
27628 bool use_acquire = TARGET_HAVE_LDACQ
27629 && !(model == MEMMODEL_RELAXED
27630 || model == MEMMODEL_CONSUME
27631 || model == MEMMODEL_RELEASE);
27632
27633 bool use_release = TARGET_HAVE_LDACQ
27634 && !(model == MEMMODEL_RELAXED
27635 || model == MEMMODEL_CONSUME
27636 || model == MEMMODEL_ACQUIRE);
27637
27638 /* Checks whether a barrier is needed and emits one accordingly. */
27639 if (!(use_acquire || use_release))
27640 arm_pre_atomic_barrier (model);
27641
27642 label = gen_label_rtx ();
27643 emit_label (label);
27644
27645 if (new_out)
27646 new_out = gen_lowpart (wmode, new_out);
27647 if (old_out)
27648 old_out = gen_lowpart (wmode, old_out);
27649 else
27650 old_out = new_out;
27651 value = simplify_gen_subreg (wmode, value, mode, 0);
27652
27653 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27654
27655 switch (code)
27656 {
27657 case SET:
27658 new_out = value;
27659 break;
27660
27661 case NOT:
27662 x = gen_rtx_AND (wmode, old_out, value);
27663 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27664 x = gen_rtx_NOT (wmode, new_out);
27665 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27666 break;
27667
27668 case MINUS:
27669 if (CONST_INT_P (value))
27670 {
27671 value = GEN_INT (-INTVAL (value));
27672 code = PLUS;
27673 }
27674 /* FALLTHRU */
27675
27676 case PLUS:
27677 if (mode == DImode)
27678 {
27679 /* DImode plus/minus need to clobber flags. */
27680 /* The adddi3 and subdi3 patterns are incorrectly written so that
27681 they require matching operands, even when we could easily support
27682 three operands. Thankfully, this can be fixed up post-splitting,
27683 as the individual add+adc patterns do accept three operands and
27684 post-reload cprop can make these moves go away. */
27685 emit_move_insn (new_out, old_out);
27686 if (code == PLUS)
27687 x = gen_adddi3 (new_out, new_out, value);
27688 else
27689 x = gen_subdi3 (new_out, new_out, value);
27690 emit_insn (x);
27691 break;
27692 }
27693 /* FALLTHRU */
27694
27695 default:
27696 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27697 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27698 break;
27699 }
27700
27701 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27702 use_release);
27703
27704 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27705 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27706
27707 /* Checks whether a barrier is needed and emits one accordingly. */
27708 if (!(use_acquire || use_release))
27709 arm_post_atomic_barrier (model);
27710 }
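/* The rtx codes handled above correspond to GCC's __atomic fetch-and-op
   builtins; for instance PLUS is reached from __atomic_fetch_add, and the
   NOT/AND pair above implements the "nand" operation ~(old & value).  A
   minimal sketch (function and variable names are made up):

     int
     fetch_ops (int *counter, int mask)
     {
       int before = __atomic_fetch_add (counter, 4, __ATOMIC_ACQ_REL);
       __atomic_fetch_nand (counter, mask, __ATOMIC_RELAXED);
       return before;
     }  */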
27711 \f
27712 #define MAX_VECT_LEN 16
27713
27714 struct expand_vec_perm_d
27715 {
27716 rtx target, op0, op1;
27717 unsigned char perm[MAX_VECT_LEN];
27718 machine_mode vmode;
27719 unsigned char nelt;
27720 bool one_vector_p;
27721 bool testing_p;
27722 };
27723
27724 /* Generate a variable permutation. */
27725
27726 static void
27727 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27728 {
27729 machine_mode vmode = GET_MODE (target);
27730 bool one_vector_p = rtx_equal_p (op0, op1);
27731
27732 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27733 gcc_checking_assert (GET_MODE (op0) == vmode);
27734 gcc_checking_assert (GET_MODE (op1) == vmode);
27735 gcc_checking_assert (GET_MODE (sel) == vmode);
27736 gcc_checking_assert (TARGET_NEON);
27737
27738 if (one_vector_p)
27739 {
27740 if (vmode == V8QImode)
27741 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27742 else
27743 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27744 }
27745 else
27746 {
27747 rtx pair;
27748
27749 if (vmode == V8QImode)
27750 {
27751 pair = gen_reg_rtx (V16QImode);
27752 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27753 pair = gen_lowpart (TImode, pair);
27754 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27755 }
27756 else
27757 {
27758 pair = gen_reg_rtx (OImode);
27759 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27760 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27761 }
27762 }
27763 }
27764
27765 void
27766 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27767 {
27768 machine_mode vmode = GET_MODE (target);
27769 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27770 bool one_vector_p = rtx_equal_p (op0, op1);
27771 rtx rmask[MAX_VECT_LEN], mask;
27772
27773 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27774 numbering of elements for big-endian, we must reverse the order. */
27775 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27776
27777 /* The VTBL instruction does not use a modulo index, so we must take care
27778 of that ourselves. */
27779 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27780 for (i = 0; i < nelt; ++i)
27781 rmask[i] = mask;
27782 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27783 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27784
27785 arm_expand_vec_perm_1 (target, op0, op1, sel);
27786 }
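/* At the source level a variable permutation like the one expanded above
   can be written with GCC's vector extensions; __builtin_shuffle treats
   selector elements modulo twice the element count, which is why SEL is
   masked before the VTBL expansion.  A minimal sketch (the type and
   function names are made up):

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     var_perm (v8qi a, v8qi b, v8qi sel)
     {
       // Two-operand variable shuffle; indices are reduced modulo 16.
       return __builtin_shuffle (a, b, sel);
     }  */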
27787
27788 /* Generate or test for an insn that supports a constant permutation. */
27789
27790 /* Recognize patterns for the VUZP insns. */
27791
27792 static bool
27793 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27794 {
27795 unsigned int i, odd, mask, nelt = d->nelt;
27796 rtx out0, out1, in0, in1, x;
27797 rtx (*gen)(rtx, rtx, rtx, rtx);
27798
27799 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27800 return false;
27801
27802 /* Note that these are little-endian tests. Adjust for big-endian later. */
27803 if (d->perm[0] == 0)
27804 odd = 0;
27805 else if (d->perm[0] == 1)
27806 odd = 1;
27807 else
27808 return false;
27809 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27810
27811 for (i = 0; i < nelt; i++)
27812 {
27813 unsigned elt = (i * 2 + odd) & mask;
27814 if (d->perm[i] != elt)
27815 return false;
27816 }
27817
27818 /* Success! */
27819 if (d->testing_p)
27820 return true;
27821
27822 switch (d->vmode)
27823 {
27824 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27825 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27826 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27827 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27828 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27829 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27830 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27831 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27832 default:
27833 gcc_unreachable ();
27834 }
27835
27836 in0 = d->op0;
27837 in1 = d->op1;
27838 if (BYTES_BIG_ENDIAN)
27839 {
27840 x = in0, in0 = in1, in1 = x;
27841 odd = !odd;
27842 }
27843
27844 out0 = d->target;
27845 out1 = gen_reg_rtx (d->vmode);
27846 if (odd)
27847 x = out0, out0 = out1, out1 = x;
27848
27849 emit_insn (gen (out0, in0, in1, out1));
27850 return true;
27851 }
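/* An example of a constant selector that fits the test above (perm[i] ==
   2 * i + odd, with odd == 0), written with GCC's vector extensions; the
   type and function names are made up for illustration:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     even_elems (v4hi a, v4hi b)
     {
       // Select the even-numbered elements of the concatenation {a, b}.
       const v4hi sel = { 0, 2, 4, 6 };
       return __builtin_shuffle (a, b, sel);
     }  */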
27852
27853 /* Recognize patterns for the VZIP insns. */
27854
27855 static bool
27856 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27857 {
27858 unsigned int i, high, mask, nelt = d->nelt;
27859 rtx out0, out1, in0, in1, x;
27860 rtx (*gen)(rtx, rtx, rtx, rtx);
27861
27862 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27863 return false;
27864
27865 /* Note that these are little-endian tests. Adjust for big-endian later. */
27866 high = nelt / 2;
27867 if (d->perm[0] == high)
27868 ;
27869 else if (d->perm[0] == 0)
27870 high = 0;
27871 else
27872 return false;
27873 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27874
27875 for (i = 0; i < nelt / 2; i++)
27876 {
27877 unsigned elt = (i + high) & mask;
27878 if (d->perm[i * 2] != elt)
27879 return false;
27880 elt = (elt + nelt) & mask;
27881 if (d->perm[i * 2 + 1] != elt)
27882 return false;
27883 }
27884
27885 /* Success! */
27886 if (d->testing_p)
27887 return true;
27888
27889 switch (d->vmode)
27890 {
27891 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27892 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27893 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27894 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27895 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27896 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27897 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27898 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27899 default:
27900 gcc_unreachable ();
27901 }
27902
27903 in0 = d->op0;
27904 in1 = d->op1;
27905 if (BYTES_BIG_ENDIAN)
27906 {
27907 x = in0, in0 = in1, in1 = x;
27908 high = !high;
27909 }
27910
27911 out0 = d->target;
27912 out1 = gen_reg_rtx (d->vmode);
27913 if (high)
27914 x = out0, out0 = out1, out1 = x;
27915
27916 emit_insn (gen (out0, in0, in1, out1));
27917 return true;
27918 }
27919
27920 /* Recognize patterns for the VREV insns. */
27921
27922 static bool
27923 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27924 {
27925 unsigned int i, j, diff, nelt = d->nelt;
27926 rtx (*gen)(rtx, rtx);
27927
27928 if (!d->one_vector_p)
27929 return false;
27930
27931 diff = d->perm[0];
27932 switch (diff)
27933 {
27934 case 7:
27935 switch (d->vmode)
27936 {
27937 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27938 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27939 default:
27940 return false;
27941 }
27942 break;
27943 case 3:
27944 switch (d->vmode)
27945 {
27946 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27947 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27948 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27949 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27950 default:
27951 return false;
27952 }
27953 break;
27954 case 1:
27955 switch (d->vmode)
27956 {
27957 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27958 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27959 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27960 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27961 case V4SImode: gen = gen_neon_vrev64v4si; break;
27962 case V2SImode: gen = gen_neon_vrev64v2si; break;
27963 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27964 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27965 default:
27966 return false;
27967 }
27968 break;
27969 default:
27970 return false;
27971 }
27972
27973 for (i = 0; i < nelt ; i += diff + 1)
27974 for (j = 0; j <= diff; j += 1)
27975 {
27976 /* This is guaranteed to be true because the value of diff
27977 is 7, 3 or 1, and we should have enough elements in the
27978 vector to satisfy this. Getting a vector mask with a
27979 value of diff other than these values implies that
27980 something is wrong by the time we get here. */
27981 gcc_assert (i + j < nelt);
27982 if (d->perm[i + j] != i + diff - j)
27983 return false;
27984 }
27985
27986 /* Success! */
27987 if (d->testing_p)
27988 return true;
27989
27990 emit_insn (gen (d->target, d->op0));
27991 return true;
27992 }
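/* An example of a selector with diff == 1 that passes the checks above
   (reverse each adjacent pair of elements), using GCC's vector
   extensions; the names are made up for illustration:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     swap_pairs (v4hi a)
     {
       // perm = {1, 0, 3, 2}: element-pair reversal within 32-bit chunks.
       const v4hi sel = { 1, 0, 3, 2 };
       return __builtin_shuffle (a, sel);
     }  */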
27993
27994 /* Recognize patterns for the VTRN insns. */
27995
27996 static bool
27997 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27998 {
27999 unsigned int i, odd, mask, nelt = d->nelt;
28000 rtx out0, out1, in0, in1, x;
28001 rtx (*gen)(rtx, rtx, rtx, rtx);
28002
28003 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28004 return false;
28005
28006 /* Note that these are little-endian tests. Adjust for big-endian later. */
28007 if (d->perm[0] == 0)
28008 odd = 0;
28009 else if (d->perm[0] == 1)
28010 odd = 1;
28011 else
28012 return false;
28013 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28014
28015 for (i = 0; i < nelt; i += 2)
28016 {
28017 if (d->perm[i] != i + odd)
28018 return false;
28019 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28020 return false;
28021 }
28022
28023 /* Success! */
28024 if (d->testing_p)
28025 return true;
28026
28027 switch (d->vmode)
28028 {
28029 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28030 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28031 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28032 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28033 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28034 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28035 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28036 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28037 default:
28038 gcc_unreachable ();
28039 }
28040
28041 in0 = d->op0;
28042 in1 = d->op1;
28043 if (BYTES_BIG_ENDIAN)
28044 {
28045 x = in0, in0 = in1, in1 = x;
28046 odd = !odd;
28047 }
28048
28049 out0 = d->target;
28050 out1 = gen_reg_rtx (d->vmode);
28051 if (odd)
28052 x = out0, out0 = out1, out1 = x;
28053
28054 emit_insn (gen (out0, in0, in1, out1));
28055 return true;
28056 }
28057
28058 /* Recognize patterns for the VEXT insns. */
28059
28060 static bool
28061 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28062 {
28063 unsigned int i, nelt = d->nelt;
28064 rtx (*gen) (rtx, rtx, rtx, rtx);
28065 rtx offset;
28066
28067 unsigned int location;
28068
28069 unsigned int next = d->perm[0] + 1;
28070
28071 /* TODO: Handle GCC's numbering of elements for big-endian. */
28072 if (BYTES_BIG_ENDIAN)
28073 return false;
28074
28075 /* Check if the extracted indexes are increasing by one. */
28076 for (i = 1; i < nelt; next++, i++)
28077 {
28078 /* If we hit the most significant element of the 2nd vector in
28079 the previous iteration, no need to test further. */
28080 if (next == 2 * nelt)
28081 return false;
28082
28083 /* If we are operating on only one vector: it could be a
28084 rotation. If there are only two elements of size < 64, let
28085 arm_evpc_neon_vrev catch it. */
28086 if (d->one_vector_p && (next == nelt))
28087 {
28088 if ((nelt == 2) && (d->vmode != V2DImode))
28089 return false;
28090 else
28091 next = 0;
28092 }
28093
28094 if (d->perm[i] != next)
28095 return false;
28096 }
28097
28098 location = d->perm[0];
28099
28100 switch (d->vmode)
28101 {
28102 case V16QImode: gen = gen_neon_vextv16qi; break;
28103 case V8QImode: gen = gen_neon_vextv8qi; break;
28104 case V4HImode: gen = gen_neon_vextv4hi; break;
28105 case V8HImode: gen = gen_neon_vextv8hi; break;
28106 case V2SImode: gen = gen_neon_vextv2si; break;
28107 case V4SImode: gen = gen_neon_vextv4si; break;
28108 case V2SFmode: gen = gen_neon_vextv2sf; break;
28109 case V4SFmode: gen = gen_neon_vextv4sf; break;
28110 case V2DImode: gen = gen_neon_vextv2di; break;
28111 default:
28112 return false;
28113 }
28114
28115 /* Success! */
28116 if (d->testing_p)
28117 return true;
28118
28119 offset = GEN_INT (location);
28120 emit_insn (gen (d->target, d->op0, d->op1, offset));
28121 return true;
28122 }
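/* An example of a selector with consecutive indices starting at a nonzero
   location, the shape recognized above (here location == 1), using GCC's
   vector extensions; the names are made up for illustration:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     extract_window (v4hi a, v4hi b)
     {
       // perm = {1, 2, 3, 4}: a window crossing the boundary of a and b.
       const v4hi sel = { 1, 2, 3, 4 };
       return __builtin_shuffle (a, b, sel);
     }  */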
28123
28124 /* The NEON VTBL instruction is a fully variable permutation that's even
28125 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28126 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28127 can do slightly better by expanding this as a constant where we don't
28128 have to apply a mask. */
28129
28130 static bool
28131 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28132 {
28133 rtx rperm[MAX_VECT_LEN], sel;
28134 machine_mode vmode = d->vmode;
28135 unsigned int i, nelt = d->nelt;
28136
28137 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28138 numbering of elements for big-endian, we must reverse the order. */
28139 if (BYTES_BIG_ENDIAN)
28140 return false;
28141
28142 if (d->testing_p)
28143 return true;
28144
28145 /* Generic code will try constant permutation twice. Once with the
28146 original mode and again with the elements lowered to QImode.
28147 So wait and don't do the selector expansion ourselves. */
28148 if (vmode != V8QImode && vmode != V16QImode)
28149 return false;
28150
28151 for (i = 0; i < nelt; ++i)
28152 rperm[i] = GEN_INT (d->perm[i]);
28153 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28154 sel = force_reg (vmode, sel);
28155
28156 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28157 return true;
28158 }
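/* An example of an irregular constant selector that matches none of the
   VUZP/VZIP/VREV/VTRN/VEXT patterns and therefore falls through to the
   table-lookup expansion above (little-endian only); the names are made
   up for illustration:

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     scatter (v8qi a, v8qi b)
     {
       const v8qi sel = { 3, 7, 0, 12, 5, 9, 1, 14 };
       return __builtin_shuffle (a, b, sel);
     }  */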
28159
28160 static bool
28161 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28162 {
28163 /* Check if the input mask matches vext before reordering the
28164 operands. */
28165 if (TARGET_NEON)
28166 if (arm_evpc_neon_vext (d))
28167 return true;
28168
28169 /* The pattern matching functions above are written to look for a small
28170 number to begin the sequence (0, 1, N/2). If we begin with an index
28171 from the second operand, we can swap the operands. */
28172 if (d->perm[0] >= d->nelt)
28173 {
28174 unsigned i, nelt = d->nelt;
28175 rtx x;
28176
28177 for (i = 0; i < nelt; ++i)
28178 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28179
28180 x = d->op0;
28181 d->op0 = d->op1;
28182 d->op1 = x;
28183 }
28184
28185 if (TARGET_NEON)
28186 {
28187 if (arm_evpc_neon_vuzp (d))
28188 return true;
28189 if (arm_evpc_neon_vzip (d))
28190 return true;
28191 if (arm_evpc_neon_vrev (d))
28192 return true;
28193 if (arm_evpc_neon_vtrn (d))
28194 return true;
28195 return arm_evpc_neon_vtbl (d);
28196 }
28197 return false;
28198 }
28199
28200 /* Expand a vec_perm_const pattern. */
28201
28202 bool
28203 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28204 {
28205 struct expand_vec_perm_d d;
28206 int i, nelt, which;
28207
28208 d.target = target;
28209 d.op0 = op0;
28210 d.op1 = op1;
28211
28212 d.vmode = GET_MODE (target);
28213 gcc_assert (VECTOR_MODE_P (d.vmode));
28214 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28215 d.testing_p = false;
28216
28217 for (i = which = 0; i < nelt; ++i)
28218 {
28219 rtx e = XVECEXP (sel, 0, i);
28220 int ei = INTVAL (e) & (2 * nelt - 1);
28221 which |= (ei < nelt ? 1 : 2);
28222 d.perm[i] = ei;
28223 }
28224
28225 switch (which)
28226 {
28227 default:
28228 gcc_unreachable();
28229
28230 case 3:
28231 d.one_vector_p = false;
28232 if (!rtx_equal_p (op0, op1))
28233 break;
28234
28235 /* The elements of PERM do not suggest that only the first operand
28236 is used, but both operands are identical. Allow easier matching
28237 of the permutation by folding the permutation into the single
28238 input vector. */
28239 /* FALLTHRU */
28240 case 2:
28241 for (i = 0; i < nelt; ++i)
28242 d.perm[i] &= nelt - 1;
28243 d.op0 = op1;
28244 d.one_vector_p = true;
28245 break;
28246
28247 case 1:
28248 d.op1 = op0;
28249 d.one_vector_p = true;
28250 break;
28251 }
28252
28253 return arm_expand_vec_perm_const_1 (&d);
28254 }
28255
28256 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28257
28258 static bool
28259 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28260 const unsigned char *sel)
28261 {
28262 struct expand_vec_perm_d d;
28263 unsigned int i, nelt, which;
28264 bool ret;
28265
28266 d.vmode = vmode;
28267 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28268 d.testing_p = true;
28269 memcpy (d.perm, sel, nelt);
28270
28271 /* Categorize the set of elements in the selector. */
28272 for (i = which = 0; i < nelt; ++i)
28273 {
28274 unsigned char e = d.perm[i];
28275 gcc_assert (e < 2 * nelt);
28276 which |= (e < nelt ? 1 : 2);
28277 }
28278
28279 /* If all elements are from the second vector, fold them to the first. */

28280 if (which == 2)
28281 for (i = 0; i < nelt; ++i)
28282 d.perm[i] -= nelt;
28283
28284 /* Check whether the mask can be applied to the vector type. */
28285 d.one_vector_p = (which != 3);
28286
28287 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28288 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28289 if (!d.one_vector_p)
28290 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28291
28292 start_sequence ();
28293 ret = arm_expand_vec_perm_const_1 (&d);
28294 end_sequence ();
28295
28296 return ret;
28297 }
28298
28299 bool
28300 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28301 {
28302 /* If we are soft float and we do not have ldrd
28303 then all auto increment forms are ok. */
28304 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28305 return true;
28306
28307 switch (code)
28308 {
28309 /* Post-increment and pre-decrement are supported for all
28310 instruction forms except for vector forms. */
28311 case ARM_POST_INC:
28312 case ARM_PRE_DEC:
28313 if (VECTOR_MODE_P (mode))
28314 {
28315 if (code != ARM_PRE_DEC)
28316 return true;
28317 else
28318 return false;
28319 }
28320
28321 return true;
28322
28323 case ARM_POST_DEC:
28324 case ARM_PRE_INC:
28325 /* Without LDRD, and with a mode size greater than the
28326 word size, there is no point in auto-incrementing
28327 because ldm and stm will not have these forms. */
28328 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28329 return false;
28330
28331 /* Vector and floating point modes do not support
28332 these auto increment forms. */
28333 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28334 return false;
28335
28336 return true;
28337
28338 default:
28339 return false;
28340
28341 }
28342
28343 return false;
28344 }
28345
28346 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28347 on ARM, since we know that shifts by negative amounts are no-ops.
28348 Additionally, the default expansion code is not available or suitable
28349 for post-reload insn splits (this can occur when the register allocator
28350 chooses not to do a shift in NEON).
28351
28352 This function is used in both initial expand and post-reload splits, and
28353 handles all kinds of 64-bit shifts.
28354
28355 Input requirements:
28356 - It is safe for the input and output to be the same register, but
28357 early-clobber rules apply for the shift amount and scratch registers.
28358 - Shift by register requires both scratch registers. In all other cases
28359 the scratch registers may be NULL.
28360 - Ashiftrt by a register also clobbers the CC register. */
28361 void
28362 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28363 rtx amount, rtx scratch1, rtx scratch2)
28364 {
28365 rtx out_high = gen_highpart (SImode, out);
28366 rtx out_low = gen_lowpart (SImode, out);
28367 rtx in_high = gen_highpart (SImode, in);
28368 rtx in_low = gen_lowpart (SImode, in);
28369
28370 /* Terminology:
28371 in = the register pair containing the input value.
28372 out = the destination register pair.
28373 up = the high- or low-part of each pair.
28374 down = the opposite part to "up".
28375 In a shift, we can consider bits to shift from "up"-stream to
28376 "down"-stream, so in a left-shift "up" is the low-part and "down"
28377 is the high-part of each register pair. */
28378
28379 rtx out_up = code == ASHIFT ? out_low : out_high;
28380 rtx out_down = code == ASHIFT ? out_high : out_low;
28381 rtx in_up = code == ASHIFT ? in_low : in_high;
28382 rtx in_down = code == ASHIFT ? in_high : in_low;
28383
28384 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28385 gcc_assert (out
28386 && (REG_P (out) || GET_CODE (out) == SUBREG)
28387 && GET_MODE (out) == DImode);
28388 gcc_assert (in
28389 && (REG_P (in) || GET_CODE (in) == SUBREG)
28390 && GET_MODE (in) == DImode);
28391 gcc_assert (amount
28392 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28393 && GET_MODE (amount) == SImode)
28394 || CONST_INT_P (amount)));
28395 gcc_assert (scratch1 == NULL
28396 || (GET_CODE (scratch1) == SCRATCH)
28397 || (GET_MODE (scratch1) == SImode
28398 && REG_P (scratch1)));
28399 gcc_assert (scratch2 == NULL
28400 || (GET_CODE (scratch2) == SCRATCH)
28401 || (GET_MODE (scratch2) == SImode
28402 && REG_P (scratch2)));
28403 gcc_assert (!REG_P (out) || !REG_P (amount)
28404 || !HARD_REGISTER_P (out)
28405 || (REGNO (out) != REGNO (amount)
28406 && REGNO (out) + 1 != REGNO (amount)));
28407
28408 /* Macros to make following code more readable. */
28409 #define SUB_32(DEST,SRC) \
28410 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28411 #define RSB_32(DEST,SRC) \
28412 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28413 #define SUB_S_32(DEST,SRC) \
28414 gen_addsi3_compare0 ((DEST), (SRC), \
28415 GEN_INT (-32))
28416 #define SET(DEST,SRC) \
28417 gen_rtx_SET (SImode, (DEST), (SRC))
28418 #define SHIFT(CODE,SRC,AMOUNT) \
28419 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28420 #define LSHIFT(CODE,SRC,AMOUNT) \
28421 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28422 SImode, (SRC), (AMOUNT))
28423 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28424 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28425 SImode, (SRC), (AMOUNT))
28426 #define ORR(A,B) \
28427 gen_rtx_IOR (SImode, (A), (B))
28428 #define BRANCH(COND,LABEL) \
28429 gen_arm_cond_branch ((LABEL), \
28430 gen_rtx_ ## COND (CCmode, cc_reg, \
28431 const0_rtx), \
28432 cc_reg)
28433
28434 /* Shifts by register and shifts by constant are handled separately. */
28435 if (CONST_INT_P (amount))
28436 {
28437 /* We have a shift-by-constant. */
28438
28439 /* First, handle out-of-range shift amounts.
28440 In both cases we try to match the result that an ARM instruction in a
28441 shift-by-register would give. This helps reduce execution
28442 differences between optimization levels, but it won't stop other
28443 parts of the compiler doing different things. This is "undefined
28444 behaviour", in any case. */
28445 if (INTVAL (amount) <= 0)
28446 emit_insn (gen_movdi (out, in));
28447 else if (INTVAL (amount) >= 64)
28448 {
28449 if (code == ASHIFTRT)
28450 {
28451 rtx const31_rtx = GEN_INT (31);
28452 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28453 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28454 }
28455 else
28456 emit_insn (gen_movdi (out, const0_rtx));
28457 }
28458
28459 /* Now handle valid shifts. */
28460 else if (INTVAL (amount) < 32)
28461 {
28462 /* Shifts by a constant less than 32. */
28463 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28464
28465 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28466 emit_insn (SET (out_down,
28467 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28468 out_down)));
28469 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28470 }
28471 else
28472 {
28473 /* Shifts by a constant greater than 31. */
28474 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28475
28476 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28477 if (code == ASHIFTRT)
28478 emit_insn (gen_ashrsi3 (out_up, in_up,
28479 GEN_INT (31)));
28480 else
28481 emit_insn (SET (out_up, const0_rtx));
28482 }
28483 }
28484 else
28485 {
28486 /* We have a shift-by-register. */
28487 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28488
28489 /* This alternative requires the scratch registers. */
28490 gcc_assert (scratch1 && REG_P (scratch1));
28491 gcc_assert (scratch2 && REG_P (scratch2));
28492
28493 /* We will need the values "amount-32" and "32-amount" later.
28494 Swapping them around now allows the later code to be more general. */
28495 switch (code)
28496 {
28497 case ASHIFT:
28498 emit_insn (SUB_32 (scratch1, amount));
28499 emit_insn (RSB_32 (scratch2, amount));
28500 break;
28501 case ASHIFTRT:
28502 emit_insn (RSB_32 (scratch1, amount));
28503 /* Also set CC = amount > 32. */
28504 emit_insn (SUB_S_32 (scratch2, amount));
28505 break;
28506 case LSHIFTRT:
28507 emit_insn (RSB_32 (scratch1, amount));
28508 emit_insn (SUB_32 (scratch2, amount));
28509 break;
28510 default:
28511 gcc_unreachable ();
28512 }
28513
28514 /* Emit code like this:
28515
28516 arithmetic-left:
28517 out_down = in_down << amount;
28518 out_down = (in_up << (amount - 32)) | out_down;
28519 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28520 out_up = in_up << amount;
28521
28522 arithmetic-right:
28523 out_down = in_down >> amount;
28524 out_down = (in_up << (32 - amount)) | out_down;
28525 if (amount < 32)
28526 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28527 out_up = in_up << amount;
28528
28529 logical-right:
28530 out_down = in_down >> amount;
28531 out_down = (in_up << (32 - amount)) | out_down;
28532 if (amount < 32)
28533 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28534 out_up = in_up << amount;
28535
28536 The ARM and Thumb2 variants are the same but implemented slightly
28537 differently. If this were only called during expand we could just
28538 use the Thumb2 case and let combine do the right thing, but this
28539 can also be called from post-reload splitters. */
28540
28541 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28542
28543 if (!TARGET_THUMB2)
28544 {
28545 /* Emit code for ARM mode. */
28546 emit_insn (SET (out_down,
28547 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28548 if (code == ASHIFTRT)
28549 {
28550 rtx_code_label *done_label = gen_label_rtx ();
28551 emit_jump_insn (BRANCH (LT, done_label));
28552 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28553 out_down)));
28554 emit_label (done_label);
28555 }
28556 else
28557 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28558 out_down)));
28559 }
28560 else
28561 {
28562 /* Emit code for Thumb2 mode.
28563 Thumb2 can't do shift and or in one insn. */
28564 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28565 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28566
28567 if (code == ASHIFTRT)
28568 {
28569 rtx_code_label *done_label = gen_label_rtx ();
28570 emit_jump_insn (BRANCH (LT, done_label));
28571 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28572 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28573 emit_label (done_label);
28574 }
28575 else
28576 {
28577 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28578 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28579 }
28580 }
28581
28582 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28583 }
28584
28585 #undef SUB_32
28586 #undef RSB_32
28587 #undef SUB_S_32
28588 #undef SET
28589 #undef SHIFT
28590 #undef LSHIFT
28591 #undef REV_LSHIFT
28592 #undef ORR
28593 #undef BRANCH
28594 }
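/* A plain C rendering of the constant-shift case above for a left shift
   by 0 < N < 32, showing the identity the emitted sequence relies on;
   the function name and the use of <stdint.h> are illustrative only:

     #include <stdint.h>

     void
     lshift64_by_const (uint32_t in_lo, uint32_t in_hi, unsigned n,
                        uint32_t *out_lo, uint32_t *out_hi)
     {
       // Matches the "shift by a constant less than 32" branch: the high
       // word picks up the bits shifted out of the low word.
       *out_hi = (in_hi << n) | (in_lo >> (32 - n));
       *out_lo = in_lo << n;
     }  */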
28595
28596
28597 /* Return true if COMPARISON is a valid comparison operation, and
28598 put the operands into a form that is valid. */
28599 bool
28600 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28601 {
28602 enum rtx_code code = GET_CODE (*comparison);
28603 int code_int;
28604 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28605 ? GET_MODE (*op2) : GET_MODE (*op1);
28606
28607 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28608
28609 if (code == UNEQ || code == LTGT)
28610 return false;
28611
28612 code_int = (int)code;
28613 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28614 PUT_CODE (*comparison, (enum rtx_code)code_int);
28615
28616 switch (mode)
28617 {
28618 case SImode:
28619 if (!arm_add_operand (*op1, mode))
28620 *op1 = force_reg (mode, *op1);
28621 if (!arm_add_operand (*op2, mode))
28622 *op2 = force_reg (mode, *op2);
28623 return true;
28624
28625 case DImode:
28626 if (!cmpdi_operand (*op1, mode))
28627 *op1 = force_reg (mode, *op1);
28628 if (!cmpdi_operand (*op2, mode))
28629 *op2 = force_reg (mode, *op2);
28630 return true;
28631
28632 case SFmode:
28633 case DFmode:
28634 if (!arm_float_compare_operand (*op1, mode))
28635 *op1 = force_reg (mode, *op1);
28636 if (!arm_float_compare_operand (*op2, mode))
28637 *op2 = force_reg (mode, *op2);
28638 return true;
28639 default:
28640 break;
28641 }
28642
28643 return false;
28644
28645 }
28646
28647 /* Maximum number of instructions to set block of memory. */
28648 static int
28649 arm_block_set_max_insns (void)
28650 {
28651 if (optimize_function_for_size_p (cfun))
28652 return 4;
28653 else
28654 return current_tune->max_insns_inline_memset;
28655 }
28656
28657 /* Return TRUE if it's profitable to set a block of memory for the
28658 non-vectorized case. VAL is the value to set the memory
28659 with. LENGTH is the number of bytes to set. ALIGN is the
28660 alignment of the destination memory in bytes. UNALIGNED_P
28661 is TRUE if we can only set the memory with instructions
28662 meeting alignment requirements. USE_STRD_P is TRUE if we
28663 can use strd to set the memory. */
28664 static bool
28665 arm_block_set_non_vect_profit_p (rtx val,
28666 unsigned HOST_WIDE_INT length,
28667 unsigned HOST_WIDE_INT align,
28668 bool unaligned_p, bool use_strd_p)
28669 {
28670 int num = 0;
28671 /* For a leftover of 0-7 bytes, we can set the memory block using
28672 strb/strh/str with the minimum number of instructions. */
28673 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28674
28675 if (unaligned_p)
28676 {
28677 num = arm_const_inline_cost (SET, val);
28678 num += length / align + length % align;
28679 }
28680 else if (use_strd_p)
28681 {
28682 num = arm_const_double_inline_cost (val);
28683 num += (length >> 3) + leftover[length & 7];
28684 }
28685 else
28686 {
28687 num = arm_const_inline_cost (SET, val);
28688 num += (length >> 2) + leftover[length & 3];
28689 }
28690
28691 /* We may be able to combine last pair STRH/STRB into a single STR
28692 by shifting one byte back. */
28693 if (unaligned_access && length > 3 && (length & 3) == 3)
28694 num--;
28695
28696 return (num <= arm_block_set_max_insns ());
28697 }
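/* A worked example of the costing above: a word-aligned memset of 15
   bytes on the non-strd path needs (15 >> 2) = 3 word stores plus
   leftover[15 & 3] = leftover[3] = 2 stores (one strh and one strb),
   plus the constant-load cost (assume 1 for a simple value), i.e. 6
   instructions.  With unaligned access available the trailing strh/strb
   pair is merged into a single str, giving 5, so the block is set
   inline only if 5 <= arm_block_set_max_insns ().  */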
28698
28699 /* Return TRUE if it's profitable to set a block of memory for the
28700 vectorized case. LENGTH is the number of bytes to set.
28701 ALIGN is the alignment of destination memory in bytes.
28702 MODE is the vector mode used to set the memory. */
28703 static bool
28704 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28705 unsigned HOST_WIDE_INT align,
28706 machine_mode mode)
28707 {
28708 int num;
28709 bool unaligned_p = ((align & 3) != 0);
28710 unsigned int nelt = GET_MODE_NUNITS (mode);
28711
28712 /* Instruction loading constant value. */
28713 num = 1;
28714 /* Instructions storing the memory. */
28715 num += (length + nelt - 1) / nelt;
28716 /* Instructions adjusting the address expression. We only need to
28717 adjust the address expression if it is 4-byte aligned and the
28718 leftover bytes can only be stored by a misaligned store instruction. */
28719 if (!unaligned_p && (length & 3) != 0)
28720 num++;
28721
28722 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28723 if (!unaligned_p && mode == V16QImode)
28724 num--;
28725
28726 return (num <= arm_block_set_max_insns ());
28727 }
28728
28729 /* Set a block of memory using vectorization instructions for the
28730 unaligned case. We fill the first LENGTH bytes of the memory
28731 area starting from DSTBASE with byte constant VALUE. ALIGN is
28732 the alignment requirement of memory. Return TRUE if succeeded. */
28733 static bool
28734 arm_block_set_unaligned_vect (rtx dstbase,
28735 unsigned HOST_WIDE_INT length,
28736 unsigned HOST_WIDE_INT value,
28737 unsigned HOST_WIDE_INT align)
28738 {
28739 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28740 rtx dst, mem;
28741 rtx val_elt, val_vec, reg;
28742 rtx rval[MAX_VECT_LEN];
28743 rtx (*gen_func) (rtx, rtx);
28744 machine_mode mode;
28745 unsigned HOST_WIDE_INT v = value;
28746
28747 gcc_assert ((align & 0x3) != 0);
28748 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28749 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28750 if (length >= nelt_v16)
28751 {
28752 mode = V16QImode;
28753 gen_func = gen_movmisalignv16qi;
28754 }
28755 else
28756 {
28757 mode = V8QImode;
28758 gen_func = gen_movmisalignv8qi;
28759 }
28760 nelt_mode = GET_MODE_NUNITS (mode);
28761 gcc_assert (length >= nelt_mode);
28762 /* Skip if it isn't profitable. */
28763 if (!arm_block_set_vect_profit_p (length, align, mode))
28764 return false;
28765
28766 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28767 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28768
28769 v = sext_hwi (v, BITS_PER_WORD);
28770 val_elt = GEN_INT (v);
28771 for (j = 0; j < nelt_mode; j++)
28772 rval[j] = val_elt;
28773
28774 reg = gen_reg_rtx (mode);
28775 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28776 /* Emit instruction loading the constant value. */
28777 emit_move_insn (reg, val_vec);
28778
28779 /* Handle nelt_mode bytes in a vector. */
28780 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28781 {
28782 emit_insn ((*gen_func) (mem, reg));
28783 if (i + 2 * nelt_mode <= length)
28784 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28785 }
28786
28787 /* If at least nelt_v8 bytes are left over, we must be in
28788 V16QImode. */
28789 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28790
28791 /* Handle (8, 16) bytes leftover. */
28792 if (i + nelt_v8 < length)
28793 {
28794 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28795 /* We are shifting bytes back, set the alignment accordingly. */
28796 if ((length & 1) != 0 && align >= 2)
28797 set_mem_align (mem, BITS_PER_UNIT);
28798
28799 emit_insn (gen_movmisalignv16qi (mem, reg));
28800 }
28801 /* Handle (0, 8] bytes leftover. */
28802 else if (i < length && i + nelt_v8 >= length)
28803 {
28804 if (mode == V16QImode)
28805 {
28806 reg = gen_lowpart (V8QImode, reg);
28807 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28808 }
28809 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28810 + (nelt_mode - nelt_v8))));
28811 /* We are shifting bytes back, set the alignment accordingly. */
28812 if ((length & 1) != 0 && align >= 2)
28813 set_mem_align (mem, BITS_PER_UNIT);
28814
28815 emit_insn (gen_movmisalignv8qi (mem, reg));
28816 }
28817
28818 return true;
28819 }
28820
28821 /* Set a block of memory using vectorization instructions for the
28822 aligned case. We fill the first LENGTH bytes of the memory area
28823 starting from DSTBASE with byte constant VALUE. ALIGN is the
28824 alignment requirement of memory. Return TRUE if succeeded. */
28825 static bool
28826 arm_block_set_aligned_vect (rtx dstbase,
28827 unsigned HOST_WIDE_INT length,
28828 unsigned HOST_WIDE_INT value,
28829 unsigned HOST_WIDE_INT align)
28830 {
28831 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28832 rtx dst, addr, mem;
28833 rtx val_elt, val_vec, reg;
28834 rtx rval[MAX_VECT_LEN];
28835 machine_mode mode;
28836 unsigned HOST_WIDE_INT v = value;
28837
28838 gcc_assert ((align & 0x3) == 0);
28839 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28840 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28841 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28842 mode = V16QImode;
28843 else
28844 mode = V8QImode;
28845
28846 nelt_mode = GET_MODE_NUNITS (mode);
28847 gcc_assert (length >= nelt_mode);
28848 /* Skip if it isn't profitable. */
28849 if (!arm_block_set_vect_profit_p (length, align, mode))
28850 return false;
28851
28852 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28853
28854 v = sext_hwi (v, BITS_PER_WORD);
28855 val_elt = GEN_INT (v);
28856 for (j = 0; j < nelt_mode; j++)
28857 rval[j] = val_elt;
28858
28859 reg = gen_reg_rtx (mode);
28860 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28861 /* Emit instruction loading the constant value. */
28862 emit_move_insn (reg, val_vec);
28863
28864 i = 0;
28865 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28866 if (mode == V16QImode)
28867 {
28868 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28869 emit_insn (gen_movmisalignv16qi (mem, reg));
28870 i += nelt_mode;
28871 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28872 if (i + nelt_v8 < length && i + nelt_v16 > length)
28873 {
28874 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28875 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28876 /* We are shifting bytes back, set the alignment accordingly. */
28877 if ((length & 0x3) == 0)
28878 set_mem_align (mem, BITS_PER_UNIT * 4);
28879 else if ((length & 0x1) == 0)
28880 set_mem_align (mem, BITS_PER_UNIT * 2);
28881 else
28882 set_mem_align (mem, BITS_PER_UNIT);
28883
28884 emit_insn (gen_movmisalignv16qi (mem, reg));
28885 return true;
28886 }
28887 /* Fall through for bytes leftover. */
28888 mode = V8QImode;
28889 nelt_mode = GET_MODE_NUNITS (mode);
28890 reg = gen_lowpart (V8QImode, reg);
28891 }
28892
28893 /* Handle 8 bytes in a vector. */
28894 for (; (i + nelt_mode <= length); i += nelt_mode)
28895 {
28896 addr = plus_constant (Pmode, dst, i);
28897 mem = adjust_automodify_address (dstbase, mode, addr, i);
28898 emit_move_insn (mem, reg);
28899 }
28900
28901 /* Handle single word leftover by shifting 4 bytes back. We can
28902 use aligned access for this case. */
28903 if (i + UNITS_PER_WORD == length)
28904 {
28905 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28906 mem = adjust_automodify_address (dstbase, mode,
28907 addr, i - UNITS_PER_WORD);
28908 /* We are shifting 4 bytes back, set the alignment accordingly. */
28909 if (align > UNITS_PER_WORD)
28910 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28911
28912 emit_move_insn (mem, reg);
28913 }
28914 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28915 We have to use unaligned access for this case. */
28916 else if (i < length)
28917 {
28918 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28919 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28920 /* We are shifting bytes back, set the alignment accordingly. */
28921 if ((length & 1) == 0)
28922 set_mem_align (mem, BITS_PER_UNIT * 2);
28923 else
28924 set_mem_align (mem, BITS_PER_UNIT);
28925
28926 emit_insn (gen_movmisalignv8qi (mem, reg));
28927 }
28928
28929 return true;
28930 }
28931
28932 /* Set a block of memory using plain strh/strb instructions, only
28933 using instructions allowed by ALIGN on the processor. We fill the
28934 first LENGTH bytes of the memory area starting from DSTBASE
28935 with byte constant VALUE. ALIGN is the alignment requirement
28936 of memory. */
28937 static bool
28938 arm_block_set_unaligned_non_vect (rtx dstbase,
28939 unsigned HOST_WIDE_INT length,
28940 unsigned HOST_WIDE_INT value,
28941 unsigned HOST_WIDE_INT align)
28942 {
28943 unsigned int i;
28944 rtx dst, addr, mem;
28945 rtx val_exp, val_reg, reg;
28946 machine_mode mode;
28947 HOST_WIDE_INT v = value;
28948
28949 gcc_assert (align == 1 || align == 2);
28950
28951 if (align == 2)
28952 v |= (value << BITS_PER_UNIT);
28953
28954 v = sext_hwi (v, BITS_PER_WORD);
28955 val_exp = GEN_INT (v);
28956 /* Skip if it isn't profitable. */
28957 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28958 align, true, false))
28959 return false;
28960
28961 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28962 mode = (align == 2 ? HImode : QImode);
28963 val_reg = force_reg (SImode, val_exp);
28964 reg = gen_lowpart (mode, val_reg);
28965
28966 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28967 {
28968 addr = plus_constant (Pmode, dst, i);
28969 mem = adjust_automodify_address (dstbase, mode, addr, i);
28970 emit_move_insn (mem, reg);
28971 }
28972
28973 /* Handle single byte leftover. */
28974 if (i + 1 == length)
28975 {
28976 reg = gen_lowpart (QImode, val_reg);
28977 addr = plus_constant (Pmode, dst, i);
28978 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28979 emit_move_insn (mem, reg);
28980 i++;
28981 }
28982
28983 gcc_assert (i == length);
28984 return true;
28985 }
28986
28987 /* Set a block of memory using plain strd/str/strh/strb instructions,
28988 to permit unaligned copies on processors which support unaligned
28989 semantics for those instructions. We fill the first LENGTH bytes
28990 of the memory area starting from DSTBASE with byte constant VALUE.
28991 ALIGN is the alignment requirement of memory. */
28992 static bool
28993 arm_block_set_aligned_non_vect (rtx dstbase,
28994 unsigned HOST_WIDE_INT length,
28995 unsigned HOST_WIDE_INT value,
28996 unsigned HOST_WIDE_INT align)
28997 {
28998 unsigned int i;
28999 rtx dst, addr, mem;
29000 rtx val_exp, val_reg, reg;
29001 unsigned HOST_WIDE_INT v;
29002 bool use_strd_p;
29003
29004 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29005 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29006
29007 v = (value | (value << 8) | (value << 16) | (value << 24));
29008 if (length < UNITS_PER_WORD)
29009 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29010
29011 if (use_strd_p)
29012 v |= (v << BITS_PER_WORD);
29013 else
29014 v = sext_hwi (v, BITS_PER_WORD);
29015
29016 val_exp = GEN_INT (v);
29017 /* Skip if it isn't profitable. */
29018 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29019 align, false, use_strd_p))
29020 {
29021 if (!use_strd_p)
29022 return false;
29023
29024 /* Try without strd. */
29025 v = (v >> BITS_PER_WORD);
29026 v = sext_hwi (v, BITS_PER_WORD);
29027 val_exp = GEN_INT (v);
29028 use_strd_p = false;
29029 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29030 align, false, use_strd_p))
29031 return false;
29032 }
29033
29034 i = 0;
29035 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29036 /* Handle double words using strd if possible. */
29037 if (use_strd_p)
29038 {
29039 val_reg = force_reg (DImode, val_exp);
29040 reg = val_reg;
29041 for (; (i + 8 <= length); i += 8)
29042 {
29043 addr = plus_constant (Pmode, dst, i);
29044 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29045 emit_move_insn (mem, reg);
29046 }
29047 }
29048 else
29049 val_reg = force_reg (SImode, val_exp);
29050
29051 /* Handle words. */
29052 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29053 for (; (i + 4 <= length); i += 4)
29054 {
29055 addr = plus_constant (Pmode, dst, i);
29056 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29057 if ((align & 3) == 0)
29058 emit_move_insn (mem, reg);
29059 else
29060 emit_insn (gen_unaligned_storesi (mem, reg));
29061 }
29062
29063 /* Merge last pair of STRH and STRB into a STR if possible. */
29064 if (unaligned_access && i > 0 && (i + 3) == length)
29065 {
29066 addr = plus_constant (Pmode, dst, i - 1);
29067 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29068 /* We are shifting one byte back, set the alignment accordingly. */
29069 if ((align & 1) == 0)
29070 set_mem_align (mem, BITS_PER_UNIT);
29071
29072 /* Most likely this is an unaligned access, and we can't tell at
29073 compilation time. */
29074 emit_insn (gen_unaligned_storesi (mem, reg));
29075 return true;
29076 }
29077
29078 /* Handle half word leftover. */
29079 if (i + 2 <= length)
29080 {
29081 reg = gen_lowpart (HImode, val_reg);
29082 addr = plus_constant (Pmode, dst, i);
29083 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29084 if ((align & 1) == 0)
29085 emit_move_insn (mem, reg);
29086 else
29087 emit_insn (gen_unaligned_storehi (mem, reg));
29088
29089 i += 2;
29090 }
29091
29092 /* Handle single byte leftover. */
29093 if (i + 1 == length)
29094 {
29095 reg = gen_lowpart (QImode, val_reg);
29096 addr = plus_constant (Pmode, dst, i);
29097 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29098 emit_move_insn (mem, reg);
29099 }
29100
29101 return true;
29102 }
29103
29104 /* Set a block of memory using vectorization instructions for both
29105 aligned and unaligned cases. We fill the first LENGTH bytes of
29106 the memory area starting from DSTBASE with byte constant VALUE.
29107 ALIGN is the alignment requirement of memory. */
29108 static bool
29109 arm_block_set_vect (rtx dstbase,
29110 unsigned HOST_WIDE_INT length,
29111 unsigned HOST_WIDE_INT value,
29112 unsigned HOST_WIDE_INT align)
29113 {
29114 /* Check whether we need to use unaligned store instruction. */
29115 if (((align & 3) != 0 || (length & 3) != 0)
29116 /* Check whether unaligned store instruction is available. */
29117 && (!unaligned_access || BYTES_BIG_ENDIAN))
29118 return false;
29119
29120 if ((align & 3) == 0)
29121 return arm_block_set_aligned_vect (dstbase, length, value, align);
29122 else
29123 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29124 }
29125
29126 /* Expand string store operation. Firstly we try to do that by using
29127 vectorization instructions, then try with ARM unaligned access and
29128 double-word store if profitable. OPERANDS[0] is the destination,
29129 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29130 initialize the memory, OPERANDS[3] is the known alignment of the
29131 destination. */
29132 bool
29133 arm_gen_setmem (rtx *operands)
29134 {
29135 rtx dstbase = operands[0];
29136 unsigned HOST_WIDE_INT length;
29137 unsigned HOST_WIDE_INT value;
29138 unsigned HOST_WIDE_INT align;
29139
29140 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29141 return false;
29142
29143 length = UINTVAL (operands[1]);
29144 if (length > 64)
29145 return false;
29146
29147 value = (UINTVAL (operands[2]) & 0xFF);
29148 align = UINTVAL (operands[3]);
29149 if (TARGET_NEON && length >= 8
29150 && current_tune->string_ops_prefer_neon
29151 && arm_block_set_vect (dstbase, length, value, align))
29152 return true;
29153
29154 if (!unaligned_access && (align & 3) != 0)
29155 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29156
29157 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29158 }
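/* A source-level example of a call that can reach the expander above: a
   memset with a constant value and a constant length not exceeding the
   64-byte cutoff checked above.  Whether it is actually inlined still
   depends on the profitability checks; the function name and the length
   of 24 are illustrative only:

     #include <string.h>

     void
     clear_header (unsigned char *buf)
     {
       memset (buf, 0xAB, 24);   // the value is masked to its low byte above
     }  */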
29159
29160 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29161
29162 static unsigned HOST_WIDE_INT
29163 arm_asan_shadow_offset (void)
29164 {
29165 return (unsigned HOST_WIDE_INT) 1 << 29;
29166 }
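/* For reference, AddressSanitizer uses the usual shadow scale of 3, so
   with the offset returned above a shadow address is computed as

     shadow = (addr >> 3) + (1 << 29);   // i.e. addr / 8 + 0x20000000

   placing the shadow of address 0 at 0x20000000 on 32-bit ARM.  */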
29167
29168
29169 /* This is a temporary fix for PR60655. Ideally we need
29170 to handle most of these cases in the generic part but
29171 currently we reject minus (..) (sym_ref). We try to
29172 ameliorate the case with minus (sym_ref1) (sym_ref2)
29173 where they are in the same section. */
29174
29175 static bool
29176 arm_const_not_ok_for_debug_p (rtx p)
29177 {
29178 tree decl_op0 = NULL;
29179 tree decl_op1 = NULL;
29180
29181 if (GET_CODE (p) == MINUS)
29182 {
29183 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29184 {
29185 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29186 if (decl_op1
29187 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29188 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29189 {
29190 if ((TREE_CODE (decl_op1) == VAR_DECL
29191 || TREE_CODE (decl_op1) == CONST_DECL)
29192 && (TREE_CODE (decl_op0) == VAR_DECL
29193 || TREE_CODE (decl_op0) == CONST_DECL))
29194 return (get_variable_section (decl_op1, false)
29195 != get_variable_section (decl_op0, false));
29196
29197 if (TREE_CODE (decl_op1) == LABEL_DECL
29198 && TREE_CODE (decl_op0) == LABEL_DECL)
29199 return (DECL_CONTEXT (decl_op1)
29200 != DECL_CONTEXT (decl_op0));
29201 }
29202
29203 return true;
29204 }
29205 }
29206
29207 return false;
29208 }
29209
29210 /* Return TRUE if X is a reference to a value in a constant pool. */
29211 extern bool
29212 arm_is_constant_pool_ref (rtx x)
29213 {
29214 return (MEM_P (x)
29215 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29216 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29217 }
29218
29219 /* If MEM is in the form of [base+offset], extract the two parts
29220 of the address, setting BASE and OFFSET; otherwise return false
29221 after clearing BASE and OFFSET. */
29222
29223 static bool
29224 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29225 {
29226 rtx addr;
29227
29228 gcc_assert (MEM_P (mem));
29229
29230 addr = XEXP (mem, 0);
29231
29232 /* Strip off const from addresses like (const (addr)). */
29233 if (GET_CODE (addr) == CONST)
29234 addr = XEXP (addr, 0);
29235
29236 if (GET_CODE (addr) == REG)
29237 {
29238 *base = addr;
29239 *offset = const0_rtx;
29240 return true;
29241 }
29242
29243 if (GET_CODE (addr) == PLUS
29244 && GET_CODE (XEXP (addr, 0)) == REG
29245 && CONST_INT_P (XEXP (addr, 1)))
29246 {
29247 *base = XEXP (addr, 0);
29248 *offset = XEXP (addr, 1);
29249 return true;
29250 }
29251
29252 *base = NULL_RTX;
29253 *offset = NULL_RTX;
29254
29255 return false;
29256 }
29257
29258 /* If INSN is a load or store of address in the form of [base+offset],
29259 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
29260 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29261 otherwise return FALSE. */
29262
29263 static bool
29264 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29265 {
29266 rtx x, dest, src;
29267
29268 gcc_assert (INSN_P (insn));
29269 x = PATTERN (insn);
29270 if (GET_CODE (x) != SET)
29271 return false;
29272
29273 src = SET_SRC (x);
29274 dest = SET_DEST (x);
29275 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29276 {
29277 *is_load = false;
29278 extract_base_offset_in_addr (dest, base, offset);
29279 }
29280 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29281 {
29282 *is_load = true;
29283 extract_base_offset_in_addr (src, base, offset);
29284 }
29285 else
29286 return false;
29287
29288 return (*base != NULL_RTX && *offset != NULL_RTX);
29289 }
29290
29291 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29292
29293 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29294 and PRI are only calculated for these instructions. For other instructions,
29295 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
29296 of instruction fusion can be supported by returning different priorities.
29297
29298 It's important that irrelevant instructions get the largest FUSION_PRI. */
29299
29300 static void
29301 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29302 int *fusion_pri, int *pri)
29303 {
29304 int tmp, off_val;
29305 bool is_load;
29306 rtx base, offset;
29307
29308 gcc_assert (INSN_P (insn));
29309
29310 tmp = max_pri - 1;
29311 if (!fusion_load_store (insn, &base, &offset, &is_load))
29312 {
29313 *pri = tmp;
29314 *fusion_pri = tmp;
29315 return;
29316 }
29317
29318 /* Load goes first. */
29319 if (is_load)
29320 *fusion_pri = tmp - 1;
29321 else
29322 *fusion_pri = tmp - 2;
29323
29324 tmp /= 2;
29325
29326 /* INSN with smaller base register goes first. */
29327 tmp -= ((REGNO (base) & 0xff) << 20);
29328
29329 /* INSN with smaller offset goes first. */
29330 off_val = (int)(INTVAL (offset));
29331 if (off_val >= 0)
29332 tmp -= (off_val & 0xfffff);
29333 else
29334 tmp += ((- off_val) & 0xfffff);
29335
29336 *pri = tmp;
29337 return;
29338 }
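/* A worked example of the priorities above, assuming two loads through
   the same base register r4 (REGNO 4) at offsets 0 and 4, i.e. the two
   halves of a potential ldrd:

     load at [r4]      ->  fusion_pri = max_pri - 2,
                           pri = (max_pri - 1) / 2 - (4 << 20)
     load at [r4, #4]  ->  fusion_pri = max_pri - 2,
                           pri = (max_pri - 1) / 2 - (4 << 20) - 4

   Both share the same FUSION_PRI and their PRI values differ only by the
   offset, so the scheduler keeps them adjacent as fusion candidates.  */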
29339 #include "gt-arm.h"